# NOTE: The lines below are artifacts of the web repository viewer this file
# was copied from (file-listing header plus a Unicode-confusables warning).
# They are preserved as comments so the module remains valid Python.
#   Files
#   kman/unitest.kman.py
#   2020-06-30 08:31:21 +08:00
#   396 lines | 16 KiB | Python
#   Raw Permalink Blame History
#   "This file contains ambiguous Unicode characters ... Unicode characters
#    that might be confused with other characters. If you think that this is
#    intentional, you can safely ignore this warning. Use the Escape button
#    to reveal them."
#########################################################
## @file : test_kman.py
## @desc : unit test for kindle management
## @create : 2020/05/26
## @author : Chengan
## @email : douboer@gmail.com
#########################################################
import unittest
import json
from collections import defaultdict
from kman import *
from parseweb import *
from mtable import *
# --- Module-level logging setup ---------------------------------------------
# Every test method writes its diagnostic dumps through this logger; output
# goes to the file 'debug' in the current working directory.
logger = logging.getLogger()
# Verbose alternative kept for debugging:
# formatter = logging.Formatter(
#     '%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s')
# The original passed '' here and silently relied on logging.Formatter
# falling back to its default '%(message)s' for a falsy fmt; spell the
# format out explicitly so the intent is visible (output is unchanged).
formatter = logging.Formatter('%(message)s')
handler = logging.FileHandler('debug')
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.setLevel(logging.DEBUG)
class TestKman(unittest.TestCase):
    """Unit tests for the kindle-clippings toolchain (kMan, Util, mTable,
    bookInfoSpide)."""

    # initial: build per-test objects plus the shared raw clipping fixtures
    def setUp(self):
        """Create fresh kMan/Util instances and the fixture sections.

        Each ``t_*_sec`` list mimics one section of a Kindle
        'My Clippings.txt' file: [book-title line, metadata line,
        optional content line].  The fixtures are module-level globals so
        every test method (and ``cre_tbooks``) can reuse them.
        """
        LOG2FILE = 1   # NOTE(review): unused locally — presumably mirrors a kman flag; confirm
        DELIMITER= '|' # NOTE(review): unused locally — looks like kman's column delimiter; confirm
        self.km = kMan()    # object under test
        self.util = Util()  # path helper under test
        global t_bm_sec     # bookmark section (metadata only, no content line)
        global t_hl_sec     # highlight section
        global t_nt_sec     # note section
        global t_hl_sec2    # highlight with a long book title
        global t_hl_sec3    # same position as t_hl_sec2, different content
        global t_hl_sec4    # same position again, different content
        self.t_num_nt = 0   # running section index, advanced by cre_tbooks()
        global t_books      # bookname -> {'author': ..., '<sidx>': clip-dict}
        t_bm_sec = ["""另一半中国史 (高洪雷)
""", \
"""- 您在位置 #2468 的书签 | 添加于 2020年1月12日星期日 下午11:09:06
"""]
        t_hl_sec = ["""薛兆丰经济学讲义 (薛兆丰)
""", \
"""- 您在位置 #1408-1410的标注 | 添加于 2020年1月13日星期一 上午8:11:05
""", \
"""边际就是“新增”带来的“新增”。
"""]
        t_nt_sec = ["""薛兆丰经济学讲义 (薛兆丰)
""", \
"""- 您在位置 #4286 的笔记 | 添加于 2020年1月30日星期四 下午10:26:31
""", \
"""山寨 假货 问题
"""]
        t_hl_sec2 = ["""枪炮、病菌与钢铁 : 人类社会的命运 (世纪人文系列丛书·开放人文) (贾雷德·戴蒙德)
""",\
"""- 您在位置 #4267-4268的标注 | 添加于 2020年1月29日星期三 上午12:42:32
""",\
"""从柏拉图到马克思的所有政治理论家都提出过这个问题xxxx"""]
        t_hl_sec3 = ["""枪炮、病菌与钢铁(贾雷德·戴蒙德)""",\
"""- 您在位置 #4267-4268的标注 | 添加于 2020年1月29日星期三 上午12:42:32""",\
"""从柏拉图到马克思的所有政治理论家都提出过这个问题yyyy"""]
        t_hl_sec4 = ["""枪炮、病菌与钢铁 : 人类社会的命运 (世纪人文系列丛书·开放人文) (贾雷德·戴蒙德)""",\
"""- 您在位置 #4267-4268的标注 | 添加于 2020年1月29日星期三 上午12:42:32""",\
"""从柏拉图到马克思的所有政治理论家都提出过这个问题zzzz"""]
        t_books = defaultdict(dict)
def cre_tbooks(self):
# parsing section & fill data structure
self.t_num_nt = 0
t_secd = self.km.parse_section(t_bm_sec,0)
self.t_num_nt += 1
t_secd = self.km.parse_section(t_hl_sec,self.t_num_nt)
bn = t_secd['bookname']
t_books[bn]['author'] = t_secd[bn]['author']
t_books[bn][str(self.t_num_nt)] = t_secd[bn][str(self.t_num_nt)]
t_secd.clear()
self.t_num_nt += 1
t_secd = self.km.parse_section(t_nt_sec,self.t_num_nt)
bn = t_secd['bookname']
t_books[bn]['author'] = t_secd[bn]['author']
t_books[bn][str(self.t_num_nt)] = t_secd[bn][str(self.t_num_nt)]
t_secd.clear()
self.t_num_nt += 1
t_secd = self.km.parse_section(t_hl_sec2,self.t_num_nt)
bn = t_secd['bookname']
t_books[bn]['author'] = t_secd[bn]['author']
t_books[bn][str(self.t_num_nt)] = t_secd[bn][str(self.t_num_nt)]
t_secd.clear()
self.t_num_nt += 1
t_secd = self.km.parse_section(t_hl_sec3,self.t_num_nt)
bn = t_secd['bookname']
t_books[bn]['author'] = t_secd[bn]['author']
t_books[bn][str(self.t_num_nt)] = t_secd[bn][str(self.t_num_nt)]
t_secd.clear()
self.t_num_nt += 1
t_secd = self.km.parse_section(t_hl_sec4,self.t_num_nt)
bn = t_secd['bookname']
t_books[bn]['author'] = t_secd[bn]['author']
t_books[bn][str(self.t_num_nt)] = t_secd[bn][str(self.t_num_nt)]
t_secd.clear()
return t_books
# test function parse_section
def test_parse_section(self):
# parsing section & fill data structure
t_secd = self.km.parse_section(t_bm_sec,0)
self.assertEqual(t_secd,False)
t_secd = self.km.parse_section(t_hl_sec,1)
bn = t_secd['bookname']
self.assertIsNotNone(t_secd)
self.assertEqual(bn,'薛兆丰经济学讲义')
self.assertEqual(t_secd[bn]['author'],'薛兆丰')
self.assertEqual(t_secd[bn]['1']['type'],'HL')
self.assertEqual(t_secd[bn]['1']['position'],'1408-1410')
self.assertEqual(t_secd[bn]['1']['day'],'2020年1月13日')
self.assertEqual(t_secd[bn]['1']['week'],'星期一')
self.assertEqual(t_secd[bn]['1']['meridiem'],'上午')
self.assertEqual(t_secd[bn]['1']['time'],'8:11:05')
self.assertEqual(t_secd[bn]['1']['content'],'边际就是“新增”带来的“新增”。\n')
t_books[bn]['author'] = t_secd[bn]['author']
t_books[bn]['1'] = t_secd[bn]['1']
t_secd.clear()
t_secd = self.km.parse_section(t_nt_sec,2)
bn = t_secd['bookname']
self.assertIsNotNone(t_secd)
self.assertEqual(bn,'薛兆丰经济学讲义')
self.assertEqual(t_secd[bn]['author'],'薛兆丰')
self.assertEqual(t_secd[bn]['2']['type'],'NT')
self.assertEqual(t_secd[bn]['2']['position'],'4286')
self.assertEqual(t_secd[bn]['2']['day'],'2020年1月30日')
self.assertEqual(t_secd[bn]['2']['week'],'星期四')
self.assertEqual(t_secd[bn]['2']['meridiem'],'下午')
self.assertEqual(t_secd[bn]['2']['time'],'10:26:31')
self.assertEqual(t_secd[bn]['2']['content'],'山寨 假货 问题\n')
t_books[bn]['author'] = t_secd[bn]['author']
t_books[bn]['2'] = t_secd[bn]['2']
t_secd.clear()
# test drop_duplicate
def test_drop_duplicate(self):
t_books = self.cre_tbooks()
t_secd = self.km.parse_section(t_hl_sec,3)
bn = t_secd['bookname']
t_books_du = t_books.copy()
t_books_du[bn]['3'] = t_secd[bn]['3']
t_books_du[bn]['4'] = t_secd[bn]['3']
self.assertIsInstance(t_books_du[bn]['3'],dict)
try:
t_books_du = self.km.drop_duplicate(t_books_du)
t = t_books_du[bn]['3']
except KeyError as keyerror:
print("与预期匹配sidx 3 重复被删除,抛出: %s" % 'keyerror')
t_secd.clear()
# test function format_time()
def test_format_time(self):
t_ds = '2020年1月13日 星期一 下午 8:11:05'
t_ds = self.km.format_time(t_ds)
self.assertEqual(t_ds, '2020/1/13 20:11:05')
# test function format_data
def test_format_data(self):
t_books = self.cre_tbooks()
t_out = self.km.format_note_data(t_books, ft='MD')
self.assertEqual(t_out[0], 'TYPE|BOOKNAME|AUTHOR|MARKTIME|CONTENT')
self.assertEqual(t_out[1], '--|--|--|--|--')
self.assertEqual(t_out[2], 'HL|薛兆丰经济学讲义|薛兆丰|2020/1/13 8:11:05|边际就是“新增”带来的“新增”。\n')
t_out.clear()
def test_add_note_to_highlight(self):
t_books = self.cre_tbooks()
t_books_remove_nt = self.km.add_note_to_highlight(t_books)
self.assertEqual((t_books_remove_nt['薛兆丰经济学讲义']['1']['content']).replace('\n',''),\
'边际就是“新增”带来的“新增”。'+NTPREF+ '山寨 假货 问题')
def test_get_kindle_path(self):
kp = self.km.get_kindle_path()
s = u"kindle disconnected" if not kp else u"kindle connected {}".format(kp)
print(s)
def test_get_bookname_num(self):
t_books = self.cre_tbooks()
[nu, bn] = self.km.get_bookname_num(t_books)
self.assertEqual(bn['薛兆丰经济学讲义'],2)
def test_get_author_num(self):
t_books = self.cre_tbooks()
[nu, bn] = self.km.get_author_num(t_books)
self.assertEqual(bn['薛兆丰'],2)
def test_filter_clips(self):
t_books = self.cre_tbooks()
# no filter
bn = self.km.filter_clips(t_books, '薛兆丰', 0)
logger.debug('========== 1 ==========\n')
logger.debug(json.dumps(bn, indent=2, ensure_ascii=False))
bn = {}
# by bookname
bn = self.km.filter_clips(t_books, '枪炮、病菌与钢铁 : 人类社会的命运 (世纪人文系列丛书·开放人文)', 1)
logger.debug('========== 2 ==========\n')
logger.debug(json.dumps(bn, indent=2, ensure_ascii=False))
bn = {}
# by author
bn = self.km.filter_clips(t_books, '贾雷德·戴蒙德', 2)
logger.debug('========== 3 ==========\n')
logger.debug(json.dumps(bn, indent=2, ensure_ascii=False))
bn = {}
# test import words & filter words
def test_import_words(self):
import pprint
[bookinfo, lookups, words] = self.km.import_words()
# how to beauty print to logger file
logger.debug('========== 4 ==========\n')
logger.debug(json.dumps(bookinfo, indent=2, ensure_ascii=False))
logger.debug('========== 5 ==========\n')
logger.debug(json.dumps(lookups, indent=2, ensure_ascii=False))
logger.debug('========== 6 ==========\n')
logger.debug(json.dumps(words, indent=2, ensure_ascii=False))
# test filter_words()
logger.debug('========== 7 ==========\n')
self.km.filter_words(self.km.import_words(), '中国历史风云录 (陈舜臣作品)', tp=0)
logger.debug('========== 8 ==========\n')
self.km.filter_words(self.km.import_words(), 'zh:闾', tp=1)
def test_util(self):
logger.debug( 'test get_app_path: {}'.format( self.util.get_app_path()))
def test_mtable(self):
print('\n---------------------')
data = [['Ohiox','Ohio','Ohio','Nevada','Nevada'],
[2000,2001,2002,2001,2002],
[1.5,1.7,3.6,2.4,2.9],
[1.5,1.7,3.6,2.4,2.9],
]
mt = mTable(data,
index = ['one', 'two','three','four','five'],
columns = ['year','state','pop','debt','xx'])
print('== frame\n', mt)
print('== frame.iat[0, 0]\n', mt.get_iat(0, 0))
print('== frame.iat[1, 1]\n', mt.get_iat(1, 1))
print('== frame.iat[2, 2]\n', mt.get_iat(2, 2))
print('== frame.iat[2, 1]\n', mt.get_iat(2, 1))
print('== frame.shape[0]\n', mt.get_num_rows())
print('== frame.shape[1]\n', mt.get_num_columns())
print('== frame.columns\n', mt.get_columns())
print('== frame.index\n', mt.get_index())
mt = mTable(data,
columns = ['year','state','pop','debt','xx'])
print('== frame\n', mt)
print('== frame.iat[2, 1]\n', mt.get_iat(2, 1))
print('== frame.shape[0]\n', mt.get_num_rows())
print('== frame.shape[1]\n', mt.get_num_columns())
print('== frame.columns\n', mt.get_columns())
print('== frame.index\n', mt.get_index())
mt = mTable(data)
mt.set_repr_width(20)
print('== frame\n', mt)
print('== frame.iat[2, 1]\n', mt.get_iat(2, 1))
print('== frame.shape[0]\n', mt.get_num_rows())
print('== frame.shape[1]\n', mt.get_num_columns())
print('== frame.columns\n', mt.get_columns())
print('== frame.index\n', mt.get_index())
mt = mTable()
print('== frame\n', mt)
print('== frame.iat[2, 1]\n', mt.get_iat(2, 1))
print('== frame.shape[0]\n', mt.get_num_rows())
print('== frame.shape[1]\n', mt.get_num_columns())
print('== frame.columns\n', mt.get_columns())
print('== frame.index\n', mt.get_index())
"""
def test_search_clip(self):
pass
def test_statistic(self):
pass
def test_dict2json(self):
pass
def test_json2dict(self):
pass
def test_import_clips(self):
pass
"""
def test_parseweb(self):
spide = bookInfoSpide()
"""
for bkname in testbooks:
bkname = re.split(r'[\(\-\:_\s]',bkname.strip())[0]
print(bkname)
bkinfo = spide.grab_book_info(bkname)
filter_bkinfo = spide.filter_spide_book(bkinfo)
if filter_bkinfo: spide.down_book_img(filter_bkinfo)
logger.debug('================ {} ================'.format(bkname))
logger.debug(json.dumps(bkinfo,indent=2, ensure_ascii=False))
logger.debug(json.dumps(filter_bkinfo,indent=2, ensure_ascii=False))
"""
def test_kman(self):
#books = defaultdict(dict)
km = kMan()
books = km.import_clips()
# remove duplication
km.drop_duplicate(books)
# test search note function
searchnote = km.search_clip(books, '三大都市圈', 'ALL', 'CONTENT')
if searchnote[0] > 0: km.export_notes(searchnote[1], 'searchcontent', ft='MD')
searchnote = km.search_clip(books, '经济', 'ALL', 'TITLE')
if searchnote[0] > 0: km.export_notes(searchnote[1], 'searchtitle', ft='MD')
searchnote = km.search_clip(books, '巴曙松', 'ALL', 'AUTHOR')
if searchnote[0] > 0: km.export_notes(searchnote[1], 'searchauthor', ft='MD')
#print(km.get_bookname_num(books))
#print(km.get_author_num(books))
# add note content to hightlight, then delete note
km.add_note_to_highlight(books)
# test dict json convert
with open(os.path.join(CURRPATH,'xx'), 'w', encoding='utf8', errors='ignore') as fw:
fw.write(km.dict2json(books))
if km.json2dict(os.path.join(CURRPATH,'xx'))==books: print( 'test OK')
km.export_notes(books, OUTPREF, ft='MD')
# print data with json format
logger.debug(json.dumps(books, indent=4, sort_keys=True, ensure_ascii=False))
def test_somepath(self):
frozen = 'not'
if getattr(sys, 'frozen', False):
# we are running in a bundle
frozen = 'ever so'
bundle_dir = sys._MEIPASS
else:
# we are running in a normal Python environment
bundle_dir = os.path.dirname(os.path.abspath(__file__))
print( 'we are',frozen,'frozen')
print( 'bundle dir is', bundle_dir )
print( 'sys.argv[0] is', sys.argv[0] )
print( 'sys.executable is', sys.executable )
print( 'os.getcwd is', os.getcwd() )
print('sys.path[0]', sys.path[0])
print('sys.argv[0]', sys.argv[0])
print('os.path.realpath(sys.executable)', os.path.realpath(sys.executable))
print('os.path.realpath(sys.argv[0]))', os.path.realpath(sys.argv[0]))
print('os.path.dirname(os.path.realpath(sys.executable))',
os.path.dirname(os.path.realpath(sys.executable)))
print('os.path.dirname(os.path.realpath(sys.argv[0]))',
os.path.dirname(os.path.realpath(sys.argv[0])))
print('os.path.dirname(os.path.abspath(__file__))',
os.path.dirname(os.path.abspath(__file__)))
# clean
def tearDown(self):
pass
if __name__ == '__main__':
    # A hand-picked suite (kept for reference) would force a fixed order:
    #   suite = unittest.TestSuite()
    #   suite.addTest(TestKman('test_parse_section'))
    #   suite.addTest(TestKman('test_format_time'))
    #   suite.addTest(TestKman('test_format_data'))
    #   suite.addTest(TestKman('test_drop_duplicate'))
    #   suite.addTest(TestKman('test_add_note_to_highlight'))
    #   unittest.TextTestRunner(verbosity=2).run(suite)
    # Discovery order is not a callable sequence — tests run alphabetically.
    unittest.main()