kindle manager

2020-06-14 18:37:27 +08:00
parent 854d167d20
commit 9d5b787ff3
3 changed files with 0 additions and 141 deletions
--- a/unitest.kman.py
+++ b/unitest.kman.py
@@ -0,0 +1,268 @@
+
+#########################################################
+## @file   : test_kman.py
+## @desc   : unit test for kindle management
+## @create : 2020/05/26
+## @author : Chengan
+## @email  : douboer@gmail.com
+#########################################################
+
+import unittest
+import json
+
+from collections import defaultdict
+
+from kman import *
+
+# log info
+logger = logging.getLogger()
+#formatter = logging.Formatter
+#    ('%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s')
+formatter = logging.Formatter('')
+handler = logging.FileHandler('debug')
+handler.setFormatter(formatter)
+logger.addHandler(handler)
+logger.setLevel(logging.DEBUG)
+
+class TestKman(unittest.TestCase):
+    # initial
+    def setUp(self):
+        LOG2FILE = 1
+        DELIMITER= '|'
+
+        self.km = kMan()
+
+        global t_bm_sec
+        global t_hl_sec
+        global t_nt_sec
+        global t_hl_sec2
+        global t_hl_sec3
+        global t_hl_sec4
+        self.t_num_nt = 0
+        global t_books
+        t_bm_sec = ["""另一半中国史 (高洪雷)
+""", \
+                """- 您在位置 #2468 的书签 | 添加于 2020年1月12日星期日 下午11:09:06
+"""]
+        t_hl_sec = ["""薛兆丰经济学讲义 (薛兆丰)
+""", \
+                """- 您在位置 #1408-1410的标注 | 添加于 2020年1月13日星期一 上午8:11:05
+""", \
+                """边际就是“新增”带来的“新增”。
+"""]
+        t_nt_sec = ["""薛兆丰经济学讲义 (薛兆丰)
+""", \
+                """- 您在位置 #4286 的笔记 | 添加于 2020年1月30日星期四 下午10:26:31
+""", \
+                """山寨 假货 问题
+"""]
+        t_hl_sec2 = ["""枪炮、病菌与钢铁 : 人类社会的命运 (世纪人文系列丛书·开放人文) (贾雷德·戴蒙德)
+""",\
+                """- 您在位置 #4267-4268的标注 | 添加于 2020年1月29日星期三 上午12:42:32
+""",\
+                """从柏拉图到马克思的所有政治理论家都提出过这个问题xxxx"""]
+        t_hl_sec3 = ["""枪炮、病菌与钢铁(贾雷德·戴蒙德)""",\
+                """- 您在位置 #4267-4268的标注 | 添加于 2020年1月29日星期三 上午12:42:32""",\
+                """从柏拉图到马克思的所有政治理论家都提出过这个问题yyyy"""]
+        t_hl_sec4 = ["""枪炮、病菌与钢铁 : 人类社会的命运 (世纪人文系列丛书·开放人文) (贾雷德·戴蒙德)""",\
+                """- 您在位置 #4267-4268的标注 | 添加于 2020年1月29日星期三 上午12:42:32""",\
+                """从柏拉图到马克思的所有政治理论家都提出过这个问题zzzz"""]
+        t_books = defaultdict(dict)
+
+    def cre_tbooks(self):
+        # parsing section & fill data structure
+        self.t_num_nt = 0
+        t_secd = self.km.parse_section(t_bm_sec,0)
+
+        self.t_num_nt += 1
+        t_secd = self.km.parse_section(t_hl_sec,self.t_num_nt)
+        bn = t_secd['bookname']
+        t_books[bn]['author'] = t_secd[bn]['author']
+        t_books[bn][str(self.t_num_nt)] = t_secd[bn][str(self.t_num_nt)]
+        t_secd.clear()
+
+        self.t_num_nt += 1
+        t_secd = self.km.parse_section(t_nt_sec,self.t_num_nt)
+        bn = t_secd['bookname']
+        t_books[bn]['author'] = t_secd[bn]['author']
+        t_books[bn][str(self.t_num_nt)] = t_secd[bn][str(self.t_num_nt)]
+        t_secd.clear()
+
+        self.t_num_nt += 1
+        t_secd = self.km.parse_section(t_hl_sec2,self.t_num_nt)
+        bn = t_secd['bookname']
+        t_books[bn]['author'] = t_secd[bn]['author']
+        t_books[bn][str(self.t_num_nt)] = t_secd[bn][str(self.t_num_nt)]
+        t_secd.clear()
+
+        self.t_num_nt += 1
+        t_secd = self.km.parse_section(t_hl_sec3,self.t_num_nt)
+        bn = t_secd['bookname']
+        t_books[bn]['author'] = t_secd[bn]['author']
+        t_books[bn][str(self.t_num_nt)] = t_secd[bn][str(self.t_num_nt)]
+        t_secd.clear()
+
+        self.t_num_nt += 1
+        t_secd = self.km.parse_section(t_hl_sec4,self.t_num_nt)
+        bn = t_secd['bookname']
+        t_books[bn]['author'] = t_secd[bn]['author']
+        t_books[bn][str(self.t_num_nt)] = t_secd[bn][str(self.t_num_nt)]
+        t_secd.clear()
+
+        return t_books
+
+    # test function parse_section
+    def test_parse_section(self):
+        # parsing section & fill data structure
+        t_secd = self.km.parse_section(t_bm_sec,0)
+        self.assertEqual(t_secd,False)
+
+        t_secd = self.km.parse_section(t_hl_sec,1)
+        bn = t_secd['bookname']
+        self.assertIsNotNone(t_secd)
+        self.assertEqual(bn,'薛兆丰经济学讲义')
+        self.assertEqual(t_secd[bn]['author'],'薛兆丰')
+        self.assertEqual(t_secd[bn]['1']['type'],'HL')
+        self.assertEqual(t_secd[bn]['1']['position'],'1408-1410')
+        self.assertEqual(t_secd[bn]['1']['day'],'2020年1月13日')
+        self.assertEqual(t_secd[bn]['1']['week'],'星期一')
+        self.assertEqual(t_secd[bn]['1']['meridiem'],'上午')
+        self.assertEqual(t_secd[bn]['1']['time'],'8:11:05')
+        self.assertEqual(t_secd[bn]['1']['content'],'边际就是“新增”带来的“新增”。\n')
+        t_books[bn]['author'] = t_secd[bn]['author']
+        t_books[bn]['1'] = t_secd[bn]['1']
+        t_secd.clear()
+
+        t_secd = self.km.parse_section(t_nt_sec,2)
+        bn = t_secd['bookname']
+        self.assertIsNotNone(t_secd)
+        self.assertEqual(bn,'薛兆丰经济学讲义')
+        self.assertEqual(t_secd[bn]['author'],'薛兆丰')
+        self.assertEqual(t_secd[bn]['2']['type'],'NT')
+        self.assertEqual(t_secd[bn]['2']['position'],'4286')
+        self.assertEqual(t_secd[bn]['2']['day'],'2020年1月30日')
+        self.assertEqual(t_secd[bn]['2']['week'],'星期四')
+        self.assertEqual(t_secd[bn]['2']['meridiem'],'下午')
+        self.assertEqual(t_secd[bn]['2']['time'],'10:26:31')
+        self.assertEqual(t_secd[bn]['2']['content'],'山寨 假货 问题\n')
+        t_books[bn]['author'] = t_secd[bn]['author']
+        t_books[bn]['2'] = t_secd[bn]['2']
+        t_secd.clear()
+
+        # test drop_duplicate
+    def test_drop_duplicate(self):
+        t_books = self.cre_tbooks()
+        t_secd = self.km.parse_section(t_hl_sec,3)
+        bn = t_secd['bookname']
+        t_books_du = t_books.copy()
+        t_books_du[bn]['3'] = t_secd[bn]['3']
+        t_books_du[bn]['4'] = t_secd[bn]['3']
+        self.assertIsInstance(t_books_du[bn]['3'],dict)
+
+        try:
+            t_books_du = self.km.drop_duplicate(t_books_du)
+            t = t_books_du[bn]['3']
+        except KeyError as keyerror:
+            print("与预期匹配，sidx 3 重复被删除，抛出: %s" % 'keyerror')
+
+        t_secd.clear()
+
+    # test function format_time()
+    def test_format_time(self):
+        t_ds = '2020年1月13日 星期一 下午 8:11:05'
+        t_ds = self.km.format_time(t_ds)
+        self.assertEqual(t_ds, '2020/1/13 20:11:05')
+
+    # test function format_data
+    def test_format_data(self):
+        t_books = self.cre_tbooks()
+        t_out = self.km.format_data(t_books, ft='MD')
+        self.assertEqual(t_out[0], 'TYPE|BOOKNAME|AUTHOR|MARKTIME|CONTENT')
+        self.assertEqual(t_out[1], '--|--|--|--|--')
+        self.assertEqual(t_out[2], 'HL|薛兆丰经济学讲义|薛兆丰|2020/1/13 8:11:05|边际就是“新增”带来的“新增”。\n')
+        t_out.clear()
+
+    def test_add_note_to_highlight(self):
+        t_books = self.cre_tbooks()
+        t_books_remove_nt = self.km.add_note_to_highlight(t_books)
+        self.assertEqual((t_books_remove_nt['薛兆丰经济学讲义']['1']['content']).replace('\n',''),\
+                '边际就是“新增”带来的“新增”。'+NTPREF+ '山寨 假货 问题')
+
+    def test_get_kindle_path(self):
+        kp = self.km.get_kindle_path()
+        s = u"kindle disconnected" if not kp else u"kindle connected {}".format(kp)
+        print(s)
+
+    def test_get_bookname_num(self):
+        t_books = self.cre_tbooks()
+        [nu, bn] = self.km.get_bookname_num(t_books)
+        self.assertEqual(bn['薛兆丰经济学讲义'],2)
+
+    def test_get_author_num(self):
+        t_books = self.cre_tbooks()
+        [nu, bn] = self.km.get_author_num(t_books)
+        self.assertEqual(bn['薛兆丰'],2)
+
+    def test_filter_clips(self):
+        t_books = self.cre_tbooks()
+        # no filter
+        bn = self.km.filter_clips(t_books, '薛兆丰', 0)
+        logger.debug('========== 1 ==========\n')
+        logger.debug(json.dumps(bn, indent=2, ensure_ascii=False))
+        bn = {}  
+
+        # by bookname
+        bn = self.km.filter_clips(t_books, '枪炮、病菌与钢铁 : 人类社会的命运 (世纪人文系列丛书·开放人文)', 1)
+        logger.debug('========== 2 ==========\n')
+        logger.debug(json.dumps(bn, indent=2, ensure_ascii=False))
+        bn = {}  
+
+        # by author
+        bn = self.km.filter_clips(t_books, '贾雷德·戴蒙德', 2)
+        logger.debug('========== 3 ==========\n')
+        logger.debug(json.dumps(bn, indent=2, ensure_ascii=False))
+        bn = {}  
+
+    # test import words & filter words
+    def test_import_words(self):
+        import pprint
+        [bookinfo, lookups, words] =  self.km.import_words(fp=(CLIPPATH+WORDFN))
+        # how to beauty print to logger file
+        logger.debug('========== 4 ==========\n')
+        logger.debug(json.dumps(bookinfo, indent=2, ensure_ascii=False))
+        logger.debug('========== 5 ==========\n')
+        logger.debug(json.dumps(lookups, indent=2, ensure_ascii=False))
+        logger.debug('========== 6 ==========\n')
+        logger.debug(json.dumps(words, indent=2, ensure_ascii=False))
+
+        # test filter_words()
+        logger.debug('========== 7 ==========\n')
+        self.km.filter_words(lookups, '中国历史风云录 (陈舜臣作品)', tp=0)
+
+        logger.debug('========== 8 ==========\n')
+        self.km.filter_words(lookups, 'zh:闾', tp=1)
+
+    """
+    def test_search_clip(self):
+        pass
+    def test_statistic(self):
+        pass
+    def test_dict2json(self):
+        pass
+    def test_json2dict(self):
+        pass
+    def test_import_clips(self):
+        pass
+    """
+
+    # clean
+    def tearDown(self):
+        pass
+
+if __name__ == '__main__':
+
+    """
+    suite = unittest.TestSuite ()
+    suite.addTest(TestKman('test_parse_section'))
+    suite.addTest(TestKman('test_format_time'))
+    suite.addTest(TestKman('test_format_data'))