#########################################################
## @file   : test_kman.py
## @desc   : unit test for kindle management
## @create : 2020/05/26
## @author : Chengan
## @email  : douboer@gmail.com
#########################################################

import os
import sys
import logging
import unittest
import json
from collections import defaultdict

from kman import *
from parseweb import *
from mtable import *

# log info
logger = logging.getLogger()
# formatter = logging.Formatter('%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s')
formatter = logging.Formatter('')
handler = logging.FileHandler('debug')
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.setLevel(logging.DEBUG)


class TestKman(unittest.TestCase):
    # set up test fixtures
    def setUp(self):
        LOG2FILE = 1
        DELIMITER = '|'
        self.km = kMan()
        self.util = Util()
        global t_bm_sec
        global t_hl_sec
        global t_nt_sec
        global t_hl_sec2
        global t_hl_sec3
        global t_hl_sec4
        self.t_num_nt = 0
        global t_books

        # sample Kindle clipping sections: bookmark (书签), highlight (标注) and note (笔记)
        t_bm_sec = ["""另一半中国史 (高洪雷) """,
                    """- 您在位置 #2468 的书签 | 添加于 2020年1月12日星期日 下午11:09:06 """]
        t_hl_sec = ["""薛兆丰经济学讲义 (薛兆丰) """,
                    """- 您在位置 #1408-1410的标注 | 添加于 2020年1月13日星期一 上午8:11:05 """,
                    """边际就是“新增”带来的“新增”。 """]
        t_nt_sec = ["""薛兆丰经济学讲义 (薛兆丰) """,
                    """- 您在位置 #4286 的笔记 | 添加于 2020年1月30日星期四 下午10:26:31 """,
                    """山寨 假货 问题 """]
        t_hl_sec2 = ["""枪炮、病菌与钢铁 : 人类社会的命运 (世纪人文系列丛书·开放人文) (贾雷德·戴蒙德) """,
                     """- 您在位置 #4267-4268的标注 | 添加于 2020年1月29日星期三 上午12:42:32 """,
                     """从柏拉图到马克思的所有政治理论家都提出过这个问题xxxx"""]
        t_hl_sec3 = ["""枪炮、病菌与钢铁(贾雷德·戴蒙德)""",
                     """- 您在位置 #4267-4268的标注 | 添加于 2020年1月29日星期三 上午12:42:32""",
                     """从柏拉图到马克思的所有政治理论家都提出过这个问题yyyy"""]
        t_hl_sec4 = ["""枪炮、病菌与钢铁 : 人类社会的命运 (世纪人文系列丛书·开放人文) (贾雷德·戴蒙德)""",
                     """- 您在位置 #4267-4268的标注 | 添加于 2020年1月29日星期三 上午12:42:32""",
                     """从柏拉图到马克思的所有政治理论家都提出过这个问题zzzz"""]
        t_books = defaultdict(dict)

    # parse the sample sections and fill the shared t_books structure
    def cre_tbooks(self):
        self.t_num_nt = 0
        # bookmark sections are not stored: parse_section() returns False for them
        self.km.parse_section(t_bm_sec, 0)

        for sec in (t_hl_sec, t_nt_sec, t_hl_sec2, t_hl_sec3, t_hl_sec4):
            self.t_num_nt += 1
            t_secd = self.km.parse_section(sec, self.t_num_nt)
            bn = t_secd['bookname']
            t_books[bn]['author'] = t_secd[bn]['author']
            t_books[bn][str(self.t_num_nt)] = t_secd[bn][str(self.t_num_nt)]
            t_secd.clear()
        return t_books

    # test function parse_section
    def test_parse_section(self):
        # a bookmark section is rejected
        t_secd = self.km.parse_section(t_bm_sec, 0)
        self.assertEqual(t_secd, False)

        # a highlight section
        t_secd = self.km.parse_section(t_hl_sec, 1)
        bn = t_secd['bookname']
        self.assertIsNotNone(t_secd)
        self.assertEqual(bn, '薛兆丰经济学讲义')
        self.assertEqual(t_secd[bn]['author'], '薛兆丰')
        self.assertEqual(t_secd[bn]['1']['type'], 'HL')
        self.assertEqual(t_secd[bn]['1']['position'], '1408-1410')
        self.assertEqual(t_secd[bn]['1']['day'], '2020年1月13日')
        self.assertEqual(t_secd[bn]['1']['week'], '星期一')
        self.assertEqual(t_secd[bn]['1']['meridiem'], '上午')
        self.assertEqual(t_secd[bn]['1']['time'], '8:11:05')
        self.assertEqual(t_secd[bn]['1']['content'], '边际就是“新增”带来的“新增”。\n')
        t_books[bn]['author'] = t_secd[bn]['author']
        t_books[bn]['1'] = t_secd[bn]['1']
        t_secd.clear()

        # a note section
        t_secd = self.km.parse_section(t_nt_sec, 2)
        bn = t_secd['bookname']
        self.assertIsNotNone(t_secd)
        self.assertEqual(bn, '薛兆丰经济学讲义')
        self.assertEqual(t_secd[bn]['author'], '薛兆丰')
        self.assertEqual(t_secd[bn]['2']['type'], 'NT')
        self.assertEqual(t_secd[bn]['2']['position'], '4286')
        self.assertEqual(t_secd[bn]['2']['day'], '2020年1月30日')
        self.assertEqual(t_secd[bn]['2']['week'], '星期四')
        self.assertEqual(t_secd[bn]['2']['meridiem'], '下午')
        self.assertEqual(t_secd[bn]['2']['time'], '10:26:31')
        self.assertEqual(t_secd[bn]['2']['content'], '山寨 假货 问题\n')
        t_books[bn]['author'] = t_secd[bn]['author']
        t_books[bn]['2'] = t_secd[bn]['2']
        t_secd.clear()

    # test drop_duplicate
    def test_drop_duplicate(self):
        t_books = self.cre_tbooks()
        t_secd = self.km.parse_section(t_hl_sec, 3)
        bn = t_secd['bookname']
        t_books_du = t_books.copy()
        t_books_du[bn]['3'] = t_secd[bn]['3']
        t_books_du[bn]['4'] = t_secd[bn]['3']
        self.assertIsInstance(t_books_du[bn]['3'], dict)
        try:
            t_books_du = self.km.drop_duplicate(t_books_du)
            t = t_books_du[bn]['3']
        except KeyError as keyerror:
            # expected: the duplicate entry with sidx 3 has been dropped
            print("as expected, duplicate sidx 3 was removed, raised: %s" % keyerror)
        t_secd.clear()

    # test function format_time()
    def test_format_time(self):
        t_ds = '2020年1月13日 星期一 下午 8:11:05'
        t_ds = self.km.format_time(t_ds)
        self.assertEqual(t_ds, '2020/1/13 20:11:05')

    # test function format_note_data
    def test_format_data(self):
        t_books = self.cre_tbooks()
        t_out = self.km.format_note_data(t_books, ft='MD')
        self.assertEqual(t_out[0], 'TYPE|BOOKNAME|AUTHOR|MARKTIME|CONTENT')
        self.assertEqual(t_out[1], '--|--|--|--|--')
        self.assertEqual(t_out[2], 'HL|薛兆丰经济学讲义|薛兆丰|2020/1/13 8:11:05|边际就是“新增”带来的“新增”。\n')
        t_out.clear()

    def test_add_note_to_highlight(self):
        t_books = self.cre_tbooks()
        t_books_remove_nt = self.km.add_note_to_highlight(t_books)
        self.assertEqual((t_books_remove_nt['薛兆丰经济学讲义']['1']['content']).replace('\n', ''),
                         '边际就是“新增”带来的“新增”。' + NTPREF + '山寨 假货 问题')

    def test_get_kindle_path(self):
        kp = self.km.get_kindle_path()
        s = "kindle disconnected" if not kp else "kindle connected {}".format(kp)
        print(s)

    def test_get_bookname_num(self):
        t_books = self.cre_tbooks()
        [nu, bn] = self.km.get_bookname_num(t_books)
        self.assertEqual(bn['薛兆丰经济学讲义'], 2)

    def test_get_author_num(self):
        t_books = self.cre_tbooks()
        [nu, bn] = self.km.get_author_num(t_books)
        self.assertEqual(bn['薛兆丰'], 2)

    def test_filter_clips(self):
        t_books = self.cre_tbooks()
        # no filter
        bn = self.km.filter_clips(t_books, '薛兆丰', 0)
        logger.debug('========== 1 ==========\n')
        logger.debug(json.dumps(bn, indent=2, ensure_ascii=False))
        bn = {}
        # by bookname
        bn = self.km.filter_clips(t_books, '枪炮、病菌与钢铁 : 人类社会的命运 (世纪人文系列丛书·开放人文)', 1)
        logger.debug('========== 2 ==========\n')
        logger.debug(json.dumps(bn, indent=2, ensure_ascii=False))
        bn = {}
        # by author
        bn = self.km.filter_clips(t_books, '贾雷德·戴蒙德', 2)
        logger.debug('========== 3 ==========\n')
        logger.debug(json.dumps(bn, indent=2, ensure_ascii=False))
        bn = {}

    # test import words & filter words
    def test_import_words(self):
        import pprint
        [bookinfo, lookups, words] = self.km.import_words()
        # how to pretty-print to the logger file
        logger.debug('========== 4 ==========\n')
        logger.debug(json.dumps(bookinfo, indent=2, ensure_ascii=False))
        logger.debug('========== 5 ==========\n')
        logger.debug(json.dumps(lookups, indent=2, ensure_ascii=False))
        logger.debug('========== 6 ==========\n')
        logger.debug(json.dumps(words, indent=2, ensure_ascii=False))

        # test filter_words()
        logger.debug('========== 7 ==========\n')
        self.km.filter_words(self.km.import_words(), '中国历史风云录 (陈舜臣作品)', tp=0)
        logger.debug('========== 8 ==========\n')
        self.km.filter_words(self.km.import_words(), 'zh:闾', tp=1)

    def test_util(self):
        logger.debug('test get_app_path: {}'.format(self.util.get_app_path()))

    def test_mtable(self):
        print('\n---------------------')
        data = [['Ohiox', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
                [2000, 2001, 2002, 2001, 2002],
                [1.5, 1.7, 3.6, 2.4, 2.9],
                [1.5, 1.7, 3.6, 2.4, 2.9],
                ]

        # full constructor: data, index and columns
        mt = mTable(data,
                    index=['one', 'two', 'three', 'four', 'five'],
                    columns=['year', 'state', 'pop', 'debt', 'xx'])
        print('== frame\n', mt)
        print('== frame.iat[0, 0]\n', mt.get_iat(0, 0))
        print('== frame.iat[1, 1]\n', mt.get_iat(1, 1))
        print('== frame.iat[2, 2]\n', mt.get_iat(2, 2))
        print('== frame.iat[2, 1]\n', mt.get_iat(2, 1))
        print('== frame.shape[0]\n', mt.get_num_rows())
        print('== frame.shape[1]\n', mt.get_num_columns())
        print('== frame.columns\n', mt.get_columns())
        print('== frame.index\n', mt.get_index())

        # columns only
        mt = mTable(data, columns=['year', 'state', 'pop', 'debt', 'xx'])
        print('== frame\n', mt)
        print('== frame.iat[2, 1]\n', mt.get_iat(2, 1))
        print('== frame.shape[0]\n', mt.get_num_rows())
        print('== frame.shape[1]\n', mt.get_num_columns())
        print('== frame.columns\n', mt.get_columns())
        print('== frame.index\n', mt.get_index())

        # data only, with a narrowed repr width
        mt = mTable(data)
        mt.set_repr_width(20)
        print('== frame\n', mt)
        print('== frame.iat[2, 1]\n', mt.get_iat(2, 1))
        print('== frame.shape[0]\n', mt.get_num_rows())
        print('== frame.shape[1]\n', mt.get_num_columns())
        print('== frame.columns\n', mt.get_columns())
        print('== frame.index\n', mt.get_index())

        # empty table
        mt = mTable()
        print('== frame\n', mt)
        print('== frame.iat[2, 1]\n', mt.get_iat(2, 1))
        print('== frame.shape[0]\n', mt.get_num_rows())
        print('== frame.shape[1]\n', mt.get_num_columns())
        print('== frame.columns\n', mt.get_columns())
        print('== frame.index\n', mt.get_index())

    """
    def test_search_clip(self):
        pass

    def test_statistic(self):
        pass

    def test_dict2json(self):
        pass

    def test_json2dict(self):
        pass

    def test_import_clips(self):
        pass
    """

    def test_parseweb(self):
        spide = bookInfoSpide()
        """
        for bkname in testbooks:
            bkname = re.split(r'[\((\-\::_\s]', bkname.strip())[0]
            print(bkname)
            bkinfo = spide.grab_book_info(bkname)
            filter_bkinfo = spide.filter_spide_book(bkinfo)
            if filter_bkinfo:
                spide.down_book_img(filter_bkinfo)

            logger.debug('================ {} ================'.format(bkname))
            logger.debug(json.dumps(bkinfo, indent=2, ensure_ascii=False))
            logger.debug(json.dumps(filter_bkinfo, indent=2, ensure_ascii=False))
        """

    def test_kman(self):
        # books = defaultdict(dict)
        km = kMan()

        books = km.import_clips()

        # remove duplicates
        km.drop_duplicate(books)

        # test the search note function
        searchnote = km.search_clip(books, '三大都市圈', 'ALL', 'CONTENT')
        if searchnote[0] > 0:
            km.export_notes(searchnote[1], 'searchcontent', ft='MD')
        searchnote = km.search_clip(books, '经济', 'ALL', 'TITLE')
        if searchnote[0] > 0:
            km.export_notes(searchnote[1], 'searchtitle', ft='MD')
        searchnote = km.search_clip(books, '巴曙松', 'ALL', 'AUTHOR')
        if searchnote[0] > 0:
            km.export_notes(searchnote[1], 'searchauthor', ft='MD')

        # print(km.get_bookname_num(books))
        # print(km.get_author_num(books))

        # add note content to its highlight, then delete the note
        km.add_note_to_highlight(books)

        # test dict <-> json conversion
        with open(os.path.join(CURRPATH, 'xx'), 'w', encoding='utf8', errors='ignore') as fw:
            fw.write(km.dict2json(books))
        if km.json2dict(os.path.join(CURRPATH, 'xx')) == books:
            print('test OK')

        km.export_notes(books, OUTPREF, ft='MD')

        # print data in json format
        logger.debug(json.dumps(books, indent=4, sort_keys=True, ensure_ascii=False))

    def test_somepath(self):
        frozen = 'not'
        if getattr(sys, 'frozen', False):
            # we are running in a PyInstaller bundle
            frozen = 'ever so'
            bundle_dir = sys._MEIPASS
        else:
            # we are running in a normal Python environment
            bundle_dir = os.path.dirname(os.path.abspath(__file__))
        print('we are', frozen, 'frozen')
        print('bundle dir is', bundle_dir)
        print('sys.argv[0] is', sys.argv[0])
        print('sys.executable is', sys.executable)
        print('os.getcwd is', os.getcwd())

        print('sys.path[0]', sys.path[0])
        print('sys.argv[0]', sys.argv[0])
        print('os.path.realpath(sys.executable)', os.path.realpath(sys.executable))
        print('os.path.realpath(sys.argv[0])', os.path.realpath(sys.argv[0]))
        print('os.path.dirname(os.path.realpath(sys.executable))', os.path.dirname(os.path.realpath(sys.executable)))
        print('os.path.dirname(os.path.realpath(sys.argv[0]))', os.path.dirname(os.path.realpath(sys.argv[0])))
        print('os.path.dirname(os.path.abspath(__file__))', os.path.dirname(os.path.abspath(__file__)))

    # clean up
    def tearDown(self):
        pass


if __name__ == '__main__':
    """
    suite = unittest.TestSuite()
    suite.addTest(TestKman('test_parse_section'))
    suite.addTest(TestKman('test_format_time'))
    suite.addTest(TestKman('test_format_data'))
    suite.addTest(TestKman('test_drop_duplicate'))
    suite.addTest(TestKman('test_add_note_to_highlight'))
    run = unittest.TextTestRunner(verbosity=2)
    run.run(suite)
    """
    # unittest.main() discovers the tests itself; the calling sequence above is not used
    unittest.main()