# NOTE: The lines below are artifacts of the web repository viewer this file
# was copied from (file-listing header plus a Unicode-confusables warning).
# They are preserved as comments so the module remains valid Python.
#   Files
#   kman/unitest.kman.py
#   2020-06-30 08:31:21 +08:00
#   396 lines | 16 KiB | Python
#   Raw Permalink Blame History
#   "This file contains ambiguous Unicode characters ... Unicode characters
#    that might be confused with other characters. If you think that this is
#    intentional, you can safely ignore this warning. Use the Escape button
#    to reveal them."
#########################################################
## @file : test_kman.py
## @desc : unit test for kindle management
## @create : 2020/05/26
## @author : Chengan
## @email : douboer@gmail.com
#########################################################
import unittest
import json
from collections import defaultdict
from kman import *
from parseweb import *
from mtable import *
# --- Module-level logging setup ---------------------------------------------
# Every test method writes its diagnostic dumps through this logger; output
# goes to the file 'debug' in the current working directory.
logger = logging.getLogger()
# Verbose alternative kept for debugging:
# formatter = logging.Formatter(
#     '%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s')
# The original passed '' here and silently relied on logging.Formatter
# falling back to its default '%(message)s' for a falsy fmt; spell the
# format out explicitly so the intent is visible (output is unchanged).
formatter = logging.Formatter('%(message)s')
handler = logging.FileHandler('debug')
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.setLevel(logging.DEBUG)
class TestKman(unittest.TestCase):
    """Unit tests for the kindle-clippings toolchain (kMan, Util, mTable,
    bookInfoSpide)."""

    # initial: build per-test objects plus the shared raw clipping fixtures
    def setUp(self):
        """Create fresh kMan/Util instances and the fixture sections.

        Each ``t_*_sec`` list mimics one section of a Kindle
        'My Clippings.txt' file: [book-title line, metadata line,
        optional content line].  The fixtures are module-level globals so
        every test method (and ``cre_tbooks``) can reuse them.
        """
        LOG2FILE = 1   # NOTE(review): unused locally — presumably mirrors a kman flag; confirm
        DELIMITER= '|' # NOTE(review): unused locally — looks like kman's column delimiter; confirm
        self.km = kMan()    # object under test
        self.util = Util()  # path helper under test
        global t_bm_sec     # bookmark section (metadata only, no content line)
        global t_hl_sec     # highlight section
        global t_nt_sec     # note section
        global t_hl_sec2    # highlight with a long book title
        global t_hl_sec3    # same position as t_hl_sec2, different content
        global t_hl_sec4    # same position again, different content
        self.t_num_nt = 0   # running section index, advanced by cre_tbooks()
        global t_books      # bookname -> {'author': ..., '<sidx>': clip-dict}
        t_bm_sec = ["""另一半中国史 (高洪雷)
""", \
"""- 您在位置 #2468 的书签 | 添加于 2020年1月12日星期日 下午11:09:06
"""]
        t_hl_sec = ["""薛兆丰经济学讲义 (薛兆丰)
""", \
"""- 您在位置 #1408-1410的标注 | 添加于 2020年1月13日星期一 上午8:11:05
""", \
"""边际就是“新增”带来的“新增”。
"""]
        t_nt_sec = ["""薛兆丰经济学讲义 (薛兆丰)
""", \
"""- 您在位置 #4286 的笔记 | 添加于 2020年1月30日星期四 下午10:26:31
""", \
"""山寨 假货 问题
"""]
        t_hl_sec2 = ["""枪炮、病菌与钢铁 : 人类社会的命运 (世纪人文系列丛书·开放人文) (贾雷德·戴蒙德)
""",\
"""- 您在位置 #4267-4268的标注 | 添加于 2020年1月29日星期三 上午12:42:32
""",\
"""从柏拉图到马克思的所有政治理论家都提出过这个问题xxxx"""]
        t_hl_sec3 = ["""枪炮、病菌与钢铁(贾雷德·戴蒙德)""",\
"""- 您在位置 #4267-4268的标注 | 添加于 2020年1月29日星期三 上午12:42:32""",\
"""从柏拉图到马克思的所有政治理论家都提出过这个问题yyyy"""]
        t_hl_sec4 = ["""枪炮、病菌与钢铁 : 人类社会的命运 (世纪人文系列丛书·开放人文) (贾雷德·戴蒙德)""",\
"""- 您在位置 #4267-4268的标注 | 添加于 2020年1月29日星期三 上午12:42:32""",\
"""从柏拉图到马克思的所有政治理论家都提出过这个问题zzzz"""]
        t_books = defaultdict(dict)
def cre_tbooks(self):
# parsing section & fill data structure
self.t_num_nt = 0
t_secd = self.km.parse_section(t_bm_sec,0)
self.t_num_nt += 1
t_secd = self.km.parse_section(t_hl_sec,self.t_num_nt)
bn = t_secd['bookname']
t_books[bn]['author'] = t_secd[bn]['author']
t_books[bn][str(self.t_num_nt)] = t_secd[bn][str(self.t_num_nt)]
t_secd.clear()
self.t_num_nt += 1
t_secd = self.km.parse_section(t_nt_sec,self.t_num_nt)
bn = t_secd['bookname']
t_books[bn]['author'] = t_secd[bn]['author']
t_books[bn][str(self.t_num_nt)] = t_secd[bn][str(self.t_num_nt)]
t_secd.clear()
self.t_num_nt += 1
t_secd = self.km.parse_section(t_hl_sec2,self.t_num_nt)
bn = t_secd['bookname']
t_books[bn]['author'] = t_secd[bn]['author']
t_books[bn][str(self.t_num_nt)] = t_secd[bn][str(self.t_num_nt)]
t_secd.clear()
self.t_num_nt += 1
t_secd = self.km.parse_section(t_hl_sec3,self.t_num_nt)
bn = t_secd['bookname']
t_books[bn]['author'] = t_secd[bn]['author']
t_books[bn][str(self.t_num_nt)] = t_secd[bn][str(self.t_num_nt)]
t_secd.clear()
self.t_num_nt += 1
t_secd = self.km.parse_section(t_hl_sec4,self.t_num_nt)
bn = t_secd['bookname']
t_books[bn]['author'] = t_secd[bn]['author']
t_books[bn][str(self.t_num_nt)] = t_secd[bn][str(self.t_num_nt)]
t_secd.clear()
return t_books
# test function parse_section
def test_parse_section(self):
# parsing section & fill data structure
t_secd = self.km.parse_section(t_bm_sec,0)
self.assertEqual(t_secd,False)
t_secd = self.km.parse_section(t_hl_sec,1)
bn = t_secd['bookname']
self.assertIsNotNone(t_secd)
self.assertEqual(bn,'薛兆丰经济学讲义')
self.assertEqual(t_secd[bn]['author'],'薛兆丰')
self.assertEqual(t_secd[bn]['1']['type'],'HL')
self.assertEqual(t_secd[bn]['1']['position'],'1408-1410')
self.assertEqual(t_secd[bn]['1']['day'],'2020年1月13日')
self.assertEqual(t_secd[bn]['1']['week'],'星期一')
self.assertEqual(t_secd[bn]['1']['meridiem'],'上午')
self.assertEqual(t_secd[bn]['1']['time'],'8:11:05')
self.assertEqual(t_secd[bn]['1']['content'],'边际就是“新增”带来的“新增”。\n')
t_books[bn]['author'] = t_secd[bn]['author']
t_books[bn]['1'] = t_secd[bn]['1']
t_secd.clear()
t_secd = self.km.parse_section(t_nt_sec,2)
bn = t_secd['bookname']
self.assertIsNotNone(t_secd)
self.assertEqual(bn,'薛兆丰经济学讲义')
self.assertEqual(t_secd[bn]['author'],'薛兆丰')
self.assertEqual(t_secd[bn]['2']['type'],'NT')
self.assertEqual(t_secd[bn]['2']['position'],'4286')
self.assertEqual(t_secd[bn]['2']['day'],'2020年1月30日')
self.assertEqual(t_secd[bn]['2']['week'],'星期四')
self.assertEqual(t_secd[bn]['2']['meridiem'],'下午')
self.assertEqual(t_secd[bn]['2']['time'],'10:26:31')
self.assertEqual(t_secd[bn]['2']['content'],'山寨 假货 问题\n')
t_books[bn]['author'] = t_secd[bn]['author']
t_books[bn]['2'] = t_secd[bn]['2']
t_secd.clear()
# test drop_duplicate
def test_drop_duplicate(self):
t_books = self.cre_tbooks()
t_secd = self.km.parse_section(t_hl_sec,3)
bn = t_secd['bookname']
t_books_du = t_books.copy()
t_books_du[bn]['3'] = t_secd[bn]['3']
t_books_du[bn]['4'] = t_secd[bn]['3']
self.assertIsInstance(t_books_du[bn]['3'],dict)
try:
t_books_du = self.km.drop_duplicate(t_books_du)
t = t_books_du[bn]['3']
except KeyError as keyerror:
print("与预期匹配sidx 3 重复被删除,抛出: %s" % 'keyerror')
t_secd.clear()
# test function format_time()
def test_format_time(self):
t_ds = '2020年1月13日 星期一 下午 8:11:05'
t_ds = self.km.format_time(t_ds)
self.assertEqual(t_ds, '2020/1/13 20:11:05')
# test function format_data
def test_format_data(self):
t_books = self.cre_tbooks()
t_out = self.km.format_note_data(t_books, ft='MD')
self.assertEqual(t_out[0], 'TYPE|BOOKNAME|AUTHOR|MARKTIME|CONTENT')
self.assertEqual(t_out[1], '--|--|--|--|--')
self.assertEqual(t_out[2], 'HL|薛兆丰经济学讲义|薛兆丰|2020/1/13 8:11:05|边际就是“新增”带来的“新增”。\n')
t_out.clear()
def test_add_note_to_highlight(self):
t_books = self.cre_tbooks()
t_books_remove_nt = self.km.add_note_to_highlight(t_books)
self.assertEqual((t_books_remove_nt['薛兆丰经济学讲义']['1']['content']).replace('\n',''),\
'边际就是“新增”带来的“新增”。'+NTPREF+ '山寨 假货 问题')
def test_get_kindle_path(self):
kp = self.km.get_kindle_path()
s = u"kindle disconnected" if not kp else u"kindle connected {}".format(kp)
print(s)
def test_get_bookname_num(self):
t_books = self.cre_tbooks()
[nu, bn] = self.km.get_bookname_num(t_books)
self.assertEqual(bn['薛兆丰经济学讲义'],2)
def test_get_author_num(self):
t_books = self.cre_tbooks()
[nu, bn] = self.km.get_author_num(t_books)
self.assertEqual(bn['薛兆丰'],2)
def test_filter_clips(self):
t_books = self.cre_tbooks()
# no filter
bn = self.km.filter_clips(t_books, '薛兆丰', 0)
logger.debug('========== 1 ==========\n')
logger.debug(json.dumps(bn, indent=2, ensure_ascii=False))
bn = {}
# by bookname
bn = self.km.filter_clips(t_books, '枪炮、病菌与钢铁 : 人类社会的命运 (世纪人文系列丛书·开放人文)', 1)
logger.debug('========== 2 ==========\n')
logger.debug(json.dumps(bn, indent=2, ensure_ascii=False))
bn = {}
# by author
bn = self.km.filter_clips(t_books, '贾雷德·戴蒙德', 2)
logger.debug('========== 3 ==========\n')
logger.debug(json.dumps(bn, indent=2, ensure_ascii=False))
bn = {}
# test import words & filter words
def test_import_words(self):
import pprint
[bookinfo, lookups, words] = self.km.import_words()
# how to beauty print to logger file
logger.debug('========== 4 ==========\n')
logger.debug(json.dumps(bookinfo, indent=2, ensure_ascii=False))
logger.debug('========== 5 ==========\n')
logger.debug(json.dumps(lookups, indent=2, ensure_ascii=False))
logger.debug('========== 6 ==========\n')
logger.debug(json.dumps(words, indent=2, ensure_ascii=False))
# test filter_words()
logger.debug('========== 7 ==========\n')
self.km.filter_words(self.km.import_words(), '中国历史风云录 (陈舜臣作品)', tp=0)
logger.debug('========== 8 ==========\n')
self.km.filter_words(self.km.import_words(), 'zh:闾', tp=1)
def test_util(self):
logger.debug( 'test get_app_path: {}'.format( self.util.get_app_path()))
def test_mtable(self):
print('\n---------------------')
data = [['Ohiox','Ohio','Ohio','Nevada','Nevada'],
[2000,2001,2002,2001,2002],
[1.5,1.7,3.6,2.4,2.9],
[1.5,1.7,3.6,2.4,2.9],
]
mt = mTable(data,
index = ['one', 'two','three','four','five'],
columns = ['year','state','pop','debt','xx'])
print('== frame\n', mt)
print('== frame.iat[0, 0]\n', mt.get_iat(0, 0))
print('== frame.iat[1, 1]\n', mt.get_iat(1, 1))
print('== frame.iat[2, 2]\n', mt.get_iat(2, 2))
print('== frame.iat[2, 1]\n', mt.get_iat(2, 1))
print('== frame.shape[0]\n', mt.get_num_rows())
print('== frame.shape[1]\n', mt.get_num_columns())
print('== frame.columns\n', mt.get_columns())
print('== frame.index\n', mt.get_index())
mt = mTable(data,
columns = ['year','state','pop','debt','xx'])
print('== frame\n', mt)
print('== frame.iat[2, 1]\n', mt.get_iat(2, 1))
print('== frame.shape[0]\n', mt.get_num_rows())
print('== frame.shape[1]\n', mt.get_num_columns())
print('== frame.columns\n', mt.get_columns())
print('== frame.index\n', mt.get_index())
mt = mTable(data)
mt.set_repr_width(20)
print('== frame\n', mt)
print('== frame.iat[2, 1]\n', mt.get_iat(2, 1))
print('== frame.shape[0]\n', mt.get_num_rows())
print('== frame.shape[1]\n', mt.get_num_columns())
print('== frame.columns\n', mt.get_columns())
print('== frame.index\n', mt.get_index())
mt = mTable()
print('== frame\n', mt)
print('== frame.iat[2, 1]\n', mt.get_iat(2, 1))
print('== frame.shape[0]\n', mt.get_num_rows())
print('== frame.shape[1]\n', mt.get_num_columns())
print('== frame.columns\n', mt.get_columns())
print('== frame.index\n', mt.get_index())
"""
def test_search_clip(self):
pass
def test_statistic(self):
pass
def test_dict2json(self):
pass
def test_json2dict(self):
pass
def test_import_clips(self):
pass
"""
def test_parseweb(self):
spide = bookInfoSpide()
"""
for bkname in testbooks:
bkname = re.split(r'[\(\-\:_\s]',bkname.strip())[0]
print(bkname)
bkinfo = spide.grab_book_info(bkname)
filter_bkinfo = spide.filter_spide_book(bkinfo)
if filter_bkinfo: spide.down_book_img(filter_bkinfo)
logger.debug('================ {} ================'.format(bkname))
logger.debug(json.dumps(bkinfo,indent=2, ensure_ascii=False))
logger.debug(json.dumps(filter_bkinfo,indent=2, ensure_ascii=False))
"""
def test_kman(self):
#books = defaultdict(dict)
km = kMan()
books = km.import_clips()
# remove duplication
km.drop_duplicate(books)
# test search note function
searchnote = km.search_clip(books, '三大都市圈', 'ALL', 'CONTENT')
if searchnote[0] > 0: km.export_notes(searchnote[1], 'searchcontent', ft='MD')
searchnote = km.search_clip(books, '经济', 'ALL', 'TITLE')
if searchnote[0] > 0: km.export_notes(searchnote[1], 'searchtitle', ft='MD')
searchnote = km.search_clip(books, '巴曙松', 'ALL', 'AUTHOR')
if searchnote[0] > 0: km.export_notes(searchnote[1], 'searchauthor', ft='MD')
#print(km.get_bookname_num(books))
#print(km.get_author_num(books))
# add note content to hightlight, then delete note
km.add_note_to_highlight(books)
# test dict json convert
with open(os.path.join(CURRPATH,'xx'), 'w', encoding='utf8', errors='ignore') as fw:
fw.write(km.dict2json(books))
if km.json2dict(os.path.join(CURRPATH,'xx'))==books: print( 'test OK')
km.export_notes(books, OUTPREF, ft='MD')
# print data with json format
logger.debug(json.dumps(books, indent=4, sort_keys=True, ensure_ascii=False))
def test_somepath(self):
frozen = 'not'
if getattr(sys, 'frozen', False):
# we are running in a bundle
frozen = 'ever so'
bundle_dir = sys._MEIPASS
else:
# we are running in a normal Python environment
bundle_dir = os.path.dirname(os.path.abspath(__file__))
print( 'we are',frozen,'frozen')
print( 'bundle dir is', bundle_dir )
print( 'sys.argv[0] is', sys.argv[0] )
print( 'sys.executable is', sys.executable )
print( 'os.getcwd is', os.getcwd() )
print('sys.path[0]', sys.path[0])
print('sys.argv[0]', sys.argv[0])
print('os.path.realpath(sys.executable)', os.path.realpath(sys.executable))
print('os.path.realpath(sys.argv[0]))', os.path.realpath(sys.argv[0]))
print('os.path.dirname(os.path.realpath(sys.executable))',
os.path.dirname(os.path.realpath(sys.executable)))
print('os.path.dirname(os.path.realpath(sys.argv[0]))',
os.path.dirname(os.path.realpath(sys.argv[0])))
print('os.path.dirname(os.path.abspath(__file__))',
os.path.dirname(os.path.abspath(__file__)))
# clean
def tearDown(self):
pass
if __name__ == '__main__':
    # A hand-picked suite (kept for reference) would force a fixed order:
    #   suite = unittest.TestSuite()
    #   suite.addTest(TestKman('test_parse_section'))
    #   suite.addTest(TestKman('test_format_time'))
    #   suite.addTest(TestKman('test_format_data'))
    #   suite.addTest(TestKman('test_drop_duplicate'))
    #   suite.addTest(TestKman('test_add_note_to_highlight'))
    #   unittest.TextTestRunner(verbosity=2).run(suite)
    # Discovery order is not a callable sequence — tests run alphabetically.
    unittest.main()