#########################################################
## @file   : test_kman.py
## @desc   : unit test for kindle management
## @create : 2020/05/26
## @author : Chengan
## @email  : douboer@gmail.com
#########################################################
|
||
|
||
import unittest
|
||
import json
|
||
|
||
from collections import defaultdict
|
||
|
||
from kman import *
|
||
from parseweb import *
|
||
from mtable import *
|
||
|
||
# log info
|
||
logger = logging.getLogger()
|
||
#formatter = logging.Formatter
|
||
# ('%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s')
|
||
formatter = logging.Formatter('')
|
||
handler = logging.FileHandler('debug')
|
||
handler.setFormatter(formatter)
|
||
logger.addHandler(handler)
|
||
logger.setLevel(logging.DEBUG)
|
||
|
||
class TestKman(unittest.TestCase):
|
||
# initial
|
||
def setUp(self):
|
||
LOG2FILE = 1
|
||
DELIMITER= '|'
|
||
|
||
self.km = kMan()
|
||
self.util = Util()
|
||
|
||
global t_bm_sec
|
||
global t_hl_sec
|
||
global t_nt_sec
|
||
global t_hl_sec2
|
||
global t_hl_sec3
|
||
global t_hl_sec4
|
||
self.t_num_nt = 0
|
||
global t_books
|
||
t_bm_sec = ["""另一半中国史 (高洪雷)
|
||
""", \
|
||
"""- 您在位置 #2468 的书签 | 添加于 2020年1月12日星期日 下午11:09:06
|
||
"""]
|
||
t_hl_sec = ["""薛兆丰经济学讲义 (薛兆丰)
|
||
""", \
|
||
"""- 您在位置 #1408-1410的标注 | 添加于 2020年1月13日星期一 上午8:11:05
|
||
""", \
|
||
"""边际就是“新增”带来的“新增”。
|
||
"""]
|
||
t_nt_sec = ["""薛兆丰经济学讲义 (薛兆丰)
|
||
""", \
|
||
"""- 您在位置 #4286 的笔记 | 添加于 2020年1月30日星期四 下午10:26:31
|
||
""", \
|
||
"""山寨 假货 问题
|
||
"""]
|
||
t_hl_sec2 = ["""枪炮、病菌与钢铁 : 人类社会的命运 (世纪人文系列丛书·开放人文) (贾雷德·戴蒙德)
|
||
""",\
|
||
"""- 您在位置 #4267-4268的标注 | 添加于 2020年1月29日星期三 上午12:42:32
|
||
""",\
|
||
"""从柏拉图到马克思的所有政治理论家都提出过这个问题xxxx"""]
|
||
t_hl_sec3 = ["""枪炮、病菌与钢铁(贾雷德·戴蒙德)""",\
|
||
"""- 您在位置 #4267-4268的标注 | 添加于 2020年1月29日星期三 上午12:42:32""",\
|
||
"""从柏拉图到马克思的所有政治理论家都提出过这个问题yyyy"""]
|
||
t_hl_sec4 = ["""枪炮、病菌与钢铁 : 人类社会的命运 (世纪人文系列丛书·开放人文) (贾雷德·戴蒙德)""",\
|
||
"""- 您在位置 #4267-4268的标注 | 添加于 2020年1月29日星期三 上午12:42:32""",\
|
||
"""从柏拉图到马克思的所有政治理论家都提出过这个问题zzzz"""]
|
||
t_books = defaultdict(dict)
|
||
|
||
def cre_tbooks(self):
|
||
# parsing section & fill data structure
|
||
self.t_num_nt = 0
|
||
t_secd = self.km.parse_section(t_bm_sec,0)
|
||
|
||
self.t_num_nt += 1
|
||
t_secd = self.km.parse_section(t_hl_sec,self.t_num_nt)
|
||
bn = t_secd['bookname']
|
||
t_books[bn]['author'] = t_secd[bn]['author']
|
||
t_books[bn][str(self.t_num_nt)] = t_secd[bn][str(self.t_num_nt)]
|
||
t_secd.clear()
|
||
|
||
self.t_num_nt += 1
|
||
t_secd = self.km.parse_section(t_nt_sec,self.t_num_nt)
|
||
bn = t_secd['bookname']
|
||
t_books[bn]['author'] = t_secd[bn]['author']
|
||
t_books[bn][str(self.t_num_nt)] = t_secd[bn][str(self.t_num_nt)]
|
||
t_secd.clear()
|
||
|
||
self.t_num_nt += 1
|
||
t_secd = self.km.parse_section(t_hl_sec2,self.t_num_nt)
|
||
bn = t_secd['bookname']
|
||
t_books[bn]['author'] = t_secd[bn]['author']
|
||
t_books[bn][str(self.t_num_nt)] = t_secd[bn][str(self.t_num_nt)]
|
||
t_secd.clear()
|
||
|
||
self.t_num_nt += 1
|
||
t_secd = self.km.parse_section(t_hl_sec3,self.t_num_nt)
|
||
bn = t_secd['bookname']
|
||
t_books[bn]['author'] = t_secd[bn]['author']
|
||
t_books[bn][str(self.t_num_nt)] = t_secd[bn][str(self.t_num_nt)]
|
||
t_secd.clear()
|
||
|
||
self.t_num_nt += 1
|
||
t_secd = self.km.parse_section(t_hl_sec4,self.t_num_nt)
|
||
bn = t_secd['bookname']
|
||
t_books[bn]['author'] = t_secd[bn]['author']
|
||
t_books[bn][str(self.t_num_nt)] = t_secd[bn][str(self.t_num_nt)]
|
||
t_secd.clear()
|
||
|
||
return t_books
|
||
|
||
# test function parse_section
|
||
def test_parse_section(self):
|
||
# parsing section & fill data structure
|
||
t_secd = self.km.parse_section(t_bm_sec,0)
|
||
self.assertEqual(t_secd,False)
|
||
|
||
t_secd = self.km.parse_section(t_hl_sec,1)
|
||
bn = t_secd['bookname']
|
||
self.assertIsNotNone(t_secd)
|
||
self.assertEqual(bn,'薛兆丰经济学讲义')
|
||
self.assertEqual(t_secd[bn]['author'],'薛兆丰')
|
||
self.assertEqual(t_secd[bn]['1']['type'],'HL')
|
||
self.assertEqual(t_secd[bn]['1']['position'],'1408-1410')
|
||
self.assertEqual(t_secd[bn]['1']['day'],'2020年1月13日')
|
||
self.assertEqual(t_secd[bn]['1']['week'],'星期一')
|
||
self.assertEqual(t_secd[bn]['1']['meridiem'],'上午')
|
||
self.assertEqual(t_secd[bn]['1']['time'],'8:11:05')
|
||
self.assertEqual(t_secd[bn]['1']['content'],'边际就是“新增”带来的“新增”。\n')
|
||
t_books[bn]['author'] = t_secd[bn]['author']
|
||
t_books[bn]['1'] = t_secd[bn]['1']
|
||
t_secd.clear()
|
||
|
||
t_secd = self.km.parse_section(t_nt_sec,2)
|
||
bn = t_secd['bookname']
|
||
self.assertIsNotNone(t_secd)
|
||
self.assertEqual(bn,'薛兆丰经济学讲义')
|
||
self.assertEqual(t_secd[bn]['author'],'薛兆丰')
|
||
self.assertEqual(t_secd[bn]['2']['type'],'NT')
|
||
self.assertEqual(t_secd[bn]['2']['position'],'4286')
|
||
self.assertEqual(t_secd[bn]['2']['day'],'2020年1月30日')
|
||
self.assertEqual(t_secd[bn]['2']['week'],'星期四')
|
||
self.assertEqual(t_secd[bn]['2']['meridiem'],'下午')
|
||
self.assertEqual(t_secd[bn]['2']['time'],'10:26:31')
|
||
self.assertEqual(t_secd[bn]['2']['content'],'山寨 假货 问题\n')
|
||
t_books[bn]['author'] = t_secd[bn]['author']
|
||
t_books[bn]['2'] = t_secd[bn]['2']
|
||
t_secd.clear()
|
||
|
||
# test drop_duplicate
|
||
def test_drop_duplicate(self):
|
||
t_books = self.cre_tbooks()
|
||
t_secd = self.km.parse_section(t_hl_sec,3)
|
||
bn = t_secd['bookname']
|
||
t_books_du = t_books.copy()
|
||
t_books_du[bn]['3'] = t_secd[bn]['3']
|
||
t_books_du[bn]['4'] = t_secd[bn]['3']
|
||
self.assertIsInstance(t_books_du[bn]['3'],dict)
|
||
|
||
try:
|
||
t_books_du = self.km.drop_duplicate(t_books_du)
|
||
t = t_books_du[bn]['3']
|
||
except KeyError as keyerror:
|
||
print("与预期匹配,sidx 3 重复被删除,抛出: %s" % 'keyerror')
|
||
|
||
t_secd.clear()
|
||
|
||
# test function format_time()
|
||
def test_format_time(self):
|
||
t_ds = '2020年1月13日 星期一 下午 8:11:05'
|
||
t_ds = self.km.format_time(t_ds)
|
||
self.assertEqual(t_ds, '2020/1/13 20:11:05')
|
||
|
||
# test function format_data
|
||
def test_format_data(self):
|
||
t_books = self.cre_tbooks()
|
||
t_out = self.km.format_note_data(t_books, ft='MD')
|
||
self.assertEqual(t_out[0], 'TYPE|BOOKNAME|AUTHOR|MARKTIME|CONTENT')
|
||
self.assertEqual(t_out[1], '--|--|--|--|--')
|
||
self.assertEqual(t_out[2], 'HL|薛兆丰经济学讲义|薛兆丰|2020/1/13 8:11:05|边际就是“新增”带来的“新增”。\n')
|
||
t_out.clear()
|
||
|
||
def test_add_note_to_highlight(self):
|
||
t_books = self.cre_tbooks()
|
||
t_books_remove_nt = self.km.add_note_to_highlight(t_books)
|
||
self.assertEqual((t_books_remove_nt['薛兆丰经济学讲义']['1']['content']).replace('\n',''),\
|
||
'边际就是“新增”带来的“新增”。'+NTPREF+ '山寨 假货 问题')
|
||
|
||
def test_get_kindle_path(self):
|
||
kp = self.km.get_kindle_path()
|
||
s = u"kindle disconnected" if not kp else u"kindle connected {}".format(kp)
|
||
print(s)
|
||
|
||
def test_get_bookname_num(self):
|
||
t_books = self.cre_tbooks()
|
||
[nu, bn] = self.km.get_bookname_num(t_books)
|
||
self.assertEqual(bn['薛兆丰经济学讲义'],2)
|
||
|
||
def test_get_author_num(self):
|
||
t_books = self.cre_tbooks()
|
||
[nu, bn] = self.km.get_author_num(t_books)
|
||
self.assertEqual(bn['薛兆丰'],2)
|
||
|
||
def test_filter_clips(self):
|
||
t_books = self.cre_tbooks()
|
||
# no filter
|
||
bn = self.km.filter_clips(t_books, '薛兆丰', 0)
|
||
logger.debug('========== 1 ==========\n')
|
||
logger.debug(json.dumps(bn, indent=2, ensure_ascii=False))
|
||
bn = {}
|
||
|
||
# by bookname
|
||
bn = self.km.filter_clips(t_books, '枪炮、病菌与钢铁 : 人类社会的命运 (世纪人文系列丛书·开放人文)', 1)
|
||
logger.debug('========== 2 ==========\n')
|
||
logger.debug(json.dumps(bn, indent=2, ensure_ascii=False))
|
||
bn = {}
|
||
|
||
# by author
|
||
bn = self.km.filter_clips(t_books, '贾雷德·戴蒙德', 2)
|
||
logger.debug('========== 3 ==========\n')
|
||
logger.debug(json.dumps(bn, indent=2, ensure_ascii=False))
|
||
bn = {}
|
||
|
||
# test import words & filter words
|
||
def test_import_words(self):
|
||
import pprint
|
||
[bookinfo, lookups, words] = self.km.import_words()
|
||
# how to beauty print to logger file
|
||
logger.debug('========== 4 ==========\n')
|
||
logger.debug(json.dumps(bookinfo, indent=2, ensure_ascii=False))
|
||
logger.debug('========== 5 ==========\n')
|
||
logger.debug(json.dumps(lookups, indent=2, ensure_ascii=False))
|
||
logger.debug('========== 6 ==========\n')
|
||
logger.debug(json.dumps(words, indent=2, ensure_ascii=False))
|
||
|
||
# test filter_words()
|
||
logger.debug('========== 7 ==========\n')
|
||
self.km.filter_words(self.km.import_words(), '中国历史风云录 (陈舜臣作品)', tp=0)
|
||
|
||
logger.debug('========== 8 ==========\n')
|
||
self.km.filter_words(self.km.import_words(), 'zh:闾', tp=1)
|
||
|
||
def test_util(self):
|
||
logger.debug( 'test get_app_path: {}'.format( self.util.get_app_path()))
|
||
|
||
def test_mtable(self):
|
||
print('\n---------------------')
|
||
data = [['Ohiox','Ohio','Ohio','Nevada','Nevada'],
|
||
[2000,2001,2002,2001,2002],
|
||
[1.5,1.7,3.6,2.4,2.9],
|
||
[1.5,1.7,3.6,2.4,2.9],
|
||
]
|
||
|
||
mt = mTable(data,
|
||
index = ['one', 'two','three','four','five'],
|
||
columns = ['year','state','pop','debt','xx'])
|
||
|
||
print('== frame\n', mt)
|
||
print('== frame.iat[0, 0]\n', mt.get_iat(0, 0))
|
||
print('== frame.iat[1, 1]\n', mt.get_iat(1, 1))
|
||
print('== frame.iat[2, 2]\n', mt.get_iat(2, 2))
|
||
print('== frame.iat[2, 1]\n', mt.get_iat(2, 1))
|
||
print('== frame.shape[0]\n', mt.get_num_rows())
|
||
print('== frame.shape[1]\n', mt.get_num_columns())
|
||
print('== frame.columns\n', mt.get_columns())
|
||
print('== frame.index\n', mt.get_index())
|
||
|
||
mt = mTable(data,
|
||
columns = ['year','state','pop','debt','xx'])
|
||
print('== frame\n', mt)
|
||
print('== frame.iat[2, 1]\n', mt.get_iat(2, 1))
|
||
print('== frame.shape[0]\n', mt.get_num_rows())
|
||
print('== frame.shape[1]\n', mt.get_num_columns())
|
||
print('== frame.columns\n', mt.get_columns())
|
||
print('== frame.index\n', mt.get_index())
|
||
|
||
mt = mTable(data)
|
||
mt.set_repr_width(20)
|
||
print('== frame\n', mt)
|
||
print('== frame.iat[2, 1]\n', mt.get_iat(2, 1))
|
||
print('== frame.shape[0]\n', mt.get_num_rows())
|
||
print('== frame.shape[1]\n', mt.get_num_columns())
|
||
print('== frame.columns\n', mt.get_columns())
|
||
print('== frame.index\n', mt.get_index())
|
||
|
||
mt = mTable()
|
||
print('== frame\n', mt)
|
||
print('== frame.iat[2, 1]\n', mt.get_iat(2, 1))
|
||
print('== frame.shape[0]\n', mt.get_num_rows())
|
||
print('== frame.shape[1]\n', mt.get_num_columns())
|
||
print('== frame.columns\n', mt.get_columns())
|
||
print('== frame.index\n', mt.get_index())
|
||
|
||
"""
|
||
def test_search_clip(self):
|
||
pass
|
||
def test_statistic(self):
|
||
pass
|
||
def test_dict2json(self):
|
||
pass
|
||
def test_json2dict(self):
|
||
pass
|
||
def test_import_clips(self):
|
||
pass
|
||
"""
|
||
|
||
def test_parseweb(self):
|
||
spide = bookInfoSpide()
|
||
|
||
"""
|
||
for bkname in testbooks:
|
||
bkname = re.split(r'[\((\-\::_\s]',bkname.strip())[0]
|
||
print(bkname)
|
||
bkinfo = spide.grab_book_info(bkname)
|
||
filter_bkinfo = spide.filter_spide_book(bkinfo)
|
||
if filter_bkinfo: spide.down_book_img(filter_bkinfo)
|
||
|
||
logger.debug('================ {} ================'.format(bkname))
|
||
logger.debug(json.dumps(bkinfo,indent=2, ensure_ascii=False))
|
||
logger.debug(json.dumps(filter_bkinfo,indent=2, ensure_ascii=False))
|
||
"""
|
||
|
||
def test_kman(self):
|
||
#books = defaultdict(dict)
|
||
km = kMan()
|
||
books = km.import_clips()
|
||
|
||
# remove duplication
|
||
km.drop_duplicate(books)
|
||
|
||
# test search note function
|
||
searchnote = km.search_clip(books, '三大都市圈', 'ALL', 'CONTENT')
|
||
if searchnote[0] > 0: km.export_notes(searchnote[1], 'searchcontent', ft='MD')
|
||
searchnote = km.search_clip(books, '经济', 'ALL', 'TITLE')
|
||
if searchnote[0] > 0: km.export_notes(searchnote[1], 'searchtitle', ft='MD')
|
||
searchnote = km.search_clip(books, '巴曙松', 'ALL', 'AUTHOR')
|
||
if searchnote[0] > 0: km.export_notes(searchnote[1], 'searchauthor', ft='MD')
|
||
|
||
#print(km.get_bookname_num(books))
|
||
#print(km.get_author_num(books))
|
||
|
||
# add note content to hightlight, then delete note
|
||
km.add_note_to_highlight(books)
|
||
|
||
# test dict json convert
|
||
with open(os.path.join(CURRPATH,'xx'), 'w', encoding='utf8', errors='ignore') as fw:
|
||
fw.write(km.dict2json(books))
|
||
if km.json2dict(os.path.join(CURRPATH,'xx'))==books: print( 'test OK')
|
||
|
||
km.export_notes(books, OUTPREF, ft='MD')
|
||
|
||
# print data with json format
|
||
logger.debug(json.dumps(books, indent=4, sort_keys=True, ensure_ascii=False))
|
||
|
||
def test_somepath(self):
|
||
frozen = 'not'
|
||
if getattr(sys, 'frozen', False):
|
||
# we are running in a bundle
|
||
frozen = 'ever so'
|
||
bundle_dir = sys._MEIPASS
|
||
else:
|
||
# we are running in a normal Python environment
|
||
bundle_dir = os.path.dirname(os.path.abspath(__file__))
|
||
print( 'we are',frozen,'frozen')
|
||
print( 'bundle dir is', bundle_dir )
|
||
print( 'sys.argv[0] is', sys.argv[0] )
|
||
print( 'sys.executable is', sys.executable )
|
||
print( 'os.getcwd is', os.getcwd() )
|
||
|
||
print('sys.path[0]', sys.path[0])
|
||
print('sys.argv[0]', sys.argv[0])
|
||
print('os.path.realpath(sys.executable)', os.path.realpath(sys.executable))
|
||
print('os.path.realpath(sys.argv[0]))', os.path.realpath(sys.argv[0]))
|
||
print('os.path.dirname(os.path.realpath(sys.executable))',
|
||
os.path.dirname(os.path.realpath(sys.executable)))
|
||
print('os.path.dirname(os.path.realpath(sys.argv[0]))',
|
||
os.path.dirname(os.path.realpath(sys.argv[0])))
|
||
print('os.path.dirname(os.path.abspath(__file__))',
|
||
os.path.dirname(os.path.abspath(__file__)))
|
||
|
||
# clean
|
||
def tearDown(self):
|
||
pass
|
||
|
||
if __name__ == '__main__':
|
||
|
||
"""
|
||
suite = unittest.TestSuite ()
|
||
suite.addTest(TestKman('test_parse_section'))
|
||
suite.addTest(TestKman('test_format_time'))
|
||
suite.addTest(TestKman('test_format_data'))
|
||
suite.addTest(TestKman('test_drop_duplicate'))
|
||
suite.addTest(TestKman('test_add_note_to_highlight'))
|
||
run = unittest.TextTestRunner (verbosity=2)
|
||
run.run (suite)
|
||
"""
|
||
|
||
# not callable sequency
|
||
unittest.main()
|
||
|