From 2e29cfdb282490134c3b6edcac4a270be0b51d64 Mon Sep 17 00:00:00 2001 From: gavin Date: Thu, 28 May 2020 21:10:46 +0800 Subject: [PATCH] kindle manager --- .gitignore | 2 ++ changelog.md | 20 ++++++++----- kman.py | 81 ++++++++++++++++++++++++++++++---------------------- 3 files changed, 62 insertions(+), 41 deletions(-) diff --git a/.gitignore b/.gitignore index d1d43bb..c79bbda 100755 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,5 @@ log* *xxx* cr rungit* +clip* + diff --git a/changelog.md b/changelog.md index b3e71a5..1c91fa8 100644 --- a/changelog.md +++ b/changelog.md @@ -7,7 +7,11 @@ - abstract note/bookmark/highlight from kindle clipping - formatter output to special file -### learn lesson +## 1.0.1 (20200528) +### feature +- add search clip + +## learn lesson - assign value to a not exist key, will throw KeyError, too inflexible!!! perl much better - use defaultdict to solve obove problem, note, defaultdict only create two layer key auto ``` @@ -21,10 +25,14 @@ b['1']['2'] = {'3':1} # OK ``` - logging, file io, very strict indent! - re.X(VERBOSE), different regular express between vim & python - -## 1.0.1 (20200528) -### feature -- add search clip +- no c like i=x<0?m:n => python - i=m if x<0 else n +- json dict convert +- pylint check code, but most of suggest is unacceptable for me :smile: +- no c/c++/java/perl switch-case, not use two much if-elif-elif, use this to simplify: +``` + suff = {'MD':'.md','CSV':'.csv','JSON':'.json'} + 198 op = OUTPREF+suff[ft] +``` # feature plan ## 20200528 @@ -32,5 +40,3 @@ b['1']['2'] = {'3':1} # OK - first abstract from kindle hard / local directory for different OS - add GUI use QT - - diff --git a/kman.py b/kman.py index 3e3173f..6a18151 100644 --- a/kman.py +++ b/kman.py @@ -53,14 +53,14 @@ books = ''' # modi clippath for different os -SYS = 'WIN' if platform.system() == 'Windows' else \ - ('LINUX' if platform.system() == 'LINUX' else 'MAC') +SYS = 'WIN' if platform.system()=='Windows' else \ + ('LINUX' if platform.system()=='LINUX' else 'MAC') # some constants LASTLINE = '==========' NTPREF = '--CG注:' CLIPPATH = './My Clippings.txt' # /Volumes/Kindle/documents/My\ Clippings.txt -OUTPATH = './clip' +OUTPREF = './clip' DEBUG = 1 # 0 - INFO; 1 - DEBUG LOG2FILE = 1 # 0 - to stdio; 1 - to file @@ -104,12 +104,12 @@ r''' (\d{1,2}:\d{1,2}:\d{1,2}) #group6 - time ''', flags=re.X ) -def parse_section(sec,idx): +def parse_section(s,i): """parse section Args: - sec: section dict - idx: section index + s: section dict + i: section index Returns: dict like this: @@ -130,14 +130,14 @@ def parse_section(sec,idx): # 1. highlight over the picture, the content(#3 line) is empty, only two lines # 2. bookmark section only two lines # 3. other not correct format < 2 - if len(sec)<=2: + if len(s)<=2: return False # parse #2 line section = defaultdict(dict) authinfo = sec[0] dateinfo = sec[1] - content = sec[2] if len(sec) == 3 else None + content = sec[2] if len(sec)==3 else None das = da.search(dateinfo) # type of section @@ -148,8 +148,8 @@ def parse_section(sec,idx): HL - section is a highlight NT - section is a note ''' - tpy = ('HL' if das.group(2) == '标注' else \ - ('NT' if das.group(2) == '笔记' else 'BM')) + tpy = ('HL' if das.group(2)=='标注' else \ + ('NT' if das.group(2)=='笔记' else 'BM')) pos = das.group(1) day = das.group(3) week = das.group(4) @@ -163,7 +163,7 @@ def parse_section(sec,idx): section[bookname]['author'] = author section['bookname'] = bookname - section[bookname][str(idx)] = { + section[bookname][str(i)] = { 'type':tpy, 'position':pos, 'day':day, @@ -174,28 +174,41 @@ def parse_section(sec,idx): return section - def format_out(bks, ft='MD'): - """format output + """format output and write to file + MARKDOWN format: + TYPE | bookname | author | marktime | content + --|--|--|--|-- + xx|xx|xx|xx|xx + + CSV format: + TYPE,bookname,author,marktime,content + xx,xx,xx,xx,xx + + marktime: 20200403 PM 3:0:3 星期五 Args: bks: books dict - f: canbe 'MD'/'TXT'/'JSON' + f: can be 'MD'/'JSON'/'CSV' - Returns: - special format of 'bks' dict + Returns: special format of 'bks' dict """ - op = OUTPATH+('.md' if ft == 'MD' else \ - ('.json' if ft == 'JSON' else '.txt')) + suff = {'MD':'.md','CSV':'.csv','JSON':'.json'} + op = OUTPREF+suff[ft] with open(op, 'w', encoding='gbk', errors='ignore') as fw: - fw.write(line) + if ft=='JSON': + ft.write(json.dumps(bks, indent=4, sort_keys=True, ensure_ascii=False)) + elif ft=='MD': + pass + else: + ft.write(json.dumps(bks)) # only for load back - if ft == 'JSON': - return json.dumps(bks, indent=4, sort_keys=True, ensure_ascii=False) +def statistic(bks): + pass -def t_dict2json(d): +def dict2json(d): """convert dict to json Args: d is the dict Return: json string @@ -203,7 +216,7 @@ def t_dict2json(d): jstr = json.dumps(d) return jstr -def t_json2dict(jf): +def json2dict(jf): """convert dict to json Args: jf is the file saved json string Return: dict @@ -230,15 +243,15 @@ def search_clip(bks, s, t='ALL', p='ALL'): """ pass -if __name__ == '__main__': +if __name__=='__main__': # 4 lines for each section seperated with '=======' # so read 4 lines before '=======' with open(CLIPPATH, 'r', encoding='utf8', errors='ignore') as f: - books = defaultdict(dict) - secd = defaultdict(dict) - sidx = 0 - idx = 0 - sec = [] + books = defaultdict(dict) + secd = defaultdict(dict) + sidx = 0 + idx = 0 + sec = [] for line in f.readlines(): line = line.strip() if re.match(r'^\s*$',line): continue @@ -246,7 +259,7 @@ if __name__ == '__main__': if not re.search(LASTLINE,line): # content more than 1 line - idx>3: + if idx>3: sec[2] += str(' '+line) logger.debug('idx {} {}'.format(idx, sec[2])) else: @@ -266,7 +279,7 @@ if __name__ == '__main__': books[bn]['author'] = secd[bn]['author'] books[bn][str(sidx)] = secd[bn][str(sidx)] - if tpy == 'NT' and books[bn][str(sidx-1)]['type'] == 'HL': + if tpy=='NT' and books[bn][str(sidx-1)]['type']=='HL': books[bn][str(sidx-1)]['content'] += str(NTPREF+sec[2]) else: # BM or not correct format section sidx -= 1 @@ -275,9 +288,9 @@ if __name__ == '__main__': sec = [] # test dict json convert - with open('./xx', 'w', encoding='gbk', errors='ignore') as fw: - fw.write(t_dict2json(books)) - if t_json2dict('./xx') == books: print( 'test OK') + with open('./xx', 'w', encoding='utf8', errors='ignore') as fw: + fw.write(dict2json(books)) + if json2dict('./xx')==books: print( 'test OK') # print data with json format logger.debug(json.dumps(books, indent=4, sort_keys=True, ensure_ascii=False))