kindle manager

gavin
2020-05-28 21:10:46 +08:00
parent 54e97b57e7
commit 2e29cfdb28
3 changed files with 62 additions and 41 deletions

.gitignore

@@ -5,3 +5,5 @@ log*
 *xxx*
 cr
 rungit*
+clip*


@@ -7,7 +7,11 @@
 - abstract note/bookmark/highlight from kindle clipping
 - formatter output to special file
-### learn lesson
+## 1.0.1 (20200528)
+### feature
+- add search clip
+## learn lesson
 - assign value to a nonexistent key will throw KeyError, too inflexible!!! perl much better
 - use defaultdict to solve the above problem; note, defaultdict only auto-creates two layers of keys
 ```
@@ -21,10 +25,14 @@ b['1']['2'] = {'3':1} # OK
 ```
 - logging, file io, very strict indent!
 - re.X(VERBOSE), different regular expressions between vim & python
-## 1.0.1 (20200528)
-### feature
-- add search clip
+- no c like i=x<0?m:n => python: i=m if x<0 else n
+- json dict convert
+- pylint check code, but most of its suggestions are unacceptable for me :smile:
+- no c/c++/java/perl switch-case; don't use too much if-elif-elif, use this to simplify:
+```
+suff = {'MD':'.md','CSV':'.csv','JSON':'.json'}
+op = OUTPREF+suff[ft]
+```

 # feature plan
 ## 20200528
@@ -32,5 +40,3 @@ b['1']['2'] = {'3':1} # OK
 - first abstract from kindle hard / local directory for different OS
 - add GUI using QT
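
Going back to the defaultdict lesson above, here is a runnable sketch (standard library only, not part of the commit) of the behaviour it describes: a plain dict raises KeyError on a missing key, and defaultdict(dict) only auto-creates the first nesting level.

```
from collections import defaultdict

plain = {}
# plain['1']['2'] = {'3': 1}    # KeyError: '1' does not exist in the plain dict

b = defaultdict(dict)
b['1']['2'] = {'3': 1}          # OK: the missing first layer is auto-created as {}
# b['1']['2']['4']['5'] = 1     # still KeyError: only the first layer is a defaultdict
print(b['1']['2']['3'])         # 1
```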

kman.py

@@ -53,14 +53,14 @@ books =
 '''
 # modi clippath for different os
-SYS = 'WIN' if platform.system() == 'Windows' else \
-      ('LINUX' if platform.system() == 'LINUX' else 'MAC')
+SYS = 'WIN' if platform.system()=='Windows' else \
+      ('LINUX' if platform.system()=='LINUX' else 'MAC')
 # some constants
 LASTLINE = '=========='
 NTPREF = '--CG注:'
 CLIPPATH = './My Clippings.txt' # /Volumes/Kindle/documents/My\ Clippings.txt
-OUTPATH = './clip'
+OUTPREF = './clip'
 DEBUG = 1 # 0 - INFO; 1 - DEBUG
 LOG2FILE = 1 # 0 - to stdio; 1 - to file
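
A side note on the SYS constant above: platform.system() returns 'Windows', 'Linux' or 'Darwin', so a check that actually distinguishes Linux from macOS could look like this sketch (not part of the commit):

```
import platform

_sysname = platform.system()    # 'Windows', 'Linux' or 'Darwin'
SYS = 'WIN' if _sysname == 'Windows' else \
      ('LINUX' if _sysname == 'Linux' else 'MAC')
```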
@@ -104,12 +104,12 @@ r'''
     (\d{1,2}:\d{1,2}:\d{1,2}) #group6 - time
 ''', flags=re.X )
-def parse_section(sec,idx):
+def parse_section(s,i):
     """parse section
     Args:
-        sec: section dict
-        idx: section index
+        s: section dict
+        i: section index
     Returns:
         dict like this:
@@ -130,14 +130,14 @@ def parse_section(sec,idx):
     # 1. highlight over the picture, the content(#3 line) is empty, only two lines
     # 2. bookmark section only two lines
     # 3. other not correct format < 2
-    if len(sec)<=2:
+    if len(s)<=2:
         return False
     # parse #2 line
     section = defaultdict(dict)
     authinfo = sec[0]
     dateinfo = sec[1]
-    content = sec[2] if len(sec) == 3 else None
+    content = sec[2] if len(sec)==3 else None
     das = da.search(dateinfo)
     # type of section
@@ -148,8 +148,8 @@ def parse_section(sec,idx):
     HL - section is a highlight
     NT - section is a note
     '''
-    tpy = ('HL' if das.group(2) == '标注' else \
-          ('NT' if das.group(2) == '笔记' else 'BM'))
+    tpy = ('HL' if das.group(2)=='标注' else \
+          ('NT' if das.group(2)=='笔记' else 'BM'))
     pos = das.group(1)
     day = das.group(3)
     week = das.group(4)
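
The classification above keys off capture groups from the re.X (VERBOSE) pattern. As a tiny illustration of re.X itself, whitespace and # comments inside the pattern are ignored; the toy pattern and sample string below are made up and are not the real clipping format:

```
import re

toy = re.compile(r'''
    (\d{1,3}-\d{1,3})               # group1 - position range
    .*?(标注|笔记|书签)              # group2 - entry type
    .*?(\d{1,2}:\d{1,2}:\d{1,2})    # group3 - time
''', flags=re.X)

m = toy.search('第 10-12 页的标注 | 添加于 下午3:00:03')
print(m.group(1), m.group(2), m.group(3))   # 10-12 标注 3:00:03
```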
@@ -163,7 +163,7 @@ def parse_section(sec,idx):
     section[bookname]['author'] = author
     section['bookname'] = bookname
-    section[bookname][str(idx)] = {
+    section[bookname][str(i)] = {
         'type':tpy,
         'position':pos,
         'day':day,
@@ -174,28 +174,41 @@ def parse_section(sec,idx):
     return section
 def format_out(bks, ft='MD'):
-    """format output
+    """format output and write to file
+    MARKDOWN format:
+        TYPE | bookname | author | marktime | content
+        --|--|--|--|--
+        xx|xx|xx|xx|xx
+    CSV format:
+        TYPE,bookname,author,marktime,content
+        xx,xx,xx,xx,xx
+    marktime: 20200403 PM 3:0:3 星期五
     Args:
         bks: books dict
-        f: canbe 'MD'/'TXT'/'JSON'
-    Returns:
-        special format of 'bks' dict
+        f: can be 'MD'/'JSON'/'CSV'
+    Returns: special format of 'bks' dict
     """
-    op = OUTPATH+('.md' if ft == 'MD' else \
-        ('.json' if ft == 'JSON' else '.txt'))
+    suff = {'MD':'.md','CSV':'.csv','JSON':'.json'}
+    op = OUTPREF+suff[ft]
     with open(op, 'w', encoding='gbk', errors='ignore') as fw:
-        fw.write(line)
-    if ft == 'JSON':
-        return json.dumps(bks, indent=4, sort_keys=True, ensure_ascii=False)
+        if ft=='JSON':
+            fw.write(json.dumps(bks, indent=4, sort_keys=True, ensure_ascii=False))
+        elif ft=='MD':
+            pass
+        else:
+            fw.write(json.dumps(bks)) # only for load back
+def statistic(bks):
+    pass
-def t_dict2json(d):
+def dict2json(d):
     """convert dict to json
     Args: d is the dict
     Return: json string
@@ -203,7 +216,7 @@ def t_dict2json(d):
     jstr = json.dumps(d)
     return jstr
-def t_json2dict(jf):
+def json2dict(jf):
     """convert dict to json
     Args: jf is the file saved json string
     Return: dict
@@ -230,15 +243,15 @@ def search_clip(bks, s, t='ALL', p='ALL'):
""" """
pass pass
if __name__ == '__main__': if __name__=='__main__':
# 4 lines for each section seperated with '=======' # 4 lines for each section seperated with '======='
# so read 4 lines before '=======' # so read 4 lines before '======='
with open(CLIPPATH, 'r', encoding='utf8', errors='ignore') as f: with open(CLIPPATH, 'r', encoding='utf8', errors='ignore') as f:
books = defaultdict(dict) books = defaultdict(dict)
secd = defaultdict(dict) secd = defaultdict(dict)
sidx = 0 sidx = 0
idx = 0 idx = 0
sec = [] sec = []
for line in f.readlines(): for line in f.readlines():
line = line.strip() line = line.strip()
if re.match(r'^\s*$',line): continue if re.match(r'^\s*$',line): continue
@@ -246,7 +259,7 @@ if __name__ == '__main__':
             if not re.search(LASTLINE,line):
                 # content more than 1 line
-                idx>3:
+                if idx>3:
                     sec[2] += str(' '+line)
                     logger.debug('idx {} {}'.format(idx, sec[2]))
                 else:
@@ -266,7 +279,7 @@ if __name__ == '__main__':
                 books[bn]['author'] = secd[bn]['author']
                 books[bn][str(sidx)] = secd[bn][str(sidx)]
-                if tpy == 'NT' and books[bn][str(sidx-1)]['type'] == 'HL':
+                if tpy=='NT' and books[bn][str(sidx-1)]['type']=='HL':
                     books[bn][str(sidx-1)]['content'] += str(NTPREF+sec[2])
                 else: # BM or not correct format section
                     sidx -= 1
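
For orientation, a stripped-down sketch of the splitting step the main loop performs: cut the clippings file into sections at the '==========' delimiter. Parsing and the note-to-highlight merge above are left out; the path and delimiter are repeated here so the sketch stands alone.

```
LASTLINE = '=========='

sections, current = [], []
with open('./My Clippings.txt', 'r', encoding='utf8', errors='ignore') as f:
    for line in f:
        line = line.strip()
        if not line:
            continue
        if LASTLINE in line:     # delimiter closes the current section
            sections.append(current)
            current = []
        else:
            current.append(line)
# each section now holds the title/author line, the date line and,
# for highlights and notes, the content line(s)
```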
@@ -275,9 +288,9 @@ if __name__ == '__main__':
                 sec = []
     # test dict json convert
-    with open('./xx', 'w', encoding='gbk', errors='ignore') as fw:
-        fw.write(t_dict2json(books))
-    if t_json2dict('./xx') == books: print( 'test OK')
+    with open('./xx', 'w', encoding='utf8', errors='ignore') as fw:
+        fw.write(dict2json(books))
+    if json2dict('./xx')==books: print( 'test OK')
     # print data with json format
     logger.debug(json.dumps(books, indent=4, sort_keys=True, ensure_ascii=False))
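
One small aside on the ensure_ascii=False argument used in the json.dumps calls above: it keeps the Chinese titles and highlights readable in the dumped output instead of turning them into \uXXXX escapes.

```
import json

d = {'书名': '标注'}
print(json.dumps(d))                      # {"\u4e66\u540d": "\u6807\u6ce8"}
print(json.dumps(d, ensure_ascii=False))  # {"书名": "标注"}
```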