kindle manager

This commit is contained in:
gavin
2020-05-28 20:17:50 +08:00
parent 29e8b337b4
commit 54e97b57e7
3 changed files with 104 additions and 105 deletions

162
kman.py
View File

@@ -1,15 +1,16 @@
############################################# #########################################################
## PROGRAM: file2.py ## @file : kman.py
## AUTHOR: Chengan ## @desc : kindle note management tool
## CREATE: 20200526 ## @create : 20200526
## douboer@gmail.com ## @author : Chengan
############################################# ## @email : douboer@gmail.com
#########################################################
import platform
import re import re
import json import json
import logging import logging
import platform
from collections import defaultdict from collections import defaultdict
# data structure - use dict # data structure - use dict
@@ -52,14 +53,14 @@ books =
''' '''
# modi clippath for different os # modi clippath for different os
# Map the running OS to the tag used when choosing the clippings path.
# platform.system() returns 'Windows', 'Linux' or 'Darwin' (mixed case), so
# the comparison must use 'Linux'; the former 'LINUX' literal never matched
# and every Linux host fell through to 'MAC'.
SYS = {'Windows': 'WIN', 'Linux': 'LINUX'}.get(platform.system(), 'MAC')
# some constants # some constants
LASTLINE = '==========' LASTLINE = '=========='
NTPREF = '--CG注:' NTPREF = '--CG注:'
CLIPPATH = './My Clippings.txt' # /Volumes/Kindle/documents/My\ Clippings.txt CLIPPATH = './My Clippings.txt' # /Volumes/Kindle/documents/My\ Clippings.txt
STAT = 'NONE' OUTPATH = './clip'
DEBUG = 1 # 0 - INFO; 1 - DEBUG DEBUG = 1 # 0 - INFO; 1 - DEBUG
LOG2FILE = 1 # 0 - to stdio; 1 - to file LOG2FILE = 1 # 0 - to stdio; 1 - to file
@@ -103,20 +104,29 @@ r'''
(\d{1,2}:\d{1,2}:\d{1,2}) #group6 - time (\d{1,2}:\d{1,2}:\d{1,2}) #group6 - time
''', flags=re.X ) ''', flags=re.X )
# input: section dict and section index
# return: dict
# d = { 'bookname':bookname,
# bookname: {
# 'author':author
# 'section0':{
# 'type':'HL',
# 'position':'123',
# 'day':'2020年5月26日',
# 'week':'星期二',
# 'meridiem':'PM',
# 'time':'10:26:31'
# 'content':content }}}
def parse_section(sec,idx): def parse_section(sec,idx):
"""parse section
Args:
sec: section dict
idx: section index
Returns:
dict like this:
d = { 'bookname':bookname,
bookname: {
'author':author
'section0':{
'type':'HL',
'position':'123',
'day':'2020年5月26日',
'week':'星期二',
'meridiem':'PM',
'time':'10:26:31'
'content':content }}}
"""
# 1. highlight over the picture, the content(#3 line) is empty, only two lines # 1. highlight over the picture, the content(#3 line) is empty, only two lines
# 2. bookmark section only two lines # 2. bookmark section only two lines
# 3. other not correct format < 2 # 3. other not correct format < 2
@@ -127,7 +137,7 @@ def parse_section(sec,idx):
section = defaultdict(dict) section = defaultdict(dict)
authinfo = sec[0] authinfo = sec[0]
dateinfo = sec[1] dateinfo = sec[1]
content = sec[2] if len(sec)==3 else None content = sec[2] if len(sec) == 3 else None
das = da.search(dateinfo) das = da.search(dateinfo)
# type of section # type of section
@@ -138,13 +148,13 @@ def parse_section(sec,idx):
HL - section is a highlight HL - section is a highlight
NT - section is a note NT - section is a note
''' '''
tpy = ('HL' if das.group(2)=='标注' else \ tpy = ('HL' if das.group(2) == '标注' else \
('NT' if das.group(2)=='笔记' else 'BM')) ('NT' if das.group(2) == '笔记' else 'BM'))
pos = das.group(1) pos = das.group(1)
day = das.group(3) day = das.group(3)
week = das.group(4) week = das.group(4)
pmam = das.group(5) pmam = das.group(5)
time = das.group(6) time = das.group(6)
# parse #1 line # parse #1 line
aus = au.search(authinfo) aus = au.search(authinfo)
@@ -164,30 +174,60 @@ def parse_section(sec,idx):
return section return section
# format output
# input: books - dict
# f - 'MD'
# 'TXT'
# 'JSON'
# output:
#
def formmat_out(books,f='MD'):
pass
# search clip, searching scope may be title/author/content def format_out(bks, ft='MD'):
# input: books - dict """format output
# s - key word
# t - 'ALL' Args:
# 'HL' bks: books dict
# 'BM' ft: can be 'MD'/'TXT'/'JSON'
# 'NT'
# p - 'ALL' Returns:
# 'TITLE' special format of 'bks' dict
# 'AUTHOR' """
# 'CONTENT'
# output: op = OUTPATH+('.md' if ft == 'MD' else \
# ('.json' if ft == 'JSON' else '.txt'))
def search_clip(books, s, t='ALL', p='ALL'):
with open(op, 'w', encoding='gbk', errors='ignore') as fw:
fw.write(line)
if ft == 'JSON':
return json.dumps(bks, indent=4, sort_keys=True, ensure_ascii=False)
def t_dict2json(d, *, indent=None, sort_keys=False, ensure_ascii=True):
    """Serialize a dict to a JSON string.

    Args:
        d: the dict to serialize; it must contain only JSON-serializable
            values.
        indent: forwarded to ``json.dumps``; ``None`` (default) gives the
            compact single-line form.
        sort_keys: forwarded to ``json.dumps``; sort object keys when True.
        ensure_ascii: forwarded to ``json.dumps``; when True (default)
            non-ASCII characters are written as ``\\uXXXX`` escapes.

    Returns:
        The JSON string.

    Raises:
        TypeError: if ``d`` holds a value ``json.dumps`` cannot serialize.
    """
    # The keyword defaults equal json.dumps' own defaults, so a plain
    # t_dict2json(d) call produces exactly the same string as before.
    return json.dumps(d, indent=indent, sort_keys=sort_keys,
                      ensure_ascii=ensure_ascii)
def t_json2dict(jf):
    """Load a JSON file and return the decoded object.

    Args:
        jf: path of the file that stores the JSON string.

    Returns:
        The object decoded by ``json.load`` (a dict when the file holds a
        JSON object).

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file content is not valid JSON.
    """
    # NOTE(review): errors='ignore' silently drops bytes that are not valid
    # UTF-8, so a file written in another encoding may load with characters
    # missing instead of failing - kept as-is to preserve existing behavior.
    with open(jf, 'r', encoding='utf8', errors='ignore') as f:
        return json.load(f)
def search_clip(bks, s, t='ALL', p='ALL'):
"""search clip, searching scope may be title/author/content
Args:
input: bks: books dict
s: key word
t: 'ALL'
'HL'
'BM'
'NT'
p: 'ALL'
'TITLE'
'AUTHOR'
'CONTENT'
Return: search clipping content
"""
pass pass
if __name__ == '__main__': if __name__ == '__main__':
@@ -199,7 +239,6 @@ if __name__ == '__main__':
sidx = 0 sidx = 0
idx = 0 idx = 0
sec = [] sec = []
STAT = 'START'
for line in f.readlines(): for line in f.readlines():
line = line.strip() line = line.strip()
if re.match(r'^\s*$',line): continue if re.match(r'^\s*$',line): continue
@@ -207,7 +246,7 @@ if __name__ == '__main__':
if not re.search(LASTLINE,line): if not re.search(LASTLINE,line):
# content more than 1 line # content more than 1 line
if idx>3: idx>3:
sec[2] += str(' '+line) sec[2] += str(' '+line)
logger.debug('idx {} {}'.format(idx, sec[2])) logger.debug('idx {} {}'.format(idx, sec[2]))
else: else:
@@ -227,7 +266,7 @@ if __name__ == '__main__':
books[bn]['author'] = secd[bn]['author'] books[bn]['author'] = secd[bn]['author']
books[bn][str(sidx)] = secd[bn][str(sidx)] books[bn][str(sidx)] = secd[bn][str(sidx)]
if tpy=='NT' and books[bn][str(sidx-1)]['type']=='HL': if tpy == 'NT' and books[bn][str(sidx-1)]['type'] == 'HL':
books[bn][str(sidx-1)]['content'] += str(NTPREF+sec[2]) books[bn][str(sidx-1)]['content'] += str(NTPREF+sec[2])
else: # BM or not correct format section else: # BM or not correct format section
sidx -= 1 sidx -= 1
@@ -235,6 +274,11 @@ if __name__ == '__main__':
# initial section for next section loop # initial section for next section loop
sec = [] sec = []
# print data with json format # test dict json convert
logger.debug(json.dumps(books, indent=4, sort_keys=True, ensure_ascii=False)) with open('./xx', 'w', encoding='gbk', errors='ignore') as fw:
fw.write(t_dict2json(books))
if t_json2dict('./xx') == books: print( 'test OK')
# print data with json format
logger.debug(json.dumps(books, indent=4, sort_keys=True, ensure_ascii=False))

View File

@@ -1,46 +0,0 @@
import re

# Title line of a clipping: book name followed by the author in parentheses.
# Example:
#   庆余年(精校版) (猫腻)
au = re.compile(
    r'''
    ^\ufeff
    (.+) \(     #group1 - bookname
    (.+)\)      #group2 - author
    ''', flags=re.X)

# Position & date line of a clipping.
# Example:
#   您在位置 #4286 的笔记 | 添加于 2020年1月30日星期四 上午10:26:31^M
# re.X (VERBOSE): the pattern may span several lines, whitespace inside it is
# ignored and '#' comments are allowed.
# Equivalent vim pattern:
#   \(\d\+-\{0,1}\d\+\).\+\(\d\{4}年\d\{1,2}月\d\{1,2}日\)\(星期.\) \(..\)\(\d\{1,2}:\d\{1,2}:\d\{1,2}\)
da = re.compile(
    r'''
    \#
    (\d+(?:-\d+)?)                  #group1 - page; a single number or a range,
                                    #         single-digit positions included
    .+
    (\d{4}年\d{1,2}月\d{1,2}日)     #group2 - xxxx年xx月xx日 (the 年 separator
                                    #         was missing, so no date matched)
    (星期.)                         #group3 - week
    \s
    (..)                            #group4 - pm/am
    (\d{1,2}:\d{1,2}:\d{1,2})       #group5 - time
    ''', flags=re.X)

with open('./My Clippings.t.txt', 'r', encoding='utf8', errors='ignore') as f:
    for line in f:
        # A line is reported at most once: title lines first, then date lines.
        aus = au.search(line)
        if aus:
            print("book:", aus.group(1), "auth:", aus.group(2))
            continue

        das = da.search(line)
        if das:
            print(das.group(1),
                  das.group(2),
                  das.group(3),
                  'PM' if das.group(4) == "下午" else 'AM')

1
xx Normal file

File diff suppressed because one or more lines are too long