kindle manager
This commit is contained in:
162
kman.py
162
kman.py
@@ -1,15 +1,16 @@
|
|||||||
|
|
||||||
#############################################
|
#########################################################
|
||||||
## PROGRAM: file2.py
|
## @file : kman.py
|
||||||
## AUTHOR: Chengan
|
## @desc : kindle note management tool
|
||||||
## CREATE: 20200526
|
## @create : 20200526
|
||||||
## douboer@gmail.com
|
## @author : Chengan
|
||||||
#############################################
|
## @email : douboer@gmail.com
|
||||||
|
#########################################################
|
||||||
|
|
||||||
import platform
|
|
||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import platform
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
|
||||||
# data structure - use dict
|
# data structure - use dict
|
||||||
@@ -52,14 +53,14 @@ books =
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
# modi clippath for different os
|
# modi clippath for different os
|
||||||
SYS = 'WIN' if platform.system()=='Windows' else \
|
SYS = 'WIN' if platform.system() == 'Windows' else \
|
||||||
('LINUX' if platform.system()=='LINUX' else 'MAC')
|
('LINUX' if platform.system() == 'LINUX' else 'MAC')
|
||||||
|
|
||||||
# some constants
|
# some constants
|
||||||
LASTLINE = '=========='
|
LASTLINE = '=========='
|
||||||
NTPREF = '--CG注:'
|
NTPREF = '--CG注:'
|
||||||
CLIPPATH = './My Clippings.txt' # /Volumes/Kindle/documents/My\ Clippings.txt
|
CLIPPATH = './My Clippings.txt' # /Volumes/Kindle/documents/My\ Clippings.txt
|
||||||
STAT = 'NONE'
|
OUTPATH = './clip'
|
||||||
DEBUG = 1 # 0 - INFO; 1 - DEBUG
|
DEBUG = 1 # 0 - INFO; 1 - DEBUG
|
||||||
LOG2FILE = 1 # 0 - to stdio; 1 - to file
|
LOG2FILE = 1 # 0 - to stdio; 1 - to file
|
||||||
|
|
||||||
@@ -103,20 +104,29 @@ r'''
|
|||||||
(\d{1,2}:\d{1,2}:\d{1,2}) #group6 - time
|
(\d{1,2}:\d{1,2}:\d{1,2}) #group6 - time
|
||||||
''', flags=re.X )
|
''', flags=re.X )
|
||||||
|
|
||||||
# input: section dict and section index
|
|
||||||
# return: dict
|
|
||||||
# d = { 'bookname':bookname,
|
|
||||||
# bookname: {
|
|
||||||
# 'author':author
|
|
||||||
# 'section0':{
|
|
||||||
# 'type':'HL',
|
|
||||||
# 'position':'123',
|
|
||||||
# 'day':'2020年5月26日',
|
|
||||||
# 'week':'星期二',
|
|
||||||
# 'meridiem':'PM',
|
|
||||||
# 'time':'10:26:31'
|
|
||||||
# 'content':content }}}
|
|
||||||
def parse_section(sec,idx):
|
def parse_section(sec,idx):
|
||||||
|
"""parse section
|
||||||
|
|
||||||
|
Args:
|
||||||
|
sec: section dict
|
||||||
|
idx: section index
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
dict like this:
|
||||||
|
d = { 'bookname':bookname,
|
||||||
|
bookname: {
|
||||||
|
'author':author
|
||||||
|
'section0':{
|
||||||
|
'type':'HL',
|
||||||
|
'position':'123',
|
||||||
|
'day':'2020年5月26日',
|
||||||
|
'week':'星期二',
|
||||||
|
'meridiem':'PM',
|
||||||
|
'time':'10:26:31'
|
||||||
|
'content':content }}}
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
# 1. highlight over the picture, the content(#3 line) is empty, only two lines
|
# 1. highlight over the picture, the content(#3 line) is empty, only two lines
|
||||||
# 2. bookmark section only two lines
|
# 2. bookmark section only two lines
|
||||||
# 3. other not correct format < 2
|
# 3. other not correct format < 2
|
||||||
@@ -127,7 +137,7 @@ def parse_section(sec,idx):
|
|||||||
section = defaultdict(dict)
|
section = defaultdict(dict)
|
||||||
authinfo = sec[0]
|
authinfo = sec[0]
|
||||||
dateinfo = sec[1]
|
dateinfo = sec[1]
|
||||||
content = sec[2] if len(sec)==3 else None
|
content = sec[2] if len(sec) == 3 else None
|
||||||
|
|
||||||
das = da.search(dateinfo)
|
das = da.search(dateinfo)
|
||||||
# type of section
|
# type of section
|
||||||
@@ -138,13 +148,13 @@ def parse_section(sec,idx):
|
|||||||
HL - section is a highlight
|
HL - section is a highlight
|
||||||
NT - section is a note
|
NT - section is a note
|
||||||
'''
|
'''
|
||||||
tpy = ('HL' if das.group(2)=='标注' else \
|
tpy = ('HL' if das.group(2) == '标注' else \
|
||||||
('NT' if das.group(2)=='笔记' else 'BM'))
|
('NT' if das.group(2) == '笔记' else 'BM'))
|
||||||
pos = das.group(1)
|
pos = das.group(1)
|
||||||
day = das.group(3)
|
day = das.group(3)
|
||||||
week = das.group(4)
|
week = das.group(4)
|
||||||
pmam = das.group(5)
|
pmam = das.group(5)
|
||||||
time = das.group(6)
|
time = das.group(6)
|
||||||
|
|
||||||
# parse #1 line
|
# parse #1 line
|
||||||
aus = au.search(authinfo)
|
aus = au.search(authinfo)
|
||||||
@@ -164,30 +174,60 @@ def parse_section(sec,idx):
|
|||||||
|
|
||||||
return section
|
return section
|
||||||
|
|
||||||
# format output
|
|
||||||
# input: books - dict
|
|
||||||
# f - 'MD'
|
|
||||||
# 'TXT'
|
|
||||||
# 'JSON'
|
|
||||||
# output:
|
|
||||||
#
|
|
||||||
def formmat_out(books,f='MD'):
|
|
||||||
pass
|
|
||||||
|
|
||||||
# search clip, searching scope may be title/author/content
|
def format_out(bks, ft='MD'):
|
||||||
# input: books - dict
|
"""format output
|
||||||
# s - key word
|
|
||||||
# t - 'ALL'
|
Args:
|
||||||
# 'HL'
|
bks: books dict
|
||||||
# 'BM'
|
ft: can be 'MD'/'TXT'/'JSON'
|
||||||
# 'NT'
|
|
||||||
# p - 'ALL'
|
Returns:
|
||||||
# 'TITLE'
|
special format of 'bks' dict
|
||||||
# 'AUTHOR'
|
"""
|
||||||
# 'CONTENT'
|
|
||||||
# output:
|
op = OUTPATH+('.md' if ft == 'MD' else \
|
||||||
#
|
('.json' if ft == 'JSON' else '.txt'))
|
||||||
def search_clip(books, s, t='ALL', p='ALL'):
|
|
||||||
|
with open(op, 'w', encoding='gbk', errors='ignore') as fw:
|
||||||
|
fw.write(line)
|
||||||
|
|
||||||
|
if ft == 'JSON':
|
||||||
|
return json.dumps(bks, indent=4, sort_keys=True, ensure_ascii=False)
|
||||||
|
|
||||||
|
def t_dict2json(d):
|
||||||
|
"""convert dict to json
|
||||||
|
Args: d is the dict
|
||||||
|
Return: json string
|
||||||
|
"""
|
||||||
|
jstr = json.dumps(d)
|
||||||
|
return jstr
|
||||||
|
|
||||||
|
def t_json2dict(jf):
|
||||||
|
"""convert json to dict
|
||||||
|
Args: jf is the path of the file that stores the json string
|
||||||
|
Return: dict
|
||||||
|
"""
|
||||||
|
d = {}
|
||||||
|
with open(jf, 'r', encoding='utf8', errors='ignore') as f:
|
||||||
|
d=json.load(f)
|
||||||
|
return d
|
||||||
|
|
||||||
|
def search_clip(bks, s, t='ALL', p='ALL'):
|
||||||
|
"""search clip, searching scope may be title/author/content
|
||||||
|
Args:
|
||||||
|
bks: books dict
|
||||||
|
s: key word
|
||||||
|
t: 'ALL'
|
||||||
|
'HL'
|
||||||
|
'BM'
|
||||||
|
'NT'
|
||||||
|
p: 'ALL'
|
||||||
|
'TITLE'
|
||||||
|
'AUTHOR'
|
||||||
|
'CONTENT'
|
||||||
|
Return: search clipping content
|
||||||
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
@@ -199,7 +239,6 @@ if __name__ == '__main__':
|
|||||||
sidx = 0
|
sidx = 0
|
||||||
idx = 0
|
idx = 0
|
||||||
sec = []
|
sec = []
|
||||||
STAT = 'START'
|
|
||||||
for line in f.readlines():
|
for line in f.readlines():
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
if re.match(r'^\s*$',line): continue
|
if re.match(r'^\s*$',line): continue
|
||||||
@@ -207,7 +246,7 @@ if __name__ == '__main__':
|
|||||||
|
|
||||||
if not re.search(LASTLINE,line):
|
if not re.search(LASTLINE,line):
|
||||||
# content more than 1 line
|
# content more than 1 line
|
||||||
if idx>3:
|
if idx>3:
|
||||||
sec[2] += str(' '+line)
|
sec[2] += str(' '+line)
|
||||||
logger.debug('idx {} {}'.format(idx, sec[2]))
|
logger.debug('idx {} {}'.format(idx, sec[2]))
|
||||||
else:
|
else:
|
||||||
@@ -227,7 +266,7 @@ if __name__ == '__main__':
|
|||||||
books[bn]['author'] = secd[bn]['author']
|
books[bn]['author'] = secd[bn]['author']
|
||||||
books[bn][str(sidx)] = secd[bn][str(sidx)]
|
books[bn][str(sidx)] = secd[bn][str(sidx)]
|
||||||
|
|
||||||
if tpy=='NT' and books[bn][str(sidx-1)]['type']=='HL':
|
if tpy == 'NT' and books[bn][str(sidx-1)]['type'] == 'HL':
|
||||||
books[bn][str(sidx-1)]['content'] += str(NTPREF+sec[2])
|
books[bn][str(sidx-1)]['content'] += str(NTPREF+sec[2])
|
||||||
else: # BM or not correct format section
|
else: # BM or not correct format section
|
||||||
sidx -= 1
|
sidx -= 1
|
||||||
@@ -235,6 +274,11 @@ if __name__ == '__main__':
|
|||||||
# initial section for next section loop
|
# initial section for next section loop
|
||||||
sec = []
|
sec = []
|
||||||
|
|
||||||
# print data with json format
|
# test dict json convert
|
||||||
logger.debug(json.dumps(books, indent=4, sort_keys=True, ensure_ascii=False))
|
with open('./xx', 'w', encoding='gbk', errors='ignore') as fw:
|
||||||
|
fw.write(t_dict2json(books))
|
||||||
|
if t_json2dict('./xx') == books: print( 'test OK')
|
||||||
|
|
||||||
|
# print data with json format
|
||||||
|
logger.debug(json.dumps(books, indent=4, sort_keys=True, ensure_ascii=False))
|
||||||
|
|
||||||
|
|||||||
46
tfile.py
46
tfile.py
@@ -1,46 +0,0 @@
|
|||||||
|
|
||||||
import re
|
|
||||||
|
|
||||||
#author & bookname info
|
|
||||||
#庆余年(精校版) (猫腻)
|
|
||||||
au = re.compile(
|
|
||||||
r'''
|
|
||||||
^\ufeff
|
|
||||||
(.+) \( #bookname
|
|
||||||
(.+)\) #author
|
|
||||||
''', flags=re.X )
|
|
||||||
|
|
||||||
# page & date info
|
|
||||||
# 您在位置 #4286 的笔记 | 添加于 2020年1月30日星期四 上午10:26:31^M
|
|
||||||
# re.X(VERBOSE): 详细模式。这个模式下正则表达式可以是多行,忽略空白字符,并可以加入注释
|
|
||||||
#\(\d\+-\{0,1}\d\+\).\+\(\d\{4}年\d\{1,2}月\d\{1,2}日\)\(星期.\) \(..\)\(\d\{1,2}:\d\{1,2}:\d\{1,2}\)
|
|
||||||
da = re.compile(
|
|
||||||
r'''
|
|
||||||
\#
|
|
||||||
(\d+-{0,1}\d+) #group1 - page
|
|
||||||
.+
|
|
||||||
(\d{4}年\d{1,2}月\d{1,2}日) #group2 - xxxx年xx月xx日
|
|
||||||
(星期.) #group3 - week
|
|
||||||
\s
|
|
||||||
(..) #group4 - pm/am
|
|
||||||
(\d{1,2}:\d{1,2}:\d{1,2}) #group5 - time
|
|
||||||
''', flags=re.X )
|
|
||||||
|
|
||||||
with open('./My Clippings.t.txt', 'r', encoding='utf8', errors='ignore') as f:
|
|
||||||
for line in f.readlines():
|
|
||||||
f = False
|
|
||||||
if not f:
|
|
||||||
aus = au.search(line)
|
|
||||||
if aus:
|
|
||||||
print("book:",aus.group(1),"auth:",aus.group(2))
|
|
||||||
f = True
|
|
||||||
|
|
||||||
if not f:
|
|
||||||
das = da.search(line)
|
|
||||||
if das:
|
|
||||||
print(das.group(1),
|
|
||||||
das.group(2),
|
|
||||||
das.group(3),
|
|
||||||
'PM' if das.group(4)=="下午" else 'AM')
|
|
||||||
f = True
|
|
||||||
|
|
||||||
Reference in New Issue
Block a user