kindle manager

2020-05-28 20:17:50 +08:00
parent 29e8b337b4
commit 54e97b57e7
3 changed files with 104 additions and 105 deletions
--- a/kman.py
+++ b/kman.py
@@ -1,15 +1,16 @@
-#############################################
+#########################################################
-##   PROGRAM: file2.py
+## @file   : kman.py
-##   AUTHOR:  Chengan
+## @desc   : kindle note management tool
-##   CREATE:  20200526
+## @create : 20200526
-##   douboer@gmail.com
+## @author : Chengan
-#############################################
+## @email  : douboer@gmail.com
 #########################################################
 import platform
 import re
 import json
 import logging
 import platform
 from collections import defaultdict
 # data structure - use dict
@@ -52,14 +53,14 @@ books =
 '''
 # modify clip path for different OS
-SYS = 'WIN' if platform.system()=='Windows' else \
+# platform.system() returns 'Linux' on Linux hosts, so compare against that
+# exact spelling; anything that is neither Windows nor Linux is treated as MAC
+SYS = 'WIN' if platform.system() == 'Windows' else \
-   ('LINUX' if platform.system()=='LINUX' else 'MAC')
+   ('LINUX' if platform.system() == 'Linux' else 'MAC')
 # some constants
 LASTLINE = '=========='
 NTPREF   = '--CG注:'
 CLIPPATH = './My Clippings.txt' # /Volumes/Kindle/documents/My\ Clippings.txt
-STAT     = 'NONE'
+OUTPATH  = './clip'
 DEBUG    = 1   # 0 - INFO; 1 - DEBUG
 LOG2FILE = 1   # 0 - to stdio; 1 - to file
@@ -103,20 +104,29 @@ r'''
 (\d{1,2}:\d{1,2}:\d{1,2})      #group6 - time
 ''', flags=re.X )
 # input: section lines (list) and section index
 # return: dict
 #   d = { 'bookname':bookname,
 #          bookname: {
 #             'author':author
 #             'section0':{
 #                 'type':'HL',
 #                 'position':'123',
 #                 'day':'2020年5月26日',
 #                 'week':'星期二',
 #                 'meridiem':'PM',
 #                 'time':'10:26:31'
 #                 'content':content }}}
 def parse_section(sec,idx):
    """parse section
    Args:
        sec: section lines as a list - [author info, date info, content (optional)]
        idx: section index
    Returns:
        dict like this:
        d = { 'bookname':bookname,
               bookname: {
                  'author':author
                  'section0':{
                      'type':'HL',
                      'position':'123',
                      'day':'2020年5月26日',
                      'week':'星期二',
                      'meridiem':'PM',
                      'time':'10:26:31'
                      'content':content }}}
    """
    # 1. highlight over the picture, the content(#3 line) is empty, only two lines
    # 2. bookmark section only two lines
    # 3. other not correct format < 2
@@ -127,7 +137,7 @@ def parse_section(sec,idx):
    section  = defaultdict(dict)
    authinfo = sec[0]
    dateinfo = sec[1]
-    content  = sec[2] if len(sec)==3 else None
+    content  = sec[2] if len(sec) == 3 else None
    das = da.search(dateinfo)
    # type of section
@@ -138,13 +148,13 @@ def parse_section(sec,idx):
        HL     - section is a highlight
        NT     - section is a note
    '''
-    tpy   = ('HL' if das.group(2)=='标注' else \
+    tpy  = ('HL' if das.group(2) == '标注' else \
-            ('NT' if das.group(2)=='笔记' else 'BM'))
+           ('NT' if das.group(2) == '笔记' else 'BM'))
-    pos   = das.group(1)
+    pos  = das.group(1)
-    day   = das.group(3)
+    day  = das.group(3)
-    week  = das.group(4)
+    week = das.group(4)
-    pmam  = das.group(5)
+    pmam = das.group(5)
-    time  = das.group(6)
+    time = das.group(6)
    # parse #1 line
    aus = au.search(authinfo)
@@ -164,30 +174,60 @@ def parse_section(sec,idx):
    return section
 def formmat_out(books,f='MD'):
    """Format output (placeholder - nothing is implemented yet).

    Args:
        books: books dict
        f:     output format, one of 'MD' / 'TXT' / 'JSON'

    Returns:
        None - the body is empty
    """
    return None
-# search clip, searching scope may be title/author/content
+def format_out(bks, ft='MD'):
-# input: books - dict
+    """format output
-#        s - key word
+    
-#        t - 'ALL'
+    Args:
-#            'HL'
+        bks: books dict
-#            'BM'
+        ft: can be 'MD'/'TXT'/'JSON'
-#            'NT'
+
-#        p - 'ALL'
+    Returns:
-#            'TITLE'
+        special format of 'bks' dict
-#            'AUTHOR'
+    """
-#            'CONTENT'
+
-# output: 
+    op = OUTPATH+('.md' if ft == 'MD'   else \
-#
+               ('.json' if ft == 'JSON' else '.txt'))
-def search_clip(books, s, t='ALL', p='ALL'):
+
    with open(op, 'w', encoding='gbk', errors='ignore') as fw:
        fw.write(line)
    if ft == 'JSON':
        return json.dumps(bks, indent=4, sort_keys=True, ensure_ascii=False)
 def t_dict2json(d):
    """Serialize a dict to a JSON string.

    Args:
        d: the dict to serialize

    Returns:
        the JSON text produced by json.dumps with its default options
        (non-ASCII characters are therefore escaped)
    """
    return json.dumps(d)
 def t_json2dict(jf):
    """Load a JSON file and return the decoded object.

    Args:
        jf: path of the file that holds the JSON text

    Returns:
        the value decoded by json.load (a dict when the file holds a
        JSON object)
    """
    # the file is read as utf8; undecodable bytes are dropped (errors='ignore')
    with open(jf, 'r', encoding='utf8', errors='ignore') as src:
        return json.load(src)
 def search_clip(bks, s, t='ALL', p='ALL'):
    """Search the clippings (placeholder - nothing is implemented yet).

    The searching scope may be title / author / content.

    Args:
        bks: books dict
        s:   key word
        t:   one of 'ALL' / 'HL' / 'BM' / 'NT'
             (presumably the section type to search - confirm when implemented)
        p:   one of 'ALL' / 'TITLE' / 'AUTHOR' / 'CONTENT'

    Returns:
        None for now; intended to return the matching clipping content
    """
    return None
 if __name__ == '__main__':
@@ -199,7 +239,6 @@ if __name__ == '__main__':
        sidx    = 0
        idx     = 0
        sec     = []
        STAT    = 'START'
        for line in f.readlines():
            line = line.strip()
            if re.match(r'^\s*$',line): continue
@@ -207,7 +246,7 @@ if __name__ == '__main__':
            if not re.search(LASTLINE,line):
                # content more than 1 line
-                if idx>3:
+                if idx>3:
                    sec[2] += str(' '+line)
                    logger.debug('idx {} {}'.format(idx, sec[2]))
                else:
@@ -227,7 +266,7 @@ if __name__ == '__main__':
                    books[bn]['author'] = secd[bn]['author']
                    books[bn][str(sidx)] = secd[bn][str(sidx)]
-                    if tpy=='NT' and books[bn][str(sidx-1)]['type']=='HL':
+                    if tpy == 'NT' and books[bn][str(sidx-1)]['type'] == 'HL':
                        books[bn][str(sidx-1)]['content'] += str(NTPREF+sec[2])
                else: # BM or not correct format section
                    sidx -= 1
@@ -235,6 +274,11 @@ if __name__ == '__main__':
                # initial section for next section loop
                sec = []
-    # print data with json format
+    # test dict json convert
-    logger.debug(json.dumps(books, indent=4, sort_keys=True, ensure_ascii=False))
+    with open('./xx', 'w', encoding='gbk', errors='ignore') as fw:
        fw.write(t_dict2json(books))
    if t_json2dict('./xx') == books: print( 'test OK')
 # print data with json format
 logger.debug(json.dumps(books, indent=4, sort_keys=True, ensure_ascii=False))
--- a/tfile.py
+++ b/tfile.py
@@ -1,46 +0,0 @@
 import re
 # Pattern for the bookname/author line of a clipping.
 # sample line: 庆余年(精校版） (猫腻)
 # The line has to start with a BOM (\ufeff); group 1 is the bookname and
 # group 2 the author.
 # NOTE(review): under re.X the space before `\(` is ignored, so group 1
 # keeps any trailing space of the bookname.
 au = re.compile(
 r'''
 ^\ufeff
 (.+) \(                       #bookname
 (.+)\)                        #author
 ''', flags=re.X )
 # Pattern for the page & date line of a clipping.
 # sample line: 您在位置 #4286 的笔记 | 添加于 2020年1月30日星期四 上午10:26:31^M
 # re.X (VERBOSE): verbose mode - the pattern may span several lines,
 # whitespace is ignored and comments may be added.
 # The next line looks like the same pattern written in vim-style regex syntax:
 #\(\d\+-\{0,1}\d\+\).\+\(\d\{4}年\d\{1,2}月\d\{1,2}日\)\(星期.\) \(..\)\(\d\{1,2}:\d\{1,2}:\d\{1,2}\)
 da = re.compile(
 r'''
 \#
 (\d+-{0,1}\d+)                 #group1 - page
 .+
 (\d{4}年\d{1,2}月\d{1,2}日)    #group2 - xxxx年xx月xx日
 (星期.)                        #group3 - week
 \s
 (..)                           #group4 - pm/am
 (\d{1,2}:\d{1,2}:\d{1,2})      #group5 - time
 ''', flags=re.X )
 # Scan the clippings file and print every line that matches either the
 # bookname/author pattern (au) or the page/date pattern (da).
 # No flag variable is used, so the file handle name `f` is never rebound
 # while the file is still open.
 with open('./My Clippings.t.txt', 'r', encoding='utf8', errors='ignore') as f:
    for line in f:
        # bookname/author line: report it and skip the date check
        aus = au.search(line)
        if aus:
            print("book:",aus.group(1),"auth:",aus.group(2))
            continue
        # page/date line: page, date, week and an AM/PM marker
        das = da.search(line)
        if das:
            print(das.group(1),
                  das.group(2),
                  das.group(3),
                  'PM' if das.group(4)=="下午" else 'AM')
--- a/1
+++ b/1