kindle manager

2020-05-28 20:17:50 +08:00
parent 29e8b337b4
commit 54e97b57e7
3 changed files with 104 additions and 105 deletions
--- a/kman.py
+++ b/kman.py
@@ -1,15 +1,16 @@

-#############################################
-##   PROGRAM: file2.py
-##   AUTHOR:  Chengan
-##   CREATE:  20200526
-##   douboer@gmail.com
-#############################################
+#########################################################
+## @file   : kman.py
+## @desc   : kindle note management tool
+## @create : 20200526
+## @author : Chengan
+## @email  : douboer@gmail.com
+#########################################################

-import platform
 import re
 import json
 import logging
+import platform
 from collections import defaultdict

 # data structure - use dict
@@ -59,7 +60,7 @@ SYS = 'WIN' if platform.system()=='Windows' else \
 LASTLINE = '=========='
 NTPREF   = '--CG注:'
 CLIPPATH = './My Clippings.txt' # /Volumes/Kindle/documents/My\ Clippings.txt
-STAT     = 'NONE'
+OUTPATH  = './clip'
 DEBUG    = 1   # 0 - INFO; 1 - DEBUG
 LOG2FILE = 1   # 0 - to stdio; 1 - to file

@@ -103,20 +104,29 @@ r'''
 (\d{1,2}:\d{1,2}:\d{1,2})      #group6 - time
 ''', flags=re.X )

-# input: section dict & and section index
-# return: dict
-#   d = { 'bookname':bookname,
-#          bookname: {
-#             'author':author
-#             'section0':{
-#                 'type':'HL',
-#                 'position':'123',
-#                 'day':'2020年5月26日',
-#                 'week':'星期二',
-#                 'meridiem':'PM',
-#                 'time':'10:26:31'
-#                 'content':content }}}
 def parse_section(sec,idx):
+    """parse section
+    
+    Args:
+        sec: section lines (list of str)
+        idx: section index
+
+    Returns:
+        dict like this:
+        d = { 'bookname':bookname,
+               bookname: {
+                  'author':author,
+                  'section0':{
+                      'type':'HL',
+                      'position':'123',
+                      'day':'2020年5月26日',
+                      'week':'星期二',
+                      'meridiem':'PM',
+                      'time':'10:26:31',
+                      'content':content }}}
+    """
+
+
    # 1. highlight over the picture, the content(#3 line) is empty, only two lines
    # 2. bookmark section only two lines
    # 3. other not correct format < 2
@@ -164,30 +174,60 @@ def parse_section(sec,idx):

    return section

-# format output
-# input: books - dict
-#        f - 'MD'
-#            'TXT'
-#            'JSON'
-# output: 
-#
-def formmat_out(books,f='MD'):
-    pass

-# search clip, searching scope may be title/author/content
-# input: books - dict
-#        s - key word
-#        t - 'ALL'
-#            'HL'
-#            'BM'
-#            'NT'
-#        p - 'ALL'
-#            'TITLE'
-#            'AUTHOR'
-#            'CONTENT'
-# output: 
-#
-def search_clip(books, s, t='ALL', p='ALL'):
+def format_out(bks, ft='MD'):
+    """format output
+    
+    Args:
+        bks: books dict
+        ft: can be 'MD'/'TXT'/'JSON'
+
+    Returns:
+        JSON string of 'bks' when ft is 'JSON', otherwise None
+    """
+
+    op = OUTPATH+('.md' if ft == 'MD'   else \
+               ('.json' if ft == 'JSON' else '.txt'))
+
+    with open(op, 'w', encoding='gbk', errors='ignore') as fw:
+        fw.write(line)
+
+    if ft == 'JSON':
+        return json.dumps(bks, indent=4, sort_keys=True, ensure_ascii=False)
+
+def t_dict2json(d):
+    """convert dict to json 
+    Args: d is the dict
+    Return: json string
+    """
+    jstr = json.dumps(d)
+    return jstr
+
+def t_json2dict(jf):
+    """convert json file to dict
+    Args: jf is the path of the file holding the json string
+    Return: dict
+    """
+    d = {}
+    with open(jf, 'r', encoding='utf8', errors='ignore') as f:
+        d=json.load(f)
+    return d
+
+def search_clip(bks, s, t='ALL', p='ALL'):
+    """search clip, searching scope may be title/author/content
+    Args:
+        input: bks: books dict
+               s: key word
+               t: 'ALL'
+                  'HL'
+                  'BM'
+                  'NT'
+               p: 'ALL'
+                  'TITLE'
+                  'AUTHOR'
+                  'CONTENT'
+    Return: search clipping content
+    """
    pass

 if __name__ == '__main__':
@@ -199,7 +239,6 @@ if __name__ == '__main__':
        sidx    = 0
        idx     = 0
        sec     = []
-        STAT    = 'START'
        for line in f.readlines():
            line = line.strip()
            if re.match(r'^\s*$',line): continue
@@ -207,7 +246,7 @@ if __name__ == '__main__':

            if not re.search(LASTLINE,line):
                # content more than 1 line
-                if idx>3:
+                if idx>3:
                    sec[2] += str(' '+line)
                    logger.debug('idx {} {}'.format(idx, sec[2]))
                else:
@@ -235,6 +274,11 @@ if __name__ == '__main__':
                # initial section for next section loop
                sec = []

+    # test dict json convert
+    with open('./xx', 'w', encoding='gbk', errors='ignore') as fw:
+        fw.write(t_dict2json(books))
+    if t_json2dict('./xx') == books: print( 'test OK')
+
 # print data with json format
 logger.debug(json.dumps(books, indent=4, sort_keys=True, ensure_ascii=False))

--- a/tfile.py
+++ b/tfile.py
@@ -1,46 +0,0 @@
-
-import re
-
-#author & bookname info
-#庆余年(精校版） (猫腻)
-au = re.compile(
-r'''
-^\ufeff
-(.+) \(                       #bookname
-(.+)\)                        #author
-''', flags=re.X )
-
-# page & date info
-# 您在位置 #4286 的笔记 | 添加于 2020年1月30日星期四 上午10:26:31^M
-# re.X(VERBOSE): 详细模式。这个模式下正则表达式可以是多行，忽略空白字符，并可以加入注释
-#\(\d\+-\{0,1}\d\+\).\+\(\d\{4}年\d\{1,2}月\d\{1,2}日\)\(星期.\) \(..\)\(\d\{1,2}:\d\{1,2}:\d\{1,2}\)
-da = re.compile(
-r'''
-\#
-(\d+-{0,1}\d+)                 #group1 - page
-.+
-(\d{4}年\d{1,2}月\d{1,2}日)    #group2 - xxxx年xx月xx日
-(星期.)                        #group3 - week
-\s
-(..)                           #group4 - pm/am
-(\d{1,2}:\d{1,2}:\d{1,2})      #group5 - time
-''', flags=re.X )
-
-with open('./My Clippings.t.txt', 'r', encoding='utf8', errors='ignore') as f:
-    for line in f.readlines():
-        f = False
-        if not f:
-            aus = au.search(line)
-            if aus:
-                print("book:",aus.group(1),"auth:",aus.group(2))
-                f = True
-
-        if not f:
-            das = da.search(line)
-            if das:
-                print(das.group(1),
-                      das.group(2),
-                      das.group(3),
-                      'PM' if das.group(4)=="下午" else 'AM')
-                f = True
-
--- a/1
+++ b/1