From 2e29cfdb282490134c3b6edcac4a270be0b51d64 Mon Sep 17 00:00:00 2001
From: gavin <gavin@gavin-2.local>
Date: Thu, 28 May 2020 21:10:46 +0800
Subject: [PATCH] kindle manager

---
 .gitignore   |  2 ++
 changelog.md | 20 ++++++++-----
 kman.py      | 81 ++++++++++++++++++++++++++++++----------------------
 3 files changed, 62 insertions(+), 41 deletions(-)

diff --git a/.gitignore b/.gitignore
index d1d43bb..c79bbda 100755
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,5 @@ log*
 *xxx*
 cr
 rungit*
+clip*
+
diff --git a/changelog.md b/changelog.md
index b3e71a5..1c91fa8 100644
--- a/changelog.md
+++ b/changelog.md
@@ -7,7 +7,11 @@
 - abstract note/bookmark/highlight from kindle clipping
 - formatter output to special file
 
-### learn lesson
+## 1.0.1 (20200528)
+### feature
+- add search clip
+
+## learn lesson
 - assign value to a not exist key, will throw KeyError, too inflexible!!! perl much better
 - use defaultdict to solve obove problem, note, defaultdict only create two layer key auto
 ```
@@ -21,10 +25,14 @@ b['1']['2'] = {'3':1}  # OK
 ```
 - logging, file io, very strict indent!
 - re.X(VERBOSE), different regular express between vim & python
-
-## 1.0.1 (20200528)
-### feature
-- add search clip
+- no c like i=x<0?m:n => python - i=m if x<0 else n
+- json dict convert
+- pylint checks the code, but most of its suggestions are unacceptable to me :smile:
+- no c/c++/java/perl style switch-case; instead of too many if-elif-elif branches, use this to simplify:
+```
+  suff = {'MD':'.md','CSV':'.csv','JSON':'.json'}
+  op = OUTPREF+suff[ft]
+```
 
 # feature plan
 ## 20200528
@@ -32,5 +40,3 @@ b['1']['2'] = {'3':1}  # OK
 - first abstract from kindle hard / local directory for different OS
 - add GUI use QT
 
-
-
diff --git a/kman.py b/kman.py
index 3e3173f..6a18151 100644
--- a/kman.py
+++ b/kman.py
@@ -53,14 +53,14 @@ books =
 '''
 
 # modi clippath for different os
-SYS = 'WIN' if platform.system() == 'Windows' else \
-   ('LINUX' if platform.system() == 'LINUX' else 'MAC')
+SYS = 'WIN' if platform.system()=='Windows' else \
+   ('LINUX' if platform.system()=='Linux' else 'MAC')  # platform.system() reports 'Linux', not 'LINUX'
 
 # some constants
 LASTLINE = '=========='
 NTPREF   = '--CG注:'
 CLIPPATH = './My Clippings.txt' # /Volumes/Kindle/documents/My\ Clippings.txt
-OUTPATH  = './clip'
+OUTPREF  = './clip'
 DEBUG    = 1   # 0 - INFO; 1 - DEBUG
 LOG2FILE = 1   # 0 - to stdio; 1 - to file
 
@@ -104,12 +104,12 @@ r'''
 (\d{1,2}:\d{1,2}:\d{1,2})      #group6 - time
 ''', flags=re.X )
 
-def parse_section(sec,idx):
+def parse_section(s,i):
     """parse section
     
     Args:
-        sec: section dict
-        idx: section index
+        s: section dict
+        i: section index
 
     Returns:
         dict like this:
@@ -130,14 +130,14 @@ def parse_section(sec,idx):
     # 1. highlight over the picture, the content(#3 line) is empty, only two lines
     # 2. bookmark section only two lines
     # 3. other not correct format < 2
-    if len(sec)<=2:
+    if len(s)<=2:
         return False
 
     # parse #2 line
     section  = defaultdict(dict)
     authinfo = sec[0]
     dateinfo = sec[1]
-    content  = sec[2] if len(sec) == 3 else None
+    content  = sec[2] if len(sec)==3 else None
 
     das = da.search(dateinfo)
     # type of section
@@ -148,8 +148,8 @@ def parse_section(sec,idx):
         HL     - section is a highlight
         NT     - section is a note
     '''
-    tpy  = ('HL' if das.group(2) == '标注' else \
-           ('NT' if das.group(2) == '笔记' else 'BM'))
+    tpy  = ('HL' if das.group(2)=='标注' else \
+           ('NT' if das.group(2)=='笔记' else 'BM'))
     pos  = das.group(1)
     day  = das.group(3)
     week = das.group(4)
@@ -163,7 +163,7 @@ def parse_section(sec,idx):
     section[bookname]['author'] = author
 
     section['bookname'] = bookname
-    section[bookname][str(idx)] = {
+    section[bookname][str(i)] = {
             'type':tpy,
             'position':pos,
             'day':day,
@@ -174,28 +174,41 @@ def parse_section(sec,idx):
 
     return section
 
-
 def format_out(bks, ft='MD'):
-    """format output
+    """format output and write to file
+    MARKDOWN format:
+    TYPE | bookname | author | marktime | content
+    --|--|--|--|--
+    xx|xx|xx|xx|xx
+
+    CSV format:
+    TYPE,bookname,author,marktime,content
+    xx,xx,xx,xx,xx
+
+    marktime: 20200403 PM 3:0:3 星期五
     
     Args:
         bks: books dict
-        f:  canbe 'MD'/'TXT'/'JSON'
+        ft: can be 'MD'/'JSON'/'CSV'
 
-    Returns:
-        special format of 'bks' dict
+    Returns: None; the output is written to the file OUTPREF + suffix
     """
 
-    op = OUTPATH+('.md' if ft == 'MD'   else \
-               ('.json' if ft == 'JSON' else '.txt'))
+    suff = {'MD':'.md','CSV':'.csv','JSON':'.json'}
+    op = OUTPREF+suff[ft]
 
     with open(op, 'w', encoding='gbk', errors='ignore') as fw:
-        fw.write(line)
+        if ft=='JSON':  # ft is the format name; fw is the open output file
+            fw.write(json.dumps(bks, indent=4, sort_keys=True, ensure_ascii=False))
+        elif ft=='MD':
+            pass  # markdown output is not written yet
+        else:
+            fw.write(json.dumps(bks)) # only for load back
 
-    if ft == 'JSON':
-        return json.dumps(bks, indent=4, sort_keys=True, ensure_ascii=False)
+def statistic(bks):
+    pass
 
-def t_dict2json(d):
+def dict2json(d):
     """convert dict to json 
     Args: d is the dict
     Return: json string
@@ -203,7 +216,7 @@ def t_dict2json(d):
     jstr = json.dumps(d)
     return jstr
 
-def t_json2dict(jf):
+def json2dict(jf):
     """convert dict to json 
     Args: jf is the file saved json string
     Return: dict
@@ -230,15 +243,15 @@ def search_clip(bks, s, t='ALL', p='ALL'):
     """
     pass
 
-if __name__ == '__main__':
+if __name__=='__main__':
     # 4 lines for each section seperated with '======='
     # so read 4 lines before '======='
     with open(CLIPPATH, 'r', encoding='utf8', errors='ignore') as f:
-        books   = defaultdict(dict)
-        secd    = defaultdict(dict)
-        sidx    = 0
-        idx     = 0
-        sec     = []
+        books = defaultdict(dict)
+        secd  = defaultdict(dict)
+        sidx  = 0
+        idx   = 0
+        sec   = []
         for line in f.readlines():
             line = line.strip()
             if re.match(r'^\s*$',line): continue
@@ -246,7 +259,7 @@ if __name__ == '__main__':
 
             if not re.search(LASTLINE,line):
                 # content more than 1 line
-                 idx>3:
+                if idx>3:
                     sec[2] += str(' '+line)
                     logger.debug('idx {} {}'.format(idx, sec[2]))
                 else:
@@ -266,7 +279,7 @@ if __name__ == '__main__':
                     books[bn]['author'] = secd[bn]['author']
                     books[bn][str(sidx)] = secd[bn][str(sidx)]
 
-                    if tpy == 'NT' and books[bn][str(sidx-1)]['type'] == 'HL':
+                    if tpy=='NT' and books[bn][str(sidx-1)]['type']=='HL':
                         books[bn][str(sidx-1)]['content'] += str(NTPREF+sec[2])
                 else: # BM or not correct format section
                     sidx -= 1
@@ -275,9 +288,9 @@ if __name__ == '__main__':
                 sec = []
 
     # test dict json convert
-    with open('./xx', 'w', encoding='gbk', errors='ignore') as fw:
-        fw.write(t_dict2json(books))
-    if t_json2dict('./xx') == books: print( 'test OK')
+    with open('./xx', 'w', encoding='utf8', errors='ignore') as fw:
+        fw.write(dict2json(books))
+    if json2dict('./xx')==books: print( 'test OK')
 
 # print data with json format
 logger.debug(json.dumps(books, indent=4, sort_keys=True, ensure_ascii=False))