kindle manager

2020-05-28 13:24:40 +08:00
parent 12a7d01ad4
commit 855d436db1
1 changed files with 0 additions and 174 deletions
--- a/file2.py
+++ b/file2.py
@@ -1,174 +0,0 @@
-
-#############################################
-##   PROGRAM: file2.py
-##   AUTHOR:  Chengan 20200526
-##            douboer@gmail.com
-#############################################
-
-import re
-import json
-from collections import defaultdict
-
-# Module-level constants and initial parser state
-BOUNDARY = '==========\n'   # section separator line; not referenced by the code visible in this file
-# macOS device path: /Volumes/Kindle/documents/My\ Clippings.txt
-CLIPFILE = './My Clippings.txt'   # clippings file opened by the parsing loop
-STAT     = 'NONE'   # current section state; reassigned while parsing (see the STAT legend)
-
-'''
-STAT :
-    NONE   - match nothing
-    START  - start line of section
-    BM     - section is a bookmark
-    HL     - section is a highlight
-    NT     - section is a note
-'''
-# data structure
-'''
-book = {'bookname1':
-           { 'author':'chen',
-             'HL':
-                 { 'index1':
-                     {
-                         'position':'123-145',
-                         'content':'xxxx',
-                         'day':'2020年5月26日',
-                         'week':'星期二',
-                         'meridiem':'PM',
-                         'time':'10:26:31'
-                     },
-                     'index2':
-                     {
-                     ...
-                     },
-                     ...
-                 },
-#            'BM':   xxx skip bookmark because the content is empty
-#                { 'index1':
-#                    {
-#                        'position':'123',
-#                        'day':'2020年5月26日',
-#                        'week':'星期二',
-#                        'meridiem':'PM',
-#                        'time':'10:26:31'
-#                    },
-#                    'index2':
-#                    {
-#                    ...
-#                    },
-#                    ...
-#                }
-             'NT':
-                 { 'index1':
-                     {
-                         'position':'123',
-                         'content':'xxxx',
-                         'day':'2020年5月26日',
-                         'week':'星期二',
-                         'meridiem':'PM',
-                         'time':'10:26:31'
-                     },
-                     'index2':
-                     {
-                     ...
-                     },
-                     ...
-                 }
-            },
-         'bookname2':
-           ...
-       }
-'''
-
-# Title line of a section: "<bookname> (<author>)", anchored on one or more leading U+FEFF characters
-au = re.compile(  # NOTE(review): a title line without a leading U+FEFF never matches; confirm every title line carries one
-r'''
-^\ufeff+
-(.+) \(         #bookname
-(.+)\)             #author
-''', flags=re.X )
-
-# Metadata line of a section: position, clipping type, date, weekday, AM/PM marker and time
-# Sample input: 您在位置 #4286 的笔记 | 添加于 2020年1月30日星期四 上午10:26:31^M
-# re.X (VERBOSE): verbose mode; the pattern may span several lines, whitespace is ignored and comments are allowed
-# Same pattern in backslash-escaped (presumably Vim) syntax: \(\d\+-\{0,1}\d\+\).\+\(\d\{4}年\d\{1,2}月\d\{1,2}日\)\(星期.\) \(..\)\(\d\{1,2}:\d\{1,2}:\d\{1,2}\)
-da = re.compile(  # NOTE(review): group1 is \d+ -? \d+, so it needs at least two digits; a one-digit position never matches
-r'''
-\#
-(\d+-{0,1}\d+)                 #group1 - page
-.+
-(笔记|标注|书签)               #group2 - type
-.+
-(\d{4}年\d{1,2}月\d{1,2}日)    #group3 - xxxx年xx月xx日
-(星期.)                        #group4 - week
-\s
-(..)                           #group5 - pm/am
-(\d{1,2}:\d{1,2}:\d{1,2})      #group6 - time
-''', flags=re.X )
-
-with open(CLIPFILE, 'r', encoding='utf8', errors='ignore') as f:  # parse the clippings file line by line into `books`
-    books = defaultdict(dict)   # bookname -> {'author': ..., 'HL': {...}, 'NT': {...}}
-    bookname    = ''
-    author      = ''
-    idx         = 0             # 1-based number of the current line; used as the entry key
-    num_section = 0             # not used in the code visible here
-    num_line    = 0             # not used in the code visible here
-    for line in f.readlines():
-        idx += 1
-        line = line.strip()
-        if line.isspace(): continue  # NOTE(review): never true after strip() (''.isspace() is False), so blank lines fall through
-
-        # Track whether a regex has already matched this line;
-        # once one has matched, the remaining regex search is skipped
-        # to avoid unnecessary work.
-        flg = False
-        if not flg:  # always true here, because flg was reset on the previous line
-            aus = au.search(line)
-            if aus:
-                STAT = 'START'
-                bookname = aus.group(1)
-                author   = aus.group(2)
-                books[bookname]['author']= author
-                print("book:",aus.group(1),"auth:",aus.group(2))
-                flg = True
-
-        if not flg:
-            das = da.search(line)
-            if das:
-                STAT = ('HL' if das.group(2)=='标注' else ('NT' if das.group(2)=='笔记' else 'BM'))
-
-                # skip bookmarks: nothing is stored for them
-                if STAT=='BM': continue
-
-                pos   = das.group(1)
-                day   = das.group(3)
-                week  = das.group(4)
-                pmam  = das.group(5)
-                time  = das.group(6)
-                # NOTE(review): each assignment below rebinds books[bookname][STAT] to a new one-key dict
-                books[bookname][STAT] = {idx:{'position':pos}}
-                books[bookname][STAT] = {idx:{'day':day}}
-                books[bookname][STAT] = {idx:{'week':week}}
-                books[bookname][STAT] = {idx:{'meridiem':pmam}}
-                books[bookname][STAT] = {idx:{'time':time}}  # only this value survives; entries stored for earlier idx are dropped too
-
-                print(pos,STAT,day,week,'PM' if das.group(5)=="下午" else 'AM')  # anything other than "下午" is printed as AM
-
-                flg = True
-
-        if not flg:  # neither regex matched: the line is handled as section content
-            # Remember the highlight's index and text,
-            # so that if the next section is a note
-            # the highlight content can be amended with it.
-            if STAT=='START':
-                pass
-            elif STAT=='HL':
-                bk_idx     = idx
-                bk_content = line
-                books[bookname][STAT] = {idx:{'content':line}}  # NOTE(review): replaces the dict again, discarding the metadata stored above
-            # If the section is a note,
-            # append the note text to the previous highlight's content.
-            elif STAT=='NT':
-                books[bookname]['HL'] = {bk_idx:{'content':bk_content+'(CG注:'+line}}  # NOTE(review): bk_idx/bk_content are unbound if no HL content line came first
-
-#print(json.dumps(books,indent=4,sort_keys=True,ensure_ascii=False))