47 lines
1.4 KiB
Python
47 lines
1.4 KiB
Python
|
|
import re
|
|
|
|
# Author & book-name info, parsed from a clipping title line such as:
#   庆余年(精校版) (猫腻)
#
# re.X (VERBOSE) ignores unescaped whitespace inside the pattern, so a blank
# typed between the title group and "\(" matches nothing. The separator is
# therefore matched explicitly with \s*, and the title group is forced to end
# on a non-space character so the captured book name has no trailing blank.
# NOTE(review): the pattern only matches lines that start with U+FEFF;
# presumably every title line in the export carries one - confirm.
au = re.compile(
    r'''
    ^\ufeff         # line must start with a byte-order mark
    (.*\S)          # group 1 - book name (greedy: split at the last usable paren)
    \s* \(
    (.+) \)         # group 2 - author
    ''', flags=re.X)
|
|
|
|
# Page/location & date info, parsed from a clipping metadata line such as:
#   您在位置 #4286 的笔记 | 添加于 2020年1月30日星期四 上午10:26:31^M
#
# re.X (VERBOSE): the pattern may span several lines, unescaped whitespace is
# ignored, and "#" starts a comment (hence the escaped "\#" for a literal hash).
#
# Earlier draft of the same pattern (looks like Vim regex syntax), kept for reference:
#   \(\d\+-\{0,1}\d\+\).\+\(\d\{4}年\d\{1,2}月\d\{1,2}日\)\(星期.\) \(..\)\(\d\{1,2}:\d\{1,2}:\d\{1,2}\)
#
# The location is "digits" optionally followed by "-digits"; writing it as
# \d+(?:-\d+)? lets single-digit locations such as "#5" match as well.
da = re.compile(
    r'''
    \#
    (\d+(?:-\d+)?)                  # group 1 - page/location, single or start-end
    .+
    (\d{4}年\d{1,2}月\d{1,2}日)      # group 2 - date, xxxx年xx月xx日
    (星期.)                          # group 3 - day of the week
    \s
    (..)                            # group 4 - am/pm marker (two characters)
    (\d{1,2}:\d{1,2}:\d{1,2})       # group 5 - time
    ''', flags=re.X)
|
|
|
|
# Scan the clippings file line by line and print the metadata found on
# title lines (book / author) and on location/date lines.
# errors='ignore' drops bytes that cannot be decoded as UTF-8 instead of raising.
with open('./My Clippings.t.txt', 'r', encoding='utf8', errors='ignore') as clippings:
    # Iterate the file object directly so lines are read lazily; the handle
    # keeps its own name and is no longer rebound to a boolean flag.
    for line in clippings:
        title_match = au.search(line)
        if title_match:
            print("book:", title_match.group(1), "auth:", title_match.group(2))
            # A line recognised as a title is not also tried as a date line.
            continue

        date_match = da.search(line)
        if date_match:
            # Group 5 (the time of day) is captured but not printed.
            print(date_match.group(1),
                  date_match.group(2),
                  date_match.group(3),
                  'PM' if date_match.group(4) == "下午" else 'AM')
|
|
|