"""Print book/author and location/date metadata from a Kindle clippings file.

Each line of the clippings file is checked against two patterns:

* a title line, which starts with a byte-order mark and looks like
  ``庆余年(精校版) (猫腻)`` (book name followed by the author in parentheses);
* a metadata line such as
  ``您在位置 #4286 的笔记 | 添加于 2020年1月30日星期四 上午10:26:31``.

Lines matching neither pattern (clipping text, separators) are skipped.
"""
import re

# Title line: book name, then the author inside the last pair of parentheses.
# The pattern is anchored on the byte-order mark that prefixes the line.
au = re.compile(
    r'''
    ^\ufeff
    (.+) \(     # group 1 - book name (keeps any space before the parenthesis)
    (.+)\)      # group 2 - author
    ''',
    flags=re.X,
)

# Location and date line.
# re.X (VERBOSE): the pattern may span several lines, whitespace is ignored
# and comments are allowed inside the pattern.
da = re.compile(
    r'''
    \#
    (\d+(?:-\d+)?)                  # group 1 - location: "5", "4286" or "4286-4290"
    .+
    (\d{4}年\d{1,2}月\d{1,2}日)      # group 2 - date, xxxx年xx月xx日
    (星期.)                          # group 3 - weekday
    \s
    (..)                            # group 4 - am/pm marker
    (\d{1,2}:\d{1,2}:\d{1,2})       # group 5 - time
    ''',
    flags=re.X,
)


def parse_line(line):
    """Return the fields to print for *line*, or ``None`` if it is not metadata.

    A title line yields ``("book:", name, "auth:", author)``.  A location/date
    line yields ``(location, date, weekday, "AM" | "PM")``.  The title pattern
    is tried first; the date pattern is only tried when the title one fails.
    """
    title = au.search(line)
    if title:
        return ("book:", title.group(1), "auth:", title.group(2))

    stamp = da.search(line)
    if stamp:
        # "下午" is the afternoon marker; any other marker is reported as AM.
        meridiem = 'PM' if stamp.group(4) == "下午" else 'AM'
        return (stamp.group(1), stamp.group(2), stamp.group(3), meridiem)

    return None


def main():
    """Read the clippings file and print one line per recognised metadata line."""
    # errors='ignore' is kept from the original: undecodable bytes are dropped
    # rather than aborting the run.
    with open('./My Clippings.t.txt', 'r', encoding='utf8', errors='ignore') as clippings:
        for line in clippings:
            fields = parse_line(line)
            if fields is not None:
                print(*fields)


if __name__ == "__main__":
    main()