Files
iBook/annotationdata.py
2025-08-06 13:11:08 +08:00

108 lines
4.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import sqlite3
from collections import defaultdict
import re
import os
def parse_location(location):
    """Extract ``(idref, filepos)`` from a ZANNOTATIONLOCATION value.

    The value is scanned for square-bracketed segments, for example
    ``epubcfi(/6/746[id509]!/4[4MLOS0-...]/10,/2/1:0,/7:8)``. Every format is
    handled the same way: the text inside the first ``[...]`` pair becomes
    ``idref`` and the text inside the second pair becomes ``filepos``.

    Args:
        location: The raw location string. ``bytes`` input is decoded as
            UTF-8 first. ``None`` and empty values are accepted.

    Returns:
        A ``(idref, filepos)`` tuple; either element is ``None`` when the
        corresponding bracketed segment is absent.
    """
    if not location:
        return None, None
    if isinstance(location, (bytes, bytearray)):
        # SQLite hands back bytes for BLOB-typed cells; the regex below
        # needs text, so decode instead of failing with TypeError.
        location = location.decode('utf-8', errors='replace')

    # Non-greedy match so each [...] pair is captured separately.
    segments = re.findall(r'\[(.*?)\]', location)
    idref = segments[0] if segments else None
    filepos = segments[1] if len(segments) > 1 else None
    return idref, filepos
def _format_creation_date(creationdate):
    """Render a ZANNOTATIONCREATIONDATE value as a ``Y/M/D`` string.

    Numbers and numeric strings are treated as seconds since 2001-01-01;
    any other value is parsed as a ``YYYY-MM-DD`` prefix. ``None`` is returned
    unchanged, and values that cannot be converted fall back to ``str(value)``.
    """
    # Local import: the module's top-level imports do not include datetime.
    import datetime

    if creationdate is None:
        return None

    origin = datetime.datetime(2001, 1, 1)
    try:
        if isinstance(creationdate, (int, float)):
            moment = origin + datetime.timedelta(seconds=creationdate)
        elif isinstance(creationdate, str) and creationdate.replace('.', '', 1).isdigit():
            moment = origin + datetime.timedelta(seconds=float(creationdate))
        else:
            moment = datetime.datetime.strptime(creationdate[:10], "%Y-%m-%d")
    except (ValueError, OverflowError, TypeError):
        # Best effort: keep whatever was stored rather than dropping the row.
        return str(creationdate)
    return f"{moment.year}/{moment.month}/{moment.day}"


def get_annotations(db_path='./data/AEAnnotation.sqlite'):
    """Load annotations from the ZAEANNOTATION table, grouped by asset id.

    Args:
        db_path: Path of the SQLite database file (str or path-like).

    Returns:
        A ``defaultdict(dict)`` mapping ``str(asset id)`` to a dict keyed by
        annotation UUID. Each entry holds ``creationdate``, ``filepos``,
        ``idref``, ``note`` and ``selectedtext``. Rows whose note and selected
        text are both NULL are skipped.

    Raises:
        sqlite3.Error: If the database cannot be opened or queried.
    """
    db_file = os.fspath(db_path)

    # SQLite names its write-ahead-log sidecar files by appending "-wal" and
    # "-shm" to the full database file name, whatever its extension is.
    for path in (db_file, db_file + '-wal', db_file + '-shm'):
        if not os.path.exists(path):
            print(f'警告: 缺少 {path},可能无法获取全部最新笔记')

    conn = sqlite3.connect(db_file)
    try:
        rows = conn.execute('''
            SELECT ZANNOTATIONASSETID, ZANNOTATIONCREATIONDATE, ZANNOTATIONLOCATION,
                   ZANNOTATIONNOTE, ZANNOTATIONSELECTEDTEXT, ZANNOTATIONUUID
            FROM ZAEANNOTATION
        ''').fetchall()
    finally:
        # Release the connection even when the query fails.
        conn.close()

    annotations = defaultdict(dict)
    for assetid, creationdate, location, note, selectedtext, uuid in rows:
        # Skip rows that carry neither a note nor highlighted text.
        if note is None and selectedtext is None:
            continue
        idref, filepos = parse_location(location)
        annotations[str(assetid)][uuid] = {
            'creationdate': _format_creation_date(creationdate),
            'filepos': filepos,
            'idref': idref,
            'note': note,
            'selectedtext': selectedtext,
        }
    return annotations
# Usage example: load every annotation and pretty-print the full mapping.
if __name__ == "__main__":
    from pprint import pprint

    # Disabled manual check of parse_location; uncomment to run it.
    # test_locations = [
    #     'epubcfi(/6/746[id509]!/4[4MLOS0-27b363c65bfe41ad8429f530566a2737]/10,/2/1:0,/7:8',
    #     'epubcfi(/6/22[id15]!/4/156/1,:21,:157)',
    #     'epubcfi(/6/764[id518]!/4[4V8DU0-27b363c65bfe41ad8429f530566a2737]/56,/1:0,/3:2)'
    # ]
    # for loc in test_locations:
    #     idref, filepos = parse_location(loc)
    #     print(f"location: {loc}\n idref: {idref}\n filepos: {filepos}\n")

    annotations = get_annotations()

    # Dump the whole mapping, keeping the insertion order of the keys.
    print("\n所有笔记:")
    pprint(annotations, indent=2, sort_dicts=False)

    # Disabled alternative output: the first 3 annotations of each book.
    # book_notes = defaultdict(list)
    # for assetid, notes_dict in annotations.items():
    #     for uuid, ann in notes_dict.items():
    #         book_notes[assetid].append({**ann, 'uuid': uuid})
    # for assetid, notes in book_notes.items():
    #     print(f"\nAssetID: {assetid}")
    #     for i, note in enumerate(notes[:3]):
    #         print(f" 笔记{i+1}:")
    #         print(f" creationdate: {note['creationdate']}")
    #         print(f" idref: {note['idref']}")
    #         print(f" filepos: {note['filepos']}")
    #         print(f" note: {note['note']}")
    #         print(f" selectedtext: {note['selectedtext']}")
    #         print(f" uuid: {note['uuid']}")