iBook/annotationdata.py

114 lines
4.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
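annotationdata.py (OOP version)
-------------------------------
Purpose:
- Parse the iBooks AEAnnotation.sqlite database and extract the notes of all books, or of one book selected by its assetid/bookid.
- Provide the parse_location helper, which parses a note's location string.
- Return structured annotation data for later chapter lookup and export.
Paths and configuration values are managed centrally in config.py.
Main interface: AnnotationManager
- get_annotations(bookid=None): return the notes of all books, or of the given assetid, shaped as {assetid: {uuid: {...}}}.
- parse_location(location): parse ZANNOTATIONLOCATION and return (idref, filepos).
Dependencies: sqlite3, collections, re, os, datetime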
"""
import config
import sqlite3
import re
import os
from collections import defaultdict
class AnnotationManager:
    """Read highlights and notes from an iBooks ``AEAnnotation`` SQLite database.

    Args:
        db_path: Path of the annotation database. When omitted (or falsy),
            ``config.LOCAL_ANNOTATION_DB`` is used instead.
    """

    # Captures the text between each pair of square brackets (non-greedy).
    _BRACKET_RE = re.compile(r'\[(.*?)\]')

    # Column order here must match the tuple unpacking in get_annotations().
    _SELECT_SQL = (
        'SELECT ZANNOTATIONASSETID, ZANNOTATIONCREATIONDATE, '
        'ZANNOTATIONLOCATION, ZANNOTATIONNOTE, ZANNOTATIONSELECTEDTEXT, '
        'ZANNOTATIONUUID FROM ZAEANNOTATION'
    )

    def __init__(self, db_path=None):
        self.db_path = db_path or config.LOCAL_ANNOTATION_DB

    @staticmethod
    def parse_location(location):
        """Split a ``ZANNOTATIONLOCATION`` value into ``(idref, filepos)``.

        The contents of the first square-bracketed segment become ``idref``
        and the contents of the second become ``filepos``; either is ``None``
        when the string has no such segment.

        Args:
            location: Location string, e.g. ``epubcfi(/6/22[id15]!/4/1,:2,:7)``.
                ``None`` and the empty string are accepted.

        Returns:
            A ``(idref, filepos)`` tuple of ``str`` or ``None`` values.
        """
        if not location:
            return None, None
        segments = AnnotationManager._BRACKET_RE.findall(location)
        idref = segments[0] if segments else None
        filepos = segments[1] if len(segments) > 1 else None
        return idref, filepos

    @staticmethod
    def _format_creation_date(creationdate):
        """Render a raw ``ZANNOTATIONCREATIONDATE`` value as ``"Y/M/D"``.

        Numbers and numeric strings are taken as seconds counted from
        2001-01-01; any other string is expected to start with ``YYYY-MM-DD``.
        Values that cannot be converted are returned as ``str(value)``;
        ``None`` and empty values are returned unchanged.
        """
        # Function-scope import: the module header does not import datetime.
        import datetime

        is_number = isinstance(creationdate, (int, float))
        if not creationdate and not is_number:
            # NULL / empty value: nothing to convert. A numeric 0 is a real
            # timestamp (the 2001-01-01 origin itself) and is converted below.
            return creationdate
        try:
            origin = datetime.datetime(2001, 1, 1)
            if is_number:
                dt = origin + datetime.timedelta(seconds=creationdate)
            elif (isinstance(creationdate, str)
                    and creationdate.replace('.', '', 1).isdigit()):
                dt = origin + datetime.timedelta(seconds=float(creationdate))
            else:
                dt = datetime.datetime.strptime(creationdate[:10], "%Y-%m-%d")
        except (ValueError, OverflowError, TypeError):
            # Best effort: keep whatever was stored rather than fail the export.
            return str(creationdate)
        return f"{dt.year}/{dt.month}/{dt.day}"

    def get_annotations(self, bookid=None):
        """Load annotations for every book, or only for ``bookid``.

        Rows whose note and selected text are both NULL are skipped.

        Args:
            bookid: Asset id to filter on; ``None`` loads every row.

        Returns:
            A mapping ``{assetid: {uuid: {...}}}`` where each inner dict has
            the keys ``creationdate``, ``filepos``, ``idref``, ``note`` and
            ``selectedtext``. With ``bookid`` given, the result is a plain
            dict holding exactly that one asset id (possibly mapped to ``{}``).

        Raises:
            sqlite3.Error: If the database cannot be opened or queried.
        """
        # SQLite keeps its write-ahead-log companions next to the database as
        # "<db>-wal" and "<db>-shm"; warn when any of the three is missing.
        candidates = (self.db_path, f'{self.db_path}-wal', f'{self.db_path}-shm')
        for path in candidates:
            if not os.path.exists(path):
                print(f'警告: 缺少 {path},可能无法获取全部最新笔记')

        query = self._SELECT_SQL
        params = ()
        if bookid is not None:
            query += ' WHERE ZANNOTATIONASSETID=?'
            params = (bookid,)

        conn = sqlite3.connect(self.db_path)
        try:
            rows = conn.execute(query, params).fetchall()
        finally:
            # Always release the database handle, even when the query fails.
            conn.close()

        annotations = defaultdict(dict)
        for assetid, creationdate, location, note, selectedtext, uuid in rows:
            if note is None and selectedtext is None:
                continue
            idref, filepos = self.parse_location(location)
            annotations[str(assetid)][uuid] = {
                'creationdate': self._format_creation_date(creationdate),
                'filepos': filepos,
                'idref': idref,
                'note': note,
                'selectedtext': selectedtext,
            }

        if bookid is not None:
            return {str(bookid): annotations.get(str(bookid), {})}
        return annotations
if __name__ == "__main__":
    # Manual smoke test; runs only when the module is executed as a script.
    from pprint import pprint

    manager = AnnotationManager()

    # Exercise parse_location on a few sample epubcfi location strings.
    sample_locations = (
        'epubcfi(/6/746[id509]!/4[4MLOS0-27b363c65bfe41ad8429f530566a2737]/10,/2/1:0,/7:8',
        'epubcfi(/6/22[id15]!/4/156/1,:21,:157)',
        'epubcfi(/6/764[id518]!/4[4V8DU0-27b363c65bfe41ad8429f530566a2737]/56,/1:0,/3:2)',
    )
    for sample in sample_locations:
        ref, pos = manager.parse_location(sample)
        print(f"location: {sample}\n idref: {ref}\n filepos: {pos}\n")

    # Fetch only the notes that belong to one specific asset id.
    test_bookid = "B18FCD9F90FD43C2373AE52BAEF9A77C"
    annotations = manager.get_annotations(bookid=test_bookid)
    print(f"\nAssetID={test_bookid} 的所有笔记:")
    pprint(annotations, indent=2, sort_dicts=False)