""" annotationdata.py (OOP版) ------------------------ 功能: - 解析iBooks的AEAnnotation.sqlite数据库,提取所有或指定书籍(assetid/bookid)的笔记。 - 提供parse_location辅助函数,解析笔记定位信息。 - 返回结构化的annotations数据,便于后续章节定位与导出。 依赖:config.py 统一管理路径和配置项。 主要接口:AnnotationManager - get_annotations(bookid=None):返回所有或指定assetid的笔记,结构为{assetid: {uuid: {...}}} - parse_location(location):解析ZANNOTATIONLOCATION,返回(idref, filepos) 依赖:sqlite3, collections, re, os, datetime """ import config import sqlite3 import re import os from collections import defaultdict class AnnotationManager: def __init__(self, db_path=None): self.db_path = db_path or config.LOCAL_ANNOTATION_DB @staticmethod def parse_location(location): """ 解析ZANNOTATIONLOCATION,返回(idref, filepos) - epubcfi(...)格式优先提取[]内内容为idref - 其他格式兼容原逻辑 """ idref = None filepos = None if not location: return idref, filepos matches = re.findall(r'\[(.*?)\]', location) if location else [] idref = matches[0] if len(matches) > 0 else None filepos = matches[1] if len(matches) > 1 else None return idref, filepos def get_annotations(self, bookid=None): # 检查WAL模式相关文件 base = self.db_path.rsplit('.', 1)[0] wal_path = base + '.sqlite-wal' shm_path = base + '.sqlite-shm' for f in [self.db_path, wal_path, shm_path]: if not os.path.exists(f): print(f'警告: 缺少 {f},可能无法获取全部最新笔记') conn = sqlite3.connect(self.db_path) cursor = conn.cursor() if bookid is not None: cursor.execute(''' SELECT ZANNOTATIONASSETID, ZANNOTATIONCREATIONDATE, ZANNOTATIONLOCATION, ZANNOTATIONNOTE, ZANNOTATIONSELECTEDTEXT, ZANNOTATIONUUID FROM ZAEANNOTATION WHERE ZANNOTATIONASSETID=? ''', (bookid,)) else: cursor.execute(''' SELECT ZANNOTATIONASSETID, ZANNOTATIONCREATIONDATE, ZANNOTATIONLOCATION, ZANNOTATIONNOTE, ZANNOTATIONSELECTEDTEXT, ZANNOTATIONUUID FROM ZAEANNOTATION ''') rows = cursor.fetchall() annotations = defaultdict(dict) import datetime for row in rows: assetid, creationdate, location, note, selectedtext, uuid = row # 转换 creationdate 格式,支持苹果时间戳(以2001-01-01为基准) date_str = creationdate if creationdate: try: origin = datetime.datetime(2001, 1, 1) if isinstance(creationdate, (int, float)): dt = origin + datetime.timedelta(seconds=creationdate) elif isinstance(creationdate, str) and creationdate.replace('.', '', 1).isdigit(): dt = origin + datetime.timedelta(seconds=float(creationdate)) else: dt = datetime.datetime.strptime(creationdate[:10], "%Y-%m-%d") date_str = f"{dt.year}/{dt.month}/{dt.day}" except Exception: date_str = str(creationdate) idref, filepos = self.parse_location(location) if note is None and selectedtext is None: continue annotations[str(assetid)][uuid] = { 'creationdate': date_str, 'filepos': filepos, 'idref': idref, 'note': note, 'selectedtext': selectedtext } conn.close() if bookid is not None: return {str(bookid): annotations.get(str(bookid), {})} return annotations if __name__ == "__main__": manager = AnnotationManager() # 测试 parse_location test_locations = [ 'epubcfi(/6/746[id509]!/4[4MLOS0-27b363c65bfe41ad8429f530566a2737]/10,/2/1:0,/7:8', 'epubcfi(/6/22[id15]!/4/156/1,:21,:157)', 'epubcfi(/6/764[id518]!/4[4V8DU0-27b363c65bfe41ad8429f530566a2737]/56,/1:0,/3:2)' ] for loc in test_locations: idref, filepos = manager.parse_location(loc) print(f"location: {loc}\n idref: {idref}\n filepos: {filepos}\n") # 测试只获取特定 assetid 的笔记 test_bookid = "B18FCD9F90FD43C2373AE52BAEF9A77C" annotations = manager.get_annotations(bookid=test_bookid) from pprint import pprint print(f"\nAssetID={test_bookid} 的所有笔记:") pprint(annotations, indent=2, sort_dicts=False)