'update'

2025-08-15 17:20:30 +08:00
parent 0bc6844209
commit 4e3b8abc34
12 changed files with 406 additions and 516 deletions
--- a/annotationdata.py
+++ b/annotationdata.py
@@ -1,136 +1,113 @@
+
 """
-annotationdata.py
-----------------
+annotationdata.py (OOP版)
+------------------------
 功能：
    - 解析iBooks的AEAnnotation.sqlite数据库，提取所有或指定书籍（assetid/bookid）的笔记。
    - 提供parse_location辅助函数，解析笔记定位信息。
    - 返回结构化的annotations数据，便于后续章节定位与导出。

 依赖：config.py 统一管理路径和配置项。
-
-主要接口：
-    - get_annotations(db_path, bookid=None)：返回所有或指定assetid的笔记，结构为{assetid: {uuid: {...}}}
+主要接口：AnnotationManager
+    - get_annotations(bookid=None)：返回所有或指定assetid的笔记，结构为{assetid: {uuid: {...}}}
    - parse_location(location)：解析ZANNOTATIONLOCATION，返回(idref, filepos)
-
 依赖：sqlite3, collections, re, os, datetime
 """
 import config
-
 import sqlite3
-from collections import defaultdict
 import re
 import os
+from collections import defaultdict

-def parse_location(location):
-    """
-    解析ZANNOTATIONLOCATION，返回(idref, filepos)
-    - epubcfi(...)格式优先提取[]内内容为idref
-    - 其他格式兼容原逻辑
-    """
-    idref = None
-    filepos = None
-    if not location:
+class AnnotationManager:
+    def __init__(self, db_path=None):
+        self.db_path = db_path or config.LOCAL_ANNOTATION_DB
+
+    @staticmethod
+    def parse_location(location):
+        """
+        解析ZANNOTATIONLOCATION，返回(idref, filepos)
+        - epubcfi(...)格式优先提取[]内内容为idref
+        - 其他格式兼容原逻辑
+        """
+        idref = None
+        filepos = None
+        if not location:
+            return idref, filepos
+        matches = re.findall(r'\[(.*?)\]', location) if location else []
+        idref = matches[0] if len(matches) > 0 else None
+        filepos = matches[1] if len(matches) > 1 else None
        return idref, filepos
-    # 统一处理，提取前两个[]内容
-    matches = re.findall(r'\[(.*?)\]', location) if location else []
-    idref = matches[0] if len(matches) > 0 else None
-    filepos = matches[1] if len(matches) > 1 else None
-    return idref, filepos

-def get_annotations(db_path=config.LOCAL_ANNOTATION_DB, bookid=None):
-    # 检查WAL模式相关文件
-    base = db_path.rsplit('.', 1)[0]
-    wal_path = base + '.sqlite-wal'
-    shm_path = base + '.sqlite-shm'
-    for f in [db_path, wal_path, shm_path]:
-        if not os.path.exists(f):
-            print(f'警告: 缺少 {f}，可能无法获取全部最新笔记')
-    conn = sqlite3.connect(db_path)
-    cursor = conn.cursor()
-    if bookid is not None:
-        cursor.execute('''
-            SELECT ZANNOTATIONASSETID, ZANNOTATIONCREATIONDATE, ZANNOTATIONLOCATION, ZANNOTATIONNOTE, ZANNOTATIONSELECTEDTEXT, ZANNOTATIONUUID
-            FROM ZAEANNOTATION WHERE ZANNOTATIONASSETID=?
-        ''', (bookid,))
-    else:
-        cursor.execute('''
-            SELECT ZANNOTATIONASSETID, ZANNOTATIONCREATIONDATE, ZANNOTATIONLOCATION, ZANNOTATIONNOTE, ZANNOTATIONSELECTEDTEXT, ZANNOTATIONUUID
-            FROM ZAEANNOTATION
-        ''')
-    rows = cursor.fetchall()
-    annotations = defaultdict(dict)
-    import datetime
-    for row in rows:
-        assetid, creationdate, location, note, selectedtext, uuid = row
-        # 转换 creationdate 格式，支持苹果时间戳（以2001-01-01为基准）
-        date_str = creationdate
-        if creationdate:
-            try:
-                origin = datetime.datetime(2001, 1, 1)
-                # 苹果时间戳 float/int 或数字字符串
-                if isinstance(creationdate, (int, float)):
-                    dt = origin + datetime.timedelta(seconds=creationdate)
-                elif isinstance(creationdate, str) and creationdate.replace('.', '', 1).isdigit():
-                    dt = origin + datetime.timedelta(seconds=float(creationdate))
-                else:
-                    dt = datetime.datetime.strptime(creationdate[:10], "%Y-%m-%d")
-                date_str = f"{dt.year}/{dt.month}/{dt.day}"
-            except Exception:
-                date_str = str(creationdate)
-        idref, filepos = parse_location(location)
-        # 跳过note和selectedtext都为None的笔记
-        if note is None and selectedtext is None:
-            continue
-        annotations[str(assetid)][uuid] = {
-            'creationdate': date_str,
-            'filepos': filepos,
-            'idref': idref,
-            'note': note,
-            'selectedtext': selectedtext
-        }
-    conn.close()
-    if bookid is not None:
-        # 只返回特定bookid的笔记结构
-        return {str(bookid): annotations.get(str(bookid), {})}
-    return annotations
+    def get_annotations(self, bookid=None):
+        # 检查WAL模式相关文件
+        base = self.db_path.rsplit('.', 1)[0]
+        wal_path = base + '.sqlite-wal'
+        shm_path = base + '.sqlite-shm'
+        for f in [self.db_path, wal_path, shm_path]:
+            if not os.path.exists(f):
+                print(f'警告: 缺少 {f}，可能无法获取全部最新笔记')
+        conn = sqlite3.connect(self.db_path)
+        cursor = conn.cursor()
+        if bookid is not None:
+            cursor.execute('''
+                SELECT ZANNOTATIONASSETID, ZANNOTATIONCREATIONDATE, ZANNOTATIONLOCATION, ZANNOTATIONNOTE, ZANNOTATIONSELECTEDTEXT, ZANNOTATIONUUID
+                FROM ZAEANNOTATION WHERE ZANNOTATIONASSETID=?
+            ''', (bookid,))
+        else:
+            cursor.execute('''
+                SELECT ZANNOTATIONASSETID, ZANNOTATIONCREATIONDATE, ZANNOTATIONLOCATION, ZANNOTATIONNOTE, ZANNOTATIONSELECTEDTEXT, ZANNOTATIONUUID
+                FROM ZAEANNOTATION
+            ''')
+        rows = cursor.fetchall()
+        annotations = defaultdict(dict)
+        import datetime
+        for row in rows:
+            assetid, creationdate, location, note, selectedtext, uuid = row
+            # 转换 creationdate 格式，支持苹果时间戳（以2001-01-01为基准）
+            date_str = creationdate
+            if creationdate:
+                try:
+                    origin = datetime.datetime(2001, 1, 1)
+                    if isinstance(creationdate, (int, float)):
+                        dt = origin + datetime.timedelta(seconds=creationdate)
+                    elif isinstance(creationdate, str) and creationdate.replace('.', '', 1).isdigit():
+                        dt = origin + datetime.timedelta(seconds=float(creationdate))
+                    else:
+                        dt = datetime.datetime.strptime(creationdate[:10], "%Y-%m-%d")
+                    date_str = f"{dt.year}/{dt.month}/{dt.day}"
+                except Exception:
+                    date_str = str(creationdate)
+            idref, filepos = self.parse_location(location)
+            if note is None and selectedtext is None:
+                continue
+            annotations[str(assetid)][uuid] = {
+                'creationdate': date_str,
+                'filepos': filepos,
+                'idref': idref,
+                'note': note,
+                'selectedtext': selectedtext
+            }
+        conn.close()
+        if bookid is not None:
+            return {str(bookid): annotations.get(str(bookid), {})}
+        return annotations

-# 用法示例：输出每本书的前3条笔记
 if __name__ == "__main__":
+    manager = AnnotationManager()
    # 测试 parse_location
-    '''
    test_locations = [
        'epubcfi(/6/746[id509]!/4[4MLOS0-27b363c65bfe41ad8429f530566a2737]/10,/2/1:0,/7:8',
        'epubcfi(/6/22[id15]!/4/156/1,:21,:157)',
        'epubcfi(/6/764[id518]!/4[4V8DU0-27b363c65bfe41ad8429f530566a2737]/56,/1:0,/3:2)'
    ]
    for loc in test_locations:
-        idref, filepos = parse_location(loc)
+        idref, filepos = manager.parse_location(loc)
        print(f"location: {loc}\n  idref: {idref}\n  filepos: {filepos}\n")
-    '''

    # 测试只获取特定 assetid 的笔记
    test_bookid = "B18FCD9F90FD43C2373AE52BAEF9A77C"
-    annotations = get_annotations(bookid=test_bookid)
-
-    # 格式化打印该书的所有笔记
+    annotations = manager.get_annotations(bookid=test_bookid)
    from pprint import pprint
    print(f"\nAssetID={test_bookid} 的所有笔记:")
    pprint(annotations, indent=2, sort_dicts=False)
-
-    # 输出每本书的前3条笔记
-    '''
-    book_notes = defaultdict(list)
-    for assetid, notes_dict in annotations.items():
-        for uuid, ann in notes_dict.items():
-            book_notes[assetid].append({**ann, 'uuid': uuid})
-    for assetid, notes in book_notes.items():
-        print(f"\nAssetID: {assetid}")
-        for i, note in enumerate(notes[:3]):
-            print(f"  笔记{i+1}:")
-            print(f"    creationdate: {note['creationdate']}")
-            print(f"    idref: {note['idref']}")
-            print(f"    filepos: {note['filepos']}")
-            print(f"    note: {note['note']}")
-            print(f"    selectedtext: {note['selectedtext']}")
-            print(f"    uuid: {note['uuid']}")
-    '''