'update'

2025-09-06 16:43:13 +08:00
parent 95cd27563c
commit 893cd06c2c
7 changed files with 266 additions and 11 deletions
--- a/annotationdata.py
+++ b/annotationdata.py
@@ -20,26 +20,81 @@ import os
 from collections import defaultdict

 class AnnotationManager:
+    """
+    iBooks笔记管理器
+    
+    负责从iBooks的AEAnnotation.sqlite数据库中提取和解析用户的阅读笔记和高亮标记。
+    支持获取所有书籍的笔记或指定书籍的笔记，并提供位置信息解析功能。
+    """
+    
    def __init__(self, db_path=None):
+        """
+        初始化笔记管理器
+        
+        Args:
+            db_path (str, optional): 数据库文件路径，默认使用config.LOCAL_ANNOTATION_DB
+        """
        self.db_path = db_path or config.LOCAL_ANNOTATION_DB

    @staticmethod
    def parse_location(location):
        """
-        解析ZANNOTATIONLOCATION，返回(idref, filepos)
-        - epubcfi(...)格式优先提取[]内内容为idref
-        - 其他格式兼容原逻辑
+        解析iBooks笔记的位置信息
+        
+        从ZANNOTATIONLOCATION字段解析出章节标识符和文件内位置信息。
+        支持epubcfi格式的位置字符串解析。
+        
+        Args:
+            location (str): 笔记位置字符串，通常为epubcfi格式
+            
+        Returns:
+            tuple: (idref, filepos)
+                - idref (str | None): 章节标识符（第一个[]内的内容），用于定位具体章节；未找到时为None
+                - filepos (str | None): 文件内位置（第二个[]内的内容），用于精确定位笔记位置；未找到时为None
+                
+        Examples:
+            >>> AnnotationManager.parse_location('epubcfi(/6/746[id509]!/4[abc]/10,/2/1:0,/7:8)')
+            ('id509', 'abc')
        """
        idref = None
        filepos = None
        if not location:
            return idref, filepos
+        # 使用正则表达式提取[]内的内容
        matches = re.findall(r'\[(.*?)\]', location) if location else []
        idref = matches[0] if len(matches) > 0 else None
        filepos = matches[1] if len(matches) > 1 else None
        return idref, filepos

    def get_annotations(self, bookid=None):
+        """
+        从数据库获取笔记数据
+        
+        从iBooks的AEAnnotation.sqlite数据库中提取所有或指定书籍的笔记和高亮内容。
+        自动处理时间戳转换和位置信息解析。
+        
+        Args:
+            bookid (str, optional): 书籍资产ID，如果为None则获取所有书籍的笔记
+            
+        Returns:
+            dict: 笔记数据字典，结构为：
+                {
+                    assetid: {
+                        uuid: {
+                            'creationdate': '创建日期',
+                            'filepos': '文件位置',
+                            'idref': '章节标识',
+                            'note': '笔记内容',
+                            'selectedtext': '选中文本'
+                        }
+                    }
+                }
+                
+        Note:
+            - 会检查WAL模式相关文件(-wal, -shm)的存在性
+            - 自动转换苹果时间戳格式(以2001-01-01为基准)
+            - 过滤掉既没有笔记也没有选中文本的空记录
+        """
        # 检查WAL模式相关文件
        base = self.db_path.rsplit('.', 1)[0]
        wal_path = base + '.sqlite-wal'
@@ -47,8 +102,11 @@ class AnnotationManager:
        for f in [self.db_path, wal_path, shm_path]:
            if not os.path.exists(f):
                print(f'警告: 缺少 {f}，可能无法获取全部最新笔记')
+        # 连接数据库并执行查询
        conn = sqlite3.connect(self.db_path)
        cursor = conn.cursor()
+        
+        # 根据是否指定bookid选择不同的查询语句
        if bookid is not None:
            cursor.execute('''
                SELECT ZANNOTATIONASSETID, ZANNOTATIONCREATIONDATE, ZANNOTATIONLOCATION, ZANNOTATIONNOTE, ZANNOTATIONSELECTEDTEXT, ZANNOTATIONUUID
@@ -59,12 +117,16 @@ class AnnotationManager:
                SELECT ZANNOTATIONASSETID, ZANNOTATIONCREATIONDATE, ZANNOTATIONLOCATION, ZANNOTATIONNOTE, ZANNOTATIONSELECTEDTEXT, ZANNOTATIONUUID
                FROM ZAEANNOTATION
            ''')
+        
        rows = cursor.fetchall()
        annotations = defaultdict(dict)
        import datetime
+        
+        # 处理每一行数据
        for row in rows:
            assetid, creationdate, location, note, selectedtext, uuid = row
-            # 转换 creationdate 格式，支持苹果时间戳（以2001-01-01为基准）
+            
+            # 转换 creationdate 格式为'YYYY-MM-DD HH:MM:SS'，支持苹果时间戳（以2001-01-01为基准）
            date_str = creationdate
            if creationdate:
                try:
@@ -74,13 +136,20 @@ class AnnotationManager:
                    elif isinstance(creationdate, str) and creationdate.replace('.', '', 1).isdigit():
                        dt = origin + datetime.timedelta(seconds=float(creationdate))
                    else:
+                        # 其他字符串：取前10个字符按'YYYY-MM-DD'解析（如'2025-09-06'）；'2025/9/6'这类格式无法解析，会落入except分支并原样返回
                        dt = datetime.datetime.strptime(creationdate[:10], "%Y-%m-%d")
-                    date_str = f"{dt.year}/{dt.month}/{dt.day}"
+                    date_str = dt.strftime('%Y-%m-%d %H:%M:%S')
                except Exception:
                    date_str = str(creationdate)
+            
+            # 解析位置信息
            idref, filepos = self.parse_location(location)
+            
+            # 过滤空记录（既没有笔记也没有选中文本）
            if note is None and selectedtext is None:
                continue
+                
+            # 构建笔记数据结构
            annotations[str(assetid)][uuid] = {
                'creationdate': date_str,
                'filepos': filepos,
@@ -88,14 +157,26 @@ class AnnotationManager:
                'note': note,
                'selectedtext': selectedtext
            }
+        
        conn.close()
+        
+        # 根据查询类型返回相应结果
        if bookid is not None:
            return {str(bookid): annotations.get(str(bookid), {})}
        return annotations

 if __name__ == "__main__":
+    """
+    测试模块功能
+    
+    包含两个测试用例：
+    1. 测试parse_location方法解析各种格式的位置字符串
+    2. 测试get_annotations方法获取指定书籍的笔记数据
+    """
    manager = AnnotationManager()
-    # 测试 parse_location
+    
+    # 测试 parse_location 方法
+    print("=== 测试位置解析功能 ===")
    test_locations = [
        'epubcfi(/6/746[id509]!/4[4MLOS0-27b363c65bfe41ad8429f530566a2737]/10,/2/1:0,/7:8',
        'epubcfi(/6/22[id15]!/4/156/1,:21,:157)',
@@ -105,7 +186,8 @@ if __name__ == "__main__":
        idref, filepos = manager.parse_location(loc)
        print(f"location: {loc}\n  idref: {idref}\n  filepos: {filepos}\n")

-    # 测试只获取特定 assetid 的笔记
+    # 测试获取特定书籍的笔记
+    print("=== 测试笔记获取功能 ===")
    test_bookid = "B18FCD9F90FD43C2373AE52BAEF9A77C"
    annotations = manager.get_annotations(bookid=test_bookid)
    from pprint import pprint