'update'

2025-08-15 17:20:30 +08:00
parent 0bc6844209
commit 4e3b8abc34
12 changed files with 406 additions and 516 deletions
--- a/pycache/annotationdata.cpython-312.pyc
+++ b/pycache/annotationdata.cpython-312.pyc
--- a/pycache/booklist_parse.cpython-312.pyc
+++ b/pycache/booklist_parse.cpython-312.pyc
--- a/pycache/opf_parse.cpython-312.pyc
+++ b/pycache/opf_parse.cpython-312.pyc
--- a/pycache/toc_parse.cpython-312.pyc
+++ b/pycache/toc_parse.cpython-312.pyc
--- a/annotationdata.py
+++ b/annotationdata.py
@@ -1,136 +1,113 @@
 """
-annotationdata.py
+annotationdata.py (OOP版)
-----------------
+------------------------
 功能：
    - 解析iBooks的AEAnnotation.sqlite数据库，提取所有或指定书籍（assetid/bookid）的笔记。
    - 提供parse_location辅助函数，解析笔记定位信息。
    - 返回结构化的annotations数据，便于后续章节定位与导出。
 依赖：config.py 统一管理路径和配置项。
-
+主要接口：AnnotationManager
-主要接口：
+    - get_annotations(bookid=None)：返回所有或指定assetid的笔记，结构为{assetid: {uuid: {...}}}
    - get_annotations(db_path, bookid=None)：返回所有或指定assetid的笔记，结构为{assetid: {uuid: {...}}}
    - parse_location(location)：解析ZANNOTATIONLOCATION，返回(idref, filepos)
 依赖：sqlite3, collections, re, os, datetime
 """
 import config
 import sqlite3
 from collections import defaultdict
 import re
 import os
 from collections import defaultdict
-def parse_location(location):
+class AnnotationManager:
-    """
+    def __init__(self, db_path=None):
-    解析ZANNOTATIONLOCATION，返回(idref, filepos)
+        self.db_path = db_path or config.LOCAL_ANNOTATION_DB
-    - epubcfi(...)格式优先提取[]内内容为idref
+
-    - 其他格式兼容原逻辑
+    @staticmethod
-    """
+    def parse_location(location):
-    idref = None
+        """
-    filepos = None
+        解析ZANNOTATIONLOCATION，返回(idref, filepos)
-    if not location:
+        - epubcfi(...)格式优先提取[]内内容为idref
        - 其他格式兼容原逻辑
        """
        idref = None
        filepos = None
        if not location:
            return idref, filepos
        matches = re.findall(r'\[(.*?)\]', location) if location else []
        idref = matches[0] if len(matches) > 0 else None
        filepos = matches[1] if len(matches) > 1 else None
        return idref, filepos
    # 统一处理，提取前两个[]内容
    matches = re.findall(r'\[(.*?)\]', location) if location else []
    idref = matches[0] if len(matches) > 0 else None
    filepos = matches[1] if len(matches) > 1 else None
    return idref, filepos
-def get_annotations(db_path=config.LOCAL_ANNOTATION_DB, bookid=None):
+    def get_annotations(self, bookid=None):
-    # 检查WAL模式相关文件
+        # 检查WAL模式相关文件
-    base = db_path.rsplit('.', 1)[0]
+        base = self.db_path.rsplit('.', 1)[0]
-    wal_path = base + '.sqlite-wal'
+        wal_path = base + '.sqlite-wal'
-    shm_path = base + '.sqlite-shm'
+        shm_path = base + '.sqlite-shm'
-    for f in [db_path, wal_path, shm_path]:
+        for f in [self.db_path, wal_path, shm_path]:
-        if not os.path.exists(f):
+            if not os.path.exists(f):
-            print(f'警告: 缺少 {f}，可能无法获取全部最新笔记')
+                print(f'警告: 缺少 {f}，可能无法获取全部最新笔记')
-    conn = sqlite3.connect(db_path)
+        conn = sqlite3.connect(self.db_path)
-    cursor = conn.cursor()
+        cursor = conn.cursor()
-    if bookid is not None:
+        if bookid is not None:
-        cursor.execute('''
+            cursor.execute('''
-            SELECT ZANNOTATIONASSETID, ZANNOTATIONCREATIONDATE, ZANNOTATIONLOCATION, ZANNOTATIONNOTE, ZANNOTATIONSELECTEDTEXT, ZANNOTATIONUUID
+                SELECT ZANNOTATIONASSETID, ZANNOTATIONCREATIONDATE, ZANNOTATIONLOCATION, ZANNOTATIONNOTE, ZANNOTATIONSELECTEDTEXT, ZANNOTATIONUUID
-            FROM ZAEANNOTATION WHERE ZANNOTATIONASSETID=?
+                FROM ZAEANNOTATION WHERE ZANNOTATIONASSETID=?
-        ''', (bookid,))
+            ''', (bookid,))
-    else:
+        else:
-        cursor.execute('''
+            cursor.execute('''
-            SELECT ZANNOTATIONASSETID, ZANNOTATIONCREATIONDATE, ZANNOTATIONLOCATION, ZANNOTATIONNOTE, ZANNOTATIONSELECTEDTEXT, ZANNOTATIONUUID
+                SELECT ZANNOTATIONASSETID, ZANNOTATIONCREATIONDATE, ZANNOTATIONLOCATION, ZANNOTATIONNOTE, ZANNOTATIONSELECTEDTEXT, ZANNOTATIONUUID
-            FROM ZAEANNOTATION
+                FROM ZAEANNOTATION
-        ''')
+            ''')
-    rows = cursor.fetchall()
+        rows = cursor.fetchall()
-    annotations = defaultdict(dict)
+        annotations = defaultdict(dict)
-    import datetime
+        import datetime
-    for row in rows:
+        for row in rows:
-        assetid, creationdate, location, note, selectedtext, uuid = row
+            assetid, creationdate, location, note, selectedtext, uuid = row
-        # 转换 creationdate 格式，支持苹果时间戳（以2001-01-01为基准）
+            # 转换 creationdate 格式，支持苹果时间戳（以2001-01-01为基准）
-        date_str = creationdate
+            date_str = creationdate
-        if creationdate:
+            if creationdate:
-            try:
+                try:
-                origin = datetime.datetime(2001, 1, 1)
+                    origin = datetime.datetime(2001, 1, 1)
-                # 苹果时间戳 float/int 或数字字符串
+                    if isinstance(creationdate, (int, float)):
-                if isinstance(creationdate, (int, float)):
+                        dt = origin + datetime.timedelta(seconds=creationdate)
-                    dt = origin + datetime.timedelta(seconds=creationdate)
+                    elif isinstance(creationdate, str) and creationdate.replace('.', '', 1).isdigit():
-                elif isinstance(creationdate, str) and creationdate.replace('.', '', 1).isdigit():
+                        dt = origin + datetime.timedelta(seconds=float(creationdate))
-                    dt = origin + datetime.timedelta(seconds=float(creationdate))
+                    else:
-                else:
+                        dt = datetime.datetime.strptime(creationdate[:10], "%Y-%m-%d")
-                    dt = datetime.datetime.strptime(creationdate[:10], "%Y-%m-%d")
+                    date_str = f"{dt.year}/{dt.month}/{dt.day}"
-                date_str = f"{dt.year}/{dt.month}/{dt.day}"
+                except Exception:
-            except Exception:
+                    date_str = str(creationdate)
-                date_str = str(creationdate)
+            idref, filepos = self.parse_location(location)
-        idref, filepos = parse_location(location)
+            if note is None and selectedtext is None:
-        # 跳过note和selectedtext都为None的笔记
+                continue
-        if note is None and selectedtext is None:
+            annotations[str(assetid)][uuid] = {
-            continue
+                'creationdate': date_str,
-        annotations[str(assetid)][uuid] = {
+                'filepos': filepos,
-            'creationdate': date_str,
+                'idref': idref,
-            'filepos': filepos,
+                'note': note,
-            'idref': idref,
+                'selectedtext': selectedtext
-            'note': note,
+            }
-            'selectedtext': selectedtext
+        conn.close()
-        }
+        if bookid is not None:
-    conn.close()
+            return {str(bookid): annotations.get(str(bookid), {})}
-    if bookid is not None:
+        return annotations
        # 只返回特定bookid的笔记结构
        return {str(bookid): annotations.get(str(bookid), {})}
    return annotations
 # 用法示例：输出每本书的前3条笔记
 if __name__ == "__main__":
    manager = AnnotationManager()
    # 测试 parse_location
    '''
    test_locations = [
        'epubcfi(/6/746[id509]!/4[4MLOS0-27b363c65bfe41ad8429f530566a2737]/10,/2/1:0,/7:8',
        'epubcfi(/6/22[id15]!/4/156/1,:21,:157)',
        'epubcfi(/6/764[id518]!/4[4V8DU0-27b363c65bfe41ad8429f530566a2737]/56,/1:0,/3:2)'
    ]
    for loc in test_locations:
-        idref, filepos = parse_location(loc)
+        idref, filepos = manager.parse_location(loc)
        print(f"location: {loc}\n  idref: {idref}\n  filepos: {filepos}\n")
    '''
    # 测试只获取特定 assetid 的笔记
    test_bookid = "B18FCD9F90FD43C2373AE52BAEF9A77C"
-    annotations = get_annotations(bookid=test_bookid)
+    annotations = manager.get_annotations(bookid=test_bookid)
    # 格式化打印该书的所有笔记
    from pprint import pprint
    print(f"\nAssetID={test_bookid} 的所有笔记:")
    pprint(annotations, indent=2, sort_dicts=False)
    # 输出每本书的前3条笔记
    '''
    book_notes = defaultdict(list)
    for assetid, notes_dict in annotations.items():
        for uuid, ann in notes_dict.items():
            book_notes[assetid].append({**ann, 'uuid': uuid})
    for assetid, notes in book_notes.items():
        print(f"\nAssetID: {assetid}")
        for i, note in enumerate(notes[:3]):
            print(f"  笔记{i+1}:")
            print(f"    creationdate: {note['creationdate']}")
            print(f"    idref: {note['idref']}")
            print(f"    filepos: {note['filepos']}")
            print(f"    note: {note['note']}")
            print(f"    selectedtext: {note['selectedtext']}")
            print(f"    uuid: {note['uuid']}")
    '''
--- a/booklist_parse.py
+++ b/booklist_parse.py
@@ -1,75 +1,66 @@
 """
 booklist_parse.py
 -----------------
 功能：
    - 解析iBooks的Books.plist，提取所有书籍元数据（书名、作者、路径、时间等）。
    - 解析BKLibrary.sqlite，获取每本书的最近打开时间（苹果时间戳，基准2001-01-01）。
 依赖：config.py 统一管理路径和配置项。
 主要接口：
    - parse_books_plist(plist_path)：返回所有书籍元数据，结构为{bk_id: {...}}
    - get_books_last_open(db_path)：返回所有书籍最近打开时间，结构为{bk_id: {'last_open': 时间戳}}
 依赖：plistlib, collections, sqlite3, os, datetime
 典型用法：
    booksinfo = parse_books_plist(config.LOCAL_BOOKS_PLIST)
    books_open = get_books_last_open(config.LOCAL_LIBRARY_DB)
 """
 import config
 import plistlib
 from collections import defaultdict
 def parse_books_plist(plist_path=config.LOCAL_BOOKS_PLIST):
    """Read a Books.plist file and index its book entries by generated item id.

    Args:
        plist_path: Path of the plist file to load; defaults to
            config.LOCAL_BOOKS_PLIST.

    Returns:
        defaultdict(dict) mapping each 'BKGeneratedItemId' to a dict with the
        keys displayname, author, type, bookid, itemname, path, date and
        updatedate. Fields missing from the plist entry default to ''.
        Entries without a generated item id are skipped.
    """
    with open(plist_path, 'rb') as handle:
        payload = plistlib.load(handle)
    catalog = defaultdict(dict)
    for record in payload.get('Books', []):
        item_id = record.get('BKGeneratedItemId')
        if item_id:
            field = record.get
            catalog[item_id] = {
                'displayname': field('BKDisplayName', ''),
                'author': field('artistName', ''),
                'type': field('BKBookType', ''),
                'bookid': item_id,
                'itemname': field('itemName', ''),
                'path': field('path', ''),
                'date': field('BKInsertionDate', ''),
                'updatedate': field('updateDate', '')
            }
    return catalog
 import sqlite3
 import os
 from collections import defaultdict
-def get_books_last_open(db_path=config.LOCAL_LIBRARY_DB):
+class BookListManager:
-    """
+    def __init__(self, plist_path=None, db_path=None):
-    从BKLibrary.sqlite获取书籍最近打开时间
+        self.plist_path = plist_path or config.LOCAL_BOOKS_PLIST
-    返回：defaultdict(dict)，bk_id为索引，包含最近打开时间
+        self.db_path = db_path or config.LOCAL_LIBRARY_DB
-    """
+        self._booksinfo = None
-    books_open = defaultdict(dict)
+        self._books_open = None
-    if not os.path.exists(db_path):
+
    def get_books_info(self):
        """Return book metadata from the plist at self.plist_path, cached after first load.

        Returns:
            defaultdict(dict) mapping each 'BKGeneratedItemId' to a dict with
            the keys displayname, author, type, bookid, itemname, path, date
            and updatedate. Fields missing from the plist entry default to
            ''. Entries without a generated item id are skipped. The same
            object is stored in self._booksinfo and returned on later calls.
        """
        cached = self._booksinfo
        if cached is not None:
            return cached
        with open(self.plist_path, 'rb') as handle:
            payload = plistlib.load(handle)
        catalog = defaultdict(dict)
        for record in payload.get('Books', []):
            item_id = record.get('BKGeneratedItemId')
            if item_id:
                field = record.get
                catalog[item_id] = {
                    'displayname': field('BKDisplayName', ''),
                    'author': field('artistName', ''),
                    'type': field('BKBookType', ''),
                    'bookid': item_id,
                    'itemname': field('itemName', ''),
                    'path': field('path', ''),
                    'date': field('BKInsertionDate', ''),
                    'updatedate': field('updateDate', '')
                }
        self._booksinfo = catalog
        return catalog
    def get_books_last_open(self):
        """Return the last-open value of each book in the database at self.db_path.

        Queries ZBKLIBRARYASSET for rows whose zlastopendate is not NULL and
        skips rows with an empty ZASSETID.

        Returns:
            defaultdict(dict) mapping asset id to {'last_open': value}. The
            result is stored in self._books_open and reused by later calls.
            If the database file does not exist an empty mapping is returned
            and nothing is cached. If reading fails, a warning is printed and
            whatever was collected so far (possibly empty) is cached and
            returned.
        """
        if self._books_open is not None:
            return self._books_open
        books_open = defaultdict(dict)
        if not os.path.exists(self.db_path):
            # Missing file: the empty result is returned without being cached.
            return books_open
        try:
            conn = sqlite3.connect(self.db_path)
            try:
                cursor = conn.cursor()
                cursor.execute(''' SELECT ZASSETID, zlastopendate FROM ZBKLIBRARYASSET WHERE zlastopendate IS NOT NULL ''')
                for asset_id, last_open in cursor.fetchall():
                    if asset_id:
                        books_open[asset_id] = {
                            'last_open': last_open
                        }
            finally:
                # Close the connection even when the query raises; previously
                # a failed execute/fetch left it open.
                conn.close()
        except Exception as e:
            # Best-effort read: report the problem and fall through with
            # whatever was collected.
            print(f'警告: 读取BKLibrary.sqlite失败: {e}')
        self._books_open = books_open
        return books_open
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()
        # ZBKLIBRARYASSET表包含书籍信息
        cursor.execute(''' SELECT ZASSETID, zlastopendate FROM ZBKLIBRARYASSET WHERE zlastopendate IS NOT NULL ''')
        rows = cursor.fetchall()
        for row in rows:
            asset_id, last_open = row
            if asset_id:
                books_open[asset_id] = {
                    'last_open': last_open  # 苹果时间戳，基准时间为2001-01-01
                }
        conn.close()
    except Exception as e:
        print(f'警告: 读取BKLibrary.sqlite失败: {e}')
    return books_open
 if __name__ == '__main__':
-    booksinfo = parse_books_plist(config.LOCAL_BOOKS_PLIST)
+    manager = BookListManager()
    booksinfo = manager.get_books_info()
    from pprint import pprint
    print("\n【前三条示例】")
    for k, v in list(booksinfo.items())[:3]:
@@ -77,19 +68,10 @@ if __name__ == '__main__':
        pprint(v, sort_dicts=False, indent=2)
        print('-' * 60)
    '''
    print("\n【全部内容】")
    for k, v in booksinfo.items():
        print(f"{k}:")
        pprint(v, sort_dicts=False, indent=2)
        print('-' * 60)
    '''
  # 测试最近打开时间
    print("\n【最近打开时间示例】")
-    books_open = get_books_last_open()
+    books_open = manager.get_books_last_open()
    import datetime
    for k, v in list(books_open.items())[:3]:
        ts = v['last_open']
        # 苹果时间戳，基准2001-01-01
        dt = datetime.datetime(2001, 1, 1) + datetime.timedelta(seconds=ts)
        print(f"{k}: {dt} (timestamp: {ts})")
--- a/data/Books.plist
+++ b/data/Books.plist
--- a/detaildesign.md
+++ b/detaildesign.md
@@ -158,31 +158,43 @@ answer = inquirer.fuzzy(
 ## 9.1 主要代码文件说明（细化）
 - `exportbooknotes.py`
  - 采用 OOP 设计，核心类为 `BookNotesExporter`：
    - `build_booksnote(bookid=None)`：构建结构化笔记数据。
    - `export_booksnote_to_md(booksnote, booksinfo, out_path=None)`：导出为 Markdown。
    - `find_file_by_ext`、`get_toc_tree` 等辅助方法。
  - 数据同步：自动复制 iBooks 数据库和元数据到本地。
  - 菜单交互：按最近打开时间戳排序，显示“书名 [时间戳]”，支持模糊搜索。
  - 只处理用户选中书籍的笔记，按章节分组导出 Markdown。
  - 依赖核心解析模块，负责主流程调度。
 - `annotationdata.py`
  - OOP 设计，核心类为 `AnnotationManager`：
    - `get_annotations(bookid=None)`：返回所有或指定 assetid 的笔记。
    - `parse_location(location)`：静态方法，解析定位信息。
  - 解析 AEAnnotation.sqlite，提取所有或指定 assetid 的笔记。
  - 支持苹果时间戳转换，结构化输出。
  - parse_location 辅助函数，统一解析笔记定位信息。
 - `booklist_parse.py`
  - OOP 设计，核心类为 `BookListManager`：
    - `get_books_info()`：获取书籍元数据。
    - `get_books_last_open()`：获取每本书的最近打开时间。
  - 解析 Books.plist，获取书籍元数据（书名、作者、路径、时间等）。
-  - 解析 BKLibrary.sqlite，获取每本书的最近打开时间（zlastopendate，苹果时间戳）。
+  - 解析 BKLibrary.sqlite，获取每本书的最近打开时间。
  - 提供统一数据接口，便于主流程排序和展示。
 - `opf_parse.py`
  - OOP 设计，核心类为 `OPFParser`：
    - `parse_opf(filepath)`：静态方法，返回 id->href 映射。
  - 解析 epub 的 OPF 文件，获取章节与文件映射关系（idref -> href）。
  - 支持多种 epub 目录结构。
 - `toc_parse.py`
  - OOP 设计，核心类为 `TOCParser`：
    - `parse_navpoints(navpoints)`：递归解析 navPoint 节点。
    - `find_label_path(node, ref, filepos, path)`：查找章节路径。
    - `find_section_by_selectedtext(html_path, selectedtext)`：通过选中文本定位章节标题。
    - `parse_html_title(html_path)`：解析 html 文件标题。
  - 解析 NCX 目录文件，递归构建章节树结构。
  - find_label_path：支持通过 ref 和 filepos 查找完整 label 路径。
  - find_section_by_selectedtext：通过选中文本在 html 文件中定位章节标题。
  - parse_html_title：解析 html 文件标题。
 - `backup/booksnote.py`
  - 历史/备份脚本，辅助数据迁移或格式转换。
--- a/export_notes/notes_export_B18FCD9F90FD43C2373AE52BAEF9A77C.md
+++ b/export_notes/notes_export_B18FCD9F90FD43C2373AE52BAEF9A77C.md
@@ -1,4 +1,4 @@
-# 笔记导出 2025-08-15 13:25
+# 笔记导出 2025-08-15 17:20
 ## 传统十论
--- a/exportbooknotes.py
+++ b/exportbooknotes.py
@@ -1,31 +1,17 @@
 """
-exportbooknotes.py
+exportbooknotes.py (OOP版)
------------------
+-------------------------
 功能：
    - 自动同步iBooks数据库和元数据文件到本地data目录。
    - 解析AEAnnotation.sqlite、Books.plist、BKLibrary.sqlite，构建结构化笔记数据。
    - 解析epub目录和章节信息，定位每条笔记所属章节。
    - 命令行菜单按最近打开时间降序展示书籍列表，供用户选择导出。
    - 仅导出选中书籍的所有笔记，按章节分组，生成Markdown文件。
 依赖：config.py 统一管理路径和配置项。
-
+主要接口：BookNotesExporter
-主要数据流：
+    - run()：命令行交互式导出主流程
-    1. 数据同步到data目录
+    - build_booksnote(bookid=None)：构建结构化笔记数据
-    2. 解析Books.plist获取书籍元数据
+    - export_booksnote_to_md(booksnote, booksinfo, out_path=None)：导出为Markdown
    3. 解析BKLibrary.sqlite获取最近打开时间
    4. 菜单排序与显示（书名+时间戳）
    5. 解析AEAnnotation.sqlite获取笔记
    6. 解析epub目录，定位章节
    7. 导出Markdown文件
 依赖：Python 3, InquirerPy, bs4, shutil, os, datetime, sqlite3
 主要数据流：
 典型用法：
    python exportbooknotes.py
    # 按提示选择书籍，自动导出笔记到export_notes目录
 """
 import config
 """
@@ -40,117 +26,113 @@ booksnote = {
      }}}
 }
 """
 from collections import defaultdict
 import os
-from annotationdata import get_annotations
+from collections import defaultdict
-from booklist_parse import parse_books_plist
+from annotationdata import AnnotationManager
 from booklist_parse import BookListManager
 from opf_parse import parse_opf
-from toc_parse import parse_navpoints, find_label_path
+from toc_parse import TOCParser
 from bs4 import BeautifulSoup
 from pprint import pprint
 def find_file_by_ext(root, exts):
    """Walk root recursively and return the path of the first file whose
    lowercased name ends with one of exts; return None when nothing matches."""
    suffixes = tuple(exts)
    for folder, _subdirs, names in os.walk(root):
        hit = next((name for name in names if name.lower().endswith(suffixes)), None)
        if hit is not None:
            return os.path.join(folder, hit)
    return None
-def get_toc_tree(toc_path):
+class BookNotesExporter:
-    with open(toc_path, 'r', encoding='utf-8') as f:
+    def __init__(self, config_module=config):
-        soup = BeautifulSoup(f, 'xml')
+        self.config = config_module
-    nav_map = soup.find('navMap')
+        self.annotation_db = config_module.LOCAL_ANNOTATION_DB
        self.books_plist = config_module.LOCAL_BOOKS_PLIST
        self.library_db = config_module.LOCAL_LIBRARY_DB
-    nav_points = nav_map.find_all('navPoint', recursive=False)
+    @staticmethod
-    toc_tree = parse_navpoints(nav_points)
+    def find_file_by_ext(root, exts):
-    #pprint(toc_tree, indent=2, depth=5)
+        for dirpath, _, files in os.walk(root):
-    return toc_tree
+            for f in files:
                for ext in exts:
                    if f.lower().endswith(ext):
                        return os.path.join(dirpath, f)
        return None
-def build_booksnote(annotation_db=config.LOCAL_ANNOTATION_DB, books_plist=config.LOCAL_BOOKS_PLIST, bookid=None):
+    @staticmethod
-    # 支持只处理特定 assetid 的笔记
+    def get_toc_tree(toc_path):
-    annotations = get_annotations(annotation_db, bookid=bookid)
+        with open(toc_path, 'r', encoding='utf-8') as f:
-    booksinfo = parse_books_plist(books_plist)
+            soup = BeautifulSoup(f, 'xml')
-    booksnote = defaultdict(lambda: defaultdict(dict))
+        nav_map = soup.find('navMap')
-    for assetid, notes in annotations.items():
+        nav_points = nav_map.find_all('navPoint', recursive=False)
-        # 获取epub路径
+        toc_tree = TOCParser.parse_navpoints(nav_points)
-        bookinfo = booksinfo.get(assetid)
+        return toc_tree
-        if not bookinfo:
+
-            continue
+    def build_booksnote(self, bookid=None):
-        epub_path = bookinfo.get('path')
+        manager = AnnotationManager(self.annotation_db)
-        if not epub_path or not os.path.isdir(epub_path):
+        annotations = manager.get_annotations(bookid=bookid)
-            continue
+        bl_manager = BookListManager(plist_path=self.books_plist)
-        # 查找opf和ncx
+        booksinfo = bl_manager.get_books_info()
-        opf_path = find_file_by_ext(epub_path, ['.opf'])
+        booksnote = defaultdict(lambda: defaultdict(dict))
-        ncx_path = find_file_by_ext(epub_path, ['.ncx'])
+        for assetid, notes in annotations.items():
-        if not opf_path or not ncx_path:
+            bookinfo = booksinfo.get(assetid)
-            continue
+            if not bookinfo:
-        id2href = parse_opf(opf_path)
+                continue
-        toc_tree = get_toc_tree(ncx_path)
+            epub_path = bookinfo.get('path')
-        for uuid, ann in notes.items():
+            if not epub_path or not os.path.isdir(epub_path):
-            idref = ann['idref']
+                continue
-            filepos = ann['filepos']
+            opf_path = self.find_file_by_ext(epub_path, ['.opf'])
-            href = id2href.get(idref, idref)
+            ncx_path = self.find_file_by_ext(epub_path, ['.ncx'])
-            chapter = find_label_path(toc_tree, href, filepos)
+            if not opf_path or not ncx_path:
-            if chapter is None:
+                continue
-                # 直接从html文件获取章节信息
+            id2href = parse_opf(opf_path)
-                html_path = os.path.join(epub_path, href.split('#')[0])
+            toc_tree = self.get_toc_tree(ncx_path)
-                selectedtext = ann.get('selectedtext')
+            for uuid, ann in notes.items():
-                if os.path.exists(html_path) and selectedtext:
+                idref = ann['idref']
-                    from toc_parse import find_section_by_selectedtext
+                filepos = ann['filepos']
-                    section = find_section_by_selectedtext(html_path, selectedtext)
+                href = id2href.get(idref, idref)
-                    if section:
+                chapter = TOCParser.find_label_path(toc_tree, href, filepos)
-                        chapter = section
+                if chapter is None:
                    html_path = os.path.join(epub_path, href.split('#')[0])
                    selectedtext = ann.get('selectedtext')
                    if os.path.exists(html_path) and selectedtext:
                        section = TOCParser.find_section_by_selectedtext(html_path, selectedtext)
                        if section:
                            chapter = section
                        else:
                            chapter = "(未找到章节)"
                    else:
                        chapter = "(未找到章节)"
-                else:
+                booksnote[assetid][chapter][uuid] = {
-                    chapter = "(未找到章节)"
+                    'creationdate': ann['creationdate'],
-            booksnote[assetid][chapter][uuid] = {
+                    'filepos': filepos,
-                'creationdate': ann['creationdate'],
+                    'idref': href,
-                'filepos': filepos,
+                    'note': ann['note'],
-                'idref': href,
+                    'selectedtext': ann['selectedtext']
-                'note': ann['note'],
+                }
-                'selectedtext': ann['selectedtext']
+        return booksnote
            }
    return booksnote
-import datetime
+    def export_booksnote_to_md(self, booksnote, booksinfo, out_path=None):
-
+        import datetime
-def export_booksnote_to_md(booksnote, booksinfo, out_path=None):
+        now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
-    """
+        lines = [f'# 笔记导出 {now}\n']
-    依据booksnote结构导出markdown文件，格式：
+        for assetid, chapters in booksnote.items():
-    # “笔记导出”+导出时间
+            bookname = booksinfo.get(assetid, {}).get('itemname', assetid)
-    ## 书名
+            lines.append(f'\n## {bookname}\n')
-    ### chapter
+            for chapter, notes in chapters.items():
-    selectedtext
+                lines.append(f'### {chapter}')
-    > note      (如果存在)
+                for uuid, ann in notes.items():
-    """
+                    sel = ann.get('selectedtext')
-    now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
+                    note = ann.get('note')
-    lines = [f'# 笔记导出 {now}\n']
+                    if sel:
-    for assetid, chapters in booksnote.items():
+                        lines.append(sel)
-        bookname = booksinfo.get(assetid, {}).get('itemname', assetid)
+                    if note:
-        lines.append(f'\n## {bookname}\n')
+                        lines.append(f'> {note}')
-        for chapter, notes in chapters.items():
+                    lines.append('')
-            lines.append(f'### {chapter}')
+        md = '\n'.join(lines)
-            for uuid, ann in notes.items():
+        if out_path:
-                sel = ann.get('selectedtext')
+            with open(out_path, 'w', encoding='utf-8') as f:
-                note = ann.get('note')
+                f.write(md)
-                if sel:
+        return md
                    lines.append(sel)
                if note:
                    lines.append(f'> {note}')
                lines.append('')
    md = '\n'.join(lines)
    if out_path:
        with open(out_path, 'w', encoding='utf-8') as f:
            f.write(md)
    return md
 if __name__ == '__main__':
    import shutil
    import os.path
    from InquirerPy import inquirer # type: ignore
    exporter = BookNotesExporter(config)
    # 自动覆盖 ./data 下的数据库和plist文件，源为iBooks真实路径
    src_files = [
        (config.IBOOKS_ANNOTATION_DB, config.LOCAL_ANNOTATION_DB),
@@ -166,31 +148,19 @@ if __name__ == '__main__':
        else:
            print(f'file not found: {src} ')
    from booklist_parse import parse_books_plist
    from InquirerPy import inquirer # type: ignore
    # 先获取所有书籍元数据
-    booksinfo = parse_books_plist(config.LOCAL_BOOKS_PLIST)
+    manager = BookListManager(plist_path=config.LOCAL_BOOKS_PLIST, db_path=config.LOCAL_LIBRARY_DB)
-
+    booksinfo = manager.get_books_info()
    # 构建书名列表（优先displayname, 其次itemname, 否则assetid），按parse_books_plist中的date字段排序
    assetid2name = {}
    assetid2lastopen = {}
-    from booklist_parse import get_books_last_open
+    last_open_times = manager.get_books_last_open()
    # 获取所有书籍的最后打开时间（字典，值为{'last_open': 时间戳}）
    last_open_times = get_books_last_open(config.LOCAL_LIBRARY_DB)
    for assetid, info in booksinfo.items():
        name = info.get('displayname') or info.get('itemname') or assetid
        # 如果书名中包含“-”，只取“-”前面的部分
        if '-' in name:
            name = name.split('-', 1)[0].strip()
        assetid2name[assetid] = name
        # 用 get_books_last_open 返回的时间戳排序，如无则为0
        ts = last_open_times.get(assetid, {}).get('last_open', 0)
        assetid2lastopen[assetid] = ts
    # 按last_open时间戳降序排列
    sorted_assetids = sorted(assetid2name.keys(), key=lambda aid: assetid2lastopen[aid], reverse=True)
    choices = [f"{assetid2name[aid]} [{assetid2lastopen[aid]}]" for aid in sorted_assetids]
    if not choices:
@@ -202,8 +172,6 @@ if __name__ == '__main__':
        multiselect=False,
        instruction="上下键选择，输入可模糊筛选，回车确定"
    ).execute()
    # 解析选中assetid
    for aid, name in assetid2name.items():
        if answer.startswith(name):
            selected_assetid = aid
@@ -211,10 +179,8 @@ if __name__ == '__main__':
    else:
        print("未找到选中书籍")
        exit(1)
-
+    selected_booksnote = exporter.build_booksnote(bookid=selected_assetid)
    # 只导出选中书的笔记
    selected_booksnote = build_booksnote(bookid=selected_assetid)
    selected_booksinfo = {selected_assetid: booksinfo.get(selected_assetid, {})}
    out_path = f'export_notes/notes_export_{selected_assetid}.md'
-    export_booksnote_to_md(selected_booksnote, selected_booksinfo, out_path)
+    exporter.export_booksnote_to_md(selected_booksnote, selected_booksinfo, out_path)
    print(f'《{selected_booksinfo[selected_assetid].get("displayname") or selected_booksinfo[selected_assetid].get("itemname") or selected_assetid}》 导出笔记 {out_path}')
--- a/opf_parse.py
+++ b/opf_parse.py
@@ -1,38 +1,46 @@
 # opf_parse.py
 # -----------------------------
 # 用于解析EPUB电子书的OPF文件，提取manifest部分所有id对应的html文件href。
 # 支持批量测试和通过id快速查找href。
 # 依赖：BeautifulSoup4
 # -----------------------------
 from collections import defaultdict
 from bs4 import BeautifulSoup
 import pprint
 def parse_opf(filepath):
    """
-    解析OPF文件，返回{id: href}的defaultdict(dict)结构。
+    兼容旧代码的顶层函数，实际调用 OPFParser.parse_opf。
    仅保留href以.html结尾的项。
    参数：
        filepath (str): OPF文件路径
    返回：
        defaultdict(dict): id到href的映射（仅html文件）
    """
-    result = defaultdict(dict)
+    return OPFParser.parse_opf(filepath)
-    with open(filepath, 'r', encoding='utf-8') as f:
+
-        soup = BeautifulSoup(f, 'xml')
+"""
-    # 查找manifest部分，遍历所有item，筛选html结尾的href
+opf_parse.py (OOP版)
-    manifest = soup.find('manifest')
+-------------------
-    if manifest:
+功能：
-        for item in manifest.find_all('item'):
+    - 解析EPUB电子书的OPF文件，提取manifest部分所有id对应的html文件href。
-            id_ = item.get('id')
+    - 支持通过id快速查找href。
-            href = item.get('href')
+    - 支持批量测试。
-            if id_ and href and href.strip().lower().endswith('html'):
+依赖：BeautifulSoup4
-                result[id_] = href
+主要接口：OPFParser
-    return result
+    - parse_opf(filepath)：静态方法，返回id->href映射（仅html文件）。
 """
 from collections import defaultdict
 from bs4 import BeautifulSoup
 class OPFParser:
    @staticmethod
    def parse_opf(filepath):
        """Parse an OPF file and map manifest item ids to their hrefs.

        Only items that have both an id and an href are kept, and only when
        the href, stripped and lowercased, ends with 'html'. The stored href
        is the original, unmodified attribute value.

        Args:
            filepath (str): Path of the OPF file, read as UTF-8.

        Returns:
            defaultdict(dict): id -> href mapping; empty when the document
            has no manifest element.
        """
        with open(filepath, 'r', encoding='utf-8') as fh:
            document = BeautifulSoup(fh, 'xml')
        mapping = defaultdict(dict)
        manifest_tag = document.find('manifest')
        if not manifest_tag:
            return mapping
        for entry in manifest_tag.find_all('item'):
            item_id, target = entry.get('id'), entry.get('href')
            if not (item_id and target):
                continue
            if target.strip().lower().endswith('html'):
                mapping[item_id] = target
        return mapping
 if __name__ == "__main__":
    test_files = [
@@ -44,8 +52,7 @@ if __name__ == "__main__":
    for file in test_files:
        print(f"\n==== 测试文件: {file} ====")
        try:
-            result = parse_opf(file)
+            result = OPFParser.parse_opf(file)
            pprint.pprint(result, indent=2, width=120, sort_dicts=False)
            # 增加通过id快速打印href的测试
            test_ids = list(result.keys())[:3]  # 取前三个id做演示
--- a/toc_parse.py
+++ b/toc_parse.py
@@ -1,6 +1,7 @@
 """
-toc_parse.py
+toc_parse.py (OOP版)
------------
+-------------------
 功能：
    - 解析EPUB电子书的toc.ncx目录文件，递归构建章节树结构。
    - 支持通过ref和filepos查找完整label路径。
@@ -8,166 +9,120 @@ toc_parse.py
    - 兼容多种EPUB格式，支持批量测试。
 依赖：config.py 统一管理路径和配置项。
-主要接口：
+主要接口：TOCParser
-    parse_navpoints(navpoints)  # 递归解析navPoint节点，返回章节树结构。
+    - parse_navpoints(navpoints)：递归解析navPoint节点，返回章节树结构。
-    find_label_path(node, ref, filepos, path)  # 查找指定ref和filepos的章节label路径。
+    - find_label_path(node, ref, filepos, path)：查找指定ref和filepos的章节label路径。
-    find_section_by_selectedtext(html_path, selectedtext)  # 通过选中文本定位章节标题。
+    - find_section_by_selectedtext(html_path, selectedtext)：通过选中文本定位章节标题。
-    parse_html_title(html_path)  # 解析html文件标题。
+    - parse_html_title(html_path)：解析html文件标题。
 依赖：BeautifulSoup4, pprint, os, typing
 """
 import config
 from bs4 import BeautifulSoup
-from typing import Dict, Optional, List, Any
+import os
 import pprint
-# ==== 辅助函数：根据selectedtext在html文件中的位置推断所在章节 ====
+class TOCParser:
-def find_section_by_selectedtext(html_path, selectedtext):
+    def __init__(self):
    """
    在html文件中查找selectedtext出现的位置，向上回溯最近的h1-h6标题，返回该标题文本。
    若未找到标题，则返回None。
    """
    try:
        with open(html_path, 'r', encoding='utf-8') as f:
            soup = BeautifulSoup(f, 'html.parser')
        # 在所有文本节点中查找selectedtext
        for elem in soup.find_all(string=True):
            if selectedtext and selectedtext.strip() and selectedtext.strip() in elem:
                # 回溯父节点，查找最近的h1-h6
                parent = elem.parent
                while parent:
                    prev = parent.previous_sibling
                    # 向上查找同级前面的h1-h6
                    while prev:
                        if prev.name and prev.name.lower() in ['h1','h2','h3','h4','h5','h6']:
                            return prev.get_text(strip=True)
                        prev = prev.previous_sibling
                    parent = parent.parent
        # 若未找到，尝试全局第一个h1-h6
        for tag in ['h1','h2','h3','h4','h5','h6']:
            h = soup.find(tag)
            if h and h.get_text(strip=True):
                return h.get_text(strip=True)
    except Exception:
        pass
    return None
-def parse_html_title(html_path):
+    @staticmethod
-    """
+    def find_section_by_selectedtext(html_path, selectedtext):
-    解析html文件，优先返回<title>，否则返回body第一个h1/h2/h3/h4/h5/h6或None。
+        try:
-    """
+            with open(html_path, 'r', encoding='utf-8') as f:
-    try:
+                soup = BeautifulSoup(f, 'html.parser')
-        with open(html_path, 'r', encoding='utf-8') as f:
+            for elem in soup.find_all(string=True):
-            soup = BeautifulSoup(f, 'html.parser')
+                if selectedtext and selectedtext.strip() and selectedtext.strip() in elem:
-        # 优先<title>
+                    parent = elem.parent
-        if soup.title and soup.title.string:
+                    while parent:
-            return soup.title.string.strip()
+                        prev = parent.previous_sibling
-        # 其次正文第一个h1-h6
+                        while prev:
-        for tag in ['h1','h2','h3','h4','h5','h6']:
+                            if prev.name and prev.name.lower() in ['h1','h2','h3','h4','h5','h6']:
-            h = soup.find(tag)
+                                return prev.get_text(strip=True)
-            if h and h.get_text(strip=True):
+                            prev = prev.previous_sibling
-                return h.get_text(strip=True)
+                        parent = parent.parent
-    except Exception:
+            for tag in ['h1','h2','h3','h4','h5','h6']:
-        pass
+                h = soup.find(tag)
-    return None
+                if h and h.get_text(strip=True):
                    return h.get_text(strip=True)
        except Exception:
            pass
        return None
-def parse_navpoints(navpoints) -> Dict[str, dict]:
+    @staticmethod
-    """
+    def parse_html_title(html_path):
-    递归解析 navpoints 节点，返回嵌套 dict 结构。
+        try:
-    :param navpoints: BeautifulSoup 查找到的 navPoint 节点列表
+            with open(html_path, 'r', encoding='utf-8') as f:
-    :return: 章节树结构
+                soup = BeautifulSoup(f, 'html.parser')
-    """
+            if soup.title and soup.title.string:
-    result = {}
+                return soup.title.string.strip()
-    for navpoint in navpoints:
+            for tag in ['h1','h2','h3','h4','h5','h6']:
-        label = navpoint.navLabel.text.strip().strip('"“”')
+                h = soup.find(tag)
-        src = navpoint.content["src"]
+                if h and h.get_text(strip=True):
-        if "#" in src:
+                    return h.get_text(strip=True)
-            ref, filepos = src.split("#", 1)
+        except Exception:
-        else:
+            pass
-            ref, filepos = src, None
+        return None
        entry = {
            "label": label,
            "ref": ref,
            "filepos": filepos,
            "children": parse_navpoints(navpoint.find_all("navPoint", recursive=False))
        }
        result[navpoint.get("id")] = entry
-    #pprint.pprint(result)  # 格式化打印result
+    @staticmethod
    def parse_navpoints(navpoints):
        """
        Recursively turn NCX navPoint nodes into a nested chapter tree.

        Args:
            navpoints: Iterable of navPoint nodes at one nesting level.

        Returns:
            dict: Maps each node's ``id`` attribute to an entry with keys
            ``label`` (navLabel text, stripped of surrounding whitespace
            and then of straight/curly double quotes), ``ref`` (the part
            of the content ``src`` before the first ``#``), ``filepos``
            (the part after it, or None when ``src`` has no ``#``) and
            ``children`` (the same structure built from the node's direct
            navPoint children). Nodes that share an id overwrite earlier
            ones.
        """
        tree = {}
        for nav in navpoints:
            title = nav.navLabel.text.strip().strip('"“”')
            # Split on the first '#' only; a missing separator means no anchor.
            target, separator, anchor = nav.content["src"].partition("#")
            direct_children = nav.find_all("navPoint", recursive=False)
            tree[nav.get("id")] = {
                "label": title,
                "ref": target,
                "filepos": anchor if separator else None,
                "children": TOCParser.parse_navpoints(direct_children),
            }
        return tree
-    return result
+    @staticmethod
-
+    def find_label_path(node, ref, filepos=None, path=None):
-def find_label_path(
+        if path is None:
-    node: Any, 
+            path = []
-    ref: str, 
+        if isinstance(node, dict):
-    filepos: Optional[str] = None, 
+            nodes = node.values() if "label" not in node else [node]
    path: Optional[List[str]] = None
 ) -> Optional[str]:
    """
    在嵌套 dict 结构中查找指定 ref 和 filepos 的 label 路径。
    :param node: 当前节点（dict 或 dict集合）
    :param ref: html文件名
    :param filepos: 文件位置，可为 None
    :param path: label 路径累积
    :return: 以 / 分隔的完整 label 路径，未找到返回 None
    """
    if path is None:
        path = []
    if isinstance(node, dict):
        nodes = node.values() if "label" not in node else [node]
        # 1. 优先精确匹配ref和filepos
        for v in nodes:
            if "label" in v:
                new_path = path + [v["label"]]
                if v["ref"] == ref and (filepos is None or v["filepos"] == filepos):
                    title = " / ".join(new_path)
                    #print(f'title ref={ref} filepos={filepos} -> {title}') #DBG
                    return title
                title = find_label_path(v["children"], ref, filepos, new_path)
                if title:
                    #print(f'title1 ref={ref} filepos={filepos} -> {title}') #DBG
                    return title
        # 2. 如果带filepos查找失败，回退到同ref下第一个章节（即只要ref匹配就返回）
        if filepos is not None:
            for v in nodes:
                if "label" in v:
                    new_path = path + [v["label"]]
-                    # print(f"对比 {v['ref']} == {ref}")
+                    if v["ref"] == ref and (filepos is None or v["filepos"] == filepos):
                    if v["ref"].split("#", 1)[0] == ref.split("#", 1)[0]:
                        title = " / ".join(new_path)
                        #print(f'title3 ref={ref} filepos={filepos} -> {title}') #DBG
                        return title
-                    title = find_label_path(v["children"], ref, None, new_path)
+                    title = TOCParser.find_label_path(v["children"], ref, filepos, new_path)
                    if title:
                        #print(f'title4 ref={ref} filepos={filepos} -> {title}') #DBG
                        return title
    # 3. 若完全未找到，尝试直接解析idref所指html文件标题，获取章节label信息
    # 仅在顶层调用时执行此逻辑
    if path == [] and ref and ref.endswith('.html'):
        import os
        # 自动在常见目录下查找html文件（以toc文件目录为基准）
        caller_dir = os.path.dirname(os.path.abspath(__file__))
        search_dirs = [caller_dir, os.getcwd()]
        for d in search_dirs:
            html_path = os.path.join(d, ref)
            #print(f"查找 {html_path}")
            if os.path.isfile(html_path):
                title = parse_html_title(html_path)
                if title:
                    return title
        # 递归查找（以toc文件目录为根）
        for d in search_dirs:
            for root, _, files in os.walk(d):
                if ref in files:
                    html_path = os.path.join(root, ref)
                    #print(f"2 查找 {html_path}")
                    title = parse_html_title(html_path)
                    if title:
                        return title
-    return None
+            if filepos is not None:
                for v in nodes:
                    if "label" in v:
                        new_path = path + [v["label"]]
                        if v["ref"].split("#", 1)[0] == ref.split("#", 1)[0]:
                            title = " / ".join(new_path)
                            return title
                        title = TOCParser.find_label_path(v["children"], ref, None, new_path)
                        if title:
                            return title
        if path == [] and ref and ref.endswith('.html'):
            caller_dir = os.path.dirname(os.path.abspath(__file__))
            search_dirs = [caller_dir, os.getcwd()]
            for d in search_dirs:
                html_path = os.path.join(d, ref)
                if os.path.isfile(html_path):
                    title = TOCParser.parse_html_title(html_path)
                    if title:
                        return title
            for d in search_dirs:
                for root, _, files in os.walk(d):
                    if ref in files:
                        html_path = os.path.join(root, ref)
                        title = TOCParser.parse_html_title(html_path)
                        if title:
                            return title
        return None
 if __name__ == "__main__":
    # ==== 批量测试指定toc/html/filepos列表 ====
@@ -182,8 +137,6 @@ if __name__ == "__main__":
        [config.EXAMPLES_DIR + "/政治哲學的12堂Podcast", "ch1.xhtml#_idParaDest-4", ""],
    ]
    for epub_dir, html_file, filepos in test_cases:
        # 自动查找epub目录下的toc.ncx
        import os
        toc_path = None
        for root, _, files in os.walk(epub_dir):
            for f in files:
@@ -200,39 +153,32 @@ if __name__ == "__main__":
            with open(toc_path, "r", encoding="utf-8") as f:
                soup = BeautifulSoup(f, "xml")
            nav_map = soup.find("navMap")
-            toc_tree = parse_navpoints(nav_map.find_all("navPoint", recursive=False))
+            toc_tree = TOCParser.parse_navpoints(nav_map.find_all("navPoint", recursive=False))
-            label_path = find_label_path(toc_tree, html_file, filepos)
+            label_path = TOCParser.find_label_path(toc_tree, html_file, filepos)
            print(f"find_label_path: {label_path if label_path else '未找到章节/标题'}")
            # toc中不存在html，直接测试parse_html_title
            html_path = os.path.join(epub_dir, html_file.split('#')[0])
            if os.path.exists(html_path):
-                title = parse_html_title(html_path)
+                title = TOCParser.parse_html_title(html_path)
                print(f"解析html标题: {html_path} => {title if title else '未找到标题'}")
                # 新增：根据selectedtext定位章节标题
                selectedtext = '从变法思想看，王安石变法最大的魅力是“民不加赋而国用足”：老百姓上缴的税率不增，国库的总收入仍可以'
-                section = find_section_by_selectedtext(html_path, selectedtext)
+                section = TOCParser.find_section_by_selectedtext(html_path, selectedtext)
                print(f"selectedtext定位到的章节标题: {section if section else '未找到相关标题'}")
            else:
                print(f"未找到html文件: {html_path}")
        except Exception as e:
            print(f"测试失败: {e}")
    # ==== 新增：测试变宋笔记章节定位和html标题解析 ====
    print("\n==== 测试: 变宋笔记章节定位和html标题解析 ====")
    # 假设笔记数据如下
    note_idref = 'text/part0002_split_003.html'
    note_filepos = None
    # 变宋toc.ncx路径
    bian_song_toc = config.EXAMPLES_DIR + "/变宋/toc.ncx"
    import os
    if os.path.exists(bian_song_toc):
        with open(bian_song_toc, "r", encoding="utf-8") as f:
            soup = BeautifulSoup(f, "xml")
        nav_map = soup.find("navMap")
-        toc_tree = parse_navpoints(nav_map.find_all("navPoint", recursive=False))
+        toc_tree = TOCParser.parse_navpoints(nav_map.find_all("navPoint", recursive=False))
-        # 先尝试用find_label_path查找章节
+        label_path = TOCParser.find_label_path(toc_tree, note_idref, note_filepos)
        label_path = find_label_path(toc_tree, note_idref, note_filepos)
        print(f"查找 {note_idref}: ", label_path if label_path else "未找到章节，尝试解析html标题")
    else:
        print(f"未找到toc.ncx: {bian_song_toc}")
`@@ -1,4 +1,4 @@`
	`# 笔记导出 2025-08-15 13:25`	`# 笔记导出 2025-08-15 17:20`


	`## 传统十论`	`## 传统十论`