douboer 2025-08-15 17:20:30 +08:00
parent 0bc6844209
commit 4e3b8abc34
12 changed files with 406 additions and 516 deletions

View File

@@ -1,27 +1,30 @@
"""
annotationdata.py
-----------------
annotationdata.py (OOP version)
-------------------------------
Features:
- Parse the iBooks AEAnnotation.sqlite database and extract the notes of all books or of a given book (assetid/bookid)
- Provide the parse_location helper for parsing a note's location information
- Return structured annotations data for later chapter lookup and export
Depends on config.py for centralized path and configuration management
Main interface:
- get_annotations(db_path, bookid=None): return the notes of all books or of the given assetid, structured as {assetid: {uuid: {...}}}
Main interface: AnnotationManager
- get_annotations(bookid=None): return the notes of all books or of the given assetid, structured as {assetid: {uuid: {...}}}
- parse_location(location): parse ZANNOTATIONLOCATION and return (idref, filepos)
Dependencies: sqlite3, collections, re, os, datetime
"""
import config
import sqlite3
from collections import defaultdict
import re
import os
from collections import defaultdict
def parse_location(location):
class AnnotationManager:
def __init__(self, db_path=None):
self.db_path = db_path or config.LOCAL_ANNOTATION_DB
@staticmethod
def parse_location(location):
"""
Parse ZANNOTATIONLOCATION and return (idref, filepos)
- For the epubcfi(...) format, prefer the contents of the [] brackets as idref
@@ -31,21 +34,20 @@ def parse_location(location):
filepos = None
if not location:
return idref, filepos
# Uniform handling: extract the first two [] groups
matches = re.findall(r'\[(.*?)\]', location) if location else []
idref = matches[0] if len(matches) > 0 else None
filepos = matches[1] if len(matches) > 1 else None
return idref, filepos
def get_annotations(db_path=config.LOCAL_ANNOTATION_DB, bookid=None):
def get_annotations(self, bookid=None):
# Check for the WAL-mode companion files
base = db_path.rsplit('.', 1)[0]
base = self.db_path.rsplit('.', 1)[0]
wal_path = base + '.sqlite-wal'
shm_path = base + '.sqlite-shm'
for f in [db_path, wal_path, shm_path]:
for f in [self.db_path, wal_path, shm_path]:
if not os.path.exists(f):
print(f'警告: 缺少 {f},可能无法获取全部最新笔记')
conn = sqlite3.connect(db_path)
conn = sqlite3.connect(self.db_path)
cursor = conn.cursor()
if bookid is not None:
cursor.execute('''
@@ -67,7 +69,6 @@ def get_annotations(db_path=config.LOCAL_ANNOTATION_DB, bookid=None):
if creationdate:
try:
origin = datetime.datetime(2001, 1, 1)
# Apple timestamp: float/int or a numeric string
if isinstance(creationdate, (int, float)):
dt = origin + datetime.timedelta(seconds=creationdate)
elif isinstance(creationdate, str) and creationdate.replace('.', '', 1).isdigit():
@@ -77,8 +78,7 @@ def get_annotations(db_path=config.LOCAL_ANNOTATION_DB, bookid=None):
date_str = f"{dt.year}/{dt.month}/{dt.day}"
except Exception:
date_str = str(creationdate)
idref, filepos = parse_location(location)
# Skip notes where both note and selectedtext are None
idref, filepos = self.parse_location(location)
if note is None and selectedtext is None:
continue
annotations[str(assetid)][uuid] = {
@@ -90,47 +90,24 @@ def get_annotations(db_path=config.LOCAL_ANNOTATION_DB, bookid=None):
}
conn.close()
if bookid is not None:
# Return only the notes for the given bookid
return {str(bookid): annotations.get(str(bookid), {})}
return annotations
# Usage example: print the first 3 notes of each book
if __name__ == "__main__":
manager = AnnotationManager()
# Test parse_location
'''
test_locations = [
'epubcfi(/6/746[id509]!/4[4MLOS0-27b363c65bfe41ad8429f530566a2737]/10,/2/1:0,/7:8',
'epubcfi(/6/22[id15]!/4/156/1,:21,:157)',
'epubcfi(/6/764[id518]!/4[4V8DU0-27b363c65bfe41ad8429f530566a2737]/56,/1:0,/3:2)'
]
for loc in test_locations:
idref, filepos = parse_location(loc)
idref, filepos = manager.parse_location(loc)
print(f"location: {loc}\n idref: {idref}\n filepos: {filepos}\n")
'''
# Test fetching notes for a specific assetid only
test_bookid = "B18FCD9F90FD43C2373AE52BAEF9A77C"
annotations = get_annotations(bookid=test_bookid)
# Pretty-print all notes of that book
annotations = manager.get_annotations(bookid=test_bookid)
from pprint import pprint
print(f"\nAssetID={test_bookid} 的所有笔记:")
pprint(annotations, indent=2, sort_dicts=False)
# Print the first 3 notes of each book
'''
book_notes = defaultdict(list)
for assetid, notes_dict in annotations.items():
for uuid, ann in notes_dict.items():
book_notes[assetid].append({**ann, 'uuid': uuid})
for assetid, notes in book_notes.items():
print(f"\nAssetID: {assetid}")
for i, note in enumerate(notes[:3]):
print(f" 笔记{i+1}:")
print(f" creationdate: {note['creationdate']}")
print(f" idref: {note['idref']}")
print(f" filepos: {note['filepos']}")
print(f" note: {note['note']}")
print(f" selectedtext: {note['selectedtext']}")
print(f" uuid: {note['uuid']}")
'''
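For orientation, a minimal sketch of the Apple/Core Data timestamp conversion that get_annotations applies above (seconds counted from 2001-01-01). The helper name apple_ts_to_date is illustrative and not part of the module:

```python
import datetime

APPLE_EPOCH = datetime.datetime(2001, 1, 1)  # Core Data reference date used by iBooks

def apple_ts_to_date(creationdate):
    """Convert an Apple timestamp (float/int or numeric string) to a 'Y/M/D' string."""
    if isinstance(creationdate, str):
        creationdate = float(creationdate)
    dt = APPLE_EPOCH + datetime.timedelta(seconds=creationdate)
    return f"{dt.year}/{dt.month}/{dt.day}"

# e.g. a ZANNOTATIONCREATIONDATE value of 745395630.0 falls in 2024
print(apple_ts_to_date(745395630.0))
```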

View File

@@ -1,32 +1,27 @@
"""
booklist_parse.py
-----------------
Features:
- Parse the iBooks Books.plist and extract all book metadata (title, author, path, dates, etc.)
- Parse BKLibrary.sqlite to get each book's last-opened time (Apple timestamp, epoch 2001-01-01)
Depends on config.py for centralized path and configuration management
Main interface:
- parse_books_plist(plist_path): return all book metadata, structured as {bk_id: {...}}
- get_books_last_open(db_path): return each book's last-opened time, structured as {bk_id: {'last_open': timestamp}}
Dependencies: plistlib, collections, sqlite3, os, datetime
Typical usage:
booksinfo = parse_books_plist(config.LOCAL_BOOKS_PLIST)
books_open = get_books_last_open(config.LOCAL_LIBRARY_DB)
"""
import config
import plistlib
import sqlite3
import os
from collections import defaultdict
def parse_books_plist(plist_path=config.LOCAL_BOOKS_PLIST):
class BookListManager:
def __init__(self, plist_path=None, db_path=None):
self.plist_path = plist_path or config.LOCAL_BOOKS_PLIST
self.db_path = db_path or config.LOCAL_LIBRARY_DB
self._booksinfo = None
self._books_open = None
def get_books_info(self):
if self._booksinfo is not None:
return self._booksinfo
booksinfo = defaultdict(dict)
with open(plist_path, 'rb') as f: plist_data = plistlib.load(f)
with open(self.plist_path, 'rb') as f:
plist_data = plistlib.load(f)
for book in plist_data.get('Books', []):
bk_id = book.get('BKGeneratedItemId')
if not bk_id: continue
if not bk_id:
continue
booksinfo[bk_id] = {
'displayname': book.get('BKDisplayName', ''),
'author': book.get('artistName', ''),
@@ -37,39 +32,35 @@ def parse_books_plist(plist_path=config.LOCAL_BOOKS_PLIST):
'date': book.get('BKInsertionDate',''),
'updatedate': book.get('updateDate','')
}
self._booksinfo = booksinfo
return booksinfo
import sqlite3
import os
def get_books_last_open(db_path=config.LOCAL_LIBRARY_DB):
"""
Get each book's last-opened time from BKLibrary.sqlite
Returns a defaultdict(dict) keyed by bk_id, containing the last-opened time
"""
def get_books_last_open(self):
if self._books_open is not None:
return self._books_open
books_open = defaultdict(dict)
if not os.path.exists(db_path):
if not os.path.exists(self.db_path):
return books_open
try:
conn = sqlite3.connect(db_path)
conn = sqlite3.connect(self.db_path)
cursor = conn.cursor()
# The ZBKLIBRARYASSET table holds the book records
cursor.execute(''' SELECT ZASSETID, zlastopendate FROM ZBKLIBRARYASSET WHERE zlastopendate IS NOT NULL ''')
rows = cursor.fetchall()
for row in rows:
asset_id, last_open = row
if asset_id:
books_open[asset_id] = {
'last_open': last_open # Apple timestamp, epoch 2001-01-01
'last_open': last_open
}
conn.close()
except Exception as e:
print(f'警告: 读取BKLibrary.sqlite失败: {e}')
self._books_open = books_open
return books_open
if __name__ == '__main__':
booksinfo = parse_books_plist(config.LOCAL_BOOKS_PLIST)
manager = BookListManager()
booksinfo = manager.get_books_info()
from pprint import pprint
print("\n【前三条示例】")
for k, v in list(booksinfo.items())[:3]:
@@ -77,19 +68,10 @@ if __name__ == '__main__':
pprint(v, sort_dicts=False, indent=2)
print('-' * 60)
'''
print("\n【全部内容】")
for k, v in booksinfo.items():
print(f"{k}:")
pprint(v, sort_dicts=False, indent=2)
print('-' * 60)
'''
# Test the last-opened times
print("\n【最近打开时间示例】")
books_open = get_books_last_open()
books_open = manager.get_books_last_open()
import datetime
for k, v in list(books_open.items())[:3]:
ts = v['last_open']
# Apple timestamp, epoch 2001-01-01
dt = datetime.datetime(2001, 1, 1) + datetime.timedelta(seconds=ts)
print(f"{k}: {dt} (timestamp: {ts})")

Binary file not shown.

View File

@@ -158,31 +158,43 @@ answer = inquirer.fuzzy(
## 9.1 Main code files (detailed)
- `exportbooknotes.py`
  - OOP design; the core class is `BookNotesExporter`
  - `build_booksnote(bookid=None)`: build the structured notes data.
  - `export_booksnote_to_md(booksnote, booksinfo, out_path=None)`: export to Markdown.
  - Helper methods such as `find_file_by_ext` and `get_toc_tree`.
  - Data sync: automatically copies the iBooks databases and metadata to the local data directory.
  - Menu interaction: books sorted by last-opened timestamp, shown as "title [timestamp]", with fuzzy search.
  - Only the selected book's notes are processed, grouped by chapter and exported to Markdown.
  - Depends on the core parsing modules and orchestrates the main flow (see the combined usage sketch after this list).
- `annotationdata.py`
  - OOP design; the core class is `AnnotationManager`
  - `get_annotations(bookid=None)`: return the notes of all books or of the given assetid.
  - `parse_location(location)`: static method that parses the location information.
  - Parses AEAnnotation.sqlite and extracts the notes of all books or of the given assetid.
  - Supports Apple timestamp conversion and structured output.
  - The parse_location helper parses note location information consistently.
- `booklist_parse.py`
  - OOP design; the core class is `BookListManager`
  - `get_books_info()`: get the book metadata.
  - `get_books_last_open()`: get each book's last-opened time.
  - Parses Books.plist for book metadata (title, author, path, dates, etc.).
  - Parses BKLibrary.sqlite for each book's last-opened time (zlastopendate, Apple timestamp).
  - Provides a unified data interface for sorting and display in the main flow.
  - Parses BKLibrary.sqlite for each book's last-opened time.
- `opf_parse.py`
  - OOP design; the core class is `OPFParser`
  - `parse_opf(filepath)`: static method returning the id->href mapping.
  - Parses the epub OPF file for the chapter-to-file mapping (idref -> href).
  - Supports multiple epub directory layouts.
- `toc_parse.py`
  - OOP design; the core class is `TOCParser`
  - `parse_navpoints(navpoints)`: recursively parse navPoint nodes.
  - `find_label_path(node, ref, filepos, path)`: look up the chapter path.
  - `find_section_by_selectedtext(html_path, selectedtext)`: locate the chapter heading via the selected text.
  - `parse_html_title(html_path)`: parse the html file title.
  - Parses the NCX table of contents and recursively builds the chapter tree.
  - find_label_path: look up the full label path by ref and filepos.
  - find_section_by_selectedtext: locate the chapter heading in the html file via the selected text.
  - parse_html_title: parse the html file title.
- `backup/booksnote.py`
  - Legacy/backup script for data migration and format conversion.
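A minimal combined usage sketch of the classes listed above, wiring them together outside of the CLI flow. It assumes the repository modules and config.py are importable; the content.opf path is only an example, and the real exporter locates the .opf/.ncx files with find_file_by_ext:

```python
import config
from annotationdata import AnnotationManager
from booklist_parse import BookListManager
from opf_parse import OPFParser
from toc_parse import TOCParser

books = BookListManager(plist_path=config.LOCAL_BOOKS_PLIST, db_path=config.LOCAL_LIBRARY_DB)
notes = AnnotationManager(config.LOCAL_ANNOTATION_DB)

booksinfo = books.get_books_info()       # {assetid: {'displayname': ..., ...}}
annotations = notes.get_annotations()    # {assetid: {uuid: {...}}}

# For a single book, map a note's idref to an href, then to a chapter label.
id2href = OPFParser.parse_opf("examples/epub_format_2/OEBPS/content.opf")
# toc_tree = TOCParser.parse_navpoints(nav_points)   # nav_points taken from the book's toc.ncx
# chapter = TOCParser.find_label_path(toc_tree, id2href.get(idref, idref), filepos)
```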

View File

@@ -1,4 +1,4 @@
# 笔记导出 2025-08-15 13:25
# 笔记导出 2025-08-15 17:20
## 传统十论

View File

@@ -1,31 +1,17 @@
"""
exportbooknotes.py
------------------
exportbooknotes.py (OOP version)
--------------------------------
Features:
- Automatically sync the iBooks database and metadata files to the local data directory
- Parse AEAnnotation.sqlite, Books.plist and BKLibrary.sqlite to build structured notes data
- Parse the epub directory and chapter information to locate the chapter of each note
- Command-line menu listing books in descending order of last-opened time for the user to pick from
- Export only the selected book's notes, grouped by chapter, into a Markdown file
Depends on config.py for centralized path and configuration management
Main data flow:
1. Sync data into the data directory
2. Parse Books.plist for book metadata
3. Parse BKLibrary.sqlite for the last-opened times
4. Sort and display the menu (title + timestamp)
5. Parse AEAnnotation.sqlite for the notes
6. Parse the epub directory to locate chapters
7. Export the Markdown file
Dependencies: Python 3, InquirerPy, bs4, shutil, os, datetime, sqlite3
Main data flow:
Typical usage:
python exportbooknotes.py
# Pick a book at the prompt; the notes are exported to the export_notes directory automatically
Main interface: BookNotesExporter
- run(): interactive command-line export main flow
- build_booksnote(bookid=None): build the structured notes data
- export_booksnote_to_md(booksnote, booksinfo, out_path=None): export to Markdown
"""
import config
"""
@@ -40,17 +26,24 @@ booksnote = {
}}}
}
"""
from collections import defaultdict
import os
from annotationdata import get_annotations
from booklist_parse import parse_books_plist
from collections import defaultdict
from annotationdata import AnnotationManager
from booklist_parse import BookListManager
from opf_parse import parse_opf
from toc_parse import parse_navpoints, find_label_path
from toc_parse import TOCParser
from bs4 import BeautifulSoup
from pprint import pprint
def find_file_by_ext(root, exts):
"""在root下递归查找第一个指定后缀的文件"""
class BookNotesExporter:
def __init__(self, config_module=config):
self.config = config_module
self.annotation_db = config_module.LOCAL_ANNOTATION_DB
self.books_plist = config_module.LOCAL_BOOKS_PLIST
self.library_db = config_module.LOCAL_LIBRARY_DB
@staticmethod
def find_file_by_ext(root, exts):
for dirpath, _, files in os.walk(root):
for f in files:
for ext in exts:
@@ -58,48 +51,44 @@ def find_file_by_ext(root, exts):
return os.path.join(dirpath, f)
return None
def get_toc_tree(toc_path):
@staticmethod
def get_toc_tree(toc_path):
with open(toc_path, 'r', encoding='utf-8') as f:
soup = BeautifulSoup(f, 'xml')
nav_map = soup.find('navMap')
nav_points = nav_map.find_all('navPoint', recursive=False)
toc_tree = parse_navpoints(nav_points)
#pprint(toc_tree, indent=2, depth=5)
toc_tree = TOCParser.parse_navpoints(nav_points)
return toc_tree
def build_booksnote(annotation_db=config.LOCAL_ANNOTATION_DB, books_plist=config.LOCAL_BOOKS_PLIST, bookid=None):
# Support processing only a specific assetid's notes
annotations = get_annotations(annotation_db, bookid=bookid)
booksinfo = parse_books_plist(books_plist)
def build_booksnote(self, bookid=None):
manager = AnnotationManager(self.annotation_db)
annotations = manager.get_annotations(bookid=bookid)
bl_manager = BookListManager(plist_path=self.books_plist)
booksinfo = bl_manager.get_books_info()
booksnote = defaultdict(lambda: defaultdict(dict))
for assetid, notes in annotations.items():
# Get the epub path
bookinfo = booksinfo.get(assetid)
if not bookinfo:
continue
epub_path = bookinfo.get('path')
if not epub_path or not os.path.isdir(epub_path):
continue
# Locate the opf and ncx files
opf_path = find_file_by_ext(epub_path, ['.opf'])
ncx_path = find_file_by_ext(epub_path, ['.ncx'])
opf_path = self.find_file_by_ext(epub_path, ['.opf'])
ncx_path = self.find_file_by_ext(epub_path, ['.ncx'])
if not opf_path or not ncx_path:
continue
id2href = parse_opf(opf_path)
toc_tree = get_toc_tree(ncx_path)
toc_tree = self.get_toc_tree(ncx_path)
for uuid, ann in notes.items():
idref = ann['idref']
filepos = ann['filepos']
href = id2href.get(idref, idref)
chapter = find_label_path(toc_tree, href, filepos)
chapter = TOCParser.find_label_path(toc_tree, href, filepos)
if chapter is None:
# Fall back to reading chapter info directly from the html file
html_path = os.path.join(epub_path, href.split('#')[0])
selectedtext = ann.get('selectedtext')
if os.path.exists(html_path) and selectedtext:
from toc_parse import find_section_by_selectedtext
section = find_section_by_selectedtext(html_path, selectedtext)
section = TOCParser.find_section_by_selectedtext(html_path, selectedtext)
if section:
chapter = section
else:
@@ -115,17 +104,8 @@ def build_booksnote(annotation_db=config.LOCAL_ANNOTATION_DB, books_plist=config
}
return booksnote
import datetime
def export_booksnote_to_md(booksnote, booksinfo, out_path=None):
"""
Export a markdown file from the booksnote structure; format:
# "笔记导出" + export timestamp
## book title
### chapter
selectedtext
> note (if present)
"""
def export_booksnote_to_md(self, booksnote, booksinfo, out_path=None):
import datetime
now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
lines = [f'# 笔记导出 {now}\n']
for assetid, chapters in booksnote.items():
@@ -151,6 +131,8 @@ def export_booksnote_to_md(booksnote, booksinfo, out_path=None):
if __name__ == '__main__':
import shutil
import os.path
from InquirerPy import inquirer # type: ignore
exporter = BookNotesExporter(config)
# Automatically overwrite the databases and plist under ./data; sources are the real iBooks paths
src_files = [
(config.IBOOKS_ANNOTATION_DB, config.LOCAL_ANNOTATION_DB),
@@ -166,31 +148,19 @@ if __name__ == '__main__':
else:
print(f'file not found: {src} ')
from booklist_parse import parse_books_plist
from InquirerPy import inquirer # type: ignore
# First get all book metadata
booksinfo = parse_books_plist(config.LOCAL_BOOKS_PLIST)
# Build the title list: prefer displayname, then itemname, else assetid; sorted by the date field from parse_books_plist
manager = BookListManager(plist_path=config.LOCAL_BOOKS_PLIST, db_path=config.LOCAL_LIBRARY_DB)
booksinfo = manager.get_books_info()
assetid2name = {}
assetid2lastopen = {}
from booklist_parse import get_books_last_open
# Get every book's last-opened time (dict whose values are {'last_open': timestamp})
last_open_times = get_books_last_open(config.LOCAL_LIBRARY_DB)
last_open_times = manager.get_books_last_open()
for assetid, info in booksinfo.items():
name = info.get('displayname') or info.get('itemname') or assetid
# If the title contains '-', keep only the part before it
if '-' in name:
name = name.split('-', 1)[0].strip()
assetid2name[assetid] = name
# Sort by the timestamps returned by get_books_last_open (0 if missing)
ts = last_open_times.get(assetid, {}).get('last_open', 0)
assetid2lastopen[assetid] = ts
# Sort by the last_open timestamp in descending order
sorted_assetids = sorted(assetid2name.keys(), key=lambda aid: assetid2lastopen[aid], reverse=True)
choices = [f"{assetid2name[aid]} [{assetid2lastopen[aid]}]" for aid in sorted_assetids]
if not choices:
@@ -202,8 +172,6 @@ if __name__ == '__main__':
multiselect=False,
instruction="上下键选择,输入可模糊筛选,回车确定"
).execute()
# Resolve the selected assetid
for aid, name in assetid2name.items():
if answer.startswith(name):
selected_assetid = aid
@@ -211,10 +179,8 @@
else:
print("未找到选中书籍")
exit(1)
# Export only the selected book's notes
selected_booksnote = build_booksnote(bookid=selected_assetid)
selected_booksnote = exporter.build_booksnote(bookid=selected_assetid)
selected_booksinfo = {selected_assetid: booksinfo.get(selected_assetid, {})}
out_path = f'export_notes/notes_export_{selected_assetid}.md'
export_booksnote_to_md(selected_booksnote, selected_booksinfo, out_path)
exporter.export_booksnote_to_md(selected_booksnote, selected_booksinfo, out_path)
print(f'《{selected_booksinfo[selected_assetid].get("displayname") or selected_booksinfo[selected_assetid].get("itemname") or selected_assetid}》 导出笔记 {out_path}')
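For reference, a minimal sketch that restates the Markdown layout produced by export_booksnote_to_md (book -> chapter -> selectedtext, with the note quoted underneath). It mirrors the removed docstring above and the field names used in the code, but is an illustration rather than the exporter itself:

```python
import datetime

def booksnote_to_md_lines(booksnote, booksinfo):
    """Illustrative restatement of the export layout."""
    now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
    lines = [f'# 笔记导出 {now}\n']
    for assetid, chapters in booksnote.items():
        info = booksinfo.get(assetid, {})
        lines.append(f"## {info.get('displayname') or info.get('itemname') or assetid}\n")
        for chapter, notes in chapters.items():
            lines.append(f'### {chapter}\n')
            for uuid, ann in notes.items():
                lines.append(f"{ann.get('selectedtext') or ''}\n")
                if ann.get('note'):
                    lines.append(f"> {ann['note']}\n")
    return lines
```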

View File

@@ -1,21 +1,29 @@
def parse_opf(filepath):
"""
Top-level wrapper kept for backward compatibility; delegates to OPFParser.parse_opf
"""
return OPFParser.parse_opf(filepath)
# parseopf.py
# -----------------------------
# Parse an EPUB's OPF file and extract the href of every html file keyed by id in the manifest section.
# Supports batch testing and quick href lookup by id.
# Depends on BeautifulSoup4
# -----------------------------
"""
opf_parse.py (OOP version)
--------------------------
Features:
- Parse an EPUB's OPF file and extract the href of every html file keyed by id in the manifest section
- Supports quick href lookup by id
- Supports batch testing
Depends on BeautifulSoup4
Main interface: OPFParser
- parse_opf(filepath): static method returning the id->href mapping (html files only)
"""
from collections import defaultdict
from bs4 import BeautifulSoup
import pprint
def parse_opf(filepath):
class OPFParser:
@staticmethod
def parse_opf(filepath):
"""
Parse the OPF file and return a defaultdict(dict) of {id: href}
Only items whose href ends with .html are kept
Args:
filepath (str): path to the OPF file
Returns:
@@ -24,7 +32,6 @@ def parse_opf(filepath):
result = defaultdict(dict)
with open(filepath, 'r', encoding='utf-8') as f:
soup = BeautifulSoup(f, 'xml')
# Find the manifest section, iterate all items, and keep hrefs ending in .html
manifest = soup.find('manifest')
if manifest:
for item in manifest.find_all('item'):
@@ -34,6 +41,7 @@ def parse_opf(filepath):
result[id_] = href
return result
if __name__ == "__main__":
test_files = [
'./examples/epub_format_2/OEBPS/content.opf',
@@ -44,8 +52,7 @@ if __name__ == "__main__":
for file in test_files:
print(f"\n==== 测试文件: {file} ====")
try:
result = parse_opf(file)
pprint.pprint(result, indent=2, width=120, sort_dicts=False)
result = OPFParser.parse_opf(file)
# Added test: quickly print an href by id
test_ids = list(result.keys())[:3] # take the first three ids for the demo
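To make the manifest handling concrete, a self-contained sketch using an inline OPF fragment (the ids and hrefs are made up) that reproduces what parse_opf keeps:

```python
from bs4 import BeautifulSoup

# A made-up manifest fragment in the shape parse_opf expects.
OPF_SAMPLE = """<?xml version="1.0"?>
<package xmlns="http://www.idpf.org/2007/opf" version="2.0">
  <manifest>
    <item id="id15" href="text/part0001.html" media-type="application/xhtml+xml"/>
    <item id="id16" href="text/part0002.html" media-type="application/xhtml+xml"/>
    <item id="css" href="styles.css" media-type="text/css"/>
  </manifest>
</package>"""

soup = BeautifulSoup(OPF_SAMPLE, "xml")
id2href = {item.get("id"): item.get("href")
           for item in soup.find("manifest").find_all("item")
           if item.get("href", "").endswith(".html")}
print(id2href)  # {'id15': 'text/part0001.html', 'id16': 'text/part0002.html'}
```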

View File

@@ -1,6 +1,7 @@
"""
toc_parse.py
------------
toc_parse.py (OOP version)
--------------------------
Features:
- Parse the EPUB toc.ncx table of contents and recursively build the chapter tree
- Supports looking up the full label path by ref and filepos
@@ -8,43 +9,36 @@ toc_parse.py
- Compatible with multiple EPUB formats; supports batch testing
Depends on config.py for centralized path and configuration management
Main interface:
parse_navpoints(navpoints) # recursively parse navPoint nodes and return the chapter tree
find_label_path(node, ref, filepos, path) # look up the chapter label path for the given ref and filepos
find_section_by_selectedtext(html_path, selectedtext) # locate the chapter heading via the selected text
parse_html_title(html_path) # parse the html file title
Main interface: TOCParser
- parse_navpoints(navpoints): recursively parse navPoint nodes and return the chapter tree
- find_label_path(node, ref, filepos, path): look up the chapter label path for the given ref and filepos
- find_section_by_selectedtext(html_path, selectedtext): locate the chapter heading via the selected text
- parse_html_title(html_path): parse the html file title
Dependencies: BeautifulSoup4, pprint, os, typing
"""
import config
from bs4 import BeautifulSoup
from typing import Dict, Optional, List, Any
import pprint
import os
# ==== Helper: infer the containing chapter from where selectedtext appears in the html file ====
def find_section_by_selectedtext(html_path, selectedtext):
"""
Find where selectedtext appears in the html file, walk back to the nearest h1-h6 heading, and return that heading's text
Return None if no heading is found
"""
class TOCParser:
def __init__(self):
pass
@staticmethod
def find_section_by_selectedtext(html_path, selectedtext):
try:
with open(html_path, 'r', encoding='utf-8') as f:
soup = BeautifulSoup(f, 'html.parser')
# Search all text nodes for selectedtext
for elem in soup.find_all(string=True):
if selectedtext and selectedtext.strip() and selectedtext.strip() in elem:
# Walk up through the parents looking for the nearest h1-h6
parent = elem.parent
while parent:
prev = parent.previous_sibling
# Look backwards through preceding siblings for an h1-h6
while prev:
if prev.name and prev.name.lower() in ['h1','h2','h3','h4','h5','h6']:
return prev.get_text(strip=True)
prev = prev.previous_sibling
parent = parent.parent
# If not found, fall back to the first h1-h6 in the document
for tag in ['h1','h2','h3','h4','h5','h6']:
h = soup.find(tag)
if h and h.get_text(strip=True):
@@ -53,17 +47,13 @@ def find_section_by_selectedtext(html_path, selectedtext):
pass
return None
def parse_html_title(html_path):
"""
Parse the html file: prefer <title>, otherwise return the first h1/h2/h3/h4/h5/h6 in the body, or None
"""
@staticmethod
def parse_html_title(html_path):
try:
with open(html_path, 'r', encoding='utf-8') as f:
soup = BeautifulSoup(f, 'html.parser')
# Prefer <title>
if soup.title and soup.title.string:
return soup.title.string.strip()
# Otherwise the first h1-h6 in the body
for tag in ['h1','h2','h3','h4','h5','h6']:
h = soup.find(tag)
if h and h.get_text(strip=True):
@@ -72,12 +62,8 @@ def parse_html_title(html_path):
pass
return None
def parse_navpoints(navpoints) -> Dict[str, dict]:
"""
Recursively parse navPoint nodes and return a nested dict structure
:param navpoints: list of navPoint nodes found by BeautifulSoup
:return: chapter tree structure
"""
@staticmethod
def parse_navpoints(navpoints):
result = {}
for navpoint in navpoints:
label = navpoint.navLabel.text.strip().strip('"“”')
@@ -90,81 +76,50 @@ def parse_navpoints(navpoints) -> Dict[str, dict]:
"label": label,
"ref": ref,
"filepos": filepos,
"children": parse_navpoints(navpoint.find_all("navPoint", recursive=False))
"children": TOCParser.parse_navpoints(navpoint.find_all("navPoint", recursive=False))
}
result[navpoint.get("id")] = entry
#pprint.pprint(result) # pretty-print result
return result
def find_label_path(
node: Any,
ref: str,
filepos: Optional[str] = None,
path: Optional[List[str]] = None
) -> Optional[str]:
"""
Look up the label path for the given ref and filepos in the nested dict structure
:param node: current node (a dict or a collection of dicts)
:param ref: html file name
:param filepos: file position, may be None
:param path: accumulated label path
:return: the full label path joined with " / ", or None if not found
"""
@staticmethod
def find_label_path(node, ref, filepos=None, path=None):
if path is None:
path = []
if isinstance(node, dict):
nodes = node.values() if "label" not in node else [node]
# 1. Prefer an exact match on ref and filepos
for v in nodes:
if "label" in v:
new_path = path + [v["label"]]
if v["ref"] == ref and (filepos is None or v["filepos"] == filepos):
title = " / ".join(new_path)
#print(f'title ref={ref} filepos={filepos} -> {title}') #DBG
return title
title = find_label_path(v["children"], ref, filepos, new_path)
title = TOCParser.find_label_path(v["children"], ref, filepos, new_path)
if title:
#print(f'title1 ref={ref} filepos={filepos} -> {title}') #DBG
return title
# 2. If the lookup with filepos fails, fall back to the first chapter with the same ref (return as soon as ref matches)
if filepos is not None:
for v in nodes:
if "label" in v:
new_path = path + [v["label"]]
# print(f"对比 {v['ref']} == {ref}")
if v["ref"].split("#", 1)[0] == ref.split("#", 1)[0]:
title = " / ".join(new_path)
#print(f'title3 ref={ref} filepos={filepos} -> {title}') #DBG
return title
title = find_label_path(v["children"], ref, None, new_path)
title = TOCParser.find_label_path(v["children"], ref, None, new_path)
if title:
#print(f'title4 ref={ref} filepos={filepos} -> {title}') #DBG
return title
# 3. If still nothing was found, try parsing the title of the html file that idref points to for a chapter label
# Only run this logic at the top-level call
if path == [] and ref and ref.endswith('.html'):
import os
# Automatically look for the html file in common directories, relative to the toc file's directory
caller_dir = os.path.dirname(os.path.abspath(__file__))
search_dirs = [caller_dir, os.getcwd()]
for d in search_dirs:
html_path = os.path.join(d, ref)
#print(f"查找 {html_path}")
if os.path.isfile(html_path):
title = parse_html_title(html_path)
title = TOCParser.parse_html_title(html_path)
if title:
return title
# Recursive search rooted at the toc file's directory
for d in search_dirs:
for root, _, files in os.walk(d):
if ref in files:
html_path = os.path.join(root, ref)
#print(f"2 查找 {html_path}")
title = parse_html_title(html_path)
title = TOCParser.parse_html_title(html_path)
if title:
return title
return None
@@ -182,8 +137,6 @@ if __name__ == "__main__":
[config.EXAMPLES_DIR + "/政治哲學的12堂Podcast", "ch1.xhtml#_idParaDest-4", ""],
]
for epub_dir, html_file, filepos in test_cases:
# Automatically find toc.ncx under the epub directory
import os
toc_path = None
for root, _, files in os.walk(epub_dir):
for f in files:
@@ -200,39 +153,32 @@ if __name__ == "__main__":
with open(toc_path, "r", encoding="utf-8") as f:
soup = BeautifulSoup(f, "xml")
nav_map = soup.find("navMap")
toc_tree = parse_navpoints(nav_map.find_all("navPoint", recursive=False))
label_path = find_label_path(toc_tree, html_file, filepos)
toc_tree = TOCParser.parse_navpoints(nav_map.find_all("navPoint", recursive=False))
label_path = TOCParser.find_label_path(toc_tree, html_file, filepos)
print(f"find_label_path: {label_path if label_path else '未找到章节/标题'}")
# The html file is not present in the toc; test parse_html_title directly
html_path = os.path.join(epub_dir, html_file.split('#')[0])
if os.path.exists(html_path):
title = parse_html_title(html_path)
title = TOCParser.parse_html_title(html_path)
print(f"解析html标题: {html_path} => {title if title else '未找到标题'}")
# New: locate the chapter heading from selectedtext
selectedtext = '从变法思想看,王安石变法最大的魅力是“民不加赋而国用足”:老百姓上缴的税率不增,国库的总收入仍可以'
section = find_section_by_selectedtext(html_path, selectedtext)
section = TOCParser.find_section_by_selectedtext(html_path, selectedtext)
print(f"selectedtext定位到的章节标题: {section if section else '未找到相关标题'}")
else:
print(f"未找到html文件: {html_path}")
except Exception as e:
print(f"测试失败: {e}")
# ==== New test: chapter lookup and html title parsing for the 变宋 notes ====
print("\n==== 测试: 变宋笔记章节定位和html标题解析 ====")
# Assume the note data is as follows
note_idref = 'text/part0002_split_003.html'
note_filepos = None
# Path to the 变宋 toc.ncx
bian_song_toc = config.EXAMPLES_DIR + "/变宋/toc.ncx"
import os
if os.path.exists(bian_song_toc):
with open(bian_song_toc, "r", encoding="utf-8") as f:
soup = BeautifulSoup(f, "xml")
nav_map = soup.find("navMap")
toc_tree = parse_navpoints(nav_map.find_all("navPoint", recursive=False))
# First try find_label_path to look up the chapter
label_path = find_label_path(toc_tree, note_idref, note_filepos)
toc_tree = TOCParser.parse_navpoints(nav_map.find_all("navPoint", recursive=False))
label_path = TOCParser.find_label_path(toc_tree, note_idref, note_filepos)
print(f"查找 {note_idref}: ", label_path if label_path else "未找到章节尝试解析html标题")
else:
print(f"未找到toc.ncx: {bian_song_toc}")