'update'

2025-08-15 13:49:02 +08:00
parent 8873c70a60
commit 0bc6844209
76 changed files with 726 additions and 11197 deletions
--- a/toc_parse.py
+++ b/toc_parse.py
@@ -1,9 +1,21 @@
-# parsetoc.py
-# -----------------------------
-# 用于解析EPUB电子书的toc.ncx目录文件，递归构建章节树结构，支持通过ref和filepos查找完整label路径。
-# 支持多种EPUB格式的toc.ncx，包含批量测试用例。
-# 依赖：BeautifulSoup4
-# -----------------------------
+"""
+toc_parse.py
+------------
+功能：
+    - 解析EPUB电子书的toc.ncx目录文件，递归构建章节树结构。
+    - 支持通过ref和filepos查找完整label路径。
+    - 支持通过selectedtext在html文件中定位章节标题。
+    - 兼容多种EPUB格式，支持批量测试。
+
+配置：路径和配置项由 config.py 统一管理。
+主要接口：
+    parse_navpoints(navpoints)  # 递归解析navPoint节点，返回章节树结构。
+    find_label_path(node, ref, filepos, path)  # 查找指定ref和filepos的章节label路径。
+    find_section_by_selectedtext(html_path, selectedtext)  # 通过选中文本定位章节标题。
+    parse_html_title(html_path)  # 解析html文件标题。
+依赖：BeautifulSoup4, pprint, os, typing
+"""
+import config


 from bs4 import BeautifulSoup
@@ -160,14 +172,14 @@ def find_label_path(
 if __name__ == "__main__":
    # ==== 批量测试指定toc/html/filepos列表 ====
    test_cases = [
-        ["examples/epub_format_1", "index_split_015.html", "filepos684970"],
-        ["examples/epub_format_2", "Text/8c7276f38ead4738ee19249418898c18_split_006.html", "sigil_toc_id_12"],
-        ["examples/epub_format_3", "Text/011.xhtml", ""],
-        ["examples/epub_format_4", "xhtml/p-006.xhtml", ""],
-        ["examples/变宋", "text/part0005.html", ""],
-        ["examples/变宋", "text/part0002_split_003.html", ""],
-        ["examples/规训与惩罚", "index_split_006.html", ""],
-        ["examples/政治哲學的12堂Podcast", "ch1.xhtml#_idParaDest-4", ""],
+        [config.EXAMPLES_DIR + "/epub_format_1", "index_split_015.html", "filepos684970"],
+        [config.EXAMPLES_DIR + "/epub_format_2", "Text/8c7276f38ead4738ee19249418898c18_split_006.html", "sigil_toc_id_12"],
+        [config.EXAMPLES_DIR + "/epub_format_3", "Text/011.xhtml", ""],
+        [config.EXAMPLES_DIR + "/epub_format_4", "xhtml/p-006.xhtml", ""],
+        [config.EXAMPLES_DIR + "/变宋", "text/part0005.html", ""],
+        [config.EXAMPLES_DIR + "/变宋", "text/part0002_split_003.html", ""],
+        [config.EXAMPLES_DIR + "/规训与惩罚", "index_split_006.html", ""],
+        [config.EXAMPLES_DIR + "/政治哲學的12堂Podcast", "ch1.xhtml#_idParaDest-4", ""],
    ]
    for epub_dir, html_file, filepos in test_cases:
        # 自动查找epub目录下的toc.ncx
@@ -212,7 +224,7 @@ if __name__ == "__main__":
    note_idref = 'text/part0002_split_003.html'
    note_filepos = None
    # 变宋toc.ncx路径
-    bian_song_toc = "examples/变宋/toc.ncx"
+    bian_song_toc = config.EXAMPLES_DIR + "/变宋/toc.ncx"
    import os
    if os.path.exists(bian_song_toc):
        with open(bian_song_toc, "r", encoding="utf-8") as f: