'update'

2025-08-15 17:20:30 +08:00
parent 0bc6844209
commit 4e3b8abc34
12 changed files with 406 additions and 516 deletions
--- a/opf_parse.py
+++ b/opf_parse.py
@@ -1,38 +1,46 @@
-
-# parseopf.py
-# -----------------------------
-# 用于解析EPUB电子书的OPF文件，提取manifest部分所有id对应的html文件href。
-# 支持批量测试和通过id快速查找href。
-# 依赖：BeautifulSoup4
-# -----------------------------
-
-from collections import defaultdict
-from bs4 import BeautifulSoup
-import pprint
-
-
 def parse_opf(filepath):
    """
-    解析OPF文件，返回{id: href}的defaultdict(dict)结构。
-    仅保留href以.html结尾的项。
-
-    参数：
-        filepath (str): OPF文件路径
-    返回：
-        defaultdict(dict): id到href的映射（仅html文件）
+    Top-level function kept for old callers; delegates to OPFParser.parse_opf.
    """
-    result = defaultdict(dict)
-    with open(filepath, 'r', encoding='utf-8') as f:
-        soup = BeautifulSoup(f, 'xml')
-    # 查找manifest部分，遍历所有item，筛选html结尾的href
-    manifest = soup.find('manifest')
-    if manifest:
-        for item in manifest.find_all('item'):
-            id_ = item.get('id')
-            href = item.get('href')
-            if id_ and href and href.strip().lower().endswith('html'):
-                result[id_] = href
-    return result
+    return OPFParser.parse_opf(filepath)
+
+"""
+opf_parse.py (OOP版)
+-------------------
+功能：
+    - 解析EPUB电子书的OPF文件，提取manifest部分所有id对应的html文件href。
+    - 支持通过id快速查找href。
+    - 支持批量测试。
+依赖：BeautifulSoup4
+主要接口：OPFParser
+    - parse_opf(filepath)：静态方法，返回id->href映射（仅html文件）。
+"""
+from collections import defaultdict
+from bs4 import BeautifulSoup
+
+class OPFParser:
+    @staticmethod
+    def parse_opf(filepath):
+        """
+        Parse an OPF file and map each manifest item id to its href.
+        Keeps only hrefs ending in 'html' after strip() and lower() (so .xhtml too).
+        Args:
+            filepath (str): path of the OPF file, opened as UTF-8 text.
+        Returns:
+            defaultdict(dict): id -> href; the stored href is not stripped.
+        """
+        result = defaultdict(dict)
+        with open(filepath, 'r', encoding='utf-8') as f:
+            soup = BeautifulSoup(f, 'xml')
+        manifest = soup.find('manifest')
+        if manifest:
+            for item in manifest.find_all('item'):
+                id_ = item.get('id')
+                href = item.get('href')
+                if id_ and href and href.strip().lower().endswith('html'):
+                    result[id_] = href
+        return result
+

 if __name__ == "__main__":
    test_files = [
@@ -44,8 +52,7 @@ if __name__ == "__main__":
    for file in test_files:
        print(f"\n==== 测试文件: {file} ====")
        try:
-            result = parse_opf(file)
-            pprint.pprint(result, indent=2, width=120, sort_dicts=False)
+            result = OPFParser.parse_opf(file)

            # 增加通过id快速打印href的测试
            test_ids = list(result.keys())[:3]  # 取前三个id做演示