'update'

2025-10-21 10:46:03 +08:00
parent db9be32815
commit fb0f5ed9c5
20 changed files with 1869 additions and 103 deletions
--- a/test_cfi_sorting.py
+++ b/test_cfi_sorting.py
@@ -0,0 +1,203 @@
+#!/usr/bin/env python3
+"""
+测试 EPUB CFI 排序功能
+
+验证：
+1. CFI 解析器的基本功能
+2. 真实书籍标注的排序效果
+3. 新旧排序方式的对比
+"""
+
+import sys
+import os
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+
+from epub_cfi_parser import EpubCFIParser, test_cfi_parsing, test_cfi_sorting
+from annotationdata import AnnotationManager
+from booklist_parse import BookListManager
+import config
+
+
+def test_real_book_cfi_sorting():
+    """Print the first annotations and the chapter distribution of one real book to inspect CFI ordering."""
+    print("=== 真实数据 CFI 排序测试 ===")
+    
+    # Initialise the managers from the paths declared in config
+    annotation_manager = AnnotationManager(config.LOCAL_ANNOTATION_DB)
+    book_manager = BookListManager(config.LOCAL_BOOKS_PLIST, config.LOCAL_LIBRARY_DB)
+    
+    # Fetch the book metadata (used below as a mapping keyed by book id)
+    books_info = book_manager.get_books_info()
+    
+    # Pick the first book that has enough annotations
+    test_book_id = None
+    test_annotations = []
+    
+    for book_id in books_info.keys():
+        annotations = annotation_manager.get_annotations(book_id)
+        book_annotations = annotations.get(book_id, [])
+        
+        if len(book_annotations) > 3:  # strictly more than 3 needed; NOTE(review): original comment said "at least 3"
+            test_book_id = book_id
+            test_annotations = book_annotations
+            break
+    
+    if not test_book_id:
+        print("未找到有足够标注的书籍进行测试")
+        return
+    
+    book_info = books_info[test_book_id]
+    book_title = book_info.get('displayname') or book_info.get('itemname') or test_book_id
+    
+    print(f"测试书籍: {book_title}")
+    print(f"标注数量: {len(test_annotations)}")
+    print()
+    
+    # Show the first 10 annotations in the order get_annotations returned them
+    print("标注CFI排序结果（前10个）:")
+    print("-" * 80)
+    
+    for i, ann in enumerate(test_annotations[:10], 1):
+        cfi = ann.get('location', '')
+        selected_text = ann.get('selectedtext', '')[:60]
+        chapter_info = ann.get('chapter_info', '')
+        creation_date = ann.get('creationdate', '')
+        
+        print(f"{i:2d}. 章节: {chapter_info}")
+        print(f"    CFI: {cfi}")
+        print(f"    文本: {selected_text}...")
+        print(f"    时间: {creation_date}")
+        print()
+    
+    # Count annotations per chapter
+    chapter_counts = {}
+    for ann in test_annotations:
+        chapter = ann.get('chapter_info', '未知章节')
+        chapter_counts[chapter] = chapter_counts.get(chapter, 0) + 1
+    
+    print("\n章节分布:")
+    for chapter, count in sorted(chapter_counts.items()):
+        print(f"  {chapter}: {count} 个标注")
+
+
+def test_cfi_parser_edge_cases():
+    """Run the CFI parser on edge-case inputs and print the parse result, sort key and chapter for each."""
+    print("\n=== CFI 解析器边界测试 ===")
+    
+    edge_cases = [
+        "",  # empty string
+        "invalid",  # invalid format
+        "epubcfi()",  # empty CFI
+        "epubcfi(/6)",  # spine part only
+        "epubcfi(/6/14!/4:0)",  # minimal local path
+        "epubcfi(/6/14[chapter]!/4/2/1:999)",  # large offset
+        "/6/14!/4:0",  # no epubcfi(...) wrapper
+    ]
+    
+    for cfi in edge_cases:
+        parsed = EpubCFIParser.parse_cfi(cfi)
+        sort_key = EpubCFIParser.create_sort_key(cfi)
+        chapter = EpubCFIParser.extract_chapter_info(cfi)
+        
+        print(f"输入: '{cfi}'")
+        print(f"  解析: {parsed}")
+        print(f"  排序键: {sort_key}")
+        print(f"  章节: {chapter}")
+        print()
+
+
+def compare_sorting_methods():
+    """Placeholder for comparing the new and old sorting methods; currently only prints a skip notice."""
+    print("\n=== 排序方法对比 ===")
+    
+    # Comparison logic (CFI ordering vs ZPLSORTKEY ordering) could be added here.
+    # Skipped for now: AnnotationManager would need changes to support the old ordering.
+    print("暂时跳过排序对比（需要实现旧排序方法作为参考）")
+
+
+def export_sample_book():
+    """Export the first book that has annotations to Markdown and print the head of the output file."""
+    print("\n=== 示例导出测试 ===")
+    
+    try:
+        from exportbooknotes import BookNotesExporter
+        
+        exporter = BookNotesExporter(config)
+        book_manager = BookListManager(config.LOCAL_BOOKS_PLIST, config.LOCAL_LIBRARY_DB)
+        books_info = book_manager.get_books_info()
+        
+        # Find the first book whose built notes are non-empty
+        test_book_id = None
+        for book_id in books_info.keys():
+            booksnote = exporter.build_booksnote(book_id)
+            if booksnote.get(book_id):
+                test_book_id = book_id
+                break
+        
+        if not test_book_id:
+            print("未找到有标注的书籍")
+            return
+        
+        book_info = books_info[test_book_id]
+        book_title = book_info.get('displayname') or book_info.get('itemname') or test_book_id
+        
+        print(f"导出测试书籍: {book_title}")
+        
+        # Build the notes and export them
+        booksnote = exporter.build_booksnote(test_book_id)
+        selected_booksinfo = {test_book_id: book_info}
+        
+        test_output = '/tmp/test_cfi_export.md'
+        exporter.export_booksnote_to_md(booksnote, selected_booksinfo, test_output)
+        
+        print(f"测试导出完成: {test_output}")
+        
+        # Show the first lines of the exported file
+        try:
+            with open(test_output, 'r', encoding='utf-8') as f:
+                lines = f.readlines()
+                print(f"\n导出文件前10行:")
+                for i, line in enumerate(lines[:10], 1):
+                    print(f"{i:2d}: {line.rstrip()}")
+                if len(lines) > 10:
+                    print(f"... (共 {len(lines)} 行)")
+        except Exception as e:
+            print(f"读取导出文件失败: {e}")
+    
+    except Exception as e:
+        print(f"导出测试失败: {e}")
+        import traceback
+        traceback.print_exc()
+
+
+if __name__ == "__main__":
+    print("开始 EPUB CFI 排序功能测试...")
+    print("=" * 60)
+    
+    # 1. Basic CFI parsing test
+    test_cfi_parsing()
+    print()
+    
+    # 2. CFI sorting test
+    test_cfi_sorting()
+    
+    # 3. Edge-case test
+    test_cfi_parser_edge_cases()
+    
+    # 4. Real-data test (a failure is reported with a traceback and the run continues)
+    try:
+        test_real_book_cfi_sorting()
+    except Exception as e:
+        print(f"真实数据测试失败: {e}")
+        import traceback
+        traceback.print_exc()
+    
+    # 5. Full export test (a failure is reported with a traceback and the run continues)
+    try:
+        export_sample_book()
+    except Exception as e:
+        print(f"导出测试失败: {e}")
+        import traceback
+        traceback.print_exc()
+    
+    print("\n测试完成！")