This commit is contained in:
douboer
2025-10-21 10:46:03 +08:00
parent db9be32815
commit fb0f5ed9c5
20 changed files with 1869 additions and 103 deletions

203
test_cfi_sorting.py Normal file
View File

@@ -0,0 +1,203 @@
#!/usr/bin/env python3
"""
测试 EPUB CFI 排序功能
验证:
1. CFI 解析器的基本功能
2. 真实书籍标注的排序效果
3. 新旧排序方式的对比
"""
import sys
import os
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from epub_cfi_parser import EpubCFIParser, test_cfi_parsing, test_cfi_sorting
from annotationdata import AnnotationManager
from booklist_parse import BookListManager
import config
def test_real_book_cfi_sorting():
    """Print the annotation order of a real book to eyeball CFI sorting.

    Builds an ``AnnotationManager`` and a ``BookListManager`` from the paths
    in ``config``, picks the first book that has at least three annotations,
    prints the first ten annotations in the order the manager returned them
    (presumably already CFI-sorted by ``AnnotationManager`` -- confirm there),
    and finishes with a per-chapter annotation count.

    Returns:
        None. All results are written to stdout; if no book has enough
        annotations a notice is printed and the function returns early.
    """
    from collections import Counter

    print("=== 真实数据 CFI 排序测试 ===")

    # Set up the managers.
    annotation_manager = AnnotationManager(config.LOCAL_ANNOTATION_DB)
    book_manager = BookListManager(config.LOCAL_BOOKS_PLIST, config.LOCAL_LIBRARY_DB)

    # Load book metadata.
    books_info = book_manager.get_books_info()

    # Sorting is only meaningful with at least this many annotations.
    # (The original compared with ``> 3``, which silently required four.)
    min_annotations = 3

    # Pick the first book with enough annotations.
    test_book_id = None
    test_annotations = []
    for book_id in books_info:
        annotations = annotation_manager.get_annotations(book_id)
        book_annotations = annotations.get(book_id, [])
        if len(book_annotations) >= min_annotations:
            test_book_id = book_id
            test_annotations = book_annotations
            break

    if test_book_id is None:
        print("未找到有足够标注的书籍进行测试")
        return

    book_info = books_info[test_book_id]
    book_title = book_info.get('displayname') or book_info.get('itemname') or test_book_id
    print(f"测试书籍: {book_title}")
    print(f"标注数量: {len(test_annotations)}")
    print()

    # Show the first 10 annotations in their sorted order.
    print("标注CFI排序结果前10个:")
    print("-" * 80)
    for i, ann in enumerate(test_annotations[:10], 1):
        cfi = ann.get('location', '')
        # ``or ''`` guards against a key that is present but holds None,
        # which would make the slice raise TypeError.
        selected_text = (ann.get('selectedtext') or '')[:60]
        chapter_info = ann.get('chapter_info', '')
        creation_date = ann.get('creationdate', '')
        print(f"{i:2d}. 章节: {chapter_info}")
        print(f" CFI: {cfi}")
        print(f" 文本: {selected_text}...")
        print(f" 时间: {creation_date}")
        print()

    # Tally annotations per chapter. Missing/None/empty chapter values are
    # folded into one placeholder so the keys stay sortable strings.
    chapter_counts = Counter(
        ann.get('chapter_info') or '未知章节' for ann in test_annotations
    )
    print("\n章节分布:")
    for chapter, count in sorted(chapter_counts.items()):
        print(f" {chapter}: {count} 个标注")
def test_cfi_parser_edge_cases():
    """Run the CFI parser over degenerate and boundary inputs.

    For every sample string this calls ``EpubCFIParser.parse_cfi``,
    ``EpubCFIParser.create_sort_key`` and
    ``EpubCFIParser.extract_chapter_info`` and prints the three results.
    Nothing is asserted; the output is meant for manual inspection.
    """
    print("\n=== CFI 解析器边界测试 ===")

    samples = (
        "",  # empty string
        "invalid",  # not a CFI at all
        "epubcfi()",  # empty CFI
        "epubcfi(/6)",  # spine part only
        "epubcfi(/6/14!/4:0)",  # minimal local path
        "epubcfi(/6/14[chapter]!/4/2/1:999)",  # large character offset
        "/6/14!/4:0",  # missing the epubcfi(...) wrapper
    )

    for sample in samples:
        # Compute all three results first so a failing parser call prints
        # nothing for that sample, exactly like the original ordering.
        parse_result = EpubCFIParser.parse_cfi(sample)
        ordering_key = EpubCFIParser.create_sort_key(sample)
        chapter_label = EpubCFIParser.extract_chapter_info(sample)

        print(f"输入: '{sample}'")
        print(f" 解析: {parse_result}")
        print(f" 排序键: {ordering_key}")
        print(f" 章节: {chapter_label}")
        print()
def compare_sorting_methods():
    """Placeholder for comparing the new CFI sort with the legacy sort.

    Currently only prints a section banner and a notice that the comparison
    is skipped.
    """
    # TODO: add the comparison itself (CFI order vs. ZPLSORTKEY order).
    # Skipped for now because AnnotationManager would first have to be
    # changed to support the old sorting method.
    banner = "\n=== 排序方法对比 ==="
    skip_notice = "暂时跳过排序对比(需要实现旧排序方法作为参考)"
    print(banner)
    print(skip_notice)
def export_sample_book():
    """Export one annotated book to Markdown and preview the result.

    Looks for the first book for which ``BookNotesExporter.build_booksnote``
    yields a non-empty entry, exports it to ``/tmp/test_cfi_export.md`` via
    ``export_booksnote_to_md`` and prints the first ten lines of that file.

    Returns:
        None. Progress and failures are reported on stdout. Any exception
        raised during the export is caught here and printed together with
        its traceback, so the function never raises to its caller.
    """
    import traceback

    print("\n=== 示例导出测试 ===")
    try:
        # Imported lazily so a broken exporter module only fails this step.
        from exportbooknotes import BookNotesExporter

        exporter = BookNotesExporter(config)
        book_manager = BookListManager(config.LOCAL_BOOKS_PLIST, config.LOCAL_LIBRARY_DB)
        books_info = book_manager.get_books_info()

        # Find the first book that has annotations. Keep the notes built
        # during the search so the chosen book is not built a second time.
        test_book_id = None
        booksnote = None
        for book_id in books_info:
            candidate = exporter.build_booksnote(book_id)
            if candidate.get(book_id):
                test_book_id = book_id
                booksnote = candidate
                break

        if test_book_id is None:
            print("未找到有标注的书籍")
            return

        book_info = books_info[test_book_id]
        book_title = book_info.get('displayname') or book_info.get('itemname') or test_book_id
        print(f"导出测试书籍: {book_title}")

        # Export the notes that were already built above.
        selected_booksinfo = {test_book_id: book_info}
        test_output = '/tmp/test_cfi_export.md'
        exporter.export_booksnote_to_md(booksnote, selected_booksinfo, test_output)
        print(f"测试导出完成: {test_output}")

        # Preview the head of the exported file. A read failure is reported
        # but does not count as an export failure.
        try:
            with open(test_output, 'r', encoding='utf-8') as f:
                lines = f.readlines()
        except (OSError, UnicodeError) as e:
            print(f"读取导出文件失败: {e}")
        else:
            print("\n导出文件前10行:")
            for i, line in enumerate(lines[:10], 1):
                print(f"{i:2d}: {line.rstrip()}")
            if len(lines) > 10:
                print(f"... (共 {len(lines)} 行)")
    except Exception as e:
        # Boundary of a diagnostic script: report and continue.
        print(f"导出测试失败: {e}")
        traceback.print_exc()
if __name__ == "__main__":
    # Script entry point: run every check in order. The two steps that need
    # real local data are guarded so one failure does not stop the run.
    import traceback

    print("开始 EPUB CFI 排序功能测试...")
    print("=" * 60)

    # 1. Basic CFI parsing checks.
    test_cfi_parsing()
    print()

    # 2. CFI sorting checks.
    test_cfi_sorting()

    # 3. Parser edge cases.
    test_cfi_parser_edge_cases()

    # 4. Real-data check.
    try:
        test_real_book_cfi_sorting()
    except Exception as exc:
        print(f"真实数据测试失败: {exc}")
        traceback.print_exc()

    # 5. End-to-end export check.
    try:
        export_sample_book()
    except Exception as exc:
        print(f"导出测试失败: {exc}")
        traceback.print_exc()

    print("\n测试完成!")