Files
iBook/test_cfi_sorting.py
2025-10-21 10:46:03 +08:00

203 lines
6.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
测试 EPUB CFI 排序功能
验证:
1. CFI 解析器的基本功能
2. 真实书籍标注的排序效果
3. 新旧排序方式的对比
"""
import sys
import os
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from epub_cfi_parser import EpubCFIParser, test_cfi_parsing, test_cfi_sorting
from annotationdata import AnnotationManager
from booklist_parse import BookListManager
import config
def test_real_book_cfi_sorting():
    """Print the annotation order of one real book for manual inspection.

    Opens the annotation database and book list configured in ``config``,
    picks the first book that has at least three annotations, prints its
    first ten annotations in the order ``AnnotationManager.get_annotations``
    returned them, and then prints how many annotations fall in each chapter.

    NOTE(review): this function does not sort anything itself; it presumably
    relies on ``get_annotations`` returning CFI-sorted data -- confirm.

    Returns:
        None. Everything is printed; nothing is asserted.
    """
    # Local import, matching the function-scope imports used elsewhere here.
    from collections import Counter

    print("=== 真实数据 CFI 排序测试 ===")

    # Set up the managers and load the book metadata.
    annotation_manager = AnnotationManager(config.LOCAL_ANNOTATION_DB)
    book_manager = BookListManager(config.LOCAL_BOOKS_PLIST, config.LOCAL_LIBRARY_DB)
    books_info = book_manager.get_books_info()

    # Ordering is only interesting with at least this many annotations.
    min_annotations = 3

    # Pick the first book that has enough annotations.
    test_book_id = None
    test_annotations = []
    for book_id in books_info:
        annotations = annotation_manager.get_annotations(book_id)
        book_annotations = annotations.get(book_id, [])
        if len(book_annotations) >= min_annotations:
            test_book_id = book_id
            test_annotations = book_annotations
            break

    if test_book_id is None:
        print("未找到有足够标注的书籍进行测试")
        return

    book_info = books_info[test_book_id]
    book_title = book_info.get('displayname') or book_info.get('itemname') or test_book_id
    print(f"测试书籍: {book_title}")
    print(f"标注数量: {len(test_annotations)}")
    print()

    # Show the first ten annotations in the order they were returned.
    print("标注CFI排序结果前10个:")
    print("-" * 80)
    for i, ann in enumerate(test_annotations[:10], 1):
        cfi = ann.get('location', '')
        # The key may be present with a None value; slicing None would raise.
        selected_text = (ann.get('selectedtext') or '')[:60]
        chapter_info = ann.get('chapter_info', '')
        creation_date = ann.get('creationdate', '')
        print(f"{i:2d}. 章节: {chapter_info}")
        print(f" CFI: {cfi}")
        print(f" 文本: {selected_text}...")
        print(f" 时间: {creation_date}")
        print()

    # Count annotations per chapter.  Missing, None or empty chapter names
    # share the fallback label so the keys stay sortable strings.
    chapter_counts = Counter(
        ann.get('chapter_info') or '未知章节' for ann in test_annotations
    )
    print("\n章节分布:")
    for chapter, count in sorted(chapter_counts.items()):
        print(f" {chapter}: {count} 个标注")
def test_cfi_parser_edge_cases():
    """Run the CFI parser over malformed and minimal inputs and print the results.

    Every sample string is passed to ``EpubCFIParser.parse_cfi``,
    ``EpubCFIParser.create_sort_key`` and
    ``EpubCFIParser.extract_chapter_info``; the three results are printed
    next to the input.  Nothing is asserted -- the output is meant to be
    inspected by hand.
    """
    print("\n=== CFI 解析器边界测试 ===")

    samples = (
        "",  # empty string
        "invalid",  # not a valid format
        "epubcfi()",  # empty CFI
        "epubcfi(/6)",  # spine part only
        "epubcfi(/6/14!/4:0)",  # minimal local path
        "epubcfi(/6/14[chapter]!/4/2/1:999)",  # large offset
        "/6/14!/4:0",  # no epubcfi(...) wrapper
    )

    for sample in samples:
        # Evaluate all three helpers before printing anything for this sample.
        parse_result = EpubCFIParser.parse_cfi(sample)
        key = EpubCFIParser.create_sort_key(sample)
        chapter_name = EpubCFIParser.extract_chapter_info(sample)

        print(f"输入: '{sample}'")
        print(f" 解析: {parse_result}")
        print(f" 排序键: {key}")
        print(f" 章节: {chapter_name}")
        print()
def compare_sorting_methods():
    """Placeholder for comparing the new and the old sorting methods.

    No comparison is performed yet: the function prints a section banner
    followed by a notice that the step is skipped.
    """
    # TODO: add the comparison logic (CFI sorting vs. ZPLSORTKEY sorting).
    # Skipped for now because AnnotationManager would have to be changed to
    # support the old sorting method.
    notices = (
        "\n=== 排序方法对比 ===",
        "暂时跳过排序对比(需要实现旧排序方法作为参考)",
    )
    for notice in notices:
        print(notice)
def export_sample_book():
    """Export one annotated book to Markdown to exercise the whole pipeline.

    Builds a ``BookNotesExporter`` from ``config``, takes the first book for
    which ``build_booksnote`` yields a non-empty entry, exports it to
    ``/tmp/test_cfi_export.md`` and prints the first ten lines of that file.

    Any exception raised along the way is caught here, reported together
    with a traceback, and not re-raised.

    Returns:
        None. Progress and results are printed.
    """
    print("\n=== 示例导出测试 ===")
    try:
        from exportbooknotes import BookNotesExporter

        exporter = BookNotesExporter(config)
        book_manager = BookListManager(config.LOCAL_BOOKS_PLIST, config.LOCAL_LIBRARY_DB)
        books_info = book_manager.get_books_info()

        # Find the first book that has annotations, keeping the notes that
        # were built for it so they are not built a second time below.
        test_book_id = None
        booksnote = None
        for book_id in books_info:
            candidate = exporter.build_booksnote(book_id)
            if candidate.get(book_id):
                test_book_id = book_id
                booksnote = candidate
                break

        if test_book_id is None:
            print("未找到有标注的书籍")
            return

        book_info = books_info[test_book_id]
        book_title = book_info.get('displayname') or book_info.get('itemname') or test_book_id
        print(f"导出测试书籍: {book_title}")

        # Export the selected book.
        selected_booksinfo = {test_book_id: book_info}
        test_output = '/tmp/test_cfi_export.md'
        exporter.export_booksnote_to_md(booksnote, selected_booksinfo, test_output)
        print(f"测试导出完成: {test_output}")

        # Preview the beginning of the exported file.  Only the read itself
        # is guarded; anything else falls through to the outer handler.
        try:
            with open(test_output, 'r', encoding='utf-8') as f:
                lines = f.readlines()
        except (OSError, UnicodeDecodeError) as e:
            print(f"读取导出文件失败: {e}")
        else:
            print("\n导出文件前10行:")
            for i, line in enumerate(lines[:10], 1):
                print(f"{i:2d}: {line.rstrip()}")
            if len(lines) > 10:
                print(f"... (共 {len(lines)} 行)")
    except Exception as e:
        # Boundary of this manual check: report the failure and carry on.
        print(f"导出测试失败: {e}")
        import traceback
        traceback.print_exc()
if __name__ == "__main__":
    # Script entry point: run every check in order and print the results.
    print("开始 EPUB CFI 排序功能测试...")
    print("=" * 60)

    # Step 1: basic CFI parsing test.
    test_cfi_parsing()
    print()

    # Step 2: CFI sorting test.
    test_cfi_sorting()

    # Step 3: parser edge cases.
    test_cfi_parser_edge_cases()

    # Step 4: real-data test; a failure is reported but does not stop the
    # remaining steps.
    try:
        test_real_book_cfi_sorting()
    except Exception as exc:
        print(f"真实数据测试失败: {exc}")
        import traceback
        traceback.print_exc()

    # Step 5: full export test, guarded the same way.
    try:
        export_sample_book()
    except Exception as exc:
        print(f"导出测试失败: {exc}")
        import traceback
        traceback.print_exc()

    print("\n测试完成!")