'update'
This commit is contained in:
203
test_cfi_sorting.py
Normal file
203
test_cfi_sorting.py
Normal file
@@ -0,0 +1,203 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
测试 EPUB CFI 排序功能
|
||||
|
||||
验证:
|
||||
1. CFI 解析器的基本功能
|
||||
2. 真实书籍标注的排序效果
|
||||
3. 新旧排序方式的对比
|
||||
"""
|
||||
|
||||
import os
import sys
from collections import Counter

# Make the sibling project modules importable when this file is run directly.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))

from epub_cfi_parser import EpubCFIParser, test_cfi_parsing, test_cfi_sorting
from annotationdata import AnnotationManager
from booklist_parse import BookListManager
import config
|
||||
|
||||
|
||||
def test_real_book_cfi_sorting():
    """Print the annotation order of one real book for manual inspection.

    Builds an ``AnnotationManager`` and a ``BookListManager`` from the paths
    in ``config``, picks the first book that has more than three annotations,
    prints its first ten annotations in the order the manager returned them
    (presumably already CFI-sorted -- confirm in ``AnnotationManager``), and
    finally prints the number of annotations per chapter.

    Prints a notice and returns early when no book has enough annotations.
    Nothing is returned; all results go to stdout.
    """
    print("=== 真实数据 CFI 排序测试 ===")

    # Set up the data-access managers.
    annotation_manager = AnnotationManager(config.LOCAL_ANNOTATION_DB)
    book_manager = BookListManager(config.LOCAL_BOOKS_PLIST, config.LOCAL_LIBRARY_DB)

    # Load the book metadata.
    books_info = book_manager.get_books_info()

    # Pick the first book with more than 3 annotations; with fewer entries
    # the ordering is hard to judge.
    test_book_id = None
    test_annotations = []
    for book_id in books_info:
        annotations = annotation_manager.get_annotations(book_id)
        # ``or []`` also covers an entry that is present but None.
        book_annotations = annotations.get(book_id) or []
        if len(book_annotations) > 3:
            test_book_id = book_id
            test_annotations = book_annotations
            break

    if test_book_id is None:
        print("未找到有足够标注的书籍进行测试")
        return

    book_info = books_info[test_book_id]
    book_title = book_info.get('displayname') or book_info.get('itemname') or test_book_id

    print(f"测试书籍: {book_title}")
    print(f"标注数量: {len(test_annotations)}")
    print()

    # Show the first 10 annotations in their current order.
    print("标注CFI排序结果(前10个):")
    print("-" * 80)

    for i, ann in enumerate(test_annotations[:10], 1):
        cfi = ann.get('location', '')
        # The stored text may be None (e.g. a NULL column), which cannot be
        # sliced; fall back to an empty string.
        selected_text = (ann.get('selectedtext') or '')[:60]
        chapter_info = ann.get('chapter_info', '')
        creation_date = ann.get('creationdate', '')

        print(f"{i:2d}. 章节: {chapter_info}")
        print(f" CFI: {cfi}")
        print(f" 文本: {selected_text}...")
        print(f" 时间: {creation_date}")
        print()

    # Count annotations per chapter. Missing, None and empty chapter values
    # all map to the placeholder so the keys stay strings and can be sorted.
    chapter_counts = Counter(
        ann.get('chapter_info') or '未知章节' for ann in test_annotations
    )

    print("\n章节分布:")
    for chapter, count in sorted(chapter_counts.items()):
        print(f" {chapter}: {count} 个标注")
|
||||
|
||||
|
||||
def test_cfi_parser_edge_cases():
    """Feed boundary-case CFI strings to ``EpubCFIParser`` and print the results.

    For every sample the parsed form, the sort key and the extracted chapter
    info are printed; nothing is asserted or returned.
    """
    print("\n=== CFI 解析器边界测试 ===")

    samples = (
        "",  # empty string
        "invalid",  # not a CFI at all
        "epubcfi()",  # empty CFI
        "epubcfi(/6)",  # spine part only
        "epubcfi(/6/14!/4:0)",  # minimal local path
        "epubcfi(/6/14[chapter]!/4/2/1:999)",  # large character offset
        "/6/14!/4:0",  # missing the epubcfi(...) wrapper
    )

    for sample in samples:
        # Run all three parser entry points before printing anything.
        parse_result = EpubCFIParser.parse_cfi(sample)
        key = EpubCFIParser.create_sort_key(sample)
        chapter_name = EpubCFIParser.extract_chapter_info(sample)

        print(f"输入: '{sample}'")
        print(f" 解析: {parse_result}")
        print(f" 排序键: {key}")
        print(f" 章节: {chapter_name}")
        print()
|
||||
|
||||
|
||||
def compare_sorting_methods():
    """Placeholder for comparing the new and old sorting methods.

    Currently prints a section header and a notice that the comparison is
    skipped; no comparison is performed.
    """
    # TODO: add the comparison logic (CFI sorting vs. ZPLSORTKEY sorting).
    # Skipped for now because AnnotationManager would have to be modified to
    # support the old sorting method.
    header = "\n=== 排序方法对比 ==="
    notice = "暂时跳过排序对比(需要实现旧排序方法作为参考)"
    print(header)
    print(notice)
|
||||
|
||||
|
||||
def export_sample_book():
    """Export one annotated book to Markdown to exercise the whole pipeline.

    Picks the first book for which ``BookNotesExporter.build_booksnote``
    returns a non-empty entry, exports it to ``/tmp/test_cfi_export.md`` and
    prints the first ten lines of the exported file.

    Any exception is caught, reported on stdout together with a traceback,
    and not re-raised. Nothing is returned.
    """
    print("\n=== 示例导出测试 ===")

    try:
        # Imported inside the try block, so an import failure is reported by
        # the handler at the bottom instead of propagating.
        from exportbooknotes import BookNotesExporter

        exporter = BookNotesExporter(config)
        book_manager = BookListManager(config.LOCAL_BOOKS_PLIST, config.LOCAL_LIBRARY_DB)
        books_info = book_manager.get_books_info()

        # Find the first book that has annotations. The notes built during
        # the search are kept so they are not built a second time below.
        test_book_id = None
        booksnote = None
        for book_id in books_info:
            candidate = exporter.build_booksnote(book_id)
            if candidate.get(book_id):
                test_book_id = book_id
                booksnote = candidate
                break

        if test_book_id is None:
            print("未找到有标注的书籍")
            return

        book_info = books_info[test_book_id]
        book_title = book_info.get('displayname') or book_info.get('itemname') or test_book_id

        print(f"导出测试书籍: {book_title}")

        # Export the selected book.
        selected_booksinfo = {test_book_id: book_info}

        test_output = '/tmp/test_cfi_export.md'
        exporter.export_booksnote_to_md(booksnote, selected_booksinfo, test_output)

        print(f"测试导出完成: {test_output}")

        # Show the first few lines of the exported file. A failure to read it
        # back is reported separately from an export failure.
        try:
            with open(test_output, 'r', encoding='utf-8') as f:
                lines = f.readlines()
            print("\n导出文件前10行:")
            for i, line in enumerate(lines[:10], 1):
                print(f"{i:2d}: {line.rstrip()}")
            if len(lines) > 10:
                print(f"... (共 {len(lines)} 行)")
        except Exception as e:
            print(f"读取导出文件失败: {e}")

    except Exception as e:
        print(f"导出测试失败: {e}")
        import traceback
        traceback.print_exc()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Imported once here; both failure handlers below use it.
    import traceback

    print("开始 EPUB CFI 排序功能测试...")
    print("=" * 60)

    # 1. Basic CFI parsing test
    test_cfi_parsing()
    print()

    # 2. CFI sorting test
    test_cfi_sorting()

    # 3. Edge-case test
    test_cfi_parser_edge_cases()

    # 4. Real-data test; a failure is reported and the run continues.
    try:
        test_real_book_cfi_sorting()
    except Exception as exc:
        print(f"真实数据测试失败: {exc}")
        traceback.print_exc()

    # 5. Full export test; a failure is reported and the run continues.
    try:
        export_sample_book()
    except Exception as exc:
        print(f"导出测试失败: {exc}")
        traceback.print_exc()

    print("\n测试完成!")
|
||||
Reference in New Issue
Block a user