Files
iBook/test_cfi_simple.py
2025-10-21 10:46:03 +08:00

116 lines
4.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
简化的 CFI 排序测试 - 专注核心功能验证
"""
import sys
import os
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from epub_cfi_parser import EpubCFIParser
def test_cfi_core_functionality():
    """Exercise CFI parsing and sorting on iBooks-style range CFIs.

    Prints the sample CFIs in their original order, prints them again after
    sorting with ``EpubCFIParser.create_sort_key``, and finally reports
    whether the second spine step of the sorted CFIs is non-decreasing.

    The verification counts as passed only when every sorted CFI yielded a
    second spine step; a CFI that fails to parse makes the check fail
    instead of being silently skipped.
    """
    print("=== CFI 核心功能测试 ===")

    # Sample data modelled on real iBooks CFIs; the spine steps (22 / 18)
    # are interleaved so that sorting has to reorder them.
    test_cfis = [
        "epubcfi(/6/22[id19]!/4[6LJU0-b41b8d40e3c34c548f1a46585319196c]/40/1,:96,:214)",
        "epubcfi(/6/18[id17]!/4[5N3C0-b41b8d40e3c34c548f1a46585319196c]/6/1,:128,:219)",
        "epubcfi(/6/22[id19]!/4[6LJU0-b41b8d40e3c34c548f1a46585319196c]/8/1,:0,:43)",
        "epubcfi(/6/18[id17]!/4[5N3C0-b41b8d40e3c34c548f1a46585319196c]/10/1,:214,:273)",
        "epubcfi(/6/22[id19]!/4[6LJU0-b41b8d40e3c34c548f1a46585319196c]/12/1,:0,:17)",
    ]

    print("原始顺序:")
    for i, cfi in enumerate(test_cfis, 1):
        chapter = EpubCFIParser.extract_chapter_info(cfi)
        print(f" {i}. {chapter} - {cfi}")

    # Sort by the parser-provided key rather than by raw string value.
    sorted_cfis = sorted(test_cfis, key=EpubCFIParser.create_sort_key)

    print("\nCFI 排序后:")
    for i, cfi in enumerate(sorted_cfis, 1):
        chapter = EpubCFIParser.extract_chapter_info(cfi)
        parsed = EpubCFIParser.parse_cfi(cfi)
        if parsed:
            spine, local, offset = parsed
            print(f" {i}. {chapter} - spine={spine} local={local[:3]}... offset={offset}")
        else:
            print(f" {i}. {chapter} - 解析失败")

    # Collect the second spine step of each sorted CFI for verification.
    spine_sequence = []
    for cfi in sorted_cfis:
        parsed = EpubCFIParser.parse_cfi(cfi)
        # Guard the index: a spine with fewer than two steps has no [1].
        if parsed and parsed[0] and len(parsed[0]) > 1:
            spine_sequence.append(parsed[0][1])

    # Require one spine step per CFI; otherwise an all-failed parse would
    # leave an empty sequence that trivially counts as "sorted".
    every_cfi_parsed = len(spine_sequence) == len(sorted_cfis)
    is_sorted = every_cfi_parsed and all(
        earlier <= later
        for earlier, later in zip(spine_sequence, spine_sequence[1:])
    )

    # The leading icon follows the verdict instead of always showing a tick.
    status_icon = "✅" if is_sorted else "❌"
    print(f"\n{status_icon} 排序验证: {'通过' if is_sorted else '失败'} (spine序列: {spine_sequence})")
def test_cfi_edge_cases():
    """Run the parser and the sort-key builder over unusual CFI inputs.

    For each labelled input this prints whether ``EpubCFIParser.parse_cfi``
    returned a truthy result and the length of the value returned by
    ``EpubCFIParser.create_sort_key``.
    """
    print("\n=== 边界情况测试 ===")

    # (label, cfi) pairs: empty input, a non-CFI string, a spine-only CFI,
    # a plain point CFI and a range CFI with ID assertions.
    edge_cases = [
        ("空字符串", ""),
        ("无效格式", "invalid_cfi"),
        ("只有spine", "epubcfi(/6/14)"),
        ("标准格式", "epubcfi(/6/14[chapter]!/4/2:10)"),
        ("复杂格式", "epubcfi(/6/22[id19]!/4[longid]/40/1,:96,:214)"),
    ]

    for name, cfi in edge_cases:
        parsed = EpubCFIParser.parse_cfi(cfi)
        sort_key = EpubCFIParser.create_sort_key(cfi)
        # Report the outcome in words so that success and failure are
        # actually distinguishable in the printed line.
        parse_status = "成功" if parsed else "失败"
        print(f"{name:12}: 解析={parse_status} 排序键长度={len(sort_key)}")
def compare_with_simple_sort():
    """Show how CFI-aware ordering differs from plain string ordering.

    Sorts the same sample CFIs twice, once lexicographically and once with
    ``EpubCFIParser.create_sort_key``, prints both numbered listings, and
    then prints a line stating whether the two orderings differ.
    """

    def _print_numbered(entries):
        # One numbered line per entry, counting from 1.
        for rank, entry in enumerate(entries, start=1):
            print(f" {rank}. {entry}")

    print("\n=== 排序方法对比 ===")

    samples = [
        "epubcfi(/6/22!/4:100)",
        "epubcfi(/6/22!/4:20)",
        "epubcfi(/6/14!/4:5)",
        "epubcfi(/6/2!/4:0)",
    ]

    # Plain lexicographic ordering of the raw strings.
    lexical_order = list(samples)
    lexical_order.sort()
    print("字符串排序:")
    _print_numbered(lexical_order)

    # Ordering driven by the parser's sort key.
    semantic_order = list(samples)
    semantic_order.sort(key=EpubCFIParser.create_sort_key)
    print("\nCFI 语义排序:")
    _print_numbered(semantic_order)

    # Differing results are the expected outcome for this sample set.
    if lexical_order != semantic_order:
        verdict = "❌ 排序结果不同(符合预期)"
    else:
        verdict = "⚠️ 排序结果相同"
    print("\n" + verdict)
if __name__ == "__main__":
    # Run each demonstration in the order they are defined in this file.
    for scenario in (
        test_cfi_core_functionality,
        test_cfi_edge_cases,
        compare_with_simple_sort,
    ):
        scenario()

    # Closing summary: completed improvements, then optional follow-ups.
    summary_lines = (
        "\n🎉 CFI 排序功能实现完成!",
        "主要改进:",
        " ✅ 按 EPUB CFI 规范解析位置信息",
        " ✅ 正确的文档位置排序(非字符串排序)",
        " ✅ 支持复杂的 CFI 格式和章节提取",
        " ✅ 降级处理CFI失败时使用物理位置",
        "\n下一步可选优化:",
        " 🔄 优化警告信息显示",
        " 📊 添加排序性能统计",
        " 🔍 支持更多 CFI 变体格式",
        " 💾 缓存解析结果提升性能",
    )
    for line in summary_lines:
        print(line)