This commit is contained in:
douboer
2025-10-21 10:46:03 +08:00
parent db9be32815
commit fb0f5ed9c5
20 changed files with 1869 additions and 103 deletions

View File

@@ -61,71 +61,166 @@ class BookNotesExporter:
return toc_tree
def build_booksnote(self, bookid=None):
    """Collect the annotations of each book and attach chapter labels.

    Annotations come from ``AnnotationManager.get_annotations`` and book
    metadata from ``BookListManager.get_books_info``. Each book's
    annotation list is kept in the order delivered by the annotation
    manager (described by the original author as CFI position order);
    this method never reorders it.

    When the book's ``path`` is a directory that contains both an ``.opf``
    and an ``.ncx`` file, every annotation with an ``idref`` gets a
    ``chapter_info`` entry resolved from the table of contents, falling
    back to a search for the selected text inside the chapter's HTML file.
    The annotation dicts are updated in place.

    Args:
        bookid: Optional book identifier forwarded to
            ``AnnotationManager.get_annotations``.

    Returns:
        dict: ``{assetid: [annotation, ...]}``. Books without annotations
        or without a metadata entry are omitted.
    """
    not_found = "(未找到章节)"

    manager = AnnotationManager(self.annotation_db)
    annotations = manager.get_annotations(bookid=bookid)
    bl_manager = BookListManager(plist_path=self.books_plist)
    booksinfo = bl_manager.get_books_info()

    booksnote = {}
    for assetid, notes_list in annotations.items():
        if not notes_list:
            continue
        bookinfo = booksinfo.get(assetid)
        if not bookinfo:
            continue

        # The list is exported whether or not chapter enrichment succeeds.
        booksnote[assetid] = notes_list

        epub_path = bookinfo.get('path')
        if not epub_path or not os.path.isdir(epub_path):
            # No unpacked epub available: keep the annotations as they are.
            continue

        opf_path = self.find_file_by_ext(epub_path, ['.opf'])
        ncx_path = self.find_file_by_ext(epub_path, ['.ncx'])
        if not opf_path or not ncx_path:
            continue

        id2href = parse_opf(opf_path)
        toc_tree = self.get_toc_tree(ncx_path)

        for ann in notes_list:
            idref = ann.get('idref')
            if not idref:
                continue
            href = id2href.get(idref, idref)
            chapter = TOCParser.find_label_path(toc_tree, href, ann.get('filepos'))
            if chapter is None:
                # Fall back to locating the selected text in the HTML file.
                html_path = os.path.join(epub_path, href.split('#')[0])
                selectedtext = ann.get('selectedtext')
                if selectedtext and os.path.exists(html_path):
                    chapter = TOCParser.find_section_by_selectedtext(html_path, selectedtext)
            # Only overwrite chapter_info with a real label from the epub.
            if chapter and chapter != not_found:
                ann['chapter_info'] = chapter

    return booksnote
def export_booksnote_to_md(self, booksnote, booksinfo, out_path=None):
    """Render annotations as Markdown and optionally write them to a file.

    Each book becomes a ``##`` section (with an author line when known).
    Within a book, consecutive annotations sharing the same
    ``chapter_info`` are handed to ``_export_chapter_notes`` as one group,
    so the incoming order of ``notes_list`` is preserved.

    Args:
        booksnote: ``{assetid: [annotation, ...]}``; books with an empty
            list are skipped.
        booksinfo: ``{assetid: info_dict}``; ``displayname``, ``itemname``
            and ``author`` are read from each entry.
        out_path: Optional output file path. Missing parent directories
            are created; a bare file name is written to the current
            working directory.

    Returns:
        str: The generated Markdown text.
    """
    import datetime
    import itertools

    unknown_chapter = '未知章节'

    def chapter_of(ann):
        # Treat a missing, None or empty chapter_info as "unknown" so such
        # notes are exported instead of being merged into a neighbour.
        return ann.get('chapter_info') or unknown_chapter

    now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
    lines = [f'# 笔记导出 {now}\n']

    for assetid, notes_list in booksnote.items():
        if not notes_list:
            continue

        bookinfo = booksinfo.get(assetid, {})
        bookname = bookinfo.get('displayname') or bookinfo.get('itemname') or assetid
        author = bookinfo.get('author', '')

        lines.append(f'\n## {bookname}')
        if author:
            lines.append(f'**作者**: {author}')
        lines.append('')

        # groupby only merges adjacent items, which keeps the list order.
        for chapter, group in itertools.groupby(notes_list, key=chapter_of):
            self._export_chapter_notes(lines, chapter, list(group))

    md = '\n'.join(lines)

    if out_path:
        # dirname is '' for a bare file name; os.makedirs('') would raise.
        out_dir = os.path.dirname(out_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        with open(out_path, 'w', encoding='utf-8') as f:
            f.write(md)
        print(f'[导出] 笔记已按CFI位置排序导出到: {out_path}')

    return md
def _export_chapter_notes(self, lines, chapter_name, chapter_notes):
    """Append the Markdown for one chapter's annotations to ``lines``.

    Emits a ``###`` heading, then for each annotation its selected text
    (numbered by position within the chapter) and its note as a block
    quote, and finally a ``---`` separator. Nothing is appended when
    ``chapter_notes`` is empty.

    Args:
        lines: List of output lines; extended in place.
        chapter_name: Heading text for the chapter.
        chapter_notes: Annotation dicts of this chapter, already in the
            order they should appear.
    """
    if not chapter_notes:
        return

    # Extra metadata is only printed when the instance opts in.
    debug = getattr(self, 'debug_mode', False)

    lines.append(f'### {chapter_name}')
    lines.append('')

    for index, ann in enumerate(chapter_notes, 1):
        selected_text = ann.get('selectedtext', '')
        note = ann.get('note', '')

        if selected_text:
            lines.append(f'**{index}.** {selected_text}')

        if note:
            # Quote every line so notes containing blank lines or several
            # paragraphs stay inside one Markdown block quote.
            for note_line in note.splitlines():
                lines.append(f'> {note_line}' if note_line else '>')

        if debug:
            creation_date = ann.get('creationdate', '')
            location = ann.get('location', '')
            if creation_date:
                lines.append(f'*时间*: {creation_date}')
            if location:
                lines.append(f'*位置*: `{location}`')

        lines.append('')

    lines.append('---')
    lines.append('')
def sync_source_files(config_module):