'update'

2025-10-21 10:46:03 +08:00
parent db9be32815
commit fb0f5ed9c5
20 changed files with 1869 additions and 103 deletions
--- a/exportbooknotes.py
+++ b/exportbooknotes.py
@@ -61,71 +61,166 @@ class BookNotesExporter:
        return toc_tree

    def build_booksnote(self, bookid=None):
+        """
+        Build {assetid: notes} data, adding 'chapter_info' from the EPUB TOC when resolvable.
+        
+        Returns:
+            dict: {assetid: [annotations_list]}; list order is kept as returned by
+                  AnnotationManager.get_annotations (presumably CFI order - TODO confirm)
+        """
        manager = AnnotationManager(self.annotation_db)
        annotations = manager.get_annotations(bookid=bookid)
        bl_manager = BookListManager(plist_path=self.books_plist)
        booksinfo = bl_manager.get_books_info()
-        booksnote = defaultdict(lambda: defaultdict(dict))
-        for assetid, notes in annotations.items():
+        
+        booksnote = {}
+        
+        for assetid, notes_list in annotations.items():
+            if not notes_list:  # notes_list is a list now; skip empty ones
+                continue
+                
            bookinfo = booksinfo.get(assetid)
            if not bookinfo:
                continue
+                
            epub_path = bookinfo.get('path')
            if not epub_path or not os.path.isdir(epub_path):
+                # No EPUB directory: keep the notes as received, without chapter lookup
+                booksnote[assetid] = notes_list
                continue
+            
+            # Try to add chapter info from the EPUB's OPF/NCX files
            opf_path = self.find_file_by_ext(epub_path, ['.opf'])
            ncx_path = self.find_file_by_ext(epub_path, ['.ncx'])
-            if not opf_path or not ncx_path:
-                continue
-            id2href = parse_opf(opf_path)
-            toc_tree = self.get_toc_tree(ncx_path)
-            for uuid, ann in notes.items():
-                idref = ann['idref']
-                filepos = ann['filepos']
-                href = id2href.get(idref, idref)
-                chapter = TOCParser.find_label_path(toc_tree, href, filepos)
-                if chapter is None:
-                    html_path = os.path.join(epub_path, href.split('#')[0])
-                    selectedtext = ann.get('selectedtext')
-                    if os.path.exists(html_path) and selectedtext:
-                        section = TOCParser.find_section_by_selectedtext(html_path, selectedtext)
-                        if section:
-                            chapter = section
-                        else:
-                            chapter = "(未找到章节)"
-                    else:
-                        chapter = "(未找到章节)"
-                booksnote[assetid][chapter][uuid] = {
-                    'creationdate': ann['creationdate'],
-                    'filepos': filepos,
-                    'idref': href,
-                    'note': ann['note'],
-                    'selectedtext': ann['selectedtext']
-                }
+            
+            if opf_path and ncx_path:
+                id2href = parse_opf(opf_path)
+                toc_tree = self.get_toc_tree(ncx_path)
+                
+                # Add chapter info to each note, keeping list order
+                for ann in notes_list:
+                    idref = ann.get('idref')
+                    filepos = ann.get('filepos')
+                    
+                    if idref:
+                        href = id2href.get(idref, idref)
+                        chapter = TOCParser.find_label_path(toc_tree, href, filepos)
+                        
+                        if chapter is None:
+                            # Fall back to locating the section via the selected text
+                            html_path = os.path.join(epub_path, href.split('#')[0])
+                            selectedtext = ann.get('selectedtext')
+                            if os.path.exists(html_path) and selectedtext:
+                                section = TOCParser.find_section_by_selectedtext(html_path, selectedtext)
+                                chapter = section if section else "(未找到章节)"
+                            else:
+                                chapter = "(未找到章节)"
+                        
+                        # Store only a resolved chapter; otherwise any existing chapter_info stays
+                        if chapter and chapter != "(未找到章节)":
+                            ann['chapter_info'] = chapter
+            
+            booksnote[assetid] = notes_list  # order unchanged
+        
        return booksnote

    def export_booksnote_to_md(self, booksnote, booksinfo, out_path=None):
+        """
+        Export notes to Markdown, one section per book, grouped by chapter.
+
+        Args:
+            booksnote: {assetid: [annotation dicts]}; list order is preserved
+            booksinfo: book-info dict keyed by assetid
+            out_path: optional output file path; missing parent dirs are created
+
+        Returns:
+            str: the Markdown content
+        """
        import datetime
        now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
        lines = [f'# 笔记导出 {now}\n']
-        for assetid, chapters in booksnote.items():
-            bookname = booksinfo.get(assetid, {}).get('itemname', assetid)
-            lines.append(f'\n## {bookname}\n')
-            for chapter, notes in chapters.items():
-                lines.append(f'### {chapter}')
-                for uuid, ann in notes.items():
-                    sel = ann.get('selectedtext')
-                    note = ann.get('note')
-                    if sel:
-                        lines.append(sel)
-                    if note:
-                        lines.append(f'> {note}')
-                    lines.append('')
+
+        for assetid, notes_list in booksnote.items():
+            if not notes_list:  # skip books without annotations
+                continue
+
+            bookinfo = booksinfo.get(assetid, {})
+            bookname = bookinfo.get('displayname') or bookinfo.get('itemname') or assetid
+            author = bookinfo.get('author', '')
+
+            lines.append(f'\n## {bookname}')
+            if author:
+                lines.append(f'**作者**: {author}')
+            lines.append('')
+
+            # Group consecutive notes by chapter, preserving list order
+            current_chapter = None
+            chapter_notes = []
+
+            for ann in notes_list:
+                chapter_info = ann.get('chapter_info') or '未知章节'
+
+                # Chapter changed: flush the previous chapter's notes first
+                if current_chapter is not None and current_chapter != chapter_info:
+                    self._export_chapter_notes(lines, current_chapter, chapter_notes)
+                    chapter_notes = []
+
+                current_chapter = chapter_info
+                chapter_notes.append(ann)
+
+            # Flush the notes of the last chapter
+            if current_chapter is not None and chapter_notes:
+                self._export_chapter_notes(lines, current_chapter, chapter_notes)
+
        md = '\n'.join(lines)
+
        if out_path:
+            # abspath: a bare filename has an empty dirname, which makedirs rejects
+            os.makedirs(os.path.dirname(os.path.abspath(out_path)), exist_ok=True)
            with open(out_path, 'w', encoding='utf-8') as f:
                f.write(md)
+            print(f'[导出] 笔记已按CFI位置排序导出到: {out_path}')
+
        return md
+    
+    def _export_chapter_notes(self, lines, chapter_name, chapter_notes):
+        """
+        Append one chapter's section (heading, numbered entries, rule) to lines.
+
+        Entries are numbered consecutively. Note-only annotations are kept;
+        annotations with neither selected text nor a note are skipped.
+
+        Args:
+            lines: list of output lines, extended in place
+            chapter_name: heading text for this section
+            chapter_notes: annotation dicts of this chapter, in output order
+        """
+        if not chapter_notes:
+            return
+
+        lines.append(f'### {chapter_name}')
+        lines.append('')
+
+        debug = getattr(self, 'debug_mode', False)
+        number = 0
+        for ann in chapter_notes:
+            selected_text = ann.get('selectedtext')
+            note = ann.get('note')
+            if not selected_text and not note:
+                continue
+            number += 1
+            lines.append(f'**{number}.** {selected_text}' if selected_text else f'**{number}.**')
+            if note:
+                lines.append(f'> {note}')
+            if debug:  # optional diagnostics (creation time, location)
+                if ann.get('creationdate'):
+                    lines.append(f"*时间*: {ann['creationdate']}")
+                if ann.get('location'):
+                    lines.append(f"*位置*: `{ann['location']}`")
+            lines.append('')
+
+        lines.append('---')
+        lines.append('')


 def sync_source_files(config_module):