'update'

2025-08-15 17:20:30 +08:00
parent 0bc6844209
commit 4e3b8abc34
12 changed files with 406 additions and 516 deletions
--- a/exportbooknotes.py
+++ b/exportbooknotes.py
@@ -1,31 +1,17 @@
 """
-exportbooknotes.py
------------------
+exportbooknotes.py (OOP版)
+-------------------------
 功能：
    - 自动同步iBooks数据库和元数据文件到本地data目录。
    - 解析AEAnnotation.sqlite、Books.plist、BKLibrary.sqlite，构建结构化笔记数据。
    - 解析epub目录和章节信息，定位每条笔记所属章节。
    - 命令行菜单按最近打开时间降序展示书籍列表，供用户选择导出。
    - 仅导出选中书籍的所有笔记，按章节分组，生成Markdown文件。
-
 依赖：config.py 统一管理路径和配置项。
-
-主要数据流：
-    1. 数据同步到data目录
-    2. 解析Books.plist获取书籍元数据
-    3. 解析BKLibrary.sqlite获取最近打开时间
-    4. 菜单排序与显示（书名+时间戳）
-    5. 解析AEAnnotation.sqlite获取笔记
-    6. 解析epub目录，定位章节
-    7. 导出Markdown文件
-
-依赖：Python 3, InquirerPy, bs4, shutil, os, datetime, sqlite3
-
-主要数据流：
-
-典型用法：
-    python exportbooknotes.py
-    # 按提示选择书籍，自动导出笔记到export_notes目录
+主要接口：BookNotesExporter
+    - run()：命令行交互式导出主流程
+    - build_booksnote(bookid=None)：构建结构化笔记数据
+    - export_booksnote_to_md(booksnote, booksinfo, out_path=None)：导出为Markdown
 """
 import config
 """
@@ -40,117 +26,113 @@ booksnote = {
      }}}
 }
 """
-from collections import defaultdict
 import os
-from annotationdata import get_annotations
-from booklist_parse import parse_books_plist
+from collections import defaultdict
+from annotationdata import AnnotationManager
+from booklist_parse import BookListManager
 from opf_parse import parse_opf
-from toc_parse import parse_navpoints, find_label_path
+from toc_parse import TOCParser
 from bs4 import BeautifulSoup
-from pprint import pprint

-def find_file_by_ext(root, exts):
-    """在root下递归查找第一个指定后缀的文件"""
-    for dirpath, _, files in os.walk(root):
-        for f in files:
-            for ext in exts:
-                if f.lower().endswith(ext):
-                    return os.path.join(dirpath, f)
-    return None

-def get_toc_tree(toc_path):
-    with open(toc_path, 'r', encoding='utf-8') as f:
-        soup = BeautifulSoup(f, 'xml')
-    nav_map = soup.find('navMap')
+class BookNotesExporter:  # Builds nested per-book/per-chapter note data and renders it as Markdown.
+    def __init__(self, config_module=config):  # config_module: object exposing the LOCAL_* attributes read below; defaults to the imported config module.
+        self.config = config_module  # Stored as-is; not read by the methods shown in this diff.
+        self.annotation_db = config_module.LOCAL_ANNOTATION_DB  # Handed to AnnotationManager in build_booksnote.
+        self.books_plist = config_module.LOCAL_BOOKS_PLIST  # Handed to BookListManager in build_booksnote.
+        self.library_db = config_module.LOCAL_LIBRARY_DB  # Stored only; no method shown in this diff reads it.

-    nav_points = nav_map.find_all('navPoint', recursive=False)
-    toc_tree = parse_navpoints(nav_points)
-    #pprint(toc_tree, indent=2, depth=5)
-    return toc_tree
+    @staticmethod
+    def find_file_by_ext(root, exts):  # Return the path of the first file under root whose name ends with one of exts, or None.
+        for dirpath, _, files in os.walk(root):  # Recursive walk; the first match in os.walk order wins.
+            for f in files:
+                for ext in exts:
+                    if f.lower().endswith(ext):  # Only the file name is lowercased, so exts must already be lowercase (build_booksnote passes '.opf' / '.ncx').
+                        return os.path.join(dirpath, f)
+        return None  # Nothing under root matched any extension.

-def build_booksnote(annotation_db=config.LOCAL_ANNOTATION_DB, books_plist=config.LOCAL_BOOKS_PLIST, bookid=None):
-    # 支持只处理特定 assetid 的笔记
-    annotations = get_annotations(annotation_db, bookid=bookid)
-    booksinfo = parse_books_plist(books_plist)
-    booksnote = defaultdict(lambda: defaultdict(dict))
-    for assetid, notes in annotations.items():
-        # 获取epub路径
-        bookinfo = booksinfo.get(assetid)
-        if not bookinfo:
-            continue
-        epub_path = bookinfo.get('path')
-        if not epub_path or not os.path.isdir(epub_path):
-            continue
-        # 查找opf和ncx
-        opf_path = find_file_by_ext(epub_path, ['.opf'])
-        ncx_path = find_file_by_ext(epub_path, ['.ncx'])
-        if not opf_path or not ncx_path:
-            continue
-        id2href = parse_opf(opf_path)
-        toc_tree = get_toc_tree(ncx_path)
-        for uuid, ann in notes.items():
-            idref = ann['idref']
-            filepos = ann['filepos']
-            href = id2href.get(idref, idref)
-            chapter = find_label_path(toc_tree, href, filepos)
-            if chapter is None:
-                # 直接从html文件获取章节信息
-                html_path = os.path.join(epub_path, href.split('#')[0])
-                selectedtext = ann.get('selectedtext')
-                if os.path.exists(html_path) and selectedtext:
-                    from toc_parse import find_section_by_selectedtext
-                    section = find_section_by_selectedtext(html_path, selectedtext)
-                    if section:
-                        chapter = section
+    @staticmethod
+    def get_toc_tree(toc_path):  # Parse the file at toc_path as XML and return TOCParser.parse_navpoints() applied to the top-level navPoints.
+        with open(toc_path, 'r', encoding='utf-8') as f:  # The file is always decoded as UTF-8.
+            soup = BeautifulSoup(f, 'xml')
+        nav_map = soup.find('navMap')  # NOTE(review): find() yields None when there is no <navMap>; the next line would then raise AttributeError.
+        nav_points = nav_map.find_all('navPoint', recursive=False)  # Direct children of navMap only.
+        toc_tree = TOCParser.parse_navpoints(nav_points)  # presumably descends into nested navPoints itself — confirm in toc_parse.
+        return toc_tree
+
+    def build_booksnote(self, bookid=None):  # Return {assetid: {chapter: {uuid: note fields}}}; bookid is forwarded unchanged to get_annotations (None presumably means all books — confirm in annotationdata).
+        manager = AnnotationManager(self.annotation_db)
+        annotations = manager.get_annotations(bookid=bookid)  # Iterated below as {assetid: {uuid: annotation dict}}.
+        bl_manager = BookListManager(plist_path=self.books_plist)
+        booksinfo = bl_manager.get_books_info()  # Looked up by assetid; each entry is read with .get('path').
+        booksnote = defaultdict(lambda: defaultdict(dict))  # Nested defaultdicts allow booksnote[assetid][chapter][uuid] to be assigned directly.
+        for assetid, notes in annotations.items():
+            bookinfo = booksinfo.get(assetid)
+            if not bookinfo:  # Skip annotations whose book has no metadata entry.
+                continue
+            epub_path = bookinfo.get('path')
+            if not epub_path or not os.path.isdir(epub_path):  # Only books whose 'path' is an existing directory are processed.
+                continue
+            opf_path = self.find_file_by_ext(epub_path, ['.opf'])
+            ncx_path = self.find_file_by_ext(epub_path, ['.ncx'])
+            if not opf_path or not ncx_path:  # Both files are required; a book missing either one is skipped without a message.
+                continue
+            id2href = parse_opf(opf_path)  # Used below to map an annotation's idref to an href.
+            toc_tree = self.get_toc_tree(ncx_path)
+            for uuid, ann in notes.items():
+                idref = ann['idref']
+                filepos = ann['filepos']
+                href = id2href.get(idref, idref)  # Fall back to the raw idref when the OPF mapping has no entry for it.
+                chapter = TOCParser.find_label_path(toc_tree, href, filepos)
+                if chapter is None:  # TOC lookup gave nothing; try to locate the selected text in the HTML file instead.
+                    html_path = os.path.join(epub_path, href.split('#')[0])  # '#fragment' is dropped; NOTE(review): joined to the book root, not the OPF's directory — confirm hrefs are root-relative.
+                    selectedtext = ann.get('selectedtext')
+                    if os.path.exists(html_path) and selectedtext:
+                        section = TOCParser.find_section_by_selectedtext(html_path, selectedtext)
+                        if section:
+                            chapter = section
+                        else:
+                            chapter = "(未找到章节)"  # Placeholder chapter key used when no section is found.
                    else:
                        chapter = "(未找到章节)"
-                else:
-                    chapter = "(未找到章节)"
-            booksnote[assetid][chapter][uuid] = {
-                'creationdate': ann['creationdate'],
-                'filepos': filepos,
-                'idref': href,
-                'note': ann['note'],
-                'selectedtext': ann['selectedtext']
-            }
-    return booksnote
+                booksnote[assetid][chapter][uuid] = {
+                    'creationdate': ann['creationdate'],
+                    'filepos': filepos,
+                    'idref': href,  # Stores the resolved href, not the original idref value.
+                    'note': ann['note'],
+                    'selectedtext': ann['selectedtext']  # Indexed directly here (KeyError if absent), although it was read with .get() earlier.
+                }
+        return booksnote

-import datetime
-
-def export_booksnote_to_md(booksnote, booksinfo, out_path=None):
-    """
-    依据booksnote结构导出markdown文件，格式：
-    # “笔记导出”+导出时间
-    ## 书名
-    ### chapter
-    selectedtext
-    > note      (如果存在)
-    """
-    now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
-    lines = [f'# 笔记导出 {now}\n']
-    for assetid, chapters in booksnote.items():
-        bookname = booksinfo.get(assetid, {}).get('itemname', assetid)
-        lines.append(f'\n## {bookname}\n')
-        for chapter, notes in chapters.items():
-            lines.append(f'### {chapter}')
-            for uuid, ann in notes.items():
-                sel = ann.get('selectedtext')
-                note = ann.get('note')
-                if sel:
-                    lines.append(sel)
-                if note:
-                    lines.append(f'> {note}')
-                lines.append('')
-    md = '\n'.join(lines)
-    if out_path:
-        with open(out_path, 'w', encoding='utf-8') as f:
-            f.write(md)
-    return md
+    def export_booksnote_to_md(self, booksnote, booksinfo, out_path=None):  # Render booksnote as Markdown; always return the text and also write it to out_path when one is given.
+        import datetime
+        now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')  # Naive local time, minute precision.
+        lines = [f'# 笔记导出 {now}\n']  # H1 title followed by the export timestamp.
+        for assetid, chapters in booksnote.items():
+            bookname = booksinfo.get(assetid, {}).get('itemname', assetid)  # Fall back to the assetid when no 'itemname' is available.
+            lines.append(f'\n## {bookname}\n')
+            for chapter, notes in chapters.items():
+                lines.append(f'### {chapter}')
+                for uuid, ann in notes.items():
+                    sel = ann.get('selectedtext')
+                    note = ann.get('note')
+                    if sel:
+                        lines.append(sel)  # Highlighted text is emitted verbatim.
+                    if note:
+                        lines.append(f'> {note}')  # NOTE(review): only the first line of a multi-line note receives the '> ' quote marker.
+                    lines.append('')  # Blank line after each annotation.
+        md = '\n'.join(lines)
+        if out_path:
+            with open(out_path, 'w', encoding='utf-8') as f:  # NOTE(review): the parent directory is not created here; open() fails if it does not exist.
+                f.write(md)
+        return md


 if __name__ == '__main__':
    import shutil
    import os.path
+    from InquirerPy import inquirer # type: ignore
+    exporter = BookNotesExporter(config)
    # 自动覆盖 ./data 下的数据库和plist文件，源为iBooks真实路径
    src_files = [
        (config.IBOOKS_ANNOTATION_DB, config.LOCAL_ANNOTATION_DB),
@@ -166,31 +148,19 @@ if __name__ == '__main__':
        else:
            print(f'file not found: {src} ')

-    from booklist_parse import parse_books_plist
-    from InquirerPy import inquirer # type: ignore
-
    # 先获取所有书籍元数据
-    booksinfo = parse_books_plist(config.LOCAL_BOOKS_PLIST)
-
-    # 构建书名列表（优先displayname, 其次itemname, 否则assetid），按parse_books_plist中的date字段排序
+    manager = BookListManager(plist_path=config.LOCAL_BOOKS_PLIST, db_path=config.LOCAL_LIBRARY_DB)
+    booksinfo = manager.get_books_info()
    assetid2name = {}
    assetid2lastopen = {}
-    from booklist_parse import get_books_last_open
-
-    # 获取所有书籍的最后打开时间（字典，值为{'last_open': 时间戳}）
-    last_open_times = get_books_last_open(config.LOCAL_LIBRARY_DB)
-
+    last_open_times = manager.get_books_last_open()
    for assetid, info in booksinfo.items():
        name = info.get('displayname') or info.get('itemname') or assetid
-        # 如果书名中包含“-”，只取“-”前面的部分
        if '-' in name:
            name = name.split('-', 1)[0].strip()
        assetid2name[assetid] = name
-        # 用 get_books_last_open 返回的时间戳排序，如无则为0
        ts = last_open_times.get(assetid, {}).get('last_open', 0)
        assetid2lastopen[assetid] = ts
-
-    # 按last_open时间戳降序排列
    sorted_assetids = sorted(assetid2name.keys(), key=lambda aid: assetid2lastopen[aid], reverse=True)
    choices = [f"{assetid2name[aid]} [{assetid2lastopen[aid]}]" for aid in sorted_assetids]
    if not choices:
@@ -202,8 +172,6 @@ if __name__ == '__main__':
        multiselect=False,
        instruction="上下键选择，输入可模糊筛选，回车确定"
    ).execute()
-
-    # 解析选中assetid
    for aid, name in assetid2name.items():
        if answer.startswith(name):
            selected_assetid = aid
@@ -211,10 +179,8 @@ if __name__ == '__main__':
    else:
        print("未找到选中书籍")
        exit(1)
-
-    # 只导出选中书的笔记
-    selected_booksnote = build_booksnote(bookid=selected_assetid)
+    selected_booksnote = exporter.build_booksnote(bookid=selected_assetid)
    selected_booksinfo = {selected_assetid: booksinfo.get(selected_assetid, {})}
    out_path = f'export_notes/notes_export_{selected_assetid}.md'
-    export_booksnote_to_md(selected_booksnote, selected_booksinfo, out_path)
+    exporter.export_booksnote_to_md(selected_booksnote, selected_booksinfo, out_path)
    print(f'《{selected_booksinfo[selected_assetid].get("displayname") or selected_booksinfo[selected_assetid].get("itemname") or selected_assetid}》 导出笔记 {out_path}')