# iBook/exportbooknotes.py

"""
exportbooknotes.py (OOP版)
-------------------------
功能：
    - 自动同步iBooks数据库和元数据文件到本地data目录。
    - 解析AEAnnotation.sqlite、Books.plist、BKLibrary.sqlite，构建结构化笔记数据。
    - 解析epub目录和章节信息，定位每条笔记所属章节。
    - 命令行菜单按最近打开时间降序展示书籍列表，供用户选择导出。
    - 仅导出选中书籍的所有笔记，按章节分组，生成Markdown文件。
依赖：config.py 统一管理路径和配置项。
主要接口：BookNotesExporter
    - run()：命令行交互式导出主流程
    - build_booksnote(bookid=None)：构建结构化笔记数据
    - export_booksnote_to_md(booksnote, booksinfo, out_path=None)：导出为Markdown
"""
import config
"""
自动生成 booksnote 数据结构：
booksnote = {
  assetid: { label_path: { uuid: {
      'creationdate': '2023/7/12',
      'filepos': None,
      'idref': '008.xhtml',
      'note': None,
      'selectedtext': '這就是宣傳的恐怖之處'
      }}}
}
"""
import os
from collections import defaultdict
from annotationdata import AnnotationManager
from booklist_parse import BookListManager
from opf_parse import parse_opf
from toc_parse import TOCParser
from bs4 import BeautifulSoup


class BookNotesExporter:
    """Build structured iBooks annotation data and export it as Markdown.

    The paths of the local annotation database, Books plist and library
    database are read from the config module passed to the constructor.
    """

    # Heading used for annotations that carry no usable chapter information.
    UNKNOWN_CHAPTER = '未知章节'
    # Sentinel the chapter lookup uses for "no chapter found"; a chapter equal
    # to this value is never stored on an annotation.
    CHAPTER_NOT_FOUND = "(未找到章节)"

    def __init__(self, config_module=config):
        """Remember the config module and the local data file paths it defines."""
        self.config = config_module
        self.annotation_db = config_module.LOCAL_ANNOTATION_DB
        self.books_plist = config_module.LOCAL_BOOKS_PLIST
        self.library_db = config_module.LOCAL_LIBRARY_DB

    @staticmethod
    def find_file_by_ext(root, exts):
        """Return the first file below *root* whose name ends with one of *exts*.

        Args:
            root: Directory to walk recursively.
            exts: Iterable of suffixes such as ``['.opf']``; matching is
                case-insensitive.

        Returns:
            The path of the first match in ``os.walk`` order, or ``None`` when
            nothing matches.
        """
        # str.endswith accepts a tuple, so one call covers every extension.
        suffixes = tuple(ext.lower() for ext in exts)
        for dirpath, _, files in os.walk(root):
            for filename in files:
                if filename.lower().endswith(suffixes):
                    return os.path.join(dirpath, filename)
        return None

    @staticmethod
    def get_toc_tree(toc_path):
        """Parse the NCX file at *toc_path* into a TOC tree.

        Only the direct ``navPoint`` children of ``navMap`` are handed to
        ``TOCParser.parse_navpoints``. An NCX without a ``navMap`` element is
        treated as having no nav points instead of raising ``AttributeError``.
        """
        with open(toc_path, 'r', encoding='utf-8') as f:
            soup = BeautifulSoup(f, 'xml')
        nav_map = soup.find('navMap')
        nav_points = nav_map.find_all('navPoint', recursive=False) if nav_map is not None else []
        return TOCParser.parse_navpoints(nav_points)

    def build_booksnote(self, bookid=None):
        """Build the structured note data for one book or for all books.

        Annotations are loaded through ``AnnotationManager`` and are never
        reordered here, so whatever order the manager returns (the export
        treats it as CFI position order) is preserved. When the book's epub
        directory is available, each annotation additionally gets a
        ``'chapter_info'`` entry if a chapter can be resolved.

        Args:
            bookid: Passed to ``AnnotationManager.get_annotations``; ``None``
                is the default used when no specific book is requested.

        Returns:
            dict: ``{assetid: [annotation_dict, ...]}``. Books without
            annotations or without an entry in the book list are omitted.
        """
        manager = AnnotationManager(self.annotation_db)
        annotations = manager.get_annotations(bookid=bookid)
        bl_manager = BookListManager(plist_path=self.books_plist)
        booksinfo = bl_manager.get_books_info()

        booksnote = {}
        for assetid, notes_list in annotations.items():
            if not notes_list:
                continue

            bookinfo = booksinfo.get(assetid)
            if not bookinfo:
                continue

            # Chapter enrichment is best-effort: without an unpacked epub
            # directory the annotations are exported as loaded.
            epub_path = bookinfo.get('path')
            if epub_path and os.path.isdir(epub_path):
                self._attach_chapter_info(notes_list, epub_path)

            booksnote[assetid] = notes_list

        return booksnote

    def _attach_chapter_info(self, notes_list, epub_path):
        """Set ``'chapter_info'`` in place on every annotation whose chapter resolves."""
        opf_path = self.find_file_by_ext(epub_path, ['.opf'])
        ncx_path = self.find_file_by_ext(epub_path, ['.ncx'])
        if not (opf_path and ncx_path):
            return

        # presumably a mapping from manifest item id to href - verify in opf_parse
        id2href = parse_opf(opf_path)
        toc_tree = self.get_toc_tree(ncx_path)

        # Hrefs in an OPF manifest are relative to the OPF file, which often
        # sits in a subdirectory (e.g. OEBPS/), so look there as well as in
        # the epub root.
        search_roots = [epub_path]
        opf_dir = os.path.dirname(opf_path)
        if opf_dir and opf_dir != epub_path:
            search_roots.append(opf_dir)

        for ann in notes_list:
            idref = ann.get('idref')
            if not idref:
                continue

            href = id2href.get(idref, idref)
            chapter = TOCParser.find_label_path(toc_tree, href, ann.get('filepos'))
            if chapter is None:
                # Fall back to locating the highlighted text inside the document.
                chapter = self._find_chapter_by_text(search_roots, href, ann.get('selectedtext'))

            # Only overwrite existing chapter data with a real result.
            if chapter and chapter != self.CHAPTER_NOT_FOUND:
                ann['chapter_info'] = chapter

    @staticmethod
    def _find_chapter_by_text(search_roots, href, selectedtext):
        """Locate the section containing *selectedtext* in the document behind *href*.

        Returns ``None`` when there is no text to search for, the document is
        not found under any of *search_roots*, or no section matches.
        """
        if not selectedtext:
            return None

        relative_path = href.split('#')[0]
        for root in search_roots:
            html_path = os.path.join(root, relative_path)
            # isfile (not exists): a fragment-only href would otherwise
            # resolve to the directory itself.
            if os.path.isfile(html_path):
                return TOCParser.find_section_by_selectedtext(html_path, selectedtext) or None
        return None

    def export_booksnote_to_md(self, booksnote, booksinfo, out_path=None):
        """Render the notes as Markdown and optionally write them to a file.

        Annotations are emitted in list order; consecutive annotations that
        share the same chapter are grouped under one chapter heading.

        Args:
            booksnote: ``{assetid: [annotation_dict, ...]}`` as produced by
                ``build_booksnote``.
            booksinfo: ``{assetid: info_dict}``; ``displayname``, ``itemname``
                and ``author`` are read from each info dict.
            out_path: Output file path. When given, missing parent
                directories are created and the file is written as UTF-8.

        Returns:
            str: The Markdown document.
        """
        import datetime
        from itertools import groupby

        now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
        lines = [f'# 笔记导出 {now}\n']

        for assetid, notes_list in booksnote.items():
            if not notes_list:
                continue

            bookinfo = booksinfo.get(assetid) or {}
            bookname = bookinfo.get('displayname') or bookinfo.get('itemname') or assetid
            author = bookinfo.get('author', '')

            lines.append(f'\n## {bookname}')
            if author:
                lines.append(f'**作者**: {author}')
            lines.append('')

            # groupby only merges consecutive items, which keeps the list
            # order intact. A missing *or empty* chapter maps to the same
            # placeholder, so such notes are neither dropped nor merged into
            # a neighbouring chapter.
            for chapter, group in groupby(
                notes_list,
                key=lambda ann: ann.get('chapter_info') or self.UNKNOWN_CHAPTER,
            ):
                self._export_chapter_notes(lines, chapter, list(group))

        md = '\n'.join(lines)

        if out_path:
            # os.makedirs('') raises, so only create a directory when the
            # path actually has a directory component.
            out_dir = os.path.dirname(out_path)
            if out_dir:
                os.makedirs(out_dir, exist_ok=True)
            with open(out_path, 'w', encoding='utf-8') as f:
                f.write(md)
            print(f'[导出] 笔记已按CFI位置排序导出到: {out_path}')

        return md

    def _export_chapter_notes(self, lines, chapter_name, chapter_notes):
        """Append the Markdown for one chapter to *lines*.

        Args:
            lines: Output line list, extended in place.
            chapter_name: Text of the chapter heading.
            chapter_notes: Annotations of this chapter, in output order.

        Annotations without selected text are skipped and do not consume a
        number, so the visible numbering is always contiguous.
        """
        if not chapter_notes:
            return

        lines.append(f'### {chapter_name}')
        lines.append('')

        # debug_mode is optional and may be set on the instance or a subclass.
        debug = bool(getattr(self, 'debug_mode', False))

        index = 0
        for ann in chapter_notes:
            selected_text = ann.get('selectedtext', '')
            if not selected_text:
                continue

            index += 1
            lines.append(f'**{index}.** {selected_text}')

            note = ann.get('note', '')
            if note:
                # Quote every line so a multi-line note stays inside the
                # Markdown blockquote.
                lines.extend(f'> {part}' for part in str(note).splitlines())

            # Optional: creation time and location, shown in debug mode only.
            if debug:
                creation_date = ann.get('creationdate', '')
                location = ann.get('location', '')
                if creation_date:
                    lines.append(f'*时间*: {creation_date}')
                if location:
                    lines.append(f'*位置*: `{location}`')

            lines.append('')

        lines.append('---')
        lines.append('')

def sync_source_files(config_module):
    """Copy the iBooks source data files into the local data directory.

    For every (source, destination) pair defined by *config_module* the source
    is copied with ``shutil.copy2`` when it exists; otherwise a message is
    printed. Missing destination directories are created first.

    The ``-shm`` / ``-wal`` files are SQLite sidecar files that only exist
    while the source database has pending WAL content. When such a source
    file is absent, a local copy left over from an earlier sync is removed so
    that a stale WAL is never paired with the freshly copied database.

    Args:
        config_module: Object exposing the ``IBOOKS_*`` source paths and the
            matching ``LOCAL_*`` destination paths.
    """
    import shutil
    import os

    # (source, destination, is_sqlite_sidecar)
    src_files = [
        (config_module.IBOOKS_ANNOTATION_DB, config_module.LOCAL_ANNOTATION_DB, False),
        (config_module.IBOOKS_ANNOTATION_SHM, config_module.LOCAL_ANNOTATION_SHM, True),
        (config_module.IBOOKS_ANNOTATION_WAL, config_module.LOCAL_ANNOTATION_WAL, True),
        (config_module.IBOOKS_LIBRARY_DB, config_module.LOCAL_LIBRARY_DB, False),
        (config_module.IBOOKS_LIBRARY_DB + '-shm', config_module.LOCAL_LIBRARY_DB + '-shm', True),
        (config_module.IBOOKS_LIBRARY_DB + '-wal', config_module.LOCAL_LIBRARY_DB + '-wal', True),
        (config_module.IBOOKS_BOOKS_PLIST, config_module.LOCAL_BOOKS_PLIST, False),
    ]
    for src, dst, is_sidecar in src_files:
        if os.path.exists(src):
            # copy2 does not create parent directories.
            dst_dir = os.path.dirname(dst)
            if dst_dir:
                os.makedirs(dst_dir, exist_ok=True)
            shutil.copy2(src, dst)
            print(f'已拷贝源数据文件到本地: {dst}')
        else:
            print(f'未找到文件: {src}')
            # Main files are kept as a cached copy; only mismatched sidecar
            # files are dangerous and therefore removed.
            if is_sidecar and os.path.isfile(dst):
                os.remove(dst)
                print(f'已删除过期的本地临时文件: {dst}')


def build_book_choices(assetid2name, assetid2lastopen):
    """Map unique menu labels to asset ids, most recently opened book first.

    Args:
        assetid2name: ``{assetid: display name}``.
        assetid2lastopen: ``{assetid: last-open value}`` used for sorting and
            shown in brackets after the name.

    Returns:
        dict: ``{label: assetid}`` in menu order. A label that would collide
        with an earlier one gets the asset id appended, so every label
        identifies exactly one book.
    """
    ordered = sorted(assetid2name, key=lambda aid: assetid2lastopen[aid], reverse=True)
    label2assetid = {}
    for aid in ordered:
        label = f"{assetid2name[aid]} [{assetid2lastopen[aid]}]"
        if label in label2assetid:
            label = f"{label} ({aid})"
        label2assetid[label] = aid
    return label2assetid


def short_export_name(bookname, fallback='notes'):
    """Derive a short, filesystem-safe file name stem from a book title.

    The title is cut at the first of ``. : ： _ 【 [ ( （``, path separators
    are replaced by ``_``, and *fallback* is returned when nothing is left.
    """
    import re

    shortname = re.split(r'[.:：_\【\[\(（]', bookname)[0].strip()
    # A separator inside the title would otherwise be read as a directory.
    shortname = re.sub(r'[\\/]+', '_', shortname).strip()
    return shortname or fallback


def main():
    """Interactive flow: sync data, let the user pick a book, export its notes."""
    import datetime
    from InquirerPy import inquirer  # type: ignore

    exporter = BookNotesExporter(config)
    sync_source_files(config)
    # The .sqlite-shm and .sqlite-wal files are temporary SQLite files. They
    # only exist while the database is in WAL mode with pending writes, and
    # SQLite may remove them once no process has the database open.

    # Load the metadata of all books first.
    manager = BookListManager(plist_path=config.LOCAL_BOOKS_PLIST, db_path=config.LOCAL_LIBRARY_DB)
    booksinfo = manager.get_books_info()
    last_open_times = manager.get_books_last_open()

    assetid2name = {}
    assetid2lastopen = {}
    for assetid, info in booksinfo.items():
        name = info.get('displayname') or info.get('itemname') or assetid
        if '-' in name:
            # Show only the part before the first '-', unless that is empty.
            name = name.split('-', 1)[0].strip() or name
        assetid2name[assetid] = name
        # A missing or None timestamp sorts as 0 instead of breaking the sort.
        assetid2lastopen[assetid] = (last_open_times.get(assetid) or {}).get('last_open') or 0

    label2assetid = build_book_choices(assetid2name, assetid2lastopen)
    if not label2assetid:
        print("无可导出的笔记")
        raise SystemExit(0)

    answer = inquirer.fuzzy(
        message="请选择要导出的书名（支持模糊搜索）:",
        choices=list(label2assetid),
        multiselect=False,
        instruction="上下键选择，输入可模糊筛选，回车确定"
    ).execute()

    # Exact label lookup: prefix matching on the name picks the wrong book
    # when one title is a prefix of another.
    selected_assetid = label2assetid.get(answer)
    if selected_assetid is None:
        print("未找到选中书籍")
        raise SystemExit(1)

    selected_booksnote = exporter.build_booksnote(bookid=selected_assetid)
    selected_info = booksinfo.get(selected_assetid, {})
    selected_booksinfo = {selected_assetid: selected_info}
    bookname = selected_info.get("displayname") or selected_info.get("itemname") or selected_assetid

    ts = datetime.datetime.now().strftime('%m%d%H%M')
    shortname = short_export_name(bookname, fallback=selected_assetid)
    out_path = os.path.join(config.EXPORT_NOTES_DIR, f'notes_{shortname}-{ts}.md')
    exporter.export_booksnote_to_md(selected_booksnote, selected_booksinfo, out_path)
    print(f'《{bookname}》 导出笔记 {out_path}')


if __name__ == '__main__':
    main()