""" exportbooknotes.py (OOP版) ------------------------- 功能: - 自动同步iBooks数据库和元数据文件到本地data目录。 - 解析AEAnnotation.sqlite、Books.plist、BKLibrary.sqlite,构建结构化笔记数据。 - 解析epub目录和章节信息,定位每条笔记所属章节。 - 命令行菜单按最近打开时间降序展示书籍列表,供用户选择导出。 - 仅导出选中书籍的所有笔记,按章节分组,生成Markdown文件。 依赖:config.py 统一管理路径和配置项。 主要接口:BookNotesExporter - run():命令行交互式导出主流程 - build_booksnote(bookid=None):构建结构化笔记数据 - export_booksnote_to_md(booksnote, booksinfo, out_path=None):导出为Markdown """ import config """ 自动生成 booksnote 数据结构: booksnote = { assetid: { label_path: { uuid: { 'creationdate': '2023/7/12', 'filepos': None, 'idref': '008.xhtml', 'note': None, 'selectedtext': '這就是宣傳的恐怖之處' }}} } """ import os from collections import defaultdict from annotationdata import AnnotationManager from booklist_parse import BookListManager from opf_parse import parse_opf from toc_parse import TOCParser from bs4 import BeautifulSoup class BookNotesExporter: def __init__(self, config_module=config): self.config = config_module self.annotation_db = config_module.LOCAL_ANNOTATION_DB self.books_plist = config_module.LOCAL_BOOKS_PLIST self.library_db = config_module.LOCAL_LIBRARY_DB @staticmethod def find_file_by_ext(root, exts): for dirpath, _, files in os.walk(root): for f in files: for ext in exts: if f.lower().endswith(ext): return os.path.join(dirpath, f) return None @staticmethod def get_toc_tree(toc_path): with open(toc_path, 'r', encoding='utf-8') as f: soup = BeautifulSoup(f, 'xml') nav_map = soup.find('navMap') nav_points = nav_map.find_all('navPoint', recursive=False) toc_tree = TOCParser.parse_navpoints(nav_points) return toc_tree def build_booksnote(self, bookid=None): """ 构建结构化笔记数据,现在按 CFI 位置排序 Returns: dict: 结构为 {assetid: [annotations_list]} 其中 annotations_list 已按 CFI 位置排序 """ manager = AnnotationManager(self.annotation_db) annotations = manager.get_annotations(bookid=bookid) bl_manager = BookListManager(plist_path=self.books_plist) booksinfo = bl_manager.get_books_info() booksnote = {} for assetid, notes_list in annotations.items(): if not notes_list: # 现在是列表,检查是否为空 continue bookinfo = booksinfo.get(assetid) if not bookinfo: continue epub_path = bookinfo.get('path') if not epub_path or not os.path.isdir(epub_path): # 如果没有 epub 路径,直接使用 CFI 排序的结果 booksnote[assetid] = notes_list continue # 尝试通过 epub 文件补充章节信息 opf_path = self.find_file_by_ext(epub_path, ['.opf']) ncx_path = self.find_file_by_ext(epub_path, ['.ncx']) if opf_path and ncx_path: id2href = parse_opf(opf_path) toc_tree = self.get_toc_tree(ncx_path) # 为每个已排序的笔记补充章节信息 for ann in notes_list: idref = ann.get('idref') filepos = ann.get('filepos') if idref: href = id2href.get(idref, idref) chapter = TOCParser.find_label_path(toc_tree, href, filepos) if chapter is None: # 尝试通过选中文本定位章节 html_path = os.path.join(epub_path, href.split('#')[0]) selectedtext = ann.get('selectedtext') if os.path.exists(html_path) and selectedtext: section = TOCParser.find_section_by_selectedtext(html_path, selectedtext) chapter = section if section else "(未找到章节)" else: chapter = "(未找到章节)" # 更新章节信息,优先使用从 epub 解析的结果 if chapter and chapter != "(未找到章节)": ann['chapter_info'] = chapter booksnote[assetid] = notes_list # 保持 CFI 排序 return booksnote def export_booksnote_to_md(self, booksnote, booksinfo, out_path=None): """ 导出笔记到 Markdown,现在按 CFI 位置排序 Args: booksnote: {assetid: [annotations_list]} 已按CFI排序的笔记数据 booksinfo: 书籍信息字典 out_path: 输出文件路径 Returns: str: Markdown 内容 """ import datetime now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M') lines = [f'# 笔记导出 {now}\n'] for assetid, notes_list in booksnote.items(): if not notes_list: # 检查列表是否为空 continue bookinfo = booksinfo.get(assetid, {}) bookname = bookinfo.get('displayname') or bookinfo.get('itemname') or assetid author = bookinfo.get('author', '') lines.append(f'\n## {bookname}') if author: lines.append(f'**作者**: {author}') lines.append('') # 按章节分组笔记(保持CFI排序的前提下) current_chapter = None chapter_notes = [] for i, ann in enumerate(notes_list): chapter_info = ann.get('chapter_info', '未知章节') # 如果章节变化,先输出之前章节的笔记 if current_chapter is not None and current_chapter != chapter_info: self._export_chapter_notes(lines, current_chapter, chapter_notes) chapter_notes = [] current_chapter = chapter_info chapter_notes.append(ann) # 输出最后一个章节的笔记 if current_chapter is not None and chapter_notes: self._export_chapter_notes(lines, current_chapter, chapter_notes) md = '\n'.join(lines) if out_path: # 确保输出目录存在 os.makedirs(os.path.dirname(out_path), exist_ok=True) with open(out_path, 'w', encoding='utf-8') as f: f.write(md) print(f'[导出] 笔记已按CFI位置排序导出到: {out_path}') return md def _export_chapter_notes(self, lines, chapter_name, chapter_notes): """ 导出单个章节的笔记 Args: lines: 输出行列表 chapter_name: 章节名称 chapter_notes: 该章节的笔记列表(已按CFI排序) """ if not chapter_notes: return lines.append(f'### {chapter_name}') lines.append('') for i, ann in enumerate(chapter_notes, 1): selected_text = ann.get('selectedtext', '') note = ann.get('note', '') location = ann.get('location', '') creation_date = ann.get('creationdate', '') if selected_text: lines.append(f'**{i}.** {selected_text}') if note: lines.append(f'> {note}') # 可选:显示创建时间和位置信息(调试模式) if hasattr(self, 'debug_mode') and self.debug_mode: if creation_date: lines.append(f'*时间*: {creation_date}') if location: lines.append(f'*位置*: `{location}`') lines.append('') lines.append('---') lines.append('') def sync_source_files(config_module): """ 自动同步 iBooks 源数据文件到本地 data 目录 """ import shutil import os src_files = [ (config_module.IBOOKS_ANNOTATION_DB, config_module.LOCAL_ANNOTATION_DB), (config_module.IBOOKS_ANNOTATION_SHM, config_module.LOCAL_ANNOTATION_SHM), (config_module.IBOOKS_ANNOTATION_WAL, config_module.LOCAL_ANNOTATION_WAL), (config_module.IBOOKS_LIBRARY_DB, config_module.LOCAL_LIBRARY_DB), (config_module.IBOOKS_LIBRARY_DB + '-shm', config_module.LOCAL_LIBRARY_DB + '-shm'), (config_module.IBOOKS_LIBRARY_DB + '-wal', config_module.LOCAL_LIBRARY_DB + '-wal'), (config_module.IBOOKS_BOOKS_PLIST, config_module.LOCAL_BOOKS_PLIST) ] for src, dst in src_files: if os.path.exists(src): shutil.copy2(src, dst) print(f'已拷贝源数据文件到本地: {dst}') else: print(f'未找到文件: {src}') if __name__ == '__main__': import shutil import datetime import re import os.path from InquirerPy import inquirer # type: ignore exporter = BookNotesExporter(config) sync_source_files(config) ''' sqlite-shm 和 .sqlite-wal 是 SQLite的临时文件,数据库处于WAL模式且有写入时才存在。 没有进程打开数据库或数据库关闭后,这两个文件可能会被SQLite清理。 ''' # 列出 data 目录下所有文件用于测试,此时shm和wal文件存在 #data_dir = config.DATA_DIR if hasattr(config, 'DATA_DIR') else './data' #print(f"\n[data目录文件列表] {data_dir}:") #for root, dirs, files in os.walk(data_dir): # for file in files: # print(os.path.join(root, file)) # 先获取所有书籍元数据 manager = BookListManager(plist_path=config.LOCAL_BOOKS_PLIST, db_path=config.LOCAL_LIBRARY_DB) booksinfo = manager.get_books_info() assetid2name = {} assetid2lastopen = {} last_open_times = manager.get_books_last_open() for assetid, info in booksinfo.items(): name = info.get('displayname') or info.get('itemname') or assetid if '-' in name: name = name.split('-', 1)[0].strip() assetid2name[assetid] = name ts = last_open_times.get(assetid, {}).get('last_open', 0) assetid2lastopen[assetid] = ts sorted_assetids = sorted(assetid2name.keys(), key=lambda aid: assetid2lastopen[aid], reverse=True) choices = [f"{assetid2name[aid]} [{assetid2lastopen[aid]}]" for aid in sorted_assetids] if not choices: print("无可导出的笔记") exit(0) answer = inquirer.fuzzy( message="请选择要导出的书名(支持模糊搜索):", choices=choices, multiselect=False, instruction="上下键选择,输入可模糊筛选,回车确定" ).execute() for aid, name in assetid2name.items(): if answer.startswith(name): selected_assetid = aid break else: print("未找到选中书籍") exit(1) selected_booksnote = exporter.build_booksnote(bookid=selected_assetid) selected_booksinfo = {selected_assetid: booksinfo.get(selected_assetid, {})} bookname = selected_booksinfo[selected_assetid].get("displayname") or selected_booksinfo[selected_assetid].get("itemname") or selected_assetid ts = datetime.datetime.now().strftime('%m%d%H%M') # 文件名用[.:_【分割取第一段 shortname = re.split(r'[.::_\【\[\((]', bookname)[0].strip() out_path = os.path.join(config.EXPORT_NOTES_DIR, f'notes_{shortname}-{ts}.md') exporter.export_booksnote_to_md(selected_booksnote, selected_booksinfo, out_path) print(f'《{selected_booksinfo[selected_assetid].get("displayname") or selected_booksinfo[selected_assetid].get("itemname") or selected_assetid}》 导出笔记 {out_path}')