"""exportbooknotes.py (OOP version).

Features:

- Copies the iBooks databases and metadata files into the local data
  directory.
- Reads AEAnnotation.sqlite, Books.plist and BKLibrary.sqlite (through the
  project's ``AnnotationManager`` / ``BookListManager`` helpers) and builds a
  structured notes mapping.
- Parses each EPUB's OPF manifest and NCX table of contents to find the
  chapter every note belongs to.
- Shows an interactive command-line menu of books, most recently opened
  first, so the user can pick the one to export.
- Exports only the selected book's notes, grouped by chapter, as Markdown.

Dependencies: ``config.py`` centralises all paths and settings.

Main interfaces:

- ``BookNotesExporter.build_booksnote(bookid=None)``: build the structured
  notes mapping.
- ``BookNotesExporter.export_booksnote_to_md(booksnote, booksinfo,
  out_path=None)``: render the mapping as Markdown.
- ``sync_source_files(config_module)``: refresh the local data copies.
- Running this module as a script starts the interactive export flow.

Shape of the generated ``booksnote`` mapping::

    booksnote = {
        assetid: {
            label_path: {
                uuid: {
                    'creationdate': '2023/7/12',
                    'filepos': None,
                    'idref': '008.xhtml',
                    'note': None,
                    'selectedtext': '這就是宣傳的恐怖之處',
                }
            }
        }
    }
"""
import datetime
import os
import re
import shutil
import sys
from collections import defaultdict

from bs4 import BeautifulSoup

import config
from annotationdata import AnnotationManager
from booklist_parse import BookListManager
from opf_parse import parse_opf
from toc_parse import TOCParser


class BookNotesExporter:
    """Build chapter-grouped note data for books and render it as Markdown."""

    # Chapter label used when a note cannot be placed in any chapter.
    UNKNOWN_CHAPTER = "(未找到章节)"

    def __init__(self, config_module=config):
        """Remember the local data paths exposed by ``config_module``.

        Args:
            config_module: Object providing ``LOCAL_ANNOTATION_DB``,
                ``LOCAL_BOOKS_PLIST`` and ``LOCAL_LIBRARY_DB``.
        """
        self.config = config_module
        self.annotation_db = config_module.LOCAL_ANNOTATION_DB
        self.books_plist = config_module.LOCAL_BOOKS_PLIST
        self.library_db = config_module.LOCAL_LIBRARY_DB

    @staticmethod
    def find_file_by_ext(root, exts):
        """Return the first file under ``root`` whose name ends with one of ``exts``.

        The comparison is done on the lower-cased file name, so ``exts``
        should be given in lower case.

        Args:
            root: Directory to walk recursively.
            exts: Iterable of suffixes such as ``['.opf']``.

        Returns:
            The full path of the first match in ``os.walk`` order, or
            ``None`` when nothing matches.
        """
        suffixes = tuple(exts)
        for dirpath, _, files in os.walk(root):
            for fname in files:
                if fname.lower().endswith(suffixes):
                    return os.path.join(dirpath, fname)
        return None

    @staticmethod
    def get_toc_tree(toc_path):
        """Parse an NCX file and return the tree built by ``TOCParser``.

        Args:
            toc_path: Path of the ``.ncx`` file (read as UTF-8).

        Returns:
            Whatever ``TOCParser.parse_navpoints`` returns for the top-level
            ``navPoint`` elements of the ``navMap``. When the file has no
            ``navMap`` the parser is given an empty list instead of failing.
        """
        with open(toc_path, 'r', encoding='utf-8') as f:
            soup = BeautifulSoup(f, 'xml')
        nav_map = soup.find('navMap')
        # A malformed NCX may lack <navMap>; treat it as an empty TOC.
        if nav_map is None:
            nav_points = []
        else:
            nav_points = nav_map.find_all('navPoint', recursive=False)
        return TOCParser.parse_navpoints(nav_points)

    @staticmethod
    def _resolve_chapter(toc_tree, epub_path, opf_path, href, filepos,
                         selectedtext):
        """Return the chapter label for one note, or ``UNKNOWN_CHAPTER``.

        The TOC lookup is tried first. If it yields ``None``, the content
        file named by ``href`` is searched for ``selectedtext``.
        """
        chapter = TOCParser.find_label_path(toc_tree, href, filepos)
        if chapter is not None:
            return chapter
        if not href or not selectedtext:
            return BookNotesExporter.UNKNOWN_CHAPTER

        rel_path = href.split('#')[0]
        # Try the EPUB root first (original behaviour), then the OPF's own
        # directory, since manifest hrefs are relative to the OPF file.
        candidates = (
            os.path.join(epub_path, rel_path),
            os.path.join(os.path.dirname(opf_path), rel_path),
        )
        for html_path in candidates:
            if os.path.isfile(html_path):
                section = TOCParser.find_section_by_selectedtext(
                    html_path, selectedtext)
                return section or BookNotesExporter.UNKNOWN_CHAPTER
        return BookNotesExporter.UNKNOWN_CHAPTER

    def build_booksnote(self, bookid=None):
        """Build the ``assetid -> chapter -> uuid -> note`` mapping.

        Books are skipped when they have no metadata entry, when their
        ``path`` is not an existing directory, or when no ``.opf`` / ``.ncx``
        file is found under that directory.

        Args:
            bookid: Passed through to ``AnnotationManager.get_annotations``;
                presumably restricts the result to one book (confirm against
                that helper).

        Returns:
            A nested ``defaultdict`` shaped as described in the module
            docstring. The stored ``'idref'`` value is the resolved href.
        """
        manager = AnnotationManager(self.annotation_db)
        annotations = manager.get_annotations(bookid=bookid)
        bl_manager = BookListManager(plist_path=self.books_plist)
        booksinfo = bl_manager.get_books_info()

        booksnote = defaultdict(lambda: defaultdict(dict))
        for assetid, notes in annotations.items():
            bookinfo = booksinfo.get(assetid)
            if not bookinfo:
                continue
            epub_path = bookinfo.get('path')
            if not epub_path or not os.path.isdir(epub_path):
                continue
            opf_path = self.find_file_by_ext(epub_path, ['.opf'])
            ncx_path = self.find_file_by_ext(epub_path, ['.ncx'])
            if not opf_path or not ncx_path:
                continue

            id2href = parse_opf(opf_path)
            toc_tree = self.get_toc_tree(ncx_path)
            for uuid, ann in notes.items():
                idref = ann['idref']
                filepos = ann['filepos']
                # Fall back to the raw idref when the manifest has no entry.
                href = id2href.get(idref, idref)
                chapter = self._resolve_chapter(
                    toc_tree, epub_path, opf_path, href, filepos,
                    ann.get('selectedtext'))
                booksnote[assetid][chapter][uuid] = {
                    'creationdate': ann['creationdate'],
                    'filepos': filepos,
                    'idref': href,
                    'note': ann['note'],
                    'selectedtext': ann['selectedtext'],
                }
        return booksnote

    def export_booksnote_to_md(self, booksnote, booksinfo, out_path=None):
        """Render ``booksnote`` as Markdown and optionally write it to disk.

        Args:
            booksnote: Mapping produced by ``build_booksnote``.
            booksinfo: Mapping ``assetid -> info dict``; ``info['itemname']``
                is used as the book heading, falling back to the asset id.
            out_path: When truthy, the Markdown is also written there as
                UTF-8. Missing parent directories are created.

        Returns:
            The Markdown text.
        """
        now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
        lines = [f'# 笔记导出 {now}\n']
        for assetid, chapters in booksnote.items():
            bookname = booksinfo.get(assetid, {}).get('itemname', assetid)
            lines.append(f'\n## {bookname}\n')
            for chapter, notes in chapters.items():
                lines.append(f'### {chapter}')
                for ann in notes.values():
                    sel = ann.get('selectedtext')
                    note = ann.get('note')
                    if sel:
                        lines.append(sel)
                    if note:
                        lines.append(f'> {note}')
                    # Blank line separates consecutive notes.
                    lines.append('')
        md = '\n'.join(lines)
        if out_path:
            out_dir = os.path.dirname(out_path)
            if out_dir:
                os.makedirs(out_dir, exist_ok=True)
            with open(out_path, 'w', encoding='utf-8') as f:
                f.write(md)
        return md


def sync_source_files(config_module):
    """Copy the iBooks source data files into the local data directory.

    The ``-shm`` / ``-wal`` files are SQLite side files that only exist while
    the database is in WAL mode with pending writes, so they may be absent at
    the source. In that case a stale local copy from an earlier sync is
    removed, so it cannot be paired with the freshly copied database.

    Args:
        config_module: Object providing the ``IBOOKS_*`` source paths and the
            matching ``LOCAL_*`` destination paths.
    """
    # (source, destination, is_sqlite_side_file)
    src_files = [
        (config_module.IBOOKS_ANNOTATION_DB,
         config_module.LOCAL_ANNOTATION_DB, False),
        (config_module.IBOOKS_ANNOTATION_SHM,
         config_module.LOCAL_ANNOTATION_SHM, True),
        (config_module.IBOOKS_ANNOTATION_WAL,
         config_module.LOCAL_ANNOTATION_WAL, True),
        (config_module.IBOOKS_LIBRARY_DB,
         config_module.LOCAL_LIBRARY_DB, False),
        (config_module.IBOOKS_BOOKS_PLIST,
         config_module.LOCAL_BOOKS_PLIST, False),
    ]
    for src, dst, is_side_file in src_files:
        if os.path.exists(src):
            dst_dir = os.path.dirname(dst)
            if dst_dir:
                # First run: the local data directory may not exist yet.
                os.makedirs(dst_dir, exist_ok=True)
            shutil.copy2(src, dst)
            print(f'已拷贝源数据文件到本地: {dst}')
        else:
            print(f'未找到文件: {src}')
            if is_side_file and os.path.exists(dst):
                os.remove(dst)
                print(f'已删除过期的本地文件: {dst}')


def main():
    """Run the interactive flow: sync data, pick a book, export its notes."""
    # Imported lazily so that importing this module does not require
    # InquirerPy.
    from InquirerPy import inquirer  # type: ignore

    exporter = BookNotesExporter(config)
    sync_source_files(config)

    # Load metadata for every book first.
    manager = BookListManager(plist_path=config.LOCAL_BOOKS_PLIST,
                              db_path=config.LOCAL_LIBRARY_DB)
    booksinfo = manager.get_books_info()
    last_open_times = manager.get_books_last_open()

    assetid2name = {}
    assetid2lastopen = {}
    for assetid, info in booksinfo.items():
        name = info.get('displayname') or info.get('itemname') or assetid
        if '-' in name:
            # Keep only the part before the first '-' for the menu label.
            name = name.split('-', 1)[0].strip()
        assetid2name[assetid] = name
        ts = last_open_times.get(assetid, {}).get('last_open', 0)
        # A None timestamp would make the sort below raise.
        assetid2lastopen[assetid] = 0 if ts is None else ts

    sorted_assetids = sorted(
        assetid2name, key=lambda aid: assetid2lastopen[aid], reverse=True)
    # Each choice carries the asset id as its value, so the selection does
    # not depend on matching the label text back to a book.
    choices = [
        {
            "name": f"{assetid2name[aid]} [{assetid2lastopen[aid]}]",
            "value": aid,
        }
        for aid in sorted_assetids
    ]
    if not choices:
        print("无可导出的笔记")
        sys.exit(0)

    selected_assetid = inquirer.fuzzy(
        message="请选择要导出的书名(支持模糊搜索):",
        choices=choices,
        multiselect=False,
        instruction="上下键选择,输入可模糊筛选,回车确定",
    ).execute()
    if selected_assetid not in assetid2name:
        print("未找到选中书籍")
        sys.exit(1)

    selected_booksnote = exporter.build_booksnote(bookid=selected_assetid)
    selected_info = booksinfo.get(selected_assetid, {})
    selected_booksinfo = {selected_assetid: selected_info}
    bookname = (selected_info.get("displayname")
                or selected_info.get("itemname")
                or selected_assetid)
    ts = datetime.datetime.now().strftime('%m%d%H%M')
    # File name: split on any of . : _ 【 [ ( and keep the first piece.
    shortname = re.split(r'[.::_\【\[\((]', bookname)[0].strip()
    # Path separators would break the output path; an empty piece (title
    # starting with a separator) falls back to the asset id.
    shortname = shortname.replace('/', '_').replace(os.sep, '_')
    if not shortname:
        shortname = str(selected_assetid)
    out_path = os.path.join(config.EXPORT_NOTES_DIR,
                            f'notes_{shortname}-{ts}.md')
    exporter.export_booksnote_to_md(
        selected_booksnote, selected_booksinfo, out_path)
    print(f'《{bookname}》 导出笔记 {out_path}')


if __name__ == '__main__':
    main()