""" exportbooknotes.py ------------------ 功能: - 自动同步iBooks数据库和元数据文件到本地data目录。 - 解析AEAnnotation.sqlite、Books.plist、BKLibrary.sqlite,构建结构化笔记数据。 - 解析epub目录和章节信息,定位每条笔记所属章节。 - 命令行菜单按最近打开时间降序展示书籍列表,供用户选择导出。 - 仅导出选中书籍的所有笔记,按章节分组,生成Markdown文件。 主要数据流: 1. 数据同步到data目录 2. 解析Books.plist获取书籍元数据 3. 解析BKLibrary.sqlite获取最近打开时间 4. 菜单排序与显示(书名+时间戳) 5. 解析AEAnnotation.sqlite获取笔记 6. 解析epub目录,定位章节 7. 导出Markdown文件 依赖:Python 3, InquirerPy, bs4, shutil, os, datetime, sqlite3 典型用法: python exportbooknotes.py # 按提示选择书籍,自动导出笔记到export_notes目录 """ """ 自动生成 booksnote 数据结构: booksnote = { assetid: { label_path: { uuid: { 'creationdate': '2023/7/12', 'filepos': None, 'idref': '008.xhtml', 'note': None, 'selectedtext': '這就是宣傳的恐怖之處' }}} } """ from collections import defaultdict import os from annotationdata import get_annotations from booklist_parse import parse_books_plist from opf_parse import parse_opf from toc_parse import parse_navpoints, find_label_path from bs4 import BeautifulSoup from pprint import pprint def find_file_by_ext(root, exts): """在root下递归查找第一个指定后缀的文件""" for dirpath, _, files in os.walk(root): for f in files: for ext in exts: if f.lower().endswith(ext): return os.path.join(dirpath, f) return None def get_toc_tree(toc_path): with open(toc_path, 'r', encoding='utf-8') as f: soup = BeautifulSoup(f, 'xml') nav_map = soup.find('navMap') nav_points = nav_map.find_all('navPoint', recursive=False) toc_tree = parse_navpoints(nav_points) #pprint(toc_tree, indent=2, depth=5) return toc_tree def build_booksnote(annotation_db='data/AEAnnotation.sqlite', books_plist='data/Books.plist', bookid=None): # 支持只处理特定 assetid 的笔记 annotations = get_annotations(annotation_db, bookid=bookid) booksinfo = parse_books_plist(books_plist) booksnote = defaultdict(lambda: defaultdict(dict)) for assetid, notes in annotations.items(): # 获取epub路径 bookinfo = booksinfo.get(assetid) if not bookinfo: continue epub_path = bookinfo.get('path') if not epub_path or not os.path.isdir(epub_path): continue # 查找opf和ncx opf_path = find_file_by_ext(epub_path, ['.opf']) ncx_path = find_file_by_ext(epub_path, ['.ncx']) if not opf_path or not ncx_path: continue id2href = parse_opf(opf_path) toc_tree = get_toc_tree(ncx_path) for uuid, ann in notes.items(): idref = ann['idref'] filepos = ann['filepos'] href = id2href.get(idref, idref) chapter = find_label_path(toc_tree, href, filepos) if chapter is None: # 直接从html文件获取章节信息 html_path = os.path.join(epub_path, href.split('#')[0]) selectedtext = ann.get('selectedtext') if os.path.exists(html_path) and selectedtext: from toc_parse import find_section_by_selectedtext section = find_section_by_selectedtext(html_path, selectedtext) if section: chapter = section else: chapter = "(未找到章节)" else: chapter = "(未找到章节)" booksnote[assetid][chapter][uuid] = { 'creationdate': ann['creationdate'], 'filepos': filepos, 'idref': href, 'note': ann['note'], 'selectedtext': ann['selectedtext'] } return booksnote import datetime def export_booksnote_to_md(booksnote, booksinfo, out_path=None): """ 依据booksnote结构导出markdown文件,格式: # “笔记导出”+导出时间 ## 书名 ### chapter selectedtext > note (如果存在) """ now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M') lines = [f'# 笔记导出 {now}\n'] for assetid, chapters in booksnote.items(): bookname = booksinfo.get(assetid, {}).get('itemname', assetid) lines.append(f'\n## {bookname}\n') for chapter, notes in chapters.items(): lines.append(f'### {chapter}') for uuid, ann in notes.items(): sel = ann.get('selectedtext') note = ann.get('note') if sel: lines.append(sel) if note: lines.append(f'> {note}') lines.append('') md = '\n'.join(lines) if out_path: with open(out_path, 'w', encoding='utf-8') as f: f.write(md) return md if __name__ == '__main__': import shutil import os.path # 自动覆盖 ./data 下的数据库和plist文件,源为iBooks真实路径 src_files = [ (os.path.expanduser('~/Library/Containers/com.apple.iBooksX/Data/Documents/AEAnnotation/AEAnnotation_v10312011_1727_local.sqlite'), 'data/AEAnnotation.sqlite'), (os.path.expanduser('~/Library/Containers/com.apple.iBooksX/Data/Documents/AEAnnotation/AEAnnotation_v10312011_1727_local.sqlite-shm'), 'data/AEAnnotation.sqlite-shm'), (os.path.expanduser('~/Library/Containers/com.apple.iBooksX/Data/Documents/AEAnnotation/AEAnnotation_v10312011_1727_local.sqlite-wal'), 'data/AEAnnotation.sqlite-wal'), (os.path.expanduser('~/Library/Containers/com.apple.iBooksX/Data/Documents/BKLibrary/BKLibrary-1-091020131601.sqlite'), 'data/BKLibrary.sqlite'), (os.path.expanduser('~/Library/Containers/com.apple.BKAgentService/Data/Documents/iBooks/Books/Books.plist'), 'data/Books.plist') ] for src, dst in src_files: if os.path.exists(src): shutil.copy2(src, dst) print(f'copy source data file to ./data : {dst}') else: print(f'file not found: {src} ') from booklist_parse import parse_books_plist from InquirerPy import inquirer # type: ignore # 先获取所有书籍元数据 booksinfo = parse_books_plist('data/Books.plist') # 构建书名列表(优先displayname, 其次itemname, 否则assetid),按parse_books_plist中的date字段排序 assetid2name = {} assetid2lastopen = {} from booklist_parse import get_books_last_open # 获取所有书籍的最后打开时间(字典,值为{'last_open': 时间戳}) last_open_times = get_books_last_open('data/BKLibrary.sqlite') for assetid, info in booksinfo.items(): name = info.get('displayname') or info.get('itemname') or assetid # 如果书名中包含“-”,只取“-”前面的部分 if '-' in name: name = name.split('-', 1)[0].strip() assetid2name[assetid] = name # 用 get_books_last_open 返回的时间戳排序,如无则为0 ts = last_open_times.get(assetid, {}).get('last_open', 0) assetid2lastopen[assetid] = ts # 按last_open时间戳降序排列 sorted_assetids = sorted(assetid2name.keys(), key=lambda aid: assetid2lastopen[aid], reverse=True) choices = [f"{assetid2name[aid]} [{assetid2lastopen[aid]}]" for aid in sorted_assetids] if not choices: print("无可导出的笔记") exit(0) answer = inquirer.fuzzy( message="请选择要导出的书名(支持模糊搜索):", choices=choices, multiselect=False, instruction="上下键选择,输入可模糊筛选,回车确定" ).execute() # 解析选中assetid for aid, name in assetid2name.items(): if answer.startswith(name): selected_assetid = aid break else: print("未找到选中书籍") exit(1) # 只导出选中书的笔记 selected_booksnote = build_booksnote(bookid=selected_assetid) selected_booksinfo = {selected_assetid: booksinfo.get(selected_assetid, {})} out_path = f'export_notes/notes_export_{selected_assetid}.md' export_booksnote_to_md(selected_booksnote, selected_booksinfo, out_path) print(f'《{selected_booksinfo[selected_assetid].get("displayname") or selected_booksinfo[selected_assetid].get("itemname") or selected_assetid}》 导出笔记 {out_path}')