iBook/exportbooknotes.py

187 lines
7.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

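"""
exportbooknotes.py (OOP version)
--------------------------------
Features:
- Automatically sync the iBooks databases and metadata files into the local data directory.
- Parse AEAnnotation.sqlite, Books.plist and BKLibrary.sqlite to build structured note data.
- Parse the epub table of contents and chapter information to locate the chapter each note belongs to.
- Show a command-line menu of books, sorted by last-opened time in descending order, for the user to choose from.
- Export only the selected book's notes, grouped by chapter, to a Markdown file.
Depends on: config.py, which centrally manages paths and configuration options.
Main interface: BookNotesExporter
- run(): interactive command-line export workflow (NOTE: no run() method is defined on the class in this file; the interactive flow lives in the ``__main__`` section)
- build_booksnote(bookid=None): build the structured note data
- export_booksnote_to_md(booksnote, booksinfo, out_path=None): export to Markdown
"""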
import config
"""
Shape of the auto-generated booksnote data structure:
booksnote = {
    assetid: {
        label_path: {
            uuid: {
                'creationdate': '2023/7/12',
                'filepos': None,
                'idref': '008.xhtml',
                'note': None,
                'selectedtext': '這就是宣傳的恐怖之處'
            }
        }
    }
}
"""
import os
from collections import defaultdict
from annotationdata import AnnotationManager
from booklist_parse import BookListManager
from opf_parse import parse_opf
from toc_parse import TOCParser
from bs4 import BeautifulSoup
class BookNotesExporter:
    """Build structured iBooks annotation data and export it as Markdown.

    The exporter stores three local file paths taken from the configuration
    module and relies on the project helpers ``AnnotationManager``,
    ``BookListManager``, ``parse_opf`` and ``TOCParser`` to group every
    annotation of a book under a chapter label.
    """

    # Chapter label used when an annotation cannot be matched to any chapter.
    UNKNOWN_CHAPTER = "(未找到章节)"

    def __init__(self, config_module=config):
        """Remember the configuration module and the local data file paths.

        Args:
            config_module: Object exposing ``LOCAL_ANNOTATION_DB``,
                ``LOCAL_BOOKS_PLIST`` and ``LOCAL_LIBRARY_DB``.
        """
        self.config = config_module
        self.annotation_db = config_module.LOCAL_ANNOTATION_DB
        self.books_plist = config_module.LOCAL_BOOKS_PLIST
        self.library_db = config_module.LOCAL_LIBRARY_DB

    @staticmethod
    def find_file_by_ext(root, exts):
        """Return the first file under *root* whose name ends with one of *exts*.

        The file name is lower-cased before comparison; the extensions are
        compared as given, so callers should pass them in lower case.

        Args:
            root: Directory that is walked recursively.
            exts: Iterable of suffixes such as ``['.opf']``.

        Returns:
            The path of the first match in ``os.walk`` order, or ``None``
            when nothing matches.
        """
        suffixes = tuple(exts)
        for dirpath, _, files in os.walk(root):
            for filename in files:
                if filename.lower().endswith(suffixes):
                    return os.path.join(dirpath, filename)
        return None

    @staticmethod
    def get_toc_tree(toc_path):
        """Parse an NCX file and return the tree built by ``TOCParser``.

        Args:
            toc_path: Path of the ``.ncx`` file, read as UTF-8.

        Returns:
            Whatever ``TOCParser.parse_navpoints`` returns for the top-level
            ``navPoint`` elements of the document.
        """
        with open(toc_path, 'r', encoding='utf-8') as f:
            soup = BeautifulSoup(f, 'xml')
        nav_map = soup.find('navMap')
        # A malformed NCX may have no <navMap>; treat that as an empty table
        # of contents instead of failing with AttributeError on None.
        # NOTE(review): assumes parse_navpoints accepts an empty list - confirm.
        if nav_map is None:
            nav_points = []
        else:
            nav_points = nav_map.find_all('navPoint', recursive=False)
        return TOCParser.parse_navpoints(nav_points)

    def _resolve_chapter(self, toc_tree, epub_path, opf_path, href, filepos, selectedtext):
        """Return the chapter label for one annotation, or the placeholder."""
        chapter = TOCParser.find_label_path(toc_tree, href, filepos)
        if chapter is not None:
            return chapter
        # Fallback: search the content document for the highlighted text.
        # Both a document reference and some selected text are required.
        if not href or not selectedtext:
            return self.UNKNOWN_CHAPTER
        relative = href.split('#')[0]
        # The original lookup is relative to the book directory. EPUB manifest
        # hrefs are relative to the OPF file, so that directory is tried too.
        # NOTE(review): depends on what parse_opf returns - confirm.
        candidates = (
            os.path.join(epub_path, relative),
            os.path.join(os.path.dirname(opf_path), relative),
        )
        for html_path in candidates:
            # isfile (not exists): a fragment-only href would otherwise
            # resolve to the directory itself.
            if os.path.isfile(html_path):
                section = TOCParser.find_section_by_selectedtext(html_path, selectedtext)
                return section or self.UNKNOWN_CHAPTER
        return self.UNKNOWN_CHAPTER

    def build_booksnote(self, bookid=None):
        """Build the nested ``assetid -> chapter -> uuid -> note`` mapping.

        Books are skipped silently when they have no metadata entry, when
        their ``path`` is not an existing directory, or when no ``.opf`` or
        ``.ncx`` file is found below that directory.

        Args:
            bookid: Passed through to ``AnnotationManager.get_annotations``.

        Returns:
            A nested ``defaultdict``. Each leaf is a dict with the keys
            ``creationdate``, ``filepos``, ``idref`` (the resolved href),
            ``note`` and ``selectedtext``.
        """
        manager = AnnotationManager(self.annotation_db)
        annotations = manager.get_annotations(bookid=bookid)
        bl_manager = BookListManager(plist_path=self.books_plist)
        booksinfo = bl_manager.get_books_info()
        booksnote = defaultdict(lambda: defaultdict(dict))
        for assetid, notes in annotations.items():
            bookinfo = booksinfo.get(assetid)
            if not bookinfo:
                continue
            epub_path = bookinfo.get('path')
            if not epub_path or not os.path.isdir(epub_path):
                continue
            opf_path = self.find_file_by_ext(epub_path, ['.opf'])
            ncx_path = self.find_file_by_ext(epub_path, ['.ncx'])
            if not opf_path or not ncx_path:
                continue
            id2href = parse_opf(opf_path)
            toc_tree = self.get_toc_tree(ncx_path)
            for uuid, ann in notes.items():
                idref = ann['idref']
                filepos = ann['filepos']
                selectedtext = ann.get('selectedtext')
                # Fall back to the raw idref when the manifest has no entry.
                href = id2href.get(idref, idref)
                chapter = self._resolve_chapter(
                    toc_tree, epub_path, opf_path, href, filepos, selectedtext
                )
                booksnote[assetid][chapter][uuid] = {
                    'creationdate': ann['creationdate'],
                    'filepos': filepos,
                    'idref': href,
                    'note': ann['note'],
                    'selectedtext': selectedtext
                }
        return booksnote

    def export_booksnote_to_md(self, booksnote, booksinfo, out_path=None):
        """Render *booksnote* as Markdown and optionally write it to a file.

        Args:
            booksnote: Mapping as produced by ``build_booksnote``.
            booksinfo: Mapping ``assetid -> info dict``; ``itemname`` is used
                as the book heading, falling back to the asset id.
            out_path: When truthy, the Markdown is written there as UTF-8.
                Missing parent directories are created.

        Returns:
            The generated Markdown text.
        """
        import datetime

        now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
        lines = [f'# 笔记导出 {now}\n']
        for assetid, chapters in booksnote.items():
            info = booksinfo.get(assetid) or {}
            # "or" rather than a .get default: a None/empty itemname must not
            # end up as the heading text.
            bookname = info.get('itemname') or assetid
            lines.append(f'\n## {bookname}\n')
            for chapter, notes in chapters.items():
                lines.append(f'### {chapter}')
                for ann in notes.values():
                    sel = ann.get('selectedtext')
                    note = ann.get('note')
                    if sel:
                        lines.append(sel)
                    if note:
                        lines.append(f'> {note}')
                    # Blank line separates consecutive notes.
                    lines.append('')
        md = '\n'.join(lines)
        if out_path:
            parent = os.path.dirname(out_path)
            if parent:
                # open() does not create directories; do it here so the
                # export does not fail on a fresh checkout.
                os.makedirs(parent, exist_ok=True)
            with open(out_path, 'w', encoding='utf-8') as f:
                f.write(md)
        return md
def main():
    """Interactively export the notes of one book to a Markdown file.

    Steps: copy the iBooks data files to their local locations, list all
    books ordered by last-opened time (most recent first), let the user pick
    one through a fuzzy prompt, then build and export that book's notes to
    ``export_notes/notes_export_<assetid>.md``.
    """
    import shutil
    import sys
    from InquirerPy import inquirer  # type: ignore

    exporter = BookNotesExporter(config)

    # Overwrite the database and plist copies under ./data with the files
    # from the real iBooks locations.
    src_files = [
        (config.IBOOKS_ANNOTATION_DB, config.LOCAL_ANNOTATION_DB),
        (config.IBOOKS_ANNOTATION_SHM, config.LOCAL_ANNOTATION_SHM),
        (config.IBOOKS_ANNOTATION_WAL, config.LOCAL_ANNOTATION_WAL),
        (config.IBOOKS_LIBRARY_DB, config.LOCAL_LIBRARY_DB),
        (config.IBOOKS_BOOKS_PLIST, config.LOCAL_BOOKS_PLIST)
    ]
    # SQLite sidecar files: a stale local copy must not survive next to a
    # freshly copied database, or SQLite would pair them with the new file.
    sidecars = {config.LOCAL_ANNOTATION_SHM, config.LOCAL_ANNOTATION_WAL}
    for src, dst in src_files:
        if os.path.exists(src):
            dst_dir = os.path.dirname(dst)
            if dst_dir:
                # copy2 does not create the destination directory.
                os.makedirs(dst_dir, exist_ok=True)
            shutil.copy2(src, dst)
            print(f'copy source data file to ./data : {dst}')
        else:
            print(f'file not found: {src} ')
            if dst in sidecars and os.path.exists(dst):
                os.remove(dst)

    # Load the metadata of all books first.
    manager = BookListManager(plist_path=config.LOCAL_BOOKS_PLIST, db_path=config.LOCAL_LIBRARY_DB)
    booksinfo = manager.get_books_info()
    last_open_times = manager.get_books_last_open()
    assetid2name = {}
    assetid2lastopen = {}
    for assetid, info in booksinfo.items():
        name = info.get('displayname') or info.get('itemname') or assetid
        if '-' in name:
            # Keep only the part before the first dash; fall back to the full
            # name when that part is empty.
            name = name.split('-', 1)[0].strip() or name
        assetid2name[assetid] = name
        # "or 0" keeps the sort key comparable when last_open is None.
        assetid2lastopen[assetid] = last_open_times.get(assetid, {}).get('last_open', 0) or 0

    sorted_assetids = sorted(assetid2name, key=lambda aid: assetid2lastopen[aid], reverse=True)

    # Map every menu label back to its asset id. Matching the answer with
    # startswith(name) picks the wrong book when one title is a prefix of
    # another, so the lookup is exact and labels are kept unique.
    label2assetid = {}
    for aid in sorted_assetids:
        label = f"{assetid2name[aid]} [{assetid2lastopen[aid]}]"
        if label in label2assetid:
            label = f"{label} ({aid})"
        label2assetid[label] = aid
    choices = list(label2assetid)
    if not choices:
        print("无可导出的笔记")
        sys.exit(0)

    answer = inquirer.fuzzy(
        message="请选择要导出的书名(支持模糊搜索):",
        choices=choices,
        multiselect=False,
        instruction="上下键选择,输入可模糊筛选,回车确定"
    ).execute()
    selected_assetid = label2assetid.get(answer)
    if selected_assetid is None:
        print("未找到选中书籍")
        sys.exit(1)

    selected_booksnote = exporter.build_booksnote(bookid=selected_assetid)
    selected_info = booksinfo.get(selected_assetid, {})
    selected_booksinfo = {selected_assetid: selected_info}
    out_path = f'export_notes/notes_export_{selected_assetid}.md'
    # Make sure the output directory exists before the exporter opens the file.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    exporter.export_booksnote_to_md(selected_booksnote, selected_booksinfo, out_path)
    title = selected_info.get("displayname") or selected_info.get("itemname") or selected_assetid
    # The opening title mark was missing, leaving an unbalanced closing mark.
    print(f'《{title}》 导出笔记 {out_path}')


if __name__ == '__main__':
    main()