Files
iBook/exportbooknotes.py
2025-08-06 13:11:08 +08:00

181 lines
7.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Build the ``booksnote`` data structure automatically:

booksnote = {
    assetid: { label_path: { uuid: {
        'creationdate': '2023/7/12',
        'filepos': None,
        'idref': '008.xhtml',
        'note': None,
        'selectedtext': '這就是宣傳的恐怖之處'
    }}}
}
"""
from collections import defaultdict
import os
from annotationdata import get_annotations
from booklist_parse import parse_books_plist
from opf_parse import parse_opf
from toc_parse import parse_navpoints, find_label_path
from bs4 import BeautifulSoup
def find_file_by_ext(root, exts):
    """Return the first file under ``root`` whose name ends with one of ``exts``.

    The directory tree is walked recursively with ``os.walk`` and the first
    match encountered is returned. Matching is case-insensitive on both the
    file name and the requested extensions.

    Args:
        root: Directory to search.
        exts: A single extension string (e.g. ``'.opf'``) or an iterable of
            extension strings.

    Returns:
        The joined path of the first matching file, or ``None`` when nothing
        matches or ``exts`` is empty.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(exts, str):
        exts = (exts,)
    # Lower-case the extensions as well: the file name is lower-cased before
    # comparison, so an upper-case extension could never match.
    suffixes = tuple(ext.lower() for ext in exts)
    if not suffixes:
        return None
    for dirpath, _, files in os.walk(root):
        for filename in files:
            # str.endswith accepts a tuple, so one call covers every suffix.
            if filename.lower().endswith(suffixes):
                return os.path.join(dirpath, filename)
    return None
def get_toc_tree(toc_path):
    """Parse an NCX file and return the tree built by ``parse_navpoints``.

    Args:
        toc_path: Path to the ``.ncx`` file; it is read as UTF-8 and parsed
            with BeautifulSoup's ``'xml'`` parser.

    Returns:
        Whatever ``parse_navpoints`` returns for the direct ``navPoint``
        children of ``<navMap>``. When the document has no ``<navMap>`` an
        empty list of nav points is passed, i.e. the file is handled the same
        way as one with an empty ``<navMap/>``.
    """
    with open(toc_path, 'r', encoding='utf-8') as f:
        soup = BeautifulSoup(f, 'xml')
    nav_map = soup.find('navMap')
    if nav_map is None:
        # find() returns None when the element is missing; calling find_all
        # on it would raise AttributeError and abort the whole export.
        top_level = []
    else:
        # recursive=False: only top-level entries; nested navPoints are left
        # to parse_navpoints.
        top_level = nav_map.find_all('navPoint', recursive=False)
    return parse_navpoints(top_level)
def _chapter_from_html(search_roots, href, selectedtext):
    """Look up a chapter label by searching the annotated HTML file itself.

    Args:
        search_roots: Directories against which ``href`` is resolved, tried
            in order; the first one where the file exists is used.
        href: Content document reference, optionally with a ``#fragment``.
        selectedtext: Highlighted text handed to
            ``find_section_by_selectedtext``.

    Returns:
        The section found by ``find_section_by_selectedtext``, or ``None``
        when ``href``/``selectedtext`` is empty, the file cannot be found, or
        no section is reported.
    """
    if not href or not selectedtext:
        return None
    # Imported lazily, as in the original code: only needed on this fallback.
    from toc_parse import find_section_by_selectedtext
    relative = href.split('#')[0]
    for root in search_roots:
        html_path = os.path.join(root, relative)
        if os.path.exists(html_path):
            return find_section_by_selectedtext(html_path, selectedtext) or None
    return None


def build_booksnote(annotation_db='data/AEAnnotation.sqlite', books_plist='data/Books.plist'):
    """Group annotations by book and chapter.

    Args:
        annotation_db: Path passed to ``get_annotations``.
        books_plist: Path passed to ``parse_books_plist``.

    Returns:
        A nested ``defaultdict`` shaped as
        ``{assetid: {chapter: {uuid: record}}}`` where ``record`` holds the
        keys ``creationdate``, ``filepos``, ``idref`` (the resolved href),
        ``note`` and ``selectedtext``. Books are skipped when they have no
        entry in the plist, when their ``path`` is not an existing directory,
        or when no ``.opf``/``.ncx`` file is found under that directory.
        Annotations whose chapter cannot be determined are filed under
        ``"(未找到章节)"``.
    """
    annotations = get_annotations(annotation_db)
    booksinfo = parse_books_plist(books_plist)
    booksnote = defaultdict(lambda: defaultdict(dict))
    for assetid, notes in annotations.items():
        # Locate the unpacked EPUB directory for this book.
        bookinfo = booksinfo.get(assetid)
        if not bookinfo:
            continue
        epub_path = bookinfo.get('path')
        if not epub_path or not os.path.isdir(epub_path):
            continue
        # Find the package document (OPF) and the NCX table of contents.
        opf_path = find_file_by_ext(epub_path, ['.opf'])
        ncx_path = find_file_by_ext(epub_path, ['.ncx'])
        if not opf_path or not ncx_path:
            continue
        id2href = parse_opf(opf_path)
        toc_tree = get_toc_tree(ncx_path)
        # The EPUB root is tried first (original behaviour). The OPF's own
        # directory is a fallback: presumably parse_opf returns manifest
        # hrefs unchanged, and those are relative to the OPF file rather than
        # the EPUB root — TODO confirm against parse_opf.
        search_roots = [epub_path]
        opf_dir = os.path.dirname(opf_path)
        if opf_dir != epub_path:
            search_roots.append(opf_dir)
        for uuid, ann in notes.items():
            idref = ann['idref']
            filepos = ann['filepos']
            selectedtext = ann.get('selectedtext')
            # Fall back to the raw idref when the manifest has no such id.
            href = id2href.get(idref, idref)
            chapter = find_label_path(toc_tree, href, filepos)
            if chapter is None:
                # TOC lookup failed: derive the chapter from the HTML file.
                # The helper tolerates a missing href instead of crashing on
                # ``None.split``.
                chapter = _chapter_from_html(search_roots, href, selectedtext)
            if chapter is None:
                chapter = "(未找到章节)"
            booksnote[assetid][chapter][uuid] = {
                'creationdate': ann['creationdate'],
                'filepos': filepos,
                'idref': href,
                'note': ann['note'],
                'selectedtext': selectedtext,
            }
    return booksnote
import datetime
def export_booksnote_to_md(booksnote, booksinfo, out_path=None):
    """Render a ``booksnote`` structure as Markdown.

    Layout of the generated document::

        # 笔记导出 <export time, YYYY-MM-DD HH:MM>
        ## <book name>
        ### <chapter>
        <selectedtext>
        > <note>          (only when a note exists)

    Args:
        booksnote: Mapping ``{assetid: {chapter: {uuid: annotation}}}``;
            each annotation is read through its ``selectedtext`` and ``note``
            keys.
        booksinfo: Mapping ``{assetid: info}``; ``info['itemname']`` is used
            as the book heading, falling back to the asset id when it is
            missing or empty.
        out_path: Optional file path. When given, the Markdown is written
            there as UTF-8 and missing parent directories are created.

    Returns:
        The Markdown text.
    """
    now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
    lines = [f'# 笔记导出 {now}\n']
    for assetid, chapters in booksnote.items():
        # ``or`` (not a .get default) so an explicit None/'' itemname also
        # falls back to the asset id instead of rendering "## None".
        bookname = (booksinfo.get(assetid) or {}).get('itemname') or assetid
        lines.append(f'\n## {bookname}\n')
        for chapter, notes in chapters.items():
            lines.append(f'### {chapter}')
            for ann in notes.values():
                sel = ann.get('selectedtext')
                note = ann.get('note')
                if sel:
                    lines.append(sel)
                if note:
                    # Prefix every line so a multi-line note stays inside the
                    # block quote.
                    lines.extend(
                        f'> {part}' if part else '>'
                        for part in str(note).splitlines()
                    )
                # Blank line separates consecutive annotations.
                lines.append('')
    md = '\n'.join(lines)
    if out_path:
        out_dir = os.path.dirname(out_path)
        if out_dir:
            # open(..., 'w') does not create directories.
            os.makedirs(out_dir, exist_ok=True)
        with open(out_path, 'w', encoding='utf-8') as f:
            f.write(md)
    return md
if __name__ == '__main__':
    # Interactive entry point: refresh the local data copies, let the user
    # pick one book, and export that book's notes to a Markdown file.
    import shutil
    from InquirerPy import inquirer

    def _date_key(value):
        """Turn a date-like value into a tuple of ints for chronological sorting.

        Plain string comparison mis-orders non-padded dates such as
        '2023/7/12' vs '2023/10/1'; comparing the numeric fields does not.
        """
        digits = ''.join(ch if ch in '0123456789' else ' ' for ch in str(value))
        return tuple(int(part) for part in digits.split())

    # Overwrite the database and plist copies under ./data with the live
    # iBooks files.
    ibooks_docs = os.path.expanduser('~/Library/Containers/com.apple.iBooksX/Data/Documents')
    annotation_src = os.path.join(ibooks_docs, 'AEAnnotation', 'AEAnnotation_v10312011_1727_local.sqlite')
    src_files = [
        (annotation_src, 'data/AEAnnotation.sqlite'),
        (annotation_src + '-shm', 'data/AEAnnotation.sqlite-shm'),
        (annotation_src + '-wal', 'data/AEAnnotation.sqlite-wal'),
        (os.path.join(ibooks_docs, 'BKLibrary', 'BKLibrary-1-091020131601.sqlite'), 'data/BKLibrary.sqlite'),
        (os.path.expanduser('~/Library/Containers/com.apple.BKAgentService/Data/Documents/iBooks/Books/Books.plist'), 'data/Books.plist'),
    ]
    for src, dst in src_files:
        if os.path.exists(src):
            # shutil.copy2 does not create the destination directory.
            os.makedirs(os.path.dirname(dst) or '.', exist_ok=True)
            shutil.copy2(src, dst)
            print(f'copy source data file to ./data : {dst}')
        else:
            print(f'file not found: {src} ')

    booksnote = build_booksnote()
    booksinfo = parse_books_plist('data/Books.plist')

    # Display name per book: displayname first, then itemname, then the
    # asset id. Books are ordered by their most recent note.
    assetid2name = {}
    assetid2latest = {}
    for assetid in booksnote:
        info = booksinfo.get(assetid, {})
        name = info.get('displayname') or info.get('itemname') or assetid
        # Keep only the part before the first "-"; fall back to the full
        # name if that would leave nothing to show.
        if '-' in name:
            name = name.split('-', 1)[0].strip() or name
        assetid2name[assetid] = name
        # Newest creationdate among all notes of this book; () when none.
        assetid2latest[assetid] = max(
            (
                _date_key(ann.get('creationdate'))
                for chapter in booksnote[assetid].values()
                for ann in chapter.values()
                if ann.get('creationdate')
            ),
            default=(),
        )

    # Newest first.
    sorted_assetids = sorted(assetid2name, key=lambda aid: assetid2latest[aid], reverse=True)
    # Each label embeds the asset id, so it is unique and maps back to
    # exactly one book. Prefix matching on the name would pick the wrong book
    # when one title is a prefix of another.
    label2assetid = {f"{assetid2name[aid]} [{aid}]": aid for aid in sorted_assetids}
    choices = list(label2assetid)
    if not choices:
        print("无可导出的笔记")
        raise SystemExit(0)

    answer = inquirer.fuzzy(
        message="请选择要导出的书名(支持模糊搜索):",
        choices=choices,
        multiselect=False,
        instruction="上下键选择,输入可模糊筛选,回车确定"
    ).execute()

    # Resolve the chosen label back to its asset id.
    selected_assetid = label2assetid.get(answer)
    if selected_assetid is None:
        print("未找到选中书籍")
        raise SystemExit(1)

    # Export only the selected book's notes.
    selected_booksnote = {selected_assetid: booksnote[selected_assetid]}
    selected_booksinfo = {selected_assetid: booksinfo.get(selected_assetid, {})}
    out_path = f'export_notes/notes_export_{selected_assetid}.md'
    # Make sure the output directory exists before the file is opened.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    export_booksnote_to_md(selected_booksnote, selected_booksinfo, out_path)
    selected_info = selected_booksinfo[selected_assetid]
    title = selected_info.get('displayname') or selected_info.get('itemname') or selected_assetid
    # The original message had a closing 》 without the opening 《.
    print(f'《{title}》 导出笔记 {out_path}')