iBook/exportbooknotes.py

216 lines
8.9 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
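exportbooknotes.py (OOP version)
--------------------------------
Features:
- Automatically sync the iBooks databases and metadata files into the local data directory.
- Parse AEAnnotation.sqlite, Books.plist and BKLibrary.sqlite to build structured note data.
- Parse each EPUB's table of contents and chapter information to locate the chapter every note belongs to.
- Show the book list in a command-line menu, most recently opened first, so the user can pick a book to export.
- Export only the selected book's notes, grouped by chapter, into a Markdown file.

Depends on config.py, which centralises all paths and configuration options.

Main interface: BookNotesExporter
- run(): interactive command-line export workflow (NOTE: no run() method is defined on the class in this file; the interactive flow lives in the `__main__` section)
- build_booksnote(bookid=None): build the structured note data
- export_booksnote_to_md(booksnote, booksinfo, out_path=None): export the notes as Markdown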
"""
import config
"""
Shape of the automatically generated booksnote data structure:
booksnote = {
    assetid: {
        label_path: {
            uuid: {
                'creationdate': '2023/7/12',
                'filepos': None,
                'idref': '008.xhtml',
                'note': None,
                'selectedtext': '這就是宣傳的恐怖之處'
            }
        }
    }
}
"""
import os
from collections import defaultdict
from annotationdata import AnnotationManager
from booklist_parse import BookListManager
from opf_parse import parse_opf
from toc_parse import TOCParser
from bs4 import BeautifulSoup
class BookNotesExporter:
    """Build structured note data from the local iBooks copies and render it as Markdown.

    The paths of the annotation database, the books plist and the library
    database are read from ``config_module`` (``LOCAL_ANNOTATION_DB``,
    ``LOCAL_BOOKS_PLIST`` and ``LOCAL_LIBRARY_DB``).
    """

    # Chapter label used when a note cannot be matched to any TOC entry.
    _UNKNOWN_CHAPTER = "(未找到章节)"

    def __init__(self, config_module=config):
        """Remember the configuration module and the local data file paths."""
        self.config = config_module
        self.annotation_db = config_module.LOCAL_ANNOTATION_DB
        self.books_plist = config_module.LOCAL_BOOKS_PLIST
        self.library_db = config_module.LOCAL_LIBRARY_DB

    @staticmethod
    def find_file_by_ext(root, exts):
        """Return the first file under ``root`` whose lower-cased name ends with one of ``exts``.

        The tree is walked with ``os.walk``; ``None`` is returned when no file
        matches.
        """
        suffixes = tuple(exts)
        for dirpath, _, files in os.walk(root):
            for filename in files:
                if filename.lower().endswith(suffixes):
                    return os.path.join(dirpath, filename)
        return None

    @staticmethod
    def get_toc_tree(toc_path):
        """Parse the NCX file at ``toc_path`` and return the tree built by ``TOCParser``.

        Only the direct ``navPoint`` children of ``navMap`` are handed to
        ``TOCParser.parse_navpoints``.
        """
        with open(toc_path, 'r', encoding='utf-8') as f:
            soup = BeautifulSoup(f, 'xml')
        nav_map = soup.find('navMap')
        if nav_map is None:
            # A malformed NCX without <navMap> used to raise AttributeError and
            # abort the whole export.
            # NOTE(review): assumes parse_navpoints accepts an empty list - confirm.
            nav_points = []
        else:
            nav_points = nav_map.find_all('navPoint', recursive=False)
        return TOCParser.parse_navpoints(nav_points)

    def _chapter_from_selected_text(self, epub_path, opf_dir, href, selectedtext):
        """Fallback chapter lookup: search the note's content file for its selected text.

        Returns the section reported by
        ``TOCParser.find_section_by_selectedtext`` or the unknown-chapter
        label when the file or the section cannot be found.
        """
        if not href or not selectedtext:
            return self._UNKNOWN_CHAPTER
        rel_path = href.split('#')[0]
        # Try the EPUB root first (the original behaviour), then the OPF
        # directory, because manifest hrefs are relative to the package document.
        for base_dir in (epub_path, opf_dir):
            html_path = os.path.join(base_dir, rel_path)
            if os.path.isfile(html_path):
                section = TOCParser.find_section_by_selectedtext(html_path, selectedtext)
                return section or self._UNKNOWN_CHAPTER
        return self._UNKNOWN_CHAPTER

    def build_booksnote(self, bookid=None):
        """Build ``{assetid: {chapter: {uuid: note_dict}}}`` from the annotations.

        Args:
            bookid: forwarded to ``AnnotationManager.get_annotations``.

        Returns:
            A nested ``defaultdict``. Each note dict has the keys
            ``creationdate``, ``filepos``, ``idref`` (the resolved href),
            ``note`` and ``selectedtext``. Books without metadata, without an
            unpacked EPUB directory, or without both an ``.opf`` and an
            ``.ncx`` file are skipped.
        """
        manager = AnnotationManager(self.annotation_db)
        annotations = manager.get_annotations(bookid=bookid)
        bl_manager = BookListManager(plist_path=self.books_plist)
        booksinfo = bl_manager.get_books_info()
        booksnote = defaultdict(lambda: defaultdict(dict))
        for assetid, notes in annotations.items():
            bookinfo = booksinfo.get(assetid)
            if not bookinfo:
                continue
            epub_path = bookinfo.get('path')
            if not epub_path or not os.path.isdir(epub_path):
                continue
            opf_path = self.find_file_by_ext(epub_path, ['.opf'])
            ncx_path = self.find_file_by_ext(epub_path, ['.ncx'])
            if not opf_path or not ncx_path:
                continue
            id2href = parse_opf(opf_path)
            toc_tree = self.get_toc_tree(ncx_path)
            opf_dir = os.path.dirname(opf_path)
            for uuid, ann in notes.items():
                idref = ann['idref']
                filepos = ann['filepos']
                # Fall back to the raw idref when the manifest has no entry for it.
                href = id2href.get(idref, idref)
                chapter = TOCParser.find_label_path(toc_tree, href, filepos)
                if chapter is None:
                    chapter = self._chapter_from_selected_text(
                        epub_path, opf_dir, href, ann.get('selectedtext')
                    )
                booksnote[assetid][chapter][uuid] = {
                    'creationdate': ann['creationdate'],
                    'filepos': filepos,
                    'idref': href,
                    'note': ann['note'],
                    'selectedtext': ann['selectedtext']
                }
        return booksnote

    def export_booksnote_to_md(self, booksnote, booksinfo, out_path=None):
        """Render ``booksnote`` as Markdown and optionally write it to ``out_path``.

        Args:
            booksnote: mapping ``{assetid: {chapter: {uuid: note_dict}}}``.
            booksinfo: mapping ``{assetid: info_dict}``; ``itemname`` is used as
                the book heading, falling back to the asset id.
            out_path: when truthy, the Markdown is written there as UTF-8 and
                missing parent directories are created.

        Returns:
            The generated Markdown text.
        """
        import datetime

        now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
        lines = [f'# 笔记导出 {now}\n']
        for assetid, chapters in booksnote.items():
            bookname = booksinfo.get(assetid, {}).get('itemname', assetid)
            lines.append(f'\n## {bookname}\n')
            for chapter, notes in chapters.items():
                lines.append(f'### {chapter}')
                for ann in notes.values():
                    sel = ann.get('selectedtext')
                    note = ann.get('note')
                    if sel:
                        lines.append(sel)
                    if note:
                        # Quote every line so a multi-line note stays inside
                        # the blockquote instead of only its first line.
                        lines.extend(
                            f'> {note_line}' if note_line else '>'
                            for note_line in note.splitlines()
                        )
                    lines.append('')
        md = '\n'.join(lines)
        if out_path:
            out_dir = os.path.dirname(out_path)
            if out_dir:
                os.makedirs(out_dir, exist_ok=True)
            with open(out_path, 'w', encoding='utf-8') as f:
                f.write(md)
        return md
def sync_source_files(config_module):
    """Copy the iBooks source data files into their local counterparts.

    Args:
        config_module: object exposing the ``IBOOKS_*`` source paths and the
            matching ``LOCAL_*`` destination paths used below.

    Each existing source file is copied with ``shutil.copy2``; missing
    destination directories are created first. A missing source is reported
    and skipped. When a ``-shm``/``-wal`` sidecar is missing at the source but
    its database was just refreshed, a leftover local sidecar from an earlier
    run is removed so it is not paired with the newer database copy.
    """
    import os
    import shutil

    src_files = [
        (config_module.IBOOKS_ANNOTATION_DB, config_module.LOCAL_ANNOTATION_DB),
        (config_module.IBOOKS_ANNOTATION_SHM, config_module.LOCAL_ANNOTATION_SHM),
        (config_module.IBOOKS_ANNOTATION_WAL, config_module.LOCAL_ANNOTATION_WAL),
        (config_module.IBOOKS_LIBRARY_DB, config_module.LOCAL_LIBRARY_DB),
        (config_module.IBOOKS_LIBRARY_DB + '-shm', config_module.LOCAL_LIBRARY_DB + '-shm'),
        (config_module.IBOOKS_LIBRARY_DB + '-wal', config_module.LOCAL_LIBRARY_DB + '-wal'),
        (config_module.IBOOKS_BOOKS_PLIST, config_module.LOCAL_BOOKS_PLIST)
    ]
    sidecar_suffixes = ('-shm', '-wal')
    copied = set()  # destinations refreshed during this run
    for src, dst in src_files:
        if os.path.exists(src):
            dst_dir = os.path.dirname(dst)
            if dst_dir:
                # First run: the local data directory may not exist yet.
                os.makedirs(dst_dir, exist_ok=True)
            shutil.copy2(src, dst)
            copied.add(dst)
            print(f'已拷贝源数据文件到本地: {dst}')
        else:
            print(f'未找到文件: {src}')
            # Both suffixes are four characters long, so dst[:-4] is the
            # database path the sidecar belongs to. The sidecar is only dropped
            # when that database was refreshed in this run; otherwise the old
            # database/sidecar pair is left untouched.
            if (
                dst.endswith(sidecar_suffixes)
                and dst[:-4] in copied
                and os.path.exists(dst)
            ):
                os.remove(dst)
                print(f'已删除过期的本地文件: {dst}')
def main():
    """Interactively pick one book and export its notes to a Markdown file.

    Steps: sync the iBooks source files, list the books ordered by their
    last-open value (descending), let the user choose one through a fuzzy
    prompt, build that book's notes and write them under
    ``config.EXPORT_NOTES_DIR``.
    """
    import datetime
    import re

    from InquirerPy import inquirer  # type: ignore

    exporter = BookNotesExporter(config)
    sync_source_files(config)
    # The .sqlite-shm and .sqlite-wal files are temporary SQLite files that only
    # exist while the database is in WAL mode and has pending writes. Once no
    # process has the database open, SQLite may clean both files up.
    #
    # Debug helper: list every file in the data directory (at this point the
    # shm and wal files are present).
    # data_dir = config.DATA_DIR if hasattr(config, 'DATA_DIR') else './data'
    # print(f"\n[data目录文件列表] {data_dir}:")
    # for root, dirs, files in os.walk(data_dir):
    #     for file in files:
    #         print(os.path.join(root, file))

    # Load the metadata of every book first.
    manager = BookListManager(plist_path=config.LOCAL_BOOKS_PLIST, db_path=config.LOCAL_LIBRARY_DB)
    booksinfo = manager.get_books_info()
    last_open_times = manager.get_books_last_open()
    assetid2name = {}
    assetid2lastopen = {}
    for assetid, info in booksinfo.items():
        name = info.get('displayname') or info.get('itemname') or assetid
        if '-' in name:
            # Keep only the part of the title in front of the first dash.
            name = name.split('-', 1)[0].strip()
        assetid2name[assetid] = name
        # "or 0" also covers a stored None, which would otherwise break sorting.
        assetid2lastopen[assetid] = last_open_times.get(assetid, {}).get('last_open') or 0

    sorted_assetids = sorted(assetid2name, key=assetid2lastopen.get, reverse=True)
    # Map every menu label to its asset id so the answer is resolved by exact
    # match. Prefix matching on the book name picked the wrong book when one
    # title was a prefix of another or when a name was empty.
    choice2assetid = {}
    for aid in sorted_assetids:
        label = f"{assetid2name[aid]} [{assetid2lastopen[aid]}]"
        choice2assetid.setdefault(label, aid)
    choices = list(choice2assetid)
    if not choices:
        print("无可导出的笔记")
        raise SystemExit(0)

    answer = inquirer.fuzzy(
        message="请选择要导出的书名(支持模糊搜索):",
        choices=choices,
        multiselect=False,
        instruction="上下键选择,输入可模糊筛选,回车确定"
    ).execute()
    selected_assetid = choice2assetid.get(answer)
    if selected_assetid is None:
        print("未找到选中书籍")
        raise SystemExit(1)

    selected_booksnote = exporter.build_booksnote(bookid=selected_assetid)
    selected_info = booksinfo.get(selected_assetid, {})
    selected_booksinfo = {selected_assetid: selected_info}
    bookname = selected_info.get("displayname") or selected_info.get("itemname") or selected_assetid
    ts = datetime.datetime.now().strftime('%m%d%H%M')
    # File-name stem: the part of the title before the first '.', ':', '_',
    # backslash, '[' or '('.
    shortname = re.split(r'[.:_\\[\(]', bookname)[0].strip()
    # A path separator would redirect the output path; an empty stem falls back
    # to the asset id.
    shortname = shortname.replace(os.sep, '_') or selected_assetid
    os.makedirs(config.EXPORT_NOTES_DIR, exist_ok=True)
    out_path = os.path.join(config.EXPORT_NOTES_DIR, f'notes_{shortname}-{ts}.md')
    exporter.export_booksnote_to_md(selected_booksnote, selected_booksinfo, out_path)
    # The original message had a closing 》 without the opening 《.
    print(f'《{bookname}》 导出笔记 {out_path}')


if __name__ == '__main__':
    main()