Files
iBook/exportbooknotes.py
2025-10-21 10:46:03 +08:00

311 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
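exportbooknotes.py (OOP version)
--------------------------------
Features:
- Sync the iBooks databases and metadata files into the local data directory.
- Parse AEAnnotation.sqlite, Books.plist and BKLibrary.sqlite to build structured note data.
- Parse each EPUB's table of contents and chapter information to locate the chapter every note belongs to.
- Show the book list in a command-line menu, sorted by most recently opened first, for the user to choose from.
- Export only the selected book's notes, grouped by chapter, to a Markdown file.
Dependencies: config.py centralizes all paths and configuration options.
Main interface: BookNotesExporter
- run(): interactive command-line export flow (NOTE: no run() method is visible on the class in this file; the interactive flow lives in the `if __name__ == '__main__'` section)
- build_booksnote(bookid=None): build the structured note data
- export_booksnote_to_md(booksnote, booksinfo, out_path=None): export to Markdown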
"""
import config
"""
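Shape of the automatically generated booksnote data.

BookNotesExporter.build_booksnote() returns one list of annotation dicts per book:
booksnote = {
    assetid: [
        {
            'creationdate': '2023/7/12',
            'filepos': None,
            'idref': '008.xhtml',
            'note': None,
            'selectedtext': '這就是宣傳的恐怖之處',
            # 'chapter_info' is added when the chapter could be resolved
        },
    ]
}
Historical note: earlier versions nested the same annotation fields as
booksnote = { assetid: { label_path: { uuid: { ... } } } }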
"""
import os
from collections import defaultdict
from annotationdata import AnnotationManager
from booklist_parse import BookListManager
from opf_parse import parse_opf
from toc_parse import TOCParser
from bs4 import BeautifulSoup
class BookNotesExporter:
    """Build per-book annotation lists and export them as Markdown.

    Annotations are read through ``AnnotationManager`` and book metadata through
    ``BookListManager``. When a book's unpacked EPUB directory is available, each
    annotation is additionally tagged with the chapter it belongs to.
    """

    # Heading under which annotations without chapter information are grouped.
    _UNKNOWN_CHAPTER = '未知章节'
    # Placeholder for a failed chapter lookup; it is never stored on an annotation.
    _CHAPTER_NOT_FOUND = "(未找到章节)"
    # When true, exported notes also show their creation date and location.
    debug_mode = False

    def __init__(self, config_module=None):
        """Remember the configuration module and the local data file paths.

        Args:
            config_module: Object exposing ``LOCAL_ANNOTATION_DB``,
                ``LOCAL_BOOKS_PLIST`` and ``LOCAL_LIBRARY_DB``. Defaults to the
                imported ``config`` module.
        """
        if config_module is None:
            # Late-bound default: behaves exactly like the former
            # ``config_module=config`` default, but is resolved per call.
            config_module = config
        self.config = config_module
        self.annotation_db = config_module.LOCAL_ANNOTATION_DB
        self.books_plist = config_module.LOCAL_BOOKS_PLIST
        self.library_db = config_module.LOCAL_LIBRARY_DB

    @staticmethod
    def find_file_by_ext(root, exts):
        """Return the first file under *root* whose name ends with one of *exts*.

        The comparison is case-insensitive on both the file name and the
        extensions. Directories and files are visited in sorted order so the
        result does not depend on the file system's listing order.

        Args:
            root: Directory to search recursively.
            exts: Iterable of suffixes such as ``['.opf']``.

        Returns:
            The path of the first match, or ``None`` when nothing matches.
        """
        suffixes = tuple(ext.lower() for ext in exts)
        if not suffixes:
            return None
        for dirpath, dirnames, filenames in os.walk(root):
            dirnames.sort()  # in-place sort steers os.walk's descent order
            for filename in sorted(filenames):
                if filename.lower().endswith(suffixes):
                    return os.path.join(dirpath, filename)
        return None

    @staticmethod
    def get_toc_tree(toc_path):
        """Parse an NCX file and return the tree built by ``TOCParser.parse_navpoints``.

        Args:
            toc_path: Path to the ``.ncx`` file.

        Returns:
            Whatever ``TOCParser.parse_navpoints`` returns for the top-level
            ``navPoint`` elements. A file without a ``navMap`` element is
            treated like one whose ``navMap`` is empty.
        """
        # Read bytes so the parser can honour a BOM or the XML declaration's
        # encoding instead of assuming UTF-8.
        with open(toc_path, 'rb') as f:
            soup = BeautifulSoup(f, 'xml')
        nav_map = soup.find('navMap')
        if nav_map is None:
            nav_points = []
        else:
            nav_points = nav_map.find_all('navPoint', recursive=False)
        return TOCParser.parse_navpoints(nav_points)

    @staticmethod
    def _locate_content_file(href, search_dirs):
        """Return the first existing path for *href* (fragment removed) under *search_dirs*."""
        relative = href.split('#')[0]
        for base in search_dirs:
            candidate = os.path.join(base, relative)
            if os.path.exists(candidate):
                return candidate
        return None

    def _attach_chapter_info(self, notes_list, epub_path, opf_path, ncx_path):
        """Set ``'chapter_info'`` on every annotation whose chapter can be resolved.

        Annotations are modified in place; their order is left untouched.
        """
        # parse_opf's result is used as a mapping from idref to href.
        id2href = parse_opf(opf_path)
        toc_tree = self.get_toc_tree(ncx_path)
        # Try the EPUB root first (the original lookup), then the OPF directory.
        # NOTE(review): manifest hrefs are normally relative to the OPF file;
        # confirm which form parse_opf returns.
        search_dirs = [epub_path]
        opf_dir = os.path.dirname(opf_path)
        if opf_dir and opf_dir != epub_path:
            search_dirs.append(opf_dir)

        for ann in notes_list:
            idref = ann.get('idref')
            if not idref:
                continue
            href = id2href.get(idref, idref)
            chapter = TOCParser.find_label_path(toc_tree, href, ann.get('filepos'))
            if chapter is None:
                # Fall back to locating the chapter through the highlighted text.
                selectedtext = ann.get('selectedtext')
                html_path = self._locate_content_file(href, search_dirs)
                if html_path and selectedtext:
                    chapter = TOCParser.find_section_by_selectedtext(html_path, selectedtext)
            # Keep any pre-existing chapter_info unless the lookup succeeded.
            if chapter and chapter != self._CHAPTER_NOT_FOUND:
                ann['chapter_info'] = chapter

    def build_booksnote(self, bookid=None):
        """Build the structured note data, optionally for a single book.

        Books that have no annotations, or no entry in the book metadata, are
        skipped. For the remaining books the annotation list is kept in the
        order returned by ``AnnotationManager.get_annotations`` (described by
        the original documentation as sorted by CFI position). When the book's
        ``'path'`` is a directory containing both an ``.opf`` and an ``.ncx``
        file, chapter information is added to the annotations.

        Args:
            bookid: Passed through to ``AnnotationManager.get_annotations``.

        Returns:
            dict: ``{assetid: [annotation_dict, ...]}``.
        """
        manager = AnnotationManager(self.annotation_db)
        annotations = manager.get_annotations(bookid=bookid)
        bl_manager = BookListManager(plist_path=self.books_plist)
        booksinfo = bl_manager.get_books_info()

        booksnote = {}
        for assetid, notes_list in annotations.items():
            if not notes_list:
                continue
            bookinfo = booksinfo.get(assetid)
            if not bookinfo:
                continue
            booksnote[assetid] = notes_list

            epub_path = bookinfo.get('path')
            if not epub_path or not os.path.isdir(epub_path):
                # No unpacked EPUB available: keep the annotations as they are.
                continue
            opf_path = self.find_file_by_ext(epub_path, ['.opf'])
            ncx_path = self.find_file_by_ext(epub_path, ['.ncx'])
            if opf_path and ncx_path:
                self._attach_chapter_info(notes_list, epub_path, opf_path, ncx_path)
        return booksnote

    def export_booksnote_to_md(self, booksnote, booksinfo, out_path=None):
        """Render the notes as Markdown and optionally write them to a file.

        Within each book, consecutive annotations sharing the same
        ``'chapter_info'`` are emitted under one chapter heading, so the
        incoming order of the annotations is preserved. Annotations whose
        ``'chapter_info'`` is missing or empty are grouped under '未知章节'.

        Args:
            booksnote: ``{assetid: [annotation_dict, ...]}``.
            booksinfo: ``{assetid: metadata_dict}``; ``'displayname'``,
                ``'itemname'`` and ``'author'`` are used when present.
            out_path: Optional output file path. Its parent directory is
                created when the path has one.

        Returns:
            str: The Markdown text.
        """
        import datetime
        import itertools

        now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
        lines = [f'# 笔记导出 {now}\n']

        def chapter_of(ann):
            # `or` also covers an explicit None, which would otherwise be
            # mistaken for "no chapter started yet" and lose the notes.
            return ann.get('chapter_info') or self._UNKNOWN_CHAPTER

        for assetid, notes_list in booksnote.items():
            if not notes_list:
                continue
            bookinfo = booksinfo.get(assetid, {})
            bookname = bookinfo.get('displayname') or bookinfo.get('itemname') or assetid
            author = bookinfo.get('author', '')
            lines.append(f'\n## {bookname}')
            if author:
                lines.append(f'**作者**: {author}')
            lines.append('')
            # groupby only merges adjacent items, so the list order is kept.
            for chapter_name, group in itertools.groupby(notes_list, key=chapter_of):
                self._export_chapter_notes(lines, chapter_name, list(group))

        md = '\n'.join(lines)
        if out_path:
            out_dir = os.path.dirname(out_path)
            if out_dir:
                # A bare file name has no directory part, and os.makedirs('')
                # raises, so only create the directory when there is one.
                os.makedirs(out_dir, exist_ok=True)
            with open(out_path, 'w', encoding='utf-8') as f:
                f.write(md)
            print(f'[导出] 笔记已按CFI位置排序导出到: {out_path}')
        return md

    def _export_chapter_notes(self, lines, chapter_name, chapter_notes):
        """Append one chapter's heading and notes to *lines*.

        Args:
            lines: Output list of Markdown lines, extended in place.
            chapter_name: Text of the chapter heading.
            chapter_notes: Annotation dicts of this chapter, in output order.
        """
        if not chapter_notes:
            return
        lines.append(f'### {chapter_name}')
        lines.append('')
        show_debug = bool(getattr(self, 'debug_mode', False))
        for index, ann in enumerate(chapter_notes, 1):
            selected_text = ann.get('selectedtext', '')
            note = ann.get('note', '')
            if selected_text:
                lines.append(f'**{index}.** {selected_text}')
            if note:
                # Quote every line so a multi-line note stays in one blockquote.
                for note_line in str(note).splitlines():
                    lines.append(f'> {note_line}' if note_line else '>')
            if show_debug:
                creation_date = ann.get('creationdate', '')
                location = ann.get('location', '')
                if creation_date:
                    lines.append(f'*时间*: {creation_date}')
                if location:
                    lines.append(f'*位置*: `{location}`')
            lines.append('')
        lines.append('---')
        lines.append('')
def sync_source_files(config_module):
    """Copy the iBooks source data files into the local data directory.

    Every (source, destination) pair is taken from *config_module*. Existing
    sources are copied with ``shutil.copy2``; the destination directory is
    created first when it does not exist. Missing sources are reported.

    When a ``-shm``/``-wal`` source is missing but a local copy from an
    earlier sync still exists, that local copy is removed so it is not left
    next to a freshly copied database it does not belong to.

    Args:
        config_module: Object exposing the ``IBOOKS_*`` source paths and the
            matching ``LOCAL_*`` destination paths used below.
    """
    import shutil
    import os

    src_files = [
        (config_module.IBOOKS_ANNOTATION_DB, config_module.LOCAL_ANNOTATION_DB),
        (config_module.IBOOKS_ANNOTATION_SHM, config_module.LOCAL_ANNOTATION_SHM),
        (config_module.IBOOKS_ANNOTATION_WAL, config_module.LOCAL_ANNOTATION_WAL),
        (config_module.IBOOKS_LIBRARY_DB, config_module.LOCAL_LIBRARY_DB),
        (config_module.IBOOKS_LIBRARY_DB + '-shm', config_module.LOCAL_LIBRARY_DB + '-shm'),
        (config_module.IBOOKS_LIBRARY_DB + '-wal', config_module.LOCAL_LIBRARY_DB + '-wal'),
        (config_module.IBOOKS_BOOKS_PLIST, config_module.LOCAL_BOOKS_PLIST),
    ]
    for src, dst in src_files:
        if os.path.exists(src):
            dst_dir = os.path.dirname(dst)
            if dst_dir:
                # copy2 does not create missing parent directories.
                os.makedirs(dst_dir, exist_ok=True)
            shutil.copy2(src, dst)
            print(f'已拷贝源数据文件到本地: {dst}')
            continue

        print(f'未找到文件: {src}')
        # The -shm/-wal sidecars only exist while the database is in use, so a
        # missing source is normal; a leftover local sidecar, however, would
        # be paired with the newly copied database file.
        if dst.endswith(('-shm', '-wal')) and os.path.exists(dst):
            os.remove(dst)
            print(f'已移除过期的本地文件: {dst}')
def main():
    """Interactively pick one book and export its notes to a Markdown file.

    Steps: sync the iBooks source files into the local data directory, list
    the known books ordered by last-open time (most recent first), let the
    user choose one through a fuzzy prompt, then build and export that book's
    notes into ``config.EXPORT_NOTES_DIR``.

    Raises:
        SystemExit: With status 0 when there is no book to choose from, and
            with status 1 when the chosen entry cannot be mapped to a book.
    """
    import datetime
    import re
    from InquirerPy import inquirer  # type: ignore

    exporter = BookNotesExporter(config)
    sync_source_files(config)
    # The .sqlite-shm and .sqlite-wal files are SQLite temporary files. They
    # only exist while the database is in WAL mode and has been written to;
    # once no process has the database open they may be cleaned up by SQLite.
    # (For debugging, list the files under the data directory at this point to
    # check whether the shm/wal copies are present.)

    # Load the metadata of all books first.
    manager = BookListManager(plist_path=config.LOCAL_BOOKS_PLIST, db_path=config.LOCAL_LIBRARY_DB)
    booksinfo = manager.get_books_info()
    last_open_times = manager.get_books_last_open()

    assetid2name = {}
    assetid2lastopen = {}
    for assetid, info in booksinfo.items():
        name = info.get('displayname') or info.get('itemname') or assetid
        if '-' in name:
            # Keep only the part before the first '-' as the menu title.
            name = name.split('-', 1)[0].strip()
        assetid2name[assetid] = name
        # Fall back to 0 for a missing or None value so sorting never has to
        # compare None with a number.
        assetid2lastopen[assetid] = (last_open_times.get(assetid) or {}).get('last_open') or 0

    sorted_assetids = sorted(assetid2name, key=assetid2lastopen.__getitem__, reverse=True)

    # Map every menu label back to its asset id. Resolving the answer through
    # this mapping is exact, whereas a prefix comparison picks the wrong book
    # when one title is a prefix of another.
    label2assetid = {}
    for aid in sorted_assetids:
        label = f"{assetid2name[aid]} [{assetid2lastopen[aid]}]"
        label2assetid.setdefault(label, aid)
    choices = list(label2assetid)
    if not choices:
        print("无可导出的笔记")
        raise SystemExit(0)

    answer = inquirer.fuzzy(
        message="请选择要导出的书名(支持模糊搜索):",
        choices=choices,
        multiselect=False,
        instruction="上下键选择,输入可模糊筛选,回车确定"
    ).execute()
    selected_assetid = label2assetid.get(answer)
    if selected_assetid is None:
        print("未找到选中书籍")
        raise SystemExit(1)

    selected_booksnote = exporter.build_booksnote(bookid=selected_assetid)
    selected_info = booksinfo.get(selected_assetid, {})
    selected_booksinfo = {selected_assetid: selected_info}
    bookname = selected_info.get("displayname") or selected_info.get("itemname") or selected_assetid
    ts = datetime.datetime.now().strftime('%m%d%H%M')
    # File name: keep the part of the title before the first '.', ':', '_',
    # backslash, '[' or '('.
    shortname = re.split(r'[.:_\\[\(]', bookname)[0].strip()
    # A '/' would silently turn part of the title into a sub-directory, and an
    # empty prefix would give an anonymous file name.
    shortname = shortname.replace('/', '_') or selected_assetid
    out_path = os.path.join(config.EXPORT_NOTES_DIR, f'notes_{shortname}-{ts}.md')
    exporter.export_booksnote_to_md(selected_booksnote, selected_booksinfo, out_path)
    print(f'《{bookname}》 导出笔记 {out_path}')


if __name__ == '__main__':
    main()