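"""
exportbooknotes.py (OOP version)
--------------------------------
Features:
- Automatically sync the iBooks databases and metadata files into the local data directory.
- Parse AEAnnotation.sqlite, Books.plist and BKLibrary.sqlite to build structured note data.
- Parse the epub table of contents and chapter information to locate the chapter each note belongs to.
- Show the book list in a command-line menu, sorted by most recently opened first, so the user can pick a book to export.
- Export only the notes of the selected book, grouped by chapter, as a Markdown file.

Dependencies: config.py centralizes all paths and configuration options.

Main interface: BookNotesExporter
- run(): interactive command-line export workflow (the interactive flow currently lives in the ``__main__`` block)
- build_booksnote(bookid=None): build the structured note data
- export_booksnote_to_md(booksnote, booksinfo, out_path=None): export to Markdown
"""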
|
||
import config
|
||
"""
Shape of the automatically generated ``booksnote`` data structure:

booksnote = {
    assetid: { label_path: { uuid: {
        'creationdate': '2023/7/12',
        'filepos': None,
        'idref': '008.xhtml',
        'note': None,
        'selectedtext': '這就是宣傳的恐怖之處'
    }}}
}
"""
|
||
import os
|
||
from collections import defaultdict
|
||
from annotationdata import AnnotationManager
|
||
from booklist_parse import BookListManager
|
||
from opf_parse import parse_opf
|
||
from toc_parse import TOCParser
|
||
from bs4 import BeautifulSoup
|
||
|
||
|
||
class BookNotesExporter:
    """Build per-chapter note data from the local iBooks copies and export it as Markdown.

    The paths of the annotation database, ``Books.plist`` and the library
    database are read from the configuration module passed to the constructor.
    """

    # Chapter label used when an annotation cannot be matched to any chapter.
    UNKNOWN_CHAPTER = "(未找到章节)"

    def __init__(self, config_module=config):
        """Store the config module and the local data-file paths it defines.

        Args:
            config_module: Object exposing ``LOCAL_ANNOTATION_DB``,
                ``LOCAL_BOOKS_PLIST`` and ``LOCAL_LIBRARY_DB``.
        """
        self.config = config_module
        self.annotation_db = config_module.LOCAL_ANNOTATION_DB
        self.books_plist = config_module.LOCAL_BOOKS_PLIST
        self.library_db = config_module.LOCAL_LIBRARY_DB

    @staticmethod
    def find_file_by_ext(root, exts):
        """Return the first file under ``root`` whose name ends with one of ``exts``.

        The comparison is case-insensitive. The walk order is whatever
        ``os.walk`` yields, so with several matches the choice is not specified.

        Args:
            root: Directory to search recursively.
            exts: A suffix or an iterable of suffixes, e.g. ``['.opf']``.

        Returns:
            The path of the first matching file, or ``None`` if nothing matches.
        """
        # A bare string would otherwise be iterated character by character.
        if isinstance(exts, str):
            exts = (exts,)
        suffixes = tuple(ext.lower() for ext in exts)
        for dirpath, _, files in os.walk(root):
            for filename in files:
                if filename.lower().endswith(suffixes):
                    return os.path.join(dirpath, filename)
        return None

    @staticmethod
    def get_toc_tree(toc_path):
        """Parse an NCX file and return the tree built by ``TOCParser.parse_navpoints``.

        Args:
            toc_path: Path of the ``.ncx`` file (read as UTF-8).

        Returns:
            Whatever ``TOCParser.parse_navpoints`` returns for the top-level
            ``navPoint`` elements of the document's ``navMap``.
        """
        with open(toc_path, 'r', encoding='utf-8') as f:
            soup = BeautifulSoup(f, 'xml')
        nav_map = soup.find('navMap')
        # A malformed NCX without <navMap> used to raise AttributeError and
        # abort the whole export; treat it as a TOC with no entries instead.
        # NOTE(review): assumes parse_navpoints accepts an empty list - confirm.
        if nav_map is None:
            nav_points = []
        else:
            nav_points = nav_map.find_all('navPoint', recursive=False)
        return TOCParser.parse_navpoints(nav_points)

    @staticmethod
    def _locate_html(epub_path, opf_path, href):
        """Return the on-disk path of the content document ``href`` refers to, or ``None``."""
        if not href:
            return None
        relative = href.split('#')[0]
        if not relative:
            return None
        # Try the EPUB root first (the historical behaviour), then the OPF
        # directory: manifest hrefs are relative to the package document,
        # which usually lives in a sub-directory such as OEBPS/.
        for base in (epub_path, os.path.dirname(opf_path)):
            candidate = os.path.join(base, relative)
            if os.path.isfile(candidate):
                return candidate
        return None

    def _fallback_chapter(self, epub_path, opf_path, href, selectedtext):
        """Find the chapter by searching the content document for the highlighted text."""
        html_path = self._locate_html(epub_path, opf_path, href)
        if html_path and selectedtext:
            section = TOCParser.find_section_by_selectedtext(html_path, selectedtext)
            if section:
                return section
        return self.UNKNOWN_CHAPTER

    def build_booksnote(self, bookid=None):
        """Build the nested ``{assetid: {chapter: {uuid: note}}}`` structure.

        Books are skipped when they have no entry in the book list, when their
        ``path`` is not an existing directory, or when no ``.opf`` / ``.ncx``
        file is found below that directory.

        Args:
            bookid: Forwarded to ``AnnotationManager.get_annotations``.

        Returns:
            A nested ``defaultdict``; each leaf is a dict with the keys
            ``creationdate``, ``filepos``, ``idref`` (the resolved href),
            ``note`` and ``selectedtext``.
        """
        manager = AnnotationManager(self.annotation_db)
        annotations = manager.get_annotations(bookid=bookid)
        bl_manager = BookListManager(plist_path=self.books_plist)
        booksinfo = bl_manager.get_books_info()
        booksnote = defaultdict(lambda: defaultdict(dict))
        for assetid, notes in annotations.items():
            bookinfo = booksinfo.get(assetid)
            if not bookinfo:
                continue
            epub_path = bookinfo.get('path')
            if not epub_path or not os.path.isdir(epub_path):
                continue
            opf_path = self.find_file_by_ext(epub_path, ['.opf'])
            ncx_path = self.find_file_by_ext(epub_path, ['.ncx'])
            if not opf_path or not ncx_path:
                continue
            id2href = parse_opf(opf_path)
            toc_tree = self.get_toc_tree(ncx_path)
            for uuid, ann in notes.items():
                idref = ann.get('idref')
                filepos = ann.get('filepos')
                selectedtext = ann.get('selectedtext')
                # Fall back to the raw idref when the manifest has no entry for it.
                href = id2href.get(idref, idref)
                chapter = TOCParser.find_label_path(toc_tree, href, filepos)
                if chapter is None:
                    chapter = self._fallback_chapter(epub_path, opf_path, href, selectedtext)
                booksnote[assetid][chapter][uuid] = {
                    'creationdate': ann.get('creationdate'),
                    'filepos': filepos,
                    'idref': href,
                    'note': ann.get('note'),
                    'selectedtext': selectedtext,
                }
        return booksnote

    def export_booksnote_to_md(self, booksnote, booksinfo, out_path=None):
        """Render ``booksnote`` as Markdown and optionally write it to ``out_path``.

        Layout: a level-1 heading with the export time, one level-2 heading per
        book, one level-3 heading per chapter, then each highlight followed by
        its note as a block quote.

        Args:
            booksnote: Structure produced by ``build_booksnote``.
            booksinfo: Mapping of asset id to book metadata; ``itemname`` is
                used as the book title, falling back to the asset id.
            out_path: If given, the Markdown is written there as UTF-8 and the
                parent directory is created when missing.

        Returns:
            The Markdown text.
        """
        import datetime

        now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
        lines = [f'# 笔记导出 {now}\n']
        for assetid, chapters in booksnote.items():
            # "or" also covers an itemname that is present but empty/None.
            bookname = (booksinfo.get(assetid) or {}).get('itemname') or assetid
            lines.append(f'\n## {bookname}\n')
            for chapter, notes in chapters.items():
                lines.append(f'### {chapter}')
                for ann in notes.values():
                    sel = ann.get('selectedtext')
                    note = ann.get('note')
                    if sel:
                        lines.append(sel)
                    if note:
                        lines.append(f'> {note}')
                    lines.append('')
        md = '\n'.join(lines)
        if out_path:
            out_dir = os.path.dirname(out_path)
            if out_dir:
                os.makedirs(out_dir, exist_ok=True)
            with open(out_path, 'w', encoding='utf-8') as f:
                f.write(md)
        return md
|
||
|
||
|
||
def sync_source_files(config_module):
    """Copy the iBooks source data files into the local data directory.

    Copies the annotation database, the library database, their ``-shm`` /
    ``-wal`` companions and ``Books.plist`` from the ``IBOOKS_*`` paths to the
    matching ``LOCAL_*`` paths, printing one status line per file.

    Destination directories are created when missing. When a ``-shm`` /
    ``-wal`` companion no longer exists at the source, a stale local copy left
    by an earlier sync is removed so it is not paired with the freshly copied
    database.

    Args:
        config_module: Object exposing the ``IBOOKS_*`` / ``LOCAL_*`` path
            attributes read below.
    """
    import os
    import shutil

    library_src = config_module.IBOOKS_LIBRARY_DB
    library_dst = config_module.LOCAL_LIBRARY_DB
    # (source, destination, is_sidecar) - sidecars are the SQLite WAL-mode
    # companion files, which only exist while the database has pending writes.
    src_files = [
        (config_module.IBOOKS_ANNOTATION_DB, config_module.LOCAL_ANNOTATION_DB, False),
        (config_module.IBOOKS_ANNOTATION_SHM, config_module.LOCAL_ANNOTATION_SHM, True),
        (config_module.IBOOKS_ANNOTATION_WAL, config_module.LOCAL_ANNOTATION_WAL, True),
        (library_src, library_dst, False),
        (library_src + '-shm', library_dst + '-shm', True),
        (library_src + '-wal', library_dst + '-wal', True),
        (config_module.IBOOKS_BOOKS_PLIST, config_module.LOCAL_BOOKS_PLIST, False),
    ]
    for src, dst, is_sidecar in src_files:
        if os.path.exists(src):
            dst_dir = os.path.dirname(dst)
            if dst_dir:
                # shutil.copy2 does not create missing parent directories.
                os.makedirs(dst_dir, exist_ok=True)
            shutil.copy2(src, dst)
            print(f'已拷贝源数据文件到本地: {dst}')
            continue
        print(f'未找到文件: {src}')
        if is_sidecar and os.path.isfile(dst):
            # A WAL/SHM from a previous sync does not belong to the database
            # copied in this run; leaving it in place mispairs the files.
            os.remove(dst)
            print(f'已移除过期的本地文件: {dst}')
|
||
|
||
|
||
|
||
def build_export_stem(bookname):
    """Return a short, filesystem-safe title for the exported file name.

    The title is cut at the first delimiter (``.``, a colon, ``_`` or an
    opening bracket), then characters that are not allowed in file names are
    replaced by ``_``. If nothing is left (for example the title starts with a
    bracket), the full sanitised title is used, and ``'untitled'`` as a last
    resort.
    """
    import re

    def sanitise(text):
        # Path separators would otherwise redirect or break the output path.
        return re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', text).strip()

    title = str(bookname)
    stem = sanitise(re.split(r'[.::_\【\[\((]', title)[0])
    return stem or sanitise(title) or 'untitled'


if __name__ == '__main__':
    import datetime
    import os
    from InquirerPy import inquirer  # type: ignore

    exporter = BookNotesExporter(config)
    sync_source_files(config)
    # NOTE: the .sqlite-shm and .sqlite-wal files are SQLite temporary files.
    # They only exist while the database is in WAL mode and has been written
    # to; once no process has the database open, SQLite may clean them up.

    # Load the metadata of every book first.
    manager = BookListManager(plist_path=config.LOCAL_BOOKS_PLIST, db_path=config.LOCAL_LIBRARY_DB)
    booksinfo = manager.get_books_info()
    last_open_times = manager.get_books_last_open()
    assetid2name = {}
    assetid2lastopen = {}
    for assetid, info in booksinfo.items():
        name = info.get('displayname') or info.get('itemname') or assetid
        if '-' in name:
            # Keep the part before the first '-', unless that leaves nothing.
            name = name.split('-', 1)[0].strip() or name
        assetid2name[assetid] = name
        # A missing or None timestamp sorts last instead of breaking sorted().
        assetid2lastopen[assetid] = (last_open_times.get(assetid) or {}).get('last_open') or 0

    # Most recently opened books first.
    sorted_assetids = sorted(assetid2name, key=assetid2lastopen.__getitem__, reverse=True)
    # Each choice carries the asset id as its value, so the selection is exact
    # even when titles repeat or one title is a prefix of another.
    choices = [
        {"name": f"{assetid2name[aid]} [{assetid2lastopen[aid]}]", "value": aid}
        for aid in sorted_assetids
    ]
    if not choices:
        print("无可导出的笔记")
        raise SystemExit(0)

    selected_assetid = inquirer.fuzzy(
        message="请选择要导出的书名(支持模糊搜索):",
        choices=choices,
        multiselect=False,
        instruction="上下键选择,输入可模糊筛选,回车确定"
    ).execute()
    if selected_assetid not in booksinfo:
        print("未找到选中书籍")
        raise SystemExit(1)

    selected_booksnote = exporter.build_booksnote(bookid=selected_assetid)
    selected_info = booksinfo.get(selected_assetid, {})
    selected_booksinfo = {selected_assetid: selected_info}
    bookname = selected_info.get("displayname") or selected_info.get("itemname") or selected_assetid
    ts = datetime.datetime.now().strftime('%m%d%H%M')
    shortname = build_export_stem(bookname)
    # Make sure the export directory exists before writing into it.
    os.makedirs(config.EXPORT_NOTES_DIR, exist_ok=True)
    out_path = os.path.join(config.EXPORT_NOTES_DIR, f'notes_{shortname}-{ts}.md')
    exporter.export_booksnote_to_md(selected_booksnote, selected_booksinfo, out_path)
    print(f'《{bookname}》 导出笔记 {out_path}')
|