Initial commit
This commit is contained in:
180
exportbooknotes.py
Normal file
180
exportbooknotes.py
Normal file
@@ -0,0 +1,180 @@
|
||||
"""
|
||||
自动生成 booksnote 数据结构:
|
||||
booksnote = {
|
||||
assetid: { label_path: { uuid: {
|
||||
'creationdate': '2023/7/12',
|
||||
'filepos': None,
|
||||
'idref': '008.xhtml',
|
||||
'note': None,
|
||||
'selectedtext': '這就是宣傳的恐怖之處'
|
||||
}}}
|
||||
}
|
||||
"""
|
||||
from collections import defaultdict
|
||||
import os
|
||||
from annotationdata import get_annotations
|
||||
from booklist_parse import parse_books_plist
|
||||
from opf_parse import parse_opf
|
||||
from toc_parse import parse_navpoints, find_label_path
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
def find_file_by_ext(root, exts):
    """Return the path of the first file under ``root`` ending in one of ``exts``.

    The tree is walked recursively with ``os.walk``. Directory and file names
    are visited in sorted order so that "first" is reproducible between runs
    and file systems.

    Args:
        root: Directory to search.
        exts: A single suffix such as ``'.opf'`` or an iterable of suffixes.
            Matching is case-insensitive on both the file name and the suffix.

    Returns:
        The joined path of the first matching file, or ``None`` when no file
        matches.
    """
    if isinstance(exts, str):
        # A bare string would otherwise be iterated character by character.
        exts = (exts,)
    # File names are lowercased before comparison, so the suffixes must be too.
    suffixes = tuple(ext.lower() for ext in exts)
    if not suffixes:
        return None

    for dirpath, dirnames, filenames in os.walk(root):
        # Sorting in place steers the order in which os.walk descends.
        dirnames.sort()
        for filename in sorted(filenames):
            # str.endswith accepts a tuple: one call covers every suffix.
            if filename.lower().endswith(suffixes):
                return os.path.join(dirpath, filename)
    return None
|
||||
|
||||
def get_toc_tree(toc_path):
    """Parse an NCX table of contents into the tree built by ``parse_navpoints``.

    Args:
        toc_path: Path to the ``.ncx`` file; it is read as UTF-8 text.

    Returns:
        Whatever ``parse_navpoints`` returns for the top-level ``navPoint``
        elements of the document's ``navMap``. When the document has no
        ``navMap`` element, ``parse_navpoints`` receives an empty list rather
        than the lookup failing with ``AttributeError`` on ``None``.
    """
    with open(toc_path, 'r', encoding='utf-8') as f:
        soup = BeautifulSoup(f, 'xml')

    nav_map = soup.find('navMap')
    if nav_map is None:
        # NOTE(review): assumes parse_navpoints accepts an empty list — confirm.
        top_level = []
    else:
        # Direct children only; nested navPoints stay attached to their parent.
        top_level = nav_map.find_all('navPoint', recursive=False)
    return parse_navpoints(top_level)
|
||||
|
||||
def _find_content_file(epub_path, opf_path, href):
    """Return the first existing file that ``href`` points to, or ``None``."""
    if not href:
        return None
    relative = href.split('#')[0]  # drop the fragment, keep the file part
    if not relative:
        return None
    # The EPUB root is tried first (the historical behaviour), then the OPF's
    # own directory, because manifest hrefs are relative to the package file.
    for base in (epub_path, os.path.dirname(opf_path)):
        candidate = os.path.join(base, relative)
        if os.path.isfile(candidate):
            return candidate
    return None


def _chapter_from_content(epub_path, opf_path, href, selectedtext):
    """Derive a chapter label from the content file, else the placeholder."""
    html_path = _find_content_file(epub_path, opf_path, href)
    if html_path and selectedtext:
        # Imported lazily: only needed when the TOC lookup found nothing.
        from toc_parse import find_section_by_selectedtext
        section = find_section_by_selectedtext(html_path, selectedtext)
        if section:
            return section
    return "(未找到章节)"


def build_booksnote(annotation_db='data/AEAnnotation.sqlite', books_plist='data/Books.plist'):
    """Group every annotation by book and then by chapter.

    Books are skipped when they have no entry in the plist, when their
    ``path`` is not an existing directory, or when no ``.opf`` / ``.ncx``
    file is found under that directory.

    Args:
        annotation_db: Path handed to ``get_annotations``.
        books_plist: Path handed to ``parse_books_plist``.

    Returns:
        A nested ``defaultdict`` of the form
        ``{assetid: {chapter: {uuid: annotation_dict}}}``. ``chapter`` is the
        value returned by ``find_label_path``; when that is ``None`` it is the
        section found from the selected text in the content file, or the
        placeholder ``"(未找到章节)"``. Each ``annotation_dict`` has the keys
        ``creationdate``, ``filepos``, ``idref`` (the resolved href), ``note``
        and ``selectedtext``.
    """
    annotations = get_annotations(annotation_db)
    booksinfo = parse_books_plist(books_plist)
    booksnote = defaultdict(lambda: defaultdict(dict))

    for assetid, notes in annotations.items():
        # Resolve the unpacked EPUB directory of this book.
        bookinfo = booksinfo.get(assetid)
        if not bookinfo:
            continue
        epub_path = bookinfo.get('path')
        if not epub_path or not os.path.isdir(epub_path):
            continue

        # Locate the package document and the NCX table of contents.
        opf_path = find_file_by_ext(epub_path, ['.opf'])
        ncx_path = find_file_by_ext(epub_path, ['.ncx'])
        if not opf_path or not ncx_path:
            continue

        id2href = parse_opf(opf_path)
        toc_tree = get_toc_tree(ncx_path)

        for uuid, ann in notes.items():
            idref = ann['idref']
            filepos = ann['filepos']
            # Fall back to the raw idref when the manifest has no such id.
            href = id2href.get(idref, idref)

            chapter = find_label_path(toc_tree, href, filepos)
            if chapter is None:
                # The TOC lookup failed: try the content file itself.
                chapter = _chapter_from_content(
                    epub_path, opf_path, href, ann.get('selectedtext')
                )

            booksnote[assetid][chapter][uuid] = {
                'creationdate': ann['creationdate'],
                'filepos': filepos,
                'idref': href,
                'note': ann['note'],
                'selectedtext': ann['selectedtext'],
            }
    return booksnote
|
||||
|
||||
import datetime
|
||||
|
||||
def export_booksnote_to_md(booksnote, booksinfo, out_path=None):
    """Render ``booksnote`` as Markdown and optionally write it to a file.

    Layout of the generated document::

        # 笔记导出 <YYYY-MM-DD HH:MM>
        ## <book name>
        ### <chapter>
        <selected text>
        > <note, when present>

    Args:
        booksnote: Mapping ``{assetid: {chapter: {uuid: annotation}}}`` where
            each annotation is a dict read through ``selectedtext`` and
            ``note``.
        booksinfo: Mapping ``{assetid: info}``; ``info['itemname']`` is used
            as the book heading, falling back to the asset id when the entry
            or the name is missing or empty.
        out_path: When truthy, the Markdown is also written to this path as
            UTF-8; missing parent directories are created.

    Returns:
        The Markdown document as a single string.
    """
    now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M')
    lines = [f'# 笔记导出 {now}\n']

    for assetid, chapters in booksnote.items():
        info = booksinfo.get(assetid) or {}
        # ``or`` also covers an itemname stored as None or ''.
        bookname = info.get('itemname') or assetid
        lines.append(f'\n## {bookname}\n')

        for chapter, notes in chapters.items():
            lines.append(f'### {chapter}')
            for ann in notes.values():
                sel = ann.get('selectedtext')
                note = ann.get('note')
                if sel:
                    lines.append(sel)
                if note:
                    # Quote every line; otherwise only the first line of a
                    # multi-line note would end up inside the blockquote.
                    lines.extend(
                        f'> {part}' if part else '>'
                        for part in str(note).splitlines()
                    )
                # A blank line separates consecutive annotations.
                lines.append('')

    md = '\n'.join(lines)
    if out_path:
        parent = os.path.dirname(out_path)
        if parent:
            # open() does not create directories on its own.
            os.makedirs(parent, exist_ok=True)
        with open(out_path, 'w', encoding='utf-8') as f:
            f.write(md)
    return md
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Interactive entry point: refresh the local data copies from the Books
    # app, let the user pick one book, and export that book's notes.
    import re
    import shutil
    import sys

    from InquirerPy import inquirer

    # Overwrite the database and plist copies under ./data with the live
    # files from the iBooks containers.
    src_files = [
        (os.path.expanduser('~/Library/Containers/com.apple.iBooksX/Data/Documents/AEAnnotation/AEAnnotation_v10312011_1727_local.sqlite'), 'data/AEAnnotation.sqlite'),
        (os.path.expanduser('~/Library/Containers/com.apple.iBooksX/Data/Documents/AEAnnotation/AEAnnotation_v10312011_1727_local.sqlite-shm'), 'data/AEAnnotation.sqlite-shm'),
        (os.path.expanduser('~/Library/Containers/com.apple.iBooksX/Data/Documents/AEAnnotation/AEAnnotation_v10312011_1727_local.sqlite-wal'), 'data/AEAnnotation.sqlite-wal'),
        (os.path.expanduser('~/Library/Containers/com.apple.iBooksX/Data/Documents/BKLibrary/BKLibrary-1-091020131601.sqlite'), 'data/BKLibrary.sqlite'),
        (os.path.expanduser('~/Library/Containers/com.apple.BKAgentService/Data/Documents/iBooks/Books/Books.plist'), 'data/Books.plist'),
    ]
    # shutil.copy2 does not create the destination directory.
    os.makedirs('data', exist_ok=True)
    for src, dst in src_files:
        if os.path.exists(src):
            shutil.copy2(src, dst)
            print(f'copy source data file to ./data : {dst}')
        else:
            print(f'file not found: {src} ')

    booksnote = build_booksnote()
    booksinfo = parse_books_plist('data/Books.plist')

    def _date_key(value):
        """Turn a date such as '2023/7/12' into (2023, 7, 12) for ordering."""
        # Plain string comparison would rank '2023/7/12' above '2023/10/1'.
        return tuple(int(part) for part in re.findall(r'\d+', str(value)))

    # Display name per book (displayname, then itemname, then the asset id)
    # and the date of its most recent note.
    assetid2name = {}
    assetid2latest = {}
    for assetid in booksnote:
        info = booksinfo.get(assetid) or {}
        name = info.get('displayname') or info.get('itemname') or assetid
        # Keep only the part before the first '-', unless that part is empty.
        if '-' in name:
            name = name.split('-', 1)[0].strip() or name
        assetid2name[assetid] = name
        assetid2latest[assetid] = max(
            (
                _date_key(ann.get('creationdate'))
                for chapter in booksnote[assetid].values()
                for ann in chapter.values()
                if ann.get('creationdate')
            ),
            default=(),  # books without any dated note sort last
        )

    # Most recently annotated books first.
    sorted_assetids = sorted(assetid2name, key=lambda aid: assetid2latest[aid], reverse=True)
    # Each label embeds the asset id, so it maps back to exactly one book even
    # when two books share a name or one name is a prefix of another.
    choice2assetid = {f"{assetid2name[aid]} [{aid}]": aid for aid in sorted_assetids}
    choices = list(choice2assetid)
    if not choices:
        print("无可导出的笔记")
        sys.exit(0)

    answer = inquirer.fuzzy(
        message="请选择要导出的书名(支持模糊搜索):",
        choices=choices,
        multiselect=False,
        instruction="上下键选择,输入可模糊筛选,回车确定"
    ).execute()

    # Resolve the chosen label back to its asset id.
    selected_assetid = choice2assetid.get(answer)
    if selected_assetid is None:
        print("未找到选中书籍")
        sys.exit(1)

    # Export only the notes of the selected book.
    selected_booksnote = {selected_assetid: booksnote[selected_assetid]}
    selected_booksinfo = {selected_assetid: booksinfo.get(selected_assetid, {})}
    out_path = f'export_notes/notes_export_{selected_assetid}.md'
    # Make sure the export directory exists before the file is written.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    export_booksnote_to_md(selected_booksnote, selected_booksinfo, out_path)
    print(f'《{selected_booksinfo[selected_assetid].get("displayname") or selected_booksinfo[selected_assetid].get("itemname") or selected_assetid}》 导出笔记 {out_path}')
|
||||
Reference in New Issue
Block a user