Initial commit

This commit is contained in:
douboer
2025-08-06 13:11:08 +08:00
commit 2f2f98cea1
723 changed files with 69242 additions and 0 deletions

60
backup/booksnote.py Normal file
View File

@@ -0,0 +1,60 @@
"""
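Build the ``booksnote`` data structure. Notes are grouped first by asset id,
then by chapter label path, then by annotation uuid:
booksnote = {
    assetid: {label_path: {uuid: {
        'chapter': label_path,
        'creationdate': '2023/7/12',
        'filepos': None,
        'idref': '008.xhtml',
        'note': None,
        'selectedtext': '這就是宣傳的恐怖之處'
    }}}
}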
"""
from collections import defaultdict
from annotationdata import get_annotations
from opf_parse import parse_opf
from toc_parse import parse_navpoints, find_label_path
import os
from bs4 import BeautifulSoup
def get_toc_tree(toc_path):
    """Parse the NCX file at *toc_path* and return its navigation tree.

    The file is read as UTF-8 and parsed with BeautifulSoup's ``'xml'``
    parser. Only the ``navPoint`` elements that are direct children of the
    ``navMap`` element are handed to ``parse_navpoints``; whatever that
    helper returns is returned unchanged.
    """
    with open(toc_path, 'r', encoding='utf-8') as ncx_file:
        document = BeautifulSoup(ncx_file, 'xml')
    # recursive=False keeps only the top-level entries; nested navPoints are
    # left for parse_navpoints to deal with.
    top_level_points = document.find('navMap').find_all('navPoint', recursive=False)
    return parse_navpoints(top_level_points)
def build_booksnote(annotation_db='data/AEAnnotation.sqlite', opf_path=None, toc_path=None):
    """Group annotations by asset id and by chapter label path.

    Args:
        annotation_db: Passed unchanged to ``get_annotations``.
        opf_path: Path handed to ``parse_opf``. Required.
        toc_path: Path handed to ``get_toc_tree``. Required.

    Returns:
        A nested ``defaultdict`` indexed as
        ``booksnote[assetid][chapter][uuid]``. Each leaf is a dict with the
        keys ``'chapter'``, ``'creationdate'``, ``'filepos'``, ``'idref'``,
        ``'note'`` and ``'selectedtext'``. Note that ``'idref'`` holds the
        href resolved through the OPF mapping, falling back to the raw idref
        when the mapping has no entry for it.

    Raises:
        ValueError: If ``opf_path`` or ``toc_path`` is missing or empty.
    """
    # Validate the required paths first so an invalid call fails immediately,
    # before the annotation source is read.
    if not opf_path or not toc_path:
        raise ValueError('必须提供OPF和TOC路径')

    annotations = get_annotations(annotation_db)

    # Parse the OPF and the TOC.
    id2href = parse_opf(opf_path)
    toc_tree = get_toc_tree(toc_path)

    booksnote = defaultdict(lambda: defaultdict(dict))
    # Walk every annotation of every asset.
    for assetid, notes in annotations.items():
        for uuid, ann in notes.items():
            idref = ann['idref']
            filepos = ann['filepos']
            # Fall back to the raw idref when the OPF mapping does not know it.
            href = id2href.get(idref, idref)
            chapter = find_label_path(toc_tree, href, filepos)
            if chapter is None:
                # Placeholder bucket for notes whose chapter was not found.
                chapter = "(未找到章节)"
            booksnote[assetid][chapter][uuid] = {
                'chapter': chapter,
                'creationdate': ann['creationdate'],
                'filepos': filepos,
                'idref': href,
                'note': ann['note'],
                'selectedtext': ann['selectedtext'],
            }
    return booksnote
if __name__ == '__main__':
    from pprint import pprint

    # Example paths: replace them with the location of a real unpacked EPUB.
    opf_path = './examples/epub_format_3/OEBPS/content.opf'
    toc_path = './examples/epub_format_3/OEBPS/toc.ncx'

    booksnote = build_booksnote(
        opf_path=opf_path,
        toc_path=toc_path,
    )
    # sort_dicts=False keeps the entries in insertion order.
    pprint(booksnote, indent=2, width=120, sort_dicts=False)