'update'
This commit is contained in:
Binary file not shown.
@@ -1,3 +1,18 @@
|
|||||||
|
"""
|
||||||
|
annotationdata.py
|
||||||
|
-----------------
|
||||||
|
功能:
|
||||||
|
- 解析iBooks的AEAnnotation.sqlite数据库,提取所有或指定书籍(assetid/bookid)的笔记。
|
||||||
|
- 提供parse_location辅助函数,解析笔记定位信息。
|
||||||
|
- 返回结构化的annotations数据,便于后续章节定位与导出。
|
||||||
|
|
||||||
|
主要接口:
|
||||||
|
- get_annotations(db_path, bookid=None):返回所有或指定assetid的笔记,结构为{assetid: {uuid: {...}}}
|
||||||
|
- parse_location(location):解析ZANNOTATIONLOCATION,返回(idref, filepos)
|
||||||
|
|
||||||
|
依赖:sqlite3, collections, re, os, datetime
|
||||||
|
"""
|
||||||
|
|
||||||
import sqlite3
|
import sqlite3
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
import re
|
import re
|
||||||
|
|||||||
@@ -1,3 +1,20 @@
|
|||||||
|
"""
|
||||||
|
booklist_parse.py
|
||||||
|
-----------------
|
||||||
|
功能:
|
||||||
|
- 解析iBooks的Books.plist,提取所有书籍元数据(书名、作者、路径、时间等)。
|
||||||
|
- 解析BKLibrary.sqlite,获取每本书的最近打开时间(苹果时间戳,基准2001-01-01)。
|
||||||
|
|
||||||
|
主要接口:
|
||||||
|
- parse_books_plist(plist_path):返回所有书籍元数据,结构为{bk_id: {...}}
|
||||||
|
- get_books_last_open(db_path):返回所有书籍最近打开时间,结构为{bk_id: {'last_open': 时间戳}}
|
||||||
|
|
||||||
|
依赖:plistlib, collections, sqlite3, os, datetime
|
||||||
|
|
||||||
|
典型用法:
|
||||||
|
booksinfo = parse_books_plist('./data/Books.plist')
|
||||||
|
books_open = get_books_last_open('data/BKLibrary.sqlite')
|
||||||
|
"""
|
||||||
import plistlib
|
import plistlib
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
|
||||||
|
|||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
BIN
data/Books.plist
BIN
data/Books.plist
Binary file not shown.
@@ -1,4 +1,4 @@
|
|||||||
# 笔记导出 2025-08-12 15:03
|
# 笔记导出 2025-08-12 19:48
|
||||||
|
|
||||||
|
|
||||||
## 明夷待访录·破邪论(精)--中华经典名著全本全注全译 (中华书局)
|
## 明夷待访录·破邪论(精)--中华经典名著全本全注全译 (中华书局)
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
# 笔记导出 2025-08-12 15:02
|
# 笔记导出 2025-08-12 21:16
|
||||||
|
|
||||||
|
|
||||||
## 传统十论
|
## 传统十论
|
||||||
|
|||||||
@@ -1,4 +1,29 @@
|
|||||||
"""
|
"""
|
||||||
|
exportbooknotes.py
|
||||||
|
------------------
|
||||||
|
功能:
|
||||||
|
- 自动同步iBooks数据库和元数据文件到本地data目录。
|
||||||
|
- 解析AEAnnotation.sqlite、Books.plist、BKLibrary.sqlite,构建结构化笔记数据。
|
||||||
|
- 解析epub目录和章节信息,定位每条笔记所属章节。
|
||||||
|
- 命令行菜单按最近打开时间降序展示书籍列表,供用户选择导出。
|
||||||
|
- 仅导出选中书籍的所有笔记,按章节分组,生成Markdown文件。
|
||||||
|
|
||||||
|
主要数据流:
|
||||||
|
1. 数据同步到data目录
|
||||||
|
2. 解析Books.plist获取书籍元数据
|
||||||
|
3. 解析BKLibrary.sqlite获取最近打开时间
|
||||||
|
4. 菜单排序与显示(书名+时间戳)
|
||||||
|
5. 解析AEAnnotation.sqlite获取笔记
|
||||||
|
6. 解析epub目录,定位章节
|
||||||
|
7. 导出Markdown文件
|
||||||
|
|
||||||
|
依赖:Python 3, InquirerPy, bs4, shutil, os, datetime, sqlite3
|
||||||
|
|
||||||
|
典型用法:
|
||||||
|
python exportbooknotes.py
|
||||||
|
# 按提示选择书籍,自动导出笔记到export_notes目录
|
||||||
|
"""
|
||||||
|
"""
|
||||||
自动生成 booksnote 数据结构:
|
自动生成 booksnote 数据结构:
|
||||||
booksnote = {
|
booksnote = {
|
||||||
assetid: { label_path: { uuid: {
|
assetid: { label_path: { uuid: {
|
||||||
@@ -17,6 +42,7 @@ from booklist_parse import parse_books_plist
|
|||||||
from opf_parse import parse_opf
|
from opf_parse import parse_opf
|
||||||
from toc_parse import parse_navpoints, find_label_path
|
from toc_parse import parse_navpoints, find_label_path
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
from pprint import pprint
|
||||||
|
|
||||||
def find_file_by_ext(root, exts):
|
def find_file_by_ext(root, exts):
|
||||||
"""在root下递归查找第一个指定后缀的文件"""
|
"""在root下递归查找第一个指定后缀的文件"""
|
||||||
@@ -31,7 +57,11 @@ def get_toc_tree(toc_path):
|
|||||||
with open(toc_path, 'r', encoding='utf-8') as f:
|
with open(toc_path, 'r', encoding='utf-8') as f:
|
||||||
soup = BeautifulSoup(f, 'xml')
|
soup = BeautifulSoup(f, 'xml')
|
||||||
nav_map = soup.find('navMap')
|
nav_map = soup.find('navMap')
|
||||||
return parse_navpoints(nav_map.find_all('navPoint', recursive=False))
|
|
||||||
|
nav_points = nav_map.find_all('navPoint', recursive=False)
|
||||||
|
toc_tree = parse_navpoints(nav_points)
|
||||||
|
#pprint(toc_tree, indent=2, depth=5)
|
||||||
|
return toc_tree
|
||||||
|
|
||||||
def build_booksnote(annotation_db='data/AEAnnotation.sqlite', books_plist='data/Books.plist', bookid=None):
|
def build_booksnote(annotation_db='data/AEAnnotation.sqlite', books_plist='data/Books.plist', bookid=None):
|
||||||
# 支持只处理特定 assetid 的笔记
|
# 支持只处理特定 assetid 的笔记
|
||||||
@@ -132,7 +162,7 @@ if __name__ == '__main__':
|
|||||||
print(f'file not found: {src} ')
|
print(f'file not found: {src} ')
|
||||||
|
|
||||||
from booklist_parse import parse_books_plist
|
from booklist_parse import parse_books_plist
|
||||||
from InquirerPy import inquirer
|
from InquirerPy import inquirer # type: ignore
|
||||||
|
|
||||||
# 先获取所有书籍元数据
|
# 先获取所有书籍元数据
|
||||||
booksinfo = parse_books_plist('data/Books.plist')
|
booksinfo = parse_books_plist('data/Books.plist')
|
||||||
|
|||||||
23
toc_parse.py
23
toc_parse.py
@@ -109,10 +109,14 @@ def find_label_path(
|
|||||||
if "label" in v:
|
if "label" in v:
|
||||||
new_path = path + [v["label"]]
|
new_path = path + [v["label"]]
|
||||||
if v["ref"] == ref and (filepos is None or v["filepos"] == filepos):
|
if v["ref"] == ref and (filepos is None or v["filepos"] == filepos):
|
||||||
return " / ".join(new_path)
|
title = " / ".join(new_path)
|
||||||
found = find_label_path(v["children"], ref, filepos, new_path)
|
#print(f'title ref={ref} filepos={filepos} -> {title}') #DBG
|
||||||
if found:
|
return title
|
||||||
return found
|
title = find_label_path(v["children"], ref, filepos, new_path)
|
||||||
|
if title:
|
||||||
|
#print(f'title1 ref={ref} filepos={filepos} -> {title}') #DBG
|
||||||
|
return title
|
||||||
|
|
||||||
# 2. 如果带filepos查找失败,回退到同ref下第一个章节(即只要ref匹配就返回)
|
# 2. 如果带filepos查找失败,回退到同ref下第一个章节(即只要ref匹配就返回)
|
||||||
if filepos is not None:
|
if filepos is not None:
|
||||||
for v in nodes:
|
for v in nodes:
|
||||||
@@ -120,10 +124,13 @@ def find_label_path(
|
|||||||
new_path = path + [v["label"]]
|
new_path = path + [v["label"]]
|
||||||
# print(f"对比 {v['ref']} == {ref}")
|
# print(f"对比 {v['ref']} == {ref}")
|
||||||
if v["ref"].split("#", 1)[0] == ref.split("#", 1)[0]:
|
if v["ref"].split("#", 1)[0] == ref.split("#", 1)[0]:
|
||||||
return " / ".join(new_path)
|
title = " / ".join(new_path)
|
||||||
found = find_label_path(v["children"], ref, None, new_path)
|
#print(f'title3 ref={ref} filepos={filepos} -> {title}') #DBG
|
||||||
if found:
|
return title
|
||||||
return found
|
title = find_label_path(v["children"], ref, None, new_path)
|
||||||
|
if title:
|
||||||
|
#print(f'title4 ref={ref} filepos={filepos} -> {title}') #DBG
|
||||||
|
return title
|
||||||
|
|
||||||
# 3. 若完全未找到,尝试直接解析idref所指html文件标题,获取章节label信息
|
# 3. 若完全未找到,尝试直接解析idref所指html文件标题,获取章节label信息
|
||||||
# 仅在顶层调用时执行此逻辑
|
# 仅在顶层调用时执行此逻辑
|
||||||
|
|||||||
Reference in New Issue
Block a user