'update'
This commit is contained in:
@@ -20,26 +20,81 @@ import os
|
||||
from collections import defaultdict
|
||||
|
||||
class AnnotationManager:
|
||||
"""
|
||||
iBooks笔记管理器
|
||||
|
||||
负责从iBooks的AEAnnotation.sqlite数据库中提取和解析用户的阅读笔记和高亮标记。
|
||||
支持获取所有书籍的笔记或指定书籍的笔记,并提供位置信息解析功能。
|
||||
"""
|
||||
|
||||
def __init__(self, db_path=None):
    """
    Create an annotation manager bound to one database file.

    Args:
        db_path (str, optional): Path to the annotation database file.
            When omitted (or otherwise falsy), config.LOCAL_ANNOTATION_DB
            is used instead.
    """
    # Any falsy value (None, empty string) selects the configured default.
    if not db_path:
        db_path = config.LOCAL_ANNOTATION_DB
    self.db_path = db_path
|
||||
|
||||
@staticmethod
def parse_location(location):
    """
    Parse an iBooks annotation location into ``(idref, filepos)``.

    The value comes from the ZANNOTATIONLOCATION column and is normally an
    ``epubcfi(...)`` string. The text inside the first pair of square
    brackets is returned as the chapter identifier and the text inside the
    second pair as the in-file position; nothing else in the string is
    inspected.

    Args:
        location (str): Location string; may be None or empty.

    Returns:
        tuple: ``(idref, filepos)``
            - idref (str or None): content of the first ``[...]`` group.
            - filepos (str or None): content of the second ``[...]`` group.
            Both are None when ``location`` is None or empty.

    Examples:
        >>> AnnotationManager.parse_location('epubcfi(/6/746[id509]!/4[abc]/10,/2/1:0,/7:8)')
        ('id509', 'abc')
        >>> AnnotationManager.parse_location('epubcfi(/6/22[id15]!/4/156/1,:21,:157)')
        ('id15', None)
    """
    if not location:
        return None, None

    # Non-greedy match so that each [...] group is captured separately.
    bracketed = re.findall(r'\[(.*?)\]', location)

    # Pad with None so that missing groups unpack cleanly; extra groups
    # beyond the second are ignored.
    idref, filepos = (bracketed + [None, None])[:2]
    return idref, filepos
|
||||
|
||||
def get_annotations(self, bookid=None):
|
||||
"""
|
||||
从数据库获取笔记数据
|
||||
|
||||
从iBooks的AEAnnotation.sqlite数据库中提取所有或指定书籍的笔记和高亮内容。
|
||||
自动处理时间戳转换和位置信息解析。
|
||||
|
||||
Args:
|
||||
bookid (str, optional): 书籍资产ID,如果为None则获取所有书籍的笔记
|
||||
|
||||
Returns:
|
||||
dict: 笔记数据字典,结构为:
|
||||
{
|
||||
assetid: {
|
||||
uuid: {
|
||||
'creationdate': '创建日期',
|
||||
'filepos': '文件位置',
|
||||
'idref': '章节标识',
|
||||
'note': '笔记内容',
|
||||
'selectedtext': '选中文本'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Note:
|
||||
- 会检查WAL模式相关文件(-wal, -shm)的存在性
|
||||
- 自动转换苹果时间戳格式(以2001-01-01为基准)
|
||||
- 过滤掉既没有笔记也没有选中文本的空记录
|
||||
"""
|
||||
# 检查WAL模式相关文件
|
||||
base = self.db_path.rsplit('.', 1)[0]
|
||||
wal_path = base + '.sqlite-wal'
|
||||
@@ -47,8 +102,11 @@ class AnnotationManager:
|
||||
for f in [self.db_path, wal_path, shm_path]:
|
||||
if not os.path.exists(f):
|
||||
print(f'警告: 缺少 {f},可能无法获取全部最新笔记')
|
||||
# 连接数据库并执行查询
|
||||
conn = sqlite3.connect(self.db_path)
|
||||
cursor = conn.cursor()
|
||||
|
||||
# 根据是否指定bookid选择不同的查询语句
|
||||
if bookid is not None:
|
||||
cursor.execute('''
|
||||
SELECT ZANNOTATIONASSETID, ZANNOTATIONCREATIONDATE, ZANNOTATIONLOCATION, ZANNOTATIONNOTE, ZANNOTATIONSELECTEDTEXT, ZANNOTATIONUUID
|
||||
@@ -59,12 +117,16 @@ class AnnotationManager:
|
||||
SELECT ZANNOTATIONASSETID, ZANNOTATIONCREATIONDATE, ZANNOTATIONLOCATION, ZANNOTATIONNOTE, ZANNOTATIONSELECTEDTEXT, ZANNOTATIONUUID
|
||||
FROM ZAEANNOTATION
|
||||
''')
|
||||
|
||||
rows = cursor.fetchall()
|
||||
annotations = defaultdict(dict)
|
||||
import datetime
|
||||
|
||||
# 处理每一行数据
|
||||
for row in rows:
|
||||
assetid, creationdate, location, note, selectedtext, uuid = row
|
||||
# 转换 creationdate 格式,支持苹果时间戳(以2001-01-01为基准)
|
||||
|
||||
# 转换 creationdate格式为'YYYY-MM-DD HH:MM:SS',支持苹果时间戳(以2001-01-01为基准)
|
||||
date_str = creationdate
|
||||
if creationdate:
|
||||
try:
|
||||
@@ -74,13 +136,20 @@ class AnnotationManager:
|
||||
elif isinstance(creationdate, str) and creationdate.replace('.', '', 1).isdigit():
|
||||
dt = origin + datetime.timedelta(seconds=float(creationdate))
|
||||
else:
|
||||
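# Any other string: parse its first 10 characters as 'YYYY-MM-DD'. Layouts such as '2025/9/6' do not match and are kept verbatim by the except branch below.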
|
||||
dt = datetime.datetime.strptime(creationdate[:10], "%Y-%m-%d")
|
||||
date_str = f"{dt.year}/{dt.month}/{dt.day}"
|
||||
date_str = dt.strftime('%Y-%m-%d %H:%M:%S')
|
||||
except Exception:
|
||||
date_str = str(creationdate)
|
||||
|
||||
# 解析位置信息
|
||||
idref, filepos = self.parse_location(location)
|
||||
|
||||
# 过滤空记录(既没有笔记也没有选中文本)
|
||||
if note is None and selectedtext is None:
|
||||
continue
|
||||
|
||||
# 构建笔记数据结构
|
||||
annotations[str(assetid)][uuid] = {
|
||||
'creationdate': date_str,
|
||||
'filepos': filepos,
|
||||
@@ -88,14 +157,26 @@ class AnnotationManager:
|
||||
'note': note,
|
||||
'selectedtext': selectedtext
|
||||
}
|
||||
|
||||
conn.close()
|
||||
|
||||
# 根据查询类型返回相应结果
|
||||
if bookid is not None:
|
||||
return {str(bookid): annotations.get(str(bookid), {})}
|
||||
return annotations
|
||||
|
||||
if __name__ == "__main__":
|
||||
"""
|
||||
测试模块功能
|
||||
|
||||
包含两个测试用例:
|
||||
1. 测试parse_location方法解析各种格式的位置字符串
|
||||
2. 测试get_annotations方法获取指定书籍的笔记数据
|
||||
"""
|
||||
manager = AnnotationManager()
|
||||
# 测试 parse_location
|
||||
|
||||
# 测试 parse_location 方法
|
||||
print("=== 测试位置解析功能 ===")
|
||||
test_locations = [
|
||||
'epubcfi(/6/746[id509]!/4[4MLOS0-27b363c65bfe41ad8429f530566a2737]/10,/2/1:0,/7:8',
|
||||
'epubcfi(/6/22[id15]!/4/156/1,:21,:157)',
|
||||
@@ -105,7 +186,8 @@ if __name__ == "__main__":
|
||||
idref, filepos = manager.parse_location(loc)
|
||||
print(f"location: {loc}\n idref: {idref}\n filepos: {filepos}\n")
|
||||
|
||||
# 测试只获取特定 assetid 的笔记
|
||||
# 测试获取特定书籍的笔记
|
||||
print("=== 测试笔记获取功能 ===")
|
||||
test_bookid = "B18FCD9F90FD43C2373AE52BAEF9A77C"
|
||||
annotations = manager.get_annotations(bookid=test_bookid)
|
||||
from pprint import pprint
|
||||
|
||||
Reference in New Issue
Block a user