This commit is contained in:
douboer 2025-09-06 16:43:13 +08:00
parent 95cd27563c
commit 893cd06c2c
7 changed files with 266 additions and 11 deletions

Binary file not shown.

View File

@ -20,26 +20,81 @@ import os
from collections import defaultdict
class AnnotationManager:
"""
iBooks笔记管理器
负责从iBooks的AEAnnotation.sqlite数据库中提取和解析用户的阅读笔记和高亮标记
支持获取所有书籍的笔记或指定书籍的笔记并提供位置信息解析功能
"""
def __init__(self, db_path=None):
"""
Initialise the annotation manager.
Args:
db_path (str, optional): Path to the annotation database file. When omitted
(or otherwise falsy), config.LOCAL_ANNOTATION_DB is used instead.
"""
self.db_path = db_path or config.LOCAL_ANNOTATION_DB
@staticmethod
def parse_location(location):
"""
解析ZANNOTATIONLOCATION返回(idref, filepos)
- epubcfi(...)格式优先提取[]内内容为idref
- 其他格式兼容原逻辑
解析iBooks笔记的位置信息
从ZANNOTATIONLOCATION字段解析出章节标识符和文件内位置信息
支持epubcfi格式的位置字符串解析
Args:
location (str): 笔记位置字符串通常为epubcfi格式
Returns:
tuple: (idref, filepos)
- idref (str): 章节标识符用于定位具体章节
- filepos (str): 文件内位置用于精确定位笔记位置
Examples:
>>> parse_location('epubcfi(/6/746[id509]!/4[abc]/10,/2/1:0,/7:8)')
('id509', 'abc')
"""
idref = None
filepos = None
if not location:
return idref, filepos
# 使用正则表达式提取[]内的内容
matches = re.findall(r'\[(.*?)\]', location) if location else []
idref = matches[0] if len(matches) > 0 else None
filepos = matches[1] if len(matches) > 1 else None
return idref, filepos
def get_annotations(self, bookid=None):
"""
从数据库获取笔记数据
从iBooks的AEAnnotation.sqlite数据库中提取所有或指定书籍的笔记和高亮内容
自动处理时间戳转换和位置信息解析
Args:
bookid (str, optional): 书籍资产ID如果为None则获取所有书籍的笔记
Returns:
dict: 笔记数据字典结构为
{
assetid: {
uuid: {
'creationdate': '创建日期',
'filepos': '文件位置',
'idref': '章节标识',
'note': '笔记内容',
'selectedtext': '选中文本'
}
}
}
Note:
- 会检查WAL模式相关文件(-wal, -shm)的存在性
- 自动转换苹果时间戳格式(以2001-01-01为基准)
- 过滤掉既没有笔记也没有选中文本的空记录
"""
# 检查WAL模式相关文件
base = self.db_path.rsplit('.', 1)[0]
wal_path = base + '.sqlite-wal'
@ -47,8 +102,11 @@ class AnnotationManager:
for f in [self.db_path, wal_path, shm_path]:
if not os.path.exists(f):
print(f'警告: 缺少 {f},可能无法获取全部最新笔记')
# 连接数据库并执行查询
conn = sqlite3.connect(self.db_path)
cursor = conn.cursor()
# 根据是否指定bookid选择不同的查询语句
if bookid is not None:
cursor.execute('''
SELECT ZANNOTATIONASSETID, ZANNOTATIONCREATIONDATE, ZANNOTATIONLOCATION, ZANNOTATIONNOTE, ZANNOTATIONSELECTEDTEXT, ZANNOTATIONUUID
@ -59,12 +117,16 @@ class AnnotationManager:
SELECT ZANNOTATIONASSETID, ZANNOTATIONCREATIONDATE, ZANNOTATIONLOCATION, ZANNOTATIONNOTE, ZANNOTATIONSELECTEDTEXT, ZANNOTATIONUUID
FROM ZAEANNOTATION
''')
rows = cursor.fetchall()
annotations = defaultdict(dict)
import datetime
# 处理每一行数据
for row in rows:
assetid, creationdate, location, note, selectedtext, uuid = row
# 转换 creationdate 格式支持苹果时间戳以2001-01-01为基准
# 转换 creationdate格式为'YYYY-MM-DD HH:MM:SS'支持苹果时间戳以2001-01-01为基准
date_str = creationdate
if creationdate:
try:
@ -74,13 +136,20 @@ class AnnotationManager:
elif isinstance(creationdate, str) and creationdate.replace('.', '', 1).isdigit():
dt = origin + datetime.timedelta(seconds=float(creationdate))
else:
# 支持原有格式'2025/9/6'等
dt = datetime.datetime.strptime(creationdate[:10], "%Y-%m-%d")
date_str = f"{dt.year}/{dt.month}/{dt.day}"
date_str = dt.strftime('%Y-%m-%d %H:%M:%S')
except Exception:
date_str = str(creationdate)
# 解析位置信息
idref, filepos = self.parse_location(location)
# 过滤空记录(既没有笔记也没有选中文本)
if note is None and selectedtext is None:
continue
# 构建笔记数据结构
annotations[str(assetid)][uuid] = {
'creationdate': date_str,
'filepos': filepos,
@ -88,14 +157,26 @@ class AnnotationManager:
'note': note,
'selectedtext': selectedtext
}
conn.close()
# 根据查询类型返回相应结果
if bookid is not None:
return {str(bookid): annotations.get(str(bookid), {})}
return annotations
if __name__ == "__main__":
"""
测试模块功能
包含两个测试用例
1. 测试parse_location方法解析各种格式的位置字符串
2. 测试get_annotations方法获取指定书籍的笔记数据
"""
manager = AnnotationManager()
# 测试 parse_location
# 测试 parse_location 方法
print("=== 测试位置解析功能 ===")
test_locations = [
'epubcfi(/6/746[id509]!/4[4MLOS0-27b363c65bfe41ad8429f530566a2737]/10,/2/1:0,/7:8',
'epubcfi(/6/22[id15]!/4/156/1,:21,:157)',
@ -105,7 +186,8 @@ if __name__ == "__main__":
idref, filepos = manager.parse_location(loc)
print(f"location: {loc}\n idref: {idref}\n filepos: {filepos}\n")
# 测试只获取特定 assetid 的笔记
# 测试获取特定书籍的笔记
print("=== 测试笔记获取功能 ===")
test_bookid = "B18FCD9F90FD43C2373AE52BAEF9A77C"
annotations = manager.get_annotations(bookid=test_bookid)
from pprint import pprint

View File

@ -6,6 +6,7 @@ import os
from collections import defaultdict
class BookListManager:
def __init__(self, plist_path=None, db_path=None):
self.plist_path = plist_path or config.LOCAL_BOOKS_PLIST
self.db_path = db_path or config.LOCAL_LIBRARY_DB
@ -32,6 +33,93 @@ class BookListManager:
'date': book.get('BKInsertionDate',''),
'updatedate': book.get('updateDate','')
}
# 统计每本书最近30天每天的阅读时长
try:
from annotationdata import AnnotationManager
import datetime
# 每天最小阅读时长(有笔记)
READ_TIME_DAY = getattr(config, 'READ_TIME_DAY', 60) # 单位:分钟
# 无笔记但当天有打开书籍时的阅读时长
READ_TIME_OPEN_DAY = getattr(config, 'READ_TIME_OPEN_DAY', 30) # 单位:分钟
today = datetime.datetime.now().date()
manager = AnnotationManager()
annotations = manager.get_annotations()
# 获取所有书籍的打开时间ZLASTOPENDATE单位为苹果时间戳
books_open = self.get_books_last_open()
this_year = today.year
for bk_id in booksinfo:
notes = annotations.get(bk_id, {})
day_notes = {}
# 收集每本书所有笔记的创建时间,按天分组
for uuid, note in notes.items():
raw_date = note.get('creationdate')
try:
dt = datetime.datetime.strptime(raw_date, '%Y-%m-%d %H:%M:%S')
day = dt.date()
if day not in day_notes:
day_notes[day] = []
day_notes[day].append(dt)
except Exception:
pass
# 获取该书的打开时间戳ZLASTOPENDATE用于判断无笔记时是否有打开过书籍
open_info = books_open.get(bk_id, {})
last_open_ts = open_info.get('last_open')
# 生成最近30天的阅读时长列表
readtime30d = []
for i in range(30):
day = today - datetime.timedelta(days=i)
times = day_notes.get(day, [])
if not times:
opened = False
if last_open_ts:
open_dt = datetime.datetime(2001, 1, 1) + datetime.timedelta(seconds=last_open_ts)
if open_dt.date() == day:
opened = True
readtime = READ_TIME_OPEN_DAY if opened else 0
elif len(times) == 1:
readtime = READ_TIME_DAY
else:
times_sorted = sorted(times)
total_minutes = 0
for idx in range(1, len(times_sorted)):
delta = (times_sorted[idx] - times_sorted[idx-1]).total_seconds() / 60
if 0 < delta <= 180:
total_minutes += int(delta)
readtime = total_minutes if total_minutes > 0 else READ_TIME_DAY
readtime30d.append(readtime)
booksinfo[bk_id]['readtime30d'] = readtime30d
# 新增:统计今年每个月的阅读时长和年总阅读时长(遍历今年每一天)
readtime12m = [0] * 12 # 今年每月阅读时长
readtime_year = 0 # 今年总阅读时长
first_day = datetime.date(this_year, 1, 1)
days_in_year = (today - first_day).days + 1
for i in range(days_in_year):
day = first_day + datetime.timedelta(days=i)
times = day_notes.get(day, [])
if not times:
opened = False
if last_open_ts:
open_dt = datetime.datetime(2001, 1, 1) + datetime.timedelta(seconds=last_open_ts)
if open_dt.date() == day:
opened = True
readtime = READ_TIME_OPEN_DAY if opened else 0
elif len(times) == 1:
readtime = READ_TIME_DAY
else:
times_sorted = sorted(times)
total_minutes = 0
for idx in range(1, len(times_sorted)):
delta = (times_sorted[idx] - times_sorted[idx-1]).total_seconds() / 60
if 0 < delta <= 180:
total_minutes += int(delta)
readtime = total_minutes if total_minutes > 0 else READ_TIME_DAY
readtime12m[day.month-1] += readtime
readtime_year += readtime
booksinfo[bk_id]['readtime12m'] = readtime12m
booksinfo[bk_id]['readtime_year'] = readtime_year
except Exception as e:
print(f'警告: 统计readtime30d失败: {e}')
self._booksinfo = booksinfo
return booksinfo
@ -58,7 +146,45 @@ class BookListManager:
self._books_open = books_open
return books_open
def get_total_readtime(self, days=30):
    """Return the per-day reading time summed over all books.

    Each book's ``'readtime30d'`` list (as produced by ``get_books_info``)
    is added element-wise into the result.

    Args:
        days (int): Number of most recent days to report. Defaults to 30.

    Returns:
        list: ``days`` integers (minutes). Index 0 is today, index 1 is
        yesterday, and so on. Positions for which a book has no
        ``'readtime30d'`` entry contribute 0, so asking for more days than
        the per-book lists contain yields trailing zeros.
    """
    booksinfo = self.get_books_info()
    total = [0] * days
    for info in booksinfo.values():
        # zip() stops at the shorter of the requested window and the
        # book's own list, so no explicit length bookkeeping is needed.
        for day_idx, minutes in zip(range(days), info.get('readtime30d', [])):
            total[day_idx] += minutes
    return total
def get_total_readtime_year(self):
    """Return the reading time for the year summed over all books.

    Returns:
        int: Sum of every book's ``'readtime_year'`` value (minutes) from
        ``get_books_info``; books without that key count as 0.
    """
    return sum(
        info.get('readtime_year', 0)
        for info in self.get_books_info().values()
    )
def get_total_readtime12m(self):
    """Return the monthly reading time for the year summed over all books.

    Each book's ``'readtime12m'`` list (as produced by ``get_books_info``)
    is added element-wise into the result.

    Returns:
        list: 12 integers (minutes); index 0 is January. Books without a
        ``'readtime12m'`` entry contribute nothing.
    """
    total = [0] * 12
    for info in self.get_books_info().values():
        # zip() bounds the walk to 12 months and also tolerates a list
        # shorter than 12 instead of raising IndexError.
        for month_idx, minutes in zip(range(12), info.get('readtime12m', ())):
            total[month_idx] += minutes
    return total
if __name__ == '__main__':
manager = BookListManager()
booksinfo = manager.get_books_info()
manager = BookListManager()
booksinfo = manager.get_books_info()
from pprint import pprint
@ -74,4 +200,48 @@ if __name__ == '__main__':
for k, v in list(books_open.items())[:3]:
ts = v['last_open']
dt = datetime.datetime(2001, 1, 1) + datetime.timedelta(seconds=ts)
print(f"{k}: {dt} (timestamp: {ts})")
print(f"{k}: {dt} (timestamp: {ts})")
# 展示最近5天有阅读行为的书籍的readtime30d信息
print("\n【最近5天有阅读行为的书籍的readtime30d信息】")
books_with_recent_reading = []
for book_id, book in booksinfo.items():
readtime30d = book.get('readtime30d', [])
# 最近5天含今天有阅读行为
if len(readtime30d) >= 5 and any(rt > 0 for rt in readtime30d[:5]):
books_with_recent_reading.append((book_id, book))
for book_id, book in books_with_recent_reading:
print(f"书名: {book.get('displayname', book_id)}")
print(f"readtime30d: {book.get('readtime30d', [])}")
print('-' * 60)
# 测试每本书今年每月和年总阅读时长
print("\n【每本书今年每月阅读时长(分钟)和年总阅读时长】")
for k, v in booksinfo.items():
print(f"书名: {v.get('displayname', k)}")
print(f"readtime12m: {v.get('readtime12m', [])}")
print(f"readtime_year: {v.get('readtime_year', 0)} 分钟")
print('-' * 60)
# 测试get_total_readtime天数可自定义
print("\n【最近7天每天所有书籍总阅读时间分钟")
total_readtime7d = manager.get_total_readtime(days=7)
for i, mins in enumerate(total_readtime7d):
if i == 0:
label = "今天"
elif i == 1:
label = "昨天"
else:
label = f"{i}天前"
print(f"{label}: {mins} 分钟")
# 测试全年总阅读时间
print("\n【全年所有书的累计阅读时间(分钟)】")
total_readtime_year = manager.get_total_readtime_year()
print(f"全年总阅读时间: {total_readtime_year} 分钟")
# 测试全年每月累计阅读时间
print("\n【全年所有书的月度累计阅读时间(分钟)】")
total_readtime12m = manager.get_total_readtime12m()
for month, mins in enumerate(total_readtime12m, start=1):
print(f"{month}月: {mins} 分钟")

View File

@ -1,3 +1,5 @@
# 统计用:无笔记但当天有打开书籍时的阅读时长(单位:分钟)
READ_TIME_OPEN_DAY = 15
"""
config.py
---------
@ -29,5 +31,6 @@ LOCAL_ANNOTATION_WAL = os.path.join(DATA_DIR, 'AEAnnotation.sqlite-wal')
LOCAL_LIBRARY_DB = os.path.join(DATA_DIR, 'BKLibrary.sqlite')
LOCAL_BOOKS_PLIST = os.path.join(DATA_DIR, 'Books.plist')
# 其他可扩展配置项
# 统计用:每日最小阅读时长(单位:分钟)
READ_TIME_DAY = 60

Binary file not shown.

Before

Width:  |  Height:  |  Size: 18 KiB

View File

@ -213,8 +213,8 @@ booksnote = {
1. 增加QTUI
2. 增加统计和展示
统计
- 周活跃 - 7天每天的阅读时长,柱状图
每本书ZANNOTATIONCREATIONDATE落在7天前到今天的天数*60min(60mins为每天阅读时间可配置
- 周活跃 - 30天每天的阅读时长,柱状图
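某本书根据每条笔记(note)的ZANNOTATIONCREATIONDATE按天统计: 如果某天没有note, 则阅读时间为0(若该书的最后打开日期ZLASTOPENDATE恰好是当天, 则记为READ_TIME_OPEN_DAY); 如果只有一条note, 阅读时间为READ_TIME_DAY=60分钟(在config.py中配置); 如果note超过1条, 则把按时间排序后相邻两条note的时间间隔累加作为阅读时长(只累加不超过180分钟的间隔, 累加结果为0时取READ_TIME_DAY)。结果放在readtime30d这个list中。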
- 月活跃 - 30天每天的阅读时长柱状图
每本书ZANNOTATIONCREATIONDATE落在30天前到今天的天数*60min(60mins为每天阅读时间可配置
- 已阅读的书籍: 每本平均阅读时长。所有书籍:总阅读时长,年阅读时长,年平均每日阅读时长,累计阅读天数。用气泡图表示。