'update'
This commit is contained in:
parent
95cd27563c
commit
893cd06c2c
Binary file not shown.
Binary file not shown.
|
@ -20,26 +20,81 @@ import os
|
|||
from collections import defaultdict
|
||||
|
||||
class AnnotationManager:
|
||||
"""
|
||||
iBooks笔记管理器
|
||||
|
||||
负责从iBooks的AEAnnotation.sqlite数据库中提取和解析用户的阅读笔记和高亮标记。
|
||||
支持获取所有书籍的笔记或指定书籍的笔记,并提供位置信息解析功能。
|
||||
"""
|
||||
|
||||
def __init__(self, db_path=None):
    """
    Set up the annotation manager.

    Args:
        db_path (str, optional): Path to the annotation database file.
            When omitted or empty, config.LOCAL_ANNOTATION_DB is used.
    """
    if not db_path:
        db_path = config.LOCAL_ANNOTATION_DB
    self.db_path = db_path
|
||||
|
||||
@staticmethod
|
||||
def parse_location(location):
|
||||
"""
|
||||
解析ZANNOTATIONLOCATION,返回(idref, filepos)
|
||||
- epubcfi(...)格式优先提取[]内内容为idref
|
||||
- 其他格式兼容原逻辑
|
||||
解析iBooks笔记的位置信息
|
||||
|
||||
从ZANNOTATIONLOCATION字段解析出章节标识符和文件内位置信息。
|
||||
支持epubcfi格式的位置字符串解析。
|
||||
|
||||
Args:
|
||||
location (str): 笔记位置字符串,通常为epubcfi格式
|
||||
|
||||
Returns:
|
||||
tuple: (idref, filepos)
|
||||
- idref (str): 章节标识符,用于定位具体章节
|
||||
- filepos (str): 文件内位置,用于精确定位笔记位置
|
||||
|
||||
Examples:
|
||||
>>> parse_location('epubcfi(/6/746[id509]!/4[abc]/10,/2/1:0,/7:8)')
|
||||
('id509', 'abc')
|
||||
"""
|
||||
idref = None
|
||||
filepos = None
|
||||
if not location:
|
||||
return idref, filepos
|
||||
# 使用正则表达式提取[]内的内容
|
||||
matches = re.findall(r'\[(.*?)\]', location) if location else []
|
||||
idref = matches[0] if len(matches) > 0 else None
|
||||
filepos = matches[1] if len(matches) > 1 else None
|
||||
return idref, filepos
|
||||
|
||||
def get_annotations(self, bookid=None):
|
||||
"""
|
||||
从数据库获取笔记数据
|
||||
|
||||
从iBooks的AEAnnotation.sqlite数据库中提取所有或指定书籍的笔记和高亮内容。
|
||||
自动处理时间戳转换和位置信息解析。
|
||||
|
||||
Args:
|
||||
bookid (str, optional): 书籍资产ID,如果为None则获取所有书籍的笔记
|
||||
|
||||
Returns:
|
||||
dict: 笔记数据字典,结构为:
|
||||
{
|
||||
assetid: {
|
||||
uuid: {
|
||||
'creationdate': '创建日期',
|
||||
'filepos': '文件位置',
|
||||
'idref': '章节标识',
|
||||
'note': '笔记内容',
|
||||
'selectedtext': '选中文本'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Note:
|
||||
- 会检查WAL模式相关文件(-wal, -shm)的存在性
|
||||
- 自动转换苹果时间戳格式(以2001-01-01为基准)
|
||||
- 过滤掉既没有笔记也没有选中文本的空记录
|
||||
"""
|
||||
# 检查WAL模式相关文件
|
||||
base = self.db_path.rsplit('.', 1)[0]
|
||||
wal_path = base + '.sqlite-wal'
|
||||
|
@ -47,8 +102,11 @@ class AnnotationManager:
|
|||
for f in [self.db_path, wal_path, shm_path]:
|
||||
if not os.path.exists(f):
|
||||
print(f'警告: 缺少 {f},可能无法获取全部最新笔记')
|
||||
# 连接数据库并执行查询
|
||||
conn = sqlite3.connect(self.db_path)
|
||||
cursor = conn.cursor()
|
||||
|
||||
# 根据是否指定bookid选择不同的查询语句
|
||||
if bookid is not None:
|
||||
cursor.execute('''
|
||||
SELECT ZANNOTATIONASSETID, ZANNOTATIONCREATIONDATE, ZANNOTATIONLOCATION, ZANNOTATIONNOTE, ZANNOTATIONSELECTEDTEXT, ZANNOTATIONUUID
|
||||
|
@ -59,12 +117,16 @@ class AnnotationManager:
|
|||
SELECT ZANNOTATIONASSETID, ZANNOTATIONCREATIONDATE, ZANNOTATIONLOCATION, ZANNOTATIONNOTE, ZANNOTATIONSELECTEDTEXT, ZANNOTATIONUUID
|
||||
FROM ZAEANNOTATION
|
||||
''')
|
||||
|
||||
rows = cursor.fetchall()
|
||||
annotations = defaultdict(dict)
|
||||
import datetime
|
||||
|
||||
# 处理每一行数据
|
||||
for row in rows:
|
||||
assetid, creationdate, location, note, selectedtext, uuid = row
|
||||
# 转换 creationdate 格式,支持苹果时间戳(以2001-01-01为基准)
|
||||
|
||||
# 转换 creationdate格式为'YYYY-MM-DD HH:MM:SS',支持苹果时间戳(以2001-01-01为基准)
|
||||
date_str = creationdate
|
||||
if creationdate:
|
||||
try:
|
||||
|
@ -74,13 +136,20 @@ class AnnotationManager:
|
|||
elif isinstance(creationdate, str) and creationdate.replace('.', '', 1).isdigit():
|
||||
dt = origin + datetime.timedelta(seconds=float(creationdate))
|
||||
else:
|
||||
# 支持原有格式'2025/9/6'等
|
||||
dt = datetime.datetime.strptime(creationdate[:10], "%Y-%m-%d")
|
||||
date_str = f"{dt.year}/{dt.month}/{dt.day}"
|
||||
date_str = dt.strftime('%Y-%m-%d %H:%M:%S')
|
||||
except Exception:
|
||||
date_str = str(creationdate)
|
||||
|
||||
# 解析位置信息
|
||||
idref, filepos = self.parse_location(location)
|
||||
|
||||
# 过滤空记录(既没有笔记也没有选中文本)
|
||||
if note is None and selectedtext is None:
|
||||
continue
|
||||
|
||||
# 构建笔记数据结构
|
||||
annotations[str(assetid)][uuid] = {
|
||||
'creationdate': date_str,
|
||||
'filepos': filepos,
|
||||
|
@ -88,14 +157,26 @@ class AnnotationManager:
|
|||
'note': note,
|
||||
'selectedtext': selectedtext
|
||||
}
|
||||
|
||||
conn.close()
|
||||
|
||||
# 根据查询类型返回相应结果
|
||||
if bookid is not None:
|
||||
return {str(bookid): annotations.get(str(bookid), {})}
|
||||
return annotations
|
||||
|
||||
if __name__ == "__main__":
|
||||
"""
|
||||
测试模块功能
|
||||
|
||||
包含两个测试用例:
|
||||
1. 测试parse_location方法解析各种格式的位置字符串
|
||||
2. 测试get_annotations方法获取指定书籍的笔记数据
|
||||
"""
|
||||
manager = AnnotationManager()
|
||||
# 测试 parse_location
|
||||
|
||||
# 测试 parse_location 方法
|
||||
print("=== 测试位置解析功能 ===")
|
||||
test_locations = [
|
||||
'epubcfi(/6/746[id509]!/4[4MLOS0-27b363c65bfe41ad8429f530566a2737]/10,/2/1:0,/7:8',
|
||||
'epubcfi(/6/22[id15]!/4/156/1,:21,:157)',
|
||||
|
@ -105,7 +186,8 @@ if __name__ == "__main__":
|
|||
idref, filepos = manager.parse_location(loc)
|
||||
print(f"location: {loc}\n idref: {idref}\n filepos: {filepos}\n")
|
||||
|
||||
# 测试只获取特定 assetid 的笔记
|
||||
# 测试获取特定书籍的笔记
|
||||
print("=== 测试笔记获取功能 ===")
|
||||
test_bookid = "B18FCD9F90FD43C2373AE52BAEF9A77C"
|
||||
annotations = manager.get_annotations(bookid=test_bookid)
|
||||
from pprint import pprint
|
||||
|
|
|
@ -6,6 +6,7 @@ import os
|
|||
from collections import defaultdict
|
||||
|
||||
class BookListManager:
|
||||
|
||||
def __init__(self, plist_path=None, db_path=None):
    """
    Set up the book-list manager.

    Args:
        plist_path (str, optional): Path to the books plist file.
            When omitted or empty, config.LOCAL_BOOKS_PLIST is used.
        db_path (str, optional): Path to the library database file.
            When omitted or empty, config.LOCAL_LIBRARY_DB is used.
    """
    self.plist_path = plist_path if plist_path else config.LOCAL_BOOKS_PLIST
    self.db_path = db_path if db_path else config.LOCAL_LIBRARY_DB
|
||||
|
@ -32,6 +33,93 @@ class BookListManager:
|
|||
'date': book.get('BKInsertionDate',''),
|
||||
'updatedate': book.get('updateDate','')
|
||||
}
|
||||
# 统计每本书最近30天每天的阅读时长
|
||||
try:
|
||||
from annotationdata import AnnotationManager
|
||||
import datetime
|
||||
# 每天最小阅读时长(有笔记)
|
||||
READ_TIME_DAY = getattr(config, 'READ_TIME_DAY', 60) # 单位:分钟
|
||||
# 无笔记但当天有打开书籍时的阅读时长
|
||||
READ_TIME_OPEN_DAY = getattr(config, 'READ_TIME_OPEN_DAY', 30) # 单位:分钟
|
||||
today = datetime.datetime.now().date()
|
||||
manager = AnnotationManager()
|
||||
annotations = manager.get_annotations()
|
||||
# 获取所有书籍的打开时间(ZLASTOPENDATE),单位为苹果时间戳
|
||||
books_open = self.get_books_last_open()
|
||||
this_year = today.year
|
||||
for bk_id in booksinfo:
|
||||
notes = annotations.get(bk_id, {})
|
||||
day_notes = {}
|
||||
# 收集每本书所有笔记的创建时间,按天分组
|
||||
for uuid, note in notes.items():
|
||||
raw_date = note.get('creationdate')
|
||||
try:
|
||||
dt = datetime.datetime.strptime(raw_date, '%Y-%m-%d %H:%M:%S')
|
||||
day = dt.date()
|
||||
if day not in day_notes:
|
||||
day_notes[day] = []
|
||||
day_notes[day].append(dt)
|
||||
except Exception:
|
||||
pass
|
||||
# 获取该书的打开时间戳(ZLASTOPENDATE),用于判断无笔记时是否有打开过书籍
|
||||
open_info = books_open.get(bk_id, {})
|
||||
last_open_ts = open_info.get('last_open')
|
||||
# 生成最近30天的阅读时长列表
|
||||
readtime30d = []
|
||||
for i in range(30):
|
||||
day = today - datetime.timedelta(days=i)
|
||||
times = day_notes.get(day, [])
|
||||
if not times:
|
||||
opened = False
|
||||
if last_open_ts:
|
||||
open_dt = datetime.datetime(2001, 1, 1) + datetime.timedelta(seconds=last_open_ts)
|
||||
if open_dt.date() == day:
|
||||
opened = True
|
||||
readtime = READ_TIME_OPEN_DAY if opened else 0
|
||||
elif len(times) == 1:
|
||||
readtime = READ_TIME_DAY
|
||||
else:
|
||||
times_sorted = sorted(times)
|
||||
total_minutes = 0
|
||||
for idx in range(1, len(times_sorted)):
|
||||
delta = (times_sorted[idx] - times_sorted[idx-1]).total_seconds() / 60
|
||||
if 0 < delta <= 180:
|
||||
total_minutes += int(delta)
|
||||
readtime = total_minutes if total_minutes > 0 else READ_TIME_DAY
|
||||
readtime30d.append(readtime)
|
||||
booksinfo[bk_id]['readtime30d'] = readtime30d
|
||||
|
||||
# 新增:统计今年每个月的阅读时长和年总阅读时长(遍历今年每一天)
|
||||
readtime12m = [0] * 12 # 今年每月阅读时长
|
||||
readtime_year = 0 # 今年总阅读时长
|
||||
first_day = datetime.date(this_year, 1, 1)
|
||||
days_in_year = (today - first_day).days + 1
|
||||
for i in range(days_in_year):
|
||||
day = first_day + datetime.timedelta(days=i)
|
||||
times = day_notes.get(day, [])
|
||||
if not times:
|
||||
opened = False
|
||||
if last_open_ts:
|
||||
open_dt = datetime.datetime(2001, 1, 1) + datetime.timedelta(seconds=last_open_ts)
|
||||
if open_dt.date() == day:
|
||||
opened = True
|
||||
readtime = READ_TIME_OPEN_DAY if opened else 0
|
||||
elif len(times) == 1:
|
||||
readtime = READ_TIME_DAY
|
||||
else:
|
||||
times_sorted = sorted(times)
|
||||
total_minutes = 0
|
||||
for idx in range(1, len(times_sorted)):
|
||||
delta = (times_sorted[idx] - times_sorted[idx-1]).total_seconds() / 60
|
||||
if 0 < delta <= 180:
|
||||
total_minutes += int(delta)
|
||||
readtime = total_minutes if total_minutes > 0 else READ_TIME_DAY
|
||||
readtime12m[day.month-1] += readtime
|
||||
readtime_year += readtime
|
||||
booksinfo[bk_id]['readtime12m'] = readtime12m
|
||||
booksinfo[bk_id]['readtime_year'] = readtime_year
|
||||
except Exception as e:
|
||||
print(f'警告: 统计readtime30d失败: {e}')
|
||||
self._booksinfo = booksinfo
|
||||
return booksinfo
|
||||
|
||||
|
@ -58,7 +146,45 @@ class BookListManager:
|
|||
self._books_open = books_open
|
||||
return books_open
|
||||
|
||||
def get_total_readtime(self, days=30):
    """
    Sum the per-day reading minutes of every book over the last ``days`` days.

    Returns a list of length ``days``: index 0 is today, index 1 is
    yesterday, and so on. Values are taken from each book's 'readtime30d'
    list, so positions beyond the length of that list stay 0.
    """
    books = self.get_books_info()
    totals = [0] * days
    for book in books.values():
        per_day = book.get('readtime30d', [])
        # zip stops at the shorter input, i.e. min(days, len(per_day)) items.
        for offset, minutes in zip(range(days), per_day):
            totals[offset] += minutes
    return totals
|
||||
|
||||
def get_total_readtime_year(self):
    """
    Return the yearly reading time, in minutes, accumulated over all books.

    Adds up each book's 'readtime_year' value; a book without that key
    contributes 0.
    """
    return sum(
        book.get('readtime_year', 0)
        for book in self.get_books_info().values()
    )
|
||||
|
||||
def get_total_readtime12m(self):
    """
    Return the monthly reading time, in minutes, accumulated over all books.

    The result is a list of 12 values (index 0 is January). Each book's
    'readtime12m' list is added element-wise; a book without that key
    contributes twelve zeros.
    """
    books = self.get_books_info()
    monthly_total = [0] * 12
    for book in books.values():
        per_month = book.get('readtime12m', [0] * 12)
        monthly_total = [monthly_total[m] + per_month[m] for m in range(12)]
    return monthly_total
|
||||
|
||||
if __name__ == '__main__':
|
||||
manager = BookListManager()
|
||||
booksinfo = manager.get_books_info()
|
||||
|
||||
manager = BookListManager()
|
||||
booksinfo = manager.get_books_info()
|
||||
from pprint import pprint
|
||||
|
@ -74,4 +200,48 @@ if __name__ == '__main__':
|
|||
for k, v in list(books_open.items())[:3]:
|
||||
ts = v['last_open']
|
||||
dt = datetime.datetime(2001, 1, 1) + datetime.timedelta(seconds=ts)
|
||||
print(f"{k}: {dt} (timestamp: {ts})")
|
||||
print(f"{k}: {dt} (timestamp: {ts})")
|
||||
|
||||
# 展示最近5天有阅读行为的书籍的readtime30d信息
|
||||
print("\n【最近5天有阅读行为的书籍的readtime30d信息】")
|
||||
books_with_recent_reading = []
|
||||
for book_id, book in booksinfo.items():
|
||||
readtime30d = book.get('readtime30d', [])
|
||||
# 最近5天(含今天)有阅读行为
|
||||
if len(readtime30d) >= 5 and any(rt > 0 for rt in readtime30d[:5]):
|
||||
books_with_recent_reading.append((book_id, book))
|
||||
for book_id, book in books_with_recent_reading:
|
||||
print(f"书名: {book.get('displayname', book_id)}")
|
||||
print(f"readtime30d: {book.get('readtime30d', [])}")
|
||||
print('-' * 60)
|
||||
|
||||
# 测试每本书今年每月和年总阅读时长
|
||||
print("\n【每本书今年每月阅读时长(分钟)和年总阅读时长】")
|
||||
for k, v in booksinfo.items():
|
||||
print(f"书名: {v.get('displayname', k)}")
|
||||
print(f"readtime12m: {v.get('readtime12m', [])}")
|
||||
print(f"readtime_year: {v.get('readtime_year', 0)} 分钟")
|
||||
print('-' * 60)
|
||||
|
||||
# 测试get_total_readtime,天数可自定义
|
||||
print("\n【最近7天每天所有书籍总阅读时间(分钟)】")
|
||||
total_readtime7d = manager.get_total_readtime(days=7)
|
||||
for i, mins in enumerate(total_readtime7d):
|
||||
if i == 0:
|
||||
label = "今天"
|
||||
elif i == 1:
|
||||
label = "昨天"
|
||||
else:
|
||||
label = f"{i}天前"
|
||||
print(f"{label}: {mins} 分钟")
|
||||
|
||||
# 测试全年总阅读时间
|
||||
print("\n【全年所有书的累计阅读时间(分钟)】")
|
||||
total_readtime_year = manager.get_total_readtime_year()
|
||||
print(f"全年总阅读时间: {total_readtime_year} 分钟")
|
||||
|
||||
# 测试全年每月累计阅读时间
|
||||
print("\n【全年所有书的月度累计阅读时间(分钟)】")
|
||||
total_readtime12m = manager.get_total_readtime12m()
|
||||
for month, mins in enumerate(total_readtime12m, start=1):
|
||||
print(f"{month}月: {mins} 分钟")
|
|
@ -1,3 +1,5 @@
|
|||
# 统计用:无笔记但当天有打开书籍时的阅读时长(单位:分钟)
|
||||
READ_TIME_OPEN_DAY = 15
|
||||
"""
|
||||
config.py
|
||||
---------
|
||||
|
@ -29,5 +31,6 @@ LOCAL_ANNOTATION_WAL = os.path.join(DATA_DIR, 'AEAnnotation.sqlite-wal')
|
|||
LOCAL_LIBRARY_DB = os.path.join(DATA_DIR, 'BKLibrary.sqlite')
|
||||
LOCAL_BOOKS_PLIST = os.path.join(DATA_DIR, 'Books.plist')
|
||||
|
||||
# 其他可扩展配置项
|
||||
# 统计用:每日最小阅读时长(单位:分钟)
|
||||
READ_TIME_DAY = 60
|
||||
|
||||
|
|
BIN
kmanapp.png
BIN
kmanapp.png
Binary file not shown.
Before Width: | Height: | Size: 18 KiB |
|
@ -213,8 +213,8 @@ booksnote = {
|
|||
1. 增加QTUI
|
||||
2. 增加统计和展示
|
||||
统计
|
||||
- 周活跃 - 7天每天的阅读时长,柱状图
|
||||
每本书,ZANNOTATIONCREATIONDATE,落在7天前到今天的天数*60min(60mins为每天阅读时间,可配置)
|
||||
- 周活跃 - 30天每天的阅读时长,柱状图
|
||||
某本书,根据每条笔记note中ZANNOTATIONCREATIONDATE按天统计:如果某天没有note,但该书的ZLASTOPENDATE落在当天,则阅读时间为READ_TIME_OPEN_DAY,否则为0;如果只有一条note,阅读时间为READ_TIME_DAY=60分钟(在config.py中配置);如果note超过1条,将相邻两条笔记之间不超过180分钟的时间差累加作为阅读时长(累加结果为0时取READ_TIME_DAY)。放在readtime30d这个list中。
|
||||
- 月活跃 - 30天每天的阅读时长,柱状图
|
||||
每本书,ZANNOTATIONCREATIONDATE,落在30天前到今天的天数*60min(60mins为每天阅读时间,可配置)
|
||||
- 已阅读的书籍: 每本平均阅读时长。所有书籍:总阅读时长,年阅读时长,年平均每日阅读时长,累计阅读天数。用气泡图表示。
|
||||
|
|
Loading…
Reference in New Issue