diff --git a/__pycache__/annotationdata.cpython-312.pyc b/__pycache__/annotationdata.cpython-312.pyc index e515272..7333ec6 100644 Binary files a/__pycache__/annotationdata.cpython-312.pyc and b/__pycache__/annotationdata.cpython-312.pyc differ diff --git a/__pycache__/config.cpython-312.pyc b/__pycache__/config.cpython-312.pyc index 0d41e28..8c2d7e2 100644 Binary files a/__pycache__/config.cpython-312.pyc and b/__pycache__/config.cpython-312.pyc differ diff --git a/annotationdata.py b/annotationdata.py index 090e406..ff9b580 100644 --- a/annotationdata.py +++ b/annotationdata.py @@ -20,26 +20,81 @@ import os from collections import defaultdict class AnnotationManager: + """ + iBooks笔记管理器 + + 负责从iBooks的AEAnnotation.sqlite数据库中提取和解析用户的阅读笔记和高亮标记。 + 支持获取所有书籍的笔记或指定书籍的笔记,并提供位置信息解析功能。 + """ + def __init__(self, db_path=None): + """ + 初始化笔记管理器 + + Args: + db_path (str, optional): 数据库文件路径,默认使用config.LOCAL_ANNOTATION_DB + """ self.db_path = db_path or config.LOCAL_ANNOTATION_DB @staticmethod def parse_location(location): """ - 解析ZANNOTATIONLOCATION,返回(idref, filepos) - - epubcfi(...)格式优先提取[]内内容为idref - - 其他格式兼容原逻辑 + 解析iBooks笔记的位置信息 + + 从ZANNOTATIONLOCATION字段解析出章节标识符和文件内位置信息。 + 支持epubcfi格式的位置字符串解析。 + + Args: + location (str): 笔记位置字符串,通常为epubcfi格式 + + Returns: + tuple: (idref, filepos) + - idref (str): 章节标识符,用于定位具体章节 + - filepos (str): 文件内位置,用于精确定位笔记位置 + + Examples: + >>> parse_location('epubcfi(/6/746[id509]!/4[abc]/10,/2/1:0,/7:8)') + ('id509', 'abc') """ idref = None filepos = None if not location: return idref, filepos + # 使用正则表达式提取[]内的内容 matches = re.findall(r'\[(.*?)\]', location) if location else [] idref = matches[0] if len(matches) > 0 else None filepos = matches[1] if len(matches) > 1 else None return idref, filepos def get_annotations(self, bookid=None): + """ + 从数据库获取笔记数据 + + 从iBooks的AEAnnotation.sqlite数据库中提取所有或指定书籍的笔记和高亮内容。 + 自动处理时间戳转换和位置信息解析。 + + Args: + bookid (str, optional): 书籍资产ID,如果为None则获取所有书籍的笔记 + + Returns: + dict: 笔记数据字典,结构为: + { + assetid: { + uuid: { + 
'creationdate': '创建日期', + 'filepos': '文件位置', + 'idref': '章节标识', + 'note': '笔记内容', + 'selectedtext': '选中文本' + } + } + } + + Note: + - 会检查WAL模式相关文件(-wal, -shm)的存在性 + - 自动转换苹果时间戳格式(以2001-01-01为基准) + - 过滤掉既没有笔记也没有选中文本的空记录 + """ # 检查WAL模式相关文件 base = self.db_path.rsplit('.', 1)[0] wal_path = base + '.sqlite-wal' @@ -47,8 +102,11 @@ class AnnotationManager: for f in [self.db_path, wal_path, shm_path]: if not os.path.exists(f): print(f'警告: 缺少 {f},可能无法获取全部最新笔记') + # 连接数据库并执行查询 conn = sqlite3.connect(self.db_path) cursor = conn.cursor() + + # 根据是否指定bookid选择不同的查询语句 if bookid is not None: cursor.execute(''' SELECT ZANNOTATIONASSETID, ZANNOTATIONCREATIONDATE, ZANNOTATIONLOCATION, ZANNOTATIONNOTE, ZANNOTATIONSELECTEDTEXT, ZANNOTATIONUUID @@ -59,12 +117,16 @@ class AnnotationManager: SELECT ZANNOTATIONASSETID, ZANNOTATIONCREATIONDATE, ZANNOTATIONLOCATION, ZANNOTATIONNOTE, ZANNOTATIONSELECTEDTEXT, ZANNOTATIONUUID FROM ZAEANNOTATION ''') + rows = cursor.fetchall() annotations = defaultdict(dict) import datetime + + # 处理每一行数据 for row in rows: assetid, creationdate, location, note, selectedtext, uuid = row - # 转换 creationdate 格式,支持苹果时间戳(以2001-01-01为基准) + + # 转换 creationdate格式为'YYYY-MM-DD HH:MM:SS',支持苹果时间戳(以2001-01-01为基准) date_str = creationdate if creationdate: try: @@ -74,13 +136,20 @@ class AnnotationManager: elif isinstance(creationdate, str) and creationdate.replace('.', '', 1).isdigit(): dt = origin + datetime.timedelta(seconds=float(creationdate)) else: + # 支持原有格式'2025/9/6'等 dt = datetime.datetime.strptime(creationdate[:10], "%Y-%m-%d") - date_str = f"{dt.year}/{dt.month}/{dt.day}" + date_str = dt.strftime('%Y-%m-%d %H:%M:%S') except Exception: date_str = str(creationdate) + + # 解析位置信息 idref, filepos = self.parse_location(location) + + # 过滤空记录(既没有笔记也没有选中文本) if note is None and selectedtext is None: continue + + # 构建笔记数据结构 annotations[str(assetid)][uuid] = { 'creationdate': date_str, 'filepos': filepos, @@ -88,14 +157,26 @@ class AnnotationManager: 'note': note, 'selectedtext': 
selectedtext } + conn.close() + + # 根据查询类型返回相应结果 if bookid is not None: return {str(bookid): annotations.get(str(bookid), {})} return annotations if __name__ == "__main__": + """ + 测试模块功能 + + 包含两个测试用例: + 1. 测试parse_location方法解析各种格式的位置字符串 + 2. 测试get_annotations方法获取指定书籍的笔记数据 + """ manager = AnnotationManager() - # 测试 parse_location + + # 测试 parse_location 方法 + print("=== 测试位置解析功能 ===") test_locations = [ 'epubcfi(/6/746[id509]!/4[4MLOS0-27b363c65bfe41ad8429f530566a2737]/10,/2/1:0,/7:8', 'epubcfi(/6/22[id15]!/4/156/1,:21,:157)', @@ -105,7 +186,8 @@ if __name__ == "__main__": idref, filepos = manager.parse_location(loc) print(f"location: {loc}\n idref: {idref}\n filepos: {filepos}\n") - # 测试只获取特定 assetid 的笔记 + # 测试获取特定书籍的笔记 + print("=== 测试笔记获取功能 ===") test_bookid = "B18FCD9F90FD43C2373AE52BAEF9A77C" annotations = manager.get_annotations(bookid=test_bookid) from pprint import pprint diff --git a/booklist_parse.py b/booklist_parse.py index fb1c9ab..5080704 100644 --- a/booklist_parse.py +++ b/booklist_parse.py @@ -6,6 +6,7 @@ import os from collections import defaultdict class BookListManager: + def __init__(self, plist_path=None, db_path=None): self.plist_path = plist_path or config.LOCAL_BOOKS_PLIST self.db_path = db_path or config.LOCAL_LIBRARY_DB @@ -32,6 +33,93 @@ class BookListManager: 'date': book.get('BKInsertionDate',''), 'updatedate': book.get('updateDate','') } + # 统计每本书最近30天每天的阅读时长 + try: + from annotationdata import AnnotationManager + import datetime + # 每天最小阅读时长(有笔记) + READ_TIME_DAY = getattr(config, 'READ_TIME_DAY', 60) # 单位:分钟 + # 无笔记但当天有打开书籍时的阅读时长 + READ_TIME_OPEN_DAY = getattr(config, 'READ_TIME_OPEN_DAY', 30) # 单位:分钟 + today = datetime.datetime.now().date() + manager = AnnotationManager() + annotations = manager.get_annotations() + # 获取所有书籍的打开时间(ZLASTOPENDATE),单位为苹果时间戳 + books_open = self.get_books_last_open() + this_year = today.year + for bk_id in booksinfo: + notes = annotations.get(bk_id, {}) + day_notes = {} + # 收集每本书所有笔记的创建时间,按天分组 + for uuid, 
note in notes.items(): + raw_date = note.get('creationdate') + try: + dt = datetime.datetime.strptime(raw_date, '%Y-%m-%d %H:%M:%S') + day = dt.date() + if day not in day_notes: + day_notes[day] = [] + day_notes[day].append(dt) + except Exception: + pass + # 获取该书的打开时间戳(ZLASTOPENDATE),用于判断无笔记时是否有打开过书籍 + open_info = books_open.get(bk_id, {}) + last_open_ts = open_info.get('last_open') + # 生成最近30天的阅读时长列表 + readtime30d = [] + for i in range(30): + day = today - datetime.timedelta(days=i) + times = day_notes.get(day, []) + if not times: + opened = False + if last_open_ts: + open_dt = datetime.datetime(2001, 1, 1) + datetime.timedelta(seconds=last_open_ts) + if open_dt.date() == day: + opened = True + readtime = READ_TIME_OPEN_DAY if opened else 0 + elif len(times) == 1: + readtime = READ_TIME_DAY + else: + times_sorted = sorted(times) + total_minutes = 0 + for idx in range(1, len(times_sorted)): + delta = (times_sorted[idx] - times_sorted[idx-1]).total_seconds() / 60 + if 0 < delta <= 180: + total_minutes += int(delta) + readtime = total_minutes if total_minutes > 0 else READ_TIME_DAY + readtime30d.append(readtime) + booksinfo[bk_id]['readtime30d'] = readtime30d + + # 新增:统计今年每个月的阅读时长和年总阅读时长(遍历今年每一天) + readtime12m = [0] * 12 # 今年每月阅读时长 + readtime_year = 0 # 今年总阅读时长 + first_day = datetime.date(this_year, 1, 1) + days_in_year = (today - first_day).days + 1 + for i in range(days_in_year): + day = first_day + datetime.timedelta(days=i) + times = day_notes.get(day, []) + if not times: + opened = False + if last_open_ts: + open_dt = datetime.datetime(2001, 1, 1) + datetime.timedelta(seconds=last_open_ts) + if open_dt.date() == day: + opened = True + readtime = READ_TIME_OPEN_DAY if opened else 0 + elif len(times) == 1: + readtime = READ_TIME_DAY + else: + times_sorted = sorted(times) + total_minutes = 0 + for idx in range(1, len(times_sorted)): + delta = (times_sorted[idx] - times_sorted[idx-1]).total_seconds() / 60 + if 0 < delta <= 180: + total_minutes += int(delta) + 
readtime = total_minutes if total_minutes > 0 else READ_TIME_DAY + readtime12m[day.month-1] += readtime + readtime_year += readtime + booksinfo[bk_id]['readtime12m'] = readtime12m + booksinfo[bk_id]['readtime_year'] = readtime_year + except Exception as e: + print(f'警告: 统计readtime30d失败: {e}') self._booksinfo = booksinfo return booksinfo @@ -58,7 +146,45 @@ class BookListManager: self._books_open = books_open return books_open + def get_total_readtime(self, days=30): + """ + 获取最近days天每天所有书籍的总阅读时间(分钟),返回长度为days的列表。 + 列表第0项为今天,第1项为昨天,依次类推。 + """ + booksinfo = self.get_books_info() + total = [0] * days + for info in booksinfo.values(): + readtime30d = info.get('readtime30d', []) + for i in range(min(days, len(readtime30d))): + total[i] += readtime30d[i] + return total + + def get_total_readtime_year(self): + """ + 获取全年所有书的累计阅读时间(分钟)。 + """ + booksinfo = self.get_books_info() + total = 0 + for info in booksinfo.values(): + total += info.get('readtime_year', 0) + return total + + def get_total_readtime12m(self): + """ + 获取全年所有书的月度累计阅读时间(长度12的列表,单位:分钟)。 + """ + booksinfo = self.get_books_info() + total = [0] * 12 + for info in booksinfo.values(): + readtime12m = info.get('readtime12m', [0]*12) + for i in range(12): + total[i] += readtime12m[i] + return total + if __name__ == '__main__': + manager = BookListManager() + booksinfo = manager.get_books_info() + manager = BookListManager() booksinfo = manager.get_books_info() from pprint import pprint @@ -74,4 +200,48 @@ if __name__ == '__main__': for k, v in list(books_open.items())[:3]: ts = v['last_open'] dt = datetime.datetime(2001, 1, 1) + datetime.timedelta(seconds=ts) - print(f"{k}: {dt} (timestamp: {ts})") \ No newline at end of file + print(f"{k}: {dt} (timestamp: {ts})") + + # 展示最近5天有阅读行为的书籍的readtime30d信息 + print("\n【最近5天有阅读行为的书籍的readtime30d信息】") + books_with_recent_reading = [] + for book_id, book in booksinfo.items(): + readtime30d = book.get('readtime30d', []) + # 最近5天(含今天)有阅读行为 + if len(readtime30d) >= 5 and 
any(rt > 0 for rt in readtime30d[:5]): + books_with_recent_reading.append((book_id, book)) + for book_id, book in books_with_recent_reading: + print(f"书名: {book.get('displayname', book_id)}") + print(f"readtime30d: {book.get('readtime30d', [])}") + print('-' * 60) + + # 测试每本书今年每月和年总阅读时长 + print("\n【每本书今年每月阅读时长(分钟)和年总阅读时长】") + for k, v in booksinfo.items(): + print(f"书名: {v.get('displayname', k)}") + print(f"readtime12m: {v.get('readtime12m', [])}") + print(f"readtime_year: {v.get('readtime_year', 0)} 分钟") + print('-' * 60) + + # 测试get_total_readtime,天数可自定义 + print("\n【最近7天每天所有书籍总阅读时间(分钟)】") + total_readtime7d = manager.get_total_readtime(days=7) + for i, mins in enumerate(total_readtime7d): + if i == 0: + label = "今天" + elif i == 1: + label = "昨天" + else: + label = f"{i}天前" + print(f"{label}: {mins} 分钟") + + # 测试全年总阅读时间 + print("\n【全年所有书的累计阅读时间(分钟)】") + total_readtime_year = manager.get_total_readtime_year() + print(f"全年总阅读时间: {total_readtime_year} 分钟") + + # 测试全年每月累计阅读时间 + print("\n【全年所有书的月度累计阅读时间(分钟)】") + total_readtime12m = manager.get_total_readtime12m() + for month, mins in enumerate(total_readtime12m, start=1): + print(f"{month}月: {mins} 分钟") \ No newline at end of file diff --git a/config.py b/config.py index 569f962..140edd5 100644 --- a/config.py +++ b/config.py @@ -1,3 +1,5 @@ +# 统计用:无笔记但当天有打开书籍时的阅读时长(单位:分钟) +READ_TIME_OPEN_DAY = 15 """ config.py --------- @@ -29,5 +31,6 @@ LOCAL_ANNOTATION_WAL = os.path.join(DATA_DIR, 'AEAnnotation.sqlite-wal') LOCAL_LIBRARY_DB = os.path.join(DATA_DIR, 'BKLibrary.sqlite') LOCAL_BOOKS_PLIST = os.path.join(DATA_DIR, 'Books.plist') -# 其他可扩展配置项 +# 统计用:当天只有一条笔记(或笔记间隔累计为0)时采用的每日默认阅读时长(单位:分钟) +READ_TIME_DAY = 60 diff --git a/kmanapp.png b/kmanapp.png deleted file mode 100644 index 6786cc1..0000000 Binary files a/kmanapp.png and /dev/null differ diff --git a/readme.md b/readme.md index 83e08e6..e9a55dc 100644 --- a/readme.md +++ b/readme.md @@ -213,8 +213,8 @@ booksnote = { 1. 增加QTUI 2.
增加统计和展示 统计 - - 周活跃 - 7天每天的阅读时长,柱状图 - 每本书,ZANNOTATIONCREATIONDATE,落在7天前到今天的天数*60min(60mins为每天阅读时间,可配置) + - 周活跃 - 30天每天的阅读时长,柱状图 + 某本书,根据每条笔记note的ZANNOTATIONCREATIONDATE按天统计:如果某天没有note,但当天打开过该书(ZLASTOPENDATE落在当天),阅读时间为READ_TIME_OPEN_DAY分钟,否则为0;如果只有一条note,阅读时间为READ_TIME_DAY=60分钟(在config.py中配置);如果note超过1条,把相邻两条note的时间差(只计大于0且不超过180分钟的间隔)累加作为阅读时长,累加结果为0时取READ_TIME_DAY。结果(单位:分钟)放在readtime30d这个list中,第0项为今天。 - 月活跃 - 30天每天的阅读时长,柱状图 每本书,ZANNOTATIONCREATIONDATE,落在30天前到今天的天数*60min(60mins为每天阅读时间,可配置) - 已阅读的书籍: 每本平均阅读时长。所有书籍:总阅读时长,年阅读时长,年平均每日阅读时长,累计阅读天数。用气泡图表示。