Files
kman/mobiparse/mobi/mobi_sectioner.py
2024-04-03 15:08:22 +08:00

205 lines
7.3 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
from __future__ import unicode_literals, division, absolute_import, print_function
from .compatibility_utils import PY2, hexlify, bstr, bord, bchar
from loguru import logger
import datetime
# Python 2 only: rebind the module-level name `range` to `xrange`, so the
# `range(...)` calls further down in this module use xrange there.
if PY2:
    range = xrange
# note: struct pack, unpack, unpack_from all require bytestring format
# data all the way up to at least python 2.7.5, python 3 okay with bytestring
import struct
from .unipath import pathof
# Module-level debug switch, off by default. Nothing in the visible part of
# this module reads it.
DUMP = False
""" Set to True to dump all possible information. """
class unpackException(Exception):
    """Exception type declared by this module.

    It adds nothing to the built-in ``Exception``; it only provides a
    distinct class to raise and catch.  Nothing in the visible part of
    this module raises it.
    """
def describe(data):
    """Return a human-readable rendering of a byte string.

    The result has the form ``"<text>" 0x<hex>``: ``<text>`` is ``data``
    with every byte outside the printable ASCII range (0x20-0x7E)
    replaced by ``?``, and ``<hex>`` is the hex dump produced by
    ``hexlify``.

    Args:
        data: the byte string to describe.

    Returns:
        The quoted text form followed by the hexadecimal form.
    """
    shown = []
    for item in data:
        code = bord(item)
        # 0x7F (DEL) is a control character as well, so the printable
        # range stops at 0x7E rather than 0x7F.
        if 32 <= code < 127:
            shown.append(bchar(item).decode("latin-1"))
        else:
            shown.append("?")
    return '"' + "".join(shown) + '"' + " 0x" + hexlify(data)
def datetimefrompalmtime(palmtime):
    """Convert a Palm database timestamp into a naive ``datetime``.

    Values above 0x7FFFFFFF are counted in seconds from 1904-01-01;
    all other values are counted in seconds from 1970-01-01.

    Args:
        palmtime: timestamp in seconds, as read from the Palm header.

    Returns:
        The corresponding ``datetime.datetime`` (no timezone attached).
    """
    epoch_year = 1904 if palmtime > 0x7FFFFFFF else 1970
    epoch = datetime.datetime(year=epoch_year, month=1, day=1)
    return epoch + datetime.timedelta(seconds=palmtime)
class Sectionizer:
    """Split a Palm database file (e.g. a MOBI book) into its sections.

    The whole file is read into memory.  The first 78 bytes are kept as
    the Palm header, and the table that follows them is decoded into one
    (offset, attributes) pair per section.

    Attributes:
        data: complete file contents as bytes.
        palmheader: the first 78 bytes of ``data``.
        palmname: the first 32 bytes of ``data`` (database name field).
        ident: 8 bytes at header offset 0x3C (type and creator fields).
        num_sections: section count read from header offset 76.
        filelength: length of ``data``.
        sectionoffsets: tuple of section start offsets, with the file
            length appended as a final sentinel entry.
        sectionattributes: tuple of the 32-bit value stored next to each
            offset, with a trailing 0 for the sentinel entry.
        sectiondescriptions: mutable list of free-text labels, one per
            entry of ``sectionoffsets``.
    """

    # (bit mask, label) pairs for the header attribute bit field.
    _PALM_ATTRIBUTE_FLAGS = (
        (2, "Read-only"),
        (4, "Dirty AppInfoArea"),
        (8, "Needs to be backed up"),
        (16, "OK to install over newer"),
        (32, "Reset after installation"),
        (64, "No copying by PalmPilot beaming"),
    )

    def __init__(self, filename):
        """Read ``filename`` and decode its Palm header and section table.

        Args:
            filename: path of the file to open (passed through ``pathof``).

        Raises:
            struct.error: if the file is too short to hold the header or
                the section table it announces.
        """
        with open(pathof(filename), "rb") as f:
            self.data = f.read()
        self.palmheader = self.data[:78]
        self.palmname = self.data[:32]
        self.ident = self.palmheader[0x3C : 0x3C + 8]
        (self.num_sections,) = struct.unpack_from(b">H", self.palmheader, 76)
        self.filelength = len(self.data)

        # The table starts right after the 78-byte header and holds two
        # big-endian 32-bit values per section: offset, then attributes.
        # e.g. (9680, 0, 18618, 2, 22275, 4, ...).  The file length is
        # appended as a sentinel so that every real section has an end.
        table_format = bstr(">%dL" % (self.num_sections * 2))
        sectionsdata = struct.unpack_from(table_format, self.data, 78) + (
            self.filelength,
            0,
        )
        # Even positions: offsets of all sections (9680, 18618, 22275, ...).
        self.sectionoffsets = sectionsdata[::2]
        # Odd positions: attribute words (0, 2, 4, ...).
        self.sectionattributes = sectionsdata[1::2]
        self.sectiondescriptions = [""] * (self.num_sections + 1)
        self.sectiondescriptions[-1] = "File Length Only"

        # Diagnostics go through the logger instead of stdout so that
        # merely constructing a Sectionizer never writes to the console.
        logger.debug("sectionsdata format %r" % (table_format,))
        logger.debug("sectionoffsets %s" % (self.sectionoffsets,))
        logger.debug("sectionattributes %s" % (self.sectionattributes,))

    # sections information
    def dumpsectionsinfo(self):
        """Log one line per section: index, offset, length, UID, attributes.

        The length of a section is the distance to the next offset.  The
        attribute word is split into its low 24 bits (logged as UID) and
        its high 8 bits (logged as Attribs).  A final line reports the
        sentinel entry.
        """
        logger.debug("Section Offset Length UID Attribs Description")
        for i in range(self.num_sections):
            start = self.sectionoffsets[i]
            attributes = self.sectionattributes[i]
            logger.debug(
                "%3d %3X 0x%07X 0x%05X % 8d % 7d %s"
                % (
                    i,
                    i,
                    start,
                    self.sectionoffsets[i + 1] - start,
                    attributes & 0xFFFFFF,
                    (attributes >> 24) & 0xFF,
                    self.sectiondescriptions[i],
                )
            )
        last = self.num_sections
        logger.debug(
            "%3d %3X 0x%07X %s"
            % (last, last, self.sectionoffsets[last], self.sectiondescriptions[last])
        )

    def setsectiondescription(self, section, description):
        """Attach a free-text label to a section.

        Args:
            section: index into ``sectiondescriptions``.
            description: label to store.

        An index at or beyond the end of the list is logged and ignored.
        """
        if section >= len(self.sectiondescriptions):
            logger.debug(
                "Section out of range: %d, description %s" % (section, description)
            )
            return
        self.sectiondescriptions[section] = description

    def dumppalmheader(self):
        """Log every field of the 78-byte Palm database header."""
        header = self.palmheader
        logger.debug("Palm Database Header")
        logger.debug("Database name: " + repr(header[:32]))

        (dbattributes,) = struct.unpack_from(b">H", header, 32)
        attributes_line = "Bitfield attributes: 0x%0X" % dbattributes
        if dbattributes != 0:
            # Keep the decoded flag names in the same log record as the raw
            # value instead of printing them separately to stdout.
            flags = [
                label
                for mask, label in self._PALM_ATTRIBUTE_FLAGS
                if dbattributes & mask
            ]
            attributes_line += " (" + "; ".join(flags) + ")"
        logger.debug(attributes_line)

        logger.debug("File version: %d" % struct.unpack_from(b">H", header, 34)[0])

        (dbcreation,) = struct.unpack_from(b">L", header, 36)
        logger.debug(
            "Creation Date: "
            + str(datetimefrompalmtime(dbcreation))
            + (" (0x%0X)" % dbcreation)
        )
        (dbmodification,) = struct.unpack_from(b">L", header, 40)
        logger.debug(
            "Modification Date: "
            + str(datetimefrompalmtime(dbmodification))
            + (" (0x%0X)" % dbmodification)
        )
        (dbbackup,) = struct.unpack_from(b">L", header, 44)
        # A zero backup date is skipped.
        if dbbackup != 0:
            logger.debug(
                "Backup Date: "
                + str(datetimefrompalmtime(dbbackup))
                + (" (0x%0X)" % dbbackup)
            )

        logger.debug("Modification No.: %d" % struct.unpack_from(b">L", header, 48)[0])
        logger.debug(
            "App Info offset: 0x%0X" % struct.unpack_from(b">L", header, 52)[0]
        )
        logger.debug(
            "Sort Info offset: 0x%0X" % struct.unpack_from(b">L", header, 56)[0]
        )
        logger.debug(
            "Type/Creator: %s/%s" % (repr(header[60:64]), repr(header[64:68]))
        )
        logger.debug("Unique seed: 0x%0X" % struct.unpack_from(b">L", header, 68)[0])

        (expectedzero,) = struct.unpack_from(b">L", header, 72)
        if expectedzero != 0:
            logger.debug("Should be zero but isn't: %d" % expectedzero)

        logger.debug(
            "Number of sections: %d" % struct.unpack_from(b">H", header, 76)[0]
        )

    def loadSection(self, section):
        """Return the raw bytes of one section.

        Args:
            section: index of the section to load.

        Returns:
            The slice of ``data`` between this section's offset and the
            next entry of ``sectionoffsets``.

        Raises:
            ValueError: if ``section`` has no following offset entry (the
                two-element unpacking below then fails).
        """
        before, after = self.sectionoffsets[section : section + 2]
        return self.data[before:after]