kman/mobiparse/mobi/mobi_sectioner.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab

from __future__ import unicode_literals, division, absolute_import, print_function

from .compatibility_utils import PY2, hexlify, bstr, bord, bchar
from loguru import logger

import datetime

if PY2:
    # Python 2 only: rebind ``range`` to the lazy ``xrange`` so the loops in
    # this module do not materialize full lists.
    range = xrange

# note:  struct pack, unpack, unpack_from all require bytestring format
# data all the way up to at least python 2.7.5, python 3 okay with bytestring
import struct

from .unipath import pathof

# Module-level debug switch. Nothing in the visible part of this file reads
# it; presumably other modules or callers consult it -- verify before removing.
DUMP = False
""" Set to True to dump all possible information. """


class unpackException(Exception):
    """Module-specific exception type.

    Adds nothing to ``Exception``; it exists so callers can catch this
    error class separately.  Nothing in the visible part of this file
    raises it.
    """


def describe(data):
    """Return a human-readable rendering of the byte string *data*.

    The result has the form ``"<text>" 0x<hex>``: *text* shows each byte
    whose ordinal lies in 32..127 as its latin-1 character and every other
    byte as ``?``; *hex* is whatever ``hexlify(data)`` returns.
    """
    hex_digits = hexlify(data)
    shown = []
    for item in data:
        code = bord(item)
        if 32 <= code <= 127:
            shown.append(bchar(item).decode("latin-1"))
        else:
            # Outside the displayed range: substitute a placeholder.
            shown.append("?")
    return "".join(['"', "".join(shown), '"', " 0x", hex_digits])


def datetimefrompalmtime(palmtime):
    """Convert a Palm header timestamp (seconds) to a naive ``datetime``.

    Values above 0x7FFFFFFF are counted from 1904-01-01; all other values
    are counted from 1970-01-01.
    """
    epoch_year = 1904 if palmtime > 0x7FFFFFFF else 1970
    elapsed = datetime.timedelta(seconds=palmtime)
    return datetime.datetime(epoch_year, 1, 1) + elapsed


class Sectionizer:
    """Split a Palm Database file into its numbered sections.

    The whole file is read into memory on construction; the header and the
    section table are then decoded with big-endian ``struct`` formats.

    Attributes:
        data: Raw bytes of the whole file.
        palmheader: The first 78 bytes of the file.
        palmname: The first 32 bytes of the file (database name field).
        ident: The 8 bytes at header offset 0x3C (the type and creator
            codes reported by ``dumppalmheader``).
        num_sections: Section count read from header offset 76.
        filelength: ``len(data)``.
        sectionoffsets: Tuple of section start offsets with the file length
            appended, so section ``i`` spans ``offsets[i]:offsets[i + 1]``.
        sectionattributes: Tuple holding the second 32-bit word of every
            section record (reported by ``dumpsectionsinfo`` as UID in the
            low 24 bits and attributes in the high 8 bits), with a
            trailing 0 for the file-length entry.
        sectiondescriptions: Mutable list of free-text descriptions, one
            per section plus a final "File Length Only" entry.
    """

    # (bit mask, label) pairs for the 16-bit attribute field at header
    # offset 32, in the order they are reported.
    _DB_ATTRIBUTE_FLAGS = (
        (2, "Read-only"),
        (4, "Dirty AppInfoArea"),
        (8, "Needs to be backed up"),
        (16, "OK to install over newer"),
        (32, "Reset after installation"),
        (64, "No copying by PalmPilot beaming"),
    )

    def __init__(self, filename):
        """Read *filename* and decode its header and section table.

        Raises:
            struct.error: If the file is too short to contain the 78-byte
                header or the section table it announces.
        """
        with open(pathof(filename), "rb") as f:
            self.data = f.read()
        self.palmheader = self.data[:78]
        self.palmname = self.data[:32]
        self.ident = self.palmheader[0x3C : 0x3C + 8]
        (self.num_sections,) = struct.unpack_from(b">H", self.palmheader, 76)
        self.filelength = len(self.data)

        # The section table follows the header at offset 78: two unsigned
        # 32-bit words per section (offset, attributes/UID).  unpack_from
        # returns a flat tuple, e.g. (9680, 0, 18618, 2, 22275, 4, ...).
        # Appending (filelength, 0) gives the last section an end offset.
        table_format = bstr(">%dL" % (self.num_sections * 2))
        sectionsdata = struct.unpack_from(table_format, self.data, 78) + (
            self.filelength,
            0,
        )

        # Even positions are offsets, odd positions are attribute words.
        self.sectionoffsets = sectionsdata[::2]
        self.sectionattributes = sectionsdata[1::2]
        self.sectiondescriptions = [""] * (self.num_sections + 1)
        self.sectiondescriptions[-1] = "File Length Only"

        # Summary only; dumpsectionsinfo() logs the complete table.
        logger.debug(
            "Section table: %d sections, file length %d"
            % (self.num_sections, self.filelength)
        )

    # sections information
    def dumpsectionsinfo(self):
        """Log one debug line per section, then the file-length entry."""
        logger.debug("Section     Offset  Length      UID Attribs Description")
        for i in range(self.num_sections):
            offset = self.sectionoffsets[i]
            attributes = self.sectionattributes[i]
            logger.debug(
                "%3d %3X  0x%07X 0x%05X % 8d % 7d %s"
                % (
                    i,
                    i,
                    offset,
                    self.sectionoffsets[i + 1] - offset,
                    attributes & 0xFFFFFF,
                    (attributes >> 24) & 0xFF,
                    self.sectiondescriptions[i],
                )
            )
        logger.debug(
            "%3d %3X  0x%07X                          %s"
            % (
                self.num_sections,
                self.num_sections,
                self.sectionoffsets[self.num_sections],
                self.sectiondescriptions[self.num_sections],
            )
        )

    def setsectiondescription(self, section, description):
        """Store *description* for *section*.

        An index at or beyond the end of the description list is not an
        error; it is only reported at debug level.
        """
        if section < len(self.sectiondescriptions):
            self.sectiondescriptions[section] = description
        else:
            logger.debug(
                "Section out of range: %d, description %s" % (section, description)
            )

    def dumppalmheader(self):
        """Log every field of the 78-byte header at debug level."""
        header = self.palmheader
        logger.debug("Palm Database Header")
        logger.debug("Database name: " + repr(header[:32]))

        (dbattributes,) = struct.unpack_from(b">H", header, 32)
        # Build the whole attribute line first so the value and its decoded
        # flags are emitted together as a single log record.
        attributes_line = "Bitfield attributes: 0x%0X" % dbattributes
        if dbattributes != 0:
            flag_labels = [
                label
                for mask, label in self._DB_ATTRIBUTE_FLAGS
                if dbattributes & mask
            ]
            attributes_line += " (" + "; ".join(flag_labels) + ")"
        logger.debug(attributes_line)

        logger.debug("File version: %d" % struct.unpack_from(b">H", header, 34)[0])
        (dbcreation,) = struct.unpack_from(b">L", header, 36)
        logger.debug(
            "Creation Date: "
            + str(datetimefrompalmtime(dbcreation))
            + (" (0x%0X)" % dbcreation)
        )
        (dbmodification,) = struct.unpack_from(b">L", header, 40)
        logger.debug(
            "Modification Date: "
            + str(datetimefrompalmtime(dbmodification))
            + (" (0x%0X)" % dbmodification)
        )
        (dbbackup,) = struct.unpack_from(b">L", header, 44)
        if dbbackup != 0:
            logger.debug(
                "Backup Date: "
                + str(datetimefrompalmtime(dbbackup))
                + (" (0x%0X)" % dbbackup)
            )
        logger.debug(
            "Modification No.: %d" % struct.unpack_from(b">L", header, 48)[0]
        )
        logger.debug(
            "App Info offset: 0x%0X" % struct.unpack_from(b">L", header, 52)[0]
        )
        logger.debug(
            "Sort Info offset: 0x%0X" % struct.unpack_from(b">L", header, 56)[0]
        )
        logger.debug(
            "Type/Creator: %s/%s" % (repr(header[60:64]), repr(header[64:68]))
        )
        logger.debug("Unique seed: 0x%0X" % struct.unpack_from(b">L", header, 68)[0])
        (expectedzero,) = struct.unpack_from(b">L", header, 72)
        if expectedzero != 0:
            logger.debug("Should be zero but isn't: %d" % expectedzero)
        logger.debug(
            "Number of sections: %d" % struct.unpack_from(b">H", header, 76)[0]
        )

    def loadSection(self, section):
        """Return the raw bytes of section number *section*.

        Raises:
            ValueError: If slicing the offset table at *section* does not
                yield exactly two offsets (e.g. ``section >= num_sections``).
        """
        before, after = self.sectionoffsets[section : section + 2]
        return self.data[before:after]