#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
"""Readers that unpack uncompressed, PalmDoc and HUFF/CDIC compressed data."""

from __future__ import unicode_literals, division, absolute_import, print_function

from .compatibility_utils import PY2, bchr, lmap, bstr

if PY2:
    range = xrange  # noqa: F821 -- only evaluated on Python 2

import struct

# note: struct pack, unpack, unpack_from all require bytestring format
# data all the way up to at least python 2.7.5, python 3 okay with bytestring


class unpackException(Exception):
    """Raised when a HUFF or CDIC record does not start with the expected header."""


class UncompressedReader:
    """Pass-through reader for data that needs no decompression."""

    def unpack(self, data):
        """Return *data* unchanged."""
        return data


class PalmdocReader:
    """Reader for the byte-oriented compression handled by ``unpack`` below."""

    def unpack(self, i):
        """Decompress the byte string *i* and return the result as bytes.

        Every control byte ``c`` read from the input selects one action:

        * ``1..8``      -- copy the next ``c`` input bytes verbatim
        * ``0, 9..127`` -- emit ``c`` itself
        * ``192..255``  -- emit a space followed by ``c ^ 128``
        * ``128..191``  -- combine with the following byte into a 16-bit value:
          bits 3..13 are a distance back into the output and the low three
          bits plus 3 are the number of bytes to copy from there

        A trailing ``128..191`` byte with no byte after it is ignored.
        """
        # A bytearray grows in place; appending to an immutable bytes object
        # would copy the entire output on every step.
        o = bytearray()
        p = 0
        end = len(i)
        while p < end:
            # Slice rather than index: on Python 3 indexing bytes yields an
            # int, while a one-byte slice works with ord() on both 2 and 3.
            c = ord(i[p : p + 1])
            p += 1
            if 1 <= c <= 8:
                o += i[p : p + c]
                p += c
            elif c < 128:
                o += bchr(c)
            elif c >= 192:
                o += b" " + bchr(c ^ 128)
            elif p < end:
                c = (c << 8) | ord(i[p : p + 1])
                p += 1
                m = (c >> 3) & 0x07FF  # distance back from the end of the output
                n = (c & 7) + 3  # number of bytes to copy
                if m > n:
                    # Source range ends before the current end of the output,
                    # so it can be copied in a single slice.
                    o += o[-m : n - m]
                else:
                    # Source overlaps the bytes being produced: copy one byte
                    # at a time so freshly written bytes are picked up again.
                    for _ in range(n):
                        if m == 1:
                            # o[-1:0] would be empty, hence the open-ended slice.
                            o += o[-m:]
                        else:
                            o += o[-m : -m + 1]
        return bytes(o)


class HuffcdicReader:
    """Reader for data compressed with the tables held in HUFF and CDIC records.

    Call ``loadHuff`` once, then ``loadCdic`` for each CDIC record, before
    calling ``unpack``.
    """

    # Reads one big-endian unsigned 64-bit integer from a buffer at an offset.
    q = struct.Struct(b">Q").unpack_from

    def loadHuff(self, huff):
        """Load the code tables from the HUFF record *huff*.

        Sets ``self.dict1`` (256 entries, looked up by the top byte of a code),
        ``self.mincode`` / ``self.maxcode`` (tuples indexed by code length
        0..32) and resets ``self.dictionary`` to an empty list.

        Raises:
            unpackException: if *huff* does not start with the HUFF header.
            AssertionError: if a dict1 entry has code length 0, or a code
                length of at most 8 without the termination flag.
        """
        if huff[0:8] != b"HUFF\x00\x00\x00\x18":
            raise unpackException("invalid huff header")
        off1, off2 = struct.unpack_from(b">LL", huff, 8)

        def dict1_unpack(v):
            # Low 5 bits: code length; bit 0x80: termination flag;
            # upper 24 bits: maximum code for that length.
            codelen, term, maxcode = v & 0x1F, v & 0x80, v >> 8
            # NOTE(review): these asserts are stripped under ``python -O``;
            # kept as asserts so the exception type seen by callers is unchanged.
            assert codelen != 0
            if codelen <= 8:
                assert term
            # Left-align the maximum code in a 32-bit word, low bits all set.
            maxcode = ((maxcode + 1) << (32 - codelen)) - 1
            return (codelen, term, maxcode)

        self.dict1 = lmap(dict1_unpack, struct.unpack_from(b">256L", huff, off1))

        # dict2 alternates (mincode, maxcode) for code lengths 1..32; a dummy 0
        # is prepended so the resulting tuples can be indexed by code length.
        dict2 = struct.unpack_from(b">64L", huff, off2)
        self.mincode = tuple(
            mincode << (32 - codelen)
            for codelen, mincode in enumerate((0,) + dict2[0::2])
        )
        self.maxcode = tuple(
            ((maxcode + 1) << (32 - codelen)) - 1
            for codelen, maxcode in enumerate((0,) + dict2[1::2])
        )

        self.dictionary = []

    def loadCdic(self, cdic):
        """Append the phrases stored in the CDIC record *cdic* to the dictionary.

        Each appended entry is a ``(phrase, flag)`` tuple, where ``flag`` is
        the high bit of the phrase's 16-bit length word. ``unpack`` treats a
        zero flag as "phrase is itself compressed".

        Raises:
            unpackException: if *cdic* does not start with the CDIC header.
        """
        if cdic[0:8] != b"CDIC\x00\x00\x00\x10":
            raise unpackException("invalid cdic header")
        phrases, bits = struct.unpack_from(b">LL", cdic, 8)
        # Entries in this record: at most 2**bits, and never more than the
        # phrases still missing from the dictionary.
        n = min(1 << bits, phrases - len(self.dictionary))
        h = struct.Struct(b">H").unpack_from

        def getslice(off):
            # ``off`` is relative to byte 16 and points at a 16-bit length
            # word; the phrase bytes follow immediately after it.
            (blen,) = h(cdic, 16 + off)
            phrase = cdic[18 + off : 18 + off + (blen & 0x7FFF)]
            return (phrase, blen & 0x8000)

        offsets = struct.unpack_from(bstr(">%dH" % n), cdic, 16)
        self.dictionary += lmap(getslice, offsets)

    def unpack(self, data):
        """Decode the compressed byte string *data* and return the result as bytes.

        Requires ``loadHuff`` and ``loadCdic`` to have been called first.
        Phrases stored in compressed form are decoded recursively on first use
        and cached back into ``self.dictionary`` with their flag set.
        """
        q = HuffcdicReader.q
        bitsleft = len(data) * 8
        # Pad so the 64-bit reads near the end stay inside the buffer.  A new
        # object is built instead of ``+=`` so that a caller-owned bytearray
        # is never extended in place.
        data = data + b"\x00\x00\x00\x00\x00\x00\x00\x00"
        pos = 0
        (x,) = q(data, pos)  # 64-bit window starting at byte ``pos``
        n = 32  # right-shift that places the current 32-bit code at the bottom
        parts = []
        while True:
            if n <= 0:
                # Window exhausted: slide it forward by four bytes.
                pos += 4
                (x,) = q(data, pos)
                n += 32
            code = (x >> n) & ((1 << 32) - 1)
            codelen, term, maxcode = self.dict1[code >> 24]
            if not term:
                # The top byte alone did not settle the length: lengthen the
                # code until it reaches the minimum code for that length.
                while code < self.mincode[codelen]:
                    codelen += 1
                maxcode = self.maxcode[codelen]
            n -= codelen
            bitsleft -= codelen
            if bitsleft < 0:
                # The code would extend into the padding: input is finished.
                break
            r = (maxcode - code) >> (32 - codelen)
            phrase, flag = self.dictionary[r]
            if not flag:
                # Blank the entry while decoding it, so a phrase that refers
                # back to itself fails on the tuple unpacking above instead of
                # recursing without bound.
                self.dictionary[r] = None
                phrase = self.unpack(phrase)
                self.dictionary[r] = (phrase, 1)
            parts.append(phrase)
        return b"".join(parts)