kindle manager
This commit is contained in:
138
mobiparse/mobi/mobi_uncompress.py
Executable file
138
mobiparse/mobi/mobi_uncompress.py
Executable file
@@ -0,0 +1,138 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
|
||||
|
||||
from __future__ import unicode_literals, division, absolute_import, print_function
|
||||
|
||||
from .compatibility_utils import PY2, bchr, lmap, bstr
|
||||
|
||||
if PY2:
    # Python 2 only: rebind the module-level name ``range`` to the lazy
    # ``xrange`` so loops in this module behave the same on both versions.
    range = xrange
|
||||
|
||||
import struct
|
||||
|
||||
# note: struct pack, unpack, unpack_from all require bytestring format
|
||||
# data all the way up to at least python 2.7.5, python 3 okay with bytestring
|
||||
|
||||
|
||||
class unpackException(Exception):
    """Error raised by the readers in this module when input data is invalid."""
|
||||
|
||||
|
||||
class UncompressedReader:
    """Reader for records that are stored without any compression."""

    def unpack(self, data):
        """Return ``data`` exactly as given; nothing needs decoding."""
        return data
|
||||
|
||||
|
||||
class PalmdocReader:
    """Decoder for PalmDoc-compressed records (byte-oriented LZ77 variant)."""

    def unpack(self, i):
        """Decompress one PalmDoc-compressed record.

        Each control byte ``c`` of the input selects one of four actions:

        * ``1..8``      -- copy the next ``c`` input bytes verbatim
        * ``0, 9..127`` -- emit ``c`` itself as a literal byte
        * ``192..255``  -- emit a space followed by ``c ^ 128``
        * ``128..191``  -- together with the following byte, forms a 16-bit
          back-reference: distance ``m`` (11 bits) and length ``n`` (3..10)
          into the output produced so far

        :param i: compressed record as a bytes-like object
        :return: the decompressed data as ``bytes``

        A truncated trailing back-reference (control byte without its second
        byte) is ignored rather than treated as an error.
        """
        # Work on bytearrays: indexing yields ints on both Python 2 and 3,
        # and appending is amortised O(1) instead of re-copying an immutable
        # bytes object for every emitted byte.
        src = bytearray(i)
        end = len(src)
        out = bytearray()
        p = 0
        while p < end:
            c = src[p]
            p += 1
            if 1 <= c <= 8:
                # run of 1..8 raw bytes copied straight from the input
                out += src[p : p + c]
                p += c
            elif c < 128:
                # literal byte (0 or 9..127)
                out.append(c)
            elif c >= 192:
                # space followed by a character with the high bit cleared
                out.append(0x20)
                out.append(c ^ 128)
            elif p < end:
                # 0x80..0xBF: two-byte back-reference into the output
                c = (c << 8) | src[p]
                p += 1
                m = (c >> 3) & 0x07FF  # distance back from the end of output
                n = (c & 7) + 3  # number of bytes to copy
                if m > n:
                    # source and destination do not overlap: one slice copy
                    out += out[-m : n - m]
                else:
                    # overlapping copy: must proceed one byte at a time so
                    # that freshly written bytes can be re-read.  For m == 1
                    # the end index -m + 1 would be 0 (an empty slice), so the
                    # slice is left open-ended instead.
                    stop = None if m == 1 else 1 - m
                    for _ in range(n):
                        out += out[-m:stop]
        return bytes(out)
|
||||
|
||||
|
||||
class HuffcdicReader:
    """Decoder for HUFF/CDIC compressed records.

    Usage order matters: ``loadHuff`` must be called first (it resets the
    phrase dictionary), then ``loadCdic`` once per CDIC record (each call
    appends phrases), and only then ``unpack``.
    """

    # Reads one big-endian unsigned 64-bit integer from a buffer at an offset.
    q = struct.Struct(b">Q").unpack_from

    def loadHuff(self, huff):
        """Load the Huffman code tables from a HUFF record.

        Sets ``self.dict1`` (256-entry lookup keyed by the top byte of a
        code), ``self.mincode`` / ``self.maxcode`` (per code length, indices
        0..32, left-aligned to 32 bits) and resets ``self.dictionary``.

        :param huff: raw HUFF record (bytes-like)
        :raises unpackException: if the 8-byte record header is not the
            expected ``HUFF`` signature
        """
        if huff[0:8] != b"HUFF\x00\x00\x00\x18":
            raise unpackException("invalid huff header")
        # Two big-endian 32-bit offsets follow the header: the first table
        # (256 entries) and the second table (64 entries).
        off1, off2 = struct.unpack_from(b">LL", huff, 8)

        def dict1_unpack(v):
            # Entry layout: low 5 bits = code length, bit 0x80 = "terminal"
            # flag, remaining upper bits = maximum code for that length.
            codelen, term, maxcode = v & 0x1F, v & 0x80, v >> 8
            # NOTE: these sanity checks are asserts and therefore disappear
            # when Python runs with -O; kept as-is so the exception type seen
            # by callers does not change.
            assert codelen != 0
            if codelen <= 8:
                assert term
            # Left-align maxcode to 32 bits and fill the low bits with ones.
            maxcode = ((maxcode + 1) << (32 - codelen)) - 1
            return (codelen, term, maxcode)

        self.dict1 = lmap(dict1_unpack, struct.unpack_from(b">256L", huff, off1))

        # Second table: 32 (mincode, maxcode) pairs stored interleaved.
        # A dummy 0 is prepended so that the tuple index equals the code
        # length.
        dict2 = struct.unpack_from(b">64L", huff, off2)
        self.mincode = tuple(
            raw_min << (32 - codelen)
            for codelen, raw_min in enumerate((0,) + dict2[0::2])
        )
        self.maxcode = tuple(
            ((raw_max + 1) << (32 - codelen)) - 1
            for codelen, raw_max in enumerate((0,) + dict2[1::2])
        )

        self.dictionary = []

    def loadCdic(self, cdic):
        """Append the phrases of one CDIC record to ``self.dictionary``.

        Each dictionary entry is a ``(phrase, flag)`` pair; a zero flag means
        the phrase is itself compressed and is expanded lazily by ``unpack``.

        :param cdic: raw CDIC record (bytes-like)
        :raises unpackException: if the 8-byte record header is not the
            expected ``CDIC`` signature
        """
        if cdic[0:8] != b"CDIC\x00\x00\x00\x10":
            raise unpackException("invalid cdic header")
        phrases, bits = struct.unpack_from(b">LL", cdic, 8)
        # This record holds at most 2**bits phrases, and never more than the
        # number still missing from the overall phrase count.
        n = min(1 << bits, phrases - len(self.dictionary))
        h = struct.Struct(b">H").unpack_from

        def getslice(off):
            # Offsets are relative to byte 16.  Each phrase starts with a
            # 16-bit word: low 15 bits = length, top bit = flag.
            (blen,) = h(cdic, 16 + off)
            phrase = cdic[18 + off : 18 + off + (blen & 0x7FFF)]
            return (phrase, blen & 0x8000)

        self.dictionary += lmap(
            getslice, struct.unpack_from(bstr(">%dH" % n), cdic, 16)
        )

    def unpack(self, data):
        """Decompress one HUFF/CDIC-compressed record.

        :param data: compressed record (bytes-like); it is not modified
        :return: the decompressed data as ``bytes``

        Dictionary phrases whose flag is zero are decompressed recursively on
        first use and cached back into ``self.dictionary``.
        """
        read_u64 = HuffcdicReader.q

        bitsleft = len(data) * 8
        # Pad a *copy* with 8 zero bytes so the 64-bit reads near the end stay
        # in bounds.  (An in-place ``+=`` would extend a caller's bytearray.)
        padded = data + b"\x00\x00\x00\x00\x00\x00\x00\x00"
        pos = 0
        (x,) = read_u64(padded, pos)
        # x holds 64 bits starting at byte ``pos``; the current 32-bit window
        # is (x >> n), so n == 32 selects the top half.
        n = 32

        pieces = []
        while True:
            if n <= 0:
                # Window ran off the low end of x: slide forward by 4 bytes.
                pos += 4
                (x,) = read_u64(padded, pos)
                n += 32
            code = (x >> n) & 0xFFFFFFFF

            # Fast path: the top byte of the window indexes dict1.
            codelen, term, maxcode = self.dict1[code >> 24]
            if not term:
                # Non-terminal entry: lengthen the code until it fits.
                while code < self.mincode[codelen]:
                    codelen += 1
                maxcode = self.maxcode[codelen]

            n -= codelen
            bitsleft -= codelen
            if bitsleft < 0:
                # The code just read extends into the padding: input is done.
                break

            r = (maxcode - code) >> (32 - codelen)
            phrase, flag = self.dictionary[r]
            if not flag:
                # Phrase is itself compressed.  The entry is cleared while it
                # is being expanded, so a phrase that refers back to itself
                # fails instead of recursing forever.
                self.dictionary[r] = None
                phrase = self.unpack(phrase)
                self.dictionary[r] = (phrase, 1)
            pieces.append(phrase)
        return b"".join(pieces)
|
||||
Reference in New Issue
Block a user