98 lines
3.1 KiB
Python
Executable File
98 lines
3.1 KiB
Python
Executable File
#!/usr/bin/env python
|
|
# -*- coding: utf-8 -*-
|
|
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
|
|
|
|
from loguru import logger
|
|
from collections import defaultdict
|
|
|
|
from .compatibility_utils import PY2, binary_type, utf8_str, unicode_str
|
|
from .compatibility_utils import unicode_argv, add_cp65001_codec
|
|
|
|
K8_BOUNDARY = b"BOUNDARY"
|
|
""" The section data that divides K8 mobi ebooks. """
|
|
|
|
class unpackException(Exception):
    """Error raised when a file cannot be unpacked.

    For example, it is raised when the PalmDoc identifier of the input file
    is not one of the recognised Mobipocket types.
    """
|
|
|
|
# import the kindleunpack support libraries
|
|
from .unpack_structure import fileNames
|
|
from .mobi_sectioner import Sectionizer
|
|
from .mobi_header import MobiHeader
|
|
from .mobi_ncx import ncxExtract
|
|
|
|
|
|
# input mobi file path
|
|
# output ncx dict
|
|
def extractNcx(infile):
    """Extract the NCX (table of contents) data from a Mobipocket/KF8 file.

    The first Mobi header is always processed.  When that header is not a
    KF8 header, the sections are scanned for the 8-byte ``K8_BOUNDARY``
    marker; if it is found, the header that follows the marker is processed
    as well (combination Mobi 7 / KF8 file).

    Args:
        infile: Path of the input mobi file; converted with ``unicode_str``.

    Returns:
        A ``defaultdict(dict)`` holding the parsed NCX data of every
        processed header, stored under ``'k8ncx'`` when the header reports
        ``isK8()`` and under ``'k7ncx'`` otherwise.

    Raises:
        unpackException: If the PalmDoc identifier is neither ``BOOKMOBI``
            nor ``TEXtREAd``.
    """
    infile = unicode_str(infile)
    mhdict = defaultdict(dict)

    # process the PalmDoc database header and verify it is a mobi
    sect = Sectionizer(infile)
    if sect.ident not in (b"BOOKMOBI", b"TEXtREAd"):
        raise unpackException("Invalid file format")

    logger.debug("dumppalmheader ...")
    palm_header_dump = sect.dumppalmheader()

    # Diagnostics go through the logger instead of stdout, and the palm
    # header is dumped only once (its result is reused for the log line).
    logger.debug("infile {} ", infile)
    logger.debug("sect.dumpsectionsinfo() {}", sect.dumpsectionsinfo())
    logger.debug("sect.dumppalmheader() {}", palm_header_dump)

    # scan sections to see if this is a compound mobi file (K8 format)
    # and build a list of all mobi headers to process.
    mh = MobiHeader(sect, 0)
    # NOTE(review): the returned metadata is not used here; the call is kept
    # in case it has side effects on the header -- TODO confirm and drop.
    mh.getMetaData()

    # if this is a mobi8-only file hasK8 here will be true
    mhlst = [mh]

    if mh.isK8():
        logger.debug("Unpacking a KF8 book...")
        hasK8 = True
    else:
        # This is either a Mobipocket 7 or earlier, or a combi M7/KF8.
        # Find out which by looking for the boundary section.
        hasK8 = False
        offsets = sect.sectionoffsets
        for i, (before, after) in enumerate(zip(offsets, offsets[1:])):
            # Only an 8-byte section can hold the boundary marker.
            if (after - before) != 8:
                continue
            if sect.loadSection(i) != K8_BOUNDARY:
                continue
            sect.setsectiondescription(i, "Mobi/KF8 Boundary Section")
            # The KF8 header starts right after the boundary section.
            mh = MobiHeader(sect, i + 1)
            mhlst.append(mh)
            hasK8 = True
            break

        if hasK8:
            logger.debug("Unpacking a Combination M{0:d}/KF8 book...", mh.version)
        else:
            logger.debug("Unpacking a Mobipocket {0:d} book...", mh.version)

    # process the toc ncx of every header and collect the results
    # ncx map keys: name, pos, len, noffs, text, hlvl, kind, pos_fid, parent, child1, childn, num
    for tmh in mhlst:
        logger.debug("Processing ncx / toc ")
        is_k8 = tmh.isK8()
        logger.debug("hasK8 {} tmh.isK8 {}", hasK8, is_k8)

        ncx_data = ncxExtract(tmh).parseNCX()

        # store the result under the key matching the header type (K8 or K7)
        mhdict["k8ncx" if is_k8 else "k7ncx"] = ncx_data

    return mhdict
|
|
|