#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab

from loguru import logger

from collections import defaultdict

from .compatibility_utils import PY2, binary_type, utf8_str, unicode_str
from .compatibility_utils import unicode_argv, add_cp65001_codec

# The section data that divides K8 mobi ebooks.
K8_BOUNDARY = b"BOUNDARY"


class unpackException(Exception):
    """Raised when the input file cannot be unpacked."""

    pass


# import the kindleunpack support libraries
from .unpack_structure import fileNames
from .mobi_sectioner import Sectionizer
from .mobi_header import MobiHeader
from .mobi_ncx import ncxExtract


def _find_kf8_header(sect):
    """Return the header that follows the Mobi/KF8 boundary section, or None.

    Walks consecutive pairs of ``sect.sectionoffsets`` looking for a section
    whose length equals ``len(K8_BOUNDARY)`` and whose content is exactly
    ``K8_BOUNDARY``. When found, the section is labelled through
    ``sect.setsectiondescription`` and a ``MobiHeader`` for the next section
    is returned. Only the first matching section is considered.
    """
    offsets = sect.sectionoffsets
    for index, (start, end) in enumerate(zip(offsets, offsets[1:])):
        # Cheap length check first so only candidate sections are loaded.
        if end - start != len(K8_BOUNDARY):
            continue
        if sect.loadSection(index) == K8_BOUNDARY:
            sect.setsectiondescription(index, "Mobi/KF8 Boundary Section")
            return MobiHeader(sect, index + 1)
    return None


def extractNcx(infile):
    """Extract the NCX (table of contents) data from a mobi file.

    Args:
        infile: Path of the input mobi file; it is passed through
            ``unicode_str`` before use.

    Returns:
        A ``defaultdict(dict)`` holding the result of ``parseNCX()`` for each
        mobi header found, stored under ``'k8ncx'`` when the header reports
        ``isK8()`` and under ``'k7ncx'`` otherwise.

    Raises:
        unpackException: If the file identifier is neither ``b"BOOKMOBI"``
            nor ``b"TEXtREAd"``.
    """
    infile = unicode_str(infile)
    mhdict = defaultdict(dict)

    # process the PalmDoc database header and verify it is a mobi
    sect = Sectionizer(infile)
    if sect.ident not in (b"BOOKMOBI", b"TEXtREAd"):
        raise unpackException("Invalid file format")

    logger.debug("dumppalmheader ...")
    sect.dumppalmheader()
    # Diagnostics go through the logger instead of stdout; each dump method
    # is invoked once rather than being re-run just to format its result.
    logger.debug("infile {}", infile)
    sect.dumpsectionsinfo()

    # scan sections to see if this is a compound mobi file (K8 format)
    # and build a list of all mobi headers to process.
    first_header = MobiHeader(sect, 0)
    # The returned metadata is not needed here.
    # NOTE(review): call kept in case it populates state on the header.
    first_header.getMetaData()
    headers = [first_header]

    # if this is a mobi8-only file has_k8 here will be true
    has_k8 = bool(first_header.isK8())
    if has_k8:
        logger.debug("Unpacking a KF8 book...")
    else:
        # This is either a Mobipocket 7 or earlier, or a combi M7/KF8.
        # Find out which.
        kf8_header = _find_kf8_header(sect)
        if kf8_header is not None:
            has_k8 = True
            headers.append(kf8_header)
            # Report the version of the leading (Mobipocket) header; the
            # header after the boundary is the KF8 half named in the message.
            logger.debug(
                "Unpacking a Combination M{0:d}/KF8 book...".format(
                    first_header.version
                )
            )
        else:
            logger.debug(
                "Unpacking a Mobipocket {0:d} book...".format(
                    first_header.version
                )
            )

    # process the toc ncx of every header collected above
    # original author's note on ncx map keys: name, pos, len, noffs, text,
    # hlvl, kind, pos_fid, parent, child1, childn, num
    for header in headers:
        logger.debug("Processing ncx / toc ")
        is_k8 = header.isK8()
        logger.debug("hasK8 {} tmh.isK8 {}", has_k8, is_k8)
        ncx_data = ncxExtract(header).parseNCX()
        # file the result under the key matching the header kind
        mhdict["k8ncx" if is_k8 else "k7ncx"] = ncx_data

    return mhdict