kindle manager
This commit is contained in:
97
mobiparse/mobi/makencx.py
Executable file
97
mobiparse/mobi/makencx.py
Executable file
@@ -0,0 +1,97 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
|
||||
|
||||
from loguru import logger
|
||||
from collections import defaultdict
|
||||
|
||||
from .compatibility_utils import PY2, binary_type, utf8_str, unicode_str
|
||||
from .compatibility_utils import unicode_argv, add_cp65001_codec
|
||||
|
||||
# The section data that divides K8 mobi ebooks: a section whose entire
# content is exactly these bytes is treated as the Mobi/KF8 boundary.
K8_BOUNDARY = b"BOUNDARY"
|
||||
|
||||
class unpackException(Exception):
    """Raised when a file cannot be processed as a Mobipocket/KF8 book.

    In this module it is raised when the file identifier is not one of the
    accepted database identifiers.
    """
|
||||
|
||||
# import the kindleunpack support libraries
|
||||
from .unpack_structure import fileNames
|
||||
from .mobi_sectioner import Sectionizer
|
||||
from .mobi_header import MobiHeader
|
||||
from .mobi_ncx import ncxExtract
|
||||
|
||||
|
||||
# input mobi file path
|
||||
# output ncx dict
|
||||
def extractNcx(infile):
    """Extract the NCX (table of contents) data from a mobi file.

    The file is split into sections, the mobi header in section 0 is read,
    and -- when that header is not KF8 -- the sections are scanned for a
    ``K8_BOUNDARY`` marker that introduces a second (KF8) header. The NCX is
    then parsed for every header found.

    Args:
        infile: Path of the input mobi file.

    Returns:
        A ``defaultdict(dict)`` holding the parsed NCX data under the key
        ``'k8ncx'`` for a header that reports KF8 and ``'k7ncx'`` otherwise.

    Raises:
        unpackException: If the file identifier is neither ``BOOKMOBI`` nor
            ``TEXtREAd``.
    """
    infile = unicode_str(infile)
    mhdict = defaultdict(dict)

    # process the PalmDoc database header and verify it is a mobi
    sect = Sectionizer(infile)
    if sect.ident not in (b"BOOKMOBI", b"TEXtREAd"):
        raise unpackException("Invalid file format")

    logger.debug( "dumppalmheader ...")
    # Dump once and reuse the result for the diagnostic line below instead
    # of invoking the dump a second time.
    palm_header = sect.dumppalmheader()

    # Diagnostics go through the logger rather than stdout.
    logger.debug('infile {} '.format(infile))
    logger.debug('sect.dumpsectionsinfo() {}'.format(sect.dumpsectionsinfo()))
    logger.debug('sect.dumppalmheader() {}'.format(palm_header))

    # scan sections to see if this is a compound mobi file (K8 format)
    # and build a list of all mobi headers to process.
    first_header = MobiHeader(sect, 0)
    # NOTE(review): the returned metadata is not used here; the call is kept
    # in case it has side effects on the header object -- confirm and drop.
    first_header.getMetaData()

    # if this is a mobi8-only file hasK8 here will be true
    mhlst = [first_header]

    if first_header.isK8():
        logger.debug("Unpacking a KF8 book...")
        hasK8 = True
    else:
        # This is either a Mobipocket 7 or earlier, or a combi M7/KF8
        # Find out which
        hasK8 = False
        offsets = sect.sectionoffsets
        boundary_len = len(K8_BOUNDARY)
        for i, (before, after) in enumerate(zip(offsets, offsets[1:])):
            # Only a section exactly as long as the marker can be the boundary.
            if (after - before) != boundary_len:
                continue
            if sect.loadSection(i) != K8_BOUNDARY:
                continue
            sect.setsectiondescription(i, "Mobi/KF8 Boundary Section")
            # The KF8 header lives in the section right after the boundary.
            mhlst.append(MobiHeader(sect, i + 1))
            hasK8 = True
            break

        # Report the version of the first (Mobipocket) header in both cases.
        if hasK8:
            logger.debug( "Unpacking a Combination M{0:d}/KF8 book...".format(first_header.version))
        else:
            logger.debug("Unpacking a Mobipocket {0:d} book...".format(first_header.version))

    # process the toc ncx of every header found
    # ncx map keys: name, pos, len, noffs, text, hlvl, kind, pos_fid, parent, child1, childn, num
    for tmh in mhlst:
        logger.debug("Processing ncx / toc ")
        header_is_k8 = tmh.isK8()
        logger.debug('hasK8 {} tmh.isK8 {}'.format(hasK8, header_is_k8))

        ncx_data = ncxExtract(tmh).parseNCX()

        # store the result under a key that tells K8 and K7 headers apart
        kn = 'k8ncx' if header_is_k8 else 'k7ncx'
        mhdict[kn] = ncx_data

    return mhdict
|
||||
|
||||
Reference in New Issue
Block a user