kindle manager

This commit is contained in:
douboer
2024-04-03 15:08:22 +08:00
parent 6b3c0f3b6b
commit 6df3ce42a3
459 changed files with 164651 additions and 4690 deletions

97
mobiparse/mobi/makencx.py Executable file
View File

@@ -0,0 +1,97 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
from loguru import logger
from collections import defaultdict
from .compatibility_utils import PY2, binary_type, utf8_str, unicode_str
from .compatibility_utils import unicode_argv, add_cp65001_codec
# Payload of the section that divides K8 mobi ebooks: extractNcx() looks for
# an 8-byte section whose data equals this value.
K8_BOUNDARY = b"BOUNDARY"
class unpackException(Exception):
    """Error raised when the input file cannot be unpacked.

    extractNcx() raises it with the message "Invalid file format" when the
    file identifier is not one of the accepted values.
    """
# import the kindleunpack support libraries
from .unpack_structure import fileNames
from .mobi_sectioner import Sectionizer
from .mobi_header import MobiHeader
from .mobi_ncx import ncxExtract
# input mobi file path
# output ncx dict
def extractNcx(infile):
    """Extract the NCX (table of contents) data from a mobi file.

    Args:
        infile: path of the input mobi file; it is normalised with
            ``unicode_str`` before being handed to ``Sectionizer``.

    Returns:
        A ``defaultdict(dict)`` holding the result of ``parseNCX()`` for each
        mobi header that was found, stored under ``'k8ncx'`` when the header
        reports ``isK8()`` and under ``'k7ncx'`` otherwise.

    Raises:
        unpackException: if the file identifier is neither ``b"BOOKMOBI"``
            nor ``b"TEXtREAd"``.
    """
    infile = unicode_str(infile)
    mhdict = defaultdict(dict)

    # process the PalmDoc database header and verify it is a mobi
    sect = Sectionizer(infile)
    if sect.ident not in (b"BOOKMOBI", b"TEXtREAd"):
        raise unpackException("Invalid file format")

    # Diagnostics go through the logger instead of stdout, and the palm
    # header is dumped only once.
    # NOTE(review): presumably the dump helpers emit their own output and
    # return None -- confirm against Sectionizer.
    logger.debug("dumppalmheader ...")
    palm_header = sect.dumppalmheader()
    logger.debug("infile {} ", infile)
    logger.debug("sect.dumpsectionsinfo() {}", sect.dumpsectionsinfo())
    logger.debug("sect.dumppalmheader() {}", palm_header)

    # scan sections to see if this is a compound mobi file (K8 format)
    # and build a list of all mobi headers to process.
    first_header = MobiHeader(sect, 0)
    # The result was never used; the call is kept in case it has side
    # effects on the header object -- TODO confirm and drop if it has none.
    first_header.getMetaData()

    # if this is a mobi8-only file the first header already reports K8
    mhlst = [first_header]

    if first_header.isK8():
        logger.debug("Unpacking a KF8 book...")
        hasK8 = True
    else:
        # This is either a Mobipocket 7 or earlier, or a combi M7/KF8.
        # Find out which by looking for the 8-byte boundary section.
        hasK8 = False
        offsets = sect.sectionoffsets
        for index, (before, after) in enumerate(zip(offsets, offsets[1:])):
            if (after - before) != 8:
                continue
            if sect.loadSection(index) != K8_BOUNDARY:
                continue
            sect.setsectiondescription(index, "Mobi/KF8 Boundary Section")
            # the header that follows the boundary section is processed too
            mhlst.append(MobiHeader(sect, index + 1))
            hasK8 = True
            break

        # Report the version of the leading header: that is the "M<n>" part
        # of the message, whichever header was appended after the boundary.
        if hasK8:
            logger.debug(
                "Unpacking a Combination M{0:d}/KF8 book...".format(
                    first_header.version
                )
            )
        else:
            logger.debug(
                "Unpacking a Mobipocket {0:d} book...".format(
                    first_header.version
                )
            )

    # process the toc ncx of every header that was collected
    # ncx map keys: name, pos, len, noffs, text, hlvl, kind, pos_fid, parent,
    # child1, childn, num
    for tmh in mhlst:
        logger.debug("Processing ncx / toc ")
        is_k8 = tmh.isK8()
        logger.debug("hasK8 {} tmh.isK8 {}", hasK8, is_k8)
        ncx_data = ncxExtract(tmh).parseNCX()
        # file the result under the key matching the header flavour
        mhdict['k8ncx' if is_k8 else 'k7ncx'] = ncx_data

    return mhdict