kindle manager

2024-04-03 15:08:22 +08:00
parent 6b3c0f3b6b
commit 6df3ce42a3
459 changed files with 164651 additions and 4690 deletions
--- a/mobiparse/mobi/makencx.py
+++ b/mobiparse/mobi/makencx.py
@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+
+from loguru import logger
+from collections import defaultdict
+
+from .compatibility_utils import PY2, binary_type, utf8_str, unicode_str
+from .compatibility_utils import unicode_argv, add_cp65001_codec
+
+# Payload of the 8-byte marker section that separates the Mobipocket part
+# from the KF8 part of a combination mobi ebook.
+K8_BOUNDARY = b"BOUNDARY"
+
+class unpackException(Exception):
+    """Raised when the input file cannot be unpacked, e.g. its format is invalid."""
+
+# import the kindleunpack support libraries
+from .unpack_structure import fileNames
+from .mobi_sectioner import Sectionizer
+from .mobi_header import MobiHeader
+from .mobi_ncx import ncxExtract
+
+
+# input mobi file path
+# output ncx dict
+def extractNcx(infile):
+    """Extract the NCX (table of contents) data from a mobi file.
+
+    The header in section 0 is always processed.  If that header is not a
+    KF8 header, the sections are scanned for the 8-byte ``BOUNDARY`` marker
+    and, when it is found, the header stored in the following section is
+    processed as well.
+
+    Args:
+        infile: path of the input mobi file.
+
+    Returns:
+        A ``defaultdict(dict)`` in which ``'k8ncx'`` and/or ``'k7ncx'`` map
+        to the value returned by ``ncxExtract(header).parseNCX()`` for the
+        KF8 and the non-KF8 header respectively.
+
+    Raises:
+        unpackException: if the database identifier is neither
+            ``b"BOOKMOBI"`` nor ``b"TEXtREAd"``.
+    """
+    infile = unicode_str(infile)
+    mhdict = defaultdict(dict)
+
+    # process the PalmDoc database header and verify it is a mobi
+    sect = Sectionizer(infile)
+    if sect.ident not in (b"BOOKMOBI", b"TEXtREAd"):
+        raise unpackException("Invalid file format")
+
+    # Debug dumps go through the logger like every other message here.
+    logger.debug("infile {} ".format(infile))
+    logger.debug("dumppalmheader ...")
+    logger.debug("sect.dumppalmheader() {}".format(sect.dumppalmheader()))
+    logger.debug("sect.dumpsectionsinfo() {}".format(sect.dumpsectionsinfo()))
+
+    # The header in section 0 is always processed; for a mobi8-only file
+    # it is already the KF8 header.
+    mh = MobiHeader(sect, 0)
+    # NOTE(review): the result is unused here; confirm whether this call is
+    # only needed for side effects inside MobiHeader.
+    mh.getMetaData()
+    mhlst = [mh]
+
+    if mh.isK8():
+        logger.debug("Unpacking a KF8 book...")
+        hasK8 = True
+    else:
+        # This is either a Mobipocket 7 or earlier, or a combi M7/KF8.
+        # Find out which by looking for the 8-byte boundary section.
+        hasK8 = False
+        offsets = sect.sectionoffsets
+        for i, (before, after) in enumerate(zip(offsets, offsets[1:])):
+            if after - before != 8:
+                continue
+            if sect.loadSection(i) != K8_BOUNDARY:
+                continue
+            sect.setsectiondescription(i, "Mobi/KF8 Boundary Section")
+            # the KF8 header lives in the section right after the boundary
+            mh = MobiHeader(sect, i + 1)
+            hasK8 = True
+            mhlst.append(mh)
+            break
+
+        if hasK8:
+            logger.debug("Unpacking a Combination M{0:d}/KF8 book...".format(mh.version))
+        else:
+            logger.debug("Unpacking a Mobipocket {0:d} book...".format(mh.version))
+
+    # Process the toc ncx of every collected header.  This runs for KF8-only
+    # books too, so the function always returns the result dict.
+    # ncx map keys: name, pos, len, noffs, text, hlvl, kind, pos_fid, parent, child1, childn, num
+    for tmh in mhlst:
+        logger.debug("Processing ncx / toc ")
+        logger.debug("hasK8 {} tmh.isK8 {}".format(hasK8, tmh.isK8()))
+
+        ncx_data = ncxExtract(tmh).parseNCX()
+
+        # store the result under a key telling KF8 and older headers apart
+        kn = 'k8ncx' if tmh.isK8() else 'k7ncx'
+        mhdict[kn] = ncx_data
+
+    return mhdict
+