kindle manager

2021-08-25 17:58:31 +08:00
parent 5f0c0a9724
commit 6b3c0f3b6b
303 changed files with 87829 additions and 42537 deletions
--- a/mobimaster/mobi/makencx.py
+++ b/mobimaster/mobi/makencx.py
@@ -0,0 +1,286 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
+
+from __future__ import unicode_literals, division, absolute_import, print_function
+
+import os
+from .unipath import pathof
+from loguru import logger
+
+
+import re
+
+# note: re requites the pattern to be the exact same type as the data to be searched in python3
+# but u"" is not allowed for the pattern itself only b""
+
+'''
+NCX (Navigation Control for XML applications) is a generalized navigation definition DTD for application
+to Digital Talking Books, eBooks, and general web content models.                                                
+This DTD is an XML application that layers navigation functionality on top of SMIL 2.0  content.                                       
+The NCX defines a navigation path/model that may be applied upon existing publications,
+without modification of the existing publication source, so long as the navigation targets within
+the source publication can be directly referenced via a URI.                      
+
+http://www.daisy.org/z3986/2005/ncx-2005-1.dtd
+'''
+
+from .mobi_utils import toBase32
+from .mobi_index import MobiIndex
+
+DEBUG_NCX = True
+
+class ncxExtract:
+    def __init__(self, mh):
+        self.mh = mh
+        self.sect = self.mh.sect
+        self.isNCX = False
+        self.mi = MobiIndex(self.sect)
+        self.ncxidx = self.mh.ncxidx
+        self.indx_data = None
+
+    def parseNCX(self):
+        indx_data = []
+        tag_fieldname_map = {
+            1: ["pos", 0],
+            2: ["len", 0],
+            3: ["noffs", 0],
+            4: ["hlvl", 0],
+            5: ["koffs", 0],
+            6: ["pos_fid", 0],
+            21: ["parent", 0],
+            22: ["child1", 0],
+            23: ["childn", 0],
+        }
+        if self.ncxidx != 0xFFFFFFFF:
+            outtbl, ctoc_text = self.mi.getIndexData(self.ncxidx, "NCX")
+            if DEBUG_NCX:
+                logger.debug("ctoc_text {}".format(ctoc_text))
+                logger.debug("outtbl {}".format(outtbl))
+            num = 0
+            for [text, tagMap] in outtbl:
+                tmp = {
+                    "name": text.decode("utf-8"),
+                    "pos": -1,
+                    "len": 0,
+                    "noffs": -1,
+                    "text": "Unknown Text",
+                    "hlvl": -1,
+                    "kind": "Unknown Kind",
+                    "pos_fid": None,
+                    "parent": -1,
+                    "child1": -1,
+                    "childn": -1,
+                    "num": num,
+                }
+                for tag in tag_fieldname_map:
+                    [fieldname, i] = tag_fieldname_map[tag]
+                    if tag in tagMap:
+                        fieldvalue = tagMap[tag][i]
+                        if tag == 6:
+                            pos_fid = toBase32(fieldvalue, 4).decode("utf-8")
+                            fieldvalue2 = tagMap[tag][i + 1]
+                            pos_off = toBase32(fieldvalue2, 10).decode("utf-8")
+                            fieldvalue = "kindle:pos:fid:%s:off:%s" % (pos_fid, pos_off)
+                        tmp[fieldname] = fieldvalue
+                        if tag == 3:
+                            toctext = ctoc_text.get(fieldvalue, "Unknown Text")
+                            toctext = toctext.decode(self.mh.codec)
+                            tmp["text"] = toctext
+                        if tag == 5:
+                            kindtext = ctoc_text.get(fieldvalue, "Unknown Kind")
+                            kindtext = kindtext.decode(self.mh.codec)
+                            tmp["kind"] = kindtext
+                indx_data.append(tmp)
+
+                # CGDBG
+                '''
+                record number:  3
+                name:  03
+                position 461377  length:  465358  => position/150 = real page number
+                text:  第二章 青铜时代——单机游戏
+                kind:  Unknown Kind
+                heading level:  0 => level of section
+                parent: -1  => record number of previous level of section
+                first child:  15  last child:  26 => range of record number of next level section
+                pos_fid is  kindle:pos:fid:0023:off:0000000000
+                '''
+                if DEBUG_NCX:
+                    print("record number: ", num)
+                    print(
+                        "name: ", tmp["name"],
+                    )
+                    print("position", tmp["pos"], " length: ", tmp["len"])
+                    print("text: ", tmp["text"])
+                    print("kind: ", tmp["kind"])
+                    print("heading level: ", tmp["hlvl"])
+                    print("parent:", tmp["parent"])
+                    print(
+                        "first child: ", tmp["child1"], " last child: ", tmp["childn"]
+                    )
+                    print("pos_fid is ", tmp["pos_fid"])
+                    print("\n\n")
+                num += 1
+        self.indx_data = indx_data
+
+        # {'name': '00', 'pos': 167, 'len': 24798, 'noffs': 0, 'text': '版权信息', 'hlvl': 0, 'kind': 'Unknown Kind', 'pos_fid': None, 'parent': -1, 'child1': -1, 'childn': -1, 'num': 0}
+        # {'name': '0B', 'pos': 67932, 'len': 3274, 'noffs': 236, 'text': '8.希罗多德', 'hlvl': 0, 'kind': 'Unknown Kind', 'pos_fid': None, 'parent': -1, 'child1': -1, 'childn': -1, 'num': 11}
+        print(indx_data)
+
+        return indx_data
+
+    def writeNCX(self, metadata):
+        # build the xml
+        self.isNCX = True
+        logger.debug("Write ncx")
+        # write the ncx file
+        # build the xml
+        xml = self.buildNCX(
+            metadata["Title"][0],
+            metadata["UniqueID"][0],
+            metadata.get("Language")[0],
+        )
+        # write the ncx file
+        # ncxname = os.path.join(self.files.mobi7dir, self.files.getInputFileBasename() + '.ncx')
+        ncxname = os.path.join(self.files.mobi7dir, "toc.ncx")
+        with open(pathof(ncxname), "wb") as f:
+            f.write(xml.encode("utf-8"))
+
+
+    def buildNCX(self):
+        indx_data = self.indx_data
+
+        # recursive part
+        def recursINDX(max_lvl=0, num=0, lvl=0, start=-1, end=-1):
+            if start > len(indx_data) or end > len(indx_data):
+                print("Warning: missing INDX child entries", start, end, len(indx_data))
+                return ""
+            if DEBUG_NCX:
+                logger.debug("recursINDX lvl %d from %d to %d" % (lvl, start, end))
+            xml = ""
+            if start <= 0:
+                start = 0
+            if end <= 0:
+                end = len(indx_data)
+            if lvl > max_lvl:
+                max_lvl = lvl
+            indent = "  " * (2 + lvl)
+
+            for i in range(start, end):
+                e = indx_data[i]
+                if not e["hlvl"] == lvl:
+                    continue
+                # open entry
+                num += 1
+                link = "%s#filepos%d" % (htmlfile, e["pos"])
+                tagid = "np_%d" % num
+                entry = ncx_entry % (tagid, num, e["text"], link)
+                entry = re.sub(re.compile("^", re.M), indent, entry, 0)
+                xml += entry + "\n"
+                # recurs
+                if e["child1"] >= 0:
+                    xmlrec, max_lvl, num = recursINDX(
+                        max_lvl, num, lvl + 1, e["child1"], e["childn"] + 1
+                    )
+                    xml += xmlrec
+                # close entry
+                xml += indent + "</navPoint>\n"
+            return xml, max_lvl, num
+
+        body, max_lvl, num = recursINDX()
+        header = ncx_header % (lang, ident, max_lvl + 1, title)
+        ncx = header + body + ncx_footer
+        if not len(indx_data) == num:
+            print("Warning: different number of entries in NCX", len(indx_data), num)
+        return ncx
+
+
+    def buildK8NCX(self, indx_data, title, ident, lang):
+        ncx_header = """<?xml version='1.0' encoding='utf-8'?>
+<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1" xml:lang="%s">
+<head>
+<meta content="%s" name="dtb:uid"/>
+<meta content="%d" name="dtb:depth"/>
+<meta content="mobiunpack.py" name="dtb:generator"/>
+<meta content="0" name="dtb:totalPageCount"/>
+<meta content="0" name="dtb:maxPageNumber"/>
+</head>
+<docTitle>
+<text>%s</text>
+</docTitle>
+<navMap>
+"""
+
+        ncx_footer = """  </navMap>
+</ncx>
+"""
+
+        ncx_entry = """<navPoint id="%s" playOrder="%d">
+<navLabel>
+<text>%s</text>
+</navLabel>
+<content src="%s"/>"""
+
+        # recursive part
+        def recursINDX(max_lvl=0, num=0, lvl=0, start=-1, end=-1):
+            if start > len(indx_data) or end > len(indx_data):
+                print("Warning: missing INDX child entries", start, end, len(indx_data))
+                return ""
+            if DEBUG_NCX:
+                logger.debug("recursINDX lvl %d from %d to %d" % (lvl, start, end))
+            xml = ""
+            if start <= 0:
+                start = 0
+            if end <= 0:
+                end = len(indx_data)
+            if lvl > max_lvl:
+                max_lvl = lvl
+            indent = "  " * (2 + lvl)
+
+            for i in range(start, end):
+                e = indx_data[i]
+                htmlfile = e["filename"]
+                desttag = e["idtag"]
+                if not e["hlvl"] == lvl:
+                    continue
+                # open entry
+                num += 1
+                if desttag == "":
+                    link = "Text/%s" % htmlfile
+                else:
+                    link = "Text/%s#%s" % (htmlfile, desttag)
+                tagid = "np_%d" % num
+                entry = ncx_entry % (tagid, num, e["text"], link)
+                entry = re.sub(re.compile("^", re.M), indent, entry, 0)
+                xml += entry + "\n"
+                # recurs
+                if e["child1"] >= 0:
+                    xmlrec, max_lvl, num = recursINDX(
+                        max_lvl, num, lvl + 1, e["child1"], e["childn"] + 1
+                    )
+                    xml += xmlrec
+                # close entry
+                xml += indent + "</navPoint>\n"
+            return xml, max_lvl, num
+
+        body, max_lvl, num = recursINDX()
+        header = ncx_header % (lang, ident, max_lvl + 1, title)
+        ncx = header + body + ncx_footer
+        if not len(indx_data) == num:
+            print("Warning: different number of entries in NCX", len(indx_data), num)
+        return ncx
+
+    def writeK8NCX(self, ncx_data, metadata):
+        # build the xml
+        self.isNCX = True
+        logger.debug("Write K8 ncx")
+        xml = self.buildK8NCX(
+            ncx_data,
+            metadata["Title"][0],
+            metadata["UniqueID"][0],
+            metadata.get("Language")[0],
+        )
+        ncxname = os.path.join('./', 'k8toc.ncx.json')
+        with open(pathof(ncxname), "wb") as f:
+            f.write(xml.encode("utf-8"))
+