kindle manager
This commit is contained in:
286
mobimaster/mobi/makencx.py
Executable file
286
mobimaster/mobi/makencx.py
Executable file
@@ -0,0 +1,286 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
|
||||
|
||||
from __future__ import unicode_literals, division, absolute_import, print_function
|
||||
|
||||
import os
|
||||
from .unipath import pathof
|
||||
from loguru import logger
|
||||
|
||||
|
||||
import re
|
||||
|
||||
# note: re requires the pattern to be the exact same type as the data to be searched in python3
|
||||
# but u"" is not allowed for the pattern itself only b""
|
||||
|
||||
'''
|
||||
NCX (Navigation Control for XML applications) is a generalized navigation definition DTD for application
|
||||
to Digital Talking Books, eBooks, and general web content models.
|
||||
This DTD is an XML application that layers navigation functionality on top of SMIL 2.0 content.
|
||||
The NCX defines a navigation path/model that may be applied upon existing publications,
|
||||
without modification of the existing publication source, so long as the navigation targets within
|
||||
the source publication can be directly referenced via a URI.
|
||||
|
||||
http://www.daisy.org/z3986/2005/ncx-2005-1.dtd
|
||||
'''
|
||||
|
||||
from .mobi_utils import toBase32
|
||||
from .mobi_index import MobiIndex
|
||||
|
||||
# Debug switch for this module: when true, the NCX parsing and building code
# emits extra diagnostic output (index tables, per-record dumps, recursion traces).
DEBUG_NCX = True
|
||||
|
||||
class ncxExtract:
    """Parse the NCX index of a MOBI book and render it as NCX (toc.ncx) XML.

    Typical use: construct with the MOBI header object, call ``parseNCX()``
    to decode the index records, then ``writeNCX()`` / ``writeK8NCX()`` (or
    the ``build*`` variants) to produce the XML document.
    """

    # Value of ``mh.ncxidx`` that means "this book has no NCX index".
    _NO_NCX_INDEX = 0xFFFFFFFF

    # INDX tag number -> (key in the entry dict, position of the value inside
    # the tag's value list).
    _TAG_FIELDNAME_MAP = {
        1: ("pos", 0),
        2: ("len", 0),
        3: ("noffs", 0),
        4: ("hlvl", 0),
        5: ("koffs", 0),
        6: ("pos_fid", 0),
        21: ("parent", 0),
        22: ("child1", 0),
        23: ("childn", 0),
    }

    # Templates shared by buildNCX and buildK8NCX.
    _NCX_HEADER = (
        "<?xml version='1.0' encoding='utf-8'?>\n"
        '<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1" xml:lang="%s">\n'
        "<head>\n"
        '<meta content="%s" name="dtb:uid"/>\n'
        '<meta content="%d" name="dtb:depth"/>\n'
        '<meta content="mobiunpack.py" name="dtb:generator"/>\n'
        '<meta content="0" name="dtb:totalPageCount"/>\n'
        '<meta content="0" name="dtb:maxPageNumber"/>\n'
        "</head>\n"
        "<docTitle>\n"
        "<text>%s</text>\n"
        "</docTitle>\n"
        "<navMap>\n"
    )
    _NCX_FOOTER = " </navMap>\n</ncx>\n"
    _NCX_ENTRY = (
        '<navPoint id="%s" playOrder="%d">\n'
        "<navLabel>\n"
        "<text>%s</text>\n"
        "</navLabel>\n"
        '<content src="%s"/>'
    )

    # Matches the start of every line; used to indent a multi-line entry.
    _LINE_START = re.compile("^", re.M)

    def __init__(self, mh, files=None):
        """Bind the extractor to a MOBI header.

        Args:
            mh: MOBI header object; ``sect``, ``ncxidx`` and ``codec`` are read
                from it.
            files: optional object whose ``mobi7dir`` attribute names the
                directory ``writeNCX`` writes into. The original code read
                ``self.files`` without ever setting it; the parameter is new
                and optional so existing ``ncxExtract(mh)`` calls keep working.
                NOTE(review): presumably the unpacker's file-layout object -
                confirm against the caller.
        """
        self.mh = mh
        self.sect = self.mh.sect
        self.isNCX = False
        self.mi = MobiIndex(self.sect)
        self.ncxidx = self.mh.ncxidx
        self.indx_data = None
        self.files = files

    def parseNCX(self):
        """Decode the NCX index into a list of entry dicts.

        Each entry has the keys ``name``, ``pos``, ``len``, ``noffs``,
        ``text``, ``hlvl``, ``kind``, ``pos_fid``, ``parent``, ``child1``,
        ``childn`` and ``num`` (plus ``koffs`` when tag 5 is present), e.g.::

            {'name': '00', 'pos': 167, 'len': 24798, 'noffs': 0,
             'text': 'Copyright', 'hlvl': 0, 'kind': 'Unknown Kind',
             'pos_fid': None, 'parent': -1, 'child1': -1, 'childn': -1,
             'num': 0}

        Returns:
            The list of entries (also stored on ``self.indx_data``); empty when
            the book has no NCX index.
        """
        indx_data = []
        if self.ncxidx != self._NO_NCX_INDEX:
            outtbl, ctoc_text = self.mi.getIndexData(self.ncxidx, "NCX")
            if DEBUG_NCX:
                logger.debug("ctoc_text {}".format(ctoc_text))
                logger.debug("outtbl {}".format(outtbl))
            for num, (text, tagMap) in enumerate(outtbl):
                entry = self._parse_entry(num, text, tagMap, ctoc_text)
                indx_data.append(entry)
                if DEBUG_NCX:
                    self._log_entry(entry)
        self.indx_data = indx_data
        # The full dump used to be an unconditional print(); it is debug
        # output, so it now follows the module's debug switch.
        if DEBUG_NCX:
            logger.debug("indx_data {}".format(indx_data))
        return indx_data

    def _parse_entry(self, num, text, tagMap, ctoc_text):
        """Turn one (name bytes, tag map) index record into an entry dict."""
        entry = {
            "name": text.decode("utf-8"),
            "pos": -1,
            "len": 0,
            "noffs": -1,
            "text": "Unknown Text",
            "hlvl": -1,
            "kind": "Unknown Kind",
            "pos_fid": None,
            "parent": -1,
            "child1": -1,
            "childn": -1,
            "num": num,
        }
        for tag, (fieldname, i) in self._TAG_FIELDNAME_MAP.items():
            if tag not in tagMap:
                continue
            fieldvalue = tagMap[tag][i]
            if tag == 6:
                # Tag 6 carries two values (fid, offset) that are combined
                # into a single kindle:pos:fid link.
                pos_fid = toBase32(fieldvalue, 4).decode("utf-8")
                pos_off = toBase32(tagMap[tag][i + 1], 10).decode("utf-8")
                fieldvalue = "kindle:pos:fid:%s:off:%s" % (pos_fid, pos_off)
            entry[fieldname] = fieldvalue
            if tag == 3:
                entry["text"] = self._ctoc_lookup(ctoc_text, fieldvalue, "Unknown Text")
            elif tag == 5:
                entry["kind"] = self._ctoc_lookup(ctoc_text, fieldvalue, "Unknown Kind")
        return entry

    def _ctoc_lookup(self, ctoc_text, offset, default):
        """Return the decoded CTOC string at ``offset``, or ``default``.

        The original called ``.decode()`` on the str default as well, which
        raises AttributeError on Python 3 whenever the offset is missing.
        """
        raw = ctoc_text.get(offset)
        if raw is None:
            return default
        return raw.decode(self.mh.codec)

    @staticmethod
    def _log_entry(entry):
        """Log a readable dump of one parsed entry (debug only).

        Field notes carried over from the original author's debugging
        comments (unverified): ``pos``/150 approximates the page number;
        ``hlvl`` is the section level; ``parent`` is the record number of the
        enclosing section; ``child1``..``childn`` is the record-number range
        of the next-level sections.
        """
        logger.debug(
            "record number: {}\nname: {}\nposition {} length: {}\ntext: {}\n"
            "kind: {}\nheading level: {}\nparent: {}\n"
            "first child: {} last child: {}\npos_fid is {}".format(
                entry["num"],
                entry["name"],
                entry["pos"],
                entry["len"],
                entry["text"],
                entry["kind"],
                entry["hlvl"],
                entry["parent"],
                entry["child1"],
                entry["childn"],
                entry["pos_fid"],
            )
        )

    @staticmethod
    def _language(metadata):
        """Return the first ``Language`` value, or ``"und"`` when absent."""
        langs = metadata.get("Language")
        return langs[0] if langs else "und"

    def writeNCX(self, metadata, htmlfile=""):
        """Build the NCX for ``self.indx_data`` and write it as ``toc.ncx``.

        Args:
            metadata: mapping with list values under ``Title`` and
                ``UniqueID`` (required) and ``Language`` (optional).
            htmlfile: name of the HTML file the ``#fileposN`` links point
                into. NOTE(review): the original referenced an undefined
                ``htmlfile``; callers should pass the real file name.

        The file goes into ``self.files.mobi7dir`` when ``files`` was given to
        the constructor, otherwise into the current directory.
        """
        self.isNCX = True
        logger.debug("Write ncx")
        xml = self.buildNCX(
            metadata["Title"][0],
            metadata["UniqueID"][0],
            self._language(metadata),
            htmlfile,
        )
        outdir = self.files.mobi7dir if self.files is not None else "."
        ncxname = os.path.join(outdir, "toc.ncx")
        with open(pathof(ncxname), "wb") as f:
            f.write(xml.encode("utf-8"))

    def buildNCX(self, title="", ident="", lang="und", htmlfile=""):
        """Return the NCX XML for ``self.indx_data`` with ``#filepos`` links.

        The original signature took no arguments yet used ``title``,
        ``ident``, ``lang``, ``htmlfile`` and the XML templates as if they
        were defined, so every call failed. All parameters are optional so
        both ``buildNCX()`` and the ``buildNCX(title, ident, lang)`` call made
        by ``writeNCX`` work.
        """

        def filepos_link(e):
            return "%s#filepos%d" % (htmlfile, e["pos"])

        # indx_data is None until parseNCX() has run; treat that as "no entries".
        return self._build_ncx_xml(self.indx_data or [], filepos_link, title, ident, lang)

    def buildK8NCX(self, indx_data, title, ident, lang):
        """Return the NCX XML for K8 entries.

        Each entry must carry ``filename`` and ``idtag``; links have the form
        ``Text/<filename>`` or ``Text/<filename>#<idtag>``.
        """

        def k8_link(e):
            if e["idtag"] == "":
                return "Text/%s" % e["filename"]
            return "Text/%s#%s" % (e["filename"], e["idtag"])

        return self._build_ncx_xml(indx_data, k8_link, title, ident, lang)

    def _build_ncx_xml(self, indx_data, make_link, title, ident, lang):
        """Render header + nested navPoints + footer for ``indx_data``.

        Args:
            indx_data: list of entry dicts (``hlvl``, ``text``, ``child1``,
                ``childn`` plus whatever ``make_link`` reads).
            make_link: callable mapping an entry to its ``content src`` value.
            title, ident, lang: values for docTitle, dtb:uid and xml:lang.
        """
        from html import unescape
        from xml.sax.saxutils import escape

        def xml_text(value):
            # Unescape first so text that already contains entities is not
            # escaped twice.
            return escape(unescape(value))

        def xml_attr(value):
            return escape(value, {'"': "&quot;"})

        total = len(indx_data)

        def render(max_lvl=0, num=0, lvl=0, start=-1, end=-1):
            """Render entries of level ``lvl`` in [start, end) and their children."""
            if start > total or end > total:
                print("Warning: missing INDX child entries", start, end, total)
                # Callers unpack three values; the original returned a bare
                # "" here and crashed with ValueError.
                return "", max_lvl, num
            if DEBUG_NCX:
                logger.debug("recursINDX lvl %d from %d to %d" % (lvl, start, end))
            start = max(start, 0)
            if end <= 0:
                end = total
            max_lvl = max(max_lvl, lvl)
            indent = " " * (2 + lvl)
            parts = []
            for e in indx_data[start:end]:
                if e["hlvl"] != lvl:
                    continue
                # open entry
                num += 1
                entry = self._NCX_ENTRY % (
                    "np_%d" % num,
                    num,
                    xml_text(e["text"]),
                    xml_attr(make_link(e)),
                )
                parts.append(self._LINE_START.sub(indent, entry) + "\n")
                # recurse into the child range, if any
                if e["child1"] >= 0:
                    xmlrec, max_lvl, num = render(
                        max_lvl, num, lvl + 1, e["child1"], e["childn"] + 1
                    )
                    parts.append(xmlrec)
                # close entry
                parts.append(indent + "</navPoint>\n")
            return "".join(parts), max_lvl, num

        body, max_lvl, num = render()
        header = self._NCX_HEADER % (
            xml_attr(lang),
            xml_attr(ident),
            max_lvl + 1,
            xml_text(title),
        )
        if total != num:
            print("Warning: different number of entries in NCX", total, num)
        return header + body + self._NCX_FOOTER

    def writeK8NCX(self, ncx_data, metadata):
        """Build the K8 NCX for ``ncx_data`` and write it to ``./k8toc.ncx.json``.

        NOTE(review): the file contains NCX XML despite the ``.json`` suffix;
        the name is kept unchanged because other code may read that path.
        """
        self.isNCX = True
        logger.debug("Write K8 ncx")
        xml = self.buildK8NCX(
            ncx_data,
            metadata["Title"][0],
            metadata["UniqueID"][0],
            self._language(metadata),
        )
        ncxname = os.path.join('./', 'k8toc.ncx.json')
        with open(pathof(ncxname), "wb") as f:
            f.write(xml.encode("utf-8"))
|
||||
|
||||
Reference in New Issue
Block a user