#!/usr/bin/env python
# -*- coding: utf-8 -*-
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
from __future__ import unicode_literals, division, absolute_import, print_function

import os
import re
from html import unescape
from xml.sax.saxutils import escape

from loguru import logger

from .unipath import pathof
# note: in python3, re requires the pattern to be of exactly the same type as the data being searched,
# but the u"" prefix is not allowed for the pattern itself, only b""
'''
NCX (Navigation Control for XML applications) is a generalized navigation definition DTD for application
to Digital Talking Books, eBooks, and general web content models.
This DTD is an XML application that layers navigation functionality on top of SMIL 2.0 content.
The NCX defines a navigation path/model that may be applied upon existing publications,
without modification of the existing publication source, so long as the navigation targets within
the source publication can be directly referenced via a URI.
http://www.daisy.org/z3986/2005/ncx-2005-1.dtd
'''
from .mobi_utils import toBase32
from .mobi_index import MobiIndex
# Module-wide debug switch: when True, parsing and building the NCX emit
# verbose diagnostics (logger.debug records and a print-out of every parsed entry).
DEBUG_NCX = True
class ncxExtract:
    """Read the NCX index of a Mobi book and serialise it as a ``toc.ncx`` file.

    ``mh`` must expose ``sect``, ``ncxidx`` and ``codec``; ``files`` must
    expose ``mobi7dir`` and ``k8oebps`` (the output directories used by
    :meth:`writeNCX` and :meth:`writeK8NCX`).

    Each parsed entry is a dict with these keys:

    * ``name``    - record name decoded as UTF-8
    * ``pos`` / ``len`` - position and length of the section (the original
      author's note says ``pos / 150`` approximates the page number)
    * ``noffs`` / ``koffs`` - keys into the CTOC table for the label text
      and the kind string
    * ``text`` / ``kind`` - the decoded label and kind
    * ``hlvl``    - heading level of the section (0 is the top level)
    * ``parent``  - record number of the enclosing section, -1 if none
    * ``child1`` / ``childn`` - record numbers of the first and last child
      section, -1 if there are none
    * ``pos_fid`` - ``kindle:pos:fid:....:off:....`` reference, or None
    * ``num``     - record number (index of the entry)
    """

    # Value of ``ncxidx`` meaning "this book has no NCX index".
    _NO_NCX = 0xFFFFFFFF

    # INDX tag number -> (entry key, index of the value inside the tag's value list).
    _TAG_FIELDNAME_MAP = {
        1: ("pos", 0),
        2: ("len", 0),
        3: ("noffs", 0),
        4: ("hlvl", 0),
        5: ("koffs", 0),
        6: ("pos_fid", 0),
        21: ("parent", 0),
        22: ("child1", 0),
        23: ("childn", 0),
    }

    # Placeholders: xml:lang, dtb:uid, dtb:depth, document title.
    # The XML declaration must stay at the very start of the string.
    _NCX_HEADER = """<?xml version='1.0' encoding='utf-8'?>
<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1" xml:lang="%s">
<head>
<meta content="%s" name="dtb:uid"/>
<meta content="%d" name="dtb:depth"/>
<meta content="mobiunpack.py" name="dtb:generator"/>
<meta content="0" name="dtb:totalPageCount"/>
<meta content="0" name="dtb:maxPageNumber"/>
</head>
<docTitle>
<text>%s</text>
</docTitle>
<navMap>
"""

    _NCX_FOOTER = """</navMap>
</ncx>
"""

    # Placeholders: navPoint id, playOrder, label text, content src.
    # The closing </navPoint> is emitted separately, after any nested children.
    _NCX_ENTRY = """<navPoint id="%s" playOrder="%d">
<navLabel>
<text>%s</text>
</navLabel>
<content src="%s"/>"""

    def __init__(self, mh, files):
        self.mh = mh
        self.sect = self.mh.sect
        self.files = files
        self.isNCX = False  # set to True once one of the write methods has been called
        self.mi = MobiIndex(self.sect)
        self.ncxidx = self.mh.ncxidx
        self.indx_data = None  # filled in by parseNCX()

    def parseNCX(self):
        """Parse the NCX index into a list of entry dicts.

        Returns an empty list (and leaves ``self.indx_data`` untouched) when
        the book has no NCX index; otherwise the parsed list is also stored
        in ``self.indx_data`` for :meth:`buildNCX`.
        """
        indx_data = []
        if self.ncxidx == self._NO_NCX:
            return indx_data

        outtbl, ctoc_text = self.mi.getIndexData(self.ncxidx, "NCX")
        if DEBUG_NCX:
            logger.debug("ctoc_text {}".format(ctoc_text))
            logger.debug("outtbl {}".format(outtbl))

        for num, (text, tagMap) in enumerate(outtbl):
            entry = {
                "name": text.decode("utf-8"),
                "pos": -1,
                "len": 0,
                "noffs": -1,
                "text": "Unknown Text",
                "hlvl": -1,
                "kind": "Unknown Kind",
                "pos_fid": None,
                "parent": -1,
                "child1": -1,
                "childn": -1,
                "num": num,
            }
            for tag, (fieldname, i) in self._TAG_FIELDNAME_MAP.items():
                if tag not in tagMap:
                    continue
                fieldvalue = tagMap[tag][i]
                if tag == 6:
                    # Tag 6 carries two values (fid, offset) that are combined
                    # into a single kindle:pos reference.
                    pos_fid = toBase32(fieldvalue, 4).decode("utf-8")
                    pos_off = toBase32(tagMap[tag][i + 1], 10).decode("utf-8")
                    fieldvalue = "kindle:pos:fid:%s:off:%s" % (pos_fid, pos_off)
                entry[fieldname] = fieldvalue
                if tag == 3:
                    entry["text"] = self._ctoc_string(ctoc_text, fieldvalue, "Unknown Text")
                elif tag == 5:
                    entry["kind"] = self._ctoc_string(ctoc_text, fieldvalue, "Unknown Kind")
            indx_data.append(entry)
            if DEBUG_NCX:
                self._dump_entry(entry)

        self.indx_data = indx_data
        if DEBUG_NCX:
            logger.debug("indx_data {}".format(indx_data))
        return indx_data

    def _ctoc_string(self, ctoc_text, offset, default):
        """Return the CTOC string stored at ``offset``, or ``default`` if absent."""
        raw = ctoc_text.get(offset)
        if raw is None:
            # The fallback is already text; calling .decode() on it would fail.
            return default
        if isinstance(raw, bytes):
            return raw.decode(self.mh.codec)
        return raw

    @staticmethod
    def _dump_entry(entry):
        """Print one parsed entry in human-readable form (debug aid)."""
        print("record number: ", entry["num"])
        print("name: ", entry["name"])
        print("position", entry["pos"], " length: ", entry["len"])
        print("text: ", entry["text"])
        print("kind: ", entry["kind"])
        print("heading level: ", entry["hlvl"])
        print("parent:", entry["parent"])
        print("first child: ", entry["child1"], " last child: ", entry["childn"])
        print("pos_fid is ", entry["pos_fid"])
        print("\n\n")

    @staticmethod
    def _xml_text(value):
        """Return ``value`` as XML-safe character data.

        Entities are unescaped first so that text which already contains
        ``&amp;`` and friends is not escaped twice.
        """
        return escape(unescape("%s" % (value,)))

    @staticmethod
    def _language(metadata):
        """Return the first ``Language`` value of ``metadata``, or ``"en"`` if none is set."""
        values = metadata.get("Language")
        return values[0] if values else "en"

    def _render_level(self, indx_data, link_for, max_lvl=0, num=0, lvl=0, start=-1, end=-1):
        """Render the navPoints of heading level ``lvl`` found in ``indx_data[start:end]``.

        Children are rendered recursively, nested inside their parent.
        ``start``/``end`` values <= 0 mean "from the beginning"/"to the end".

        Returns ``(xml, max_lvl, num)``: the markup, the deepest level seen
        so far and the running navPoint counter (which doubles as playOrder).
        """
        total = len(indx_data)
        if start > total or end > total:
            print("Warning: missing INDX child entries", start, end, total)
            # Same shape as the normal return value so callers can always unpack it.
            return "", max_lvl, num
        if DEBUG_NCX:
            logger.debug("recursINDX lvl %d from %d to %d" % (lvl, start, end))
        if start <= 0:
            start = 0
        if end <= 0:
            end = total
        max_lvl = max(max_lvl, lvl)
        indent = " " * (2 + lvl)

        parts = []
        for entry in indx_data[start:end]:
            if entry["hlvl"] != lvl:
                continue
            # open entry
            num += 1
            link = link_for(entry)
            if DEBUG_NCX:
                logger.debug("link {}".format(link))
            opening = self._NCX_ENTRY % (
                "np_%d" % num,
                num,
                self._xml_text(entry["text"]),
                link,
            )
            parts.extend(indent + line + "\n" for line in opening.split("\n"))
            # recurse into the children, if any
            if entry["child1"] >= 0:
                children, max_lvl, num = self._render_level(
                    indx_data,
                    link_for,
                    max_lvl,
                    num,
                    lvl + 1,
                    entry["child1"],
                    entry["childn"] + 1,
                )
                parts.append(children)
            # close entry
            parts.append(indent + "</navPoint>\n")
        return "".join(parts), max_lvl, num

    def _build_ncx(self, indx_data, title, ident, lang, link_for):
        """Assemble a complete NCX document; ``link_for(entry)`` yields each content src."""
        body, max_lvl, num = self._render_level(indx_data, link_for)
        header = self._NCX_HEADER % (lang, ident, max_lvl + 1, self._xml_text(title))
        if len(indx_data) != num:
            print("Warning: different number of entries in NCX", len(indx_data), num)
        return header + body + self._NCX_FOOTER

    def buildNCX(self, htmlfile, title, ident, lang):
        """Build the NCX for the entries stored by :meth:`parseNCX`.

        Every navPoint links to ``<htmlfile>#filepos<pos>``.  Returns the
        document as text.  If no index has been parsed, the navMap is empty.
        """

        def link_for(entry):
            return "%s#filepos%d" % (htmlfile, entry["pos"])

        return self._build_ncx(self.indx_data or [], title, ident, lang, link_for)

    def writeNCX(self, metadata):
        """Build the NCX and write it, UTF-8 encoded, to ``<mobi7dir>/toc.ncx``.

        ``metadata`` must contain ``Title`` and ``UniqueID`` (lists whose
        first item is used); ``Language`` is optional.
        """
        self.isNCX = True
        logger.debug("Write ncx")
        # The HTML file name is fixed rather than derived from the input file name.
        htmlname = "book1.html"
        ncx = self.buildNCX(
            htmlname,
            metadata["Title"][0],
            metadata["UniqueID"][0],
            self._language(metadata),
        )
        ncxname = os.path.join(self.files.mobi7dir, "toc.ncx")
        with open(pathof(ncxname), "wb") as f:
            f.write(ncx.encode("utf-8"))

    def buildK8NCX(self, indx_data, title, ident, lang):
        """Build the NCX for K8 entries.

        Each entry must additionally provide ``filename`` and ``idtag``; the
        navPoint links to ``Text/<filename>`` or, when ``idtag`` is not
        empty, ``Text/<filename>#<idtag>``.  Returns the document as text.
        """

        def link_for(entry):
            if entry["idtag"] == "":
                return "Text/%s" % entry["filename"]
            return "Text/%s#%s" % (entry["filename"], entry["idtag"])

        return self._build_ncx(indx_data, title, ident, lang, link_for)

    def writeK8NCX(self, ncx_data, metadata):
        """Build the K8 NCX and write it, UTF-8 encoded, to ``<k8oebps>/toc.ncx``.

        ``metadata`` must contain ``Title`` and ``UniqueID`` (lists whose
        first item is used); ``Language`` is optional.
        """
        self.isNCX = True
        logger.debug("Write K8 ncx")
        ncx = self.buildK8NCX(
            ncx_data,
            metadata["Title"][0],
            metadata["UniqueID"][0],
            self._language(metadata),
        )
        ncxname = os.path.join(self.files.k8oebps, "toc.ncx")
        with open(pathof(ncxname), "wb") as f:
            f.write(ncx.encode("utf-8"))