kindle manager
This commit is contained in:
132
mobiparse/mobi/mobi_ncx.py
Executable file
132
mobiparse/mobi/mobi_ncx.py
Executable file
@@ -0,0 +1,132 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
|
||||
|
||||
from __future__ import unicode_literals, division, absolute_import, print_function
|
||||
|
||||
import os
|
||||
from .unipath import pathof
|
||||
from loguru import logger
|
||||
|
||||
|
||||
import re
|
||||
import json
|
||||
|
||||
# note: re requires the pattern to be the exact same type as the data to be searched in python3
|
||||
# but u"" is not allowed for the pattern itself only b""
|
||||
|
||||
'''
|
||||
NCX (Navigation Control for XML applications) is a generalized navigation definition DTD for application
|
||||
to Digital Talking Books, eBooks, and general web content models.
|
||||
This DTD is an XML application that layers navigation functionality on top of SMIL 2.0 content.
|
||||
The NCX defines a navigation path/model that may be applied upon existing publications,
|
||||
without modification of the existing publication source, so long as the navigation targets within
|
||||
the source publication can be directly referenced via a URI.
|
||||
|
||||
http://www.daisy.org/z3986/2005/ncx-2005-1.dtd
|
||||
'''
|
||||
|
||||
from .mobi_utils import toBase32
|
||||
from .mobi_index import MobiIndex
|
||||
|
||||
# Set to True to dump the raw index tables and every parsed NCX record.
DEBUG_NCX = False


class ncxExtract:
    """Read the NCX (navigation / table-of-contents) index of a MOBI book.

    The constructor takes a header object ``mh`` and reads three attributes
    from it: ``sect`` (handed to ``MobiIndex``), ``ncxidx`` (the index
    section number; ``0xFFFFFFFF`` is treated as "no NCX index") and
    ``codec`` (used to decode the CTOC strings).
    """

    # Index tag number -> (key stored in the record dict, position of the
    # wanted value inside that tag's value list).
    _TAG_FIELDNAME_MAP = {
        1: ("pos", 0),
        2: ("len", 0),
        3: ("noffs", 0),
        4: ("hlvl", 0),
        5: ("koffs", 0),
        6: ("pos_fid", 0),
        21: ("parent", 0),
        22: ("child1", 0),
        23: ("childn", 0),
    }

    def __init__(self, mh):
        self.mh = mh
        self.sect = self.mh.sect
        self.isNCX = False
        self.mi = MobiIndex(self.sect)
        self.ncxidx = self.mh.ncxidx
        # Filled in by parseNCX() once an NCX index has been parsed.
        self.indx_data = None

    def _ctoc_string(self, ctoc_text, offset, default):
        """Return the decoded CTOC string stored at ``offset``.

        ``default`` is returned unchanged when the offset is missing.  Only
        values actually found in ``ctoc_text`` are decoded: the placeholder
        is already text, and calling ``.decode`` on it raises
        ``AttributeError`` on Python 3.
        """
        raw = ctoc_text.get(offset)
        if raw is None:
            return default
        return raw.decode(self.mh.codec)

    @staticmethod
    def _dump_record(entry):
        """Print one parsed NCX record (debug aid, used when DEBUG_NCX is set)."""
        print("record number: ", entry["num"])
        print(
            "name: ", entry["name"],
        )
        print("position", entry["pos"], " length: ", entry["len"])
        print("text: ", entry["text"])
        print("kind: ", entry["kind"])
        print("heading level: ", entry["hlvl"])
        print("parent:", entry["parent"])
        print(
            "first child: ", entry["child1"], " last child: ", entry["childn"]
        )
        print("pos_fid is ", entry["pos_fid"])
        print("\n\n")

    def parseNCX(self):
        """Parse the NCX index and return its records as a list of dicts.

        One dict is produced per index entry, in index order, with the keys:

        * ``num``     - record number (0-based position in the index)
        * ``name``    - entry label decoded as UTF-8, e.g. ``'03'``
        * ``pos`` / ``len`` - position and length of the section
          (original author's note: position / 150 = real page number)
        * ``noffs`` / ``text`` - CTOC offset of the title and the decoded
          title (``'Unknown Text'`` when unavailable)
        * ``koffs`` / ``kind`` - CTOC offset of the kind and the decoded
          kind (``'Unknown Kind'`` when unavailable); ``koffs`` is only
          present when the entry carries tag 5
        * ``hlvl``    - heading level (level of the section)
        * ``parent``  - record number of the enclosing section, -1 if none
        * ``child1`` / ``childn`` - first / last record number of the
          sub-sections, -1 if none
        * ``pos_fid`` - ``'kindle:pos:fid:XXXX:off:XXXXXXXXXX'`` or None

        Example record::

            {'name': '00', 'pos': 167, 'len': 24798, 'noffs': 0,
             'text': '<title>', 'hlvl': 0, 'kind': 'Unknown Kind',
             'pos_fid': None, 'parent': -1, 'child1': -1, 'childn': -1,
             'num': 0}

        Returns an empty list when the book has no NCX index.  The result
        is stored in ``self.indx_data`` only when an NCX index exists.
        """
        indx_data = []
        if self.ncxidx == 0xFFFFFFFF:
            # No NCX index in this book.
            return indx_data

        outtbl, ctoc_text = self.mi.getIndexData(self.ncxidx, "NCX")
        if DEBUG_NCX:
            logger.debug("ctoc_text {}".format(ctoc_text))
            logger.debug("outtbl {}".format(outtbl))

        for num, (text, tagMap) in enumerate(outtbl):
            entry = {
                "name": text.decode("utf-8"),
                "pos": -1,
                "len": 0,
                "noffs": -1,
                "text": "Unknown Text",
                "hlvl": -1,
                "kind": "Unknown Kind",
                "pos_fid": None,
                "parent": -1,
                "child1": -1,
                "childn": -1,
                "num": num,
            }
            for tag, (fieldname, i) in self._TAG_FIELDNAME_MAP.items():
                if tag not in tagMap:
                    continue
                fieldvalue = tagMap[tag][i]
                if tag == 6:
                    # Tag 6 carries two values (fid, offset) that are
                    # combined into a single kindle:pos:fid link.
                    pos_fid = toBase32(fieldvalue, 4).decode("utf-8")
                    pos_off = toBase32(tagMap[tag][i + 1], 10).decode("utf-8")
                    fieldvalue = "kindle:pos:fid:%s:off:%s" % (pos_fid, pos_off)
                entry[fieldname] = fieldvalue
                if tag == 3:
                    entry["text"] = self._ctoc_string(
                        ctoc_text, fieldvalue, "Unknown Text"
                    )
                elif tag == 5:
                    entry["kind"] = self._ctoc_string(
                        ctoc_text, fieldvalue, "Unknown Kind"
                    )
            indx_data.append(entry)
            if DEBUG_NCX:
                self._dump_record(entry)

        self.indx_data = indx_data
        return indx_data
|
||||
|
||||
Reference in New Issue
Block a user