%PDF- %PDF-
| Direktori : /usr/lib/calibre/calibre/ebooks/mobi/debug/ |
| Current File : //usr/lib/calibre/calibre/ebooks/mobi/debug/index.py |
#!/usr/bin/env python3
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import struct
from collections import OrderedDict, namedtuple
from calibre.ebooks.mobi.reader.headers import NULL_INDEX
from calibre.ebooks.mobi.reader.index import (
CNCX, INDEX_HEADER_FIELDS, get_tag_section_start, parse_index_record,
parse_indx_header, parse_tagx_section
)
from calibre.ebooks.mobi.reader.ncx import default_entry, tag_fieldname_map
from polyglot.builtins import iteritems
# Record types produced by the index classes defined later in this module.
File = namedtuple(
    'File',
    'file_number name divtbl_count start_position length',
)
# NOTE: the generated class is called 'Chunk', so instances repr() as
# Chunk(...) even though the module-level name is Elem.
Elem = namedtuple(
    'Chunk',
    'insert_pos toc_text file_number sequence_number start_pos length',
)
GuideRef = namedtuple('GuideRef', 'type title pos_fid')

# Extend the imported field list with the keys that
# read_variable_len_data() stores in every header it processes.
INDEX_HEADER_FIELDS = INDEX_HEADER_FIELDS + (
    'indices',
    'tagx_block_size',
    'tagx_block',
)

# Human readable labels for header fields; fields without an entry are
# rendered under their raw name.
FIELD_NAMES = {
    'len': 'Header length',
    'type': 'Unknown',
    'gen': 'Index Type (0 - normal, 2 - inflection)',
    'start': 'IDXT Offset',
    'count': 'Number of entries in this record',
    'code': 'character encoding',
    'lng': 'Unknown',
    'total': 'Total number of actual Index Entries in all records',
    'ordt': 'ORDT Offset',
    'ligt': 'LIGT Offset',
    'nligt': 'Number of LIGT',
    'ncncx': 'Number of CNCX records',
    'indices': 'Geometry of index records',
}
def read_variable_len_data(data, header):
    """Decode the variable length parts of an index record into ``header``.

    ``header`` is updated in place with three keys:

    ``tagx_block_size`` / ``tagx_block``
        Size and raw bytes of the block found at ``header['tagx']``
        (``0`` and ``b''`` when that offset is not positive).
    ``indices``
        A list of ``(text, num)`` tuples, one for each of the
        ``header['count']`` offsets stored in the table located at
        ``header['start']``. The list stays empty when ``header['tagx']``
        is not positive.

    :param data: Raw bytes of the record.
    :param header: Mapping providing at least ``tagx``, ``start`` and
        ``count``; it is modified in place.
    :raises ValueError: If anything other than NUL bytes follows the
        offset table.
    """
    tagx_offset = header['tagx']
    idxt_offset = header['start']
    entry_count = header['count']
    # 4 leading bytes (presumably the table identifier) followed by one
    # 2 byte big-endian offset per entry.
    idxt_size = 4 + entry_count * 2
    indices = []

    if tagx_offset > 0:
        block_size = struct.unpack_from(b'>I', data, tagx_offset + 4)[0]
        header['tagx_block_size'] = block_size
        header['tagx_block'] = data[tagx_offset:tagx_offset + block_size]

        table_pos = idxt_offset + 4
        for _ in range(entry_count):
            pos = struct.unpack_from(b'>H', data, table_pos)[0]
            table_pos += 2
            # Slice instead of indexing: on Python 3 indexing bytes yields
            # an int and bytearray(int) creates a zero-filled buffer of
            # that size, which made the length always come out as 0 (or
            # raise IndexError for an empty string).
            strlen = bytearray(data[pos:pos + 1])[0]
            text = data[pos + 1:pos + 1 + strlen]
            pos += 1 + strlen
            num = struct.unpack_from(b'>H', data, pos)[0]
            indices.append((text, num))
    else:
        header['tagx_block'] = b''
        header['tagx_block_size'] = 0

    # Only NUL padding is allowed after the offset table.
    leftover = data[idxt_offset + idxt_size:].rstrip(b'\0')
    if leftover:
        raise ValueError('Traling bytes after last IDXT entry: %r' % leftover)
    header['indices'] = indices
def read_index(sections, idx, codec):
    """Parse the index whose header record is ``sections[idx]``.

    The header record is followed by ``count`` index records and then by
    ``ncncx`` CNCX records, both counts being taken from the parsed header.

    :param sections: Sequence of objects exposing their bytes as ``.raw``.
    :param idx: Position of the index header record in ``sections``.
    :param codec: Codec handed on to the CNCX and index record parsers.
    :return: ``(table, cncx, indx_header, index_headers)`` where ``table``
        is the ``OrderedDict`` passed to ``parse_index_record`` for every
        index record and ``index_headers`` holds one header per record.
    """
    table = OrderedDict()
    cncx = CNCX([], codec)

    header_raw = sections[idx].raw
    indx_header = parse_indx_header(header_raw)
    record_count = indx_header['count']
    first_record = idx + 1

    num_cncx = indx_header['ncncx']
    if num_cncx > 0:
        # The CNCX records come right after the last index record.
        cncx_start = first_record + record_count
        cncx_raw = [sec.raw for sec in sections[cncx_start:cncx_start + num_cncx]]
        cncx = CNCX(cncx_raw, codec)

    tagx_start = get_tag_section_start(header_raw, indx_header)
    control_byte_count, tags = parse_tagx_section(header_raw[tagx_start:])
    read_variable_len_data(header_raw, indx_header)

    index_headers = []
    for record_num in range(first_record, first_record + record_count):
        record_raw = sections[record_num].raw
        record_header = parse_index_record(
            table, record_raw, control_byte_count, tags, codec,
            indx_header['ordt_map'], strict=True)
        index_headers.append(record_header)
        read_variable_len_data(record_raw, record_header)

    return table, cncx, indx_header, index_headers
class Index:
    """Parsed index together with a textual dump for debugging.

    Attributes ``table``, ``cncx`` and ``header`` stay ``None`` (and
    ``index_headers`` stays empty) when the index is absent, i.e. when
    ``idx`` equals ``NULL_INDEX``. ``records`` is ``None`` here; the
    subclasses in this module replace it with a list of parsed entries.
    """

    def __init__(self, idx, records, codec):
        """Read the index at position ``idx`` of ``records``, if present."""
        self.table = self.cncx = self.header = self.records = None
        self.index_headers = []
        if idx != NULL_INDEX:
            (self.table, self.cncx, self.header,
             self.index_headers) = read_index(records, idx, codec)

    def render(self):
        """Return the debugging dump of this index as a list of lines."""
        stars = '*' * 10

        def header_lines(header):
            # One "label: value" line per known header field.
            return ['%-12s: %r' % (FIELD_NAMES.get(field, field), header[field])
                    for field in INDEX_HEADER_FIELDS]

        ans = [stars + ' Index Header ' + stars]
        if self.header is not None:
            ans.extend(header_lines(self.header))
        ans.extend(['', ''])

        ans.append(stars + ' Index Record Headers (%d records) '
                   % len(self.index_headers) + stars)
        for i, header in enumerate(self.index_headers):
            ans.append(stars + ' Index Record %d ' % i + stars)
            ans.extend(header_lines(header))

        if self.cncx:
            ans.append(stars + ' CNCX ' + stars)
            for offset, val in iteritems(self.cncx):
                ans.append('%10s: %s' % (offset, val))
            ans.extend(['', ''])

        if self.table is not None:
            ans.append(stars + ' %d Index Entries ' % len(self.table) + stars)
            for k, v in iteritems(self.table):
                ans.append('%s: %r' % (k, v))

        if self.records:
            ans.extend(['', '', stars + ' Parsed Entries ' + stars])
            ans.extend(repr(f) for f in self.records)

        ans.append('')
        return ans

    def __str__(self):
        return '\n'.join(self.render())

    def __iter__(self):
        """Iterate over the parsed entries; yields nothing if there are none."""
        # records is None on a plain Index, which used to make iteration
        # fail with TypeError instead of behaving like an empty index.
        return iter(self.records or ())
class SKELIndex(Index):
    """Index whose entries are exposed as ``File`` tuples in ``records``."""

    def __init__(self, skelidx, records, codec):
        """Parse the index and build one ``File`` per table entry.

        :raises ValueError: If an entry's tag set is not exactly ``{1, 6}``.
        """
        super().__init__(skelidx, records, codec)
        self.records = []
        if self.table is None:
            return

        expected_tags = {1, 6}
        for file_number, name in enumerate(self.table):
            tag_map = self.table[name]
            found_tags = set(tag_map)
            if found_tags != expected_tags:
                raise ValueError('SKEL Index has unknown tags: %s'%
                        (found_tags - expected_tags))
            skeleton = File(
                file_number=file_number,
                name=name,
                divtbl_count=tag_map[1][0],
                start_position=tag_map[6][0],
                length=tag_map[6][1],
            )
            self.records.append(skeleton)
class SECTIndex(Index):
    """Index whose entries are exposed as ``Elem`` tuples in ``records``."""

    def __init__(self, sectidx, records, codec):
        """Parse the index and build one ``Elem`` per table entry.

        The entry name is converted with ``int()`` to obtain the insert
        position, and tag 2 is used as a key into the CNCX for the TOC text.

        :raises ValueError: If an entry's tag set is not exactly
            ``{2, 3, 4, 6}``.
        """
        super().__init__(sectidx, records, codec)
        self.records = []
        if self.table is None:
            return

        expected_tags = {2, 3, 4, 6}
        for name in self.table:
            tag_map = self.table[name]
            found_tags = set(tag_map)
            if found_tags != expected_tags:
                raise ValueError('Chunk Index has unknown tags: %s'%
                        (found_tags - expected_tags))

            # Resolve the TOC text before anything else is read from the entry.
            toc_text = self.cncx[tag_map[2][0]]
            chunk = Elem(
                insert_pos=int(name),
                toc_text=toc_text,
                file_number=tag_map[3][0],
                sequence_number=tag_map[4][0],
                start_pos=tag_map[6][0],
                length=tag_map[6][1],
            )
            self.records.append(chunk)
class GuideIndex(Index):
    """Index whose entries are exposed as ``GuideRef`` tuples in ``records``."""

    def __init__(self, guideidx, records, codec):
        """Parse the index and build one ``GuideRef`` per table entry.

        The title is looked up in the CNCX via tag 1. ``pos_fid`` is the
        value of tag 6 when present, otherwise the pair of the tag 2 and
        tag 3 values.

        :raises ValueError: If an entry's tag set is neither ``{1, 6}`` nor
            ``{1, 2, 3}``.
        """
        super().__init__(guideidx, records, codec)
        self.records = []
        if self.table is None:
            return

        for ref_type in self.table:
            tag_map = self.table[ref_type]
            found_tags = set(tag_map)
            if found_tags != {1, 6} and found_tags != {1, 2, 3}:
                raise ValueError('Guide Index has unknown tags: %s'%
                        tag_map)

            title = self.cncx[tag_map[1][0]]
            if 6 in tag_map:
                pos_fid = tag_map[6]
            else:
                pos_fid = (tag_map[2], tag_map[3])
            self.records.append(GuideRef(ref_type, title, pos_fid))
class NCXIndex(Index):
    """Index whose entries are exposed as ``NCXEntry`` tuples in ``records``."""

    def __init__(self, ncxidx, records, codec):
        """Parse the index and build one ``NCXEntry`` per table entry.

        Every entry starts out as a copy of ``default_entry`` and is then
        filled from the tags listed in ``tag_fieldname_map``. Negative
        parent/child references are reported as ``None``.
        """
        super().__init__(ncxidx, records, codec)
        self.records = []
        if self.table is None:
            return

        NCXEntry = namedtuple('NCXEntry', 'index start length depth parent '
                'first_child last_child title pos_fid kind')
        # Tags whose value is additionally used as a key into the CNCX,
        # mapped to the entry key that receives the looked-up string.
        cncx_backed = {3:'text', 5:'kind', 70:'description',
                71:'author', 72:'image_caption',
                73:'image_attribution'}

        def refindx(e, name):
            # A negative reference means "no such entry".
            value = e[name]
            return None if value < 0 else value

        for num, (text, tag_map) in enumerate(iteritems(self.table)):
            e = default_entry.copy()
            e['name'] = text
            e['num'] = num

            for tag in tag_fieldname_map:
                fieldname, pos = tag_fieldname_map[tag]
                if tag not in tag_map:
                    continue
                fieldvalue = tag_map[tag][pos]
                if tag == 6:
                    # Appears to be an idx into the KF8 elems table with an
                    # offset, so keep all the values rather than just one
                    fieldvalue = tuple(tag_map[tag])
                e[fieldname] = fieldvalue
                cncx_name = cncx_backed.get(tag)
                if cncx_name is not None:
                    e[cncx_name] = self.cncx.get(fieldvalue,
                            default_entry[cncx_name])

            self.records.append(NCXEntry(
                start=e['pos'],
                index=e['num'],
                length=e['len'],
                depth=e['hlvl'],
                parent=refindx(e, 'parent'),
                first_child=refindx(e, 'child1'),
                last_child=refindx(e, 'childn'),
                title=e['text'],
                pos_fid=e['pos_fid'],
                kind=e['kind'],
            ))