%PDF- %PDF-
Mini Shell

Mini Shell

Direktori : /usr/lib/calibre/calibre/ebooks/mobi/debug/
Upload File :
Create Path :
Current File : //usr/lib/calibre/calibre/ebooks/mobi/debug/index.py

#!/usr/bin/env python3


__license__   = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import struct
from collections import OrderedDict, namedtuple

from calibre.ebooks.mobi.reader.headers import NULL_INDEX
from calibre.ebooks.mobi.reader.index import (
    CNCX, INDEX_HEADER_FIELDS, get_tag_section_start, parse_index_record,
    parse_indx_header, parse_tagx_section
)
from calibre.ebooks.mobi.reader.ncx import default_entry, tag_fieldname_map
from polyglot.builtins import iteritems

# Record type produced by SKELIndex: one entry per row of the SKEL table.
File = namedtuple(
    'File',
    ['file_number', 'name', 'divtbl_count', 'start_position', 'length'],
)

# Record type produced by SECTIndex. The generated class is named 'Chunk',
# so that is the name that shows up in repr() output.
Elem = namedtuple(
    'Chunk',
    ['insert_pos', 'toc_text', 'file_number', 'sequence_number',
     'start_pos', 'length'],
)

# Record type produced by GuideIndex.
GuideRef = namedtuple('GuideRef', ['type', 'title', 'pos_fid'])

# Extend the reader's list of header fields with the extra keys that
# read_variable_len_data() stores on every parsed header.
INDEX_HEADER_FIELDS = INDEX_HEADER_FIELDS + (
    'indices',
    'tagx_block_size',
    'tagx_block',
)

# Human readable labels used by Index.render(). Header fields without an
# entry here are rendered under their raw key.
FIELD_NAMES = {
    'len': 'Header length',
    'type': 'Unknown',
    'gen': 'Index Type (0 - normal, 2 - inflection)',
    'start': 'IDXT Offset',
    'count': 'Number of entries in this record',
    'code': 'character encoding',
    'lng': 'Unknown',
    'total': 'Total number of actual Index Entries in all records',
    'ordt': 'ORDT Offset',
    'ligt': 'LIGT Offset',
    'nligt': 'Number of LIGT',
    'ncncx': 'Number of CNCX records',
    'indices': 'Geometry of index records',
}


def read_variable_len_data(data, header):
    """Extract the TAGX block and the IDXT entry list from a raw index record.

    Nothing is returned; the results are stored on *header* under the keys
    ``tagx_block_size``, ``tagx_block`` and ``indices``. ``indices`` is a list
    of ``(text, num)`` tuples, one per IDXT entry, and is left empty when the
    record has no TAGX block (``header['tagx']`` is not positive).

    :param data: Raw bytes of the index record.
    :param header: Parsed header mapping for *data*. The keys ``tagx``,
        ``start`` and ``count`` are read from it.
    :raises ValueError: If anything other than NUL bytes follows the last
        IDXT entry.
    """
    offset = header['tagx']
    indices = []
    idxt_offset = header['start']
    # The IDXT block is 4 leading bytes followed by one 2-byte pointer per entry
    idxt_size = 4 + header['count'] * 2
    if offset > 0:
        tagx_block_size = header['tagx_block_size'] = struct.unpack_from(b'>I', data, offset + 4)[0]
        header['tagx_block'] = data[offset:offset+tagx_block_size]
        # Skip the 4 leading bytes of the IDXT block to reach the pointers
        offset = idxt_offset + 4
        for _ in range(header['count']):
            # Each pointer is the position of a length-prefixed string that
            # is immediately followed by a big-endian 16-bit number.
            p = struct.unpack_from(b'>H', data, offset)[0]
            offset += 2
            # Slice rather than index: on Python 3 indexing bytes yields an
            # int, and bytearray(int) builds a zero-filled buffer of that
            # size instead of wrapping the length byte.
            strlen = bytearray(data[p:p+1])[0]
            text = data[p+1:p+1+strlen]
            p += 1 + strlen
            num = struct.unpack_from(b'>H', data, p)[0]
            indices.append((text, num))
    else:
        header['tagx_block'] = b''
        header['tagx_block_size'] = 0
    trailing_bytes = data[idxt_offset+idxt_size:]
    if trailing_bytes.rstrip(b'\0'):
        raise ValueError('Traling bytes after last IDXT entry: %r' % trailing_bytes.rstrip(b'\0'))
    header['indices'] = indices


def read_index(sections, idx, codec):
    """Parse the index whose header record is ``sections[idx]``.

    :param sections: Indexable, sliceable sequence of objects exposing the
        record bytes as ``.raw``.
    :param idx: Position of the index header record in *sections*.
    :param codec: Codec passed through to ``CNCX`` and
        ``parse_index_record``.
    :return: ``(table, cncx, indx_header, index_headers)`` where
        ``index_headers`` holds one parsed header per index record.
    """
    table = OrderedDict()
    cncx = CNCX([], codec)

    header_raw = sections[idx].raw
    indx_header = parse_indx_header(header_raw)
    record_count = indx_header['count']

    cncx_count = indx_header['ncncx']
    if cncx_count > 0:
        # The CNCX records come right after the index records
        first_cncx = idx + record_count + 1
        cncx = CNCX(
            [section.raw for section in sections[first_cncx:first_cncx+cncx_count]],
            codec)

    tagx_start = get_tag_section_start(header_raw, indx_header)
    control_byte_count, tags = parse_tagx_section(header_raw[tagx_start:])

    read_variable_len_data(header_raw, indx_header)

    index_headers = []
    first_record = idx + 1
    for record_number in range(first_record, first_record + record_count):
        # Index record
        record_raw = sections[record_number].raw
        record_header = parse_index_record(
            table, record_raw, control_byte_count, tags, codec,
            indx_header['ordt_map'], strict=True)
        index_headers.append(record_header)
        read_variable_len_data(record_raw, record_header)

    return table, cncx, indx_header, index_headers


class Index:
    """Debug view of one index: its header, record headers, CNCX and entries.

    ``table``, ``cncx`` and ``header`` stay ``None`` (and ``index_headers``
    empty) when the index is absent, i.e. when *idx* equals ``NULL_INDEX``.
    ``records`` is left as ``None`` here; subclasses fill it with parsed
    entries.
    """

    def __init__(self, idx, records, codec):
        """Read the index starting at ``records[idx]`` unless it is absent.

        :param idx: Position of the index header record, or ``NULL_INDEX``.
        :param records: Sequence of records handed to ``read_index``.
        :param codec: Codec handed to ``read_index``.
        """
        self.table = self.cncx = self.header = self.records = None
        self.index_headers = []
        if idx != NULL_INDEX:
            self.table, self.cncx, self.header, self.index_headers = read_index(records, idx, codec)

    def render(self):
        """Return the textual dump of this index as a list of lines."""
        ans = ['*'*10 + ' Index Header ' + '*'*10]
        a = ans.append
        if self.header is not None:
            for field in INDEX_HEADER_FIELDS:
                a('%-12s: %r'%(FIELD_NAMES.get(field, field), self.header[field]))
        ans.extend(['', ''])
        # In-place list concatenation keeps ``a`` bound to the same list
        ans += ['*'*10 + ' Index Record Headers (%d records) ' % len(self.index_headers) + '*'*10]
        for i, header in enumerate(self.index_headers):
            ans += ['*'*10 + ' Index Record %d ' % i + '*'*10]
            for field in INDEX_HEADER_FIELDS:
                a('%-12s: %r'%(FIELD_NAMES.get(field, field), header[field]))

        if self.cncx:
            a('*'*10 + ' CNCX ' + '*'*10)
            for offset, val in iteritems(self.cncx):
                a('%10s: %s'%(offset, val))
            ans.extend(['', ''])

        if self.table is not None:
            a('*'*10 + ' %d Index Entries '%len(self.table) + '*'*10)
            for k, v in iteritems(self.table):
                a('%s: %r'%(k, v))

        if self.records:
            ans.extend(['', '', '*'*10 + ' Parsed Entries ' + '*'*10])
            for f in self.records:
                a(repr(f))

        return ans + ['']

    def __str__(self):
        """Return the rendered lines joined with newlines."""
        return '\n'.join(self.render())

    def __iter__(self):
        """Iterate over the parsed records.

        Yields nothing when ``records`` is still ``None`` instead of raising
        ``TypeError``.
        """
        return iter(self.records or ())


class SKELIndex(Index):
    """Index whose entries are turned into ``File`` records."""

    def __init__(self, skelidx, records, codec):
        """Read the index and build one ``File`` per table entry.

        :raises ValueError: If an entry's tag set is not exactly ``{1, 6}``.
        """
        super().__init__(skelidx, records, codec)
        self.records = []

        if self.table is None:
            return

        for file_number, (name, tag_map) in enumerate(self.table.items()):
            if set(tag_map) != {1, 6}:
                raise ValueError('SKEL Index has unknown tags: %s'%
                        (set(tag_map)-{1,6}))
            record = File(
                file_number=file_number,
                name=name,
                divtbl_count=tag_map[1][0],
                start_position=tag_map[6][0],
                length=tag_map[6][1],
            )
            self.records.append(record)


class SECTIndex(Index):
    """Index whose entries are turned into ``Elem`` records."""

    def __init__(self, sectidx, records, codec):
        """Read the index and build one ``Elem`` per table entry.

        :raises ValueError: If an entry's tag set is not exactly
            ``{2, 3, 4, 6}``.
        """
        super().__init__(sectidx, records, codec)
        self.records = []

        if self.table is None:
            return

        for text, tag_map in self.table.items():
            if set(tag_map) != {2, 3, 4, 6}:
                raise ValueError('Chunk Index has unknown tags: %s'%
                        (set(tag_map)-{2, 3, 4, 6}))

            # Tag 2 holds the CNCX offset of the entry's TOC text
            toc_text = self.cncx[tag_map[2][0]]
            record = Elem(
                insert_pos=int(text),
                toc_text=toc_text,
                file_number=tag_map[3][0],
                sequence_number=tag_map[4][0],
                start_pos=tag_map[6][0],
                length=tag_map[6][1],
            )
            self.records.append(record)


class GuideIndex(Index):
    """Index whose entries are turned into ``GuideRef`` records."""

    def __init__(self, guideidx, records, codec):
        """Read the index and build one ``GuideRef`` per table entry.

        :raises ValueError: If an entry's tag set is neither ``{1, 6}`` nor
            ``{1, 2, 3}``.
        """
        super().__init__(guideidx, records, codec)
        self.records = []

        if self.table is None:
            return

        for ref_type, tag_map in self.table.items():
            present = set(tag_map)
            if present != {1, 6} and present != {1, 2, 3}:
                raise ValueError('Guide Index has unknown tags: %s'%
                        tag_map)

            # Tag 1 holds the CNCX offset of the title
            title = self.cncx[tag_map[1][0]]
            if 6 in tag_map:
                pos_fid = tag_map[6]
            else:
                pos_fid = (tag_map[2], tag_map[3])
            self.records.append(GuideRef(ref_type, title, pos_fid))


class NCXIndex(Index):
    """Index whose entries are turned into ``NCXEntry`` records."""

    def __init__(self, ncxidx, records, codec):
        """Read the index and build one ``NCXEntry`` per table entry.

        Each entry starts as a copy of ``default_entry``; the tags listed in
        ``tag_fieldname_map`` that are present then overwrite its fields.
        """
        super().__init__(ncxidx, records, codec)
        self.records = []

        if self.table is None:
            return

        NCXEntry = namedtuple('NCXEntry', [
            'index', 'start', 'length', 'depth', 'parent',
            'first_child', 'last_child', 'title', 'pos_fid', 'kind'])

        # Tags whose value is additionally looked up in the CNCX and stored
        # under the given entry key.
        cncx_backed = {
            3: 'text',
            5: 'kind',
            70: 'description',
            71: 'author',
            72: 'image_caption',
            73: 'image_attribution',
        }

        def ref_or_none(value):
            # Negative references are reported as None
            return None if value < 0 else value

        for num, (text, tag_map) in enumerate(iteritems(self.table)):
            e = default_entry.copy()
            e['name'] = text
            e['num'] = num

            for tag in tag_fieldname_map:
                fieldname, i = tag_fieldname_map[tag]
                if tag not in tag_map:
                    continue
                fieldvalue = tag_map[tag][i]
                if tag == 6:
                    # Appears to be an idx into the KF8 elems table with an
                    # offset
                    fieldvalue = tuple(tag_map[tag])
                e[fieldname] = fieldvalue
                if tag in cncx_backed:
                    name = cncx_backed[tag]
                    e[name] = self.cncx.get(fieldvalue, default_entry[name])

            self.records.append(NCXEntry(
                start=e['pos'],
                index=e['num'],
                length=e['len'],
                depth=e['hlvl'],
                parent=ref_or_none(e['parent']),
                first_child=ref_or_none(e['child1']),
                last_child=ref_or_none(e['childn']),
                title=e['text'],
                pos_fid=e['pos_fid'],
                kind=e['kind'],
            ))

Zerion Mini Shell 1.0