%PDF- %PDF-
Mini Shell

Mini Shell

Direktori : /usr/lib/calibre/calibre/ebooks/oeb/polish/check/
Upload File :
Create Path :
Current File : //usr/lib/calibre/calibre/ebooks/oeb/polish/check/opf.py

#!/usr/bin/env python3


__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'

from lxml import etree

from calibre import prepare_string_for_xml as xml
from calibre.ebooks.oeb.polish.check.base import BaseError, WARN
from calibre.ebooks.oeb.polish.toc import find_existing_nav_toc, parse_nav
from calibre.ebooks.oeb.polish.utils import guess_type
from calibre.ebooks.oeb.base import OPF, OPF2_NS, DC, DC11_NS, XHTML_MIME
from polyglot.builtins import iteritems


class MissingSection(BaseError):
    """Reported when a required top-level section is absent from the OPF."""

    def __init__(self, name, section_name):
        # name: the file the error is reported against;
        # section_name: the tag name of the missing section.
        title = _('The <%s> section is missing from the OPF') % section_name
        super().__init__(title, name)
        help_text = _(
            'The <%s> section is required in the OPF file. You have to create one.') % section_name
        # The text contains angle brackets, so escape it for display as markup.
        self.HELP = xml(help_text)


class EmptyID(BaseError):
    """Reported for an element whose id attribute is the empty string."""

    def __init__(self, name, lnum):
        # lnum: source line of the offending element.
        title = _('Empty id attributes are invalid')
        super().__init__(title, name, lnum)
        help_text = _(
            'Empty ID attributes are invalid in OPF files.')
        self.HELP = xml(help_text)


class IncorrectIdref(BaseError):
    """Reported when an idref attribute names an id that is not in the OPF."""

    def __init__(self, name, idref, lnum):
        # idref: the dangling reference value; lnum: line of the referring element.
        title = _('idref="%s" points to unknown id') % idref
        super().__init__(title, name, lnum)
        help_text = _(
            'The idref="%s" points to an id that does not exist in the OPF') % idref
        self.HELP = xml(help_text)


class IncorrectCover(BaseError):
    """Reported when a <meta name="cover"> refers to an id missing from the manifest."""

    def __init__(self, name, lnum, cover):
        # cover: the value of the meta tag's content attribute.
        title = _('The meta cover tag points to an non-existent item')
        super().__init__(title, name, lnum)
        help_text = _(
            'The meta cover tag points to an item with id="%s" which does not exist in the manifest') % cover
        self.HELP = xml(help_text)


class NookCover(BaseError):
    """Reported when a cover meta tag serializes content before name."""

    HELP = _(
            'Some e-book readers such as the Nook fail to recognize covers if'
            ' the content attribute comes before the name attribute.'
            ' For maximum compatibility move the name attribute before the content attribute.')
    INDIVIDUAL_FIX = _('Move the name attribute before the content attribute')

    def __init__(self, name, lnum):
        title = _('The meta cover tag has content before name')
        super().__init__(title, name, lnum)

    def __call__(self, container):
        """Re-add the content attribute on every cover meta tag; always returns True."""
        cover_metas = container.opf_xpath('//opf:meta[@name="cover" and @content]')
        for meta in cover_metas:
            # Removing and re-setting the attribute changes its position
            # relative to name, which is the point of this fix.
            value = meta.attrib.pop('content')
            meta.set('content', value)
        container.dirty(container.opf_name)
        return True


class IncorrectToc(BaseError):
    """Reported when the spine's toc attribute is unusable.

    Either the referenced manifest item does not exist (``bad_idref`` given)
    or it exists with the wrong media-type (``bad_mimetype`` given).
    """

    def __init__(self, name, lnum, bad_idref=None, bad_mimetype=None):
        if bad_idref is None:
            # The item exists, but its media-type is not the NCX one.
            msg = _('The item identified as the Table of Contents has an incorrect media-type (%s)') % bad_mimetype
            self.HELP = _('The media type for the Table of Contents must be %s') % guess_type('a.ncx')
        else:
            # The toc attribute names an id with no manifest item.
            msg = _('The item identified as the Table of Contents (%s) does not exist') % bad_idref
            self.HELP = _('There is no item with id="%s" in the manifest.') % bad_idref
        super().__init__(msg, name, lnum)


class NoHref(BaseError):
    """Reported for a manifest <item> that has no href attribute.

    The automatic fix removes the offending entry from the manifest.
    """

    HELP = _('This manifest entry has no href attribute. Either add the href attribute or remove the entry.')
    INDIVIDUAL_FIX = _('Remove this manifest entry')

    def __init__(self, name, item_id, lnum):
        # item_id: the id of the href-less item, or None when it has no id.
        BaseError.__init__(self, _('Item in manifest has no href attribute'), name, lnum)
        self.item_id = item_id

    def __call__(self, container):
        """Remove href-less manifest items whose id equals ``self.item_id``.

        Returns True if at least one item was removed, False otherwise.
        """
        changed = False
        for item in container.opf_xpath('/opf:package/opf:manifest/opf:item'):
            # Only entries that actually lack an href are candidates. Matching
            # on id alone would, when item_id is None, delete every valid item
            # that merely has no id, and would also delete valid items that
            # happen to share a duplicated id with the broken entry.
            if item.get('href') is not None:
                continue
            if item.get('id', None) == self.item_id:
                changed = True
                container.remove_from_xml(item)
                container.dirty(container.opf_name)
        return changed


class MissingNCXRef(BaseError):
    """Reported when the <spine> has no toc attribute although an NCX is in the manifest."""

    HELP = _('The <spine> tag has no reference to the NCX table of contents file.'
             ' Without this reference, the table of contents will not work in most'
             ' readers. The reference should look like <spine toc="id of manifest item for the ncx file">.')
    INDIVIDUAL_FIX = _('Add the reference to the NCX file')

    def __init__(self, name, lnum, ncx_id):
        # ncx_id: manifest id of the NCX item the spine should point at.
        title = _('Missing reference to the NCX Table of Contents')
        super().__init__(title, name, lnum)
        self.ncx_id = ncx_id

    def __call__(self, container):
        """Set toc on every <spine> that lacks it; return whether anything changed."""
        modified = False
        for spine in container.opf_xpath('/opf:package/opf:spine'):
            if spine.get('toc') is not None:
                continue
            spine.set('toc', self.ncx_id)
            modified = True
            container.dirty(container.opf_name)
        return modified


class MissingNav(BaseError):
    """Reported when no navigation document is found for the book."""

    HELP = _('This book has no Navigation document. According to the EPUB 3 specification, a navigation document'
             ' is required. The Navigation document contains the Table of Contents. Use the Table of Contents'
             ' tool to add a Table of Contents to this book.')

    def __init__(self, name, lnum):
        title = _('Missing navigation document')
        super().__init__(title, name, lnum)


class EmptyNav(BaseError):
    """Reported when the navigation document has no (or an empty) table of contents."""

    HELP = _('The nav document for this book contains no table of contents, or an empty table of contents.'
             ' Use the Table of Contents tool to add a Table of Contents to this book.')
    # The severity attribute used elsewhere in this file (see NonLinearItems)
    # is lowercase ``level``; the original uppercase spelling is presumably
    # never consulted, so the warning would be reported at the default level.
    # The uppercase alias is kept in case external code reads it.
    level = LEVEL = WARN

    def __init__(self, name, lnum):
        BaseError.__init__(self, _('Missing ToC in navigation document'), name, lnum)


class MissingHref(BaseError):
    """Reported when a manifest item's href points at a file that is not in the book."""

    HELP = _('A file listed in the manifest is missing, you should either remove'
             ' it from the manifest or add the missing file to the book.')

    def __init__(self, name, href, lnum):
        title = _('Item (%s) in manifest is missing') % href
        super().__init__(title, name, lnum)
        self.bad_href = href
        self.INDIVIDUAL_FIX = _('Remove the entry for %s from the manifest') % href

    def __call__(self, container):
        """Remove every manifest item whose href equals the bad href; always returns True."""
        manifest_items = container.opf_xpath('/opf:package/opf:manifest/opf:item[@href]')
        for entry in manifest_items:
            if entry.get('href') == self.bad_href:
                container.remove_from_xml(entry)
        container.dirty(container.opf_name)
        return True


class NonLinearItems(BaseError):
    """Warning reported when the spine contains itemrefs marked linear="no"."""

    level = WARN
    has_multiple_locations = True

    HELP = xml(_('There are items marked as non-linear in the <spine>.'
                 ' These will be displayed in random order by different e-book readers.'
                 ' Some will ignore the non-linear attribute, some will display'
                 ' them at the end or the beginning of the book and some will'
                 ' fail to display them at all. Instead of using non-linear items'
                 ' simply place the items in the order you want them to be displayed.'))

    INDIVIDUAL_FIX = _('Mark all non-linear items as linear')

    def __init__(self, name, locs):
        # locs: source line numbers of the non-linear itemrefs.
        title = _('Non-linear items in the spine')
        super().__init__(title, name)
        self.all_locations = [(name, line, None) for line in locs]

    def __call__(self, container):
        """Drop the linear attribute from every spine itemref that has one; returns True."""
        itemrefs = container.opf_xpath('//opf:spine/opf:itemref[@linear]')
        for ref in itemrefs:
            ref.attrib.pop('linear')
        container.dirty(container.opf_name)
        return True


class DuplicateHref(BaseError):
    """Reported when the same href (manifest) or idref (spine) occurs more than once."""

    has_multiple_locations = True

    INDIVIDUAL_FIX = _(
        'Remove all but the first duplicate item')

    def __init__(self, name, eid, locs, for_spine=False):
        # eid: the duplicated href/idref value; locs: line numbers of all occurrences.
        if for_spine:
            loc, attr, tail = 'spine', 'idref', 'spine/opf:itemref[@idref]'
        else:
            loc, attr, tail = 'manifest', 'href', 'manifest/opf:item[@href]'
        title = _('Duplicate item in {0}: {1}').format(loc, eid)
        super().__init__(title, name)
        self.HELP = _(
            'The item {0} is present more than once in the {2} in {1}. This is'
            ' not allowed.').format(eid, name, loc)
        self.all_locations = [(name, lnum, None) for lnum in sorted(locs)]
        self.duplicate_href = eid
        # XPath and attribute name that __call__ uses to find the duplicates.
        self.xpath = '/opf:package/opf:' + tail
        self.attr = attr

    def __call__(self, container):
        """Keep the first matching element and remove the rest; always returns True."""
        matches = []
        for elem in container.opf_xpath(self.xpath):
            if elem.get(self.attr) == self.duplicate_href:
                matches.append(elem)
        for extra in matches[1:]:
            container.remove_from_xml(extra)
        container.dirty(self.name)
        return True


class MultipleCovers(BaseError):
    """Reported when the metadata holds more than one <meta name="cover"> tag."""

    has_multiple_locations = True
    HELP = xml(_(
        'There is more than one <meta name="cover"> tag defined. There should be only one.'))
    INDIVIDUAL_FIX = _('Remove all but the first meta cover tag')

    def __init__(self, name, locs):
        # locs: source line numbers of every cover meta tag.
        title = _('There is more than one cover defined')
        super().__init__(title, name)
        self.all_locations = [(name, lnum, None) for lnum in sorted(locs)]

    def __call__(self, container):
        """Remove every cover meta tag after the first one; always returns True."""
        cover_metas = list(container.opf_xpath('/opf:package/opf:metadata/opf:meta[@name="cover"]'))
        for extra in cover_metas[1:]:
            container.remove_from_xml(extra)
        container.dirty(self.name)
        return True


class NoUID(BaseError):
    """Reported when the package has no usable unique identifier."""

    HELP = xml(_(
        'The OPF must have a unique identifier, i.e. a <dc:identifier> element whose id is referenced'
        ' by the <package> element'))
    INDIVIDUAL_FIX = _('Auto-generate a unique identifier')

    def __init__(self, name):
        title = _('The OPF has no unique identifier')
        super().__init__(title, name)

    def __call__(self, container):
        """Create a new uuid <dc:identifier> and reference it from <package>; returns True."""
        from calibre.ebooks.oeb.base import uuid_id
        package = container.opf
        new_uid = uuid_id()
        package.set('unique-identifier', new_uid)

        found = container.opf_xpath('/opf:package/opf:metadata')
        if found:
            metadata = found[0]
        else:
            # No <metadata> section at all: create one as the first child.
            metadata = container.opf.makeelement(OPF('metadata'), nsmap={'dc':DC11_NS})
            container.insert_into_xml(container.opf, metadata, 0)

        identifier = metadata.makeelement(DC('identifier'), id=new_uid, nsmap={'opf':OPF2_NS})
        identifier.set(OPF('scheme'), 'uuid')
        # The generated value is used both as the element id and as its text.
        identifier.text = new_uid
        container.insert_into_xml(metadata, identifier)
        container.dirty(container.opf_name)
        return True


class EmptyIdentifier(BaseError):
    """Reported for a <dc:identifier> element with no (or only whitespace) text."""

    HELP = xml(_('The <dc:identifier> element must not be empty.'))

    def __init__(self, name, lnum):
        title = _('Empty identifier element')
        super().__init__(title, name, lnum)


class BadSpineMime(BaseError):
    """Reported when a spine item's media-type is not the XHTML one.

    When the manifest item id is known, an automatic fix is offered that
    rewrites the item's media-type.
    """

    def __init__(self, name, iid, mt, lnum, opf_name):
        # name: the spine file; iid: id of its manifest item (None if unknown);
        # mt: the media-type found; lnum: line of the manifest item, if any.
        # The error is reported against the OPF, not against the spine file.
        BaseError.__init__(self, _('Incorrect media-type for spine item'), opf_name, lnum)
        self.HELP = _(
            'The item {0} present in the spine has the media-type {1}. '
            ' Most e-book software cannot handle non-HTML spine items. '
            ' If the item is actually HTML, you should change its media-type to {2}.'
            ' If it is not-HTML you should consider replacing it with an HTML item, as it'
            ' is unlikely to work in most readers.').format(name, mt, XHTML_MIME)
        if iid is not None:
            # The fix is only offered when there is a manifest item to edit.
            self.INDIVIDUAL_FIX = _('Change the media-type to %s') % XHTML_MIME
            self.iid = iid

    def __call__(self, container):
        """Set the media-type of the first manifest item with id ``self.iid``.

        Returns True if the item was found and updated, False if no manifest
        item carries that id any more.
        """
        # Compare ids in Python instead of interpolating repr(id) into the
        # XPath: Python string quoting is not XPath quoting, so ids containing
        # backslashes or both quote characters produced a wrong or invalid
        # expression.
        for item in container.opf_xpath('/opf:package/opf:manifest/opf:item[@id]'):
            if item.get('id') == self.iid:
                item.set('media-type', XHTML_MIME)
                container.dirty(container.opf_name)
                container.refresh_mime_map()
                return True
        return False


def check_opf(container):
    """Run all OPF checks on ``container`` and return the problems found.

    Returns a list of BaseError instances (possibly empty), in the order the
    checks are performed. Nothing in the container is modified here; fixes
    are applied later by calling the individual error objects.
    """
    errors = []
    opf_version = container.opf_version_parsed

    # Root element and version attribute.
    if container.opf.tag != OPF('package'):
        err = BaseError(_('The OPF does not have the correct root element'), container.opf_name, container.opf.sourceline)
        err.HELP = xml(_(
            'The OPF must have the root element <package> in namespace {0}, like this: <package xmlns="{0}">')).format(OPF2_NS)
        errors.append(err)

    elif container.opf.get('version') is None and container.book_type == 'epub':
        err = BaseError(_('The OPF does not have a version'), container.opf_name, container.opf.sourceline)
        err.HELP = xml(_(
            'The <package> tag in the OPF must have a version attribute. This is usually version="2.0" for EPUB2 and AZW3 and version="3.0" for EPUB3'))
        errors.append(err)

    # Required sections.
    for tag in ('metadata', 'manifest', 'spine'):
        if not container.opf_xpath('/opf:package/opf:' + tag):
            errors.append(MissingSection(container.opf_name, tag))

    # Empty ids and idrefs that do not resolve to any id in the document.
    all_ids = set(container.opf_xpath('//*/@id'))
    if '' in all_ids:
        for empty_id_tag in container.opf_xpath('//*[@id=""]'):
            errors.append(EmptyID(container.opf_name, empty_id_tag.sourceline))
    all_ids.discard('')
    for elem in container.opf_xpath('//*[@idref]'):
        if elem.get('idref') not in all_ids:
            errors.append(IncorrectIdref(container.opf_name, elem.get('idref'), elem.sourceline))

    # Non-linear spine items are collected into a single warning.
    nl_items = [elem.sourceline for elem in container.opf_xpath('//opf:spine/opf:itemref[@linear="no"]')]
    if nl_items:
        errors.append(NonLinearItems(container.opf_name, nl_items))

    # Manifest: missing hrefs, hrefs to absent files, duplicated hrefs.
    # seen maps a value to the line of its first occurrence; dups maps a
    # duplicated value to the lines of all its occurrences.
    seen, dups = {}, {}
    for item in container.opf_xpath('/opf:package/opf:manifest/opf:item'):
        href = item.get('href', None)
        if href is None:
            errors.append(NoHref(container.opf_name, item.get('id', None), item.sourceline))
        else:
            hname = container.href_to_name(href, container.opf_name)
            if not hname or not container.exists(hname):
                errors.append(MissingHref(container.opf_name, href, item.sourceline))
            if href in seen:
                if href not in dups:
                    dups[href] = [seen[href]]
                dups[href].append(item.sourceline)
            else:
                seen[href] = item.sourceline
    errors.extend(DuplicateHref(container.opf_name, eid, locs) for eid, locs in iteritems(dups))

    # Spine: duplicated idrefs, tracked the same way as manifest hrefs.
    seen, dups = {}, {}
    for item in container.opf_xpath('/opf:package/opf:spine/opf:itemref[@idref]'):
        ref = item.get('idref')
        if ref in seen:
            if ref not in dups:
                dups[ref] = [seen[ref]]
            dups[ref].append(item.sourceline)
        else:
            seen[ref] = item.sourceline
    errors.extend(DuplicateHref(container.opf_name, eid, locs, for_spine=True) for eid, locs in iteritems(dups))

    # NCX reference: either validate the existing toc attribute, or suggest
    # adding one when an NCX file is present in the manifest.
    spine = container.opf_xpath('/opf:package/opf:spine[@toc]')
    if spine:
        spine = spine[0]
        mitems = [x for x in container.opf_xpath('/opf:package/opf:manifest/opf:item[@id]') if x.get('id') == spine.get('toc')]
        if mitems:
            mitem = mitems[0]
            if mitem.get('media-type', '') != guess_type('a.ncx'):
                errors.append(IncorrectToc(container.opf_name, mitem.sourceline, bad_mimetype=mitem.get('media-type')))
        else:
            errors.append(IncorrectToc(container.opf_name, spine.sourceline, bad_idref=spine.get('toc')))
    else:
        spine = container.opf_xpath('/opf:package/opf:spine')
        if spine:
            spine = spine[0]
            ncx = container.manifest_type_map.get(guess_type('a.ncx'))
            if ncx:
                ncx_name = ncx[0]
                # Invert the id -> name map to find the manifest id of the NCX file.
                rmap = {v:k for k, v in iteritems(container.manifest_id_map)}
                ncx_id = rmap.get(ncx_name)
                if ncx_id:
                    errors.append(MissingNCXRef(container.opf_name, spine.sourceline, ncx_id))

    # Navigation document, only checked for OPF versions above 2.
    if opf_version.major > 2:
        existing_nav = find_existing_nav_toc(container)
        if existing_nav is None:
            errors.append(MissingNav(container.opf_name, 0))
        else:
            toc = parse_nav(container, existing_nav)
            if len(toc) == 0:
                errors.append(EmptyNav(existing_nav, 0))

    # Cover meta tags: multiplicity, dangling references, attribute order.
    covers = container.opf_xpath('/opf:package/opf:metadata/opf:meta[@name="cover"]')
    if len(covers) > 0:
        if len(covers) > 1:
            errors.append(MultipleCovers(container.opf_name, [c.sourceline for c in covers]))
        manifest_ids = set(container.opf_xpath('/opf:package/opf:manifest/opf:item/@id'))
        for cover in covers:
            if cover.get('content', None) not in manifest_ids:
                errors.append(IncorrectCover(container.opf_name, cover.sourceline, cover.get('content', '')))
            # Attribute order is only visible in the serialized form.
            raw = etree.tostring(cover)
            try:
                n, c = raw.index(b'name="'), raw.index(b'content="')
            except ValueError:
                n = c = -1
            if n > -1 and c > -1 and n > c:
                errors.append(NookCover(container.opf_name, cover.sourceline))

    # Unique identifier: <package unique-identifier> must name a dc:identifier.
    # The id is compared in Python rather than interpolated into the XPath
    # with %r, because Python string quoting is not XPath quoting (ids with
    # backslashes or both quote characters gave a wrong or invalid expression).
    uid = container.opf.get('unique-identifier', None)
    identifiers = container.opf_xpath('/opf:package/opf:metadata/dc:identifier')
    if uid is None or not any(elem.get('id') == uid for elem in identifiers):
        errors.append(NoUID(container.opf_name))
    for elem in identifiers:
        if not elem.text or not elem.text.strip():
            errors.append(EmptyIdentifier(container.opf_name, elem.sourceline))

    # Spine items whose media-type is not XHTML. The manifest lookup table is
    # built once; the first item wins for a duplicated id, matching what
    # taking element [0] of an XPath result would select.
    manifest_items_by_id = {}
    for mitem in container.opf_xpath('/opf:package/opf:manifest/opf:item[@id]'):
        manifest_items_by_id.setdefault(mitem.get('id'), mitem)
    for item, name, linear in container.spine_iter:
        mt = container.mime_map[name]
        if mt != XHTML_MIME:
            iid = item.get('idref', None)
            lnum = None
            if iid:
                # Explicit None test: an lxml element with no children is falsy.
                mitem = manifest_items_by_id.get(iid)
                if mitem is not None:
                    lnum = mitem.sourceline
                else:
                    # No manifest item to fix, so no automatic fix is offered.
                    iid = None
            errors.append(BadSpineMime(name, iid, mt, lnum, container.opf_name))

    return errors

Zerion Mini Shell 1.0