%PDF- %PDF-
| Direktori : /usr/lib/calibre/calibre/ebooks/oeb/polish/check/ |
| Current File : //usr/lib/calibre/calibre/ebooks/oeb/polish/check/opf.py |
#!/usr/bin/env python3
__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
from lxml import etree
from calibre import prepare_string_for_xml as xml
from calibre.ebooks.oeb.polish.check.base import BaseError, WARN
from calibre.ebooks.oeb.polish.toc import find_existing_nav_toc, parse_nav
from calibre.ebooks.oeb.polish.utils import guess_type
from calibre.ebooks.oeb.base import OPF, OPF2_NS, DC, DC11_NS, XHTML_MIME
from polyglot.builtins import iteritems
class MissingSection(BaseError):
    '''Error reported when a required section element is absent from the OPF.'''

    def __init__(self, name, section_name):
        # section_name is substituted into both the short message and the
        # (XML-escaped) help text.
        message = _('The <%s> section is missing from the OPF') % section_name
        super().__init__(message, name)
        help_text = _(
            'The <%s> section is required in the OPF file. You have to create one.') % section_name
        self.HELP = xml(help_text)
class EmptyID(BaseError):
    '''Error reported for an element carrying an empty id attribute.'''

    def __init__(self, name, lnum):
        message = _('Empty id attributes are invalid')
        super().__init__(message, name, lnum)
        help_text = _(
            'Empty ID attributes are invalid in OPF files.')
        self.HELP = xml(help_text)
class IncorrectIdref(BaseError):
    '''Error reported when an idref attribute names an id not present in the OPF.'''

    def __init__(self, name, idref, lnum):
        message = _('idref="%s" points to unknown id') % idref
        super().__init__(message, name, lnum)
        help_text = _(
            'The idref="%s" points to an id that does not exist in the OPF') % idref
        # The help text is XML-escaped after the idref has been substituted.
        self.HELP = xml(help_text)
class IncorrectCover(BaseError):
    '''Error reported when a <meta name="cover"> references an id missing from the manifest.'''

    def __init__(self, name, lnum, cover):
        message = _('The meta cover tag points to an non-existent item')
        super().__init__(message, name, lnum)
        # cover is the value of the content attribute of the offending tag.
        help_text = _(
            'The meta cover tag points to an item with id="%s" which does not exist in the manifest') % cover
        self.HELP = xml(help_text)
class NookCover(BaseError):
    '''Error reported when a cover <meta> tag has content before name.

    Calling the instance with a container applies the automatic fix.
    '''

    HELP = _(
        'Some e-book readers such as the Nook fail to recognize covers if'
        ' the content attribute comes before the name attribute.'
        ' For maximum compatibility move the name attribute before the content attribute.')
    INDIVIDUAL_FIX = _('Move the name attribute before the content attribute')

    def __init__(self, name, lnum):
        message = _('The meta cover tag has content before name')
        super().__init__(message, name, lnum)

    def __call__(self, container):
        '''Remove and re-add the content attribute on every cover meta tag.

        Always marks the OPF dirty and returns True.
        '''
        cover_tags = container.opf_xpath('//opf:meta[@name="cover" and @content]')
        for meta in cover_tags:
            # Popping and setting again presumably relies on lxml appending
            # a newly set attribute after the existing ones - TODO confirm.
            value = meta.attrib.pop('content')
            meta.set('content', value)
        container.dirty(container.opf_name)
        return True
class IncorrectToc(BaseError):
    '''Error for a spine toc reference that is dangling or has the wrong media-type.

    When bad_idref is given the referenced id does not exist; otherwise
    bad_mimetype is reported as the incorrect media-type.
    '''

    def __init__(self, name, lnum, bad_idref=None, bad_mimetype=None):
        if bad_idref is None:
            msg = _('The item identified as the Table of Contents has an incorrect media-type (%s)') % bad_mimetype
            self.HELP = _('The media type for the Table of Contents must be %s') % guess_type('a.ncx')
        else:
            msg = _('The item identified as the Table of Contents (%s) does not exist') % bad_idref
            self.HELP = _('There is no item with id="%s" in the manifest.') % bad_idref
        super().__init__(msg, name, lnum)
class NoHref(BaseError):
    '''Error for a manifest <item> that has no href attribute.

    Calling the instance with a container removes the manifest items whose
    id equals the stored item_id.
    '''

    HELP = _('This manifest entry has no href attribute. Either add the href attribute or remove the entry.')
    INDIVIDUAL_FIX = _('Remove this manifest entry')

    def __init__(self, name, item_id, lnum):
        message = _('Item in manifest has no href attribute')
        super().__init__(message, name, lnum)
        self.item_id = item_id

    def __call__(self, container):
        '''Remove matching manifest items; return True if any was removed.

        The OPF is marked dirty whether or not something was removed.
        '''
        manifest_items = container.opf_xpath('/opf:package/opf:manifest/opf:item')
        doomed = [entry for entry in manifest_items if entry.get('id', None) == self.item_id]
        for entry in doomed:
            container.remove_from_xml(entry)
        container.dirty(container.opf_name)
        return bool(doomed)
class MissingNCXRef(BaseError):
    '''Error for a <spine> without a toc attribute although an NCX is in the manifest.

    Calling the instance with a container sets the toc attribute to the
    stored ncx_id.
    '''

    HELP = _('The <spine> tag has no reference to the NCX table of contents file.'
             ' Without this reference, the table of contents will not work in most'
             ' readers. The reference should look like <spine toc="id of manifest item for the ncx file">.')
    INDIVIDUAL_FIX = _('Add the reference to the NCX file')

    def __init__(self, name, lnum, ncx_id):
        message = _('Missing reference to the NCX Table of Contents')
        super().__init__(message, name, lnum)
        self.ncx_id = ncx_id

    def __call__(self, container):
        '''Add toc=ncx_id to every <spine> lacking it; return True if any changed.

        The OPF is marked dirty whether or not something was changed.
        '''
        spines = container.opf_xpath('/opf:package/opf:spine')
        needing_toc = [spine for spine in spines if spine.get('toc') is None]
        for spine in needing_toc:
            spine.set('toc', self.ncx_id)
        container.dirty(container.opf_name)
        return bool(needing_toc)
class MissingNav(BaseError):
    '''Error reported when no navigation document is found for the book.'''

    HELP = _('This book has no Navigation document. According to the EPUB 3 specification, a navigation document'
             ' is required. The Navigation document contains the Table of Contents. Use the Table of Contents'
             ' tool to add a Table of Contents to this book.')

    def __init__(self, name, lnum):
        message = _('Missing navigation document')
        super().__init__(message, name, lnum)
class EmptyNav(BaseError):
    '''Warning reported when the navigation document has no (or an empty) ToC.'''

    HELP = _('The nav document for this book contains no table of contents, or an empty table of contents.'
             ' Use the Table of Contents tool to add a Table of Contents to this book.')
    # The sibling NonLinearItems class in this file sets the severity through
    # the lowercase ``level`` attribute, so set that one here as well.
    # NOTE(review): presumably BaseError only reads ``level`` - confirm
    # against calibre.ebooks.oeb.polish.check.base.
    level = WARN
    # Kept for backward compatibility with anything reading the old name.
    LEVEL = WARN

    def __init__(self, name, lnum):
        BaseError.__init__(self, _('Missing ToC in navigation document'), name, lnum)
class MissingHref(BaseError):
    '''Error for a manifest item whose href does not resolve to an existing file.

    Calling the instance with a container removes the manifest items whose
    href equals the stored bad_href.
    '''

    HELP = _('A file listed in the manifest is missing, you should either remove'
             ' it from the manifest or add the missing file to the book.')

    def __init__(self, name, href, lnum):
        message = _('Item (%s) in manifest is missing') % href
        super().__init__(message, name, lnum)
        self.bad_href = href
        self.INDIVIDUAL_FIX = _('Remove the entry for %s from the manifest') % href

    def __call__(self, container):
        '''Remove every manifest item with the bad href; always returns True.'''
        for entry in container.opf_xpath('/opf:package/opf:manifest/opf:item[@href]'):
            if entry.get('href') == self.bad_href:
                container.remove_from_xml(entry)
        container.dirty(container.opf_name)
        return True
class NonLinearItems(BaseError):
    '''Warning for spine itemrefs marked as non-linear.

    Calling the instance with a container strips the linear attribute from
    every spine itemref that has one.
    '''

    level = WARN
    has_multiple_locations = True

    HELP = xml(_('There are items marked as non-linear in the <spine>.'
                 ' These will be displayed in random order by different e-book readers.'
                 ' Some will ignore the non-linear attribute, some will display'
                 ' them at the end or the beginning of the book and some will'
                 ' fail to display them at all. Instead of using non-linear items'
                 ' simply place the items in the order you want them to be displayed.'))

    INDIVIDUAL_FIX = _('Mark all non-linear items as linear')

    def __init__(self, name, locs):
        message = _('Non-linear items in the spine')
        super().__init__(message, name)
        # One (name, line number, column) entry per offending itemref.
        self.all_locations = [(name, lnum, None) for lnum in locs]

    def __call__(self, container):
        '''Drop the linear attribute from all spine itemrefs; always returns True.'''
        for itemref in container.opf_xpath('//opf:spine/opf:itemref[@linear]'):
            del itemref.attrib['linear']
        container.dirty(container.opf_name)
        return True
class DuplicateHref(BaseError):
    '''Error for an entry that occurs more than once in the manifest or the spine.

    With for_spine=False the duplicated value is a manifest item href, with
    for_spine=True it is a spine itemref idref. Calling the instance with a
    container removes all but the first matching element.
    '''

    has_multiple_locations = True

    INDIVIDUAL_FIX = _(
        'Remove all but the first duplicate item')

    def __init__(self, name, eid, locs, for_spine=False):
        if for_spine:
            loc, attr, selector = 'spine', 'idref', 'spine/opf:itemref[@idref]'
        else:
            loc, attr, selector = 'manifest', 'href', 'manifest/opf:item[@href]'
        message = _('Duplicate item in {0}: {1}').format(loc, eid)
        super().__init__(message, name)
        self.HELP = _(
            'The item {0} is present more than once in the {2} in {1}. This is'
            ' not allowed.').format(eid, name, loc)
        self.all_locations = [(name, line, None) for line in sorted(locs)]
        self.duplicate_href = eid
        # XPath selecting the candidate elements and the attribute to compare.
        self.xpath = '/opf:package/opf:' + selector
        self.attr = attr

    def __call__(self, container):
        '''Remove every matching element after the first; always returns True.'''
        matches = []
        for elem in container.opf_xpath(self.xpath):
            if elem.get(self.attr) == self.duplicate_href:
                matches.append(elem)
        for extra in matches[1:]:
            container.remove_from_xml(extra)
        container.dirty(self.name)
        return True
class MultipleCovers(BaseError):
    '''Error for an OPF that defines more than one <meta name="cover"> tag.

    Calling the instance with a container removes all but the first one.
    '''

    has_multiple_locations = True
    HELP = xml(_(
        'There is more than one <meta name="cover"> tag defined. There should be only one.'))
    INDIVIDUAL_FIX = _('Remove all but the first meta cover tag')

    def __init__(self, name, locs):
        message = _('There is more than one cover defined')
        super().__init__(message, name)
        self.all_locations = [(name, line, None) for line in sorted(locs)]

    def __call__(self, container):
        '''Remove every cover meta tag after the first; always returns True.'''
        cover_tags = list(container.opf_xpath('/opf:package/opf:metadata/opf:meta[@name="cover"]'))
        for extra in cover_tags[1:]:
            container.remove_from_xml(extra)
        container.dirty(self.name)
        return True
class NoUID(BaseError):
    '''Error for an OPF without a usable unique identifier.

    Calling the instance with a container generates a new identifier, points
    the package's unique-identifier attribute at it and adds a matching
    <dc:identifier> element to the metadata section.
    '''

    HELP = xml(_(
        'The OPF must have a unique identifier, i.e. a <dc:identifier> element whose id is referenced'
        ' by the <package> element'))
    INDIVIDUAL_FIX = _('Auto-generate a unique identifier')

    def __init__(self, name):
        message = _('The OPF has no unique identifier')
        super().__init__(message, name)

    def __call__(self, container):
        '''Insert a freshly generated identifier into the OPF; always returns True.'''
        from calibre.ebooks.oeb.base import uuid_id
        package = container.opf
        uid = uuid_id()
        package.set('unique-identifier', uid)

        found = container.opf_xpath('/opf:package/opf:metadata')
        if found:
            metadata = found[0]
        else:
            # No metadata section yet: create one as the first child of the package.
            metadata = container.opf.makeelement(OPF('metadata'), nsmap={'dc': DC11_NS})
            container.insert_into_xml(container.opf, metadata, 0)

        # The generated value is used both as the element's id and as its text.
        identifier = metadata.makeelement(DC('identifier'), id=uid, nsmap={'opf': OPF2_NS})
        identifier.set(OPF('scheme'), 'uuid')
        identifier.text = uid
        container.insert_into_xml(metadata, identifier)
        container.dirty(container.opf_name)
        return True
class EmptyIdentifier(BaseError):
    '''Error reported for a <dc:identifier> element without any text.'''

    HELP = xml(_('The <dc:identifier> element must not be empty.'))

    def __init__(self, name, lnum):
        message = _('Empty identifier element')
        super().__init__(message, name, lnum)
class BadSpineMime(BaseError):
    '''Error for a spine item whose media-type is not the XHTML mime type.

    name is the book file of the spine item, iid the id of its manifest entry
    (or None when it could not be determined), mt its current media-type,
    lnum the line to report and opf_name the file the error is reported
    against. The automatic fix is only offered when iid is not None.
    '''

    def __init__(self, name, iid, mt, lnum, opf_name):
        BaseError.__init__(self, _('Incorrect media-type for spine item'), opf_name, lnum)
        self.HELP = _(
            'The item {0} present in the spine has the media-type {1}. '
            ' Most e-book software cannot handle non-HTML spine items. '
            ' If the item is actually HTML, you should change its media-type to {2}.'
            ' If it is not-HTML you should consider replacing it with an HTML item, as it'
            ' is unlikely to work in most readers.').format(name, mt, XHTML_MIME)
        if iid is not None:
            self.INDIVIDUAL_FIX = _('Change the media-type to %s') % XHTML_MIME
            self.iid = iid

    def __call__(self, container):
        '''Set the media-type of the first manifest item with id == self.iid.

        Returns True when an item was changed and False when no manifest item
        carries that id (nothing is marked dirty in that case).
        '''
        # Compare ids in Python instead of interpolating repr(id) into the
        # XPath expression: repr() output is not a valid XPath string literal
        # for ids containing quotes or backslashes.
        for item in container.opf_xpath('/opf:package/opf:manifest/opf:item[@id]'):
            if item.get('id') == self.iid:
                item.set('media-type', XHTML_MIME)
                container.dirty(container.opf_name)
                container.refresh_mime_map()
                return True
        return False
def _duplicate_locations(entries):
    '''Map every key occurring more than once in entries to all of its line numbers.

    entries is an iterable of (key, lnum) pairs. Keys seen only once are left
    out of the result; line numbers are kept in iteration order.
    '''
    first_seen, dups = {}, {}
    for key, lnum in entries:
        if key in first_seen:
            dups.setdefault(key, [first_seen[key]]).append(lnum)
        else:
            first_seen[key] = lnum
    return dups


def _first_manifest_item(container, item_id):
    '''Return the first manifest <item> whose id equals item_id, or None.

    The id is compared in Python rather than interpolated into the XPath
    expression, so ids containing quotes or backslashes are matched literally.
    '''
    for item in container.opf_xpath('/opf:package/opf:manifest/opf:item[@id]'):
        if item.get('id') == item_id:
            return item
    return None


def check_opf(container):
    '''Run the OPF checks on container and return the list of errors found.

    The checks are, in order: root element and version attribute, presence
    of the metadata/manifest/spine sections, empty ids and dangling idrefs,
    non-linear spine items, manifest items without href or pointing at
    missing files, duplicate manifest hrefs and spine idrefs, the spine toc
    reference, the navigation document (when the OPF major version is
    greater than 2), cover meta tags, the unique identifier, empty
    identifiers and the media-type of spine items.
    '''
    errors = []
    opf_version = container.opf_version_parsed
    opf_name = container.opf_name
    root = container.opf

    # Root element / version attribute
    if root.tag != OPF('package'):
        err = BaseError(_('The OPF does not have the correct root element'), opf_name, root.sourceline)
        err.HELP = xml(_(
            'The OPF must have the root element <package> in namespace {0}, like this: <package xmlns="{0}">')).format(OPF2_NS)
        errors.append(err)
    elif root.get('version') is None and container.book_type == 'epub':
        err = BaseError(_('The OPF does not have a version'), opf_name, root.sourceline)
        err.HELP = xml(_(
            'The <package> tag in the OPF must have a version attribute. This is usually version="2.0" for EPUB2 and AZW3 and version="3.0" for EPUB3'))
        errors.append(err)

    # Required sections
    for tag in ('metadata', 'manifest', 'spine'):
        if not container.opf_xpath('/opf:package/opf:' + tag):
            errors.append(MissingSection(opf_name, tag))

    # Empty ids and idrefs pointing nowhere
    all_ids = set(container.opf_xpath('//*/@id'))
    if '' in all_ids:
        for empty_id_tag in container.opf_xpath('//*[@id=""]'):
            errors.append(EmptyID(opf_name, empty_id_tag.sourceline))
        all_ids.discard('')
    for elem in container.opf_xpath('//*[@idref]'):
        idref = elem.get('idref')
        if idref not in all_ids:
            errors.append(IncorrectIdref(opf_name, idref, elem.sourceline))

    # Non-linear spine items
    nl_items = [elem.sourceline for elem in container.opf_xpath('//opf:spine/opf:itemref[@linear="no"]')]
    if nl_items:
        errors.append(NonLinearItems(opf_name, nl_items))

    # Manifest items: missing href attribute, missing files, duplicate hrefs
    manifest_entries = []
    for item in container.opf_xpath('/opf:package/opf:manifest/opf:item'):
        href = item.get('href', None)
        if href is None:
            errors.append(NoHref(opf_name, item.get('id', None), item.sourceline))
            continue
        hname = container.href_to_name(href, opf_name)
        if not hname or not container.exists(hname):
            errors.append(MissingHref(opf_name, href, item.sourceline))
        manifest_entries.append((href, item.sourceline))
    errors.extend(
        DuplicateHref(opf_name, eid, locs)
        for eid, locs in iteritems(_duplicate_locations(manifest_entries)))

    # Duplicate spine itemrefs
    spine_entries = [
        (item.get('idref'), item.sourceline)
        for item in container.opf_xpath('/opf:package/opf:spine/opf:itemref[@idref]')]
    errors.extend(
        DuplicateHref(opf_name, eid, locs, for_spine=True)
        for eid, locs in iteritems(_duplicate_locations(spine_entries)))

    # Reference from the spine to the NCX
    spine = container.opf_xpath('/opf:package/opf:spine[@toc]')
    if spine:
        spine = spine[0]
        toc_id = spine.get('toc')
        mitem = _first_manifest_item(container, toc_id)
        if mitem is None:
            errors.append(IncorrectToc(opf_name, spine.sourceline, bad_idref=toc_id))
        elif mitem.get('media-type', '') != guess_type('a.ncx'):
            errors.append(IncorrectToc(opf_name, mitem.sourceline, bad_mimetype=mitem.get('media-type')))
    else:
        spine = container.opf_xpath('/opf:package/opf:spine')
        if spine:
            spine = spine[0]
            ncx = container.manifest_type_map.get(guess_type('a.ncx'))
            if ncx:
                ncx_name = ncx[0]
                # Reverse lookup: find the manifest id registered for the NCX file.
                rmap = {v: k for k, v in iteritems(container.manifest_id_map)}
                ncx_id = rmap.get(ncx_name)
                if ncx_id:
                    errors.append(MissingNCXRef(opf_name, spine.sourceline, ncx_id))

    # Navigation document, only checked for OPF versions above 2
    if opf_version.major > 2:
        existing_nav = find_existing_nav_toc(container)
        if existing_nav is None:
            errors.append(MissingNav(opf_name, 0))
        else:
            toc = parse_nav(container, existing_nav)
            if len(toc) == 0:
                errors.append(EmptyNav(existing_nav, 0))

    # Cover meta tags
    covers = container.opf_xpath('/opf:package/opf:metadata/opf:meta[@name="cover"]')
    if covers:
        if len(covers) > 1:
            errors.append(MultipleCovers(opf_name, [c.sourceline for c in covers]))
        manifest_ids = set(container.opf_xpath('/opf:package/opf:manifest/opf:item/@id'))
        for cover in covers:
            if cover.get('content', None) not in manifest_ids:
                errors.append(IncorrectCover(opf_name, cover.sourceline, cover.get('content', '')))
            # Attribute order is only visible in the serialized form of the tag.
            raw = etree.tostring(cover)
            name_pos, content_pos = raw.find(b'name="'), raw.find(b'content="')
            # Both must be present (find() != -1) with content ahead of name.
            if -1 < content_pos < name_pos:
                errors.append(NookCover(opf_name, cover.sourceline))

    # Unique identifier and empty identifiers
    uid = root.get('unique-identifier', None)
    identifiers = container.opf_xpath('/opf:package/opf:metadata/dc:identifier')
    # Compare ids in Python: interpolating repr(uid) into the XPath breaks for
    # values containing quotes or backslashes.
    if uid is None or not any(ident.get('id') == uid for ident in identifiers):
        errors.append(NoUID(opf_name))
    for elem in identifiers:
        if not elem.text or not elem.text.strip():
            errors.append(EmptyIdentifier(opf_name, elem.sourceline))

    # Spine items that are not XHTML
    for item, name, linear in container.spine_iter:
        mt = container.mime_map[name]
        if mt == XHTML_MIME:
            continue
        iid = item.get('idref', None)
        lnum = None
        if iid:
            mitem = _first_manifest_item(container, iid)
            if mitem is None:
                # No manifest entry to fix, so no automatic fix is offered.
                iid = None
            else:
                lnum = mitem.sourceline
        errors.append(BadSpineMime(name, iid, mt, lnum, opf_name))

    return errors