%PDF- %PDF-
| Direktori : /usr/lib/calibre/calibre/ebooks/oeb/transforms/ |
| Current File : //usr/lib/calibre/calibre/ebooks/oeb/transforms/metadata.py |
#!/usr/bin/env python3
__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, re
from calibre.utils.date import isoformat, now
from calibre import guess_type
from polyglot.builtins import iteritems
def meta_info_to_oeb_metadata(mi, m, log, override_input_metadata=False):
from calibre.ebooks.oeb.base import OPF
if not mi.is_null('title'):
m.clear('title')
m.add('title', mi.title)
if mi.title_sort:
if not m.title:
m.add('title', mi.title_sort)
m.clear('title_sort')
m.add('title_sort', mi.title_sort)
if not mi.is_null('authors'):
m.filter('creator', lambda x : x.role.lower() in ['aut', ''])
for a in mi.authors:
attrib = {'role':'aut'}
if mi.author_sort:
attrib[OPF('file-as')] = mi.author_sort
m.add('creator', a, attrib=attrib)
if not mi.is_null('book_producer'):
m.filter('contributor', lambda x : x.role.lower() == 'bkp')
m.add('contributor', mi.book_producer, role='bkp')
elif override_input_metadata:
m.filter('contributor', lambda x : x.role.lower() == 'bkp')
if not mi.is_null('comments'):
m.clear('description')
m.add('description', mi.comments)
elif override_input_metadata:
m.clear('description')
if not mi.is_null('publisher'):
m.clear('publisher')
m.add('publisher', mi.publisher)
elif override_input_metadata:
m.clear('publisher')
if not mi.is_null('series'):
m.clear('series')
m.add('series', mi.series)
elif override_input_metadata:
m.clear('series')
identifiers = mi.get_identifiers()
set_isbn = False
for typ, val in iteritems(identifiers):
has = False
if typ.lower() == 'isbn':
set_isbn = True
for x in m.identifier:
if x.scheme.lower() == typ.lower():
x.content = val
has = True
if not has:
m.add('identifier', val, scheme=typ.upper())
if override_input_metadata and not set_isbn:
m.filter('identifier', lambda x: x.scheme.lower() == 'isbn')
if not mi.is_null('languages'):
m.clear('language')
for lang in mi.languages:
if lang and lang.lower() not in ('und', ''):
m.add('language', lang)
if not mi.is_null('series_index'):
m.clear('series_index')
m.add('series_index', mi.format_series_index())
elif override_input_metadata:
m.clear('series_index')
if not mi.is_null('rating'):
m.clear('rating')
m.add('rating', '%.2f'%mi.rating)
elif override_input_metadata:
m.clear('rating')
if not mi.is_null('tags'):
m.clear('subject')
for t in mi.tags:
m.add('subject', t)
elif override_input_metadata:
m.clear('subject')
if not mi.is_null('pubdate'):
m.clear('date')
m.add('date', isoformat(mi.pubdate))
if not mi.is_null('timestamp'):
m.clear('timestamp')
m.add('timestamp', isoformat(mi.timestamp))
if not mi.is_null('rights'):
m.clear('rights')
m.add('rights', mi.rights)
if not mi.is_null('publication_type'):
m.clear('publication_type')
m.add('publication_type', mi.publication_type)
if not m.timestamp:
m.add('timestamp', isoformat(now()))
class MergeMetadata:
'Merge in user metadata, including cover'
def __call__(self, oeb, mi, opts, override_input_metadata=False):
self.oeb, self.log = oeb, oeb.log
m = self.oeb.metadata
self.log('Merging user specified metadata...')
meta_info_to_oeb_metadata(mi, m, oeb.log,
override_input_metadata=override_input_metadata)
cover_id = self.set_cover(mi, opts.prefer_metadata_cover)
m.clear('cover')
if cover_id is not None:
m.add('cover', cover_id)
if mi.uuid is not None:
m.filter('identifier', lambda x:x.id=='uuid_id')
self.oeb.metadata.add('identifier', mi.uuid, id='uuid_id',
scheme='uuid')
self.oeb.uid = self.oeb.metadata.identifier[-1]
if mi.application_id is not None:
m.filter('identifier', lambda x:x.scheme=='calibre')
self.oeb.metadata.add('identifier', mi.application_id, scheme='calibre')
def set_cover(self, mi, prefer_metadata_cover):
cdata, ext = b'', 'jpg'
if mi.cover and os.access(mi.cover, os.R_OK):
with open(mi.cover, 'rb') as f:
cdata = f.read()
ext = mi.cover.rpartition('.')[-1].lower().strip()
elif mi.cover_data and mi.cover_data[-1]:
cdata = mi.cover_data[1]
ext = mi.cover_data[0]
if ext not in ('png', 'jpg', 'jpeg'):
ext = 'jpg'
id = old_cover = None
if 'cover' in self.oeb.guide:
old_cover = self.oeb.guide['cover']
if prefer_metadata_cover and old_cover is not None:
cdata = b''
if cdata:
self.oeb.guide.remove('cover')
self.oeb.guide.remove('titlepage')
elif self.oeb.plumber_output_format in {'mobi', 'azw3'} and old_cover is not None:
# The amazon formats dont support html cover pages, so remove them
# even if no cover was specified.
self.oeb.guide.remove('titlepage')
do_remove_old_cover = False
if old_cover is not None:
if old_cover.href in self.oeb.manifest.hrefs:
item = self.oeb.manifest.hrefs[old_cover.href]
if not cdata:
return item.id
do_remove_old_cover = True
elif not cdata:
id = self.oeb.manifest.generate(id='cover')[0]
self.oeb.manifest.add(id, old_cover.href, 'image/jpeg')
return id
new_cover_item = None
if cdata:
id, href = self.oeb.manifest.generate('cover', 'cover.'+ext)
new_cover_item = self.oeb.manifest.add(id, href, guess_type('cover.'+ext)[0], data=cdata)
self.oeb.guide.add('cover', 'Cover', href)
if do_remove_old_cover:
self.remove_old_cover(item, new_cover_item.href)
return id
def remove_old_cover(self, cover_item, new_cover_href=None):
from calibre.ebooks.oeb.base import XPath, XLINK
from lxml import etree
self.oeb.manifest.remove(cover_item)
# Remove any references to the cover in the HTML
affected_items = set()
xp = XPath('//h:img[@src]|//svg:image[@xl:href]')
for i, item in enumerate(self.oeb.spine):
try:
images = xp(item.data)
except Exception:
images = ()
removed = False
for img in images:
href = img.get('src') or img.get(XLINK('href'))
try:
href = item.abshref(href)
except Exception:
continue # Invalid URL, ignore
if href == cover_item.href:
if new_cover_href is not None:
replacement_href = item.relhref(new_cover_href)
attr = 'src' if img.tag.endswith('img') else XLINK('href')
img.set(attr, replacement_href)
else:
p = img.getparent()
if p.tag.endswith('}svg'):
p.getparent().remove(p)
else:
p.remove(img)
removed = True
if removed:
affected_items.add(item)
# Check if the resulting HTML has no content, if so remove it
for item in affected_items:
body = XPath('//h:body')(item.data)
if body:
text = etree.tostring(body[0], method='text', encoding='unicode')
else:
text = ''
text = re.sub(r'\s+', '', text)
if not text and not XPath('//h:img|//svg:svg')(item.data):
self.log('Removing %s as it is a wrapper around'
' the cover image'%item.href)
self.oeb.spine.remove(item)
self.oeb.manifest.remove(item)
self.oeb.guide.remove_by_href(item.href)