%PDF- %PDF-
Direktori : /lib/calibre/calibre/ebooks/oeb/transforms/ |
Current File : //lib/calibre/calibre/ebooks/oeb/transforms/jacket.py |
#!/usr/bin/env python3


__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import os
import re
import sys
from contextlib import suppress
from string import Formatter
from xml.sax.saxutils import escape

from calibre import guess_type, prepare_string_for_xml
from calibre.constants import iswindows
from calibre.ebooks.chardet import strip_encoding_declarations
from calibre.ebooks.metadata import fmt_sidx, rating_to_stars
from calibre.ebooks.metadata.sources.identify import urls_from_identifiers
from calibre.ebooks.oeb.base import (
    XHTML, XHTML_NS, XPath, urldefrag, urlnormalize, xml2text
)
from calibre.library.comments import comments_to_html, markdown
from calibre.utils.config import tweaks
from calibre.utils.date import as_local_time, format_date, is_date_undefined
from calibre.utils.icu import sort_key

# Matches the <meta> marker that identifies a spine item as a generated jacket
JACKET_XPATH = '//h:meta[@name="calibre-content" and @content="jacket"]'


class SafeFormatter(Formatter):
    '''
    A string formatter that renders fields whose lookup raises KeyError as
    an empty string instead of propagating the error.
    '''

    def get_value(self, *args, **kwargs):
        try:
            return Formatter.get_value(self, *args, **kwargs)
        except KeyError:
            return ''


class Base:
    '''
    Shared helpers for the jacket transforms. Subclasses are expected to set
    ``self.oeb`` before calling any of these methods.
    '''

    def remove_images(self, item, limit=1):
        '''
        Remove up to `limit` images referenced via ``<img src>`` in `item`.

        Only images that are found in the manifest (by their absolute href,
        or by the normalized form of it) are removed. A removed image is
        dropped from the manifest and the guide, and its ``<img>`` tag is
        removed from the document.

        :return: The number of images removed
        '''
        path = XPath('//h:img[@src]')
        removed = 0
        for img in path(item.data):
            if removed >= limit:
                break
            href = item.abshref(img.get('src'))
            image = self.oeb.manifest.hrefs.get(href)
            if image is None:
                # Retry the lookup with the normalized form of the href
                href = urlnormalize(href)
                image = self.oeb.manifest.hrefs.get(href)
            if image is not None:
                self.oeb.manifest.remove(image)
                self.oeb.guide.remove_by_href(href)
                img.getparent().remove(img)
                removed += 1
        return removed


class RemoveFirstImage(Base):
    '''
    Transform that removes the first image in the book, skipping any
    generated jacket.
    '''

    def remove_first_image(self):
        '''
        Remove the first image found in the first non-jacket spine item that
        has one. If that leaves the spine item with no text and no images,
        the item itself is removed from the manifest, and any ToC and guide
        entries pointing at it are removed as well.
        '''
        deleted_item = None
        for item in self.oeb.spine:
            if XPath(JACKET_XPATH)(item.data):
                continue
            removed = self.remove_images(item)
            if removed > 0:
                self.log('Removed first image')
                body = XPath('//h:body')(item.data)
                if body:
                    raw = xml2text(body[0]).strip()
                    imgs = XPath('//h:img|//svg:svg')(item.data)
                    if not raw and not imgs:
                        self.log('Removing %s as it has no content'%item.href)
                        self.oeb.manifest.remove(item)
                        deleted_item = item
                break
        else:
            self.log.warn('Could not find first image to remove')
        if deleted_item is not None:
            # Iterate over a copy as entries are removed while looping
            for item in list(self.oeb.toc):
                href = urldefrag(item.href)[0]
                if href == deleted_item.href:
                    self.oeb.toc.remove(item)
            self.oeb.guide.remove_by_href(deleted_item.href)

    def __call__(self, oeb, opts, metadata):
        '''
        Remove the first image in the book if ``opts.remove_first_image`` is
        set. `metadata` is accepted but not used by this transform.
        '''
        self.oeb, self.opts, self.log = oeb, opts, oeb.log
        if opts.remove_first_image:
            self.remove_first_image()


class Jacket(Base):
    '''
    Book jacket manipulation. Remove any existing jacket and, if requested,
    insert a freshly rendered metadata jacket at the start of the book.
    '''

    def insert_metadata(self, mi):
        '''
        Render a jacket from `mi` and insert it as the first spine item.

        Values from the book's own OEB metadata are passed to
        :func:`render_jacket` as the ``alt_*`` fallbacks. Local images
        referenced by the rendered jacket are added to the manifest and the
        ``src`` attributes are rewritten to point at the added items.
        '''
        self.log('Inserting metadata into book...')

        try:
            tags = list(map(str, self.oeb.metadata.subject))
        except Exception:
            tags = []

        try:
            comments = str(self.oeb.metadata.description[0])
        except Exception:
            comments = ''

        try:
            title = str(self.oeb.metadata.title[0])
        except Exception:
            title = _('Unknown')

        try:
            authors = list(map(str, self.oeb.metadata.creator))
        except Exception:
            authors = [_('Unknown')]

        root = render_jacket(
            mi, self.opts.output_profile,
            alt_title=title, alt_tags=tags, alt_authors=authors,
            alt_comments=comments, rescale_fonts=True,
            smarten_punctuation=self.opts.smarten_punctuation)
        jacket_id, href = self.oeb.manifest.generate('calibre_jacket', 'jacket.xhtml')

        jacket = self.oeb.manifest.add(jacket_id, href, guess_type(href)[0], data=root)
        self.oeb.spine.insert(0, jacket, True)
        self.oeb.inserted_metadata_jacket = jacket
        for img, path in referenced_images(root):
            self.oeb.log('Embedding referenced image %s into jacket' % path)
            ext = path.rpartition('.')[-1].lower()
            item_id, href = self.oeb.manifest.generate('jacket_image', 'jacket_img.'+ext)
            with open(path, 'rb') as f:
                item = self.oeb.manifest.add(item_id, href, guess_type(href)[0], data=f.read())
            item.unload_data_from_memory()
            img.set('src', jacket.relhref(item.href))

    def remove_existing_jacket(self):
        '''
        Remove the first jacket found among the first four spine items,
        together with all the images it references.
        '''
        for x in self.oeb.spine[:4]:
            if XPath(JACKET_XPATH)(x.data):
                self.remove_images(x, limit=sys.maxsize)
                self.oeb.manifest.remove(x)
                self.log('Removed existing jacket')
                break

    def __call__(self, oeb, opts, metadata):
        '''
        Add metadata in jacket.xhtml if specified in opts
        If not specified, remove previous jacket instance
        '''
        self.oeb, self.opts, self.log = oeb, opts, oeb.log
        self.remove_existing_jacket()
        if opts.insert_metadata:
            self.insert_metadata(metadata)

# Render Jacket {{{


def get_rating(rating, rchar, e_rchar):
    '''
    Render `rating` as a five character string made of `rchar` (filled) and
    `e_rchar` (empty) characters.

    `rating` is halved and clamped to the range 0 - 5 before rendering. An
    empty string is returned if `rating` cannot be converted to a number or
    if the halved value is less than one.
    '''
    ans = ''
    try:
        num = float(rating)/2
    except Exception:
        return ans
    num = max(0, num)
    num = min(num, 5)
    if num < 1:
        return ans

    ans = ("%s%s") % (rchar * int(num), e_rchar * (5 - int(num)))
    return ans


class Series(str):
    '''
    XML escaped series text for use in the jacket template.

    The string value is "<index> of <em><series></em>" when both a series and
    an index are present, otherwise just the escaped series name (or an empty
    string). The following attributes are also available: ``roman`` (same as
    the string value, but with the index formatted with ``use_roman=True``),
    ``name``, ``number`` and ``roman_number``.
    '''

    def __new__(cls, series, series_index):
        if series and series_index is not None:
            roman = _('{1} of <em>{0}</em>').format(
                escape(series), escape(fmt_sidx(series_index, use_roman=True)))
            combined = _('{1} of <em>{0}</em>').format(
                escape(series), escape(fmt_sidx(series_index, use_roman=False)))
        else:
            combined = roman = escape(series or '')
        s = str.__new__(cls, combined)
        s.roman = roman
        s.name = escape(series or '')
        # A missing index is rendered as if it were 1.0
        s.number = escape(fmt_sidx(series_index or 1.0, use_roman=False))
        s.roman_number = escape(fmt_sidx(series_index or 1.0, use_roman=True))
        return s


class Timestamp:
    '''
    A date for use in the jacket template.

    Converting to a string gives the date formatted with `render_template`,
    or an empty string if the date is undefined. Accessing any other
    attribute treats the attribute name as a date format template.
    '''

    def __init__(self, dt, render_template):
        self.dt = as_local_time(dt)
        self.is_date_undefined = dt is None or is_date_undefined(dt)
        self.default_render = '' if self.is_date_undefined else escape(format_date(self.dt, render_template))

    def __repr__(self):
        return self.default_render
    __str__ = __repr__

    def __bool__(self):
        return bool(self.default_render)

    def __getattr__(self, template):
        # Only called for attributes that are not found normally. Any error
        # while formatting results in an empty string.
        with suppress(Exception):
            if not self.is_date_undefined:
                return escape(format_date(self.dt, template))
        return ''


class Tags(str):
    '''
    XML escaped, comma separated tags for use in the jacket template.

    The string value lists the tags in the order given, ``alphabetical``
    lists them sorted with ``sort_key`` and ``tags_list`` is the list of
    escaped tags. `output_profile` is accepted but not used.
    '''

    def __new__(cls, tags, output_profile):
        tags = [escape(x) for x in tags or ()]
        t = str.__new__(cls, ', '.join(tags))
        t.alphabetical = ', '.join(sorted(tags, key=sort_key))
        t.tags_list = tags
        return t


def postprocess_jacket(root, output_profile, has_data):
    '''
    Post-process the generated html to strip out empty header items.

    Elements with the classes ``cbj_series``, ``cbj_rating``, ``cbj_tags``
    and ``cbj_pubdata`` are removed when `has_data` reports no data for the
    corresponding field. Elements with the class ``cbj_kindle_banner_hr``
    are removed unless the output profile's short name is ``kindle``.
    '''

    def extract(tag):
        # Remove tag from the tree, preserving its tail text by attaching it
        # to whatever now precedes the position tag used to occupy.
        parent = tag.getparent()
        idx = parent.index(tag)
        parent.remove(tag)
        if tag.tail:
            if idx == 0:
                parent.text = (parent.text or '') + tag.tail
            else:
                # After the removal, the element that preceded tag is still
                # at index idx - 1
                prev = parent[idx - 1]
                prev.tail = (prev.tail or '') + tag.tail

    def extract_class(cls):
        for tag in root.xpath('//*[@class="_"]'.replace('_', cls)):
            extract(tag)

    for key in 'series rating tags'.split():
        if not has_data[key]:
            extract_class('cbj_' + key)
    if not has_data['pubdate']:
        extract_class('cbj_pubdata')
    if output_profile.short_name != 'kindle':
        extract_class('cbj_kindle_banner_hr')


class Attributes:
    '''
    An object on which every attribute that has not been explicitly set has
    the value ``'none'``.
    '''

    def __getattr__(self, name):
        return 'none'


class Identifiers:
    '''
    The book's identifiers for use in the jacket template.

    Attribute access returns the value of the identifier with that name, or
    an empty string if there is no such identifier. ``links`` is a comma
    separated string of HTML links built from ``urls_from_identifiers()``.
    ``display`` has the value ``'initial'`` for every identifier that is
    present (and for ``links`` when there are any links) and ``'none'`` for
    everything else.
    '''

    def __init__(self, idents):
        self.identifiers = idents or {}
        self.display = Attributes()
        for k in self.identifiers:
            setattr(self.display, k, 'initial')
        links = []
        for x in urls_from_identifiers(self.identifiers):
            name, id_typ, id_val, url = (prepare_string_for_xml(e, True) for e in x)
            links.append(f'<a href="{url}" title="{id_typ}:{id_val}">{name}</a>')
        self.links = ', '.join(links)
        self.display.links = 'initial' if self.links else 'none'

    def __getattr__(self, name):
        return self.identifiers.get(name, '')


def render_jacket(mi, output_profile,
        alt_title=_('Unknown'), alt_tags=(), alt_comments='',
        alt_publisher='', rescale_fonts=False, alt_authors=None, smarten_punctuation=False):
    '''
    Render the jacket for the book described by `mi` and return the root of
    the parsed result.

    :param mi: The metadata to render
    :param output_profile: Supplies the rating characters and the short name
        used to decide whether Kindle specific markup is kept
    :param alt_title: Used when `mi` has no title
    :param alt_tags: Used when `mi` has no tags
    :param alt_comments: Used when `mi` has no comments
    :param alt_publisher: Used when `mi` has no publisher
    :param rescale_fonts: If True, wrap the contents of the body in a div
        marked with ``data-calibre-rescale``
    :param alt_authors: Used when `mi` has no authors
    :param smarten_punctuation: If True, run the generated HTML through
        punctuation smartening before it is parsed
    '''
    css = P('jacket/stylesheet.css', data=True).decode('utf-8')
    template = P('jacket/template.xhtml', data=True).decode('utf-8')

    # Strip comments from the template and the stylesheet
    template = re.sub(r'<!--.*?-->', '', template, flags=re.DOTALL)
    css = re.sub(r'/\*.*?\*/', '', css, flags=re.DOTALL)

    try:
        title_str = alt_title if mi.is_null('title') else mi.title
    except Exception:
        title_str = _('Unknown')
    title_str = escape(title_str)
    title = '<span class="title">%s</span>' % title_str

    series = Series(mi.series, mi.series_index)
    try:
        publisher = mi.publisher if not mi.is_null('publisher') else alt_publisher
    except Exception:
        publisher = ''
    publisher = escape(publisher)

    pubdate = timestamp = None
    with suppress(Exception):
        if not is_date_undefined(mi.pubdate):
            pubdate = mi.pubdate
    with suppress(Exception):
        if not is_date_undefined(mi.timestamp):
            timestamp = mi.timestamp

    rating = get_rating(mi.rating, output_profile.ratings_char, output_profile.empty_ratings_char)

    tags = Tags((mi.tags if mi.tags else alt_tags), output_profile)

    comments = mi.comments if mi.comments else alt_comments
    # Guard against a caller passing None as alt_comments
    comments = (comments or '').strip()
    if comments:
        comments = comments_to_html(comments)

    # Temporarily substitute the fallback authors so that format_authors()
    # uses them, making sure the original value is always restored
    orig = mi.authors
    if mi.is_null('authors'):
        mi.authors = list(alt_authors or (_('Unknown'),))
    try:
        author = mi.format_authors()
    except Exception:
        author = ''
    finally:
        mi.authors = orig
    author = escape(author)
    has_data = {}

    def generate_html(comments):
        # Fill in the template. As a side effect, has_data is populated for
        # use by postprocess_jacket().
        display = Attributes()
        args = dict(
            xmlns=XHTML_NS,
            title_str=title_str,
            identifiers=Identifiers(mi.identifiers),
            css=css,
            title=title,
            author=author,
            publisher=publisher,
            publisher_label=_('Publisher'),
            pubdate_label=_('Published'),
            pubdate=Timestamp(pubdate, tweaks['gui_pubdate_display_format']),
            series_label=ngettext('Series', 'Series', 1),
            series=series,
            rating_label=_('Rating'),
            rating=rating,
            tags_label=_('Tags'),
            tags=tags,
            timestamp=Timestamp(timestamp, tweaks['gui_timestamp_display_format']),
            timestamp_label=_('Date'),
            comments=comments,
            footer='',
            display=display,
            searchable_tags=' '.join(escape(t)+'ttt' for t in tags.tags_list),
        )
        # Custom columns are exposed to the template with the leading # of
        # their lookup name replaced by _
        for key in mi.custom_field_keys():
            m = mi.get_user_metadata(key, False) or {}
            try:
                display_name, val = mi.format_field_extended(key)[:2]
                dkey = key.replace('#', '_')
                dt = m.get('datatype')
                if dt == 'series':
                    args[dkey] = Series(mi.get(key), mi.get(key + '_index'))
                elif dt == 'rating':
                    args[dkey] = rating_to_stars(mi.get(key), m.get('display', {}).get('allow_half_stars', False))
                elif dt == 'datetime':
                    args[dkey] = Timestamp(mi.get(key), m.get('display', {}).get('date_format','dd MMM yyyy'))
                elif dt == 'comments':
                    val = val or ''
                    ctype = m.get('display', {}).get('interpret_as') or 'html'
                    if ctype == 'long-text':
                        val = '<pre style="white-space:pre-wrap">%s</pre>' % escape(val)
                    elif ctype == 'short-text':
                        val = '<span>%s</span>' % escape(val)
                    elif ctype == 'markdown':
                        val = markdown(val)
                    else:
                        val = comments_to_html(val)
                    args[dkey] = val
                else:
                    args[dkey] = escape(val)
                args[dkey+'_label'] = escape(display_name)
                setattr(display, dkey, 'none' if mi.is_null(key) else 'initial')
            except Exception:
                # if the val (custom column contents) is None, don't add to args
                pass

        # Debugging aid, change to True to list the available custom columns
        if False:
            print("Custom column values available in jacket template:")
            for key in args.keys():
                if key.startswith('_') and not key.endswith('_label'):
                    print(" {}: {}".format('#' + key[1:], args[key]))

        # Used in the comment describing use of custom columns in templates
        # Don't change this unless you also change it in template.xhtml
        args['_genre_label'] = args.get('_genre_label', '{_genre_label}')
        args['_genre'] = args.get('_genre', '{_genre}')
        has_data['series'] = bool(series)
        has_data['tags'] = bool(tags)
        has_data['rating'] = bool(rating)
        has_data['pubdate'] = bool(pubdate)
        has_data['timestamp'] = bool(timestamp)
        has_data['publisher'] = bool(publisher)
        for k, v in has_data.items():
            setattr(display, k, 'initial' if v else 'none')
        display.title = 'initial'
        if mi.identifiers:
            display.identifiers = 'initial'

        formatter = SafeFormatter()
        generated_html = formatter.format(template, **args)

        return strip_encoding_declarations(generated_html)

    from calibre.ebooks.oeb.polish.parsing import parse
    raw = generate_html(comments)
    if smarten_punctuation:
        from calibre.ebooks.conversion.preprocess import smarten_punctuation as sp
        raw = sp(raw)
    root = parse(raw, line_numbers=False, force_html5_parse=True)

    if rescale_fonts:
        # We ensure that the conversion pipeline will set the font sizes for
        # text in the jacket to the same size as the font sizes for the rest of
        # the text in the book. That means that as long as the jacket uses
        # relative font sizes (em or %), the post conversion font size will be
        # the same as for text in the main book. So text with size x em will
        # be rescaled to the same value in both the jacket and the main content.
        #
        # We cannot use data-calibre-rescale 100 on the body tag as that will just
        # give the body tag a font size of 1em, which is useless.
        for body in root.xpath('//*[local-name()="body"]'):
            fw = body.makeelement(XHTML('div'))
            fw.set('data-calibre-rescale', '100')
            for child in body:
                fw.append(child)
            body.append(fw)
    postprocess_jacket(root, output_profile, has_data)
    from calibre.ebooks.oeb.polish.pretty import pretty_html_tree
    pretty_html_tree(None, root)
    return root

# }}}


def linearize_jacket(oeb):
    '''
    Replace the table markup in the first jacket found among the first four
    spine items: table, tr and th elements become divs and td elements
    become spans.
    '''
    for x in oeb.spine[:4]:
        if XPath(JACKET_XPATH)(x.data):
            for e in XPath('//h:table|//h:tr|//h:th')(x.data):
                e.tag = XHTML('div')
            for e in XPath('//h:td')(x.data):
                e.tag = XHTML('span')
            break


def referenced_images(root):
    '''
    Yield ``(img, path)`` for every ``<img>`` in `root` whose ``src`` is a
    ``file://`` URL pointing to a path that exists on the local filesystem.
    '''
    for img in XPath('//h:img[@src]')(root):
        src = img.get('src')
        if src.startswith('file://'):
            path = src[7:]
            # On Windows, drop the leading slash left over from the URL
            if iswindows and path.startswith('/'):
                path = path[1:]
            if os.path.exists(path):
                yield img, path