%PDF- %PDF-
Direktori : /lib/calibre/calibre/ebooks/oeb/transforms/ |
Current File : //lib/calibre/calibre/ebooks/oeb/transforms/flatcss.py |
''' CSS flattening transform. ''' __license__ = 'GPL v3' __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>' import re, operator, math, numbers from collections import defaultdict from xml.dom import SyntaxErr from lxml import etree import css_parser from css_parser.css import Property from calibre import guess_type from calibre.ebooks import unit_convert from calibre.ebooks.oeb.base import (XHTML, XHTML_NS, CSS_MIME, OEB_STYLES, namespace, barename, XPath, css_text) from calibre.ebooks.oeb.stylizer import Stylizer from calibre.utils.filenames import ascii_filename, ascii_text from calibre.utils.icu import numeric_sort_key from polyglot.builtins import iteritems, string_or_bytes COLLAPSE = re.compile(r'[ \t\r\n\v]+') STRIPNUM = re.compile(r'[-0-9]+$') def asfloat(value, default): if not isinstance(value, numbers.Number): value = default return float(value) class KeyMapper: def __init__(self, sbase, dbase, dkey): self.sbase = float(sbase) self.dprop = [(self.relate(x, dbase), float(x)) for x in dkey] self.cache = {} @staticmethod def relate(size, base): if size == 0: return base size = float(size) base = float(base) if abs(size - base) < 0.1: return 0 sign = -1 if size < base else 1 endp = 0 if size < base else 36 diff = (abs(base - size) * 3) + ((36 - size) / 100) logb = abs(base - endp) if logb == 1.0: logb = 1.1 try: result = sign * math.log(diff, logb) except ValueError: if diff < 0: # Size is both very large and close to base return 0 if logb == 0: logb = 1e-6 if diff == 0: diff = 1e-6 result = sign * math.log(diff, logb) return result def __getitem__(self, ssize): ssize = asfloat(ssize, 0) if ssize in self.cache: return self.cache[ssize] dsize = self.map(ssize) self.cache[ssize] = dsize return dsize def map(self, ssize): sbase = self.sbase prop = self.relate(ssize, sbase) diff = [(abs(prop - p), s) for p, s in self.dprop] dsize = min(diff)[1] return dsize class ScaleMapper: def __init__(self, sbase, dbase): self.dscale = float(dbase) / float(sbase) def __getitem__(self, ssize): ssize = asfloat(ssize, 0) dsize = ssize * self.dscale return dsize class NullMapper: def __init__(self): pass def __getitem__(self, ssize): return ssize def FontMapper(sbase=None, dbase=None, dkey=None): if sbase and dbase and dkey: return KeyMapper(sbase, dbase, dkey) elif sbase and dbase: return ScaleMapper(sbase, dbase) else: return NullMapper() class EmbedFontsCSSRules: def __init__(self, body_font_family, rules): self.body_font_family, self.rules = body_font_family, rules self.href = None def __call__(self, oeb): if not self.body_font_family: return None if not self.href: iid, href = oeb.manifest.generate('page_styles', 'page_styles.css') rules = [css_text(x) for x in self.rules] rules = '\n\n'.join(rules) sheet = css_parser.parseString(rules, validate=False) self.href = oeb.manifest.add(iid, href, guess_type(href)[0], data=sheet).href return self.href class CSSFlattener: def __init__(self, fbase=None, fkey=None, lineh=None, unfloat=False, untable=False, page_break_on_body=False, specializer=None, transform_css_rules=()): self.fbase = fbase self.transform_css_rules = transform_css_rules if self.transform_css_rules: from calibre.ebooks.css_transform_rules import compile_rules self.transform_css_rules = compile_rules(self.transform_css_rules) self.fkey = fkey self.lineh = lineh self.unfloat = unfloat self.untable = untable self.specializer = specializer self.page_break_on_body = page_break_on_body @classmethod def config(cls, cfg): return cfg @classmethod def generate(cls, opts): return cls() def __call__(self, oeb, context): oeb.logger.info('Flattening CSS and remapping font sizes...') self.context = self.opts = context self.oeb = oeb self.items = list(self.oeb.spine) titlepage = self.oeb.guide.get('titlepage') if titlepage is not None: titlepage = titlepage.item if titlepage is not None and titlepage not in self.items: self.items.append(titlepage) epub3_nav = None if getattr(self.opts, 'epub3_nav_href', None): epub3_nav = self.oeb.manifest.hrefs.get(self.opts.epub3_nav_href) if epub3_nav is not None and epub3_nav not in self.items: self.items.append(epub3_nav) self.filter_css = frozenset() if self.opts.filter_css: try: self.filter_css = {x.strip().lower() for x in self.opts.filter_css.split(',')} except: self.oeb.log.warning('Failed to parse filter_css, ignoring') else: from calibre.ebooks.oeb.normalize_css import normalize_filter_css self.filter_css = frozenset(normalize_filter_css(self.filter_css)) self.oeb.log.debug('Filtering CSS properties: %s'% ', '.join(self.filter_css)) for item in oeb.manifest.values(): # Make all links to resources absolute, as these sheets will be # consolidated into a single stylesheet at the root of the document if item.media_type in OEB_STYLES: css_parser.replaceUrls(item.data, item.abshref, ignoreImportRules=True) self.body_font_family, self.embed_font_rules = self.get_embed_font_info( self.opts.embed_font_family) # Store for use in output plugins/transforms that generate content, # like the AZW3 output inline ToC. self.oeb.store_embed_font_rules = EmbedFontsCSSRules(self.body_font_family, self.embed_font_rules) self.stylize_spine() self.sbase = self.baseline_spine() if self.fbase else None self.fmap = FontMapper(self.sbase, self.fbase, self.fkey) self.flatten_spine() if epub3_nav is not None: self.opts.epub3_nav_parsed = epub3_nav.data self.store_page_margins() def store_page_margins(self): self.opts._stored_page_margins = {} for item, stylizer in iteritems(self.stylizers): margins = self.opts._stored_page_margins[item.href] = {} for prop, val in stylizer.page_rule.items(): p, w = prop.partition('-')[::2] if p == 'margin': margins[w] = unit_convert( val, stylizer.profile.width_pts, stylizer.body_font_size, stylizer.profile.dpi, body_font_size=stylizer.body_font_size) def get_embed_font_info(self, family, failure_critical=True): efi = [] body_font_family = None if not family: return body_font_family, efi from calibre.utils.fonts.scanner import font_scanner, NoFonts from calibre.utils.fonts.utils import panose_to_css_generic_family try: faces = font_scanner.fonts_for_family(family) except NoFonts: msg = ('No embeddable fonts found for family: %r'%family) if failure_critical: raise ValueError(msg) self.oeb.log.warn(msg) return body_font_family, efi if not faces: msg = ('No embeddable fonts found for family: %r'%family) if failure_critical: raise ValueError(msg) self.oeb.log.warn(msg) return body_font_family, efi for i, font in enumerate(faces): ext = 'otf' if font['is_otf'] else 'ttf' fid, href = self.oeb.manifest.generate(id='font', href='fonts/%s.%s'%(ascii_filename(font['full_name']).replace(' ', '-'), ext)) item = self.oeb.manifest.add(fid, href, guess_type('dummy.'+ext)[0], data=font_scanner.get_font_data(font)) item.unload_data_from_memory() cfont = { 'font-family': '"%s"'%font['font-family'], 'panose-1': ' '.join(map(str, font['panose'])), 'src': 'url(%s)'%item.href, } if i == 0: generic_family = panose_to_css_generic_family(font['panose']) body_font_family = "'%s',%s"%(font['font-family'], generic_family) self.oeb.log('Embedding font: %s'%font['font-family']) for k in ('font-weight', 'font-style', 'font-stretch'): if font[k] != 'normal': cfont[k] = font[k] rule = '@font-face { %s }'%('; '.join('%s:%s'%(k, v) for k, v in iteritems(cfont))) rule = css_parser.parseString(rule) efi.append(rule) return body_font_family, efi def stylize_spine(self): self.stylizers = {} profile = self.context.source css = '' for item in self.items: html = item.data body = html.find(XHTML('body')) if 'style' in html.attrib: b = body.attrib.get('style', '') body.set('style', html.get('style') + ';' + b) del html.attrib['style'] bs = body.get('style', '').split(';') bs.append('margin-top: 0pt') bs.append('margin-bottom: 0pt') if float(self.context.margin_left) >= 0: bs.append('margin-left : %gpt'% float(self.context.margin_left)) if float(self.context.margin_right) >= 0: bs.append('margin-right : %gpt'% float(self.context.margin_right)) bs.extend(['padding-left: 0pt', 'padding-right: 0pt']) if self.page_break_on_body: bs.extend(['page-break-before: always']) if self.context.change_justification != 'original': bs.append('text-align: '+ self.context.change_justification) if self.body_font_family: bs.append('font-family: '+self.body_font_family) body.set('style', '; '.join(bs)) stylizer = Stylizer(html, item.href, self.oeb, self.context, profile, user_css=self.context.extra_css, extra_css=css) self.stylizers[item] = stylizer def baseline_node(self, node, stylizer, sizes, csize): csize = stylizer.style(node)['font-size'] if node.text: sizes[csize] += len(COLLAPSE.sub(' ', node.text)) for child in node: self.baseline_node(child, stylizer, sizes, csize) if child.tail: sizes[csize] += len(COLLAPSE.sub(' ', child.tail)) def baseline_spine(self): sizes = defaultdict(float) for item in self.items: html = item.data stylizer = self.stylizers[item] body = html.find(XHTML('body')) fsize = self.context.source.fbase self.baseline_node(body, stylizer, sizes, fsize) try: sbase = max(list(sizes.items()), key=operator.itemgetter(1))[0] except: sbase = 12.0 self.oeb.logger.info( "Source base font size is %0.05fpt" % sbase) return sbase def clean_edges(self, cssdict, style, fsize): slineh = self.sbase * 1.26 dlineh = self.lineh for kind in ('margin', 'padding'): for edge in ('bottom', 'top'): property = f"{kind}-{edge}" if property not in cssdict: continue if '%' in cssdict[property]: continue value = style[property] if value == 0: continue elif value <= slineh: cssdict[property] = "%0.5fem" % (dlineh / fsize) else: try: value = round(value / slineh) * dlineh except: self.oeb.logger.warning( 'Invalid length:', value) value = 0.0 cssdict[property] = "%0.5fem" % (value / fsize) def flatten_node(self, node, stylizer, names, styles, pseudo_styles, psize, item_id, recurse=True): if not isinstance(node.tag, string_or_bytes) \ or namespace(node.tag) != XHTML_NS: return tag = barename(node.tag) style = stylizer.style(node) cssdict = style.cssdict() try: font_size = style['font-size'] except: font_size = self.sbase if self.sbase is not None else \ self.context.source.fbase if tag == 'body' and isinstance(font_size, numbers.Number): stylizer.body_font_size = font_size if 'align' in node.attrib: if tag != 'img': cssdict['text-align'] = node.attrib['align'] if cssdict['text-align'] == 'center': # align=center causes tables to be center aligned, # which text-align does not. And the ever trustworthy Word # uses this construct in its HTML output. See # https://bugs.launchpad.net/bugs/1569583 if tag == 'table': if 'margin-left' not in cssdict and 'margin-right' not in cssdict: cssdict['margin-left'] = cssdict['margin-right'] = 'auto' else: for table in node.iterchildren(XHTML("table")): ts = stylizer.style(table) if ts.get('margin-left') is None and ts.get('margin-right') is None: ts.set('margin-left', 'auto') ts.set('margin-right', 'auto') else: val = node.attrib['align'] if val in ('middle', 'bottom', 'top'): cssdict['vertical-align'] = val elif val in ('left', 'right'): cssdict['float'] = val del node.attrib['align'] if 'valign' in node.attrib and tag == 'td': if cssdict.get('vertical-align') == 'inherit': cssdict['vertical-align'] = node.attrib['valign'] del node.attrib['valign'] if node.tag == XHTML('font'): tags = ['descendant::h:%s'%x for x in ('p', 'div', 'table', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'ol', 'ul', 'dl', 'blockquote')] tag = 'div' if XPath('|'.join(tags))(node) else 'span' node.tag = XHTML(tag) if 'size' in node.attrib: def force_int(raw): return int(re.search(r'([0-9+-]+)', raw).group(1)) size = node.attrib['size'].strip() if size: fnums = self.context.source.fnums if size[0] in ('+', '-'): # Oh, the warcrimes try: esize = 3 + force_int(size) except: esize = 3 if esize < 1: esize = 1 if esize > 7: esize = 7 font_size = fnums[esize] else: try: font_size = fnums[force_int(size)] except: font_size = fnums[3] cssdict['font-size'] = '%.1fpt'%font_size del node.attrib['size'] if 'face' in node.attrib: cssdict['font-family'] = node.attrib['face'] del node.attrib['face'] if 'color' in node.attrib: try: cssdict['color'] = Property('color', node.attrib['color']).value except (ValueError, SyntaxErr): pass del node.attrib['color'] if 'bgcolor' in node.attrib: try: cssdict['background-color'] = Property('background-color', node.attrib['bgcolor']).value except (ValueError, SyntaxErr): pass del node.attrib['bgcolor'] if tag == 'ol' and 'type' in node.attrib: del node.attrib['type'] if cssdict.get('font-weight', '').lower() == 'medium': cssdict['font-weight'] = 'normal' # ADE chokes on font-weight medium fsize = font_size is_drop_cap = (cssdict.get('float', None) == 'left' and 'font-size' in cssdict and len(node) == 0 and node.text and ( len(node.text) == 1 or (len(node.text) == 2 and 0x2000 <= ord(node.text[0]) <= 0x206f))) # Detect drop caps generated by the docx input plugin if node.tag and node.tag.endswith('}p') and len(node) == 0 and node.text and len(node.text.strip()) == 1 and \ not node.tail and 'line-height' in cssdict and 'font-size' in cssdict: dp = node.getparent() if dp.tag and dp.tag.endswith('}div') and len(dp) == 1 and not dp.text: if stylizer.style(dp).cssdict().get('float', None) == 'left': is_drop_cap = True if not self.context.disable_font_rescaling and not is_drop_cap: _sbase = self.sbase if self.sbase is not None else \ self.context.source.fbase dyn_rescale = node.attrib.pop('data-calibre-rescale', None) if dyn_rescale is not None: try: dyn_rescale = float(dyn_rescale) / 100 except Exception: dyn_rescale = 1 fsize = self.fmap[_sbase] fsize *= dyn_rescale cssdict['font-size'] = '%0.5fem'%(fsize/psize) psize = fsize elif 'font-size' in cssdict or tag == 'body': fsize = self.fmap[font_size] try: cssdict['font-size'] = "%0.5fem" % (fsize / psize) except ZeroDivisionError: cssdict['font-size'] = '%.1fpt'%fsize psize = fsize try: minlh = self.context.minimum_line_height / 100. slh = style['line-height'] if not is_drop_cap and isinstance(slh, numbers.Number) and slh < minlh * fsize: cssdict['line-height'] = str(minlh) except Exception: self.oeb.logger.exception('Failed to set minimum line-height') if cssdict: for x in self.filter_css: popval = cssdict.pop(x, None) if self.body_font_family and popval and x == 'font-family' \ and popval.partition(',')[0][1:-1] == self.body_font_family.partition(',')[0][1:-1]: cssdict[x] = popval if cssdict: if self.lineh and self.fbase and tag not in ('body', 'html'): self.clean_edges(cssdict, style, psize) if 'display' in cssdict and cssdict['display'] == 'in-line': cssdict['display'] = 'inline' if self.unfloat and 'float' in cssdict \ and cssdict.get('display', 'none') != 'none': del cssdict['display'] if self.untable and 'display' in cssdict \ and cssdict['display'].startswith('table'): display = cssdict['display'] if display == 'table-cell': cssdict['display'] = 'inline' else: cssdict['display'] = 'block' if 'vertical-align' in cssdict \ and cssdict['vertical-align'] == 'sup': cssdict['vertical-align'] = 'super' if self.lineh and 'line-height' not in cssdict and tag != 'html': lineh = self.lineh / psize cssdict['line-height'] = "%0.5fem" % lineh if (self.context.remove_paragraph_spacing or self.context.insert_blank_line) and tag in ('p', 'div'): if item_id != 'calibre_jacket' or self.context.output_profile.name == 'Kindle': for prop in ('margin', 'padding', 'border'): for edge in ('top', 'bottom'): cssdict['%s-%s'%(prop, edge)] = '0pt' if self.context.insert_blank_line: cssdict['margin-top'] = cssdict['margin-bottom'] = \ '%fem'%self.context.insert_blank_line_size indent_size = self.context.remove_paragraph_spacing_indent_size keep_indents = indent_size < 0.0 if (self.context.remove_paragraph_spacing and not keep_indents and cssdict.get('text-align', None) not in ('center', 'right')): cssdict['text-indent'] = "%1.1fem" % indent_size pseudo_classes = style.pseudo_classes(self.filter_css) if cssdict or pseudo_classes: keep_classes = set() if cssdict: items = sorted(iteritems(cssdict)) css = ';\n'.join(f'{key}: {val}' for key, val in items) classes = node.get('class', '').strip() or 'calibre' classes_list = classes.split() # lower() because otherwise if the document uses the same class # name with different case, both cases will apply, leading # to incorrect results. klass = ascii_text(STRIPNUM.sub('', classes_list[0])).lower().strip().replace(' ', '_') if css in styles: match = styles[css] else: match = klass + str(names[klass] or '') styles[css] = match names[klass] += 1 node.attrib['class'] = match keep_classes.add(match) for psel, cssdict in iteritems(pseudo_classes): items = sorted(iteritems(cssdict)) css = ';\n'.join(f'{key}: {val}' for key, val in items) pstyles = pseudo_styles[psel] if css in pstyles: match = pstyles[css] else: # We have to use a different class for each psel as # otherwise you can have incorrect styles for a situation # like: a:hover { color: red } a:link { color: blue } a.x:hover { color: green } # If the pcalibre class for a:hover and a:link is the same, # then the class attribute for a.x tags will contain both # that class and the class for a.x:hover, which is wrong. klass = 'pcalibre' match = klass + str(names[klass] or '') pstyles[css] = match names[klass] += 1 keep_classes.add(match) node.attrib['class'] = ' '.join(keep_classes) elif 'class' in node.attrib: del node.attrib['class'] if 'style' in node.attrib: del node.attrib['style'] if recurse: for child in node: self.flatten_node(child, stylizer, names, styles, pseudo_styles, psize, item_id) def flatten_head(self, item, href, global_href): html = item.data head = html.find(XHTML('head')) def safe_lower(x): try: x = x.lower() except Exception: pass return x for node in html.xpath('//*[local-name()="style" or local-name()="link"]'): if node.tag == XHTML('link') \ and safe_lower(node.get('rel', 'stylesheet')) == 'stylesheet' \ and safe_lower(node.get('type', CSS_MIME)) in OEB_STYLES: node.getparent().remove(node) elif node.tag == XHTML('style') \ and node.get('type', CSS_MIME) in OEB_STYLES: node.getparent().remove(node) href = item.relhref(href) l = etree.SubElement(head, XHTML('link'), rel='stylesheet', type=CSS_MIME, href=href) l.tail='\n' if global_href: href = item.relhref(global_href) l = etree.SubElement(head, XHTML('link'), rel='stylesheet', type=CSS_MIME, href=href) l.tail = '\n' def replace_css(self, css): manifest = self.oeb.manifest for item in manifest.values(): if item.media_type in OEB_STYLES: manifest.remove(item) id, href = manifest.generate('css', 'stylesheet.css') sheet = css_parser.parseString(css, validate=False) if self.transform_css_rules: from calibre.ebooks.css_transform_rules import transform_sheet transform_sheet(self.transform_css_rules, sheet) item = manifest.add(id, href, CSS_MIME, data=sheet) self.oeb.manifest.main_stylesheet = item return href def collect_global_css(self): global_css = defaultdict(list) for item in self.items: stylizer = self.stylizers[item] if float(self.context.margin_top) >= 0: stylizer.page_rule['margin-top'] = '%gpt'%\ float(self.context.margin_top) if float(self.context.margin_bottom) >= 0: stylizer.page_rule['margin-bottom'] = '%gpt'%\ float(self.context.margin_bottom) items = sorted(stylizer.page_rule.items()) css = ';\n'.join(f"{key}: {val}" for key, val in items) css = ('@page {\n%s\n}\n'%css) if items else '' rules = [css_text(r) for r in stylizer.font_face_rules + self.embed_font_rules] raw = '\n\n'.join(rules) css += '\n\n' + raw global_css[css].append(item) gc_map = {} manifest = self.oeb.manifest for css in global_css: href = None if css.strip(): id_, href = manifest.generate('page_css', 'page_styles.css') sheet = css_parser.parseString(css, validate=False) if self.transform_css_rules: from calibre.ebooks.css_transform_rules import transform_sheet transform_sheet(self.transform_css_rules, sheet) manifest.add(id_, href, CSS_MIME, data=sheet) gc_map[css] = href ans = {} for css, items in iteritems(global_css): for item in items: ans[item] = gc_map[css] return ans def flatten_spine(self): names = defaultdict(int) styles, pseudo_styles = {}, defaultdict(dict) for item in self.items: html = item.data stylizer = self.stylizers[item] if self.specializer is not None: self.specializer(item, stylizer) fsize = self.context.dest.fbase self.flatten_node(html, stylizer, names, styles, pseudo_styles, fsize, item.id, recurse=False) self.flatten_node(html.find(XHTML('body')), stylizer, names, styles, pseudo_styles, fsize, item.id) items = sorted(((key, val) for (val, key) in iteritems(styles)), key=lambda x:numeric_sort_key(x[0])) # :hover must come after link and :active must come after :hover psels = sorted(pseudo_styles, key=lambda x : {'hover':1, 'active':2}.get(x, 0)) for psel in psels: styles = pseudo_styles[psel] if not styles: continue x = sorted(((k+':'+psel, v) for v, k in iteritems(styles))) items.extend(x) css = ''.join(f".{key} {{\n{val};\n}}\n\n" for key, val in items) href = self.replace_css(css) global_css = self.collect_global_css() for item in self.items: stylizer = self.stylizers[item] self.flatten_head(item, href, global_css[item])