%PDF- %PDF-
Direktori : /lib/calibre/calibre/ebooks/oeb/ |
Current File : //lib/calibre/calibre/ebooks/oeb/stylizer.py |
''' CSS property propagation class. ''' __license__ = 'GPL v3' __copyright__ = '2008, Marshall T. Vandegrift <llasram@gmail.com>' import os, re, logging, copy, unicodedata, numbers from operator import itemgetter from weakref import WeakKeyDictionary from xml.dom import SyntaxErr as CSSSyntaxError from css_parser.css import (CSSStyleRule, CSSPageRule, CSSFontFaceRule, cssproperties) from css_parser import (profile as cssprofiles, parseString, parseStyle, log as css_parser_log, CSSParser, profiles, replaceUrls) from calibre import force_unicode, as_unicode from calibre.ebooks import unit_convert from calibre.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES, xpath, urlnormalize from calibre.ebooks.oeb.normalize_css import DEFAULTS, normalizers from css_selectors import Select, SelectorError, INAPPROPRIATE_PSEUDO_CLASSES from polyglot.builtins import iteritems from tinycss.media3 import CSSMedia3Parser css_parser_log.setLevel(logging.WARN) _html_css_stylesheet = None def validate_color(col): return cssprofiles.validateWithProfile('color', col, profiles=[profiles.Profiles.CSS_LEVEL_2])[1] def html_css_stylesheet(): global _html_css_stylesheet if _html_css_stylesheet is None: with open(P('templates/html.css'), 'rb') as f: html_css = f.read().decode('utf-8') _html_css_stylesheet = parseString(html_css, validate=False) return _html_css_stylesheet INHERITED = { 'azimuth', 'border-collapse', 'border-spacing', 'caption-side', 'color', 'cursor', 'direction', 'elevation', 'empty-cells', 'font-family', 'font-size', 'font-style', 'font-variant', 'font-weight', 'letter-spacing', 'line-height', 'list-style-image', 'list-style-position', 'list-style-type', 'orphans', 'page-break-inside', 'pitch-range', 'pitch', 'quotes', 'richness', 'speak-header', 'speak-numeral', 'speak-punctuation', 'speak', 'speech-rate', 'stress', 'text-align', 'text-indent', 'text-transform', 'visibility', 'voice-family', 'volume', 'white-space', 'widows', 'word-spacing', 'text-shadow', } FONT_SIZE_NAMES = { 'xx-small', 'x-small', 'small', 'medium', 'large', 'x-large', 'xx-large' } ALLOWED_MEDIA_TYPES = frozenset({'screen', 'all', 'aural', 'amzn-kf8'}) IGNORED_MEDIA_FEATURES = frozenset('width min-width max-width height min-height max-height device-width min-device-width max-device-width device-height min-device-height max-device-height aspect-ratio min-aspect-ratio max-aspect-ratio device-aspect-ratio min-device-aspect-ratio max-device-aspect-ratio color min-color max-color color-index min-color-index max-color-index monochrome min-monochrome max-monochrome -webkit-min-device-pixel-ratio resolution min-resolution max-resolution scan grid'.split()) # noqa def media_ok(raw): if not raw: return True if raw == 'amzn-mobi': # Optimization for the common case return False def query_ok(mq): matched = True if mq.media_type not in ALLOWED_MEDIA_TYPES: matched = False # Media queries that test for device specific features always fail for media_feature, expr in mq.expressions: if media_feature in IGNORED_MEDIA_FEATURES: matched = False return mq.negated ^ matched try: for mq in CSSMedia3Parser().parse_stylesheet('@media %s {}' % raw).rules[0].media: if query_ok(mq): return True return False except Exception: pass return True def test_media_ok(): assert media_ok(None) assert media_ok('') assert not media_ok('amzn-mobi') assert media_ok('amzn-kf8') assert media_ok('screen') assert media_ok('only screen') assert not media_ok('not screen') assert not media_ok('(device-width:10px)') assert media_ok('screen, (device-width:10px)') assert not media_ok('screen and (device-width:10px)') class StylizerRules: def __init__(self, opts, profile, stylesheets): self.opts, self.profile, self.stylesheets = opts, profile, stylesheets index = 0 self.rules = [] self.page_rule = {} self.font_face_rules = [] for sheet_index, stylesheet in enumerate(stylesheets): href = stylesheet.href for rule in stylesheet.cssRules: if rule.type == rule.MEDIA_RULE: if media_ok(rule.media.mediaText): for subrule in rule.cssRules: self.rules.extend(self.flatten_rule(subrule, href, index, is_user_agent_sheet=sheet_index==0)) index += 1 else: self.rules.extend(self.flatten_rule(rule, href, index, is_user_agent_sheet=sheet_index==0)) index = index + 1 self.rules.sort(key=itemgetter(0)) # sort by specificity def flatten_rule(self, rule, href, index, is_user_agent_sheet=False): results = [] sheet_index = 0 if is_user_agent_sheet else 1 if isinstance(rule, CSSStyleRule): style = self.flatten_style(rule.style) for selector in rule.selectorList: specificity = (sheet_index,) + selector.specificity + (index,) text = selector.selectorText selector = list(selector.seq) results.append((specificity, selector, style, text, href)) elif isinstance(rule, CSSPageRule): style = self.flatten_style(rule.style) self.page_rule.update(style) elif isinstance(rule, CSSFontFaceRule): if rule.style.length > 1: # Ignore the meaningless font face rules generated by the # benighted MS Word that contain only a font-family declaration # and nothing else self.font_face_rules.append(rule) return results def flatten_style(self, cssstyle): style = {} for prop in cssstyle: name = prop.name normalizer = normalizers.get(name, None) if normalizer is not None: style.update(normalizer(name, prop.propertyValue)) elif name == 'text-align': style['text-align'] = self._apply_text_align(prop.value) else: style[name] = prop.value if 'font-size' in style: size = style['font-size'] if size == 'normal': size = 'medium' if size == 'smallest': size = 'xx-small' if size in FONT_SIZE_NAMES: style['font-size'] = "%.1frem" % (self.profile.fnames[size] / float(self.profile.fbase)) if '-epub-writing-mode' in style: for x in ('-webkit-writing-mode', 'writing-mode'): style[x] = style.get(x, style['-epub-writing-mode']) return style def _apply_text_align(self, text): if text in ('left', 'justify') and self.opts.change_justification in ('left', 'justify'): text = self.opts.change_justification return text def same_rules(self, opts, profile, stylesheets): if self.opts != opts: # it's unlikely to happen, but better safe than sorry return False if self.profile != profile: return False if len(self.stylesheets) != len(stylesheets): return False for index, stylesheet in enumerate(self.stylesheets): if stylesheet != stylesheets[index]: return False return True class Stylizer: STYLESHEETS = WeakKeyDictionary() def __init__(self, tree, path, oeb, opts, profile=None, extra_css='', user_css='', base_css=''): self.oeb, self.opts = oeb, opts self.profile = profile if self.profile is None: # Use the default profile. This should really be using # opts.output_profile, but I don't want to risk changing it, as # doing so might well have hard to debug font size effects. from calibre.customize.ui import output_profiles for x in output_profiles(): if x.short_name == 'default': self.profile = x break if self.profile is None: # Just in case the default profile is removed in the future :) self.profile = opts.output_profile self.body_font_size = self.profile.fbase self.logger = oeb.logger item = oeb.manifest.hrefs[path] basename = os.path.basename(path) cssname = os.path.splitext(basename)[0] + '.css' stylesheets = [html_css_stylesheet()] if base_css: stylesheets.append(parseString(base_css, validate=False)) style_tags = xpath(tree, '//*[local-name()="style" or local-name()="link"]') # Add css_parser parsing profiles from output_profile for profile in self.opts.output_profile.extra_css_modules: cssprofiles.addProfile(profile['name'], profile['props'], profile['macros']) parser = CSSParser(fetcher=self._fetch_css_file, log=logging.getLogger('calibre.css')) for elem in style_tags: if (elem.tag == XHTML('style') and elem.get('type', CSS_MIME) in OEB_STYLES and media_ok(elem.get('media'))): text = elem.text if elem.text else '' for x in elem: t = getattr(x, 'text', None) if t: text += '\n\n' + force_unicode(t, 'utf-8') t = getattr(x, 'tail', None) if t: text += '\n\n' + force_unicode(t, 'utf-8') if text: text = oeb.css_preprocessor(text) # We handle @import rules separately parser.setFetcher(lambda x: ('utf-8', b'')) stylesheet = parser.parseString(text, href=cssname, validate=False) parser.setFetcher(self._fetch_css_file) for rule in stylesheet.cssRules: if rule.type == rule.IMPORT_RULE: ihref = item.abshref(rule.href) if not media_ok(rule.media.mediaText): continue hrefs = self.oeb.manifest.hrefs if ihref not in hrefs: self.logger.warn('Ignoring missing stylesheet in @import rule:', rule.href) continue sitem = hrefs[ihref] if sitem.media_type not in OEB_STYLES: self.logger.warn('CSS @import of non-CSS file %r' % rule.href) continue stylesheets.append(sitem.data) # Make links to resources absolute, since these rules will # be folded into a stylesheet at the root replaceUrls(stylesheet, item.abshref, ignoreImportRules=True) stylesheets.append(stylesheet) elif (elem.tag == XHTML('link') and elem.get('href') and elem.get( 'rel', 'stylesheet').lower() == 'stylesheet' and elem.get( 'type', CSS_MIME).lower() in OEB_STYLES and media_ok(elem.get('media')) ): href = urlnormalize(elem.attrib['href']) path = item.abshref(href) sitem = oeb.manifest.hrefs.get(path, None) if sitem is None: self.logger.warn( 'Stylesheet %r referenced by file %r not in manifest' % (path, item.href)) continue if not hasattr(sitem.data, 'cssRules'): self.logger.warn( 'Stylesheet %r referenced by file %r is not CSS'%(path, item.href)) continue stylesheets.append(sitem.data) csses = {'extra_css':extra_css, 'user_css':user_css} for w, x in csses.items(): if x: try: text = x stylesheet = parser.parseString(text, href=cssname, validate=False) stylesheets.append(stylesheet) except Exception: self.logger.exception('Failed to parse %s, ignoring.'%w) self.logger.debug('Bad css: ') self.logger.debug(x) # using oeb to store the rules, page rule and font face rules # and generating them again if opts, profile or stylesheets are different if (not hasattr(self.oeb, 'stylizer_rules')) \ or not self.oeb.stylizer_rules.same_rules(self.opts, self.profile, stylesheets): self.oeb.stylizer_rules = StylizerRules(self.opts, self.profile, stylesheets) self.rules = self.oeb.stylizer_rules.rules self.page_rule = self.oeb.stylizer_rules.page_rule self.font_face_rules = self.oeb.stylizer_rules.font_face_rules self.flatten_style = self.oeb.stylizer_rules.flatten_style self._styles = {} pseudo_pat = re.compile(':{1,2}(%s)' % ('|'.join(INAPPROPRIATE_PSEUDO_CLASSES)), re.I) select = Select(tree, ignore_inappropriate_pseudo_classes=True) for _, _, cssdict, text, _ in self.rules: fl = pseudo_pat.search(text) try: matches = tuple(select(text)) except SelectorError as err: self.logger.error(f'Ignoring CSS rule with invalid selector: {text!r} ({as_unicode(err)})') continue if fl is not None: fl = fl.group(1) if fl == 'first-letter' and getattr(self.oeb, 'plumber_output_format', '').lower() in {'mobi', 'docx'}: # Fake first-letter for elem in matches: for x in elem.iter('*'): if x.text: punctuation_chars = [] text = str(x.text) while text: category = unicodedata.category(text[0]) if category[0] not in {'P', 'Z'}: break punctuation_chars.append(text[0]) text = text[1:] special_text = ''.join(punctuation_chars) + \ (text[0] if text else '') span = x.makeelement('{%s}span' % XHTML_NS) span.text = special_text span.set('data-fake-first-letter', '1') span.tail = text[1:] x.text = None x.insert(0, span) self.style(span)._update_cssdict(cssdict) break else: # Element pseudo-class for elem in matches: self.style(elem)._update_pseudo_class(fl, cssdict) else: for elem in matches: self.style(elem)._update_cssdict(cssdict) for elem in xpath(tree, '//h:*[@style]'): self.style(elem)._apply_style_attr(url_replacer=item.abshref) num_pat = re.compile(r'[0-9.]+$') for elem in xpath(tree, '//h:img[@width or @height]'): style = self.style(elem) # Check if either height or width is not default is_styled = style._style.get('width', 'auto') != 'auto' or \ style._style.get('height', 'auto') != 'auto' if not is_styled: # Update img style dimension using width and height upd = {} for prop in ('width', 'height'): val = elem.get(prop, '').strip() try: del elem.attrib[prop] except: pass if val: if num_pat.match(val) is not None: val += 'px' upd[prop] = val if upd: style._update_cssdict(upd) def _fetch_css_file(self, path): hrefs = self.oeb.manifest.hrefs if path not in hrefs: self.logger.warn('CSS import of missing file %r' % path) return (None, None) item = hrefs[path] if item.media_type not in OEB_STYLES: self.logger.warn('CSS import of non-CSS file %r' % path) return (None, None) data = item.data.cssText if not isinstance(data, bytes): data = data.encode('utf-8') return ('utf-8', data) def style(self, element): try: return self._styles[element] except KeyError: return Style(element, self) def stylesheet(self, name, font_scale=None): rules = [] for _, _, style, selector, href in self.rules: if href != name: continue if font_scale and 'font-size' in style and \ style['font-size'].endswith('pt'): style = copy.copy(style) size = float(style['font-size'][:-2]) style['font-size'] = "%.2fpt" % (size * font_scale) style = ';\n '.join(': '.join(item) for item in style.items()) rules.append(f'{selector} {{\n {style};\n}}') return '\n'.join(rules) class Style: MS_PAT = re.compile(r'^\s*(mso-|panose-|text-underline|tab-interval)') def __init__(self, element, stylizer): self._element = element self._profile = stylizer.profile self._stylizer = stylizer self._style = {} self._fontSize = None self._width = None self._height = None self._lineHeight = None self._bgcolor = None self._fgcolor = None self._pseudo_classes = {} stylizer._styles[element] = self def set(self, prop, val): self._style[prop] = val def drop(self, prop, default=None): return self._style.pop(prop, default) def _update_cssdict(self, cssdict): self._style.update(cssdict) def _update_pseudo_class(self, name, cssdict): orig = self._pseudo_classes.get(name, {}) orig.update(cssdict) self._pseudo_classes[name] = orig def _apply_style_attr(self, url_replacer=None): attrib = self._element.attrib if 'style' not in attrib: return css = attrib['style'].split(';') css = filter(None, (x.strip() for x in css)) css = [y.strip() for y in css] css = [y for y in css if self.MS_PAT.match(y) is None] css = '; '.join(css) try: style = parseStyle(css, validate=False) except CSSSyntaxError: return if url_replacer is not None: replaceUrls(style, url_replacer, ignoreImportRules=True) self._style.update(self._stylizer.flatten_style(style)) def _has_parent(self): try: return self._element.getparent() is not None except AttributeError: return False # self._element is None def _get_parent(self): elem = self._element.getparent() if elem is None: return None return self._stylizer.style(elem) def __getitem__(self, name): domname = cssproperties._toDOMname(name) if hasattr(self, domname): return getattr(self, domname) return self._unit_convert(self._get(name)) def _get(self, name): result = self._style.get(name, None) if (result == 'inherit' or (result is None and name in INHERITED and self._has_parent())): stylizer = self._stylizer result = stylizer.style(self._element.getparent())._get(name) if result is None: result = DEFAULTS[name] return result def get(self, name, default=None): return self._style.get(name, default) def _unit_convert(self, value, base=None, font=None): 'Return value in pts' if base is None: base = self.width if not font and font != 0: font = self.fontSize return unit_convert(value, base, font, self._profile.dpi, body_font_size=self._stylizer.body_font_size) def pt_to_px(self, value): return (self._profile.dpi / 72) * value @property def color(self): if self._fgcolor is None: val = self._get('color') if val and validate_color(val): self._fgcolor = val else: self._fgcolor = DEFAULTS['color'] return self._fgcolor @property def backgroundColor(self): ''' Return the background color by parsing both the background-color and background shortcut properties. Note that inheritance/default values are not used. None is returned if no background color is set. ''' if self._bgcolor is None: col = None val = self._style.get('background-color', None) if val and validate_color(val): col = val else: val = self._style.get('background', None) if val is not None: try: style = parseStyle('background: '+val, validate=False) val = style.getProperty('background').propertyValue try: val = list(val) except: # val is CSSPrimitiveValue val = [val] for c in val: c = c.cssText if isinstance(c, bytes): c = c.decode('utf-8', 'replace') if validate_color(c): col = c break except: pass if col is None: self._bgcolor = False else: self._bgcolor = col return self._bgcolor if self._bgcolor else None @property def fontSize(self): def normalize_fontsize(value, base): value = value.replace('"', '').replace("'", '') result = None factor = None if value == 'inherit': value = base if value in FONT_SIZE_NAMES: result = self._profile.fnames[value] elif value == 'smaller': factor = 1.0/1.2 for _, _, size in self._profile.fsizes: if base <= size: break factor = None result = size elif value == 'larger': factor = 1.2 for _, _, size in reversed(self._profile.fsizes): if base >= size: break factor = None result = size else: result = self._unit_convert(value, base=base, font=base) if not isinstance(result, numbers.Number): return base if result < 0: result = normalize_fontsize("smaller", base) if factor: result = factor * base return result if self._fontSize is None: result = None parent = self._get_parent() if parent is not None: base = parent.fontSize else: base = self._profile.fbase if 'font-size' in self._style: size = self._style['font-size'] result = normalize_fontsize(size, base) else: result = base self._fontSize = result return self._fontSize def img_dimension(self, attr, img_size): ans = None parent = self._get_parent() if parent is not None: base = getattr(parent, attr) else: base = getattr(self._profile, attr + '_pts') x = self._style.get(attr) if x is not None: if x == 'auto': ans = self._unit_convert(str(img_size) + 'px', base=base) else: x = self._unit_convert(x, base=base) if isinstance(x, numbers.Number): ans = x if ans is None: x = self._element.get(attr) if x is not None: x = self._unit_convert(x + 'px', base=base) if isinstance(x, numbers.Number): ans = x if ans is None: ans = self._unit_convert(str(img_size) + 'px', base=base) maa = self._style.get('max-' + attr) if maa is not None: x = self._unit_convert(maa, base=base) if isinstance(x, numbers.Number) and (ans is None or x < ans): ans = x return ans def img_size(self, width, height): ' Return the final size of an <img> given that it points to an image of size widthxheight ' w, h = self._get('width'), self._get('height') answ, ansh = self.img_dimension('width', width), self.img_dimension('height', height) if w == 'auto' and h != 'auto': answ = (float(width)/height) * ansh elif h == 'auto' and w != 'auto': ansh = (float(height)/width) * answ return answ, ansh @property def width(self): if self._width is None: width = None base = None parent = self._get_parent() if parent is not None: base = parent.width else: base = self._profile.width_pts if 'width' in self._element.attrib: width = self._element.attrib['width'] elif 'width' in self._style: width = self._style['width'] if not width or width == 'auto': result = base else: result = self._unit_convert(width, base=base) if isinstance(result, (str, bytes)): result = self._profile.width self._width = result if 'max-width' in self._style: result = self._unit_convert(self._style['max-width'], base=base) if isinstance(result, (str, bytes)): result = self._width if result < self._width: self._width = result return self._width @property def parent_width(self): parent = self._get_parent() if parent is None: return self.width return parent.width @property def height(self): if self._height is None: height = None base = None parent = self._get_parent() if parent is not None: base = parent.height else: base = self._profile.height_pts if 'height' in self._element.attrib: height = self._element.attrib['height'] elif 'height' in self._style: height = self._style['height'] if not height or height == 'auto': result = base else: result = self._unit_convert(height, base=base) if isinstance(result, (str, bytes)): result = self._profile.height self._height = result if 'max-height' in self._style: result = self._unit_convert(self._style['max-height'], base=base) if isinstance(result, (str, bytes)): result = self._height if result < self._height: self._height = result return self._height @property def lineHeight(self): if self._lineHeight is None: result = None parent = self._get_parent() if 'line-height' in self._style: lineh = self._style['line-height'] if lineh == 'normal': lineh = '1.2' try: result = float(lineh) * self.fontSize except ValueError: result = self._unit_convert(lineh, base=self.fontSize) elif parent is not None: # TODO: proper inheritance result = parent.lineHeight else: result = 1.2 * self.fontSize self._lineHeight = result return self._lineHeight @property def effective_text_decoration(self): ''' Browsers do this creepy thing with text-decoration where even though the property is not inherited, it looks like it is because containing blocks apply it. The actual algorithm is utterly ridiculous, see http://reference.sitepoint.com/css/text-decoration This matters for MOBI output, where text-decoration is mapped to <u> and <st> tags. Trying to implement the actual algorithm is too much work, so we just use a simple fake that should cover most cases. ''' css = self._style.get('text-decoration', None) pcss = None parent = self._get_parent() if parent is not None: pcss = parent._style.get('text-decoration', None) if css in ('none', None, 'inherit') and pcss not in (None, 'none'): return pcss return css @property def first_vertical_align(self): ''' For docx output where tags are not nested, we cannot directly simulate the HTML vertical-align rendering model. Instead use the approximation of considering the first non-default vertical-align ''' val = self['vertical-align'] if val != 'baseline': raw_val = self._get('vertical-align') if '%' in raw_val: val = self._unit_convert(raw_val, base=self['line-height']) return val parent = self._get_parent() if parent is not None and 'inline' in parent['display']: return parent.first_vertical_align @property def marginTop(self): return self._unit_convert( self._get('margin-top'), base=self.parent_width) @property def marginBottom(self): return self._unit_convert( self._get('margin-bottom'), base=self.parent_width) @property def marginLeft(self): return self._unit_convert( self._get('margin-left'), base=self.parent_width) @property def marginRight(self): return self._unit_convert( self._get('margin-right'), base=self.parent_width) @property def paddingTop(self): return self._unit_convert( self._get('padding-top'), base=self.parent_width) @property def paddingBottom(self): return self._unit_convert( self._get('padding-bottom'), base=self.parent_width) @property def paddingLeft(self): return self._unit_convert( self._get('padding-left'), base=self.parent_width) @property def paddingRight(self): return self._unit_convert( self._get('padding-right'), base=self.parent_width) def __str__(self): items = sorted(iteritems(self._style)) return '; '.join(f"{key}: {val}" for key, val in items) def cssdict(self): return dict(self._style) def pseudo_classes(self, filter_css): if filter_css: css = copy.deepcopy(self._pseudo_classes) for psel, cssdict in iteritems(css): for k in filter_css: cssdict.pop(k, None) else: css = self._pseudo_classes return {k:v for k, v in iteritems(css) if v} @property def is_hidden(self): return self._style.get('display') == 'none' or self._style.get('visibility') == 'hidden'