# %PDF- %PDF-
# Directory : /usr/lib/calibre/calibre/ebooks/docx/
# Current File : //usr/lib/calibre/calibre/ebooks/docx/styles.py
#!/usr/bin/env python3


__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'

import textwrap
from collections import OrderedDict, Counter

from calibre.ebooks.docx.block_styles import ParagraphStyle, inherit, twips
from calibre.ebooks.docx.char_styles import RunStyle
from calibre.ebooks.docx.tables import TableStyle
from polyglot.builtins import iteritems, itervalues


class PageProperties:

    '''
    Class representing page level properties (page size/margins) read from
    sectPr elements.
    '''

    def __init__(self, namespace, elems=()):
        '''
        :param namespace: Object providing ``XPath()`` and ``get()`` helpers
            for the ``w:`` namespace.
        :param elems: Iterable of sectPr elements. They are processed in
            order, so values from later elements overwrite earlier ones.
        '''
        self.width, self.height = 595.28, 841.89  # pts, A4
        self.margin_left = self.margin_right = 72  # pts

        def setval(attr, val):
            # Keep the default when twips() cannot produce a value.
            val = twips(val)
            if val is not None:
                setattr(self, attr, val)

        for sectPr in elems:
            for pgSz in namespace.XPath('./w:pgSz')(sectPr):
                w, h = namespace.get(pgSz, 'w:w'), namespace.get(pgSz, 'w:h')
                setval('width', w)
                setval('height', h)
            for pgMar in namespace.XPath('./w:pgMar')(sectPr):
                left, right = namespace.get(pgMar, 'w:left'), namespace.get(pgMar, 'w:right')
                setval('margin_left', left)
                setval('margin_right', right)


class Style:

    '''
    Class representing a <w:style> element. Can contain block, character, etc. styles.
    '''

    def __init__(self, namespace, elem):
        '''
        Read the id, type, name, basedOn link and default flag of ``elem`` and
        build the table/paragraph/character sub-styles that apply to its type.

        :param namespace: Object providing ``XPath()`` and ``get()`` helpers.
        :param elem: The <w:style> element.
        '''
        self.namespace = namespace
        self.name_path = namespace.XPath('./w:name[@w:val]')
        self.based_on_path = namespace.XPath('./w:basedOn[@w:val]')
        self.resolved = False
        self.style_id = namespace.get(elem, 'w:styleId')
        self.style_type = namespace.get(elem, 'w:type')
        names = self.name_path(elem)
        # When several w:name children exist the last one is used.
        self.name = namespace.get(names[-1], 'w:val') if names else None
        based_on = self.based_on_path(elem)
        self.based_on = namespace.get(based_on[0], 'w:val') if based_on else None
        if self.style_type == 'numbering':
            # basedOn is ignored for numbering styles.
            self.based_on = None
        self.is_default = namespace.get(elem, 'w:default') in {'1', 'on', 'true'}

        self.paragraph_style = self.character_style = self.table_style = None

        if self.style_type in {'paragraph', 'character', 'table'}:
            if self.style_type == 'table':
                for tblPr in namespace.XPath('./w:tblPr')(elem):
                    ts = TableStyle(namespace, tblPr)
                    if self.table_style is None:
                        self.table_style = ts
                    else:
                        self.table_style.update(ts)
            if self.style_type in {'paragraph', 'table'}:
                for pPr in namespace.XPath('./w:pPr')(elem):
                    ps = ParagraphStyle(namespace, pPr)
                    if self.paragraph_style is None:
                        self.paragraph_style = ps
                    else:
                        self.paragraph_style.update(ps)

            for rPr in namespace.XPath('./w:rPr')(elem):
                rs = RunStyle(namespace, rPr)
                if self.character_style is None:
                    self.character_style = rs
                else:
                    self.character_style.update(rs)

        if self.style_type in {'numbering', 'paragraph'}:
            # Only these style types get the attribute at all; readers use
            # getattr(style, 'numbering_style_link', None).
            self.numbering_style_link = None
            for x in namespace.XPath('./w:pPr/w:numPr/w:numId[@w:val]')(elem):
                self.numbering_style_link = namespace.get(x, 'w:val')

    def resolve_based_on(self, parent):
        '''
        Resolve this style against ``parent``, the style it is based on.

        For every sub-style (table, paragraph, character) that ``parent``
        defines, an empty sub-style is created here if missing and its
        ``resolve_based_on()`` is called with the parent's sub-style.
        '''
        if parent.table_style is not None:
            if self.table_style is None:
                self.table_style = TableStyle(self.namespace)
            self.table_style.resolve_based_on(parent.table_style)
        if parent.paragraph_style is not None:
            if self.paragraph_style is None:
                self.paragraph_style = ParagraphStyle(self.namespace)
            self.paragraph_style.resolve_based_on(parent.paragraph_style)
        if parent.character_style is not None:
            if self.character_style is None:
                self.character_style = RunStyle(self.namespace)
            self.character_style.resolve_based_on(parent.character_style)


class Styles:

    '''
    Collection of all styles defined in the document. Used to get the final
    styles applicable to elements in the document markup.
    '''

    def __init__(self, namespace, tables):
        '''
        :param namespace: Object providing ``XPath()`` and ``get()`` helpers.
        :param tables: Object providing ``para_style(p)`` and ``run_style(p)``.
        '''
        self.namespace = namespace
        self.id_map = OrderedDict()  # style id -> Style
        self.para_cache = {}  # paragraph element -> resolved ParagraphStyle
        self.para_char_cache = {}  # paragraph element -> character style of its paragraph style
        self.run_cache = {}  # run element -> resolved RunStyle
        self.classes = {}  # hash of css items -> (class name, css)
        self.counter = Counter()  # per-prefix counter used to number class names
        self.default_styles = {}  # style type -> Style flagged as default
        self.tables = tables
        self.numbering_style_links = {}  # style id -> numId value
        self.default_paragraph_style = self.default_character_style = None

    def __iter__(self):
        '''Iterate over the Style objects in insertion order.'''
        yield from itervalues(self.id_map)

    def __getitem__(self, key):
        '''Return the Style with id ``key``; raises KeyError if unknown.'''
        return self.id_map[key]

    def __len__(self):
        '''Number of styles stored by id.'''
        return len(self.id_map)

    def get(self, key, default=None):
        '''Return the Style with id ``key``, or ``default`` if unknown.'''
        return self.id_map.get(key, default)

    def __call__(self, root, fonts, theme):
        '''
        Load all styles and document defaults from ``root`` and resolve
        style inheritance (basedOn).

        :param root: Root element of the styles markup, or None. With None
            nothing is loaded.
        :param fonts: Stored as ``self.fonts``; used by resolve_run() and
            generate_css().
        :param theme: Stored as ``self.theme``; used by resolve_run().
        '''
        self.fonts, self.theme = fonts, theme
        self.default_paragraph_style = self.default_character_style = None
        if root is not None:
            for s in self.namespace.XPath('//w:style')(root):
                s = Style(self.namespace, s)
                if s.style_id:
                    self.id_map[s.style_id] = s
                if s.is_default:
                    self.default_styles[s.style_type] = s
                if getattr(s, 'numbering_style_link', None) is not None:
                    self.numbering_style_links[s.style_id] = s.numbering_style_link

            for dd in self.namespace.XPath('./w:docDefaults')(root):
                for pd in self.namespace.XPath('./w:pPrDefault')(dd):
                    for pPr in self.namespace.XPath('./w:pPr')(pd):
                        ps = ParagraphStyle(self.namespace, pPr)
                        if self.default_paragraph_style is None:
                            self.default_paragraph_style = ps
                        else:
                            self.default_paragraph_style.update(ps)
                for rd in self.namespace.XPath('./w:rPrDefault')(dd):
                    for rPr in self.namespace.XPath('./w:rPr')(rd):
                        rs = RunStyle(self.namespace, rPr)
                        if self.default_character_style is None:
                            self.default_character_style = rs
                        else:
                            self.default_character_style.update(rs)

        # ids of the styles whose resolution is currently on the call stack.
        # A malformed document can contain basedOn cycles (A -> B -> A, or a
        # style based on itself); without this guard resolve() would recurse
        # until RecursionError.
        in_progress = set()

        def resolve(s, p):
            in_progress.add(id(s))
            try:
                # A parent that is already being resolved is a cycle
                # back-edge: do not inherit across it.
                if p is not None and id(p) not in in_progress:
                    if not p.resolved:
                        resolve(p, self.get(p.based_on))
                    s.resolve_based_on(p)
                s.resolved = True
            finally:
                in_progress.discard(id(s))

        for s in self:
            if not s.resolved:
                resolve(s, self.get(s.based_on))

    def para_val(self, parent_styles, direct_formatting, attr):
        '''
        Return the value of ``attr`` for a paragraph: the direct formatting
        value if it is not ``inherit``, otherwise the first non-inherit value
        found walking ``parent_styles`` from last to first. Returns
        ``inherit`` if nothing defines it.
        '''
        val = getattr(direct_formatting, attr)
        if val is inherit:
            for ps in reversed(parent_styles):
                pval = getattr(ps, attr)
                if pval is not inherit:
                    val = pval
                    break
        return val

    def run_val(self, parent_styles, direct_formatting, attr):
        '''
        Return the value of ``attr`` for a run.

        Direct formatting wins when it is not ``inherit``. Toggle properties
        are combined by parity over the parent styles (see the comment
        below); all other properties take the first non-inherit value walking
        ``parent_styles`` from last to first.
        '''
        val = getattr(direct_formatting, attr)
        if val is not inherit:
            return val
        if attr in direct_formatting.toggle_properties:
            # The spec (section 17.7.3) does not make sense, so we follow the behavior
            # of Word, which seems to only consider the document default if the
            # property has not been defined in any styles.
            vals = [
                int(getattr(rs, attr)) for rs in parent_styles
                if rs is not self.default_character_style and getattr(rs, attr) is not inherit
            ]
            if vals:
                return sum(vals) % 2 == 1
            if self.default_character_style is not None:
                return getattr(self.default_character_style, attr) is True
            return False
        for rs in reversed(parent_styles):
            rval = getattr(rs, attr)
            if rval is not inherit:
                return rval
        return val

    def resolve_paragraph(self, p):
        '''
        Return the final ParagraphStyle for the paragraph element ``p``.

        The result is cached in ``self.para_cache``. As side effects this
        records the character style associated with the paragraph in
        ``self.para_char_cache`` and sets the ``calibre_num_id`` attribute on
        numbered paragraphs. Numbered paragraphs read ``self.numbering``,
        which is assigned by resolve_numbering().
        '''
        ans = self.para_cache.get(p, None)
        if ans is None:
            linked_style = None
            ans = self.para_cache[p] = ParagraphStyle(self.namespace)
            ans.style_name = None
            direct_formatting = None
            is_section_break = False
            for pPr in self.namespace.XPath('./w:pPr')(p):
                ps = ParagraphStyle(self.namespace, pPr)
                if direct_formatting is None:
                    direct_formatting = ps
                else:
                    direct_formatting.update(ps)
                if self.namespace.XPath('./w:sectPr')(pPr):
                    is_section_break = True

            if direct_formatting is None:
                direct_formatting = ParagraphStyle(self.namespace)

            # Ordered from lowest to highest priority; para_val() walks it in
            # reverse.
            parent_styles = []
            if self.default_paragraph_style is not None:
                parent_styles.append(self.default_paragraph_style)
            ts = self.tables.para_style(p)
            if ts is not None:
                parent_styles.append(ts)

            default_para = self.default_styles.get('paragraph', None)
            if direct_formatting.linked_style is not None:
                ls = linked_style = self.get(direct_formatting.linked_style)
                if ls is not None:
                    ans.style_name = ls.name
                    ps = ls.paragraph_style
                    if ps is not None:
                        parent_styles.append(ps)
                    if ls.character_style is not None:
                        self.para_char_cache[p] = ls.character_style
            elif default_para is not None:
                if default_para.paragraph_style is not None:
                    parent_styles.append(default_para.paragraph_style)
                if default_para.character_style is not None:
                    self.para_char_cache[p] = default_para.character_style

            def has_numbering(block_style):
                # getattr with a default so that block_style may be None.
                num_id = getattr(block_style, 'numbering_id', inherit)
                lvl = getattr(block_style, 'numbering_level', inherit)
                return num_id is not None and num_id is not inherit and lvl is not None and lvl is not inherit

            is_numbering = has_numbering(direct_formatting)
            # Only a paragraph with a sectPr and no runs counts as a section break.
            is_section_break = is_section_break and not self.namespace.XPath('./w:r')(p)

            if is_numbering and not is_section_break:
                num_id, lvl = direct_formatting.numbering_id, direct_formatting.numbering_level
                p.set('calibre_num_id', f'{lvl}:{num_id}')
                ps = self.numbering.get_para_style(num_id, lvl)
                if ps is not None:
                    parent_styles.append(ps)
            if (
                not is_numbering and not is_section_break and linked_style is not None
                and has_numbering(linked_style.paragraph_style)
            ):
                # No direct numbering: fall back to numbering from the linked style.
                num_id, lvl = linked_style.paragraph_style.numbering_id, linked_style.paragraph_style.numbering_level
                p.set('calibre_num_id', f'{lvl}:{num_id}')
                is_numbering = True
                ps = self.numbering.get_para_style(num_id, lvl)
                if ps is not None:
                    parent_styles.append(ps)

            for attr in ans.all_properties:
                if not (is_numbering and attr == 'text_indent'):  # skip text-indent for lists
                    setattr(ans, attr, self.para_val(parent_styles, direct_formatting, attr))
            ans.linked_style = direct_formatting.linked_style
        return ans

    def resolve_run(self, r):
        '''
        Return the final RunStyle for the run element ``r``.

        The result is cached in ``self.run_cache``. The paragraph level
        character style is read from ``self.para_char_cache``, which is
        filled by resolve_paragraph().
        '''
        ans = self.run_cache.get(r, None)
        if ans is None:
            p = self.namespace.XPath('ancestor::w:p[1]')(r)
            p = p[0] if p else None
            ans = self.run_cache[r] = RunStyle(self.namespace)
            direct_formatting = None
            for rPr in self.namespace.XPath('./w:rPr')(r):
                rs = RunStyle(self.namespace, rPr)
                if direct_formatting is None:
                    direct_formatting = rs
                else:
                    direct_formatting.update(rs)

            if direct_formatting is None:
                direct_formatting = RunStyle(self.namespace)

            # Ordered from lowest to highest priority.
            parent_styles = []
            default_char = self.default_styles.get('character', None)
            if self.default_character_style is not None:
                parent_styles.append(self.default_character_style)
            pstyle = self.para_char_cache.get(p, None)
            if pstyle is not None:
                parent_styles.append(pstyle)
            # As best as I can understand the spec, table overrides should be
            # applied before paragraph overrides, but word does it
            # this way, see the December 2007 table header in the demo
            # document.
            ts = self.tables.run_style(p)
            if ts is not None:
                parent_styles.append(ts)
            if direct_formatting.linked_style is not None:
                # self.get() may return None, hence getattr with a default.
                ls = getattr(self.get(direct_formatting.linked_style), 'character_style', None)
                if ls is not None:
                    parent_styles.append(ls)
            elif default_char is not None and default_char.character_style is not None:
                parent_styles.append(default_char.character_style)

            for attr in ans.all_properties:
                setattr(ans, attr, self.run_val(parent_styles, direct_formatting, attr))

            if ans.font_family is not inherit:
                ff = self.theme.resolve_font_family(ans.font_family)
                ans.font_family = self.fonts.family_for(ff, ans.b, ans.i)

        return ans

    def resolve(self, obj):
        '''
        Resolve the style of ``obj`` based on its tag: elements whose tag
        ends with ``}p`` go to resolve_paragraph(), ``}r`` to resolve_run().
        Returns None for any other element.
        '''
        if obj.tag.endswith('}p'):
            return self.resolve_paragraph(obj)
        if obj.tag.endswith('}r'):
            return self.resolve_run(obj)

    def cascade(self, layers):
        '''
        Move shared property values up the hierarchy.

        Properties on which all runs of a paragraph agree are promoted to the
        paragraph style; the most common paragraph font family, font size and
        color are then promoted to ``self.body_font_family``,
        ``self.body_font_size`` and ``self.body_color``.

        :param layers: Mapping of paragraph element -> iterable of its runs.
        '''
        self.body_font_family = 'serif'
        self.body_font_size = '10pt'
        self.body_color = 'currentColor'

        def promote_property(char_styles, block_style, prop):
            vals = {getattr(s, prop) for s in char_styles}
            if len(vals) == 1:
                # All the character styles have the same value
                for s in char_styles:
                    setattr(s, prop, inherit)
                setattr(block_style, prop, next(iter(vals)))

        for p, runs in iteritems(layers):
            has_links = '1' in {r.get('is-link', None) for r in runs}
            char_styles = [self.resolve_run(r) for r in runs]
            block_style = self.resolve_paragraph(p)
            for prop in ('font_family', 'font_size', 'cs_font_family', 'cs_font_size', 'color'):
                if has_links and prop == 'color':
                    # We cannot promote color as browser rendering engines will
                    # override the link color setting it to blue, unless the
                    # color is specified on the link element itself
                    continue
                promote_property(char_styles, block_style, prop)
            for s in char_styles:
                if s.text_decoration == 'none':
                    # The default text decoration is 'none'
                    s.text_decoration = inherit

        def promote_most_common(block_styles, prop, default, inherit_means=None):
            # Returns the most common value of prop, or None when no style
            # defines it. Styles holding that value are reset to inherit;
            # styles that inherited get ``default`` written explicitly when
            # the promoted value differs from it.
            c = Counter()
            for s in block_styles:
                val = getattr(s, prop)
                if val is inherit and inherit_means is not None:
                    val = inherit_means
                if val is not inherit:
                    c[val] += 1
            val = None
            if c:
                val = c.most_common(1)[0][0]
                for s in block_styles:
                    oval = getattr(s, prop)
                    if oval is inherit and inherit_means is not None:
                        oval = inherit_means
                    if oval is inherit:
                        if default != val:
                            setattr(s, prop, default)
                    elif oval == val:
                        setattr(s, prop, inherit)
            return val

        block_styles = tuple(self.resolve_paragraph(p) for p in layers)

        ff = promote_most_common(block_styles, 'font_family', self.body_font_family)
        if ff is not None:
            self.body_font_family = ff

        # body_font_size is '10pt' at this point, so [:2] is the number.
        fs = promote_most_common(block_styles, 'font_size', int(self.body_font_size[:2]))
        if fs is not None:
            self.body_font_size = '%.3gpt' % fs

        color = promote_most_common(block_styles, 'color', self.body_color, inherit_means='currentColor')
        if color is not None:
            self.body_color = color

    def resolve_numbering(self, numbering):
        '''
        Store ``numbering`` as ``self.numbering`` and fix up the numbering
        level of every paragraph style that carries a numbering id.
        '''
        # When a numPr element appears inside a paragraph style, the lvl info
        # must be discarded and pStyle used instead.
        self.numbering = numbering
        for style in self:
            ps = style.paragraph_style
            if ps is not None and ps.numbering_id is not inherit:
                lvl = numbering.get_pstyle(ps.numbering_id, style.style_id)
                if lvl is None:
                    ps.numbering_id = ps.numbering_level = inherit
                else:
                    ps.numbering_level = lvl

    def apply_contextual_spacing(self, paras):
        '''
        For each pair of consecutive paragraphs in ``paras`` that share the
        same (non-None) linked style, zero the bottom margin of the first
        and/or the top margin of the second when the respective style has
        contextualSpacing set to True.
        '''
        last_para = None
        for p in paras:
            if last_para is not None:
                ls = self.resolve_paragraph(last_para)
                ps = self.resolve_paragraph(p)
                if ls.linked_style is not None and ls.linked_style == ps.linked_style:
                    if ls.contextualSpacing is True:
                        ls.margin_bottom = 0
                    if ps.contextualSpacing is True:
                        ps.margin_top = 0
            last_para = p

    def apply_section_page_breaks(self, paras):
        '''Set pageBreakBefore to True on the style of every paragraph in ``paras``.'''
        for p in paras:
            ps = self.resolve_paragraph(p)
            ps.pageBreakBefore = True

    def register(self, css, prefix):
        '''
        Return the class name for the ``css`` mapping, creating a new
        ``<prefix>_<n>`` name if this set of declarations has not been seen.
        '''
        h = hash(frozenset(iteritems(css)))
        ans, _ = self.classes.get(h, (None, None))
        if ans is None:
            self.counter[prefix] += 1
            ans = '%s_%d' % (prefix, self.counter[prefix])
            self.classes[h] = (ans, css)
        return ans

    def generate_classes(self):
        '''
        Register a ``block`` class for every cached paragraph style and a
        ``text`` class for every cached run style that has non-empty css.
        '''
        for bs in itervalues(self.para_cache):
            css = bs.css
            if css:
                self.register(css, 'block')
        for bs in itervalues(self.run_cache):
            css = bs.css
            if css:
                self.register(css, 'text')

    def class_name(self, css):
        '''Return the class name registered for ``css``, or None.'''
        h = hash(frozenset(iteritems(css)))
        return self.classes.get(h, (None, None))[0]

    def generate_css(self, dest_dir, docx, notes_nopb, nosupsub):
        '''
        Return the stylesheet text: embedded font rules (if any), the fixed
        base rules and one rule per registered class, sorted by class name.

        :param dest_dir: Passed to ``self.fonts.embed_fonts()``.
        :param docx: Passed to ``self.fonts.embed_fonts()``.
        :param notes_nopb: If false, page break rules for ``dl.footnote`` are
            included.
        :param nosupsub: If true, ``sup``/``sub`` vertical-align rules are
            included.
        '''
        ef = self.fonts.embed_fonts(dest_dir, docx)

        s = '''\
            body { font-family: %s; font-size: %s; %s }

            /* In word all paragraphs have zero margins unless explicitly specified in a style */
            p, h1, h2, h3, h4, h5, h6, div { margin: 0; padding: 0 }
            /* In word headings only have bold font if explicitly specified,
                similarly the font size is the body font size, unless explicitly set. */
            h1, h2, h3, h4, h5, h6 { font-weight: normal; font-size: 1rem }
            /* Setting padding-left to zero breaks rendering of lists, so we only set the other values to zero and leave padding-left for the user-agent */
            ul, ol { margin: 0; padding-top: 0; padding-bottom: 0; padding-right: 0 }

            /* The word hyperlink styling will set text-decoration to underline if needed */
            a { text-decoration: none }

            sup.noteref a { text-decoration: none }

            h1.notes-header { page-break-before: always }

            dl.footnote dt { font-size: large }

            dl.footnote dt a { text-decoration: none }

            '''

        if not notes_nopb:
            s += '''\
            dl.footnote { page-break-after: always }
            dl.footnote:last-of-type { page-break-after: avoid }
            '''

        s = s + '''\
            span.tab { white-space: pre }

            p.index-entry { text-indent: 0pt; }
            p.index-entry a:visited { color: blue }
            p.index-entry a:hover { color: red }
            '''

        if nosupsub:
            s = s + '''\
            sup { vertical-align: top }
            sub { vertical-align: bottom }
            '''

        body_color = ''
        if self.body_color.lower() not in ('currentcolor', 'inherit'):
            body_color = f'color: {self.body_color};'
        # The template is indented to match the code, so strip the common
        # margin before filling in the body font/size/color.
        prefix = textwrap.dedent(s) % (self.body_font_family, self.body_font_size, body_color)
        if ef:
            prefix = ef + '\n' + prefix

        ans = []
        for (cls, css) in sorted(itervalues(self.classes), key=lambda x: x[0]):
            b = (f'\t{k}: {v};' for k, v in iteritems(css))
            b = '\n'.join(b)
            ans.append('.{} {{\n{}\n}}\n'.format(cls, b.rstrip(';')))
        return prefix + '\n' + '\n'.join(ans)