%PDF- %PDF-
Direktori : /usr/lib/calibre/calibre/ebooks/docx/ |
Current File : //usr/lib/calibre/calibre/ebooks/docx/tables.py |
#!/usr/bin/env python3
'''
Conversion of DOCX (WordprocessingML) tables to HTML.

The ``read_*`` functions pull individual properties out of ``w:tblPr``,
``w:trPr`` and ``w:tcPr`` elements, the ``*Style`` classes hold those
properties and turn them into CSS, and :class:`Table`/:class:`Tables` resolve
the styles for every row, cell and paragraph and build the ``<table>`` markup.
'''

__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'

from lxml.html.builder import TABLE, TR, TD

from calibre.ebooks.docx.block_styles import inherit, read_shd as rs, read_border, binary_property, border_props, ParagraphStyle, border_to_css
from calibre.ebooks.docx.char_styles import RunStyle
from polyglot.builtins import iteritems, itervalues

# Read from XML {{{
# The style classes below look readers up by name via globals()['read_%s'],
# so the imported shading reader has to be reachable as ``read_shd``.
read_shd = rs
edges = ('left', 'top', 'right', 'bottom')


def _read_width(elem, get):
    '''
    Convert a width-like element (``w:w`` + ``w:type`` attributes) to CSS.

    :param elem: The XML element carrying the ``w:w``/``w:type`` attributes.
    :param get: Attribute getter, called as ``get(elem, name[, default])``.
    :return: ``'0'`` for type ``nil``, ``'auto'`` for ``auto``, a ``pt`` value
        (``w/20``) for ``dxa``, a percentage (``w/50``) for ``pct`` and
        ``inherit`` for any other type.
    '''
    ans = inherit
    try:
        w = int(get(elem, 'w:w'))
    except (TypeError, ValueError):
        # Missing or non-numeric width is treated as zero
        w = 0
    typ = get(elem, 'w:type', 'auto')
    if typ == 'nil':
        ans = '0'
    elif typ == 'auto':
        ans = 'auto'
    elif typ == 'dxa':
        ans = '%.3gpt' % (w/20)
    elif typ == 'pct':
        ans = '%.3g%%' % (w/50)
    return ans


def read_width(parent, dest, XPath, get):
    'Set ``dest.width`` from the ``w:tblW`` child of parent (``inherit`` if absent).'
    ans = inherit
    for tblW in XPath('./w:tblW')(parent):
        ans = _read_width(tblW, get)
    setattr(dest, 'width', ans)


def read_cell_width(parent, dest, XPath, get):
    'Set ``dest.width`` from the ``w:tcW`` child of parent (``inherit`` if absent).'
    ans = inherit
    for tblW in XPath('./w:tcW')(parent):
        ans = _read_width(tblW, get)
    setattr(dest, 'width', ans)


def read_padding(parent, dest, XPath, get):
    '''
    Set ``dest.cell_padding_<edge>`` for every edge.

    The margins are read from ``w:tblCellMar`` when parent is a ``tblPr``
    element and from ``w:tcMar`` otherwise. Edges that are not specified are
    set to ``inherit``.
    '''
    name = 'tblCellMar' if parent.tag.endswith('}tblPr') else 'tcMar'
    ans = {x: inherit for x in edges}
    for mar in XPath('./w:%s' % name)(parent):
        for x in edges:
            for edge in XPath('./w:%s' % x)(mar):
                ans[x] = _read_width(edge, get)
    for x in edges:
        setattr(dest, 'cell_padding_%s' % x, ans[x])


def read_justification(parent, dest, XPath, get):
    '''
    Set ``dest.margin_left``/``dest.margin_right`` from ``w:jc``.

    Alignment is expressed with ``auto`` margins: ``left`` sets the right
    margin to auto, ``right`` sets the left margin to auto and ``center`` sets
    both. Anything else leaves the margins as ``inherit``.
    '''
    left = right = inherit
    for jc in XPath('./w:jc[@w:val]')(parent):
        val = get(jc, 'w:val')
        if not val:
            continue
        if val == 'left':
            right = 'auto'
        elif val == 'right':
            left = 'auto'
        elif val == 'center':
            left = right = 'auto'
    setattr(dest, 'margin_left', left)
    setattr(dest, 'margin_right', right)


def read_spacing(parent, dest, XPath, get):
    'Set ``dest.spacing`` from ``w:tblCellSpacing`` (``inherit`` if absent).'
    ans = inherit
    for cs in XPath('./w:tblCellSpacing')(parent):
        ans = _read_width(cs, get)
    setattr(dest, 'spacing', ans)


def read_float(parent, dest, XPath, get):
    '''
    Set ``dest.float`` to a dict of the attributes of ``w:tblpPr``, keyed by
    attribute name with the namespace stripped, or ``inherit`` if the element
    is absent.
    '''
    ans = inherit
    for x in XPath('./w:tblpPr')(parent):
        ans = {k.rpartition('}')[-1]: v for k, v in iteritems(x.attrib)}
    setattr(dest, 'float', ans)


def read_indent(parent, dest, XPath, get):
    'Set ``dest.indent`` from ``w:tblInd`` (``inherit`` if absent).'
    ans = inherit
    for cs in XPath('./w:tblInd')(parent):
        ans = _read_width(cs, get)
    setattr(dest, 'indent', ans)


border_edges = ('left', 'top', 'right', 'bottom', 'insideH', 'insideV')


def read_borders(parent, dest, XPath, get):
    '''
    Read the border properties for all of ``border_edges`` into dest, using
    ``w:tblBorders`` when parent is a ``tblPr`` element and ``w:tcBorders``
    otherwise. The actual parsing is delegated to ``read_border``.
    '''
    name = 'tblBorders' if parent.tag.endswith('}tblPr') else 'tcBorders'
    read_border(parent, dest, XPath, get, border_edges, name)


def read_height(parent, dest, XPath, get):
    '''
    Set ``dest.height`` to a ``(rule, value)`` tuple read from ``w:trHeight``.

    ``rule`` is one of ``auto``, ``atLeast`` or ``exact`` (default ``auto``)
    and ``value`` is the raw ``w:val`` attribute. Elements with any other
    rule are ignored, leaving ``inherit``.
    '''
    ans = inherit
    for rh in XPath('./w:trHeight')(parent):
        rule = get(rh, 'w:hRule', 'auto')
        if rule in {'auto', 'atLeast', 'exact'}:
            val = get(rh, 'w:val')
            ans = (rule, val)
    setattr(dest, 'height', ans)


def read_vertical_align(parent, dest, XPath, get):
    '''
    Set ``dest.vertical_align`` from ``w:vAlign``. ``center`` maps to
    ``middle``, ``top`` and ``bottom`` map to themselves and any other value
    maps to ``middle``. ``inherit`` if the element is absent.
    '''
    ans = inherit
    for va in XPath('./w:vAlign')(parent):
        val = get(va, 'w:val')
        ans = {'center': 'middle', 'top': 'top', 'bottom': 'bottom'}.get(val, 'middle')
    setattr(dest, 'vertical_align', ans)


def read_col_span(parent, dest, XPath, get):
    'Set ``dest.col_span`` to the integer value of ``w:gridSpan`` (``inherit`` if absent or invalid).'
    ans = inherit
    for gs in XPath('./w:gridSpan')(parent):
        try:
            ans = int(get(gs, 'w:val'))
        except (TypeError, ValueError):
            continue
    setattr(dest, 'col_span', ans)


def read_merge(parent, dest, XPath, get):
    '''
    Set ``dest.hMerge`` and ``dest.vMerge`` from the elements of the same
    name. A merge element without a ``w:val`` attribute means ``continue``.
    '''
    for x in ('hMerge', 'vMerge'):
        ans = inherit
        for m in XPath('./w:%s' % x)(parent):
            ans = get(m, 'w:val', 'continue')
        setattr(dest, x, ans)


def read_band_size(parent, dest, XPath, get):
    '''
    Set ``dest.col_band_size`` and ``dest.row_band_size`` from
    ``w:tblStyleColBandSize``/``w:tblStyleRowBandSize``. The default is 1.
    The value is not range checked here, consumers must cope with zero or
    negative sizes.
    '''
    for x in ('Col', 'Row'):
        ans = 1
        for y in XPath('./w:tblStyle%sBandSize' % x)(parent):
            try:
                ans = int(get(y, 'w:val'))
            except (TypeError, ValueError):
                continue
        setattr(dest, '%s_band_size' % x.lower(), ans)


def read_look(parent, dest, XPath, get):
    'Set ``dest.look`` to the hexadecimal ``w:val`` of ``w:tblLook`` as an int (default 0).'
    ans = 0
    for x in XPath('./w:tblLook')(parent):
        try:
            ans = int(get(x, 'w:val'), 16)
        except (ValueError, TypeError):
            continue
    setattr(dest, 'look', ans)
# }}}


def clone(style):
    '''
    Return a copy of style, or None.

    A new object of the same type is created from ``style.namespace`` and then
    updated from style. None is returned if style is None or if its type
    cannot be constructed from a namespace alone.
    '''
    if style is None:
        return None
    try:
        ans = type(style)(style.namespace)
    except TypeError:
        return None
    ans.update(style)
    return ans


def _is_odd_band(index, band_size):
    '''
    Return True if the zero based row/column index lies in an odd numbered
    band (counting bands from zero) and False if it lies in an even one.

    Return None when band_size is not a positive integer, i.e. when it is
    zero or negative in the document or still ``inherit`` because the table
    had no ``w:tblPr``. No band can be computed in that case and dividing by
    it would raise an exception.
    '''
    if not isinstance(band_size, int) or band_size < 1:
        return None
    return (index // band_size) % 2 == 1


class Style:
    '''
    Base class for table, row and cell styles.

    Subclasses define ``all_properties``, the names of the attributes that
    make up the style. An attribute whose value is ``inherit`` is unset.
    '''

    is_bidi = False

    def update(self, other):
        'Copy every property of other that is not ``inherit`` onto this style.'
        for prop in self.all_properties:
            nval = getattr(other, prop)
            if nval is not inherit:
                setattr(self, prop, nval)

    def apply_bidi(self):
        'Mark this style as belonging to a right-to-left table.'
        self.is_bidi = True

    def convert_spacing(self):
        '''
        Return the CSS for ``self.spacing`` as a dict: collapsed borders for
        ``auto`` or ``'0'``, separated borders with ``border-spacing``
        otherwise and an empty dict when the spacing is ``inherit``.
        '''
        ans = {}
        if self.spacing is not inherit:
            if self.spacing in {'auto', '0'}:
                ans['border-collapse'] = 'collapse'
            else:
                ans['border-collapse'] = 'separate'
                ans['border-spacing'] = self.spacing
        return ans

    def convert_border(self):
        '''
        Return the border and border padding CSS for the four outer edges as
        a dict. For bidi styles the left and right values are swapped.
        '''
        c = {}
        for x in edges:
            border_to_css(x, self, c)
            val = getattr(self, 'padding_%s' % x)
            if val is not inherit:
                c['padding-%s' % x] = '%.3gpt' % val
        if self.is_bidi:
            for a in ('padding-%s', 'border-%s-style', 'border-%s-color', 'border-%s-width'):
                # Read both sides before writing so that the swap does not
                # clobber a value that is still needed
                left_val, right_val = c.get(a % 'left'), c.get(a % 'right')
                if left_val is not None:
                    c[a % 'right'] = left_val
                if right_val is not None:
                    c[a % 'left'] = right_val
        return c


class RowStyle(Style):
    '''
    Style of a table row, read from a ``w:trPr`` element. With no element
    every property is ``inherit``.
    '''

    all_properties = ('height', 'cantSplit', 'hidden', 'spacing',)

    def __init__(self, namespace, trPr=None):
        self.namespace = namespace
        if trPr is None:
            for p in self.all_properties:
                setattr(self, p, inherit)
        else:
            for p in ('hidden', 'cantSplit'):
                setattr(self, p, binary_property(trPr, p, namespace.XPath, namespace.get))
            for p in ('spacing', 'height'):
                f = globals()['read_%s' % p]
                f(trPr, self, namespace.XPath, namespace.get)
        self._css = None

    @property
    def css(self):
        'The CSS for this row as a dict. Computed on first access and cached.'
        if self._css is None:
            c = self._css = {}
            if self.hidden is True:
                c['display'] = 'none'
            if self.cantSplit is True:
                c['page-break-inside'] = 'avoid'
            if self.height is not inherit:
                rule, val = self.height
                if rule != 'auto':
                    try:
                        c['min-height' if rule == 'atLeast' else 'height'] = '%.3gpt' % (int(val)/20)
                    except (ValueError, TypeError):
                        # Missing or non-numeric height, emit nothing
                        pass
            c.update(self.convert_spacing())
        return self._css


class CellStyle(Style):
    '''
    Style of a table cell, read from a ``w:tcPr`` element. With no element
    every property is ``inherit``.
    '''

    all_properties = (
        'background_color', 'cell_padding_left', 'cell_padding_right', 'cell_padding_top',
        'cell_padding_bottom', 'width', 'vertical_align', 'col_span', 'vMerge', 'hMerge', 'row_span',
    ) + tuple(k % edge for edge in border_edges for k in border_props)

    def __init__(self, namespace, tcPr=None):
        self.namespace = namespace
        if tcPr is None:
            for p in self.all_properties:
                setattr(self, p, inherit)
        else:
            for x in ('borders', 'shd', 'padding', 'cell_width', 'vertical_align', 'col_span', 'merge'):
                f = globals()['read_%s' % x]
                f(tcPr, self, namespace.XPath, namespace.get)
            # row_span is never read from XML, it is computed later from the
            # vMerge values (see Table.handle_merged_cells)
            self.row_span = inherit
        self._css = None

    @property
    def css(self):
        'The CSS for this cell as a dict. Computed on first access and cached.'
        if self._css is None:
            self._css = c = {}
            if self.background_color is not inherit:
                c['background-color'] = self.background_color
            if self.width not in (inherit, 'auto'):
                c['width'] = self.width
            c['vertical-align'] = 'top' if self.vertical_align is inherit else self.vertical_align
            for x in edges:
                val = getattr(self, 'cell_padding_%s' % x)
                if val not in (inherit, 'auto'):
                    c['padding-%s' % x] = val
                elif val is inherit and x in {'left', 'right'}:
                    # Nothing specified: use 115/20 pt (presumably the default
                    # Word cell margin of 115 twips - TODO confirm)
                    c['padding-%s' % x] = '%.3gpt' % (115/20)
            # In Word, tables are apparently rendered with some default top and
            # bottom padding irrespective of the cellMargin values. Simulate
            # that here.
            for x in ('top', 'bottom'):
                if c.get('padding-%s' % x, '0pt') == '0pt':
                    c['padding-%s' % x] = '0.5ex'
            c.update(self.convert_border())
        return self._css


class TableStyle(Style):
    '''
    Style of a table, read from a ``w:tblPr`` element. With no element every
    property is ``inherit``.

    When the ``w:tblPr`` belongs to a ``w:style`` element, the conditional
    formatting of that style (its ``w:tblStylePr`` children) is stored in
    ``self.overrides`` as ``{type: {'table'|'row'|'cell'|'para'|'run': style}}``,
    otherwise ``self.overrides`` is ``inherit``.
    '''

    all_properties = (
        'width', 'float', 'cell_padding_left', 'cell_padding_right', 'cell_padding_top',
        'cell_padding_bottom', 'margin_left', 'margin_right', 'background_color',
        'spacing', 'indent', 'overrides', 'col_band_size', 'row_band_size', 'look', 'bidi',
    ) + tuple(k % edge for edge in border_edges for k in border_props)

    def __init__(self, namespace, tblPr=None):
        self.namespace = namespace
        if tblPr is None:
            for p in self.all_properties:
                setattr(self, p, inherit)
        else:
            self.overrides = inherit
            self.bidi = binary_property(tblPr, 'bidiVisual', namespace.XPath, namespace.get)
            for x in ('width', 'float', 'padding', 'shd', 'justification', 'spacing', 'indent', 'borders', 'band_size', 'look'):
                f = globals()['read_%s' % x]
                f(tblPr, self, self.namespace.XPath, self.namespace.get)
            parent = tblPr.getparent()
            if self.namespace.is_tag(parent, 'w:style'):
                self.overrides = {}
                for tblStylePr in self.namespace.XPath('./w:tblStylePr[@w:type]')(parent):
                    otype = self.namespace.get(tblStylePr, 'w:type')
                    orides = self.overrides[otype] = {}
                    for tblPr in self.namespace.XPath('./w:tblPr')(tblStylePr):
                        orides['table'] = TableStyle(self.namespace, tblPr)
                    for trPr in self.namespace.XPath('./w:trPr')(tblStylePr):
                        orides['row'] = RowStyle(self.namespace, trPr)
                    for tcPr in self.namespace.XPath('./w:tcPr')(tblStylePr):
                        orides['cell'] = CellStyle(self.namespace, tcPr)
                    for pPr in self.namespace.XPath('./w:pPr')(tblStylePr):
                        orides['para'] = ParagraphStyle(self.namespace, pPr)
                    for rPr in self.namespace.XPath('./w:rPr')(tblStylePr):
                        orides['run'] = RunStyle(self.namespace, rPr)
        self._css = None

    def resolve_based_on(self, parent):
        'Fill in every property that is still ``inherit`` from the parent style.'
        for p in self.all_properties:
            val = getattr(self, p)
            if val is inherit:
                setattr(self, p, getattr(parent, p))

    @property
    def css(self):
        '''
        The CSS for the table as a dict. Computed on first access and cached.

        For floating tables without an explicit horizontal alignment
        ``self.page`` is needed, it is set by :meth:`Table.apply_markup`.
        '''
        if self._css is None:
            c = self._css = {}
            if self.width not in (inherit, 'auto'):
                c['width'] = self.width
            for x in ('background_color', 'margin_left', 'margin_right'):
                val = getattr(self, x)
                if val is not inherit:
                    c[x.replace('_', '-')] = val
            # The indent becomes the left margin, unless the left margin is
            # needed for alignment
            if self.indent not in (inherit, 'auto') and self.margin_left != 'auto':
                c['margin-left'] = self.indent
            if self.float is not inherit:
                for x in ('left', 'top', 'right', 'bottom'):
                    val = self.float.get('%sFromText' % x, 0)
                    try:
                        val = '%.3gpt' % (int(val) / 20)
                    except (ValueError, TypeError):
                        val = '0'
                    c['margin-%s' % x] = val
                if 'tblpXSpec' in self.float:
                    c['float'] = 'right' if self.float['tblpXSpec'] in {'right', 'outside'} else 'left'
                else:
                    # No explicit alignment, decide based on how far across
                    # the text area of the page the table is positioned
                    page = self.page
                    page_width = page.width - page.margin_left - page.margin_right
                    try:
                        x = int(self.float['tblpX']) / 20
                    except (KeyError, ValueError, TypeError):
                        x = 0
                    c['float'] = 'left' if (x/page_width) < 0.65 else 'right'
            c.update(self.convert_spacing())
            if 'border-collapse' not in c:
                c['border-collapse'] = 'collapse'
            c.update(self.convert_border())
        return self._css


class Table:
    '''
    A single ``w:tbl`` element together with the resolved styles of its rows,
    cells and paragraphs.

    :param namespace: Provides ``XPath``, ``get`` and ``is_tag`` for the
        document's XML namespaces.
    :param tbl: The ``w:tbl`` element.
    :param styles: The document styles, looked up with ``styles.get(id)`` and
        used via ``styles.register()`` to create CSS classes.
    :param para_map: Dict that is updated to map every paragraph directly
        inside a cell of this table to this object.
    :param is_sub_table: True for tables nested inside a cell of another
        table.
    '''

    def __init__(self, namespace, tbl, styles, para_map, is_sub_table=False):
        self.namespace = namespace
        self.tbl = tbl
        self.styles = styles
        self.is_sub_table = is_sub_table

        # Read Table Style
        style = {'table': TableStyle(self.namespace)}
        for tblPr in self.namespace.XPath('./w:tblPr')(tbl):
            for ts in self.namespace.XPath('./w:tblStyle[@w:val]')(tblPr):
                style_id = self.namespace.get(ts, 'w:val')
                s = styles.get(style_id)
                if s is not None:
                    if s.table_style is not None:
                        style['table'].update(s.table_style)
                    if s.paragraph_style is not None:
                        if 'paragraph' in style:
                            style['paragraph'].update(s.paragraph_style)
                        else:
                            style['paragraph'] = s.paragraph_style
                    if s.character_style is not None:
                        if 'run' in style:
                            style['run'].update(s.character_style)
                        else:
                            style['run'] = s.character_style
            # Properties set directly on the table override the named style
            style['table'].update(TableStyle(self.namespace, tblPr))
        self.table_style, self.paragraph_style = style['table'], style.get('paragraph', None)
        self.run_style = style.get('run', None)
        self.overrides = self.table_style.overrides
        if self.overrides is inherit:
            self.overrides = {}
        if 'wholeTable' in self.overrides and 'table' in self.overrides['wholeTable']:
            self.table_style.update(self.overrides['wholeTable']['table'])

        self.style_map = {}
        self.paragraphs = []
        self.cell_map = []

        rows = self.namespace.XPath('./w:tr')(tbl)
        for r, tr in enumerate(rows):
            overrides = self.get_overrides(r, None, len(rows), None)
            self.resolve_row_style(tr, overrides)
            cells = self.namespace.XPath('./w:tc')(tr)
            self.cell_map.append([])
            for c, tc in enumerate(cells):
                overrides = self.get_overrides(r, c, len(rows), len(cells))
                self.resolve_cell_style(tc, overrides, r, c, len(rows), len(cells))
                self.cell_map[-1].append(tc)
                for p in self.namespace.XPath('./w:p')(tc):
                    para_map[p] = self
                    self.paragraphs.append(p)
                    self.resolve_para_style(p, overrides)

        self.handle_merged_cells()
        self.sub_tables = {x: Table(namespace, x, styles, para_map, is_sub_table=True) for x in self.namespace.XPath('./w:tr/w:tc/w:tbl')(tbl)}

    @property
    def bidi(self):
        'True if the table is explicitly marked as right-to-left.'
        return self.table_style.bidi is True

    def override_allowed(self, name):
        'Check if the named override is allowed by the tblLook element'
        if name.endswith('Cell') or name == 'wholeTable':
            return True
        look = self.table_style.look
        if not isinstance(look, int):
            # look is still inherit when the table has no w:tblPr. Use the
            # same default that read_look() uses when w:tblLook is absent.
            look = 0
        if (
            (look & 0x0020 and name == 'firstRow') or (look & 0x0040 and name == 'lastRow') or
            (look & 0x0080 and name == 'firstCol') or (look & 0x0100 and name == 'lastCol')
        ):
            return True
        if name.startswith('band'):
            # For bands the bits are negative: a set bit turns banding off
            if name.endswith('Horz'):
                return not bool(look & 0x0200)
            if name.endswith('Vert'):
                return not bool(look & 0x0400)
        return False

    def get_overrides(self, r, c, num_of_rows, num_of_cols_in_row):
        '''
        Return the names of the conditional formatting overrides that apply
        to the row r (when c is None) or to the cell at row r, column c, as
        a tuple ordered from lowest to highest priority. Overrides that are
        not allowed by the tblLook element are left out.

        If a band size is not a positive integer, no band override is
        generated for that direction. Previously a band size of zero raised
        ZeroDivisionError.
        '''
        overrides = ['wholeTable']

        if c is not None:
            odd_column_band = _is_odd_band(c, self.table_style.col_band_size)
            if odd_column_band is not None:
                overrides.append('band%dVert' % (1 if odd_column_band else 2))
        odd_row_band = _is_odd_band(r, self.table_style.row_band_size)
        if odd_row_band is not None:
            overrides.append('band%dHorz' % (1 if odd_row_band else 2))

        # According to the OOXML spec columns should have higher override
        # priority than rows, but Word seems to do it the other way around.
        if c is not None:
            if c == 0:
                overrides.append('firstCol')
            if c >= num_of_cols_in_row - 1:
                overrides.append('lastCol')
        if r == 0:
            overrides.append('firstRow')
        if r >= num_of_rows - 1:
            overrides.append('lastRow')
        if c is not None:
            if r == 0:
                if c == 0:
                    overrides.append('nwCell')
                if c == num_of_cols_in_row - 1:
                    overrides.append('neCell')
            if r == num_of_rows - 1:
                if c == 0:
                    overrides.append('swCell')
                if c == num_of_cols_in_row - 1:
                    overrides.append('seCell')
        return tuple(filter(self.override_allowed, overrides))

    def resolve_row_style(self, tr, overrides):
        '''
        Compute the style of the row tr and store it in ``self.style_map``.
        The applicable overrides are applied in order, followed by the
        properties set directly on the row.
        '''
        rs = RowStyle(self.namespace)
        for o in overrides:
            if o in self.overrides:
                ovr = self.overrides[o]
                ors = ovr.get('row', None)
                if ors is not None:
                    rs.update(ors)

        for trPr in self.namespace.XPath('./w:trPr')(tr):
            rs.update(RowStyle(self.namespace, trPr))
        if self.bidi:
            rs.apply_bidi()
        self.style_map[tr] = rs

    def resolve_cell_style(self, tc, overrides, row, col, rows, cols_in_row):
        '''
        Compute the style of the cell tc and store it in ``self.style_map``.

        The applicable overrides are applied in order, followed by the
        properties set directly on the cell. Unset paddings are taken from
        the table style and unset borders on edges that face another cell
        are taken from the insideH/insideV borders of the cell or the table.

        :param row, col: Zero based position of the cell.
        :param rows: Number of rows in the table.
        :param cols_in_row: Number of cells in the row of this cell.
        '''
        cs = CellStyle(self.namespace)
        for o in overrides:
            if o in self.overrides:
                ovr = self.overrides[o]
                ors = ovr.get('cell', None)
                if ors is not None:
                    cs.update(ors)

        for tcPr in self.namespace.XPath('./w:tcPr')(tc):
            cs.update(CellStyle(self.namespace, tcPr))

        for x in edges:
            p = 'cell_padding_%s' % x
            val = getattr(cs, p)
            if val is inherit:
                setattr(cs, p, getattr(self.table_style, p))

            is_inside_edge = (
                (x == 'left' and col > 0) or
                (x == 'top' and row > 0) or
                (x == 'right' and col < cols_in_row - 1) or
                (x == 'bottom' and row < rows - 1)
            )
            inside_edge = ('insideH' if x in {'top', 'bottom'} else 'insideV') if is_inside_edge else None
            for prop in border_props:
                if not prop.startswith('border'):
                    continue
                eprop = prop % x
                iprop = (prop % inside_edge) if inside_edge else None
                val = getattr(cs, eprop)
                if val is inherit and iprop is not None:
                    # Use the insideX borders if the main cell borders are not
                    # specified
                    val = getattr(cs, iprop)
                    if val is inherit:
                        val = getattr(self.table_style, iprop)
                if not is_inside_edge and val == 'none':
                    # Cell borders must override table borders even when the
                    # table border is not null and the cell border is null.
                    val = 'hidden'
                setattr(cs, eprop, val)

        if self.bidi:
            cs.apply_bidi()
        self.style_map[tc] = cs

    def resolve_para_style(self, p, overrides):
        '''
        Compute the table supplied paragraph and run styles for the paragraph
        p and store them in ``self.style_map`` as a two item list
        ``[paragraph_style, run_style]``. Either item may be None.
        '''
        # Work on copies so that the styles shared by the whole table are not
        # modified by the overrides of a single cell
        text_styles = [clone(self.paragraph_style), clone(self.run_style)]

        for o in overrides:
            if o in self.overrides:
                ovr = self.overrides[o]
                for i, name in enumerate(('para', 'run')):
                    ops = ovr.get(name, None)
                    if ops is not None:
                        if text_styles[i] is None:
                            text_styles[i] = ops
                        else:
                            text_styles[i].update(ops)
        self.style_map[p] = text_styles

    def handle_merged_cells(self):
        '''
        Convert vMerge/hMerge runs of cells into row and column spans. The
        first cell of every run gets its ``row_span``/``col_span`` set to the
        length of the run and the remaining cells of the run are removed from
        the XML tree.
        '''
        if not self.cell_map:
            return
        # Handle vMerge
        max_col_num = max(len(r) for r in self.cell_map)
        for c in range(max_col_num):
            # Rows can have differing numbers of cells, pad with None
            cells = [row[c] if c < len(row) else None for row in self.cell_map]
            runs = [[]]
            for cell in cells:
                try:
                    s = self.style_map[cell]
                except KeyError:  # cell is None
                    s = CellStyle(self.namespace)
                if s.vMerge == 'restart':
                    runs.append([cell])
                elif s.vMerge == 'continue':
                    runs[-1].append(cell)
                else:
                    runs.append([])
            for run in runs:
                if len(run) > 1:
                    self.style_map[run[0]].row_span = len(run)
                    for tc in run[1:]:
                        tc.getparent().remove(tc)

        # Handle hMerge
        for cells in self.cell_map:
            runs = [[]]
            for cell in cells:
                try:
                    s = self.style_map[cell]
                except KeyError:  # cell is None
                    s = CellStyle(self.namespace)
                if s.col_span is not inherit:
                    # An explicit gridSpan takes precedence over hMerge
                    runs.append([])
                    continue
                if s.hMerge == 'restart':
                    runs.append([cell])
                elif s.hMerge == 'continue':
                    runs[-1].append(cell)
                else:
                    runs.append([])

            for run in runs:
                if len(run) > 1:
                    self.style_map[run[0]].col_span = len(run)
                    for tc in run[1:]:
                        tc.getparent().remove(tc)

    def __iter__(self):
        'Iterate over the paragraphs of this table, followed by those of its sub-tables.'
        yield from self.paragraphs
        for t in itervalues(self.sub_tables):
            yield from t

    def apply_markup(self, rmap, page, parent=None):
        '''
        Build the ``<table>`` element for this table and move the already
        converted paragraphs into its cells.

        :param rmap: Maps ``w:p`` elements to their converted HTML elements.
        :param page: Page object, stored on the table style for use when
            computing the CSS of floating tables.
        :param parent: Element to append the table to. If None, the table is
            inserted before the converted element of its first paragraph and
            nothing is done if the table has no paragraphs at all.
        '''
        table = TABLE('\n\t\t')
        if self.bidi:
            table.set('dir', 'rtl')
        self.table_style.page = page
        style_map = {}
        if parent is None:
            try:
                first_para = rmap[next(iter(self))]
            except StopIteration:
                return
            parent = first_para.getparent()
            idx = parent.index(first_para)
            parent.insert(idx, table)
        else:
            parent.append(table)
        for row in self.namespace.XPath('./w:tr')(self.tbl):
            tr = TR('\n\t\t\t')
            style_map[tr] = self.style_map[row]
            tr.tail = '\n\t\t'
            table.append(tr)
            for tc in self.namespace.XPath('./w:tc')(row):
                td = TD()
                style_map[td] = s = self.style_map[tc]
                if s.col_span is not inherit:
                    td.set('colspan', str(s.col_span))
                if s.row_span is not inherit:
                    td.set('rowspan', str(s.row_span))
                td.tail = '\n\t\t\t'
                tr.append(td)
                for x in self.namespace.XPath('./w:p|./w:tbl')(tc):
                    if x.tag.endswith('}p'):
                        td.append(rmap[x])
                    else:
                        self.sub_tables[x].apply_markup(rmap, page, parent=td)
            if len(tr):
                tr[-1].tail = '\n\t\t'
        if len(table):
            table[-1].tail = '\n\t'

        table_style = self.table_style.css
        if table_style:
            table.set('class', self.styles.register(table_style, 'table'))
        for elem, style in iteritems(style_map):
            css = style.css
            if css:
                elem.set('class', self.styles.register(css, elem.tag))


class Tables:
    '''
    All the top level tables of a document, plus a map from paragraphs to the
    table that directly contains them.
    '''

    def __init__(self, namespace):
        self.tables = []
        self.para_map = {}
        self.sub_tables = set()
        self.namespace = namespace

    def register(self, tbl, styles):
        '''
        Create a :class:`Table` for the ``w:tbl`` element tbl, unless it is
        already known as a sub-table of a previously registered table.
        '''
        if tbl in self.sub_tables:
            return
        self.tables.append(Table(self.namespace, tbl, styles, self.para_map))
        self.sub_tables |= set(self.tables[-1].sub_tables)

    def apply_markup(self, object_map, page_map):
        '''
        Generate the markup for all registered tables.

        :param object_map: Maps converted elements to the source elements
            they were created from, it is inverted before use.
        :param page_map: Maps ``w:tbl`` elements to their page object.
        '''
        rmap = {v: k for k, v in iteritems(object_map)}
        for table in self.tables:
            table.apply_markup(rmap, page_map[table.tbl])

    def para_style(self, p):
        'The table supplied paragraph style for p, or None.'
        table = self.para_map.get(p, None)
        if table is not None:
            return table.style_map.get(p, (None, None))[0]

    def run_style(self, p):
        'The table supplied run style for p, or None.'
        table = self.para_map.get(p, None)
        if table is not None:
            return table.style_map.get(p, (None, None))[1]