%PDF- %PDF-
Mini Shell

Mini Shell

Direktori : /usr/lib/calibre/calibre/ebooks/docx/
Upload File :
Create Path :
Current File : //usr/lib/calibre/calibre/ebooks/docx/tables.py

#!/usr/bin/env python3


__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'

from lxml.html.builder import TABLE, TR, TD

from calibre.ebooks.docx.block_styles import inherit, read_shd as rs, read_border, binary_property, border_props, ParagraphStyle, border_to_css
from calibre.ebooks.docx.char_styles import RunStyle
from polyglot.builtins import iteritems, itervalues

# Read from XML {{{
# Alias for block_styles.read_shd so that the globals()['read_%s' % x] lookups
# in CellStyle/TableStyle can find the reader for 'shd' under a read_* name.
read_shd = rs
# Names of the four box edges iterated when reading and emitting per-edge
# padding and border properties.
edges = ('left', 'top', 'right', 'bottom')


def _read_width(elem, get):
    ans = inherit
    try:
        w = int(get(elem, 'w:w'))
    except (TypeError, ValueError):
        w = 0
    typ = get(elem, 'w:type', 'auto')
    if typ == 'nil':
        ans = '0'
    elif typ == 'auto':
        ans = 'auto'
    elif typ == 'dxa':
        ans = '%.3gpt' % (w/20)
    elif typ == 'pct':
        ans = '%.3g%%' % (w/50)
    return ans


def read_width(parent, dest, XPath, get):
    """Store the table width found under *parent* on ``dest.width``.

    Every ``w:tblW`` child of *parent* is converted with ``_read_width``;
    the last one wins. ``inherit`` is stored when there is none.
    """
    width = inherit
    find_widths = XPath('./w:tblW')
    for node in find_widths(parent):
        width = _read_width(node, get)
    dest.width = width


def read_cell_width(parent, dest, XPath, get):
    """Store the cell width found under *parent* on ``dest.width``.

    Every ``w:tcW`` child of *parent* is converted with ``_read_width``;
    the last one wins. ``inherit`` is stored when there is none.
    """
    width = inherit
    find_widths = XPath('./w:tcW')
    for node in find_widths(parent):
        width = _read_width(node, get)
    dest.width = width


def read_padding(parent, dest, XPath, get):
    """Read per-edge cell margins into ``dest.cell_padding_<edge>``.

    The margins container is ``w:tblCellMar`` when *parent* is a ``tblPr``
    element (judged by its tag suffix) and ``w:tcMar`` otherwise. Edges with
    no corresponding child element are stored as ``inherit``.
    """
    if parent.tag.endswith('}tblPr'):
        container = 'tblCellMar'
    else:
        container = 'tcMar'
    padding = dict.fromkeys(edges, inherit)
    for margins in XPath('./w:%s' % container)(parent):
        for side in edges:
            for node in XPath('./w:%s' % side)(margins):
                padding[side] = _read_width(node, get)
    # dict preserves the order of `edges`, so attributes are set in that order
    for side, value in padding.items():
        setattr(dest, 'cell_padding_%s' % side, value)


def read_justification(parent, dest, XPath, get):
    """Map ``w:jc`` alignment onto ``dest.margin_left`` / ``dest.margin_right``.

    ``left`` makes the right margin ``'auto'``, ``right`` makes the left
    margin ``'auto'`` and ``center`` makes both ``'auto'``. Margins not
    touched by any ``w:jc`` element stay ``inherit``; empty or unrecognised
    values are ignored.
    """
    # Which margin(s) each recognised alignment value turns into 'auto'
    auto_sides = {
        'left': ('right',),
        'right': ('left',),
        'center': ('left', 'right'),
    }
    margins = {'left': inherit, 'right': inherit}
    for jc in XPath('./w:jc[@w:val]')(parent):
        alignment = get(jc, 'w:val')
        if not alignment:
            continue
        for side in auto_sides.get(alignment, ()):
            margins[side] = 'auto'
    dest.margin_left = margins['left']
    dest.margin_right = margins['right']


def read_spacing(parent, dest, XPath, get):
    """Store the cell spacing found under *parent* on ``dest.spacing``.

    Every ``w:tblCellSpacing`` child is converted with ``_read_width``; the
    last one wins. ``inherit`` is stored when there is none.
    """
    spacing = inherit
    find_spacing = XPath('./w:tblCellSpacing')
    for node in find_spacing(parent):
        spacing = _read_width(node, get)
    dest.spacing = spacing


def read_float(parent, dest, XPath, get):
    """Store floating-table positioning attributes on ``dest.float``.

    For a ``w:tblpPr`` child of *parent*, ``dest.float`` becomes a dict of
    its attributes keyed by local name (the ``{namespace}`` prefix is
    stripped). With several such children the last one wins; with none,
    ``inherit`` is stored.
    """
    position = inherit
    for node in XPath('./w:tblpPr')(parent):
        position = {}
        for qualified_name, value in iteritems(node.attrib):
            local_name = qualified_name.rpartition('}')[2]
            position[local_name] = value
    dest.float = position


def read_indent(parent, dest, XPath, get):
    """Store the table indent found under *parent* on ``dest.indent``.

    Every ``w:tblInd`` child is converted with ``_read_width``; the last one
    wins. ``inherit`` is stored when there is none.
    """
    indent = inherit
    find_indent = XPath('./w:tblInd')
    for node in find_indent(parent):
        indent = _read_width(node, get)
    dest.indent = indent


# Edge names used for table/cell border properties: the four box edges plus
# the insideH / insideV edges.
border_edges = ('left', 'top', 'right', 'bottom', 'insideH', 'insideV')


def read_borders(parent, dest, XPath, get):
    """Read border properties for every edge in ``border_edges`` onto *dest*.

    Delegates to ``read_border``, naming the borders container
    ``tblBorders`` when *parent* is a ``tblPr`` element (judged by its tag
    suffix) and ``tcBorders`` otherwise.
    """
    if parent.tag.endswith('}tblPr'):
        container = 'tblBorders'
    else:
        container = 'tcBorders'
    read_border(parent, dest, XPath, get, border_edges, container)


def read_height(parent, dest, XPath, get):
    """Store the row height rule and raw value on ``dest.height``.

    For each ``w:trHeight`` child whose ``w:hRule`` (default ``'auto'``) is
    one of ``auto``, ``atLeast`` or ``exact``, ``dest.height`` becomes the
    tuple ``(rule, w:val)`` with the value left unconverted. Children with
    any other rule are skipped; with no usable child, ``inherit`` is stored.
    """
    height = inherit
    for node in XPath('./w:trHeight')(parent):
        rule = get(node, 'w:hRule', 'auto')
        if rule not in {'auto', 'atLeast', 'exact'}:
            continue
        height = (rule, get(node, 'w:val'))
    dest.height = height


def read_vertical_align(parent, dest, XPath, get):
    """Store the CSS vertical alignment on ``dest.vertical_align``.

    The ``w:val`` of a ``w:vAlign`` child is mapped ``center`` -> ``middle``,
    ``top`` -> ``top``, ``bottom`` -> ``bottom``; any other value (including
    a missing one) becomes ``middle``. Without a ``w:vAlign`` child,
    ``inherit`` is stored.
    """
    css_names = {'center': 'middle', 'top': 'top', 'bottom': 'bottom'}
    alignment = inherit
    for node in XPath('./w:vAlign')(parent):
        alignment = css_names.get(get(node, 'w:val'), 'middle')
    dest.vertical_align = alignment


def read_col_span(parent, dest, XPath, get):
    """Store the integer ``w:gridSpan`` value on ``dest.col_span``.

    ``w:gridSpan`` children whose ``w:val`` is missing or not an integer are
    ignored; the last valid one wins. ``inherit`` is stored when none is
    usable.
    """
    span = inherit
    for node in XPath('./w:gridSpan')(parent):
        try:
            parsed = int(get(node, 'w:val'))
        except (TypeError, ValueError):
            continue
        else:
            span = parsed
    dest.col_span = span


def read_merge(parent, dest, XPath, get):
    """Store merge state on ``dest.hMerge`` and ``dest.vMerge``.

    For each of ``w:hMerge`` and ``w:vMerge``, the attribute on *dest* is
    set to the element's ``w:val`` (``'continue'`` when the attribute is
    absent), or to ``inherit`` when *parent* has no such child.
    """
    for attr in ('hMerge', 'vMerge'):
        state = inherit
        for node in XPath('./w:%s' % attr)(parent):
            state = get(node, 'w:val', 'continue')
        setattr(dest, attr, state)


def read_band_size(parent, dest, XPath, get):
    """Store banding sizes on ``dest.col_band_size`` / ``dest.row_band_size``.

    Reads ``w:val`` from the ``w:tblStyleColBandSize`` and
    ``w:tblStyleRowBandSize`` children of *parent*. Each size defaults to 1;
    values that are missing, not integers, or not positive are ignored so
    the stored size is always an integer >= 1.
    """
    for axis in ('Col', 'Row'):
        size = 1
        for node in XPath('./w:tblStyle%sBandSize' % axis)(parent):
            try:
                val = int(get(node, 'w:val'))
            except (TypeError, ValueError):
                continue
            # The band size is later used as a divisor when deciding which
            # band a row/column belongs to, so zero or negative sizes must
            # not be stored.
            if val > 0:
                size = val
        setattr(dest, '%s_band_size' % axis.lower(), size)


def read_look(parent, dest, XPath, get):
    """Store the ``w:tblLook`` conditional-formatting bitmask on ``dest.look``.

    The hexadecimal ``w:val`` attribute is used when it parses. Otherwise
    the mask is assembled from the individual on/off attributes
    (``w:firstRow``, ``w:lastRow``, ``w:firstColumn``, ``w:lastColumn``,
    ``w:noHBand``, ``w:noVBand``), if any of them is present. A
    ``w:tblLook`` element with neither form leaves the value unchanged. The
    default is 0.
    """
    # Bit assigned to each flag attribute, matching the layout of w:val
    flag_bits = (
        ('w:firstRow', 0x0020),
        ('w:lastRow', 0x0040),
        ('w:firstColumn', 0x0080),
        ('w:lastColumn', 0x0100),
        ('w:noHBand', 0x0200),
        ('w:noVBand', 0x0400),
    )
    look = 0
    for node in XPath('./w:tblLook')(parent):
        try:
            look = int(get(node, 'w:val'), 16)
        except (ValueError, TypeError):
            # No usable bitmask: fall back to the per-flag attributes
            raw_flags = [(get(node, name), bit) for name, bit in flag_bits]
            if all(raw is None for raw, _ in raw_flags):
                continue
            look = 0
            for raw, bit in raw_flags:
                if raw in ('1', 'true', 'on'):
                    look |= bit
    dest.look = look

# }}}


def clone(style):
    """Return a copy of *style*, or None when it cannot be copied.

    A new object of the same type is constructed from ``style.namespace``
    and then filled through its ``update`` method. ``None`` is returned when
    *style* is ``None`` or when construction raises ``TypeError``.
    """
    if style is None:
        return None
    factory = type(style)
    try:
        duplicate = factory(style.namespace)
    except TypeError:
        return None
    else:
        duplicate.update(style)
        return duplicate


class Style:
    """Shared behaviour for the row, cell and table style classes.

    Subclasses provide ``all_properties``, the tuple of attribute names that
    take part in ``update``.
    """

    # Set to True by apply_bidi(); makes convert_border() mirror left/right.
    is_bidi = False

    def update(self, other):
        """Copy every property of *other* that is not ``inherit`` onto self."""
        for name in self.all_properties:
            incoming = getattr(other, name)
            if incoming is inherit:
                continue
            setattr(self, name, incoming)

    def apply_bidi(self):
        """Mark this style as belonging to a right-to-left table."""
        self.is_bidi = True

    def convert_spacing(self):
        """Return the border-collapse/border-spacing CSS for ``self.spacing``.

        An inherited spacing yields an empty dict; ``'auto'`` or ``'0'``
        collapses borders; anything else separates them with that spacing.
        """
        spacing = self.spacing
        if spacing is inherit:
            return {}
        if spacing in {'auto', '0'}:
            return {'border-collapse': 'collapse'}
        return {'border-collapse': 'separate', 'border-spacing': spacing}

    def convert_border(self):
        """Return the border and padding CSS for the four box edges.

        For a bidi style, each left value that is set is copied to the right
        key and each right value that is set is copied to the left key.
        """
        css = {}
        for edge in edges:
            border_to_css(edge, self, css)
            padding = getattr(self, 'padding_%s' % edge)
            if padding is not inherit:
                css['padding-%s' % edge] = '%.3gpt' % padding
        if not self.is_bidi:
            return css
        for template in ('padding-%s', 'border-%s-style', 'border-%s-color', 'border-%s-width'):
            left_key = template % 'left'
            right_key = template % 'right'
            # Read both sides before writing either so the swap is symmetric
            left_val, right_val = css.get(left_key), css.get(right_key)
            if left_val is not None:
                css[right_key] = left_val
            if right_val is not None:
                css[left_key] = right_val
        return css


class RowStyle(Style):
    """Style of a table row, optionally read from a ``w:trPr`` element."""

    all_properties = ('height', 'cantSplit', 'hidden', 'spacing',)

    def __init__(self, namespace, trPr=None):
        """Create the style; with no *trPr* every property is ``inherit``."""
        self.namespace = namespace
        if trPr is None:
            for name in self.all_properties:
                setattr(self, name, inherit)
        else:
            for flag in ('hidden', 'cantSplit'):
                setattr(self, flag, binary_property(trPr, flag, namespace.XPath, namespace.get))
            for reader in (read_spacing, read_height):
                reader(trPr, self, namespace.XPath, namespace.get)
        # CSS dict, built on first access of the css property
        self._css = None

    @property
    def css(self):
        """CSS declarations for this row as a dict, computed once and cached."""
        if self._css is not None:
            return self._css
        rules = self._css = {}
        if self.hidden is True:
            rules['display'] = 'none'
        if self.cantSplit is True:
            rules['page-break-inside'] = 'avoid'
        if self.height is not inherit:
            rule, twips = self.height
            if rule != 'auto':
                prop = 'min-height' if rule == 'atLeast' else 'height'
                try:
                    rules[prop] = '%.3gpt' % (int(twips)/20)
                except (ValueError, TypeError):
                    # Missing or non-numeric height value: emit no height
                    pass
        rules.update(self.convert_spacing())
        return rules


class CellStyle(Style):
    """Style of a table cell, optionally read from a ``w:tcPr`` element."""

    all_properties = ('background_color', 'cell_padding_left', 'cell_padding_right', 'cell_padding_top',
        'cell_padding_bottom', 'width', 'vertical_align', 'col_span', 'vMerge', 'hMerge', 'row_span',
    ) + tuple(template % side for side in border_edges for template in border_props)

    def __init__(self, namespace, tcPr=None):
        """Create the style; with no *tcPr* every property is ``inherit``."""
        self.namespace = namespace
        if tcPr is None:
            for name in self.all_properties:
                setattr(self, name, inherit)
        else:
            readers = (
                read_borders, read_shd, read_padding, read_cell_width,
                read_vertical_align, read_col_span, read_merge,
            )
            for reader in readers:
                reader(tcPr, self, namespace.XPath, namespace.get)
            # No reader sets row_span, so it starts out as inherit
            self.row_span = inherit
        # CSS dict, built on first access of the css property
        self._css = None

    @property
    def css(self):
        """CSS declarations for this cell as a dict, computed once and cached."""
        if self._css is not None:
            return self._css
        self._css = rules = {}
        if self.background_color is not inherit:
            rules['background-color'] = self.background_color
        if self.width not in (inherit, 'auto'):
            rules['width'] = self.width
        if self.vertical_align is inherit:
            rules['vertical-align'] = 'top'
        else:
            rules['vertical-align'] = self.vertical_align
        for side in edges:
            padding = getattr(self, 'cell_padding_%s' % side)
            if padding not in (inherit, 'auto'):
                rules['padding-%s' % side] = padding
            elif padding is inherit and side in {'left', 'right'}:
                # Unspecified left/right padding falls back to 115/20 pt
                rules['padding-%s' % side] = '%.3gpt' % (115/20)
        # In Word, tables are apparently rendered with some default top and
        # bottom padding irrespective of the cellMargin values. Simulate
        # that here.
        for side in ('top', 'bottom'):
            key = 'padding-%s' % side
            if rules.get(key, '0pt') == '0pt':
                rules[key] = '0.5ex'
        rules.update(self.convert_border())
        return rules


class TableStyle(Style):
    """Style of a whole table, optionally read from a ``w:tblPr`` element.

    When the ``w:tblPr`` sits directly inside a ``w:style`` element, the
    conditional-formatting overrides declared by that style's
    ``w:tblStylePr`` children are collected in ``self.overrides``.
    """

    all_properties = (
        'width', 'float', 'cell_padding_left', 'cell_padding_right', 'cell_padding_top',
        'cell_padding_bottom', 'margin_left', 'margin_right', 'background_color',
        'spacing', 'indent', 'overrides', 'col_band_size', 'row_band_size', 'look', 'bidi',
    ) + tuple(template % side for side in border_edges for template in border_props)

    def __init__(self, namespace, tblPr=None):
        """Create the style; with no *tblPr* every property is ``inherit``."""
        self.namespace = namespace
        if tblPr is None:
            for name in self.all_properties:
                setattr(self, name, inherit)
        else:
            ns = self.namespace
            self.overrides = inherit
            self.bidi = binary_property(tblPr, 'bidiVisual', namespace.XPath, namespace.get)
            readers = (
                read_width, read_float, read_padding, read_shd, read_justification,
                read_spacing, read_indent, read_borders, read_band_size, read_look,
            )
            for reader in readers:
                reader(tblPr, self, ns.XPath, ns.get)
            owner = tblPr.getparent()
            if ns.is_tag(owner, 'w:style'):
                # (child element, key in the override dict, style class)
                override_kinds = (
                    ('tblPr', 'table', TableStyle),
                    ('trPr', 'row', RowStyle),
                    ('tcPr', 'cell', CellStyle),
                    ('pPr', 'para', ParagraphStyle),
                    ('rPr', 'run', RunStyle),
                )
                self.overrides = {}
                for tblStylePr in ns.XPath('./w:tblStylePr[@w:type]')(owner):
                    override_type = ns.get(tblStylePr, 'w:type')
                    entry = self.overrides[override_type] = {}
                    for tag, key, factory in override_kinds:
                        for node in ns.XPath('./w:%s' % tag)(tblStylePr):
                            entry[key] = factory(ns, node)
        # CSS dict, built on first access of the css property
        self._css = None

    def resolve_based_on(self, parent):
        """Fill every property still ``inherit`` with the value from *parent*."""
        for name in self.all_properties:
            if getattr(self, name) is inherit:
                setattr(self, name, getattr(parent, name))

    @property
    def css(self):
        """CSS declarations for the table as a dict, computed once and cached.

        For a floating table that has no ``tblpXSpec`` attribute,
        ``self.page`` is read; it is not set by this class and must have
        been assigned beforehand.
        """
        if self._css is not None:
            return self._css
        rules = self._css = {}
        if self.width not in (inherit, 'auto'):
            rules['width'] = self.width
        for attr in ('background_color', 'margin_left', 'margin_right'):
            value = getattr(self, attr)
            if value is not inherit:
                rules[attr.replace('_', '-')] = value
        if self.indent not in (inherit, 'auto') and self.margin_left != 'auto':
            rules['margin-left'] = self.indent
        if self.float is not inherit:
            # Distance-from-text attributes become the margins of the float
            for side in ('left', 'top', 'right', 'bottom'):
                distance = self.float.get('%sFromText' % side, 0)
                try:
                    distance = '%.3gpt' % (int(distance) / 20)
                except (ValueError, TypeError):
                    distance = '0'
                rules['margin-%s' % side] = distance
            if 'tblpXSpec' in self.float:
                if self.float['tblpXSpec'] in {'right', 'outside'}:
                    rules['float'] = 'right'
                else:
                    rules['float'] = 'left'
            else:
                # No named position: choose the side from where the
                # horizontal offset falls within the usable page width.
                page = self.page
                usable_width = page.width - page.margin_left - page.margin_right
                try:
                    offset = int(self.float['tblpX']) / 20
                except (KeyError, ValueError, TypeError):
                    offset = 0
                rules['float'] = 'left' if (offset/usable_width) < 0.65 else 'right'
        rules.update(self.convert_spacing())
        rules.setdefault('border-collapse', 'collapse')
        rules.update(self.convert_border())
        return rules


class Table:
    """Resolved styling and HTML generation for one ``w:tbl`` element.

    Construction resolves the table, row, cell and paragraph/run styles for
    everything directly inside *tbl* (stored in ``self.style_map``), merges
    cells joined by ``vMerge``/``hMerge`` and recursively builds a ``Table``
    for every directly nested ``w:tbl`` (``self.sub_tables``).
    """

    def __init__(self, namespace, tbl, styles, para_map, is_sub_table=False):
        """
        :param namespace: provides the ``XPath``, ``get`` and ``is_tag`` helpers
        :param tbl: the ``w:tbl`` element
        :param styles: style registry; ``styles.get(style_id)`` is used to
            look up the table's named style and ``styles.register`` to
            obtain class names when markup is applied
        :param para_map: dict updated in place, mapping every ``w:p`` found
            directly in a cell of this table to this ``Table``
        :param is_sub_table: True when this table is nested in another cell
        """
        self.namespace = namespace
        self.tbl = tbl
        self.styles = styles
        self.is_sub_table = is_sub_table

        # Read Table Style
        style = {'table':TableStyle(self.namespace)}
        for tblPr in self.namespace.XPath('./w:tblPr')(tbl):
            for ts in self.namespace.XPath('./w:tblStyle[@w:val]')(tblPr):
                style_id = self.namespace.get(ts, 'w:val')
                s = styles.get(style_id)
                if s is not None:
                    if s.table_style is not None:
                        style['table'].update(s.table_style)
                    if s.paragraph_style is not None:
                        if 'paragraph' in style:
                            style['paragraph'].update(s.paragraph_style)
                        else:
                            style['paragraph'] = s.paragraph_style
                    if s.character_style is not None:
                        if 'run' in style:
                            style['run'].update(s.character_style)
                        else:
                            style['run'] = s.character_style
            # Properties set directly on the table win over the named style
            style['table'].update(TableStyle(self.namespace, tblPr))
        self.table_style, self.paragraph_style = style['table'], style.get('paragraph', None)
        self.run_style = style.get('run', None)
        self.overrides = self.table_style.overrides
        if self.overrides is inherit:
            self.overrides = {}
        if 'wholeTable' in self.overrides and 'table' in self.overrides['wholeTable']:
            self.table_style.update(self.overrides['wholeTable']['table'])

        self.style_map = {}
        self.paragraphs = []
        self.cell_map = []

        rows = self.namespace.XPath('./w:tr')(tbl)
        for r, tr in enumerate(rows):
            overrides = self.get_overrides(r, None, len(rows), None)
            self.resolve_row_style(tr, overrides)
            cells = self.namespace.XPath('./w:tc')(tr)
            self.cell_map.append([])
            for c, tc in enumerate(cells):
                overrides = self.get_overrides(r, c, len(rows), len(cells))
                self.resolve_cell_style(tc, overrides, r, c, len(rows), len(cells))
                self.cell_map[-1].append(tc)
                for p in self.namespace.XPath('./w:p')(tc):
                    para_map[p] = self
                    self.paragraphs.append(p)
                    self.resolve_para_style(p, overrides)

        self.handle_merged_cells()
        self.sub_tables = {x:Table(namespace, x, styles, para_map, is_sub_table=True) for x in self.namespace.XPath('./w:tr/w:tc/w:tbl')(tbl)}

    @property
    def bidi(self):
        """True when the resolved table style has ``bidi`` set to True."""
        return self.table_style.bidi is True

    def override_allowed(self, name):
        'Check if the named override is allowed by the tblLook element'
        if name.endswith('Cell') or name == 'wholeTable':
            return True
        look = self.table_style.look
        if not isinstance(look, int):
            # look is only an integer once a w:tblPr has been read; without
            # one it is still the inherit sentinel, which cannot be used as
            # a bitmask. Treat that as "no flags set".
            look = 0
        if (look & 0x0020 and name == 'firstRow') or (look & 0x0040 and name == 'lastRow') or \
           (look & 0x0080 and name == 'firstCol') or (look & 0x0100 and name == 'lastCol'):
            return True
        if name.startswith('band'):
            # 0x0200 / 0x0400 switch horizontal / vertical banding OFF
            if name.endswith('Horz'):
                return not bool(look & 0x0200)
            if name.endswith('Vert'):
                return not bool(look & 0x0400)
        return False

    def get_overrides(self, r, c, num_of_rows, num_of_cols_in_row):
        """Names of the conditional overrides that apply at row *r*, column *c*.

        *c* is None when resolving a row rather than a cell; column-based
        overrides are then left out. The names are returned as a tuple in
        increasing priority order, already filtered through
        ``override_allowed``.
        """
        overrides = ['wholeTable']

        def band_index(position, band_size):
            # A band size that is not a positive integer (zero, negative, or
            # never read at all) would make the division fail or give
            # nonsensical bands; fall back to a band size of 1.
            if not isinstance(band_size, int) or band_size < 1:
                band_size = 1
            return position // band_size
        if c is not None:
            odd_column_band = (band_index(c, self.table_style.col_band_size) % 2) == 1
            overrides.append('band%dVert' % (1 if odd_column_band else 2))
        odd_row_band = (band_index(r, self.table_style.row_band_size) % 2) == 1
        overrides.append('band%dHorz' % (1 if odd_row_band else 2))

        # According to the OOXML spec columns should have higher override
        # priority than rows, but Word seems to do it the other way around.
        if c is not None:
            if c == 0:
                overrides.append('firstCol')
            if c >= num_of_cols_in_row - 1:
                overrides.append('lastCol')
        if r == 0:
            overrides.append('firstRow')
        if r >= num_of_rows - 1:
            overrides.append('lastRow')
        if c is not None:
            if r == 0:
                if c == 0:
                    overrides.append('nwCell')
                if c == num_of_cols_in_row - 1:
                    overrides.append('neCell')
            if r == num_of_rows - 1:
                if c == 0:
                    overrides.append('swCell')
                if c == num_of_cols_in_row - 1:
                    overrides.append('seCell')
        return tuple(filter(self.override_allowed, overrides))

    def resolve_row_style(self, tr, overrides):
        """Compute the RowStyle for *tr* and store it in ``self.style_map``.

        The row overrides named in *overrides* are applied in order, then
        the row's own ``w:trPr`` properties on top of them.
        """
        rs = RowStyle(self.namespace)
        for o in overrides:
            if o in self.overrides:
                ovr = self.overrides[o]
                ors = ovr.get('row', None)
                if ors is not None:
                    rs.update(ors)

        for trPr in self.namespace.XPath('./w:trPr')(tr):
            rs.update(RowStyle(self.namespace, trPr))
        if self.bidi:
            rs.apply_bidi()
        self.style_map[tr] = rs

    def resolve_cell_style(self, tc, overrides, row, col, rows, cols_in_row):
        """Compute the CellStyle for *tc* and store it in ``self.style_map``.

        The cell overrides named in *overrides* are applied in order, then
        the cell's own ``w:tcPr``. Padding still unset is taken from the
        table style, and unset borders on inside edges fall back to the
        insideH/insideV borders of the cell and then of the table.
        """
        cs = CellStyle(self.namespace)
        for o in overrides:
            if o in self.overrides:
                ovr = self.overrides[o]
                ors = ovr.get('cell', None)
                if ors is not None:
                    cs.update(ors)

        for tcPr in self.namespace.XPath('./w:tcPr')(tc):
            cs.update(CellStyle(self.namespace, tcPr))

        for x in edges:
            p = 'cell_padding_%s' % x
            val = getattr(cs, p)
            if val is inherit:
                setattr(cs, p, getattr(self.table_style, p))

            # An edge is "inside" when another cell lies beyond it
            is_inside_edge = (
                (x == 'left' and col > 0) or
                (x == 'top' and row > 0) or
                (x == 'right' and col < cols_in_row - 1) or
                (x == 'bottom' and row < rows -1)
            )
            inside_edge = ('insideH' if x in {'top', 'bottom'} else 'insideV') if is_inside_edge else None
            for prop in border_props:
                if not prop.startswith('border'):
                    continue
                eprop = prop % x
                iprop = (prop % inside_edge) if inside_edge else None
                val = getattr(cs, eprop)
                if val is inherit and iprop is not None:
                    # Use the insideX borders if the main cell borders are not
                    # specified
                    val = getattr(cs, iprop)
                    if val is inherit:
                        val = getattr(self.table_style, iprop)
                if not is_inside_edge and val == 'none':
                    # Cell borders must override table borders even when the
                    # table border is not null and the cell border is null.
                    val = 'hidden'
                setattr(cs, eprop, val)

        if self.bidi:
            cs.apply_bidi()
        self.style_map[tc] = cs

    def resolve_para_style(self, p, overrides):
        """Store ``[paragraph_style, run_style]`` for *p* in ``self.style_map``.

        Starts from clones of the table-level paragraph and run styles and
        applies the 'para' and 'run' overrides named in *overrides* in
        order. Either entry may be None when no style applies.
        """
        text_styles = [clone(self.paragraph_style), clone(self.run_style)]

        for o in overrides:
            if o in self.overrides:
                ovr = self.overrides[o]
                for i, name in enumerate(('para', 'run')):
                    ops = ovr.get(name, None)
                    if ops is not None:
                        if text_styles[i] is None:
                            text_styles[i] = ops
                        else:
                            text_styles[i].update(ops)
        self.style_map[p] = text_styles

    @staticmethod
    def _remove_cell(tc):
        """Detach *tc* from its row unless it has already been detached."""
        parent = tc.getparent()
        if parent is not None:
            parent.remove(tc)

    def handle_merged_cells(self):
        """Turn vMerge/hMerge runs into row_span/col_span on their first cell.

        Every other cell of a run is removed from the document tree. Cells
        that already carry an explicit ``col_span`` are not considered for
        hMerge.
        """
        if not self.cell_map:
            return
        # Handle vMerge
        max_col_num = max(len(r) for r in self.cell_map)
        for c in range(max_col_num):
            cells = [row[c] if c < len(row) else None for row in self.cell_map]
            runs = [[]]
            for cell in cells:
                try:
                    s = self.style_map[cell]
                except KeyError:  # cell is None
                    s = CellStyle(self.namespace)
                if s.vMerge == 'restart':
                    runs.append([cell])
                elif s.vMerge == 'continue':
                    runs[-1].append(cell)
                else:
                    runs.append([])
            for run in runs:
                if len(run) > 1:
                    self.style_map[run[0]].row_span = len(run)
                    for tc in run[1:]:
                        self._remove_cell(tc)

        # Handle hMerge
        for cells in self.cell_map:
            runs = [[]]
            for cell in cells:
                try:
                    s = self.style_map[cell]
                except KeyError:  # cell is None
                    s = CellStyle(self.namespace)
                if s.col_span is not inherit:
                    runs.append([])
                    continue
                if s.hMerge == 'restart':
                    runs.append([cell])
                elif s.hMerge == 'continue':
                    runs[-1].append(cell)
                else:
                    runs.append([])

            for run in runs:
                if len(run) > 1:
                    self.style_map[run[0]].col_span = len(run)
                    for tc in run[1:]:
                        # The vMerge pass above may already have detached
                        # this cell, so removal must tolerate that.
                        self._remove_cell(tc)

    def __iter__(self):
        """Yield this table's paragraphs, then those of every sub-table."""
        yield from self.paragraphs
        for t in itervalues(self.sub_tables):
            yield from t

    def apply_markup(self, rmap, page, parent=None):
        """Build the HTML ``<table>`` for this table and attach it to the output.

        :param rmap: maps ``w:p`` elements to the HTML elements generated
            for them
        :param page: page object stored on the table style for use when its
            CSS is computed
        :param parent: HTML element to append the table to. When None the
            table is inserted just before the HTML element of its first
            paragraph; if the table has no paragraphs at all, nothing is
            generated.
        """
        table = TABLE('\n\t\t')
        if self.bidi:
            table.set('dir', 'rtl')
        self.table_style.page = page
        style_map = {}
        if parent is None:
            try:
                first_para = rmap[next(iter(self))]
            except StopIteration:
                return
            parent = first_para.getparent()
            idx = parent.index(first_para)
            parent.insert(idx, table)
        else:
            parent.append(table)
        for row in self.namespace.XPath('./w:tr')(self.tbl):
            tr = TR('\n\t\t\t')
            style_map[tr] = self.style_map[row]
            tr.tail = '\n\t\t'
            table.append(tr)
            for tc in self.namespace.XPath('./w:tc')(row):
                td = TD()
                style_map[td] = s = self.style_map[tc]
                if s.col_span is not inherit:
                    td.set('colspan', str(s.col_span))
                if s.row_span is not inherit:
                    td.set('rowspan', str(s.row_span))
                td.tail = '\n\t\t\t'
                tr.append(td)
                for x in self.namespace.XPath('./w:p|./w:tbl')(tc):
                    if x.tag.endswith('}p'):
                        td.append(rmap[x])
                    else:
                        self.sub_tables[x].apply_markup(rmap, page, parent=td)
            if len(tr):
                tr[-1].tail = '\n\t\t'
        if len(table):
            table[-1].tail = '\n\t'

        # Register the computed CSS and tag each element with its class
        table_style = self.table_style.css
        if table_style:
            table.set('class', self.styles.register(table_style, 'table'))
        for elem, style in iteritems(style_map):
            css = style.css
            if css:
                elem.set('class', self.styles.register(css, elem.tag))


class Tables:
    """Registry of the top-level tables of a document.

    Keeps the ``Table`` objects, the set of nested ``w:tbl`` elements they
    own, and a paragraph -> ``Table`` map used for style lookups.
    """

    def __init__(self, namespace):
        self.tables = []
        self.para_map = {}
        self.sub_tables = set()
        self.namespace = namespace

    def register(self, tbl, styles):
        """Create a ``Table`` for *tbl* unless it is a known nested table."""
        if tbl in self.sub_tables:
            return
        table = Table(self.namespace, tbl, styles, self.para_map)
        self.tables.append(table)
        # Nested tables are handled by their parent; remember them so a
        # later register() call for them is ignored.
        self.sub_tables.update(table.sub_tables)

    def apply_markup(self, object_map, page_map):
        """Generate the HTML markup for every registered table.

        *object_map* is inverted before use; *page_map* is indexed by each
        table's ``w:tbl`` element.
        """
        rmap = {}
        for key, value in iteritems(object_map):
            rmap[value] = key
        for table in self.tables:
            page = page_map[table.tbl]
            table.apply_markup(rmap, page)

    def _text_style(self, p, index):
        # Shared lookup for para_style (index 0) and run_style (index 1)
        table = self.para_map.get(p, None)
        if table is None:
            return None
        return table.style_map.get(p, (None, None))[index]

    def para_style(self, p):
        """Paragraph style resolved for *p*, or None when there is none."""
        return self._text_style(p, 0)

    def run_style(self, p):
        """Run style resolved for *p*, or None when there is none."""
        return self._text_style(p, 1)

Zerion Mini Shell 1.0