%PDF- %PDF-
Direktori : /lib/calibre/calibre/ebooks/lrf/ |
Current File : //lib/calibre/calibre/ebooks/lrf/input.py |
#!/usr/bin/env python3
"""XSLT extension elements used while converting LRF markup to HTML/CSS.

Each class below subclasses ``lxml.etree.XSLTExtension`` and implements
``execute()``, which receives the current input node and appends the
generated HTML to ``output_parent``.
"""

__license__ = 'GPL v3'
__copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import operator
import textwrap
from copy import copy, deepcopy

from lxml import etree

from calibre import guess_type
from polyglot.builtins import as_bytes


class Canvas(etree.XSLTExtension):
    """Render ``Canvas`` elements as an image page or as a table of text."""

    def __init__(self, doc, styles, text_block, log):
        """Store collaborators.

        :param doc: parsed source tree that is searched by ``objid``.
        :param styles: object providing ``to_num()`` (see ``Styles``).
        :param text_block: object providing ``render_block()``
            (see ``TextBlock``).
        :param log: logger exposing ``warn()``.
        """
        self.doc = doc
        self.styles = styles
        self.text_block = text_block
        self.log = log
        # objids of canvases that have already been emitted.
        self.processed = set()

    def execute(self, context, self_node, input_node, output_parent):
        """Emit HTML for the canvas referenced by ``input_node``.

        A canvas holding exactly one ``ImageBlock`` becomes an image page.
        Otherwise the canvas and the run of ``Canvas`` siblings that
        immediately follow it are emitted as one ``<table>``, one ``<tr>``
        per canvas and one ``<td>`` per ``TextBlock``.
        """
        cid = input_node.get('objid', None)
        if cid is None or cid in self.processed:
            return
        self.processed.add(cid)
        input_node = self.doc.xpath('//Canvas[@objid="%s"]' % cid)[0]

        objects = list(self.get_objects(input_node))
        if len(objects) == 1 and objects[0][0].tag == 'ImageBlock':
            self.image_page(input_node, objects[0][0], output_parent)
            return

        # Gather the consecutive Canvas siblings so they share one table.
        canvases = [input_node]
        for sibling in input_node.itersiblings():
            if sibling.tag != 'Canvas':
                break
            oid = sibling.get('objid', None)
            if oid is not None:
                canvases.append(sibling)
                self.processed.add(oid)

        table = etree.Element('table')
        table.text = '\n\t'
        for canvas in canvases:
            tr = table.makeelement('tr')
            tr.set('id', canvas.get('objid'))
            tr.tail = '\n\t'
            table.append(tr)
            for obj, _x, _y in self.get_objects(canvas):
                if obj.tag != 'TextBlock':
                    self.log.warn(obj.tag, 'elements in Canvas not supported')
                    continue
                td = table.makeelement('td')
                self.text_block.render_block(obj, td)
                tr.append(td)
        output_parent.append(table)

    def image_page(self, input_node, block, output_parent):
        """Append ``<div class="image_page"><img/></div>`` for ``block``.

        The ``<div>`` id comes from ``input_node``'s ``objid``; the image
        size comes from ``block``'s ``xsize``/``ysize`` and its source from
        the ``ImageStream`` named by ``block``'s ``refstream``.
        """
        div = etree.Element('div')
        div.set('id', input_node.get('objid', 'scuzzy'))
        div.set('class', 'image_page')
        width = self.styles.to_num(block.get("xsize", None))
        height = self.styles.to_num(block.get("ysize", None))
        img = div.makeelement('img')
        if width is not None:
            img.set('width', str(int(width)))
        if height is not None:
            img.set('height', str(int(height)))
        ref = block.get('refstream', None)
        if ref is not None:
            imstr = self.doc.xpath('//ImageStream[@objid="%s"]' % ref)
            if imstr:
                src = imstr[0].get('file', None)
                if src:
                    img.set('src', src)
        div.append(img)
        output_parent.append(div)

    def get_objects(self, node):
        """Yield ``(element, x, y)`` for every placed object under ``node``.

        Only ``PutObj`` descendants carrying ``refobj``, ``x1`` and ``y1``
        are considered; entries whose target cannot be found or whose
        coordinates are not numeric are skipped.
        """
        for put_obj in node.xpath('descendant::PutObj[@refobj and @x1 and @y1]'):
            objs = node.xpath('//*[@objid="%s"]' % put_obj.get('refobj'))
            x_pos = self.styles.to_num(put_obj.get('x1'))
            y_pos = self.styles.to_num(put_obj.get('y1'))
            if objs and x_pos is not None and y_pos is not None:
                yield objs[0], int(x_pos), int(y_pos)


class MediaType(etree.XSLTExtension):
    """Write the guessed MIME type of the node's ``file`` attribute."""

    def execute(self, context, self_node, input_node, output_parent):
        """Set ``output_parent.text`` to the MIME type of the ``file`` attribute.

        Falls back to ``application/octet-stream`` when the attribute is
        missing or the type cannot be guessed.
        """
        name = input_node.get('file', None)
        # Do not hand None to guess_type(); treat a missing name as unknown.
        typ = guess_type(name)[0] if name else None
        if not typ:
            typ = 'application/octet-stream'
        output_parent.text = typ


class ImageBlock(etree.XSLTExtension):
    """Render a stand-alone ``ImageBlock`` via ``Canvas.image_page``."""

    def __init__(self, canvas):
        etree.XSLTExtension.__init__(self)
        self.canvas = canvas

    def execute(self, context, self_node, input_node, output_parent):
        """Emit the image page, using the block itself as the id source."""
        self.canvas.image_page(input_node, input_node, output_parent)


class RuledLine(etree.XSLTExtension):
    """Render a ruled line as ``<hr>``."""

    def execute(self, context, self_node, input_node, output_parent):
        """Append an ``<hr>`` element to ``output_parent``."""
        hr = etree.Element('hr')
        output_parent.append(hr)


class TextBlock(etree.XSLTExtension):
    """Convert a ``TextBlock`` element and its inline children to HTML."""

    def __init__(self, styles, char_button_map, plot_map, log):
        """Store collaborators.

        :param styles: ``Styles`` instance used for class lookup.
        :param char_button_map: mapping of ``CharButton`` ``refobj`` to href.
        :param plot_map: mapping of ``Plot`` ``refobj`` to image src.
        :param log: logger exposing ``warn()``.
        """
        etree.XSLTExtension.__init__(self)
        self.styles = styles
        self.log = log
        self.char_button_map = char_button_map
        self.plot_map = plot_map

    def execute(self, context, self_node, input_node, output_parent):
        """Render a copy of ``input_node`` into a new ``<div>``.

        The node is deep-copied first because rendering may restructure it
        (see ``fix_deep_nesting``).
        """
        input_node = deepcopy(input_node)
        div = etree.Element('div')
        self.render_block(input_node, div)
        output_parent.append(div)

    def render_block(self, node, root):
        """Render ``node`` into the existing element ``root``.

        Sets ``class`` from the block/text style maps, ``id`` from
        ``objid``, then converts every child element in document order.
        """
        ts = node.get('textstyle', None)
        classes = []
        bs = node.get('blockstyle')
        if bs in self.styles.block_style_map:
            classes.append('bs%d' % self.styles.block_style_map[bs])
        if ts in self.styles.text_style_map:
            classes.append('ts%d' % self.styles.text_style_map[ts])
        if classes:
            root.set('class', ' '.join(classes))
        objid = node.get('objid', None)
        if objid:
            root.set('id', objid)
        root.text = node.text
        self.root = root
        self.parent = root
        # (element, 'text' | 'tail'): where the next run of text is appended.
        self.add_text_to = (self.parent, 'text')
        self.fix_deep_nesting(node)
        for child in node:
            self.process_child(child)

    def fix_deep_nesting(self, node):
        """Flatten ``Span`` nesting when any span is 500 or more levels deep.

        Spans are handled deepest first: each span, together with the
        siblings that follow it, is moved out of its parent and re-inserted
        right after that parent. When the parent is itself a ``Span`` its
        attributes are merged into the moved ``Span`` children (the child's
        own attributes win).
        """
        def depth(elem):
            # Number of elements on the path from elem up to the tree root.
            ans = 1
            parent = elem.getparent()
            while parent is not None:
                ans += 1
                parent = parent.getparent()
            return ans

        deepest = 1
        for span in node.xpath('descendant::Span'):
            d = depth(span)
            if d > deepest:
                deepest = d
                if d > 500:
                    break

        if deepest < 500:
            return

        self.log.warn('Found deeply nested spans. Flattening.')
        # with open('/t/before.xml', 'wb') as f:
        #     f.write(etree.tostring(node, method='xml'))

        spans = [(depth(span), span) for span in node.xpath('descendant::Span')]
        spans.sort(key=operator.itemgetter(0), reverse=True)

        for span_depth, span in spans:
            if span_depth < 3:
                # No grandparent to hoist into.
                continue
            p = span.getparent()
            gp = p.getparent()
            idx = p.index(span)
            pidx = gp.index(p)
            children = list(p)[idx:]
            # The parent's tail must follow the last hoisted child.
            last_tail = children[-1].tail or ''
            children[-1].tail = last_tail + (p.tail if p.tail else '')
            p.tail = ''
            pattrib = dict(**p.attrib) if p.tag == 'Span' else {}
            for child in children:
                p.remove(child)
                if pattrib and child.tag == "Span":
                    attrib = copy(pattrib)
                    attrib.update(child.attrib)
                    child.attrib.update(attrib)

            # Insert in reverse so the original order is kept after p.
            for child in reversed(children):
                gp.insert(pidx + 1, child)
        # with open('/t/after.xml', 'wb') as f:
        #     f.write(etree.tostring(node, method='xml'))

    def add_text(self, text):
        """Append ``text`` to the current target recorded in ``add_text_to``."""
        if text:
            target, attr = self.add_text_to
            setattr(target, attr, (getattr(target, attr) or '') + text)

    def process_container(self, child, tgt):
        """Append ``tgt`` to the current parent and render ``child`` into it.

        After the children are processed the parent is restored and the
        tail text of ``child`` is attached after ``tgt``.
        """
        idx = self.styles.get_text_styles(child)
        if idx is not None:
            tgt.set('class', 'ts%d' % idx)
        self.parent.append(tgt)
        orig_parent = self.parent
        self.parent = tgt
        self.add_text_to = (self.parent, 'text')
        self.add_text(child.text)
        for gchild in child:
            self.process_child(gchild)
        self.parent = orig_parent
        self.add_text_to = (tgt, 'tail')
        self.add_text(child.tail)

    def process_child(self, child):
        """Convert one inline element according to its tag.

        Unknown tags are reported through ``log.warn`` and skipped.
        """
        tag = child.tag
        if tag == 'CR':
            if self.parent == self.root or self.parent.tag == 'p':
                # At block level a line break starts a new paragraph.
                self.parent = self.root.makeelement('p')
                self.root.append(self.parent)
                self.add_text_to = (self.parent, 'text')
            else:
                br = self.parent.makeelement('br')
                self.parent.append(br)
                self.add_text_to = (br, 'tail')
            self.add_text(child.tail)
        elif tag in ('P', 'Span', 'EmpLine', 'NoBR'):
            span = self.root.makeelement('span')
            if tag == 'EmpLine':
                td = 'underline' if child.get('emplineposition', 'before') == 'before' else 'overline'
                span.set('style', 'text-decoration: ' + td)
            self.process_container(child, span)
        elif tag == 'Sup':
            sup = self.root.makeelement('sup')
            self.process_container(child, sup)
        elif tag == 'Sub':
            sub = self.root.makeelement('sub')
            self.process_container(child, sub)
        elif tag == 'Italic':
            italic = self.root.makeelement('i')
            self.process_container(child, italic)
        elif tag == 'CharButton':
            a = self.root.makeelement('a')
            oid = child.get('refobj', None)
            if oid in self.char_button_map:
                a.set('href', self.char_button_map[oid])
            self.process_container(child, a)
        elif tag == 'Plot':
            xsize = self.styles.to_num(child.get('xsize', None), 166/720)
            ysize = self.styles.to_num(child.get('ysize', None), 166/720)
            img = self.root.makeelement('img')
            if xsize is not None:
                img.set('width', str(int(xsize)))
            if ysize is not None:
                img.set('height', str(int(ysize)))
            ro = child.get('refobj', None)
            if ro in self.plot_map:
                img.set('src', self.plot_map[ro])
            self.parent.append(img)
            self.add_text_to = (img, 'tail')
            self.add_text(child.tail)
        else:
            self.log.warn('Unhandled Text element:', child.tag)


class Styles(etree.XSLTExtension):
    """Collect text/block styles and write them out as a CSS file.

    Identical styles are stored once; the position of a style in
    ``text_styles``/``block_styles`` is the number used in the generated
    ``tsN``/``bsN`` class names.
    """

    def __init__(self):
        etree.XSLTExtension.__init__(self)
        self.text_styles, self.block_styles = [], []
        # objid -> index into the corresponding list above.
        self.text_style_map, self.block_style_map = {}, {}
        self.CSS = textwrap.dedent('''
        .image_page { text-align:center }

        ''')

    def write(self, name='styles.css'):
        """Write the base CSS plus one rule per non-empty collected style.

        :param name: path of the CSS file to create (opened in binary mode).
        """
        def join(style):
            ans = ['%s : %s;' % (k, v) for k, v in style.items()]
            if ans:
                # Drop the trailing semicolon of the last declaration.
                ans[-1] = ans[-1][:-1]
            return '\n\t'.join(ans)

        with open(name, 'wb') as f:
            f.write(as_bytes(self.CSS))
            for (w, sel) in [(self.text_styles, 'ts'), (self.block_styles, 'bs')]:
                for i, s in enumerate(w):
                    if not s:
                        continue
                    rsel = '.%s%d' % (sel, i)
                    s = join(s)
                    f.write(as_bytes(rsel + ' {\n\t' + s + '\n}\n\n'))

    def execute(self, context, self_node, input_node, output_parent):
        """Register the style described by ``input_node`` under its ``objid``.

        ``TextStyle`` nodes go to the text style map (only when they yield
        at least one property); every other node is treated as a block
        style.
        """
        if input_node.tag == 'TextStyle':
            idx = self.get_text_styles(input_node)
            if idx is not None:
                self.text_style_map[input_node.get('objid')] = idx
        else:
            idx = self.get_block_styles(input_node)
            self.block_style_map[input_node.get('objid')] = idx

    def px_to_pt(self, px):
        """Scale ``px`` by 72/166 and return a float, or None if not numeric.

        ``px`` normally arrives as an attribute string, so it is converted
        with ``float()`` first; multiplying the raw string would repeat it
        and then fail on the division, making every lookup return None.
        NOTE(review): the 72/166 factor presumably converts 166-dpi pixels
        to points -- confirm.
        """
        try:
            return float(px) * 72/166
        except (TypeError, ValueError):
            return None

    def color(self, val):
        """Convert a hexadecimal colour string to a CSS colour.

        The parsed integer is split into four bytes, lowest first: r, g, b
        and a. ``a == 255`` yields None, ``a == 0`` yields ``rgb(...)`` and
        anything else ``rgba(...)`` with alpha ``1 - a/255``. None is also
        returned when ``val`` is missing or not valid hexadecimal.
        """
        try:
            val = int(val, 16)
        except (TypeError, ValueError):
            return None
        r, g, b, a = val & 0xFF, (val >> 8) & 0xFF, (val >> 16) & 0xFF, (val >> 24) & 0xFF
        if a == 255:
            return None
        if a == 0:
            return 'rgb(%d,%d,%d)' % (r, g, b)
        return 'rgba(%d,%d,%d,%f)' % (r, g, b, 1. - a/255.)

    def get_block_styles(self, node):
        """Return the index of the block style described by ``node``.

        The style is built from the ``sidemargin``, ``topskip``,
        ``footskip``, ``framewidth``, ``framecolor`` and ``bgcolor``
        attributes and appended to ``block_styles`` if not already present.
        """
        ans = {}
        sm = self.px_to_pt(node.get('sidemargin', None))
        if sm is not None:
            ans['margin-left'] = ans['margin-right'] = '%fpt' % sm
        ts = self.px_to_pt(node.get('topskip', None))
        if ts is not None:
            ans['margin-top'] = '%fpt' % ts
        fs = self.px_to_pt(node.get('footskip', None))
        if fs is not None:
            ans['margin-bottom'] = '%fpt' % fs
        fw = self.px_to_pt(node.get('framewidth', None))
        if fw is not None:
            ans['border-width'] = '%fpt' % fw
            ans['border-style'] = 'solid'
        fc = self.color(node.get('framecolor', None))
        if fc is not None:
            ans['border-color'] = fc
        bc = self.color(node.get('bgcolor', None))
        if bc is not None:
            ans['background-color'] = bc
        if ans not in self.block_styles:
            self.block_styles.append(ans)
        return self.block_styles.index(ans)

    def to_num(self, val, factor=1.):
        """Return ``float(val) * factor``, or None when ``val`` is not numeric."""
        try:
            return float(val) * factor
        except (TypeError, ValueError):
            return None

    def get_text_styles(self, node):
        """Return the index of the text style described by ``node``.

        Returns None when ``node`` carries none of the recognised
        attributes (``fontsize``, ``fontweight``, ``textcolor``,
        ``textbgcolor``, ``align``, ``parindent``).
        """
        ans = {}
        fs = self.to_num(node.get('fontsize', None), 0.1)
        if fs is not None:
            ans['font-size'] = '%fpt' % fs
        fw = self.to_num(node.get('fontweight', None))
        if fw is not None:
            ans['font-weight'] = ('bold' if fw >= 700 else 'normal')
        # fn = getattr(obj, 'fontfacename', None)
        # if fn is not None:
        #     fn = cls.FONT_MAP[fn]
        #     item('font-family: %s;'%fn)
        fg = self.color(node.get('textcolor', None))
        if fg is not None:
            ans['color'] = fg
        bg = self.color(node.get('textbgcolor', None))
        if bg is not None:
            ans['background-color'] = bg
        al = node.get('align', None)
        if al is not None:
            alignments = dict(head='left', center='center', foot='right')
            ans['text-align'] = alignments.get(al, 'left')
        # lh = self.to_num(node.get('linespace', None), 0.1)
        # if lh is not None:
        #     ans['line-height'] = '%fpt'%lh
        pi = self.to_num(node.get('parindent', None), 0.1)
        if pi is not None:
            ans['text-indent'] = '%fpt' % pi
        if not ans:
            return None
        if ans not in self.text_styles:
            self.text_styles.append(ans)
        return self.text_styles.index(ans)