%PDF- %PDF-
Direktori : /usr/lib/calibre/calibre/ebooks/oeb/display/ |
Current File : //usr/lib/calibre/calibre/ebooks/oeb/display/webview.py |
#!/usr/bin/env python3

__license__   = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import re

from calibre import guess_type
from polyglot.builtins import iteritems

# Body of an internal entity declaration: a name followed by a literal value
# enclosed in double or single quotes, with nothing but whitespace after it.
entity_literal_pat = re.compile(r'''(\S+)\s+(?:"([^"]*)"|'([^']*)')\s*$''')


class EntityDeclarationProcessor:  # {{{
    """Expand entities that the document itself declares with ``<!ENTITY``.

    After construction:

    * ``declared_entities`` maps each declared entity name to its
      replacement text.
    * ``processed_html`` is the input with every ``&name;`` reference to a
      declared entity replaced by that text. The declarations themselves
      are left in place.
    """

    def __init__(self, html):
        """
        :param html: Markup to scan for entity declarations, as a string.
        """
        self.declared_entities = {}
        for match in re.finditer(r'<!\s*ENTITY\s+([^>]+)>', html):
            declaration = match.group(1)
            literal = entity_literal_pat.match(declaration)
            if literal is not None:
                # Quoted literal: take everything between the quotes, so that
                # values containing whitespace are not truncated and
                # single-quoted values do not keep their quotes.
                name = literal.group(1)
                value = literal.group(2)
                if value is None:
                    value = literal.group(3)
            else:
                # Anything else (e.g. SYSTEM/PUBLIC or parameter entities)
                # keeps the historical best-effort handling: the first two
                # whitespace separated tokens are used as name and value.
                tokens = declaration.split()
                if len(tokens) < 2:
                    continue
                name, value = tokens[0], tokens[1].replace('"', '')
            self.declared_entities[name] = value
        self.processed_html = html
        for key, val in iteritems(self.declared_entities):
            self.processed_html = self.processed_html.replace('&%s;'%key, val)
# }}}


def self_closing_sub(match):
    """Substitution callback that expands ``<tag .../>`` to ``<tag ...></tag>``.

    ``<br/>`` (compared case-insensitively) is returned unchanged.
    NOTE(review): presumably because HTML parsers treat ``</br>`` as another
    line break -- confirm.

    :param match: Match object whose group 1 is the tag name and group 2 the
                  remaining text before the closing ``/>``.
    :return: The replacement markup.
    """
    tag = match.group(1)
    if tag.lower().strip() == 'br':
        return match.group()
    return '<%s%s></%s>'%(tag, match.group(2), tag)


# Self-closing tags such as ``<a id="x"/>``: group 1 is the (possibly
# namespaced) tag name, group 2 is whatever follows it up to the ``/>``.
self_closing_pat = re.compile(r'<\s*([:A-Za-z0-9-]+)([^>]*)/\s*>')


def cleanup_html(html):
    """Return ``html`` with declared entities expanded and self-closing tags
    (other than ``<br/>``) rewritten as explicit open/close pairs.

    :param html: Markup as a string.
    """
    html = EntityDeclarationProcessor(html).processed_html
    return self_closing_pat.sub(self_closing_sub, html)


# CDATA sections and entity declarations mark the markup as XML.
xml_detect_pat = re.compile(r'<!(?:\[CDATA\[|ENTITY)')


def load_as_html(html):
    """Return True if ``html`` contains neither a namespace-prefixed ``svg``
    tag (``<prefix:svg``) nor a CDATA section / entity declaration.
    """
    if re.search(r'<[a-zA-Z0-9-]+:svg', html) is not None:
        return False
    return xml_detect_pat.search(html) is None


def load_html(path, view, codec='utf-8', mime_type=None,
              pre_load_callback=lambda x:None, path_is_html=False,
              force_as_html=False, loading_url=None):
    """Clean up a document and load it into ``view``.

    :param path: Path of the file to load, or the markup itself when
                 ``path_is_html`` is True.
    :param view: Object providing ``setHtml()``, ``setContent()`` and
                 ``page().mainFrame()``.
    :param codec: Encoding used to decode the file and to encode the markup
                  passed to ``setContent()``.
    :param mime_type: MIME type passed to ``setContent()``. When None it is
                      guessed from ``path``, defaulting to ``text/html``.
    :param pre_load_callback: Called with the URL being loaded, before the
                              content is handed to ``view``.
    :param path_is_html: If True, ``path`` is used as the markup and no file
                         is read.
    :param force_as_html: If True, always load via ``setHtml()``.
    :param loading_url: Base URL for the content. Defaults to the local file
                        URL of ``path``.
    :return: False if the content was loaded via ``setContent()`` and the
             resulting document contains a ``parsererror`` element,
             True otherwise.
    """
    from qt.core import QUrl, QByteArray
    if mime_type is None:
        # NOTE(review): when path_is_html is True this guesses from the markup
        # text rather than a file name; callers presumably pass mime_type and
        # loading_url explicitly in that case -- confirm.
        mime_type = guess_type(path)[0]
        if not mime_type:
            mime_type = 'text/html'
    if path_is_html:
        html = path
    else:
        with open(path, 'rb') as f:
            html = f.read().decode(codec, 'replace')

    html = cleanup_html(html)
    loading_url = loading_url or QUrl.fromLocalFile(path)
    pre_load_callback(loading_url)

    if force_as_html or load_as_html(html):
        view.setHtml(html, loading_url)
    else:
        view.setContent(QByteArray(html.encode(codec)), mime_type,
                loading_url)
        # Report failure if the loaded document contains a parsererror element.
        mf = view.page().mainFrame()
        elem = mf.findFirstElement('parsererror')
        if not elem.isNull():
            return False
    return True