%PDF- %PDF-
Direktori : /lib/calibre/calibre/ebooks/ |
Current File : //lib/calibre/calibre/ebooks/__init__.py |
__license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>' ''' Code for the conversion of ebook formats and the reading of metadata from various formats. ''' import os, re, numbers, sys from calibre import prints from calibre.ebooks.chardet import xml_to_unicode class ConversionError(Exception): def __init__(self, msg, only_msg=False): Exception.__init__(self, msg) self.only_msg = only_msg class UnknownFormatError(Exception): pass class DRMError(ValueError): pass class ParserError(ValueError): pass BOOK_EXTENSIONS = ['lrf', 'rar', 'zip', 'rtf', 'lit', 'txt', 'txtz', 'text', 'htm', 'xhtm', 'html', 'htmlz', 'xhtml', 'pdf', 'pdb', 'updb', 'pdr', 'prc', 'mobi', 'azw', 'doc', 'epub', 'fb2', 'fbz', 'djv', 'djvu', 'lrx', 'cbr', 'cb7', 'cbz', 'cbc', 'oebzip', 'rb', 'imp', 'odt', 'chm', 'tpz', 'azw1', 'pml', 'pmlz', 'mbp', 'tan', 'snb', 'xps', 'oxps', 'azw4', 'book', 'zbf', 'pobi', 'docx', 'docm', 'md', 'textile', 'markdown', 'ibook', 'ibooks', 'iba', 'azw3', 'ps', 'kepub', 'kfx', 'kpf'] def return_raster_image(path): from calibre.utils.imghdr import what if os.access(path, os.R_OK): with open(path, 'rb') as f: raw = f.read() if what(None, raw) not in (None, 'svg'): return raw def extract_cover_from_embedded_svg(html, base, log): from calibre.ebooks.oeb.base import XPath, SVG, XLINK from calibre.utils.xml_parse import safe_xml_fromstring root = safe_xml_fromstring(html) svg = XPath('//svg:svg')(root) if len(svg) == 1 and len(svg[0]) == 1 and svg[0][0].tag == SVG('image'): image = svg[0][0] href = image.get(XLINK('href'), None) if href: path = os.path.join(base, *href.split('/')) return return_raster_image(path) def extract_calibre_cover(raw, base, log): from calibre.ebooks.BeautifulSoup import BeautifulSoup soup = BeautifulSoup(raw) matches = soup.find(name=['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'span', 'font', 'br']) images = soup.findAll('img', src=True) if matches is None and len(images) == 1 and \ images[0].get('alt', '').lower()=='cover': img = images[0] img = os.path.join(base, *img['src'].split('/')) q = return_raster_image(img) if q is not None: return q # Look for a simple cover, i.e. a body with no text and only one <img> tag if matches is None: body = soup.find('body') if body is not None: text = ''.join(map(str, body.findAll(text=True))) if text.strip(): # Body has text, abort return images = body.findAll('img', src=True) if len(images) == 1: img = os.path.join(base, *images[0]['src'].split('/')) return return_raster_image(img) def render_html_svg_workaround(path_to_html, log, width=590, height=750): from calibre.ebooks.oeb.base import SVG_NS with open(path_to_html, 'rb') as f: raw = f.read() raw = xml_to_unicode(raw, strip_encoding_pats=True)[0] data = None if SVG_NS in raw: try: data = extract_cover_from_embedded_svg(raw, os.path.dirname(path_to_html), log) except Exception: pass if data is None: try: data = extract_calibre_cover(raw, os.path.dirname(path_to_html), log) except Exception: pass if data is None: data = render_html_data(path_to_html, width, height) return data def render_html_data(path_to_html, width, height): from calibre.ptempfile import TemporaryDirectory from calibre.utils.ipc.simple_worker import fork_job, WorkerError result = {} def report_error(text=''): prints('Failed to render', path_to_html, 'with errors:', file=sys.stderr) if text: prints(text, file=sys.stderr) if result and result['stdout_stderr']: with open(result['stdout_stderr'], 'rb') as f: prints(f.read(), file=sys.stderr) with TemporaryDirectory('-render-html') as tdir: try: result = fork_job('calibre.ebooks.render_html', 'main', args=(path_to_html, tdir, 'jpeg')) except WorkerError as e: report_error(e.orig_tb) else: if result['result']: with open(os.path.join(tdir, 'rendered.jpeg'), 'rb') as f: return f.read() else: report_error() def check_ebook_format(stream, current_guess): ans = current_guess if current_guess.lower() in ('prc', 'mobi', 'azw', 'azw1', 'azw3'): stream.seek(0) if stream.read(3) == b'TPZ': ans = 'tpz' stream.seek(0) return ans def normalize(x): if isinstance(x, str): import unicodedata x = unicodedata.normalize('NFC', x) return x def calibre_cover(title, author_string, series_string=None, output_format='jpg', title_size=46, author_size=36, logo_path=None): title = normalize(title) author_string = normalize(author_string) series_string = normalize(series_string) from calibre.ebooks.covers import calibre_cover2 from calibre.utils.img import image_to_data ans = calibre_cover2(title, author_string or '', series_string or '', logo_path=logo_path, as_qimage=True) return image_to_data(ans, fmt=output_format) UNIT_RE = re.compile(r'^(-*[0-9]*[.]?[0-9]*)\s*(%|em|ex|en|px|mm|cm|in|pt|pc|rem|q)$') def unit_convert(value, base, font, dpi, body_font_size=12): ' Return value in pts' if isinstance(value, numbers.Number): return value try: return float(value) * 72.0 / dpi except: pass result = value m = UNIT_RE.match(value) if m is not None and m.group(1): value = float(m.group(1)) unit = m.group(2) if unit == '%': result = (value / 100.0) * base elif unit == 'px': result = value * 72.0 / dpi elif unit == 'in': result = value * 72.0 elif unit == 'pt': result = value elif unit == 'em': result = value * font elif unit in ('ex', 'en'): # This is a hack for ex since we have no way to know # the x-height of the font font = font result = value * font * 0.5 elif unit == 'pc': result = value * 12.0 elif unit == 'mm': result = value * 2.8346456693 elif unit == 'cm': result = value * 28.346456693 elif unit == 'rem': result = value * body_font_size elif unit == 'q': result = value * 0.708661417325 return result def parse_css_length(value): try: m = UNIT_RE.match(value) except TypeError: return None, None if m is not None and m.group(1): value = float(m.group(1)) unit = m.group(2) return value, unit.lower() return None, None def generate_masthead(title, output_path=None, width=600, height=60): from calibre.ebooks.conversion.config import load_defaults recs = load_defaults('mobi_output') masthead_font_family = recs.get('masthead_font', None) from calibre.ebooks.covers import generate_masthead return generate_masthead(title, output_path=output_path, width=width, height=height, font_family=masthead_font_family) def escape_xpath_attr(value): if '"' in value: if "'" in value: parts = re.split('("+)', value) ans = [] for x in parts: if x: q = "'" if '"' in x else '"' ans.append(q + x + q) return 'concat(%s)' % ', '.join(ans) else: return "'%s'" % value return '"%s"' % value