%PDF- %PDF-
Mini Shell

Mini Shell

Direktori : /lib/calibre/calibre/srv/
Upload File :
Create Path :
Current File : //lib/calibre/calibre/srv/render_book.py

#!/usr/bin/env python3
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>


import json
import os
import re
import sys
import time
from collections import defaultdict
from datetime import datetime
from functools import partial
from itertools import count
from lxml.etree import Comment
from math import ceil

from calibre import detect_ncpus, force_unicode, prepare_string_for_xml
from calibre.constants import iswindows
from calibre.customize.ui import plugin_for_input_format
from calibre.ebooks.oeb.base import (
    OEB_DOCS, OEB_STYLES, OPF, XHTML, XHTML_NS, XLINK, XPath as _XPath,
    rewrite_links, urlunquote
)
from calibre.ebooks.oeb.iterator.book import extract_book
from calibre.ebooks.oeb.polish.container import Container as ContainerBase
from calibre.ebooks.oeb.polish.cover import (
    find_cover_image, find_cover_image_in_page, find_cover_page
)
from calibre.ebooks.oeb.polish.pretty import pretty_script_or_style
from calibre.ebooks.oeb.polish.toc import from_xpaths, get_landmarks, get_toc
from calibre.ebooks.oeb.polish.utils import guess_type
from calibre.ptempfile import PersistentTemporaryDirectory
from calibre.srv.metadata import encode_datetime
from calibre.srv.opts import grouper
from calibre.utils.date import EPOCH
from calibre.utils.filenames import rmtree
from calibre.utils.ipc.simple_worker import start_pipe_worker
from calibre.utils.logging import default_log
from calibre.utils.serialize import (
    json_dumps, json_loads, msgpack_dumps, msgpack_loads
)
from calibre.utils.short_uuid import uuid4
from calibre_extensions import speedup
from calibre_extensions.fast_css_transform import transform_properties
from polyglot.binary import (
    as_base64_unicode as encode_component, from_base64_bytes,
    from_base64_unicode as decode_component
)
from polyglot.builtins import as_bytes, iteritems
from polyglot.urllib import quote, urlparse

# Stored under the 'version' key of the manifest built by
# process_exploded_book().
RENDER_VERSION = 1

# Raw bytes of a very small JPEG image (presumably a blank 1x1 picture --
# confirm). Returned by the generic_cover() helper inside create_cover_page()
# when no book metadata is supplied.
BLANK_JPEG = b'\xff\xd8\xff\xdb\x00C\x00\x03\x02\x02\x02\x02\x02\x03\x02\x02\x02\x03\x03\x03\x03\x04\x06\x04\x04\x04\x04\x04\x08\x06\x06\x05\x06\t\x08\n\n\t\x08\t\t\n\x0c\x0f\x0c\n\x0b\x0e\x0b\t\t\r\x11\r\x0e\x0f\x10\x10\x11\x10\n\x0c\x12\x13\x12\x10\x13\x0f\x10\x10\x10\xff\xc9\x00\x0b\x08\x00\x01\x00\x01\x01\x01\x11\x00\xff\xcc\x00\x06\x00\x10\x10\x05\xff\xda\x00\x08\x01\x01\x00\x00?\x00\xd2\xcf \xff\xd9'  # noqa


class Spineless(ValueError):
    """Raised by process_exploded_book() when the book's spine is empty."""


def XPath(expr):
    """Return the compiled form of the XPath expression *expr*.

    Compiled expressions are memoized in ``XPath.cache`` (keyed by the
    expression string), so each distinct expression is compiled by
    ``_XPath`` only once.
    """
    cached = XPath.cache.get(expr)
    if cached is not None:
        return cached
    compiled = _XPath(expr)
    XPath.cache[expr] = compiled
    return compiled


# Maps expression string -> compiled expression
XPath.cache = {}


def encode_url(name, frag=''):
    """Encode a file *name* (and optional fragment) into a single token.

    The name is passed through ``encode_component``; a non-empty *frag* is
    appended verbatim after a ``#`` separator. decode_url() is the inverse.
    """
    encoded = encode_component(name)
    if not frag:
        return encoded
    return encoded + '#' + frag


def decode_url(x):
    """Split a token produced by encode_url() into ``(name, fragment)``.

    Everything before the first ``#`` is decoded with ``decode_component``;
    the remainder (or the empty string when there is no ``#``) is returned
    unchanged as the fragment.
    """
    encoded_name, _, frag = x.partition('#')
    return decode_component(encoded_name), frag


def create_link_replacer(container, link_uid, changed):
    """Build a callback that rewrites links to book-internal resources.

    The returned ``link_replacer(base, url)`` maps *url*, found in the file
    named *base*, as follows:

    * fragment-only links become ``link_uid`` (empty fragment) or
      ``link_uid|<encoded base#frag>|``;
    * links that have a network location, a query, a scheme other than
      ``file``, an empty path or an absolute path are returned unchanged;
    * relative links that resolve to a non-empty file in *container* become
      ``link_uid|<encoded name#frag>|``;
    * relative links that resolve to a missing or empty file become
      ``missing:<quoted name>``.

    Whenever a link is rewritten, *base* is added to the *changed* set.
    """
    template = link_uid + '|{}|'

    def virtual_url(name, frag):
        return template.format(encode_url(name, frag))

    def link_replacer(base, url):
        if url.startswith('#'):
            # Link to an anchor inside the same file
            frag = urlunquote(url[1:])
            changed.add(base)
            return virtual_url(base, frag) if frag else link_uid

        try:
            parsed = urlparse(url)
        except Exception:
            return url

        is_external = parsed.netloc or parsed.query or (parsed.scheme and parsed.scheme != 'file')
        if is_external or not parsed.path or parsed.path.startswith('/'):
            return url

        path = parsed.path
        name = container.href_to_name(path, base)
        if not name:
            # Not resolvable to a book file: only the path part is returned
            return path

        if container.has_name_and_is_not_empty(name):
            result = virtual_url(name, urlunquote(parsed.fragment))
        else:
            raw_name = name.encode('utf-8') if isinstance(name, str) else name
            result = 'missing:' + force_unicode(quote(raw_name), 'utf-8')
        changed.add(base)
        return result

    return link_replacer


def check_for_maths(root):
    """Return True if the document under *root* appears to contain maths.

    That is the case when there is a ``math`` element in any namespace, or
    an XHTML ``<script>`` whose type is ``text/x-mathjax-config``.
    """
    if any(True for _ in root.iterdescendants('{*}math')):
        return True
    return any(
        script.get('type') == 'text/x-mathjax-config'
        for script in root.iterdescendants(XHTML('script'))
    )


def has_ancestor(elem, q):
    """Return True if *q* is found while walking up the parents of *elem*.

    *elem* itself is never compared against *q*; only the values returned by
    successive ``getparent()`` calls are.
    """
    node = elem
    while True:
        if node is None:
            return False
        node = node.getparent()
        if node is q:
            return True


def anchor_map(root):
    """Return the distinct anchor ids of the document, in document order.

    Elements are taken from the ``//*[@id or @name]`` query. An element
    whose tag ends in ``}a`` and that has a ``name`` but no ``id`` gets its
    ``id`` attribute set to the name, so it can be used as an anchor.
    """
    ordered = {}  # used as an insertion-ordered set
    for elem in root.xpath('//*[@id or @name]'):
        anchor = elem.get('id')
        if not anchor and elem.tag.endswith('}a'):
            anchor = elem.get('name')
            if anchor:
                elem.set('id', anchor)
        if anchor:
            ordered.setdefault(anchor, None)
    return list(ordered)


def get_length(root):
    """Return a length measure for the body content of *root*.

    Every XHTML ``<body>`` child of *root* and all of its descendants are
    measured and the results summed. When the ``speedup`` extension exposes
    ``get_element_char_length`` it is used; otherwise a pure Python fallback
    counts non-whitespace characters of text and tail (text is skipped for
    script/style/title/noscript) and adds 1000 for every img or svg element.
    """
    fast = getattr(speedup, 'get_element_char_length', None)

    if fast is not None:
        def measure(elem):
            # Comments and similar nodes have a callable tag; only their
            # tail is measured
            tag = getattr(elem, 'tag', measure)
            if callable(tag):
                return fast('', None, getattr(elem, 'tail', None))
            return fast(tag, elem.text, elem.tail)
    else:
        skipped_text_tags = frozenset('script style title noscript'.split())
        image_tags = ('img', 'svg')
        whitespace = re.compile(r'\s+')

        def visible_chars(text):
            return len(whitespace.sub('', text))

        def measure(elem):
            tag = getattr(elem, 'tag', measure)
            if callable(tag):
                return visible_chars(getattr(elem, 'tail', None) or '')
            local_name = tag.rpartition('}')[-1].lower()
            total = 0
            if elem.text and local_name not in skipped_text_tags:
                total += visible_chars(elem.text)
            if elem.tail:
                total += visible_chars(elem.tail)
            if local_name in image_tags:
                total += 1000
            return total

    length = 0
    for body in root.iterchildren(XHTML('body')):
        length += measure(body)
        for descendant in body.iterdescendants():
            length += measure(descendant)
    return length


def toc_anchor_map(toc):
    """Group Table of Contents nodes by the file they point to.

    *toc* is a nested dict with ``dest``, ``id``, ``frag`` and ``children``
    keys. The result maps each truthy ``dest`` to a list of
    ``{'id': ..., 'frag': ...}`` dicts in depth-first (pre-order) order,
    keeping only the first node seen for any given id within a destination.
    """
    def walk(node):
        yield node
        for child in node['children']:
            yield from walk(child)

    anchors = {}
    seen_ids = {}
    for node in walk(toc):
        dest = node['dest']
        if not dest:
            continue
        ids_for_dest = seen_ids.setdefault(dest, set())
        node_id = node['id']
        if node_id in ids_for_dest:
            continue
        anchors.setdefault(dest, []).append({'id': node_id, 'frag': node['frag']})
        ids_for_dest.add(node_id)
    return anchors


class SimpleContainer(ContainerBase):
    """Container class used throughout this module.

    It differs from ``ContainerBase`` only in setting ``tweak_mode``.
    """

    # NOTE(review): the effect of this flag is defined by ContainerBase, which
    # is not visible in this file -- confirm there before changing it.
    tweak_mode = True


def find_epub_cover(container):
    """Locate the title page and raster cover image of an EPUB.

    Returns a ``(titlepage_name, cover_image_name)`` tuple, or
    ``(None, None)`` when no usable pair can be determined:

    * If the book has no spine items, nothing is returned.
    * If a title page is marked in the book, it is paired with the declared
      cover image, or failing that with a cover image found inside that
      page.
    * Otherwise the first spine item is used, but only if a cover image is
      found inside it.
    """
    cover_image = find_cover_image(container)
    marked_title_page = find_cover_page(container)
    try:
        first_page_name = next(container.spine_names)[0]
    except StopIteration:
        return None, None

    if marked_title_page:
        if not cover_image:
            # No declared cover image, look for one inside the title page
            cover_image = find_cover_image_in_page(container, marked_title_page)
        if cover_image:
            return marked_title_page, cover_image
        return None, None

    cover_image_in_first_page = find_cover_image_in_page(container, first_page_name)
    if cover_image_in_first_page:
        return first_page_name, cover_image_in_first_page
    return None, None


def create_cover_page(container, input_fmt, is_comic, book_metadata=None):
    """Create (or overwrite) a title page that shows only the cover image.

    For ``input_fmt == 'epub'`` the pair found by find_epub_cover() is used
    and, when both parts exist, the title page file is overwritten with the
    cover template. For other formats a strict cover image lookup is done;
    unless the book is a comic, a new ``titlepage.html`` item is generated,
    written, and inserted as the first spine item.

    Returns ``(raster_cover_name, titlepage_name)``; either may be None.
    """
    templ = '''
    <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en">
    <head><style>
    html, body, img { height: 100vh; display: block; margin: 0; padding: 0; border-width: 0; }
    img {
        width: 100%%; height: 100%%;
        object-fit: contain;
        margin-left: auto; margin-right: auto;
        max-width: 100vw; max-height: 100vh;
        top: 50vh; transform: translateY(-50%%);
        position: relative;
    }
    body.cover-fill img { object-fit: fill; }
    </style></head><body><img src="%s"/></body></html>
    '''

    def generic_cover():
        # NOTE: this helper is not called anywhere in this function
        if book_metadata is None:
            return BLANK_JPEG
        from calibre.ebooks.covers import create_cover
        mi = book_metadata
        return create_cover(mi.title, mi.authors, mi.series, mi.series_index)

    def write_titlepage(titlepage, raster_cover):
        # Fill the template with the cover href relative to the title page
        href = container.name_to_href(raster_cover, titlepage)
        markup = templ % prepare_string_for_xml(href, True)
        with container.open(titlepage, 'wb') as f:
            f.write(markup.encode('utf-8'))

    if input_fmt == 'epub':
        titlepage_name, raster_cover_name = find_epub_cover(container)
        if raster_cover_name and titlepage_name:
            write_titlepage(titlepage_name, raster_cover_name)
        return raster_cover_name, titlepage_name

    raster_cover_name = find_cover_image(container, strict=True)
    if raster_cover_name is None:
        return None, None
    if is_comic:
        return raster_cover_name, None

    item = container.generate_item(name='titlepage.html', id_prefix='titlepage')
    titlepage_name = container.href_to_name(item.get('href'), container.opf_name)
    write_titlepage(titlepage_name, raster_cover_name)
    # Make the new title page the first item in the spine
    spine = container.opf_xpath('//opf:spine')[0]
    ref = spine.makeelement(OPF('itemref'), idref=item.get('id'))
    container.insert_into_xml(spine, ref, index=0)
    return raster_cover_name, titlepage_name


def transform_style_sheet(container, name, link_uid, virtualize_resources, virtualized_names):
    """Transform the stylesheet *name* in place.

    The CSS is run through ``transform_properties``; when
    *virtualize_resources* is true, URLs are also rewritten with a link
    replacer and *name* is added to *virtualized_names* if any URL changed.
    A ``@charset "UTF-8";`` rule is prepended when the (left-stripped) sheet
    does not already start with ``@charset``. The file is rewritten only if
    something changed.
    """
    touched = set()
    url_callback = None
    if virtualize_resources:
        url_callback = partial(create_link_replacer(container, link_uid, touched), name)

    source = container.raw_data(name, decode=True)
    css = transform_properties(source, is_declaration=False, url_callback=url_callback)

    needs_write = css != source
    if virtualize_resources and name in touched:
        needs_write = True
        virtualized_names.add(name)

    css = css.lstrip()
    if not css.startswith('@charset'):
        css = '@charset "UTF-8";\n' + css
        needs_write = True

    if needs_write:
        with container.open(name, 'wb') as f:
            f.write(css.encode('utf-8'))


def transform_svg_image(container, name, link_uid, virtualize_resources, virtualized_names):
    """Rewrite ``xlink:href`` references inside the SVG image *name*.

    Does nothing unless *virtualize_resources* is true. References that
    start with ``#`` are left alone; all others are passed through a link
    replacer. If at least one reference was processed, *name* is added to
    *virtualized_names* and the file is committed.
    """
    if not virtualize_resources:
        return

    replace = partial(create_link_replacer(container, link_uid, set()), name)
    href_attr = XLINK('href')
    elements = XPath('//*[@xl:href]')(container.parsed(name))
    external = [e for e in elements if not e.get(href_attr).startswith('#')]
    for elem in external:
        elem.set(href_attr, replace(elem.get(href_attr)))

    if external:
        virtualized_names.add(name)
        container.dirty(name)
        container.commit_item(name)


def transform_inline_styles(container, name, transform_sheet, transform_style):
    """Apply CSS transforms to ``<style>`` tags and ``style`` attributes.

    *transform_sheet* is called with the text of every ``style`` element
    whose type is (or defaults to) ``text/css``; *transform_style* is called
    with every non-empty ``style`` attribute value. Values are replaced only
    when the transform returns something different.

    Returns True if anything in the document was modified.
    """
    root = container.parsed(name)
    modified = False

    for style_tag in root.xpath('//*[local-name()="style"]'):
        css = style_tag.text
        if not css or (style_tag.get('type') or 'text/css').lower() != 'text/css':
            continue
        new_css = transform_sheet(css)
        if new_css != css:
            modified = True
            style_tag.text = new_css
            pretty_script_or_style(container, style_tag)

    for elem in root.xpath('//*[@style]'):
        declaration = elem.get('style', None)
        if not declaration:
            continue
        new_declaration = transform_style(declaration)
        if new_declaration != declaration:
            modified = True
            elem.set('style', new_declaration)

    return modified


def transform_html(container, name, virtualize_resources, link_uid, link_to_map, virtualized_names):
    """Prepare the HTML file *name* for the viewer and store it as JSON.

    Steps, in order:

    1. Tag every ``<img src>`` that resolves to a book file with a
       ``data-calibre-src`` attribute holding the file name.
    2. Strip all attributes from ``<link>`` tags that are not CSS
       stylesheets.
    3. Run inline ``<style>`` blocks and ``style`` attributes through
       ``transform_properties``.
    4. Either virtualize all links (virtualize_html()) or only convert
       ``<a>`` links to internal targets into ``javascript:void(0)`` links,
       recording them in *link_to_map*.
    5. Overwrite the file with the output of html_as_json().
    """
    anchors = XPath('//h:a[@href]')
    svg_anchors = XPath('//svg:a')
    images = XPath('//h:img[@src]')
    head_links = XPath('//h:link[@href]')
    root = container.parsed(name)
    changed_names = set()
    link_replacer = create_link_replacer(container, link_uid, changed_names)

    # Used for viewing images
    for img in images(root):
        target = container.href_to_name(img.get('src'), name)
        if target:
            img.set('data-calibre-src', target)

    # Disable non-stylesheet link tags. Such links will not be loaded by the
    # browser anyway and would cause the resource load check to hang
    for link in head_links(root):
        ltype = (link.get('type') or 'text/css').lower()
        rel = (link.get('rel') or 'stylesheet').lower()
        if not (ltype == 'text/css' and rel == 'stylesheet'):
            link.attrib.clear()

    # URLs in the inline CSS will be replaced in virtualize_html
    def css_transformer(is_declaration):
        def transform(css_text):
            result = transform_properties(css_text, is_declaration=is_declaration)
            if name in changed_names:
                virtualized_names.add(name)
            return result
        return transform

    # Transform <style> and style=""
    transform_inline_styles(
        container, name,
        transform_sheet=css_transformer(False),
        transform_style=css_transformer(True),
    )

    if virtualize_resources:
        virtualize_html(container, name, link_uid, link_to_map, virtualized_names)
    else:

        def handle_link(a, attr='href'):
            href = a.get(attr)
            if not href:
                return
            href = link_replacer(name, href)
            if not href or not href.startswith(link_uid):
                return
            a.set(attr, 'javascript:void(0)')
            pieces = href.split('|')
            if len(pieces) <= 1:
                # Bare link_uid: link to the top of the current file
                return
            lname, lfrag = decode_url(pieces[1])
            link_to_map.setdefault(lname, {}).setdefault(lfrag or '', set()).add(name)
            a.set('data-' + link_uid, json.dumps({'name':lname, 'frag':lfrag}, ensure_ascii=False))

        for a in anchors(root):
            handle_link(a)
        xhref = XLINK('href')
        for a in svg_anchors(root):
            handle_link(a, xhref)

    # Serialize before opening, so a failure does not truncate the file
    serialized = html_as_json(root)
    with container.open(name, 'wb') as f:
        f.write(serialized)


class RenderManager:
    """Context manager that spreads book file processing over worker processes.

    Usage: enter the context, call launch_workers() once to start helper
    processes, then call the instance to process the files. One share of the
    work is always done in the calling process. On exit all workers are
    stopped and the temporary directory is removed.

    NOTE(review): ``lopen`` is not imported in this file; it is presumably a
    builtin provided by the calibre runtime -- confirm.
    """

    def __init__(self, max_workers):
        # A falsy value means no explicit upper limit (see launch_workers)
        self.max_workers = max_workers

    def launch_worker(self):
        """Start one worker process and append it to ``self.workers``.

        The worker's stdout and stderr go to ``<n>.error`` in the temporary
        directory; ``<n>.json`` is created empty as the file the worker will
        write its result to. Both paths are stored on the process object.
        """
        index = len(self.workers)
        output_path = os.path.join(self.tdir, f'{index}.json')
        error_path = os.path.join(self.tdir, f'{index}.error')
        # The child process gets its own copy of the error file handle, so
        # the parent closes its handle once the worker is started instead of
        # leaking one open file per worker.
        with lopen(output_path, 'wb') as output, lopen(error_path, 'wb') as error:
            p = start_pipe_worker('from calibre.srv.render_book import worker_main; worker_main()', stdout=error, stderr=error)
            p.output_path = output.name
            p.error_path = error.name
        self.workers.append(p)

    def __enter__(self):
        self.workers = []
        self.tdir = PersistentTemporaryDirectory()
        return self

    def __exit__(self, *a):
        # Stop any worker that is still running
        while self.workers:
            p = self.workers.pop()
            if p.poll() is not None:
                continue
            p.terminate()
            if not iswindows and p.poll() is None:
                # Give the process a moment to exit before killing it
                time.sleep(0.02)
                if p.poll() is None:
                    p.kill()
        del self.workers
        # Best effort removal of the temporary directory, with one retry
        try:
            rmtree(self.tdir)
        except OSError:
            time.sleep(0.1)
            try:
                rmtree(self.tdir)
            except OSError:
                pass
        del self.tdir

    def launch_workers(self, names, in_process_container):
        """Start as many helper processes as the workload justifies.

        The worker count is limited by the CPU count, the number of *names*
        and ``max_workers`` (when truthy). Fewer than three files, or less
        than 128KB of data in total, is processed without helpers.

        Returns the total number of workers, counting the calling process.
        """
        num_workers = min(detect_ncpus(), len(names))
        if self.max_workers:
            num_workers = min(num_workers, self.max_workers)
        if num_workers > 1:
            if len(names) < 3 or sum(os.path.getsize(in_process_container.name_path_map[n]) for n in names) < 128 * 1024:
                num_workers = 1
        if num_workers > 1:
            # The calling process acts as one of the workers
            num_other_workers = num_workers - 1
            while len(self.workers) < num_other_workers:
                self.launch_worker()
        return num_workers

    def __call__(self, names, args, in_process_container):
        """Process *names* and return a list of process_book_files() results.

        *args* is the tuple of positional arguments that follow ``names`` in
        process_book_files(). The last group of names is processed in this
        process using *in_process_container*; the others are sent to the
        launched workers. Raises ``Exception`` with the contents of a
        worker's error file if any worker exits with a non-zero status.
        """
        num_workers = len(self.workers) + 1
        if num_workers == 1:
            return [process_book_files(names, *args, container=in_process_container)]

        group_sz = int(ceil(len(names) / num_workers))
        groups = tuple(grouper(group_sz, names))
        for group, worker in zip(groups[:-1], self.workers):
            worker.stdin.write(as_bytes(msgpack_dumps((worker.output_path, group,) + args)))
            worker.stdin.flush()
            worker.stdin.close()
            worker.job_sent = True

        # Workers that got no job are told to exit immediately
        for worker in self.workers:
            if not hasattr(worker, 'job_sent'):
                worker.stdin.write(b'_')
                worker.stdin.flush()
                worker.stdin.close()

        error = None
        results = [process_book_files(groups[-1], *args, container=in_process_container)]
        for worker in self.workers:
            if not hasattr(worker, 'job_sent'):
                worker.wait()
                continue
            if worker.wait() != 0:
                with lopen(worker.error_path, 'rb') as f:
                    error = f.read().decode('utf-8', 'replace')
            else:
                with lopen(worker.output_path, 'rb') as f:
                    results.append(msgpack_loads(f.read()))
        if error is not None:
            raise Exception('Render worker failed with error:\n' + error)
        return results


def worker_main():
    """Entry point of a render worker process.

    Reads the whole of stdin. A lone ``b'_'`` means there is no work to do.
    Otherwise the input is a msgpack encoded sequence whose first item is
    the output file path and whose remaining items are the arguments for
    process_book_files(); the msgpack encoded result is written to the
    output file.
    """
    stream = getattr(sys.stdin, 'buffer', sys.stdin)
    payload = stream.read()
    if payload == b'_':
        return
    job = msgpack_loads(payload)
    output_path, job_args = job[0], job[1:]
    result = process_book_files(*job_args)
    with open(output_path, 'wb') as f:
        f.write(as_bytes(msgpack_dumps(result)))


def virtualize_html(container, name, link_uid, link_to_map, virtualized_names):
    """Rewrite all links in the HTML file *name* to their virtualized form.

    Every link in the document is passed through a link replacer (see
    create_link_replacer()). ``<a>`` and SVG ``<a>`` elements whose rewritten
    link starts with *link_uid* are turned into ``javascript:void(0)`` links
    carrying a ``data-<link_uid>`` attribute with the JSON encoded target,
    and the target is recorded in *link_to_map*. All other non-empty links
    get ``target="_blank"`` and ``rel="noopener noreferrer"``.

    *name* is always added to *virtualized_names*. Returns True if the link
    replacer changed any link in the file.
    """
    changed = set()
    link_xpath = XPath('//h:a[@href]')
    svg_link_xpath = XPath('//svg:a')
    link_replacer = create_link_replacer(container, link_uid, changed)

    virtualized_names.add(name)
    root = container.parsed(name)
    rewrite_links(root, partial(link_replacer, name))

    def handle_link(a, attr='href'):
        href = a.get(attr) or ''
        if href.startswith(link_uid):
            a.set(attr, 'javascript:void(0)')
            parts = href.split('|')
            # The link replacer returns the bare link_uid for an empty
            # fragment (href="#"); there is no target to decode in that
            # case and indexing parts[1] would raise IndexError.
            if len(parts) > 1:
                lname, lfrag = decode_url(parts[1])
                link_to_map.setdefault(lname, {}).setdefault(lfrag or '', set()).add(name)
                a.set('data-' + link_uid, json.dumps({'name':lname, 'frag':lfrag}, ensure_ascii=False))
        elif href:
            a.set('target', '_blank')
            a.set('rel', 'noopener noreferrer')

    for a in link_xpath(root):
        handle_link(a)
    xhref = XLINK('href')
    for a in svg_link_xpath(root):
        handle_link(a, xhref)

    return name in changed


def process_book_files(names, container_dir, opfpath, virtualize_resources, link_uid, data_for_clone, container=None):
    """Transform the book files listed in *names*.

    HTML files are measured and transformed with transform_html(),
    stylesheets with transform_style_sheet() and SVG images with
    transform_svg_image(). ``None`` entries in *names* are skipped. When no
    *container* is passed, one is created from *container_dir*, *opfpath*
    and *data_for_clone*.

    Returns ``(link_to_map, html_data, virtualized_names)`` where
    ``html_data`` maps each HTML file name to its ``length``, ``has_maths``
    and ``anchor_map`` values.
    """
    if container is None:
        container = SimpleContainer(container_dir, opfpath, default_log, clone_data=data_for_clone)
        container.cloned = False

    link_to_map, html_data, virtualized_names = {}, {}, set()

    for name in (n for n in names if n is not None):
        mime_type = container.mime_map[name].lower()
        if mime_type in OEB_DOCS:
            root = container.parsed(name)
            # Collect the metrics before the document is transformed
            html_data[name] = {
                'length': get_length(root),
                'has_maths': check_for_maths(root),
                'anchor_map': anchor_map(root)
            }
            transform_html(container, name, virtualize_resources, link_uid, link_to_map, virtualized_names)
        elif mime_type in OEB_STYLES:
            transform_style_sheet(container, name, link_uid, virtualize_resources, virtualized_names)
        elif mime_type == 'image/svg+xml':
            transform_svg_image(container, name, link_uid, virtualize_resources, virtualized_names)

    return link_to_map, html_data, virtualized_names


def process_exploded_book(
    book_fmt, opfpath, input_fmt, tdir, render_manager, log=None, book_hash=None, save_bookmark_data=False,
    book_metadata=None, virtualize_resources=True
):
    """Convert an extracted book in *tdir* into the form used by the viewer.

    Creates a cover page, builds the Table of Contents, transforms all HTML,
    CSS and SVG files via *render_manager*, removes the files the viewer
    does not need and writes ``calibre-book-manifest.json`` into the
    container root.

    Returns ``(container, bookmark_data)`` where ``bookmark_data`` is the
    raw content of ``META-INF/calibre_bookmarks.txt`` when
    *save_bookmark_data* is true and that file exists, otherwise None.

    Raises ``Spineless`` if the book has no spine items.
    """
    log = log or default_log
    container = SimpleContainer(tdir, opfpath, log)
    input_plugin = plugin_for_input_format(input_fmt)
    is_comic = bool(getattr(input_plugin, 'is_image_collection', False))

    def needs_work(mt):
        return mt in OEB_STYLES or mt in OEB_DOCS or mt == 'image/svg+xml'

    def work_priority(name):
        # Sort key: HTML files come before all other files and, within each
        # of those two groups, smaller files come first
        size = os.path.getsize(container.name_path_map[name])
        is_html = container.mime_map.get(name) in OEB_DOCS
        return (0 if is_html else 1), size

    if not is_comic:
        render_manager.launch_workers(tuple(n for n, mt in iteritems(container.mime_map) if needs_work(mt)), container)

    bookmark_data = None
    if save_bookmark_data:
        bm_file = 'META-INF/calibre_bookmarks.txt'
        if container.exists(bm_file):
            with container.open(bm_file, 'rb') as f:
                bookmark_data = f.read()

    # We do not add zero byte sized files as the IndexedDB API in the
    # browser has no good way to distinguish between zero byte files and
    # load failures.
    excluded_names = {
        name for name, mt in iteritems(container.mime_map) if
        name == container.opf_name or mt == guess_type('a.ncx') or name.startswith('META-INF/') or
        name == 'mimetype' or not container.has_name_and_is_not_empty(name)}
    raster_cover_name, titlepage_name = create_cover_page(container, input_fmt.lower(), is_comic, book_metadata)

    toc = get_toc(container, verify_destinations=False).to_dict(count())
    if not toc or not toc.get('children'):
        # No usable ToC in the book, generate one from the headings
        toc = from_xpaths(container, ['//h:h1', '//h:h2', '//h:h3']).to_dict(count())
    spine = [name for name, is_linear in container.spine_names]
    spineq = frozenset(spine)
    landmarks = [l for l in get_landmarks(container) if l['dest'] in spineq]

    if not spineq:
        raise Spineless('Book is empty, no content in spine')

    page_progression_direction = None
    try:
        page_progression_direction = container.opf_xpath('//opf:spine/@page-progression-direction')[0]
    except IndexError:
        pass

    book_render_data = {
        'version': RENDER_VERSION,
        'toc':toc,
        'book_format': book_fmt,
        'spine':spine,
        'link_uid': uuid4(),
        'book_hash': book_hash,
        'is_comic': is_comic,
        'raster_cover_name': raster_cover_name,
        'title_page_name': titlepage_name,
        'has_maths': False,
        'total_length': 0,
        'spine_length': 0,
        'toc_anchor_map': toc_anchor_map(toc),
        'landmarks': landmarks,
        'link_to_map': {},
        'page_progression_direction': page_progression_direction,
    }

    names = sorted(
        (n for n, mt in iteritems(container.mime_map) if needs_work(mt)),
        key=work_priority)

    results = render_manager(
        names, (
            tdir, opfpath, virtualize_resources, book_render_data['link_uid'], container.data_for_clone()
        ), container
    )
    ltm = book_render_data['link_to_map']
    html_data = {}
    virtualized_names = set()

    def merge_ltm(dest, src):
        for k, v in iteritems(src):
            if k in dest:
                dest[k] |= v
            else:
                dest[k] = v

    # Combine the partial results returned by the individual workers
    for link_to_map, hdata, vnames in results:
        html_data.update(hdata)
        virtualized_names |= vnames
        for k, v in iteritems(link_to_map):
            if k in ltm:
                merge_ltm(ltm[k], v)
            else:
                ltm[k] = v

    def manifest_data(name):
        mt = (container.mime_map.get(name) or 'application/octet-stream').lower()
        ans = {
            'size':os.path.getsize(container.name_path_map[name]),
            'is_virtualized': name in virtualized_names,
            'mimetype':mt,
            'is_html': mt in OEB_DOCS,
        }
        if ans['is_html']:
            data = html_data[name]
            ans['length'] = l = data['length']
            book_render_data['total_length'] += l
            # spineq holds the same names as the spine list, but membership
            # tests on it do not scan the whole spine
            if name in spineq:
                book_render_data['spine_length'] += l
            ans['has_maths'] = hm = data['has_maths']
            if hm:
                book_render_data['has_maths'] = True
            ans['anchor_map'] = data['anchor_map']
        return ans

    book_render_data['files'] = {name:manifest_data(name) for name in set(container.name_path_map) - excluded_names}
    container.commit()

    for name in excluded_names:
        os.remove(container.name_path_map[name])

    for name, amap in iteritems(ltm):
        for k, v in tuple(iteritems(amap)):
            amap[k] = tuple(v)  # needed for JSON serialization

    data = as_bytes(json.dumps(book_render_data, ensure_ascii=False))
    with lopen(os.path.join(container.root, 'calibre-book-manifest.json'), 'wb') as f:
        f.write(data)

    return container, bookmark_data


def split_name(name):
    """Split a ``{namespace}local`` tag name into ``(namespace, local)``.

    When there is nothing after the first ``}`` (or no ``}`` at all), the
    namespace is None and the text before the ``}`` is returned as the
    second item.
    """
    head, _, local = name.partition('}')
    return (head[1:], local) if local else (None, head)


# Tag name groups. NOTE(review): neither tuple is referenced in the visible
# part of this module; presumably they are used by other code -- confirm
# before changing or removing them.
known_tags = ('img', 'script', 'link', 'image', 'style')
discarded_tags = ('meta', 'base')


def ensure_body(root):
    """Make sure *root* has exactly one XHTML ``<body>`` child.

    A missing body is created empty. When there are several, the attributes,
    text and children of every extra body are moved into a ``<div>`` that is
    appended to the first body, and the extra body is then removed from
    *root*.
    """
    body_tag = XHTML('body')
    bodies = list(root.iterchildren(body_tag))
    if not bodies:
        root.append(root.makeelement(body_tag))
        return
    first_body = bodies[0]
    for extra in bodies[1:]:
        div = root.makeelement(XHTML('div'))
        div.attrib.update(extra.attrib)
        div.text = extra.text
        # Iterate over a snapshot, as appending moves the child out of extra
        for child in tuple(extra):
            div.append(child)
        first_body.append(div)
        # Without this the emptied body would remain in the document and
        # there would still be more than one <body>
        root.remove(extra)


def html_as_json(root):
    """Serialize the parsed HTML document *root* with ``html_as_json``.

    Before serializing, the document is normalized in place: a single body
    is ensured (see ensure_body()), children of *root* other than head and
    body are removed and the text and tail of *root* are cleared.

    Raises ``ValueError`` if the root tag is in a namespace other than the
    XHTML one.
    """
    from calibre_extensions.html_as_json import serialize
    namespace = split_name(root.tag)[0]
    if namespace not in (None, XHTML_NS):
        raise ValueError('HTML tag must be in empty or XHTML namespace')
    ensure_body(root)
    unwanted = [
        child for child in root.iterchildren('*')
        if child.tag.partition('}')[-1] not in ('head', 'body')
    ]
    for child in unwanted:
        root.remove(child)
    root.text = root.tail = None
    return serialize(root, Comment)


def serialize_datetimes(d):
    """Replace, in place, every ``datetime`` value of the dict *d* with the
    result of ``encode_datetime``. Only top-level values are examined."""
    for key, value in tuple(d.items()):
        if isinstance(value, datetime):
            d[key] = encode_datetime(value)


# Prefix that marks bookmark data as base64 encoded JSON; checked for (and
# stripped) by get_stored_annotations().
EPUB_FILE_TYPE_MAGIC = b'encoding=json+base64:\n'


def get_stored_annotations(container, bookmark_data):
    """Yield the annotations stored in the raw *bookmark_data* bytes.

    Data that starts with ``EPUB_FILE_TYPE_MAGIC`` is decoded as base64
    encoded JSON and its items are yielded as is. Anything else is parsed
    with ``parse_bookmarks``; CFI bookmarks with a string position are
    converted to ``bookmark`` annotations, or to a ``last-read`` annotation
    when they have no title or the current-page title. Nothing is yielded
    for empty data. *container* is not used.
    """
    raw = bookmark_data or b''
    if not raw:
        return

    if raw.startswith(EPUB_FILE_TYPE_MAGIC):
        payload = raw[len(EPUB_FILE_TYPE_MAGIC):].replace(b'\n', b'')
        yield from json_loads(from_base64_bytes(payload))
        return

    from calibre.ebooks.oeb.iterator.bookmarks import parse_bookmarks
    for bm in parse_bookmarks(raw):
        if bm['type'] != 'cfi' or not isinstance(bm['pos'], str):
            continue
        spine_index = (1 + bm['spine']) * 2
        epubcfi = 'epubcfi(/{}/{})'.format(spine_index, bm['pos'].lstrip('/'))
        title = bm.get('title')
        is_named_bookmark = bool(title) and title != 'calibre_current_page_bookmark'
        if is_named_bookmark:
            annotation = {'type': 'bookmark', 'title': title, 'pos': epubcfi, 'pos_type': 'epubcfi', 'timestamp': EPOCH}
        else:
            annotation = {'type': 'last-read', 'pos': epubcfi, 'pos_type': 'epubcfi', 'timestamp': EPOCH}
        yield annotation


def render(pathtoebook, output_dir, book_hash=None, serialize_metadata=False, extract_annotations=False, virtualize_resources=True, max_workers=1):
    """Extract the book at *pathtoebook* into *output_dir* and process it.

    With *serialize_metadata* the book metadata is also written to
    ``calibre-book-metadata.json``; with *extract_annotations* any
    annotations stored in the book are written to
    ``calibre-book-annotations.json``. *max_workers* is passed to
    RenderManager.
    """
    pathtoebook = os.path.abspath(pathtoebook)
    with RenderManager(max_workers) as render_manager:
        mi = None
        if serialize_metadata:
            from calibre.customize.ui import quick_metadata
            from calibre.ebooks.metadata.meta import get_metadata
            with lopen(pathtoebook, 'rb') as f, quick_metadata:
                ext = os.path.splitext(pathtoebook)[1][1:].lower()
                mi = get_metadata(f, ext)

        book_fmt, opfpath, input_fmt = extract_book(pathtoebook, output_dir, log=default_log)
        container, bookmark_data = process_exploded_book(
            book_fmt, opfpath, input_fmt, output_dir, render_manager,
            book_hash=book_hash, save_bookmark_data=extract_annotations,
            book_metadata=mi, virtualize_resources=virtualize_resources
        )

        if serialize_metadata:
            from calibre.ebooks.metadata.book.serialize import metadata_as_dict
            d = metadata_as_dict(mi)
            d.pop('cover_data', None)
            serialize_datetimes(d)
            serialize_datetimes(d.get('user_metadata', {}))
            metadata_path = os.path.join(output_dir, 'calibre-book-metadata.json')
            with lopen(metadata_path, 'wb') as f:
                f.write(json_dumps(d))

        if extract_annotations and bookmark_data:
            annotations = json_dumps(tuple(get_stored_annotations(container, bookmark_data)))
            if annotations:
                annotations_path = os.path.join(output_dir, 'calibre-book-annotations.json')
                with lopen(annotations_path, 'wb') as f:
                    f.write(annotations)


def render_for_viewer(path, out_dir, book_hash):
    """Call render() with the fixed options used for the viewer: metadata
    and annotations are extracted, resources are not virtualized and
    ``max_workers`` is 0."""
    viewer_options = dict(
        book_hash=book_hash,
        serialize_metadata=True,
        extract_annotations=True,
        virtualize_resources=False,
        max_workers=0,
    )
    return render(path, out_dir, **viewer_options)


def viewer_main():
    """Read msgpack encoded render_for_viewer() arguments from stdin and
    run render_for_viewer() with them."""
    stream = getattr(sys.stdin, 'buffer', sys.stdin)
    call_args = msgpack_loads(stream.read())
    render_for_viewer(*call_args)


class Profiler:
    """Context manager that profiles its body and prints the statistics.

    ``cProfile`` is used when it can be imported, otherwise ``profile``. On
    exit the statistics are sorted by cumulative time and the top 5% of
    entries are printed.
    """

    def __init__(self):
        try:
            import cProfile as profiler_module
        except ImportError:
            import profile as profiler_module
        self.profile = profiler_module.Profile()

    def __enter__(self):
        self.profile.enable()

    def __exit__(self, *a):
        profiler = self.profile
        profiler.disable()
        profiler.create_stats()
        import pstats
        pstats.Stats(profiler).sort_stats('cumulative').print_stats(.05)


def profile():
    """Render the book named by the last command line argument into a
    temporary directory while profiling, and print the profile."""
    from calibre.ptempfile import TemporaryDirectory
    book_path = sys.argv[-1]
    options = dict(
        serialize_metadata=True, extract_annotations=True,
        virtualize_resources=False, max_workers=1,
    )
    with TemporaryDirectory() as tdir:
        with Profiler():
            return render(book_path, tdir, **options)


def develop():
    """Render the book named by the last command line argument into a
    temporary directory and wait for Enter, so the output can be
    inspected before the directory is removed."""
    from calibre.ptempfile import TemporaryDirectory
    book_path = sys.argv[-1]
    options = dict(
        serialize_metadata=True, extract_annotations=True,
        virtualize_resources=True, max_workers=1,
    )
    with TemporaryDirectory() as tdir:
        render(book_path, tdir, **options)
        print('Extracted to:', tdir)
        input('Press Enter to quit')


# When executed as a script, run develop(), which renders the book given as
# the last command line argument.
if __name__ == '__main__':
    develop()

Zerion Mini Shell 1.0