%PDF- %PDF-
Direktori : /lib/calibre/calibre/ebooks/metadata/ |
Current File : //lib/calibre/calibre/ebooks/metadata/utils.py |
#!/usr/bin/env python3 # License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net> from collections import namedtuple from calibre.ebooks.chardet import xml_to_unicode from calibre.ebooks.oeb.base import OPF from calibre.ebooks.oeb.polish.utils import guess_type from calibre.spell import parse_lang_code from calibre.utils.cleantext import clean_xml_chars from calibre.utils.localization import lang_as_iso639_1 from calibre.utils.xml_parse import safe_xml_fromstring OPFVersion = namedtuple('OPFVersion', 'major minor patch') def parse_opf_version(raw): parts = (raw or '').split('.') try: major = int(parts[0]) except Exception: return OPFVersion(2, 0, 0) try: v = list(map(int, raw.split('.'))) except Exception: v = [major, 0, 0] while len(v) < 3: v.append(0) v = v[:3] return OPFVersion(*v) def parse_opf(stream_or_path): stream = stream_or_path if not hasattr(stream, 'read'): stream = open(stream, 'rb') raw = stream.read() if not raw: raise ValueError('Empty file: '+getattr(stream, 'name', 'stream')) raw, encoding = xml_to_unicode(raw, strip_encoding_pats=True, resolve_entities=True, assume_utf8=True) raw = raw[raw.find('<'):] root = safe_xml_fromstring(clean_xml_chars(raw)) if root is None: raise ValueError('Not an OPF file') return root def normalize_languages(opf_languages, mi_languages): ' Preserve original country codes and use 2-letter lang codes where possible ' def parse(x): try: return parse_lang_code(x) except ValueError: return None opf_languages = filter(None, map(parse, opf_languages)) cc_map = {c.langcode:c.countrycode for c in opf_languages} mi_languages = filter(None, map(parse, mi_languages)) def norm(x): lc = x.langcode cc = x.countrycode or cc_map.get(lc, None) lc = lang_as_iso639_1(lc) or lc if cc: lc += '-' + cc return lc return list(map(norm, mi_languages)) def ensure_unique(template, existing): b, e = template.rpartition('.')[::2] if b and e: e = '.' + e else: b, e = template, '' q = template c = 0 while q in existing: c += 1 q = '%s-%d%s' % (b, c, e) return q def create_manifest_item(root, href_template, id_template, media_type=None): all_ids = frozenset(root.xpath('//*/@id')) all_hrefs = frozenset(root.xpath('//*/@href')) href = ensure_unique(href_template, all_hrefs) item_id = ensure_unique(id_template, all_ids) manifest = root.find(OPF('manifest')) if manifest is not None: i = manifest.makeelement(OPF('item')) i.set('href', href), i.set('id', item_id) i.set('media-type', media_type or guess_type(href_template)) manifest.append(i) return i def pretty_print_opf(root): from calibre.ebooks.oeb.polish.pretty import pretty_opf, pretty_xml_tree pretty_opf(root) pretty_xml_tree(root)