#!/usr/bin/env python3
# License: GPLv3 Copyright: 2016, Kovid Goyal <kovid at kovidgoyal.net>
import json
import re
from collections import defaultdict, namedtuple
from contextlib import suppress
from operator import attrgetter
from functools import wraps
from lxml import etree
from calibre import prints
from calibre.ebooks.metadata import authors_to_string, check_isbn, string_to_authors
from calibre.ebooks.metadata.book.base import Metadata
from calibre.ebooks.metadata.book.json_codec import (
    decode_is_multiple, encode_is_multiple, object_to_unicode
)
from calibre.ebooks.metadata.utils import (
    create_manifest_item, ensure_unique, normalize_languages, parse_opf,
    pretty_print_opf
)
from calibre.ebooks.oeb.base import DC, OPF, OPF2_NSMAP
from calibre.utils.config import from_json, to_json
from calibre.utils.date import (
    fix_only_date, is_date_undefined, isoformat, parse_date as parse_date_, utcnow,
    w3cdtf
)
from calibre.utils.iso8601 import parse_iso8601
from calibre.utils.localization import canonicalize_lang
from polyglot.builtins import iteritems

# Utils {{{
_xpath_cache = {}
_re_cache = {}

def uniq(vals):
    ''' Remove all duplicates from vals, while preserving order. '''
    vals = vals or ()
    seen = set()
    seen_add = seen.add
    return list(x for x in vals if x not in seen and not seen_add(x))

def dump_dict(cats):
    return json.dumps(object_to_unicode(cats or {}), ensure_ascii=False, skipkeys=True)

def XPath(x):
    try:
        return _xpath_cache[x]
    except KeyError:
        _xpath_cache[x] = ans = etree.XPath(x, namespaces=OPF2_NSMAP)
        return ans

def regex(r, flags=0):
    try:
        return _re_cache[(r, flags)]
    except KeyError:
        _re_cache[(r, flags)] = ans = re.compile(r, flags)
        return ans

def remove_refines(e, refines):
    for x in refines[e.get('id')]:
        x.getparent().remove(x)
    refines.pop(e.get('id'), None)

def remove_element(e, refines):
    remove_refines(e, refines)
    e.getparent().remove(e)

def properties_for_id(item_id, refines):
    ans = {}
    if item_id:
        for elem in refines[item_id]:
            key = elem.get('property')
            if key:
                val = (elem.text or '').strip()
                if val:
                    ans[key] = val
    return ans

def properties_for_id_with_scheme(item_id, prefixes, refines):
    ans = defaultdict(list)
    if item_id:
        for elem in refines[item_id]:
            key = elem.get('property')
            if key:
                val = (elem.text or '').strip()
                if val:
                    scheme = elem.get('scheme') or None
                    scheme_ns = None
                    if scheme is not None:
                        p, r = scheme.partition(':')[::2]
                        if p and r:
                            ns = prefixes.get(p)
                            if ns:
                                scheme_ns = ns
                                scheme = r
                    ans[key].append((scheme_ns, scheme, val))
    return ans

def getroot(elem):
    while True:
        q = elem.getparent()
        if q is None:
            return elem
        elem = q

def ensure_id(elem):
    root = getroot(elem)
    eid = elem.get('id')
    if not eid:
        eid = ensure_unique('id', frozenset(XPath('//*/@id')(root)))
        elem.set('id', eid)
    return eid

def normalize_whitespace(text):
    if not text:
        return text
    return re.sub(r'\s+', ' ', text).strip()

def simple_text(f):
    @wraps(f)
    def wrapper(*args, **kw):
        return normalize_whitespace(f(*args, **kw))
    return wrapper

def items_with_property(root, q, prefixes=None):
    if prefixes is None:
        prefixes = read_prefixes(root)
    q = expand_prefix(q, known_prefixes).lower()
    for item in XPath("./opf:manifest/opf:item[@properties]")(root):
        for prop in (item.get('properties') or '').lower().split():
            prop = expand_prefix(prop, prefixes)
            if prop == q:
                yield item
                break
# }}}
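
# Illustrative usage sketch (added commentary, not part of the upstream module):
# XPath() and regex() memoize their compiled objects, so repeated evaluation of
# the same expression is cheap. Assuming ``root`` is a parsed OPF package element:
#
#   refines = read_refines(root)   # defined in the Refines section below
#   for title in XPath('./opf:metadata/dc:title')(root):
#       print(properties_for_id(title.get('id'), refines))
#   uniq(['a', 'b', 'a'])  # -> ['a', 'b']
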
# Prefixes {{{
# http://www.idpf.org/epub/vocab/package/pfx/
reserved_prefixes = {
    'dcterms': 'http://purl.org/dc/terms/',
    'epubsc': 'http://idpf.org/epub/vocab/sc/#',
    'marc': 'http://id.loc.gov/vocabulary/',
    'media': 'http://www.idpf.org/epub/vocab/overlays/#',
    'onix': 'http://www.editeur.org/ONIX/book/codelists/current.html#',
    'rendition': 'http://www.idpf.org/vocab/rendition/#',
    'schema': 'http://schema.org/',
    'xsd': 'http://www.w3.org/2001/XMLSchema#',
}

CALIBRE_PREFIX = 'https://calibre-ebook.com'
known_prefixes = reserved_prefixes.copy()
known_prefixes['calibre'] = CALIBRE_PREFIX

def parse_prefixes(x):
    return {m.group(1):m.group(2) for m in re.finditer(r'(\S+): \s*(\S+)', x)}

def read_prefixes(root):
    ans = reserved_prefixes.copy()
    ans.update(parse_prefixes(root.get('prefix') or ''))
    return ans

def expand_prefix(raw, prefixes):
    return regex(r'(\S+)\s*:\s*(\S+)').sub(lambda m:(prefixes.get(m.group(1), m.group(1)) + ':' + m.group(2)), raw or '')

def ensure_prefix(root, prefixes, prefix, value=None):
    if prefixes is None:
        prefixes = read_prefixes(root)
    prefixes[prefix] = value or reserved_prefixes[prefix]
    prefixes = {k:v for k, v in iteritems(prefixes) if reserved_prefixes.get(k) != v}
    if prefixes:
        root.set('prefix', ' '.join(f'{k}: {v}' for k, v in iteritems(prefixes)))
    else:
        root.attrib.pop('prefix', None)
# }}}
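
# Illustrative sketch (added commentary, not part of the upstream module): how
# the prefix helpers behave on a typical EPUB 3 package "prefix" attribute:
#
#   parse_prefixes('calibre: https://calibre-ebook.com')
#       # -> {'calibre': 'https://calibre-ebook.com'}
#   expand_prefix('calibre:series', known_prefixes)
#       # -> 'https://calibre-ebook.com:series'
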
# Refines {{{
def read_refines(root):
    ans = defaultdict(list)
    for meta in XPath('./opf:metadata/opf:meta[@refines]')(root):
        r = meta.get('refines') or ''
        if r.startswith('#'):
            ans[r[1:]].append(meta)
    return ans

def refdef(prop, val, scheme=None):
    return (prop, val, scheme)

def set_refines(elem, existing_refines, *new_refines):
    eid = ensure_id(elem)
    remove_refines(elem, existing_refines)
    for ref in reversed(new_refines):
        prop, val, scheme = ref
        r = elem.makeelement(OPF('meta'))
        r.set('refines', '#' + eid), r.set('property', prop)
        r.text = val.strip()
        if scheme:
            r.set('scheme', scheme)
        p = elem.getparent()
        p.insert(p.index(elem)+1, r)
# }}}
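
# Illustrative sketch (added commentary, not part of the upstream module):
# set_refines() replaces all <meta refines="#id"> children describing an
# element, e.g. to mark a dc:title as the main title with a sort value:
#
#   set_refines(title_elem, refines,
#               refdef('title-type', 'main'), refdef('file-as', 'Title, The'))
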
# Identifiers {{{
def parse_identifier(ident, val, refines):
    idid = ident.get('id')
    refines = refines[idid]
    scheme = None
    lval = val.lower()

    def finalize(scheme, val):
        if not scheme or not val:
            return None, None
        scheme = scheme.lower()
        if scheme in ('http', 'https'):
            return None, None
        if scheme.startswith('isbn'):
            scheme = 'isbn'
        if scheme == 'isbn':
            val = val.split(':')[-1]
            val = check_isbn(val)
            if val is None:
                return None, None
        return scheme, val

    # Try the OPF 2 style opf:scheme attribute, which will be present, for
    # example, in EPUB 3 files that have had their metadata set by an
    # application that only understands EPUB 2.
    scheme = ident.get(OPF('scheme'))
    if scheme and not lval.startswith('urn:'):
        return finalize(scheme, val)

    # Technically, we should be looking for refines that define the scheme, but
    # the IDioticPF created such a bad spec that they got their own
    # examples wrong, so I cannot be bothered doing this.
    # http://www.idpf.org/epub/301/spec/epub-publications-errata/

    # Parse the value for the scheme
    if lval.startswith('urn:'):
        val = val[4:]
    prefix, rest = val.partition(':')[::2]
    return finalize(prefix, rest)

def read_identifiers(root, prefixes, refines):
    ans = defaultdict(list)
    for ident in XPath('./opf:metadata/dc:identifier')(root):
        val = (ident.text or '').strip()
        if val:
            scheme, val = parse_identifier(ident, val, refines)
            if scheme and val:
                ans[scheme].append(val)
    return ans

def set_identifiers(root, prefixes, refines, new_identifiers, force_identifiers=False):
    uid = root.get('unique-identifier')
    package_identifier = None
    for ident in XPath('./opf:metadata/dc:identifier')(root):
        if uid is not None and uid == ident.get('id'):
            package_identifier = ident
            continue
        val = (ident.text or '').strip()
        if not val:
            ident.getparent().remove(ident)
            continue
        scheme, val = parse_identifier(ident, val, refines)
        if not scheme or not val or force_identifiers or scheme in new_identifiers:
            remove_element(ident, refines)
            continue
    metadata = XPath('./opf:metadata')(root)[0]
    for scheme, val in iteritems(new_identifiers):
        ident = metadata.makeelement(DC('identifier'))
        ident.text = f'{scheme}:{val}'
        if package_identifier is None:
            metadata.append(ident)
        else:
            p = package_identifier.getparent()
            p.insert(p.index(package_identifier), ident)

def identifier_writer(name):
    def writer(root, prefixes, refines, ival=None):
        uid = root.get('unique-identifier')
        package_identifier = None
        for ident in XPath('./opf:metadata/dc:identifier')(root):
            is_package_id = uid is not None and uid == ident.get('id')
            if is_package_id:
                package_identifier = ident
            val = (ident.text or '').strip()
            if (val.startswith(name + ':') or ident.get(OPF('scheme')) == name) and not is_package_id:
                remove_element(ident, refines)
        metadata = XPath('./opf:metadata')(root)[0]
        if ival:
            ident = metadata.makeelement(DC('identifier'))
            ident.text = f'{name}:{ival}'
            if package_identifier is None:
                metadata.append(ident)
            else:
                p = package_identifier.getparent()
                p.insert(p.index(package_identifier), ident)
    return writer

set_application_id = identifier_writer('calibre')
set_uuid = identifier_writer('uuid')
# }}}
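
# Illustrative sketch (added commentary, not part of the upstream module):
# reading and writing identifiers on a parsed package element ``root``:
#
#   prefixes, refines = read_prefixes(root), read_refines(root)
#   read_identifiers(root, prefixes, refines)   # e.g. {'isbn': ['<isbn>'], 'mobi-asin': ['<asin>']}
#   set_identifiers(root, prefixes, refines, {'isbn': '<isbn>'})
#   set_application_id(root, prefixes, refines, '42')  # writes <dc:identifier>calibre:42</dc:identifier>
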
# Title {{{
def find_main_title(root, refines, remove_blanks=False):
    first_title = main_title = None
    for title in XPath('./opf:metadata/dc:title')(root):
        if not title.text or not title.text.strip():
            if remove_blanks:
                remove_element(title, refines)
            continue
        if first_title is None:
            first_title = title
        props = properties_for_id(title.get('id'), refines)
        if props.get('title-type') == 'main':
            main_title = title
            break
    else:
        # No title explicitly marked as main, fall back to the first non-empty title
        main_title = first_title
    return main_title

def find_subtitle(root, refines):
    for title in XPath('./opf:metadata/dc:title')(root):
        if not title.text or not title.text.strip():
            continue
        props = properties_for_id(title.get('id'), refines)
        q = props.get('title-type') or ''
        if 'subtitle' in q or 'sub-title' in q:
            return title

@simple_text
def read_title(root, prefixes, refines):
    main_title = find_main_title(root, refines)
    if main_title is None:
        return None
    ans = main_title.text.strip()
    st = find_subtitle(root, refines)
    if st is not None and st is not main_title:
        ans += ': ' + st.text.strip()
    return ans

@simple_text
def read_title_sort(root, prefixes, refines):
    main_title = find_main_title(root, refines)
    if main_title is not None:
        fa = properties_for_id(main_title.get('id'), refines).get('file-as')
        if fa:
            return fa
    # Look for OPF 2.0 style title_sort
    for m in XPath('./opf:metadata/opf:meta[@name="calibre:title_sort"]')(root):
        ans = m.get('content')
        if ans:
            return ans

def set_title(root, prefixes, refines, title, title_sort=None):
    main_title = find_main_title(root, refines, remove_blanks=True)
    st = find_subtitle(root, refines)
    if st is not None:
        remove_element(st, refines)
    if main_title is None:
        m = XPath('./opf:metadata')(root)[0]
        main_title = m.makeelement(DC('title'))
        m.insert(0, main_title)
    main_title.text = title or None
    ts = [refdef('file-as', title_sort)] if title_sort else ()
    set_refines(main_title, refines, refdef('title-type', 'main'), *ts)
    for m in XPath('./opf:metadata/opf:meta[@name="calibre:title_sort"]')(root):
        remove_element(m, refines)
# }}}
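
# Illustrative sketch (added commentary, not part of the upstream module): on
# read the main title is joined with any subtitle, on write refines are rebuilt:
#
#   read_title(root, prefixes, refines)     # e.g. 'Main Title: The Subtitle'
#   set_title(root, prefixes, refines, 'New Title', title_sort='New Title, A')
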
# Languages {{{
def read_languages(root, prefixes, refines):
    ans = []
    for lang in XPath('./opf:metadata/dc:language')(root):
        val = canonicalize_lang((lang.text or '').strip())
        if val and val not in ans and val != 'und':
            ans.append(val)
    return uniq(ans)

def set_languages(root, prefixes, refines, languages):
    opf_languages = []
    for lang in XPath('./opf:metadata/dc:language')(root):
        remove_element(lang, refines)
        val = (lang.text or '').strip()
        if val:
            opf_languages.append(val)
    languages = list(filter(lambda x: x and x != 'und', normalize_languages(opf_languages, languages)))
    if not languages:
        # EPUB spec says dc:language is required
        languages = ['und']
    metadata = XPath('./opf:metadata')(root)[0]
    for lang in uniq(languages):
        l = metadata.makeelement(DC('language'))
        l.text = lang
        metadata.append(l)
# }}}
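
# Illustrative sketch (added commentary, not part of the upstream module):
# languages are canonicalized on read and 'und' is written when nothing valid
# is supplied:
#
#   read_languages(root, prefixes, refines)      # e.g. ['eng', 'fra']
#   set_languages(root, prefixes, refines, [])   # leaves a single <dc:language>und</dc:language>
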
# Creator/Contributor {{{
Author = namedtuple('Author', 'name sort seq', defaults=(0,))

def is_relators_role(props, q):
    for role in props.get('role'):
        if role:
            scheme_ns, scheme, role = role
            if role.lower() == q and (scheme_ns is None or (scheme_ns, scheme) == (reserved_prefixes['marc'], 'relators')):
                return True
    return False

def read_authors(root, prefixes, refines):
    roled_authors, unroled_authors = [], []
    editors_map = {}

    def author(item, props, val):
        aus = None
        file_as = props.get('file-as')
        if file_as:
            aus = file_as[0][-1]
        else:
            aus = item.get(OPF('file-as')) or None
        seq = 0
        ds = props.get('display-seq')
        with suppress(Exception):
            seq = int(ds[0][-1])
        return Author(normalize_whitespace(val), normalize_whitespace(aus), seq)

    for item in XPath('./opf:metadata/dc:creator')(root):
        val = (item.text or '').strip()
        if val:
            props = properties_for_id_with_scheme(item.get('id'), prefixes, refines)
            role = props.get('role')
            opf_role = item.get(OPF('role'))
            if role:
                if is_relators_role(props, 'aut'):
                    roled_authors.append(author(item, props, val))
                if is_relators_role(props, 'edt'):
                    # See https://bugs.launchpad.net/calibre/+bug/1950579
                    a = author(item, props, val)
                    editors_map[a.name] = a
            elif opf_role:
                if opf_role.lower() == 'aut':
                    roled_authors.append(author(item, props, val))
            else:
                unroled_authors.append(author(item, props, val))
    if roled_authors or unroled_authors:
        ans = uniq(roled_authors or unroled_authors)
    else:
        ans = uniq(editors_map.values())
    ans.sort(key=attrgetter('seq'))
    return ans

def set_authors(root, prefixes, refines, authors):
    ensure_prefix(root, prefixes, 'marc')
    removals = []
    for role in ('aut', 'edt'):
        for item in XPath('./opf:metadata/dc:creator')(root):
            props = properties_for_id_with_scheme(item.get('id'), prefixes, refines)
            opf_role = item.get(OPF('role'))
            if (opf_role and opf_role.lower() != role) or (props.get('role') and not is_relators_role(props, role)):
                continue
            removals.append(item)
        if removals:
            break
    for item in removals:
        remove_element(item, refines)
    metadata = XPath('./opf:metadata')(root)[0]
    for author in authors:
        if author.name:
            a = metadata.makeelement(DC('creator'))
            aid = ensure_id(a)
            a.text = author.name
            metadata.append(a)
            m = metadata.makeelement(OPF('meta'), attrib={'refines':'#'+aid, 'property':'role', 'scheme':'marc:relators'})
            m.text = 'aut'
            metadata.append(m)
            if author.sort:
                m = metadata.makeelement(OPF('meta'), attrib={'refines':'#'+aid, 'property':'file-as'})
                m.text = author.sort
                metadata.append(m)

def read_book_producers(root, prefixes, refines):
    ans = []
    for item in XPath('./opf:metadata/dc:contributor')(root):
        val = (item.text or '').strip()
        if val:
            props = properties_for_id_with_scheme(item.get('id'), prefixes, refines)
            role = props.get('role')
            opf_role = item.get(OPF('role'))
            if role:
                if is_relators_role(props, 'bkp'):
                    ans.append(normalize_whitespace(val))
            elif opf_role and opf_role.lower() == 'bkp':
                ans.append(normalize_whitespace(val))
    return ans

def set_book_producers(root, prefixes, refines, producers):
    for item in XPath('./opf:metadata/dc:contributor')(root):
        props = properties_for_id_with_scheme(item.get('id'), prefixes, refines)
        opf_role = item.get(OPF('role'))
        if (opf_role and opf_role.lower() != 'bkp') or (props.get('role') and not is_relators_role(props, 'bkp')):
            continue
        remove_element(item, refines)
    metadata = XPath('./opf:metadata')(root)[0]
    for bkp in producers:
        if bkp:
            a = metadata.makeelement(DC('contributor'))
            aid = ensure_id(a)
            a.text = bkp
            metadata.append(a)
            m = metadata.makeelement(OPF('meta'), attrib={'refines':'#'+aid, 'property':'role', 'scheme':'marc:relators'})
            m.text = 'bkp'
            metadata.append(m)
# }}}
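
# Illustrative sketch (added commentary, not part of the upstream module):
# authors are returned as Author namedtuples sorted by their display-seq refine
# and written back with marc:relators role metas:
#
#   for a in read_authors(root, prefixes, refines):
#       print(a.name, a.sort, a.seq)
#   set_authors(root, prefixes, refines, [Author('Jane Doe', 'Doe, Jane')])
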
# Dates {{{
def parse_date(raw, is_w3cdtf=False):
    raw = raw.strip()
    if is_w3cdtf:
        ans = parse_iso8601(raw, assume_utc=True)
        if 'T' not in raw and ' ' not in raw:
            ans = fix_only_date(ans)
    else:
        ans = parse_date_(raw, assume_utc=True)
        if ' ' not in raw and 'T' not in raw and (ans.hour, ans.minute, ans.second) == (0, 0, 0):
            ans = fix_only_date(ans)
    return ans

def read_pubdate(root, prefixes, refines):
    for date in XPath('./opf:metadata/dc:date')(root):
        val = (date.text or '').strip()
        if val:
            try:
                return parse_date(val)
            except Exception:
                continue

def set_pubdate(root, prefixes, refines, val):
    for date in XPath('./opf:metadata/dc:date')(root):
        remove_element(date, refines)
    if not is_date_undefined(val):
        val = isoformat(val)
        m = XPath('./opf:metadata')(root)[0]
        d = m.makeelement(DC('date'))
        d.text = val
        m.append(d)

def read_timestamp(root, prefixes, refines):
    pq = '%s:timestamp' % CALIBRE_PREFIX
    sq = '%s:w3cdtf' % reserved_prefixes['dcterms']
    for meta in XPath('./opf:metadata/opf:meta[@property]')(root):
        val = (meta.text or '').strip()
        if val:
            prop = expand_prefix(meta.get('property'), prefixes)
            if prop.lower() == pq:
                scheme = expand_prefix(meta.get('scheme'), prefixes).lower()
                try:
                    return parse_date(val, is_w3cdtf=scheme == sq)
                except Exception:
                    continue
    for meta in XPath('./opf:metadata/opf:meta[@name="calibre:timestamp"]')(root):
        val = meta.get('content')
        if val:
            try:
                return parse_date(val, is_w3cdtf=True)
            except Exception:
                continue

def create_timestamp(root, prefixes, m, val):
    if not is_date_undefined(val):
        ensure_prefix(root, prefixes, 'calibre', CALIBRE_PREFIX)
        ensure_prefix(root, prefixes, 'dcterms')
        val = w3cdtf(val)
        d = m.makeelement(OPF('meta'), attrib={'property':'calibre:timestamp', 'scheme':'dcterms:W3CDTF'})
        d.text = val
        m.append(d)

def set_timestamp(root, prefixes, refines, val):
    pq = '%s:timestamp' % CALIBRE_PREFIX
    for meta in XPath('./opf:metadata/opf:meta')(root):
        prop = expand_prefix(meta.get('property'), prefixes)
        if prop.lower() == pq or meta.get('name') == 'calibre:timestamp':
            remove_element(meta, refines)
    create_timestamp(root, prefixes, XPath('./opf:metadata')(root)[0], val)

def read_last_modified(root, prefixes, refines):
    pq = '%s:modified' % reserved_prefixes['dcterms']
    sq = '%s:w3cdtf' % reserved_prefixes['dcterms']
    for meta in XPath('./opf:metadata/opf:meta[@property]')(root):
        val = (meta.text or '').strip()
        if val:
            prop = expand_prefix(meta.get('property'), prefixes)
            if prop.lower() == pq:
                scheme = expand_prefix(meta.get('scheme'), prefixes).lower()
                try:
                    return parse_date(val, is_w3cdtf=scheme == sq)
                except Exception:
                    continue

def set_last_modified(root, prefixes, refines, val=None):
    pq = '%s:modified' % reserved_prefixes['dcterms']
    val = w3cdtf(val or utcnow())
    for meta in XPath('./opf:metadata/opf:meta[@property]')(root):
        prop = expand_prefix(meta.get('property'), prefixes)
        if prop.lower() == pq:
            iid = meta.get('id')
            if not iid or not refines[iid]:
                # Reuse an existing dcterms:modified element that has no refines
                break
    else:
        # No reusable dcterms:modified element found, create one
        ensure_prefix(root, prefixes, 'dcterms')
        m = XPath('./opf:metadata')(root)[0]
        meta = m.makeelement(OPF('meta'), attrib={'property':'dcterms:modified', 'scheme':'dcterms:W3CDTF'})
        m.append(meta)
    meta.text = val
# }}}
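
# Illustrative sketch (added commentary, not part of the upstream module): the
# three date fields live in different places in the OPF -- dc:date (pubdate),
# a calibre:timestamp meta and a dcterms:modified meta:
#
#   read_pubdate(root, prefixes, refines)
#   set_timestamp(root, prefixes, refines, utcnow())
#   set_last_modified(root, prefixes, refines)   # defaults to utcnow()
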
# Comments {{{
def read_comments(root, prefixes, refines):
    ans = ''
    for dc in XPath('./opf:metadata/dc:description')(root):
        if dc.text:
            ans += '\n' + dc.text.strip()
    return ans.strip()

def set_comments(root, prefixes, refines, val):
    for dc in XPath('./opf:metadata/dc:description')(root):
        remove_element(dc, refines)
    m = XPath('./opf:metadata')(root)[0]
    if val:
        val = val.strip()
        if val:
            c = m.makeelement(DC('description'))
            c.text = val
            m.append(c)
# }}}
# Publisher {{{
@simple_text
def read_publisher(root, prefixes, refines):
    for dc in XPath('./opf:metadata/dc:publisher')(root):
        if dc.text:
            return dc.text

def set_publisher(root, prefixes, refines, val):
    for dc in XPath('./opf:metadata/dc:publisher')(root):
        remove_element(dc, refines)
    m = XPath('./opf:metadata')(root)[0]
    if val:
        val = val.strip()
        if val:
            c = m.makeelement(DC('publisher'))
            c.text = normalize_whitespace(val)
            m.append(c)
# }}}
# Tags {{{
def read_tags(root, prefixes, refines):
    ans = []
    for dc in XPath('./opf:metadata/dc:subject')(root):
        if dc.text:
            ans.extend(map(normalize_whitespace, dc.text.split(',')))
    return uniq(list(filter(None, ans)))

def set_tags(root, prefixes, refines, val):
    for dc in XPath('./opf:metadata/dc:subject')(root):
        remove_element(dc, refines)
    m = XPath('./opf:metadata')(root)[0]
    if val:
        val = uniq(list(filter(None, val)))
        for x in val:
            c = m.makeelement(DC('subject'))
            c.text = normalize_whitespace(x)
            if c.text:
                m.append(c)
# }}}
# Rating {{{
def read_rating(root, prefixes, refines):
    pq = '%s:rating' % CALIBRE_PREFIX
    for meta in XPath('./opf:metadata/opf:meta[@property]')(root):
        val = (meta.text or '').strip()
        if val:
            prop = expand_prefix(meta.get('property'), prefixes)
            if prop.lower() == pq:
                try:
                    return float(val)
                except Exception:
                    continue
    for meta in XPath('./opf:metadata/opf:meta[@name="calibre:rating"]')(root):
        val = meta.get('content')
        if val:
            try:
                return float(val)
            except Exception:
                continue

def create_rating(root, prefixes, val):
    ensure_prefix(root, prefixes, 'calibre', CALIBRE_PREFIX)
    m = XPath('./opf:metadata')(root)[0]
    d = m.makeelement(OPF('meta'), attrib={'property':'calibre:rating'})
    d.text = val
    m.append(d)

def set_rating(root, prefixes, refines, val):
    pq = '%s:rating' % CALIBRE_PREFIX
    for meta in XPath('./opf:metadata/opf:meta[@name="calibre:rating"]')(root):
        remove_element(meta, refines)
    for meta in XPath('./opf:metadata/opf:meta[@property]')(root):
        prop = expand_prefix(meta.get('property'), prefixes)
        if prop.lower() == pq:
            remove_element(meta, refines)
    if val:
        create_rating(root, prefixes, '%.2g' % float(val))
# }}}
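
# Illustrative sketch (added commentary, not part of the upstream module): the
# rating is stored in a calibre:rating property meta, with the OPF 2 style
# <meta name="calibre:rating"> read as a fallback:
#
#   read_rating(root, prefixes, refines)     # e.g. 8.0
#   set_rating(root, prefixes, refines, 8)   # writes <meta property="calibre:rating">8</meta>
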
# Series {{{
def read_series(root, prefixes, refines):
    series_index = 1.0
    for meta in XPath('./opf:metadata/opf:meta[@property="belongs-to-collection" and @id]')(root):
        val = (meta.text or '').strip()
        if val:
            props = properties_for_id(meta.get('id'), refines)
            if props.get('collection-type') == 'series':
                try:
                    series_index = float(props.get('group-position').strip())
                except Exception:
                    pass
                return normalize_whitespace(val), series_index
    for si in XPath('./opf:metadata/opf:meta[@name="calibre:series_index"]/@content')(root):
        try:
            series_index = float(si)
            break
        except Exception:
            pass
    for s in XPath('./opf:metadata/opf:meta[@name="calibre:series"]/@content')(root):
        s = normalize_whitespace(s)
        if s:
            return s, series_index
    return None, series_index

def create_series(root, refines, series, series_index):
    m = XPath('./opf:metadata')(root)[0]
    d = m.makeelement(OPF('meta'), attrib={'property':'belongs-to-collection'})
    d.text = series
    m.append(d)
    set_refines(d, refines, refdef('collection-type', 'series'), refdef('group-position', series_index))

def set_series(root, prefixes, refines, series, series_index):
    for meta in XPath('./opf:metadata/opf:meta[@name="calibre:series" or @name="calibre:series_index"]')(root):
        remove_element(meta, refines)
    for meta in XPath('./opf:metadata/opf:meta[@property="belongs-to-collection"]')(root):
        remove_element(meta, refines)
    if series:
        create_series(root, refines, series, '%.2g' % series_index)
# }}}
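
# Illustrative sketch (added commentary, not part of the upstream module):
# series data prefers the EPUB 3 belongs-to-collection vocabulary, falling back
# to the OPF 2 calibre:series metas on read:
#
#   series, series_index = read_series(root, prefixes, refines)
#   set_series(root, prefixes, refines, 'Foundation', 1.0)
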
# User metadata {{{
def dict_reader(name, load=json.loads, try2=True):
    pq = f'{CALIBRE_PREFIX}:{name}'

    def reader(root, prefixes, refines):
        for meta in XPath('./opf:metadata/opf:meta[@property]')(root):
            val = (meta.text or '').strip()
            if val:
                prop = expand_prefix(meta.get('property'), prefixes)
                if prop.lower() == pq:
                    try:
                        ans = load(val)
                        if isinstance(ans, dict):
                            return ans
                    except Exception:
                        continue
        if try2:
            for meta in XPath('./opf:metadata/opf:meta[@name="calibre:%s"]' % name)(root):
                val = meta.get('content')
                if val:
                    try:
                        ans = load(val)
                        if isinstance(ans, dict):
                            return ans
                    except Exception:
                        continue
    return reader

read_user_categories = dict_reader('user_categories')
read_author_link_map = dict_reader('author_link_map')

def dict_writer(name, serialize=dump_dict, remove2=True):
    pq = f'{CALIBRE_PREFIX}:{name}'

    def writer(root, prefixes, refines, val):
        if remove2:
            for meta in XPath('./opf:metadata/opf:meta[@name="calibre:%s"]' % name)(root):
                remove_element(meta, refines)
        for meta in XPath('./opf:metadata/opf:meta[@property]')(root):
            prop = expand_prefix(meta.get('property'), prefixes)
            if prop.lower() == pq:
                remove_element(meta, refines)
        if val:
            ensure_prefix(root, prefixes, 'calibre', CALIBRE_PREFIX)
            m = XPath('./opf:metadata')(root)[0]
            d = m.makeelement(OPF('meta'), attrib={'property':'calibre:%s' % name})
            d.text = serialize(val)
            m.append(d)
    return writer

set_user_categories = dict_writer('user_categories')
set_author_link_map = dict_writer('author_link_map')

def deserialize_user_metadata(val):
    val = json.loads(val, object_hook=from_json)
    ans = {}
    for name, fm in iteritems(val):
        decode_is_multiple(fm)
        ans[name] = fm
    return ans

read_user_metadata3 = dict_reader('user_metadata', load=deserialize_user_metadata, try2=False)

def read_user_metadata2(root, remove_tags=False):
    ans = {}
    for meta in XPath('./opf:metadata/opf:meta[starts-with(@name, "calibre:user_metadata:")]')(root):
        name = meta.get('name')
        name = ':'.join(name.split(':')[2:])
        if not name or not name.startswith('#'):
            continue
        fm = meta.get('content')
        if remove_tags:
            meta.getparent().remove(meta)
        try:
            fm = json.loads(fm, object_hook=from_json)
            decode_is_multiple(fm)
            ans[name] = fm
        except Exception:
            prints('Failed to read user metadata:', name)
            import traceback
            traceback.print_exc()
            continue
    return ans

def read_user_metadata(root, prefixes, refines):
    return read_user_metadata3(root, prefixes, refines) or read_user_metadata2(root)

def serialize_user_metadata(val):
    return json.dumps(object_to_unicode(val), ensure_ascii=False, default=to_json, indent=2, sort_keys=True)

set_user_metadata3 = dict_writer('user_metadata', serialize=serialize_user_metadata, remove2=False)

def set_user_metadata(root, prefixes, refines, val):
    for meta in XPath('./opf:metadata/opf:meta[starts-with(@name, "calibre:user_metadata:")]')(root):
        remove_element(meta, refines)
    if val:
        nval = {}
        for name, fm in val.items():
            fm = fm.copy()
            encode_is_multiple(fm)
            nval[name] = fm
        set_user_metadata3(root, prefixes, refines, nval)
# }}}
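
# Illustrative sketch (added commentary, not part of the upstream module):
# calibre custom column definitions are serialized as JSON into a single
# calibre:user_metadata property meta; the per-column OPF 2 style
# calibre:user_metadata:#name metas are still read for backwards compatibility:
#
#   um = read_user_metadata(root, prefixes, refines)
#   set_user_metadata(root, prefixes, refines, um)
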
# Covers {{{
def read_raster_cover(root, prefixes, refines):

    def get_href(item):
        mt = item.get('media-type')
        if mt and 'xml' not in mt and 'html' not in mt:
            href = item.get('href')
            if href:
                return href

    for item in items_with_property(root, 'cover-image', prefixes):
        href = get_href(item)
        if href:
            return href

    for item_id in XPath('./opf:metadata/opf:meta[@name="cover"]/@content')(root):
        for item in XPath('./opf:manifest/opf:item[@id and @href and @media-type]')(root):
            if item.get('id') == item_id:
                href = get_href(item)
                if href:
                    return href

def ensure_is_only_raster_cover(root, prefixes, refines, raster_cover_item_href):
    for item in XPath('./opf:metadata/opf:meta[@name="cover"]')(root):
        remove_element(item, refines)
    for item in items_with_property(root, 'cover-image', prefixes):
        prop = normalize_whitespace(item.get('properties').replace('cover-image', ''))
        if prop:
            item.set('properties', prop)
        else:
            del item.attrib['properties']
    for item in XPath('./opf:manifest/opf:item')(root):
        if item.get('href') == raster_cover_item_href:
            item.set('properties', normalize_whitespace((item.get('properties') or '') + ' cover-image'))
# }}}
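
# Illustrative sketch (added commentary, not part of the upstream module): the
# raster cover is located either via the EPUB 3 cover-image manifest property
# or the OPF 2 <meta name="cover"> pointer:
#
#   href = read_raster_cover(root, prefixes, refines)   # e.g. 'images/cover.jpg'
#   ensure_is_only_raster_cover(root, prefixes, refines, href)
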
# Reading/setting Metadata objects {{{
def first_spine_item(root, prefixes, refines):
    for i in XPath('./opf:spine/opf:itemref/@idref')(root):
        for item in XPath('./opf:manifest/opf:item')(root):
            if item.get('id') == i:
                return item.get('href') or None

def set_last_modified_in_opf(root):
    prefixes, refines = read_prefixes(root), read_refines(root)
    set_last_modified(root, prefixes, refines)

def read_metadata(root, ver=None, return_extra_data=False):
    ans = Metadata(_('Unknown'), [_('Unknown')])
    prefixes, refines = read_prefixes(root), read_refines(root)
    identifiers = read_identifiers(root, prefixes, refines)
    ids = {}
    for key, vals in iteritems(identifiers):
        if key == 'calibre':
            ans.application_id = vals[0]
        elif key == 'uuid':
            ans.uuid = vals[0]
        else:
            ids[key] = vals[0]
    ans.set_identifiers(ids)
    ans.title = read_title(root, prefixes, refines) or ans.title
    ans.title_sort = read_title_sort(root, prefixes, refines) or ans.title_sort
    ans.languages = read_languages(root, prefixes, refines) or ans.languages
    auts, aus = [], []
    for a in read_authors(root, prefixes, refines):
        auts.append(a.name), aus.append(a.sort)
    ans.authors = auts or ans.authors
    ans.author_sort = authors_to_string(aus) or ans.author_sort
    bkp = read_book_producers(root, prefixes, refines)
    if bkp:
        if bkp[0]:
            ans.book_producer = bkp[0]
    pd = read_pubdate(root, prefixes, refines)
    if not is_date_undefined(pd):
        ans.pubdate = pd
    ts = read_timestamp(root, prefixes, refines)
    if not is_date_undefined(ts):
        ans.timestamp = ts
    lm = read_last_modified(root, prefixes, refines)
    if not is_date_undefined(lm):
        ans.last_modified = lm
    ans.comments = read_comments(root, prefixes, refines) or ans.comments
    ans.publisher = read_publisher(root, prefixes, refines) or ans.publisher
    ans.tags = read_tags(root, prefixes, refines) or ans.tags
    ans.rating = read_rating(root, prefixes, refines) or ans.rating
    s, si = read_series(root, prefixes, refines)
    if s:
        ans.series, ans.series_index = s, si
    ans.author_link_map = read_author_link_map(root, prefixes, refines) or ans.author_link_map
    ans.user_categories = read_user_categories(root, prefixes, refines) or ans.user_categories
    for name, fm in iteritems(read_user_metadata(root, prefixes, refines) or {}):
        ans.set_user_metadata(name, fm)
    if return_extra_data:
        ans = ans, ver, read_raster_cover(root, prefixes, refines), first_spine_item(root, prefixes, refines)
    return ans

def get_metadata(stream):
    root = parse_opf(stream)
    return read_metadata(root)

def apply_metadata(root, mi, cover_prefix='', cover_data=None, apply_null=False, update_timestamp=False, force_identifiers=False, add_missing_cover=True):
    prefixes, refines = read_prefixes(root), read_refines(root)
    current_mi = read_metadata(root)
    if apply_null:
        def ok(x):
            return True
    else:
        def ok(x):
            return not mi.is_null(x)
    if ok('identifiers'):
        set_identifiers(root, prefixes, refines, mi.identifiers, force_identifiers=force_identifiers)
    if ok('title'):
        set_title(root, prefixes, refines, mi.title, mi.title_sort)
    if ok('languages'):
        set_languages(root, prefixes, refines, mi.languages)
    if ok('book_producer'):
        set_book_producers(root, prefixes, refines, (mi.book_producer,))
    aus = string_to_authors(mi.author_sort or '')
    authors = []
    for i, aut in enumerate(mi.authors):
        authors.append(Author(aut, aus[i] if i < len(aus) else None))
    if authors or apply_null:
        set_authors(root, prefixes, refines, authors)
    if ok('pubdate'):
        set_pubdate(root, prefixes, refines, mi.pubdate)
    if update_timestamp and mi.timestamp is not None:
        set_timestamp(root, prefixes, refines, mi.timestamp)
    if ok('comments'):
        set_comments(root, prefixes, refines, mi.comments)
    if ok('publisher'):
        set_publisher(root, prefixes, refines, mi.publisher)
    if ok('tags'):
        set_tags(root, prefixes, refines, mi.tags)
    if ok('rating') and mi.rating is not None and float(mi.rating) > 0.1:
        set_rating(root, prefixes, refines, mi.rating)
    if ok('series'):
        set_series(root, prefixes, refines, mi.series, mi.series_index or 1)
    if ok('author_link_map'):
        set_author_link_map(root, prefixes, refines, getattr(mi, 'author_link_map', None))
    if ok('user_categories'):
        set_user_categories(root, prefixes, refines, getattr(mi, 'user_categories', None))
    # We ignore apply_null for the next two to match the behavior with opf2.py
    if mi.application_id:
        set_application_id(root, prefixes, refines, mi.application_id)
    if mi.uuid:
        set_uuid(root, prefixes, refines, mi.uuid)
    current_mi.remove_stale_user_metadata(mi)
    new_user_metadata, current_user_metadata = mi.get_all_user_metadata(True), current_mi.get_all_user_metadata(True)
    missing = object()
    for key in tuple(new_user_metadata):
        meta = new_user_metadata.get(key)
        if meta is None:
            if apply_null:
                new_user_metadata[key] = None
            continue
        dt = meta.get('datatype')
        if dt == 'text' and meta.get('is_multiple'):
            val = mi.get(key, [])
            if val or apply_null:
                current_user_metadata[key] = meta
        elif dt in {'int', 'float', 'bool'}:
            val = mi.get(key, missing)
            if val is missing:
                if apply_null:
                    current_user_metadata[key] = meta
            elif apply_null or val is not None:
                current_user_metadata[key] = meta
        elif apply_null or not mi.is_null(key):
            current_user_metadata[key] = meta
    set_user_metadata(root, prefixes, refines, current_user_metadata)
    raster_cover = read_raster_cover(root, prefixes, refines)
    if not raster_cover and cover_data and add_missing_cover:
        if cover_prefix and not cover_prefix.endswith('/'):
            cover_prefix += '/'
        name = cover_prefix + 'cover.jpg'
        i = create_manifest_item(root, name, 'cover')
        if i is not None:
            ensure_is_only_raster_cover(root, prefixes, refines, name)
            raster_cover = name
    pretty_print_opf(root)
    return raster_cover

def set_metadata(stream, mi, cover_prefix='', cover_data=None, apply_null=False, update_timestamp=False, force_identifiers=False, add_missing_cover=True):
    root = parse_opf(stream)
    return apply_metadata(
        root, mi, cover_prefix=cover_prefix, cover_data=cover_data,
        apply_null=apply_null, update_timestamp=update_timestamp,
        force_identifiers=force_identifiers, add_missing_cover=add_missing_cover)
# }}}
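
# Illustrative usage sketch (added commentary, not part of the upstream module;
# 'metadata.opf' is a hypothetical path): get_metadata() builds a Metadata
# object from an OPF stream, while apply_metadata()/set_metadata() update the
# parsed tree in place and return the raster cover href, leaving serialization
# to the caller:
#
#   with open('metadata.opf', 'rb') as f:
#       mi = get_metadata(f)
#   with open('metadata.opf', 'rb') as f:
#       root = parse_opf(f)
#   apply_metadata(root, mi, update_timestamp=True)
#   raw = etree.tostring(root, encoding='utf-8', xml_declaration=True)
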
if __name__ == '__main__':
    import sys
    with open(sys.argv[-1], 'rb') as f:
        print(get_metadata(f))