%PDF- %PDF-
| Direktori : /proc/thread-self/root/usr/lib/calibre/calibre/ebooks/metadata/ |
| Current File : //proc/thread-self/root/usr/lib/calibre/calibre/ebooks/metadata/opf2.py |
#!/usr/bin/env python3
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
__docformat__ = 'restructuredtext en'
'''
lxml based OPF parser.
'''
import re, sys, functools, os, uuid, glob, io, json, copy
from lxml import etree
from calibre.ebooks import escape_xpath_attr
from calibre.constants import __appname__, __version__, filesystem_encoding
from calibre.ebooks.metadata.toc import TOC
from calibre.ebooks.metadata.utils import parse_opf, pretty_print_opf as _pretty_print
from calibre.ebooks.metadata import string_to_authors, MetaInformation, check_isbn
from calibre.ebooks.metadata.book.base import Metadata
from calibre.utils.date import parse_date, isoformat
from calibre.utils.localization import get_lang, canonicalize_lang
from calibre import prints, guess_type
from calibre.utils.cleantext import clean_ascii_chars, clean_xml_chars
from calibre.utils.config import tweaks
from calibre.utils.xml_parse import safe_xml_fromstring
from polyglot.builtins import iteritems
from polyglot.urllib import unquote, urlparse
# Module level flag toggled by the PrettyPrint context manager below.
pretty_print_opf = False


class PrettyPrint:
    '''
    Context manager that sets the module level ``pretty_print_opf`` flag to
    True on entry and unconditionally back to False on exit.
    '''

    @staticmethod
    def _switch(enabled):
        global pretty_print_opf
        pretty_print_opf = enabled

    def __enter__(self):
        self._switch(True)

    def __exit__(self, *args):
        self._switch(False)


# Shared instance, intended to be used as ``with pretty_print: ...``
pretty_print = PrettyPrint()
class Resource:  # {{{

    '''
    Represents a resource (usually a file on the filesystem or a URL pointing
    to the web). Such resources are commonly referred to in OPF files.

    They have the interface:

    :member:`path`
    :member:`mime_type`
    :method:`href`
    '''

    def __init__(self, href_or_path, basedir=os.getcwd(), is_path=True):
        '''
        :param href_or_path: A filesystem path (when `is_path` is True) or an
            href/URL (when `is_path` is False).
        :param basedir: Directory that relative paths are resolved against.
            NOTE: the default is evaluated once, when the class is defined.
        :param is_path: Whether `href_or_path` is a filesystem path.
        '''
        self.orig = href_or_path
        self._href = None
        self._basedir = basedir
        self.path = None
        self.fragment = ''
        try:
            self.mime_type = guess_type(href_or_path)[0]
        except Exception:
            self.mime_type = None
        if self.mime_type is None:
            self.mime_type = 'application/octet-stream'
        if is_path:
            path = href_or_path
            # Decode before joining: mixing a bytes path with a str basedir
            # in os.path.join() raises TypeError.
            if isinstance(path, bytes):
                path = path.decode(filesystem_encoding)
            if not os.path.isabs(path):
                path = os.path.abspath(os.path.join(basedir, path))
            self.path = path
        else:
            url = urlparse(href_or_path)
            if url[0] not in ('', 'file'):
                # Not a local resource, keep the href untouched
                self._href = href_or_path
            else:
                pc = url[2]
                if isinstance(pc, bytes):
                    pc = pc.decode('utf-8')
                self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep)))
                self.fragment = url[-1]

    def href(self, basedir=None):
        '''
        Return a URL pointing to this resource. If it is a file on the filesystem
        the URL is relative to `basedir`.

        `basedir`: If None, the basedir of this resource is used (see :method:`set_basedir`).
        If this resource has no basedir, then the current working directory is used as the basedir.
        '''
        if basedir is None:
            basedir = self._basedir or os.getcwd()
        if self.path is None:
            return self._href
        frag = ('#' + self.fragment) if self.fragment else ''
        if self.path == basedir:
            return frag
        try:
            rpath = os.path.relpath(self.path, basedir)
        except ValueError:  # On windows path and basedir could be on different drives
            rpath = self.path
        if isinstance(rpath, bytes):
            rpath = rpath.decode(filesystem_encoding)
        return rpath.replace(os.sep, '/')+frag

    def set_basedir(self, path):
        '''Set the directory that :method:`href` uses by default.'''
        self._basedir = path

    def basedir(self):
        '''Return the directory that :method:`href` uses by default.'''
        return self._basedir

    def __repr__(self):
        return 'Resource(%s, %s)'%(repr(self.path), repr(self.href()))

# }}}
class ResourceCollection:  # {{{

    '''An ordered, list backed collection of :class:`Resource` objects.'''

    def __init__(self):
        self._resources = []

    def __iter__(self):
        yield from self._resources

    def __len__(self):
        return len(self._resources)

    def __getitem__(self, index):
        return self._resources[index]

    def __bool__(self):
        return len(self._resources) > 0

    def __str__(self):
        resources = map(repr, self)
        return '[%s]'%', '.join(resources)
    __unicode__ = __str__

    def __repr__(self):
        return str(self)

    def append(self, resource):
        '''Add `resource` at the end. Raises ValueError if it is not a Resource.'''
        if not isinstance(resource, Resource):
            raise ValueError('Can only append objects of type Resource')
        self._resources.append(resource)

    def remove(self, resource):
        self._resources.remove(resource)

    def replace(self, start, end, items):
        'Same as list[start:end] = items'
        self._resources[start:end] = items

    @staticmethod
    def from_directory_contents(top, topdown=True):
        '''
        Build a collection with one Resource per file found below `top`.
        The basedir of every resource is `top`.
        '''
        collection = ResourceCollection()
        # os.walk() yields (dirpath, dirnames, filenames); only the files are
        # resources, so join each file name with the directory containing it.
        for dirpath, _dirnames, filenames in os.walk(top, topdown=topdown):
            for name in filenames:
                path = os.path.abspath(os.path.join(dirpath, name))
                collection.append(Resource(path, basedir=top, is_path=True))
        return collection

    def set_basedir(self, path):
        '''Set the basedir of every contained resource to `path`.'''
        for res in self:
            res.set_basedir(path)

# }}}
class ManifestItem(Resource):  # {{{

    '''A Resource that corresponds to an <item> of the OPF manifest.'''

    @staticmethod
    def from_opf_manifest_item(item, basedir):
        '''
        Create a ManifestItem from an <item> element. Returns None when the
        element has no (or an empty) href attribute.
        '''
        href = item.get('href', None)
        if not href:
            return None
        entry = ManifestItem(href, basedir=basedir, is_path=True)
        declared_type = item.get('media-type', '').strip()
        if declared_type:
            # The media type declared in the OPF wins over the guessed one
            entry.mime_type = declared_type
        return entry

    @property
    def media_type(self):
        '''Alias for :member:`mime_type`.'''
        return self.mime_type

    @media_type.setter
    def media_type(self, val):
        self.mime_type = val

    def __unicode__representation__(self):
        return '<item id="%s" href="%s" media-type="%s" />'%(self.id, self.href(), self.media_type)

    __str__ = __unicode__representation__

    def __repr__(self):
        return str(self)

    def __getitem__(self, index):
        '''Tuple like access: 0 is the href, 1 is the media type.'''
        if index == 0:
            return self.href()
        if index == 1:
            return self.media_type
        raise IndexError('%d out of bounds.'%index)

# }}}
class Manifest(ResourceCollection):  # {{{

    '''Collection of ManifestItem objects that hands out sequential ids.'''

    def __init__(self):
        ResourceCollection.__init__(self)
        self.next_id = 1

    def append_from_opf_manifest_item(self, item, dir):
        '''
        Append the ManifestItem built from the <item> element `item`. If the
        element has no id, one of the form ``id<N>`` is generated.
        '''
        self.append(ManifestItem.from_opf_manifest_item(item, dir))
        item_id = item.get('id', '') or 'id%d'%self.next_id
        self[-1].id = item_id
        self.next_id += 1

    @staticmethod
    def from_opf_manifest_element(items, dir):
        '''Build a Manifest from <item> elements, skipping those rejected with ValueError.'''
        manifest = Manifest()
        for element in items:
            try:
                manifest.append_from_opf_manifest_item(element, dir)
            except ValueError:
                pass
        return manifest

    @staticmethod
    def from_paths(entries):
        '''
        `entries`: List of (path, mime-type) If mime-type is None it is autodetected
        '''
        manifest = Manifest()
        for path, mt in entries:
            manifest.add_item(path, mt)
        return manifest

    def add_item(self, path, mime_type=None):
        '''Append an item for `path` and return the id generated for it.'''
        entry = ManifestItem(path, is_path=True)
        if mime_type:
            entry.mime_type = mime_type
        entry.id = 'id%d'%self.next_id
        self.next_id += 1
        self.append(entry)
        return entry.id

    def item(self, id):
        '''Return the first item whose id is `id`, or None.'''
        for candidate in self:
            if candidate.id == id:
                return candidate
        return None

    def id_for_path(self, path):
        '''Return the id of the first item stored at `path`, or None.'''
        wanted = os.path.normpath(os.path.abspath(path))
        for candidate in self:
            if candidate.path and os.path.normpath(candidate.path) == wanted:
                return candidate.id
        return None

    def path_for_id(self, id):
        '''Return the path of the first item whose id is `id`, or None.'''
        for candidate in self:
            if candidate.id == id:
                return candidate.path
        return None

    def type_for_id(self, id):
        '''Return the mime type of the first item whose id is `id`, or None.'''
        for candidate in self:
            if candidate.id == id:
                return candidate.mime_type
        return None

# }}}
class Spine(ResourceCollection):  # {{{

    '''Ordered collection of spine items, resolved through a manifest.'''

    class Item(Resource):

        '''A spine entry. `idfunc` is called with the resolved path to get the id.'''

        def __init__(self, idfunc, *args, **kwargs):
            Resource.__init__(self, *args, **kwargs)
            self.is_linear = True
            self.id = idfunc(self.path)
            self.idref = None

        def __repr__(self):
            return 'Spine.Item(path=%r, id=%s, is_linear=%s)' % \
                    (self.path, self.id, self.is_linear)

    @staticmethod
    def from_opf_spine_element(itemrefs, manifest):
        '''
        Build a Spine from <itemref> elements. References to ids that are
        not in `manifest` and repeated references to the same path are skipped.
        '''
        s = Spine(manifest)
        seen = set()
        path_map = {i.id:i.path for i in s.manifest}
        for itemref in itemrefs:
            idref = itemref.get('idref', None)
            if idref is not None:
                path = path_map.get(idref)
                if path and path not in seen:
                    r = Spine.Item(lambda x, idref=idref: idref, path, is_path=True)
                    r.is_linear = itemref.get('linear', 'yes') == 'yes'
                    r.idref = idref
                    s.append(r)
                    seen.add(path)
        return s

    @staticmethod
    def from_paths(paths, manifest):
        '''Build a Spine from paths, skipping any path that cannot be turned into an item.'''
        s = Spine(manifest)
        for path in paths:
            try:
                s.append(Spine.Item(s.manifest.id_for_path, path, is_path=True))
            except Exception:
                continue
        return s

    def __init__(self, manifest):
        ResourceCollection.__init__(self)
        self.manifest = manifest

    def replace(self, start, end, ids):
        '''
        Replace the items between start (inclusive) and end (not inclusive) with
        with the items identified by ids. ids can be a list of any length.

        Raises ValueError if one of the ids is not in the manifest.
        '''
        items = []
        for id in ids:
            path = self.manifest.path_for_id(id)
            if path is None:
                raise ValueError('id %s not in manifest' % id)
            items.append(Spine.Item(lambda x, id=id: id, path, is_path=True))
        # The base class method is called unbound, so self must be passed explicitly
        ResourceCollection.replace(self, start, end, items)

    def linear_items(self):
        '''Yield the paths of the linear items.'''
        for r in self:
            if r.is_linear:
                yield r.path

    def nonlinear_items(self):
        '''Yield the paths of the non linear items.'''
        for r in self:
            if not r.is_linear:
                yield r.path

    def items(self):
        '''Yield the paths of all items.'''
        for i in self:
            yield i.path

# }}}
class Guide(ResourceCollection):  # {{{

    '''Collection of the <reference> entries of the OPF guide.'''

    class Reference(Resource):

        '''A guide entry: a Resource with a `type` and a `title`.'''

        @staticmethod
        def from_opf_resource_item(ref, basedir):
            '''Create a Reference from a <reference> element.'''
            title, href, type = ref.get('title', ''), ref.get('href'), ref.get('type')
            res = Guide.Reference(href, basedir, is_path=True)
            res.title = title
            res.type = type
            return res

        def __repr__(self):
            ans = '<reference type="%s" href="%s" '%(self.type, self.href())
            if self.title:
                ans += 'title="%s" '%self.title
            return ans + '/>'

    @staticmethod
    def from_opf_guide(references, base_dir=os.getcwd()):
        '''Build a Guide from <reference> elements, skipping unusable ones.'''
        coll = Guide()
        for ref in references:
            try:
                ref = Guide.Reference.from_opf_resource_item(ref, base_dir)
                coll.append(ref)
            except Exception:
                continue
        return coll

    def set_cover(self, path):
        '''
        Remove every reference whose type contains "cover" and add one
        reference to `path` for each of the known cover types.
        '''
        for i in tuple(self):
            # A <reference> without a type attribute has type None
            if 'cover' in (getattr(i, 'type', None) or '').lower():
                self.remove(i)
        for typ in ('cover', 'other.ms-coverimage-standard', 'other.ms-coverimage'):
            self.append(Guide.Reference(path, is_path=True))
            self[-1].type = typ
            self[-1].title = ''

# }}}
class MetadataField:

    '''
    Descriptor that maps an attribute to a single metadata element.

    `formatter` converts the element text on read, `renderer` converts the
    value to text on write and `none_is` is reported when there is no value.
    '''

    def __init__(self, name, is_dc=True, formatter=None, none_is=None,
            renderer=lambda x: str(x)):
        self.name = name
        self.is_dc = is_dc
        self.formatter = formatter
        self.none_is = none_is
        self.renderer = renderer

    def __real_get__(self, obj, type=None):
        '''Return the formatted value, or None if it is missing or cannot be formatted.'''
        elem = obj.get_metadata_element(self.name)
        if elem is None:
            return None
        value = obj.get_text(elem)
        if value is None:
            return None
        if self.formatter is not None:
            try:
                value = self.formatter(value)
            except:
                return None
        return value.strip() if hasattr(value, 'strip') else value

    def __get__(self, obj, type=None):
        value = self.__real_get__(obj, type)
        return self.none_is if value is None else value

    def __set__(self, obj, val):
        '''Write `val` to the element, creating it if needed. None removes the element.'''
        elem = obj.get_metadata_element(self.name)
        if val is not None:
            if elem is None:
                elem = obj.create_metadata_element(self.name, is_dc=self.is_dc)
            obj.set_text(elem, self.renderer(val))
        elif elem is not None:
            elem.getparent().remove(elem)
class TitleSortField(MetadataField):

    '''
    The title sort value. When there is no dedicated element, the file-as
    attribute of the title elements is used instead.
    '''

    def __get__(self, obj, type=None):
        value = self.__real_get__(obj, type)
        if value is None:
            titles = obj.title_path(obj.metadata)
            if titles:
                for title in titles:
                    file_as = title.get('{%s}file-as'%obj.NAMESPACES['opf'], None) or \
                            title.get('file-as', None)
                    if file_as:
                        # No break: the last title with a file-as wins
                        value = file_as
        return value.strip() if value else self.none_is

    def __set__(self, obj, val):
        MetadataField.__set__(self, obj, val)
        # Drop every file-as attribute from the title elements
        for title in obj.title_path(obj.metadata) or ():
            stale = [attr for attr in title.attrib if attr.endswith('file-as')]
            for attr in stale:
                del title.attrib[attr]
def serialize_user_metadata(metadata_elem, all_user_metadata, tail='\n'+(' '*8)):
    '''
    Append one <meta name="calibre:user_metadata:NAME"> element to
    `metadata_elem` for every entry of `all_user_metadata`, with the field
    metadata JSON encoded in the content attribute. Entries that cannot be
    encoded are reported and skipped.
    '''
    from calibre.utils.config import to_json
    from calibre.ebooks.metadata.book.json_codec import (object_to_unicode,
                                                         encode_is_multiple)

    def encode(field_metadata):
        # Work on a shallow copy so the caller's dict object is not replaced
        fm = copy.copy(field_metadata)
        encode_is_multiple(fm)
        return json.dumps(object_to_unicode(fm), default=to_json, ensure_ascii=False)

    for name, fm in all_user_metadata.items():
        try:
            content = encode(fm)
        except:
            prints('Failed to write user metadata:', name)
            import traceback
            traceback.print_exc()
            continue
        meta = metadata_elem.makeelement('meta')
        meta.set('name', 'calibre:user_metadata:'+name)
        meta.set('content', content)
        meta.tail = tail
        metadata_elem.append(meta)
def serialize_annotations(metadata_elem, annotations, tail='\n'+(' '*8)):
    '''
    Append one <meta name="calibre:annotation"> element to `metadata_elem`
    for every item of `annotations`, JSON encoded in the content attribute.
    '''
    for annotation in annotations:
        payload = json.dumps(annotation, ensure_ascii=False)
        if isinstance(payload, bytes):
            payload = payload.decode('utf-8')
        meta = metadata_elem.makeelement('meta')
        for key, value in (('name', 'calibre:annotation'), ('content', payload)):
            meta.set(key, value)
        meta.tail = tail
        metadata_elem.append(meta)
def dump_dict(cats):
    '''Return `cats` as a JSON string. A false value is dumped as an empty dict.'''
    from calibre.ebooks.metadata.book.json_codec import object_to_unicode
    payload = object_to_unicode(cats if cats else {})
    return json.dumps(payload, ensure_ascii=False, skipkeys=True)
class OPF: # {{{
# MIME type string for OPF package documents.
MIMETYPE = 'application/oebps-package+xml'
# Namespace map. The None key is the default (un-prefixed) namespace and has
# the same URI as the 'opf' prefix.
NAMESPACES = {
None: "http://www.idpf.org/2007/opf",
'dc': "http://purl.org/dc/elements/1.1/",
'opf': "http://www.idpf.org/2007/opf",
}
# Tag of an OPF <meta> element in {namespace}name form.
META = '{%s}meta' % NAMESPACES['opf']
# Namespace map for the XPath expressions: the None entry is dropped
# (presumably because XPath needs explicit prefixes) and the EXSLT regular
# expressions namespace, which provides re:match(), is added.
xpn = NAMESPACES.copy()
xpn.pop(None)
xpn['re'] = 'http://exslt.org/regular-expressions'
# Factory for compiled XPath expressions bound to the map above.
XPath = functools.partial(etree.XPath, namespaces=xpn)
# content attribute of an element whose name ends in "meta".
CONTENT = XPath('self::*[re:match(name(), "meta$", "i")]/@content')
# String value of an element.
TEXT = XPath('string()')
# Most of the expressions below select elements with a case-insensitive and
# un-anchored regex on the element name, so "title" matches every element
# whose name contains "title".
metadata_path = XPath('descendant::*[re:match(name(), "metadata", "i")]')
# Takes a $name variable: matches elements whose name ends in $name and
# <meta> elements whose name attribute is "calibre:" followed by $name.
metadata_elem_path = XPath(
'descendant::*[re:match(name(), concat($name, "$"), "i") or (re:match(name(), "meta$", "i") '
'and re:match(@name, concat("^calibre:", $name, "$"), "i"))]')
title_path = XPath('descendant::*[re:match(name(), "title", "i")]')
# Creators with role "aut" or with no role attribute at all.
authors_path = XPath('descendant::*[re:match(name(), "creator", "i") and (@role="aut" or @opf:role="aut" or (not(@role) and not(@opf:role)))]')
editors_path = XPath('descendant::*[re:match(name(), "creator", "i") and (@role="edt" or @opf:role="edt")]')
bkp_path = XPath('descendant::*[re:match(name(), "contributor", "i") and (@role="bkp" or @opf:role="bkp")]')
tags_path = XPath('descendant::*[re:match(name(), "subject", "i")]')
isbn_path = XPath('descendant::*[re:match(name(), "identifier", "i") and '
'(re:match(@scheme, "isbn", "i") or re:match(@opf:scheme, "isbn", "i"))]')
pubdate_path = XPath('descendant::*[re:match(name(), "date", "i")]')
raster_cover_path = XPath('descendant::*[re:match(name(), "meta", "i") and '
're:match(@name, "cover", "i") and @content]')
# Selects href attribute values, not elements.
guide_cover_path = XPath('descendant::*[local-name()="guide"]/*[local-name()="reference" and re:match(@type, "cover", "i")]/@href')
identifier_path = XPath('descendant::*[re:match(name(), "identifier", "i")]')
application_id_path = XPath('descendant::*[re:match(name(), "identifier", "i") and '
'(re:match(@opf:scheme, "calibre|libprs500", "i") or re:match(@scheme, "calibre|libprs500", "i"))]')
uuid_id_path = XPath('descendant::*[re:match(name(), "identifier", "i") and '
'(re:match(@opf:scheme, "uuid", "i") or re:match(@scheme, "uuid", "i"))]')
languages_path = XPath('descendant::*[local-name()="language"]')
# manifest_path selects the <item> children, manifest_ppath the <manifest>
# element itself.
manifest_path = XPath('descendant::*[re:match(name(), "manifest", "i")]/*[re:match(name(), "item", "i")]')
manifest_ppath = XPath('descendant::*[re:match(name(), "manifest", "i")]')
spine_path = XPath('descendant::*[re:match(name(), "spine", "i")]/*[re:match(name(), "itemref", "i")]')
guide_path = XPath('descendant::*[re:match(name(), "guide", "i")]/*[re:match(name(), "reference", "i")]')
# Single valued metadata exposed through MetadataField descriptors. The first
# argument is the element name that is looked up; the fields created with
# is_dc=False pass that flag on when the element has to be created.
publisher = MetadataField('publisher')
comments = MetadataField('description')
category = MetadataField('type')
rights = MetadataField('rights')
series = MetadataField('series', is_dc=False)
# The value reported for a missing series_index depends on a tweak: None when
# the tweak is on, 1 otherwise.
if tweaks['use_series_auto_increment_tweak_when_importing']:
series_index = MetadataField('series_index', is_dc=False,
formatter=float, none_is=None)
else:
series_index = MetadataField('series_index', is_dc=False,
formatter=float, none_is=1)
title_sort = TitleSortField('title_sort', is_dc=False)
rating = MetadataField('rating', is_dc=False, formatter=float)
publication_type = MetadataField('publication_type', is_dc=False)
# Parsed with parse_date() on read and written with isoformat().
timestamp = MetadataField('timestamp', is_dc=False,
formatter=parse_date, renderer=isoformat)
# The next two fields are stored as JSON text.
user_categories = MetadataField('user_categories', is_dc=False,
formatter=json.loads,
renderer=dump_dict)
author_link_map = MetadataField('author_link_map', is_dc=False,
formatter=json.loads, renderer=dump_dict)
def __init__(self, stream, basedir=os.getcwd(), unquote_urls=True,
        populate_spine=True, try_to_guess_cover=True, preparsed_opf=None, read_toc=True):
    '''
    Load an OPF document and build the manifest, spine and guide from it.

    :param stream: Passed to parse_opf() unless `preparsed_opf` is given.
    :param basedir: Directory the manifest and guide hrefs are resolved against.
    :param unquote_urls: Unquote the hrefs of the manifest and the guide.
    :param populate_spine: Build :member:`spine` (it is None otherwise).
    :param try_to_guess_cover: Let the cover property fall back to guess_cover().
    :param preparsed_opf: An already parsed root element to use as is.
    :param read_toc: Call find_toc(); when False :member:`toc` is None.
    '''
    self.try_to_guess_cover = try_to_guess_cover
    self.basedir = self.base_dir = basedir
    self.path_to_html_toc = self.html_toc_fragment = None
    if preparsed_opf is None:
        self.root = parse_opf(stream)
    else:
        self.root = preparsed_opf
    try:
        self.package_version = float(self.root.get('version', None))
    except (AttributeError, TypeError, ValueError):
        self.package_version = 0

    found = self.metadata_path(self.root)
    if found:
        self.metadata = found[0]
    else:
        # No metadata element: create one as the first child of the root
        self.metadata = self.root.makeelement('{http://www.idpf.org/2007/opf}metadata')
        self.root.insert(0, self.metadata)
        self.metadata.tail = '\n'

    if unquote_urls:
        self.unquote_urls()

    manifest_items = self.manifest_path(self.root)
    if manifest_items:
        self.manifest = Manifest.from_opf_manifest_element(manifest_items, basedir)
    else:
        self.manifest = Manifest()

    itemrefs = self.spine_path(self.root)
    if populate_spine and itemrefs:
        self.spine = Spine.from_opf_spine_element(itemrefs, self.manifest)
    else:
        self.spine = None

    references = self.guide_path(self.root)
    self.guide = Guide.from_opf_guide(references, basedir) if references else None

    self.cover_data = (None, None)
    if read_toc:
        self.find_toc()
    else:
        self.toc = None
    self.read_user_metadata()
def read_user_metadata(self):
    '''
    Load the <meta name="calibre:user_metadata:#field"> elements into
    ``self._user_metadata_``. Entries that fail to decode are reported and
    skipped.
    '''
    from calibre.utils.config import from_json
    from calibre.ebooks.metadata.book.json_codec import decode_is_multiple
    self._user_metadata_ = {}
    # Scratch Metadata object used to collect the decoded fields
    temp = Metadata('x', ['x'])
    query = ('//*[name() = "meta" and starts-with(@name,'
             '"calibre:user_metadata:") and @content]')
    for meta in self.root.xpath(query):
        # Everything after the "calibre:user_metadata:" prefix
        name = ':'.join(meta.get('name').split(':')[2:])
        if not (name and name.startswith('#')):
            continue
        fm = meta.get('content')
        try:
            fm = json.loads(fm, object_hook=from_json)
            decode_is_multiple(fm)
            temp.set_user_metadata(name, fm)
        except:
            prints('Failed to read user metadata:', name)
            import traceback
            traceback.print_exc()
            continue
    self._user_metadata_ = temp.get_all_user_metadata(True)
def to_book_metadata(self):
    '''
    Return the metadata of this OPF as a metadata object. Packages with a
    version of 3.0 or newer are delegated to the opf3 reader.
    '''
    if self.package_version >= 3.0:
        from calibre.ebooks.metadata.opf3 import read_metadata
        return read_metadata(self.root)
    mi = MetaInformation(self)
    for field, field_metadata in self._user_metadata_.items():
        mi.set_user_metadata(field, field_metadata)
    mi.set_identifiers(self.get_identifiers())
    return mi
def read_annotations(self):
    '''
    Yield the JSON decoded content of every <meta name="calibre:annotation">
    element. Elements whose content cannot be decoded are silently skipped.
    '''
    query = '//*[name() = "meta" and @name = "calibre:annotation" and @content]'
    for meta in self.root.xpath(query):
        try:
            yield json.loads(meta.get('content'))
        except Exception:
            continue
def write_user_metadata(self):
    '''
    Replace the existing calibre:user_metadata <meta> elements with a fresh
    serialization of ``self._user_metadata_``.
    '''
    query = ('//*[name() = "meta" and starts-with(@name,'
             '"calibre:user_metadata:") and @content]')
    for stale in self.root.xpath(query):
        stale.getparent().remove(stale)
    serialize_user_metadata(self.metadata, self._user_metadata_)
def find_toc(self):
    '''
    Locate and read the table of contents into :member:`toc`.

    The candidates are, in order: the toc attribute of the spine, a guide
    reference of type "toc" and a manifest item with "toc" in its href.
    Any error is swallowed; :member:`toc` is None when nothing was found.
    '''
    self.toc = None
    try:
        spines = self.XPath('descendant::*[re:match(name(), "spine", "i")]')(self.root)
        toc = None
        if spines:
            toc = spines[0].get('toc', None)
        if toc is None and self.guide:
            for ref in self.guide:
                if ref.type and ref.type.lower() == 'toc':
                    toc = ref.path
        if toc is None:
            for entry in self.manifest:
                if 'toc' in entry.href().lower():
                    toc = entry.path
        if toc is None:
            return

        self.toc = TOC(base_path=self.base_dir)
        is_ncx = (getattr(self, 'manifest', None) is not None and
                  self.manifest.type_for_id(toc) is not None and
                  'dtbncx' in self.manifest.type_for_id(toc))
        if is_ncx or toc.lower() in ('ncx', 'ncxtoc'):
            ncx_path = self.manifest.path_for_id(toc)
            if ncx_path:
                self.toc.read_ncx_toc(ncx_path)
            else:
                # Fall back to the first .ncx file next to the OPF
                candidates = glob.glob(os.path.join(self.base_dir, '*.ncx'))
                if candidates:
                    self.toc.read_ncx_toc(candidates[0])
        else:
            html_path, _sep, fragment = toc.partition('#')
            self.path_to_html_toc, self.html_toc_fragment = html_path, fragment
            if not (os.access(html_path, os.R_OK) and os.path.isfile(html_path)):
                self.path_to_html_toc = None
            self.toc.read_html_toc(toc)
    except:
        pass
def get_text(self, elem):
    '''Return what CONTENT selects for `elem`, or its TEXT when that is empty, joined into a string.'''
    parts = self.CONTENT(elem)
    if not parts:
        parts = self.TEXT(elem)
    return ''.join(parts)
def set_text(self, elem, content):
    '''Store `content` in the content attribute of an OPF <meta> element, otherwise in the element text.'''
    if elem.tag != self.META:
        elem.text = content
        return
    elem.attrib['content'] = content
def itermanifest(self):
    '''Return the elements that manifest_path selects below the root.'''
    root = self.root
    return self.manifest_path(root)
def create_manifest_item(self, href, media_type, append=False):
    '''
    Create an <item> element with the first ``id<N>`` (N from 1) that no
    existing manifest item uses. A false `media_type` becomes
    application/xhtml+xml. With `append` the element is also added to the
    first manifest element.
    '''
    taken = {i.get('id', None) for i in self.itermanifest()}
    # With len(taken) ids in use, one of the first len(taken)+1 candidates is free
    manifest_id = next('id%d'%n for n in range(1, len(taken) + 2)
                       if 'id%d'%n not in taken)
    attrib = {
        'id': manifest_id,
        'href': href,
        'media-type': media_type or 'application/xhtml+xml',
    }
    ans = etree.Element('{%s}item'%self.NAMESPACES['opf'], attrib=attrib)
    ans.tail = '\n\t\t'
    if append:
        self.manifest_ppath(self.root)[0].append(ans)
    return ans
def replace_manifest_item(self, item, items):
    '''
    Replace the manifest element `item` with new elements created from
    `items` (argument tuples for create_manifest_item). The new ids are the
    id of `item` followed by ``.1``, ``.2`` and so on. Returns the new ids.
    '''
    replacements = [self.create_manifest_item(*spec) for spec in items]
    for number, new_item in enumerate(replacements, 1):
        new_item.set('id', item.get('id')+'.%d'%number)
    manifest = item.getparent()
    position = manifest.index(item)
    manifest[position:position+1] = replacements
    return [new_item.get('id') for new_item in replacements]
def iterspine(self):
    '''Return the elements that spine_path selects below the root.'''
    root = self.root
    return self.spine_path(root)
def spine_items(self):
    '''Yield, in spine order, the href of every manifest item a spine itemref refers to.'''
    for itemref in self.iterspine():
        wanted = itemref.get('idref', '')
        yield from (entry.get('href', '') for entry in self.itermanifest()
                    if entry.get('id', None) == wanted)
def first_spine_item(self):
    '''Return the href of the manifest item the first spine itemref refers to, or None.'''
    itemrefs = self.iterspine()
    if not itemrefs:
        return None
    wanted = itemrefs[0].get('idref', '')
    return next((entry.get('href', None) for entry in self.itermanifest()
                 if entry.get('id', None) == wanted), None)
def create_spine_item(self, idref):
    '''Create (without inserting it anywhere) an <itemref> element for `idref`.'''
    tag = '{%s}itemref'%self.NAMESPACES['opf']
    itemref = etree.Element(tag, idref=idref)
    itemref.tail = '\n\t\t'
    return itemref
def replace_spine_items_by_idref(self, idref, new_idrefs):
    '''Replace every spine itemref that refers to `idref` with itemrefs for `new_idrefs`.'''
    replacements = [self.create_spine_item(ref) for ref in new_idrefs]
    spine = self.XPath('/opf:package/*[re:match(name(), "spine", "i")]')(self.root)[0]
    stale = [ref for ref in self.iterspine() if ref.get('idref', None) == idref]
    for itemref in stale:
        position = spine.index(itemref)
        spine[position:position+1] = replacements
def create_guide_element(self):
    '''Append a new, empty <guide> element to the root and return it.'''
    guide = etree.SubElement(self.root, '{%s}guide'%self.NAMESPACES['opf'])
    guide.text, guide.tail = '\n ', '\n'
    return guide
def remove_guide(self):
    '''Forget the parsed guide and remove the guide elements that are children of the root.'''
    self.guide = None
    regex_ns = {'re':'http://exslt.org/regular-expressions'}
    for guide in self.root.xpath('./*[re:match(name(), "guide", "i")]', namespaces=regex_ns):
        self.root.remove(guide)
def create_guide_item(self, type, title, href):
    '''Create (without inserting it anywhere) a <reference> element.'''
    tag = '{%s}reference'%self.NAMESPACES['opf']
    reference = etree.Element(tag, type=type, title=title, href=href)
    reference.tail = '\n'
    return reference
def add_guide_item(self, type, title, href):
    '''
    Append a new <reference> to the first guide element that is a child of
    the root. Raises IndexError when there is no guide element.
    '''
    regex_ns = {'re':'http://exslt.org/regular-expressions'}
    guides = self.root.xpath('./*[re:match(name(), "guide", "i")]', namespaces=regex_ns)
    guides[0].append(self.create_guide_item(type, title, href))
def iterguide(self):
    '''Return the elements that guide_path selects below the root.'''
    root = self.root
    return self.guide_path(root)
def unquote_urls(self):
    '''Replace the href of every manifest item and guide reference with its unquoted form.'''
    def unquoted_href(element):
        href = unquote(element.get('href', ''))
        return href if isinstance(href, str) else href.decode('utf-8')

    for elements in (self.itermanifest, self.iterguide):
        for element in elements():
            element.set('href', unquoted_href(element))
@property
def title(self):
    '''The text of the first non blank title element, with runs of whitespace collapsed.'''
    # TODO: Add support for EPUB 3 refinements
    for elem in self.title_path(self.metadata):
        text = self.get_text(elem)
        if not text:
            continue
        text = text.strip()
        if text:
            return re.sub(r'\s+', ' ', text)
    return None

@title.setter
def title(self, val):
    new_title = (val or '').strip()
    existing = self.title_path(self.metadata)
    if self.package_version < 3:
        # EPUB 3 allows multiple title elements containing sub-titles,
        # series and other things. We all loooove EPUB 3.
        for elem in existing:
            elem.getparent().remove(elem)
        existing = ()
    if not new_title:
        return
    target = existing[0] if existing else self.create_metadata_element('title')
    target.text = re.sub(r'\s+', ' ', str(new_title))
@property
def authors(self):
    '''The author names; the editors are used when there are no authors.'''
    for finder in (self.authors_path, self.editors_path):
        names = []
        for elem in finder(self.metadata):
            names.extend(string_to_authors(self.get_text(elem)))
        if names:
            return names
    return []

@authors.setter
def authors(self, val):
    stale = list(self.authors_path(self.metadata)) or list(self.editors_path(self.metadata))
    for elem in stale:
        elem.getparent().remove(elem)
    # Ensure new author element is at the top of the list
    # for broken implementations that always use the first
    # <dc:creator> element with no attention to the role
    creator_tag = '{%s}creator'%self.NAMESPACES['dc']
    role_attr = '{%s}role'%self.NAMESPACES['opf']
    for author in reversed(val):
        elem = self.metadata.makeelement(creator_tag, nsmap=self.NAMESPACES)
        elem.tail = '\n'
        self.metadata.insert(0, elem)
        elem.set(role_attr, 'aut')
        self.set_text(elem, author.strip())
@property
def author_sort(self):
    '''The first non empty file-as attribute of the author (or else editor) elements.'''
    opf_file_as = '{%s}file-as'%self.NAMESPACES['opf']
    creators = self.authors_path(self.metadata) or self.editors_path(self.metadata)
    for creator in creators or ():
        file_as = creator.get(opf_file_as) or creator.get('file-as')
        if file_as:
            return file_as
    return None

@author_sort.setter
def author_sort(self, val):
    creators = self.authors_path(self.metadata) or self.editors_path(self.metadata)
    if not creators:
        return
    first = creators[0]
    for key in first.attrib:
        if key.endswith('file-as'):
            first.attrib.pop(key)
    first.set('{%s}file-as'%self.NAMESPACES['opf'], str(val))
@property
def tags(self):
    '''The tags: every subject element text, split on commas and stripped.'''
    texts = (self.get_text(elem) for elem in self.tags_path(self.metadata))
    return [part.strip()
            for text in texts if text and text.strip()
            for part in text.split(',')]

@tags.setter
def tags(self, val):
    for stale in list(self.tags_path(self.metadata)):
        stale.getparent().remove(stale)
    for tag in val:
        self.set_text(self.create_metadata_element('subject'), str(tag))
@property
def pubdate(self):
    '''The earliest date among the date elements that can be parsed, or None.'''
    earliest = None
    for elem in self.pubdate_path(self.metadata):
        try:
            raw = etree.tostring(elem, encoding='unicode',
                                 method='text', with_tail=False)
            candidate = parse_date(raw.strip())
        except:
            continue
        if earliest is None or candidate < earliest:
            earliest = candidate
    return earliest

@pubdate.setter
def pubdate(self, val):
    least_val = least_elem = None
    for elem in self.pubdate_path(self.metadata):
        try:
            raw = etree.tostring(elem, encoding='unicode',
                                 method='text', with_tail=False)
            current = parse_date(raw.strip())
        except:
            # Unparseable dates are always dropped
            elem.getparent().remove(elem)
            continue
        if not val:
            elem.getparent().remove(elem)
        if least_val is None or current < least_val:
            least_val, least_elem = current, elem

    if not val:
        return
    if least_val is None:
        least_elem = self.create_metadata_element('date')
    # The element holding the earliest date is overwritten with the new value
    least_elem.attrib.clear()
    least_elem.text = isoformat(val)
@property
def isbn(self):
    '''The text of the first identifier with an ISBN scheme, or None.'''
    found = self.isbn_path(self.metadata)
    for elem in found:
        return self.get_text(elem) or None
    return None

@isbn.setter
def isbn(self, val):
    root_attrib = self.root.attrib
    uuid_id = next((root_attrib[attr] for attr in root_attrib
                    if attr.endswith('unique-identifier')), None)

    matches = self.isbn_path(self.metadata)
    if val:
        if not matches:
            attrib = {'{%s}scheme'%self.NAMESPACES['opf']: 'ISBN'}
            matches = [self.create_metadata_element('identifier',
                                                    attrib=attrib)]
        self.set_text(matches[0], str(val))
        return

    for elem in matches:
        is_package_identifier = uuid_id is not None and uuid_id == elem.get('id', None)
        if not is_package_identifier:
            elem.getparent().remove(elem)
            continue
        # The package identifier is kept and turned into a random uuid
        self.set_text(elem, str(uuid.uuid4()))
        for attr in elem.attrib:
            if attr.endswith('scheme'):
                elem.attrib[attr] = 'uuid'
def get_identifiers(self):
    '''
    Return a dict mapping lower cased scheme to value for the identifier
    elements, leaving out the calibre and uuid schemes. Identifiers without
    a scheme only contribute an ``isbn`` entry.
    '''
    def text_of(elem):
        return etree.tostring(elem, with_tail=False, encoding='unicode',
                              method='text').strip()

    identifiers = {}
    schemeless = []
    finder = self.XPath('descendant::*[local-name() = "identifier" and text()]')
    for elem in finder(self.metadata):
        for attr, scheme in iteritems(elem.attrib):
            if not attr.endswith('scheme'):
                continue
            typ = icu_lower(scheme)
            val = text_of(elem)
            if val and typ not in ('calibre', 'uuid'):
                if typ == 'isbn' and val.lower().startswith('urn:isbn:'):
                    val = val[len('urn:isbn:'):]
                identifiers[typ] = val
            # Only the first scheme attribute is considered
            break
        else:
            val = text_of(elem)
            if val.lower().startswith('urn:isbn:'):
                val = check_isbn(val.split(':')[-1])
                if val is not None:
                    identifiers['isbn'] = val
            else:
                schemeless.append(val)

    if schemeless and 'isbn' not in identifiers:
        for val in schemeless:
            if check_isbn(val, simple_sanitize=True) is not None:
                identifiers['isbn'] = check_isbn(val)
                break

    return identifiers
def set_identifiers(self, identifiers):
    '''
    Make the identifier elements reflect `identifiers` (scheme -> value).
    Elements with a calibre or uuid scheme and elements without a scheme are
    left alone. The package identifier is never removed, only updated.
    '''
    pending = identifiers.copy()
    root_attrib = self.root.attrib
    uuid_id = next((root_attrib[attr] for attr in root_attrib
                    if attr.endswith('unique-identifier')), None)

    finder = self.XPath('descendant::*[local-name() = "identifier"]')
    for elem in finder(self.metadata):
        is_package_identifier = uuid_id is not None and uuid_id == elem.get('id', None)
        schemes = {val.lower() for attr, val in iteritems(elem.attrib) if attr.endswith('scheme')}
        if is_package_identifier:
            schemes = tuple(schemes)
            if schemes and schemes[0] in pending:
                self.set_text(elem, pending.pop(schemes[0]))
            continue
        if schemes and not (schemes & {'calibre', 'uuid'}):
            elem.getparent().remove(elem)

    scheme_attr = '{%s}scheme'%self.NAMESPACES['opf']
    for typ, val in iteritems(pending):
        new_elem = self.create_metadata_element(
            'identifier', attrib={scheme_attr: typ.upper()})
        self.set_text(new_elem, str(val))
@property
def application_id(self):
    '''The text of the first identifier with a calibre or libprs500 scheme, or None.'''
    found = self.application_id_path(self.metadata)
    for elem in found:
        return self.get_text(elem) or None
    return None

@application_id.setter
def application_id(self, val):
    removed_ids = set()
    for stale in tuple(self.application_id_path(self.metadata)):
        removed_ids.add(stale.get('id', None))
        stale.getparent().remove(stale)

    root_attrib = self.root.attrib
    uuid_id = next((root_attrib[attr] for attr in root_attrib
                    if attr.endswith('unique-identifier')), None)

    attrib = {'{%s}scheme'%self.NAMESPACES['opf']: 'calibre'}
    if uuid_id and uuid_id in removed_ids:
        # A removed element was the package identifier, so the new one takes over its id
        attrib['id'] = uuid_id
    new_elem = self.create_metadata_element('identifier', attrib=attrib)
    self.set_text(new_elem, str(val))
@property
def uuid(self):
    '''The text of the first identifier with a uuid scheme, or None.'''
    found = self.uuid_id_path(self.metadata)
    for elem in found:
        return self.get_text(elem) or None
    return None

@uuid.setter
def uuid(self, val):
    found = self.uuid_id_path(self.metadata)
    if found:
        target = found[0]
    else:
        scheme_attr = '{%s}scheme'%self.NAMESPACES['opf']
        target = self.create_metadata_element('identifier',
                                              attrib={scheme_attr: 'uuid'})
    self.set_text(target, str(val))
@property
def language(self):
    '''The first entry of :member:`languages`, or None if there is none.'''
    langs = self.languages
    return langs[0] if langs else None

@language.setter
def language(self, val):
    self.languages = [val]
@property
def languages(self):
    '''The canonicalized language codes, leaving out values that cannot be canonicalized.'''
    codes = []
    for elem in self.languages_path(self.metadata):
        text = self.get_text(elem)
        if not (text and text.strip()):
            continue
        code = canonicalize_lang(text.strip())
        if code:
            codes.append(code)
    return codes

@languages.setter
def languages(self, val):
    for stale in self.languages_path(self.metadata):
        stale.getparent().remove(stale)
    for lang in val:
        self.set_text(self.create_metadata_element('language'), str(lang))
@property
def raw_languages(self):
    '''Generator over the stripped, non blank texts of the language elements.'''
    for elem in self.languages_path(self.metadata):
        text = self.get_text(elem)
        stripped = text.strip() if text else ''
        if stripped:
            yield stripped
@property
def book_producer(self):
    '''The text of the first contributor with role "bkp", or None.'''
    found = self.bkp_path(self.metadata)
    for elem in found:
        return self.get_text(elem) or None
    return None

@book_producer.setter
def book_producer(self, val):
    found = self.bkp_path(self.metadata)
    if found:
        target = found[0]
    else:
        target = self.create_metadata_element('contributor')
        target.set('{%s}role'%self.NAMESPACES['opf'], 'bkp')
    self.set_text(target, str(val))
def identifier_iter(self):
    '''Generator over the identifier elements of the metadata.'''
    for elem in self.identifier_path(self.metadata):
        yield elem
@property
def raw_unique_identifier(self):
    '''
    The text of the first element with non empty text whose id is the value
    of the unique-identifier attribute of the root, or None.
    '''
    root_attrib = self.root.attrib
    uuid_elem = next((root_attrib[attr] for attr in root_attrib
                      if attr.endswith('unique-identifier')), None)
    if not uuid_elem:
        return None
    for elem in self.root.xpath('//*[@id=%s]'%escape_xpath_attr(uuid_elem)) or ():
        if elem.text:
            return elem.text
    return None
@property
def unique_identifier(self):
    '''The part of :member:`raw_unique_identifier` after its last colon, or None.'''
    raw = self.raw_unique_identifier
    if not raw:
        return None
    return raw.rsplit(':', 1)[-1]
@property
def page_progression_direction(self):
    '''The page-progression-direction attribute (with or without a namespace) of the first spine, or None.'''
    spines = self.XPath('descendant::*[re:match(name(), "spine", "i")][1]')(self.root)
    if not spines:
        return None
    for key, value in iteritems(spines[0].attrib):
        if key == 'page-progression-direction' or key.endswith('}page-progression-direction'):
            return value
    return None
@property
def primary_writing_mode(self):
    '''The content of the first <meta name="primary-writing-mode"> element, or None.'''
    finder = self.XPath('//*[local-name()="meta" and @name="primary-writing-mode" and @content]')
    found = finder(self.root)
    if found:
        return found[0].get('content')
    return None
def guess_cover(self):
    '''
    Try to guess a cover. Needed for some old/badly formed OPF files.

    Looks in :member:`base_dir` for a readable image file named after the
    text (with dashes removed) of an identifier that has a scheme attribute.

    :return: The path to the first such file, or None if there is none.
    '''
    if not (self.base_dir and os.path.exists(self.base_dir)):
        return None
    for item in self.identifier_path(self.metadata):
        scheme = None
        for key in item.attrib.keys():
            if key.endswith('scheme'):
                scheme = item.get(key)
                break
        if scheme is None:
            continue
        if item.text:
            prefix = item.text.replace('-', '')
            for suffix in ('.jpg', '.jpeg', '.gif', '.png', '.bmp'):
                cpath = os.path.join(self.base_dir, prefix+suffix)
                # Return the path itself, not the boolean result of os.access()
                if os.access(cpath, os.R_OK):
                    return cpath
    return None
@property
def epub3_raster_cover(self):
    '''
    The href of the first manifest item that has the cover-image property
    and a media type containing neither "xml" nor "html", or None.
    '''
    for item in self.itermanifest():
        properties = (item.get('properties') or '').lower().split()
        if 'cover-image' not in properties:
            continue
        media_type = item.get('media-type', '')
        if media_type and 'xml' not in media_type and 'html' not in media_type:
            return item.get('href', None)
    return None
@property
def raster_cover(self):
    '''
    The href of the raster cover named by the cover <meta> element. Its
    content is first looked up as a manifest id and then as a manifest href.
    Without a cover <meta>, packages of version 3.0 or newer fall back to
    :member:`epub3_raster_cover`.
    '''
    covers = self.raster_cover_path(self.metadata)
    if not covers:
        if self.package_version >= 3.0:
            return self.epub3_raster_cover
        return None
    cover_id = covers[0].get('content')
    for attr in ('id', 'href'):
        for item in self.itermanifest():
            if item.get(attr, None) != cover_id:
                continue
            media_type = item.get('media-type', '')
            if media_type and 'xml' not in media_type and 'html' not in media_type:
                return item.get('href', None)
    return None
@property
def guide_raster_cover(self):
    '''
    The manifest item for the first guide cover href that maps to an item
    with an id and a PNG, JPEG or GIF media type. None if there is no such
    item.
    '''
    hrefs = self.guide_cover_path(self.root)
    if not hrefs:
        return None
    raster_types = {'image/png', 'image/jpeg', 'image/jpg', 'image/gif'}
    by_href = {entry.get('href'): entry for entry in self.itermanifest()}
    for href in hrefs:
        if not href:
            continue
        entry = by_href.get(href)
        if entry is None:
            continue
        item_id, media_type = entry.get('id'), entry.get('media-type')
        if item_id and media_type and media_type.lower() in raster_types:
            return entry
    return None
@property
def epub3_nav(self):
    '''
    Path of the first manifest item that is flagged with the ``nav``
    property, has an HTML media type and an id, and whose file exists.
    Only consulted for package version 3.0 or later; None otherwise or
    when nothing qualifies.
    '''
    if not self.package_version >= 3.0:
        return None
    for entry in self.itermanifest():
        properties = (entry.get('properties') or '').lower().split()
        if 'nav' not in properties:
            continue
        media_type = entry.get('media-type') or ''
        if 'html' not in media_type.lower():
            continue
        manifest_id = entry.get('id')
        if not manifest_id:
            continue
        path = self.manifest.path_for_id(manifest_id)
        if path and os.path.exists(path):
            return path
    return None
@property
def cover(self):
    '''
    Path of the cover as declared in the guide, or a guessed one.

    Guide references are searched by type in priority order (``cover``,
    ``other.ms-coverimage-standard``, ``other.ms-coverimage``). If none
    matches and ``try_to_guess_cover`` is set, :meth:`guess_cover` is
    tried on a best-effort basis.

    :return: The cover path, or None if no cover could be determined.
    '''
    if self.guide is not None:
        for t in ('cover', 'other.ms-coverimage-standard', 'other.ms-coverimage'):
            for item in self.guide:
                if item.type and item.type.lower() == t:
                    return item.path
    try:
        if self.try_to_guess_cover:
            return self.guess_cover()
    except Exception:
        # Guessing is best effort: ordinary errors mean "no cover", but
        # SystemExit/KeyboardInterrupt must still propagate.
        pass
    return None
@cover.setter
def cover(self, path):
    '''
    Set the cover to ``path``.

    If a guide already exists, its cover is replaced and every guide
    element in the document whose ``type`` contains ``cover`` is removed.
    Otherwise a guide element and a new guide are created with a single
    cover reference. Finally, if the cover is not yet in the manifest, a
    manifest item is created for each cover-typed guide reference.
    '''
    if self.guide is not None:
        self.guide.set_cover(path)
        for item in list(self.iterguide()):
            if 'cover' in item.get('type', ''):
                item.getparent().remove(item)
    else:
        g = self.create_guide_element()
        self.guide = Guide()
        self.guide.set_cover(path)
        # lxml rejects prefixed names such as 'opf:reference'; the tag
        # has to be given in {namespace}local form.
        etree.SubElement(g, '{%s}reference'%self.NAMESPACES['opf'],
                         nsmap=self.NAMESPACES,
                         attrib={'type':'cover', 'href':self.guide[-1].href()})
    manifest_id = self.manifest.id_for_path(self.cover)
    if manifest_id is None:
        for t in ('cover', 'other.ms-coverimage-standard', 'other.ms-coverimage'):
            for item in self.guide:
                # References without a type are skipped, as in the getter
                if item.type and item.type.lower() == t:
                    self.create_manifest_item(item.href(), guess_type(path)[0])
def get_metadata_element(self, name):
    '''
    Return the last element that ``self.metadata_elem_path`` finds for
    ``name`` in the metadata section, or None if it finds nothing.
    '''
    found = self.metadata_elem_path(self.metadata, name=name)
    return found[-1] if found else None
def create_metadata_element(self, name, attrib=None, is_dc=True):
    '''
    Append a new child element to the metadata section and return it.

    :param name: Local name of a Dublin Core element when ``is_dc`` is
                 true; otherwise the suffix of a ``calibre:`` meta name.
    :param attrib: Optional attributes for the new element. When
                   ``is_dc`` is false a ``name`` entry is added to it.
    :param is_dc: Create a ``dc`` namespaced element (True) or an OPF
                  ``meta`` element (False).
    '''
    namespaces = self.NAMESPACES
    if is_dc:
        tag = '{{{}}}{}'.format(namespaces['dc'], name)
    else:
        attrib = attrib or {}
        attrib['name'] = 'calibre:' + name
        tag = '{{{}}}{}'.format(namespaces['opf'], 'meta')
    # The new element gets every namespace mapping except the opf one
    nsmap = dict(namespaces)
    nsmap.pop('opf')
    element = etree.SubElement(self.metadata, tag, attrib=attrib, nsmap=nsmap)
    element.tail = '\n'
    return element
def render(self, encoding='utf-8'):
    '''
    Serialize the document to bytes in ``encoding``.

    Before serializing, attribute order is normalized for devices that are
    sensitive to it and the user metadata is written into the tree. An XML
    declaration is prepended when the serialized output lacks one.
    '''
    for cover_meta in self.raster_cover_path(self.metadata):
        # Ensure that the name attribute occurs before the content
        # attribute. Needed for Nooks.
        attributes = cover_meta.attrib
        content = attributes.get('content', None)
        if content is not None:
            del attributes['content']
            attributes['content'] = content
    # The PocketBook requires calibre:series_index to come after
    # calibre:series or it fails to read series info
    # We swap attributes instead of elements, as that avoids namespace
    # re-declarations
    positions = {}
    for child in self.metadata.xpath('./*[@name="calibre:series" or @name="calibre:series_index"]'):
        positions[child.get('name')] = (child, self.metadata.index(child))
    if len(positions) == 2:
        series, series_pos = positions['calibre:series']
        index, index_pos = positions['calibre:series_index']
        if series_pos > index_pos:
            for attr in ('name', 'content'):
                saved = series.get(attr, '')
                series.set(attr, index.get(attr, ''))
                index.set(attr, saved)
    self.write_user_metadata()
    if pretty_print_opf:
        _pretty_print(self.root)
    raw = etree.tostring(self.root, encoding=encoding, pretty_print=True)
    if raw.lstrip().startswith(b'<?xml '):
        return raw
    declaration = '<?xml version="1.0" encoding="%s"?>\n'%encoding.upper()
    return declaration.encode('ascii') + raw
def smart_update(self, mi, replace_metadata=False, apply_null=False):
    '''
    Update this OPF from the metadata object ``mi``.

    For each built-in field listed below, a non-null value on ``mi`` is
    copied onto this object. A null value only clears the field when
    ``apply_null`` is true and the field is one of series, tags, isbn,
    comments, publisher or rating. Languages and user (custom) metadata are
    merged afterwards.

    :param mi: Source metadata object.
    :param replace_metadata: Forwarded to the ``smart_update`` of the
                             temporary book metadata object used for merging
                             user metadata.
    :param apply_null: When true, null values in ``mi`` clear the
                       corresponding values here (see above).
    '''
    for attr in ('title', 'authors', 'author_sort', 'title_sort',
                 'publisher', 'series', 'series_index', 'rating',
                 'isbn', 'tags', 'category', 'comments', 'book_producer',
                 'pubdate', 'user_categories', 'author_link_map'):
        val = getattr(mi, attr, None)
        if attr == 'rating' and val:
            val = float(val)
        # Empty containers count as null; a rating below 0.1 does too
        is_null = val is None or val in ((), [], (None, None), {}) or (attr == 'rating' and (not val or val < 0.1))
        if is_null:
            if apply_null and attr in {'series', 'tags', 'isbn', 'comments', 'publisher', 'rating'}:
                setattr(self, attr, ([] if attr == 'tags' else None))
        else:
            setattr(self, attr, val)
    langs = getattr(mi, 'languages', [])
    # A lone 'und' entry is treated the same as no languages at all
    if langs == ['und']:
        langs = []
    if apply_null or langs:
        self.languages = langs or []
    # User metadata is merged on a temporary book metadata object and the
    # result is stored back on this object at the end
    temp = self.to_book_metadata()
    temp.remove_stale_user_metadata(mi)
    temp.smart_update(mi, replace_metadata=replace_metadata)
    if not replace_metadata and callable(getattr(temp, 'custom_field_keys', None)):
        # We have to replace non-null fields regardless of the value of
        # replace_metadata to match the behavior of the builtin fields
        # above.
        for x in temp.custom_field_keys():
            meta = temp.get_user_metadata(x, make_copy=True)
            if meta is None:
                continue
            if meta['datatype'] == 'text' and meta['is_multiple']:
                val = mi.get(x, [])
                if val or apply_null:
                    temp.set(x, val)
            elif meta['datatype'] in {'int', 'float', 'bool'}:
                # Sentinel distinguishes "field absent from mi" from a
                # field that is present with value None
                missing = object()
                val = mi.get(x, missing)
                if val is missing:
                    if apply_null:
                        temp.set(x, None)
                elif apply_null or val is not None:
                    temp.set(x, val)
            elif apply_null and mi.is_null(x) and not temp.is_null(x):
                temp.set(x, None)
    self._user_metadata_ = temp.get_all_user_metadata(True)
# }}}
class OPFCreator(Metadata):
    '''
    A :class:`Metadata` object that can build a manifest, spine and guide
    and serialize everything as a version 2.0 OPF package document (see
    :meth:`render`).
    '''

    def __init__(self, base_path, other):
        '''
        Initialize from the metadata object ``other``.

        :param base_path: An absolute path to the folder in which this OPF
            file will eventually be. This is used by the
            :meth:`create_manifest` method to convert paths to files into
            relative paths.
        :param other: Metadata object passed on to ``Metadata.__init__``.
        '''
        Metadata.__init__(self, title='', other=other)
        self.base_path = os.path.abspath(base_path)
        self.page_progression_direction = None
        self.primary_writing_mode = None
        # Fill in defaults for values the rest of the class relies on
        if self.application_id is None:
            self.application_id = str(uuid.uuid4())
        if not isinstance(self.toc, TOC):
            self.toc = None
        if not self.authors:
            self.authors = [_('Unknown')]
        if self.guide is None:
            self.guide = Guide()
        if self.cover:
            self.guide.set_cover(self.cover)

    def create_manifest(self, entries):
        '''
        Create <manifest>

        `entries`: List of (path, mime-type) If mime-type is None it is autodetected

        Relative paths are resolved against ``self.base_path``.
        '''
        entries = list(map(lambda x: x if os.path.isabs(x[0]) else
                           (os.path.abspath(os.path.join(self.base_path, x[0])), x[1]),
                           entries))
        self.manifest = Manifest.from_paths(entries)
        self.manifest.set_basedir(self.base_path)

    def create_manifest_from_files_in(self, files_and_dirs,
                                      exclude=lambda x:False):
        '''
        Create the manifest from a list of files and directories.

        Directories are walked recursively and every regular file for which
        ``exclude(path)`` is false is added; entries that are not
        directories are added as they are. All mime-types are left as None.
        '''
        entries = []

        def dodir(dir):
            # Collect every non-excluded regular file below ``dir``
            for spec in os.walk(dir):
                root, files = spec[0], spec[-1]
                for name in files:
                    path = os.path.join(root, name)
                    if os.path.isfile(path) and not exclude(path):
                        entries.append((path, None))

        for i in files_and_dirs:
            if os.path.isdir(i):
                dodir(i)
            else:
                entries.append((i, None))

        self.create_manifest(entries)

    def create_spine(self, entries):
        '''
        Create the <spine> element. Must first call :method:`create_manifest`.

        `entries`: List of paths

        Relative paths are resolved against ``self.base_path``.
        '''
        entries = list(map(lambda x: x if os.path.isabs(x) else
                           os.path.abspath(os.path.join(self.base_path, x)), entries))
        self.spine = Spine.from_paths(entries, self.manifest)

    def set_toc(self, toc):
        '''
        Set the toc. You must call :method:`create_spine` before calling this
        method.

        :param toc: A :class:`TOC` object
        '''
        self.toc = toc

    def create_guide(self, guide_element):
        '''
        Replace the guide with one built from ``guide_element`` and set its
        base directory to ``self.base_path``.
        '''
        self.guide = Guide.from_opf_guide(guide_element, self.base_path)
        self.guide.set_basedir(self.base_path)

    def render(self, opf_stream=sys.stdout, ncx_stream=None,
               ncx_manifest_entry=None, encoding=None, process_guide=None):
        '''
        Serialize the OPF to ``opf_stream`` and, optionally, the table of
        contents to ``ncx_stream``.

        :param opf_stream: Stream the serialized OPF is written to.
            NOTE(review): the serialized data is bytes while the default
            ``sys.stdout`` is a text stream -- confirm callers always pass
            a binary stream.
        :param ncx_stream: If not None and a toc is set, the toc is rendered
            to this stream.
        :param ncx_manifest_entry: Path of the NCX file. If given (and a toc
            and a manifest exist) the manifest item with id ``ncx`` is
            replaced by an item for this path.
        :param encoding: Output encoding, ``utf-8`` when None.
        :param process_guide: Optional callable invoked as
            ``process_guide(E, guide)`` with the element maker and the
            generated guide element before serialization.
        '''
        if encoding is None:
            encoding = 'utf-8'
        toc = getattr(self, 'toc', None)
        if self.manifest:
            self.manifest.set_basedir(self.base_path)
            if ncx_manifest_entry is not None and toc is not None:
                if not os.path.isabs(ncx_manifest_entry):
                    ncx_manifest_entry = os.path.join(self.base_path, ncx_manifest_entry)
                # Drop any existing ncx item before adding the new one
                remove = [i for i in self.manifest if i.id == 'ncx']
                for item in remove:
                    self.manifest.remove(item)
                self.manifest.append(ManifestItem(ncx_manifest_entry, self.base_path))
                self.manifest[-1].id = 'ncx'
                self.manifest[-1].mime_type = 'application/x-dtbncx+xml'
        if self.guide is None:
            self.guide = Guide()
        if self.cover:
            cover = self.cover
            if not os.path.isabs(cover):
                cover = os.path.abspath(os.path.join(self.base_path, cover))
            self.guide.set_cover(cover)
        self.guide.set_basedir(self.base_path)

        # Actual rendering
        from lxml.builder import ElementMaker
        from calibre.ebooks.oeb.base import OPF2_NS, DC11_NS, CALIBRE_NS
        # The tree is built in a placeholder namespace that is swapped for
        # the real OPF namespace in the serialized bytes further down
        DNS = OPF2_NS+'___xx___'
        E = ElementMaker(namespace=DNS, nsmap={None:DNS})
        M = ElementMaker(namespace=DNS,
                         nsmap={'dc':DC11_NS, 'calibre':CALIBRE_NS, 'opf':OPF2_NS})
        DC = ElementMaker(namespace=DC11_NS)

        def DC_ELEM(tag, text, dc_attrs={}, opf_attrs={}):
            # Dublin Core element with optional plain (dc_attrs) and OPF
            # namespaced (opf_attrs) attributes; neither dict is modified
            if text:
                elem = getattr(DC, tag)(clean_ascii_chars(text), **dc_attrs)
            else:
                elem = getattr(DC, tag)(**dc_attrs)
            for k, v in opf_attrs.items():
                elem.set('{%s}%s'%(OPF2_NS, k), v)
            return elem

        def CAL_ELEM(name, content):
            # <meta name=... content=...> element
            return M.meta(name=name, content=content)

        metadata = M.metadata()
        a = metadata.append
        role = {}
        a(DC_ELEM('title', self.title if self.title else _('Unknown'),
                  opf_attrs=role))
        for i, author in enumerate(self.authors):
            fa = {'role':'aut'}
            # Only the first author carries the file-as (sort) value
            if i == 0 and self.author_sort:
                fa['file-as'] = self.author_sort
            a(DC_ELEM('creator', author, opf_attrs=fa))
        a(DC_ELEM('contributor', '%s (%s) [%s]'%(__appname__, __version__,
            'https://calibre-ebook.com'), opf_attrs={'role':'bkp',
            'file-as':__appname__}))
        # This identifier is the one referenced by the package's
        # unique-identifier attribute set below
        a(DC_ELEM('identifier', str(self.application_id),
                  opf_attrs={'scheme':__appname__},
                  dc_attrs={'id':__appname__+'_id'}))
        if getattr(self, 'pubdate', None) is not None:
            a(DC_ELEM('date', self.pubdate.isoformat()))
        langs = self.languages
        if not langs or langs == ['und']:
            langs = [get_lang().replace('_', '-').partition('-')[0]]
        for lang in langs:
            a(DC_ELEM('language', lang))
        if self.comments:
            a(DC_ELEM('description', self.comments))
        if self.publisher:
            a(DC_ELEM('publisher', self.publisher))
        for key, val in iteritems(self.get_identifiers()):
            a(DC_ELEM('identifier', val, opf_attrs={'scheme':icu_upper(key)}))
        if self.rights:
            a(DC_ELEM('rights', self.rights))
        if self.tags:
            for tag in self.tags:
                a(DC_ELEM('subject', tag))
        if self.series:
            a(CAL_ELEM('calibre:series', self.series))
            # NOTE(review): the series index is written only together with
            # a series -- nesting reconstructed, confirm against upstream
            if self.series_index is not None:
                a(CAL_ELEM('calibre:series_index', self.format_series_index()))
        if self.title_sort:
            a(CAL_ELEM('calibre:title_sort', self.title_sort))
        if self.rating is not None:
            a(CAL_ELEM('calibre:rating', str(self.rating)))
        if self.timestamp is not None:
            a(CAL_ELEM('calibre:timestamp', self.timestamp.isoformat()))
        if self.publication_type is not None:
            a(CAL_ELEM('calibre:publication_type', self.publication_type))
        if self.user_categories:
            from calibre.ebooks.metadata.book.json_codec import object_to_unicode
            a(CAL_ELEM('calibre:user_categories',
                       json.dumps(object_to_unicode(self.user_categories))))
        if self.primary_writing_mode:
            a(M.meta(name='primary-writing-mode', content=self.primary_writing_mode))
        manifest = E.manifest()
        if self.manifest is not None:
            for ref in self.manifest:
                href = ref.href()
                if isinstance(href, bytes):
                    href = href.decode('utf-8')
                item = E.item(id=str(ref.id), href=href)
                item.set('media-type', ref.mime_type)
                manifest.append(item)
        spine = E.spine()
        if self.toc is not None:
            spine.set('toc', 'ncx')
        if self.page_progression_direction is not None:
            spine.set('page-progression-direction', self.page_progression_direction)
        if self.spine is not None:
            for ref in self.spine:
                if ref.id is not None:
                    spine.append(E.itemref(idref=ref.id))
        guide = E.guide()
        if self.guide is not None:
            for ref in self.guide:
                href = ref.href()
                if isinstance(href, bytes):
                    href = href.decode('utf-8')
                item = E.reference(type=ref.type, href=href)
                if ref.title:
                    item.set('title', ref.title)
                guide.append(item)
        if process_guide is not None:
            process_guide(E, guide)

        serialize_user_metadata(metadata, self.get_all_user_metadata(False))
        root = E.package(
            metadata,
            manifest,
            spine,
            guide
        )
        root.set('unique-identifier', __appname__+'_id')
        root.set('version', '2.0')
        raw = etree.tostring(root, pretty_print=True, xml_declaration=True,
                             encoding=encoding)
        # Swap the placeholder namespace for the real OPF namespace
        raw = raw.replace(DNS.encode('utf-8'), OPF2_NS.encode('utf-8'))
        opf_stream.write(raw)
        opf_stream.flush()
        if toc is not None and ncx_stream is not None:
            toc.render(ncx_stream, self.application_id)
            ncx_stream.flush()
def metadata_to_opf(mi, as_string=True, default_lang=None):
    '''
    Serialize the metadata object ``mi`` as a version 2.0 OPF package.

    Missing ``application_id``, ``uuid``, ``book_producer`` and
    ``languages`` values are filled in on ``mi`` itself first, and a
    non-string ``cover`` is decoded in place.

    :param mi: Metadata object to serialize.
    :param as_string: When true return the serialized document (UTF-8, with
                      XML declaration), otherwise return the root element.
    :param default_lang: Language to use when ``mi`` has none. When None,
                         the leading part of ``get_lang()`` is used.
    '''
    from lxml import etree
    import textwrap
    from calibre.ebooks.oeb.base import OPF, DC

    # Fill in the values the template below cannot do without
    if not mi.application_id:
        mi.application_id = str(uuid.uuid4())
    if not mi.uuid:
        mi.uuid = str(uuid.uuid4())
    if not mi.book_producer:
        producer_url = '[https://calibre-ebook.com]'
        mi.book_producer = __appname__ + ' (%s) '%__version__ + producer_url
    if not mi.languages:
        if default_lang is None:
            fallback = get_lang().replace('_', '-').partition('-')[0]
        else:
            fallback = default_lang
        mi.languages = [fallback]

    values = dict(a=__appname__, id=mi.application_id, uuid=mi.uuid)
    root = safe_xml_fromstring(textwrap.dedent(
        '''
<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="uuid_id" version="2.0">
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
<dc:identifier opf:scheme="%(a)s" id="%(a)s_id">%(id)s</dc:identifier>
<dc:identifier opf:scheme="uuid" id="uuid_id">%(uuid)s</dc:identifier>
</metadata>
<guide/>
</package>
'''%values))
    metadata, guide = root[0], root[1]
    child_indent = '\n'+(' '*8)
    section_indent = '\n' +(' '*4)
    metadata[0].tail = child_indent

    def add_element(tag, text=None, sort=None, role=None, scheme=None, name=None,
                    content=None):
        # Append a new element to <metadata>, falling back to cleaned
        # values when lxml rejects the attributes or the text
        attrib = {}
        if sort:
            attrib[OPF('file-as')] = sort
        if role:
            attrib[OPF('role')] = role
        if scheme:
            attrib[OPF('scheme')] = scheme
        if name:
            attrib['name'] = name
        if content:
            attrib['content'] = content
        try:
            elem = metadata.makeelement(tag, attrib=attrib)
        except ValueError:
            cleaned = {k:clean_xml_chars(v) for k, v in iteritems(attrib)}
            elem = metadata.makeelement(tag, attrib=cleaned)
        elem.tail = '\n'+(' '*8)
        if text:
            try:
                elem.text = text.strip()
            except ValueError:
                elem.text = clean_ascii_chars(text.strip())
        metadata.append(elem)

    def add_calibre_meta(n, c):
        # <meta name="calibre:<n>" content="<c>"/>
        return add_element('meta', name='calibre:'+n, content=c)

    # Dublin Core elements
    add_element(DC('title'), mi.title)
    for author in mi.authors:
        add_element(DC('creator'), author, mi.author_sort, 'aut')
    add_element(DC('contributor'), mi.book_producer, __appname__, 'bkp')
    if hasattr(mi.pubdate, 'isoformat'):
        add_element(DC('date'), isoformat(mi.pubdate))
    if hasattr(mi, 'category') and mi.category:
        add_element(DC('type'), mi.category)
    if mi.comments:
        add_element(DC('description'), clean_ascii_chars(mi.comments))
    if mi.publisher:
        add_element(DC('publisher'), mi.publisher)
    for key, val in iteritems(mi.get_identifiers()):
        add_element(DC('identifier'), val, scheme=icu_upper(key))
    if mi.rights:
        add_element(DC('rights'), mi.rights)
    for lang in mi.languages:
        if lang and lang.lower() != 'und':
            add_element(DC('language'), lang)
    if mi.tags:
        for tag in mi.tags:
            add_element(DC('subject'), tag)

    # calibre specific <meta> elements
    if getattr(mi, 'author_link_map', None) is not None:
        add_calibre_meta('author_link_map', dump_dict(mi.author_link_map))
    if mi.series:
        add_calibre_meta('series', mi.series)
    if mi.series_index is not None:
        add_calibre_meta('series_index', mi.format_series_index())
    if mi.rating is not None:
        add_calibre_meta('rating', str(mi.rating))
    if hasattr(mi.timestamp, 'isoformat'):
        add_calibre_meta('timestamp', isoformat(mi.timestamp))
    if mi.publication_type:
        add_calibre_meta('publication_type', mi.publication_type)
    if mi.title_sort:
        add_calibre_meta('title_sort', mi.title_sort)
    if mi.user_categories:
        add_calibre_meta('user_categories', dump_dict(mi.user_categories))

    serialize_user_metadata(metadata, mi.get_all_user_metadata(False))
    all_annotations = getattr(mi, 'all_annotations', None)
    if all_annotations:
        serialize_annotations(metadata, all_annotations)

    metadata[-1].tail = section_indent

    if mi.cover:
        if not isinstance(mi.cover, str):
            mi.cover = mi.cover.decode(filesystem_encoding)
        guide.text = '\n'+(' '*8)
        reference = guide.makeelement(
            OPF('reference'),
            attrib={'type':'cover', 'title':_('Cover'), 'href':mi.cover})
        reference.tail = '\n' +(' '*4)
        guide.append(reference)
    if pretty_print_opf:
        _pretty_print(root)

    if not as_string:
        return root
    return etree.tostring(root, pretty_print=True, encoding='utf-8',
                          xml_declaration=True)
def test_m2o():
    '''
    Manual round-trip check: serialize a sample MetaInformation object with
    :func:`metadata_to_opf`, read it back through :class:`OPF` and print a
    ``FAILED`` line for every attribute that does not survive the trip.
    '''
    from calibre.utils.date import now as nowf

    def normalized(value):
        # Only strings can be compared modulo surrounding whitespace;
        # calling .strip() on floats, lists or dates would raise
        # AttributeError instead of reporting the mismatch.
        return value.strip() if isinstance(value, str) else value

    mi = MetaInformation('test & title', ['a"1', "a'2"])
    mi.title_sort = 'a\'"b'
    mi.author_sort = 'author sort'
    mi.pubdate = nowf()
    mi.language = 'en'
    mi.comments = 'what a fun book\n\n'
    mi.publisher = 'publisher'
    mi.set_identifiers({'isbn':'booo', 'dummy':'dummy'})
    mi.tags = ['a', 'b']
    mi.series = 's"c\'l&<>'
    mi.series_index = 3.34
    mi.rating = 3
    mi.timestamp = nowf()
    mi.publication_type = 'ooooo'
    mi.rights = 'yes'
    mi.cover = os.path.abspath('asd.jpg')
    opf = metadata_to_opf(mi)
    print(opf)
    newmi = MetaInformation(OPF(io.BytesIO(opf)))
    for attr in ('author_sort', 'title_sort', 'comments',
                 'publisher', 'series', 'series_index', 'rating',
                 'isbn', 'tags', 'cover_data', 'application_id',
                 'language', 'cover',
                 'book_producer', 'timestamp',
                 'pubdate', 'rights', 'publication_type'):
        o, n = getattr(mi, attr), getattr(newmi, attr)
        if o != n and normalized(o) != normalized(n):
            print('FAILED:', attr, o, '!=', n)
    if mi.get_identifiers() != newmi.get_identifiers():
        print('FAILED:', 'identifiers', mi.get_identifiers(), end=' ')
        print('!=', newmi.get_identifiers())
def suite():
    '''
    Build and return the unittest suite exercising reading, writing,
    re-creating and smart-updating of a small sample OPF document.
    '''
    import unittest

    class OPFTest(unittest.TestCase):

        def setUp(self):
            # The sample must be pure ASCII (bytes literal) and well-formed
            # XML, so the ampersand is escaped and the copyright sign and
            # sharp s are written as numeric character references.
            self.stream = io.BytesIO(
                b'''\
<?xml version="1.0" encoding="UTF-8"?>
<package version="2.0" xmlns="http://www.idpf.org/2007/opf" >
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
<dc:title opf:file-as="Wow">A Cool &amp; &#169; &#223; Title</dc:title>
<creator opf:role="aut" file-as="Monkey">Monkey Kitchen</creator>
<creator opf:role="aut">Next</creator>
<dc:subject>One</dc:subject><dc:subject>Two</dc:subject>
<dc:identifier scheme="ISBN">123456789</dc:identifier>
<dc:identifier scheme="dummy">dummy</dc:identifier>
<meta name="calibre:series" content="A one book series" />
<meta name="calibre:rating" content="4"/>
<meta name="calibre:publication_type" content="test"/>
<meta name="calibre:series_index" content="2.5" />
</metadata>
<manifest>
<item id="1" href="a%20%7E%20b" media-type="text/txt" />
</manifest>
</package>
'''
            )
            self.opf = OPF(self.stream, os.getcwd())

        def testReading(self, opf=None):
            # Check every value of the sample document (or of ``opf``)
            if opf is None:
                opf = self.opf
            self.assertEqual(opf.title, 'A Cool & \xa9 \xdf Title')
            self.assertEqual(opf.authors, 'Monkey Kitchen,Next'.split(','))
            self.assertEqual(opf.author_sort, 'Monkey')
            self.assertEqual(opf.title_sort, 'Wow')
            self.assertEqual(opf.tags, ['One', 'Two'])
            self.assertEqual(opf.isbn, '123456789')
            self.assertEqual(opf.series, 'A one book series')
            self.assertEqual(opf.series_index, 2.5)
            self.assertEqual(opf.rating, 4)
            self.assertEqual(opf.publication_type, 'test')
            self.assertEqual(list(opf.itermanifest())[0].get('href'), 'a ~ b')
            self.assertEqual(opf.get_identifiers(), {'isbn':'123456789',
                'dummy':'dummy'})

        def testWriting(self):
            # Every assigned value must read back unchanged
            for test in [('title', 'New & Title'), ('authors', ['One', 'Two']),
                         ('author_sort', "Kitchen"), ('tags', ['Three']),
                         ('isbn', 'a'), ('rating', 3), ('series_index', 1),
                         ('title_sort', 'ts')]:
                setattr(self.opf, *test)
                attr, val = test
                self.assertEqual(getattr(self.opf, attr), val)

            self.opf.render()

        def testCreator(self):
            # A document rendered by OPFCreator must read back identically
            opf = OPFCreator(os.getcwd(), self.opf)
            buf = io.BytesIO()
            opf.render(buf)
            raw = buf.getvalue()
            self.testReading(opf=OPF(io.BytesIO(raw), os.getcwd()))

        def testSmartUpdate(self):
            # Updating a document from its own metadata must not change it
            self.opf.smart_update(MetaInformation(self.opf))
            self.testReading()

    return unittest.TestLoader().loadTestsFromTestCase(OPFTest)
def test():
    '''Run the suite returned by :func:`suite` with a verbose text runner.'''
    import unittest
    runner = unittest.TextTestRunner(verbosity=2)
    runner.run(suite())
def test_user_metadata():
    '''
    Check that user metadata survives serialization through both
    :func:`metadata_to_opf` and :class:`OPFCreator`, then print the
    re-rendered first document.
    '''
    um = {
        '#myseries': {'#value#': 'test series\xe4', 'datatype':'text',
                      'is_multiple': None, 'name': 'My Series'},
        '#myseries_index': {'#value#': 2.45, 'datatype': 'float',
                            'is_multiple': None},
        '#mytags': {'#value#':['t1','t2','t3'], 'datatype':'text',
                    'is_multiple': '|', 'name': 'My Tags'}
    }
    mi = Metadata('Test title', ['test author1', 'test author2'])
    mi.set_all_user_metadata(um)

    # First serialization path: metadata_to_opf
    from_function = metadata_to_opf(mi)

    # Second serialization path: OPFCreator.render
    creator = OPFCreator(os.getcwd(), other=mi)
    buf = io.BytesIO()
    creator.render(buf)
    from_creator = buf.getvalue()

    opf = OPF(io.BytesIO(from_function))
    opf2 = OPF(io.BytesIO(from_creator))
    assert um == opf._user_metadata_
    assert um == opf2._user_metadata_
    print(opf.render())
# When run as a script: perform the manual metadata_to_opf round-trip check
# and then run the unit test suite.
if __name__ == '__main__':
    # test_user_metadata()  # user metadata round trip, disabled by default
    test_m2o()
    test()