# Source: /lib/calibre/calibre/srv/opds.py
#!/usr/bin/env python3
'''
OPDS (Open Publication Distribution System) catalog feeds for the calibre
content server: Atom feeds for browsing the library by category, group and
search, with acquisition links for downloading books.
'''

__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import hashlib
from collections import OrderedDict, namedtuple
from functools import partial

from html5_parser import parse
from lxml import etree
from lxml.builder import ElementMaker

from calibre import force_unicode, guess_type, prepare_string_for_xml as xml
from calibre.constants import __appname__
from calibre.db.view import sanitize_sort_field_name
from calibre.ebooks.metadata import authors_to_string, fmt_sidx, rating_to_stars
from calibre.library.comments import comments_to_html
from calibre.srv.errors import HTTPInternalServerError, HTTPNotFound
from calibre.srv.http_request import parse_uri
from calibre.srv.routes import endpoint
from calibre.srv.utils import Offsets, get_library_data, http_date
from calibre.utils.config import prefs
from calibre.utils.date import as_utc, is_date_undefined, timestampfromdt
from calibre.utils.icu import sort_key
from calibre.utils.search_query_parser import ParseException
from calibre.utils.xml_parse import safe_xml_fromstring
from polyglot.binary import as_hex_unicode, from_hex_unicode
from polyglot.builtins import as_bytes, iteritems
from polyglot.urllib import unquote_plus, urlencode


def atom(ctx, rd, endpoint, output):
    # postprocess hook for @endpoint: serialize the feed and set the Atom
    # content-type headers. ``output`` may be raw bytes, a str, or an lxml tree.
    rd.outheaders.set('Content-Type', 'application/atom+xml; charset=UTF-8', replace_all=True)
    rd.outheaders.set('Calibre-Instance-Id', force_unicode(prefs['installation_uuid'], 'utf-8'), replace_all=True)
    if isinstance(output, bytes):
        ans = output  # Assume output is already UTF-8 XML
    elif isinstance(output, str):
        ans = output.encode('utf-8')
    else:
        ans = etree.tostring(output, encoding='utf-8', xml_declaration=True,
                pretty_print=True)
    return ans


def format_tag_string(tags, sep, joinval=', '):
    # Normalize a tag collection (either an iterable, or a ``sep``-separated
    # string) into a single sorted, joined display string.
    if tags:
        tlist = tags if sep is None else [t.strip() for t in tags.split(sep)]
    else:
        tlist = []
    tlist.sort(key=sort_key)
    return joinval.join(tlist) if tlist else ''


# Vocabulary for building OPDS feeds {{{

DC_NS = 'http://purl.org/dc/terms/'
E = ElementMaker(namespace='http://www.w3.org/2005/Atom',
                 nsmap={
                     None   : 'http://www.w3.org/2005/Atom',
                     'dc'   : DC_NS,
                     'opds' : 'http://opds-spec.org/2010/catalog',
                 })


FEED = E.feed
TITLE = E.title
ID = E.id
ICON = E.icon


def UPDATED(dt, *args, **kwargs):
    # Atom <updated> element; timestamps are always emitted in UTC.
    return E.updated(as_utc(dt).strftime('%Y-%m-%dT%H:%M:%S+00:00'), *args, **kwargs)


LINK = partial(E.link, type='application/atom+xml')
NAVLINK = partial(E.link, type='application/atom+xml;type=feed;profile=opds-catalog')


def SEARCH_LINK(url_for, *args, **kwargs):
    # OpenSearch-style rel="search" link with a {searchTerms} template.
    kwargs['rel'] = 'search'
    kwargs['title'] = 'Search'
    kwargs['href'] = url_for('/opds/search', query='XXX').replace('XXX', '{searchTerms}')
    return LINK(*args, **kwargs)


def AUTHOR(name, uri=None):
    args = [E.name(name)]
    if uri is not None:
        args.append(E.uri(uri))
    return E.author(*args)


SUBTITLE = E.subtitle


def NAVCATALOG_ENTRY(url_for, updated, title, description, query):
    # One entry in the top-level navigation catalog; the entry id is derived
    # from a hash of its href so it is stable across requests.
    href = url_for('/opds/navcatalog', which=as_hex_unicode(query))
    id_ = 'calibre-navcatalog:' + hashlib.sha1(as_bytes(href)).hexdigest()
    return E.entry(
        TITLE(title),
        ID(id_),
        UPDATED(updated),
        E.content(description, type='text'),
        NAVLINK(href=href)
    )


START_LINK = partial(NAVLINK, rel='start')
UP_LINK = partial(NAVLINK, rel='up')
FIRST_LINK = partial(NAVLINK, rel='first')
LAST_LINK = partial(NAVLINK, rel='last')
NEXT_LINK = partial(NAVLINK, rel='next', title='Next')
PREVIOUS_LINK = partial(NAVLINK, rel='previous')


def html_to_lxml(raw):
    # Parse an HTML fragment into a namespaced XHTML <div> suitable for
    # embedding as Atom content. Falls back progressively: strict XML parse,
    # then strip namespaced attributes and retry, then the lenient HTML4
    # parser as a last resort.
    raw = '<div>%s</div>'%raw
    root = parse(raw, keep_doctype=False, namespace_elements=False, maybe_xhtml=False, sanitize_names=True)
    root = next(root.iterdescendants('div'))
    root.set('xmlns', "http://www.w3.org/1999/xhtml")
    raw = etree.tostring(root, encoding='unicode')
    try:
        return safe_xml_fromstring(raw, recover=False)
    except Exception:
        # Strip attributes with namespace prefixes, which the strict XML
        # parser rejects, and try again
        for x in root.iterdescendants():
            remove = []
            for attr in x.attrib:
                if ':' in attr:
                    remove.append(attr)
            for a in remove:
                del x.attrib[a]
        raw = etree.tostring(root, encoding='unicode')
        try:
            return safe_xml_fromstring(raw, recover=False)
        except Exception:
            from calibre.ebooks.oeb.parse_utils import _html4_parse
            return _html4_parse(raw)


def CATALOG_ENTRY(item, item_kind, request_context, updated, catalog_name,
                  ignore_count=False, add_kind=False):
    # Entry for a single category item (a tag, author, series, ...). The
    # ``which`` route parameter encodes 'I<id>' for items with a database id
    # and 'N<name>' otherwise, followed by ':<kind>'.
    id_ = 'calibre:category:'+item.name
    iid = 'N' + item.name
    if item.id is not None:
        iid = 'I' + str(item.id)
    iid += ':'+item_kind
    href = request_context.url_for('/opds/category', category=as_hex_unicode(catalog_name), which=as_hex_unicode(iid))
    link = NAVLINK(href=href)
    if ignore_count:
        count = ''
    else:
        count = ngettext('one book', '{} books', item.count).format(item.count)
    if item.use_sort_as_name:
        name = item.sort
    else:
        name = item.name
    return E.entry(
        TITLE(name + ('' if not add_kind else ' (%s)'%item_kind)),
        ID(id_),
        UPDATED(updated),
        E.content(count, type='text'),
        link
    )


def CATALOG_GROUP_ENTRY(item, category, request_context, updated):
    # Entry for a first-letter group of category items (used when a category
    # has too many items to show ungrouped).
    id_ = 'calibre:category-group:'+category+':'+item.text
    iid = item.text
    link = NAVLINK(href=request_context.url_for('/opds/categorygroup', category=as_hex_unicode(category), which=as_hex_unicode(iid)))
    return E.entry(
        TITLE(item.text),
        ID(id_),
        UPDATED(updated),
        E.content(ngettext('one item', '{} items', item.count).format(item.count), type='text'),
        link
    )


def ACQUISITION_ENTRY(book_id, updated, request_context):
    # Full acquisition entry for one book: metadata summary as XHTML content
    # plus download links for every available format and cover/thumbnail links.
    field_metadata = request_context.db.field_metadata
    mi = request_context.db.get_metadata(book_id)
    extra = []
    if (mi.rating or 0) > 0:
        rating = rating_to_stars(mi.rating)
        extra.append(_('RATING: %s<br />')%rating)
    if mi.tags:
        extra.append(_('TAGS: %s<br />')%xml(format_tag_string(mi.tags, None)))
    if mi.series:
        extra.append(_('SERIES: %(series)s [%(sidx)s]<br />')%
                dict(series=xml(mi.series),
                sidx=fmt_sidx(float(mi.series_index))))
    for key in filter(request_context.ctx.is_field_displayable, field_metadata.ignorable_field_keys()):
        name, val = mi.format_field(key)
        if val:
            fm = field_metadata[key]
            datatype = fm['datatype']
            if datatype == 'text' and fm['is_multiple']:
                extra.append('%s: %s<br />'%
                             (xml(name),
                              xml(format_tag_string(val,
                                                    fm['is_multiple']['ui_to_list'],
                                                    joinval=fm['is_multiple']['list_to_ui']))))
            elif datatype == 'comments' or (fm['datatype'] == 'composite' and fm['display'].get('contains_html', False)):
                extra.append('%s: %s<br />'%(xml(name), comments_to_html(str(val))))
            else:
                extra.append('%s: %s<br />'%(xml(name), xml(str(val))))
    if mi.comments:
        comments = comments_to_html(mi.comments)
        extra.append(comments)
    if extra:
        extra = html_to_lxml('\n'.join(extra))
    ans = E.entry(TITLE(mi.title), E.author(E.name(authors_to_string(mi.authors))), ID('urn:uuid:' + mi.uuid),
                  UPDATED(mi.last_modified), E.published(mi.timestamp.isoformat()))
    if mi.pubdate and not is_date_undefined(mi.pubdate):
        ans.append(ans.makeelement('{%s}date' % DC_NS))
        ans[-1].text = mi.pubdate.isoformat()
    if len(extra):
        ans.append(E.content(extra, type='xhtml'))
    get = partial(request_context.ctx.url_for, '/get', book_id=book_id, library_id=request_context.library_id)
    if mi.formats:
        fm = mi.format_metadata
        for fmt in mi.formats:
            fmt = fmt.lower()
            mt = guess_type('a.'+fmt)[0]
            if mt:
                link = E.link(type=mt, href=get(what=fmt), rel="http://opds-spec.org/acquisition")
                ffm = fm.get(fmt.upper())
                if ffm:
                    link.set('length', str(ffm['size']))
                    link.set('mtime', ffm['mtime'].isoformat())
                ans.append(link)
    ans.append(E.link(type='image/jpeg', href=get(what='cover'), rel="http://opds-spec.org/cover"))
    ans.append(E.link(type='image/jpeg', href=get(what='thumb'), rel="http://opds-spec.org/thumbnail"))
    ans.append(E.link(type='image/jpeg', href=get(what='cover'), rel="http://opds-spec.org/image"))
    ans.append(E.link(type='image/jpeg', href=get(what='thumb'), rel="http://opds-spec.org/image/thumbnail"))

    return ans

# }}}

default_feed_title = __appname__ + ' ' + _('Library')


class Feed:  # {{{
    # Base Atom feed: title, author, id, icon, updated timestamp, search and
    # start links, plus optional pagination links.

    def __init__(self, id_, updated, request_context, subtitle=None,
            title=None, up_link=None, first_link=None, last_link=None,
            next_link=None, previous_link=None):
        self.base_href = request_context.url_for('/opds')

        self.root = \
            FEED(
                    TITLE(title or default_feed_title),
                    AUTHOR(__appname__, uri='https://calibre-ebook.com'),
                    ID(id_),
                    ICON(request_context.ctx.url_for('/favicon.png')),
                    UPDATED(updated),
                    SEARCH_LINK(request_context.url_for),
                    START_LINK(href=request_context.url_for('/opds'))
            )
        if up_link:
            self.root.append(UP_LINK(href=up_link))
        if first_link:
            self.root.append(FIRST_LINK(href=first_link))
        if last_link:
            self.root.append(LAST_LINK(href=last_link))
        if next_link:
            self.root.append(NEXT_LINK(href=next_link))
        if previous_link:
            self.root.append(PREVIOUS_LINK(href=previous_link))
        if subtitle:
            self.root.insert(1, SUBTITLE(subtitle))

# }}}


class TopLevel(Feed):  # {{{
    # The root OPDS catalog: sort-order entries, one entry per displayable
    # category, and entries to switch between calibre libraries.

    def __init__(self,
            updated,  # datetime object in UTC
            categories,
            request_context,
            id_='urn:calibre:main',
            subtitle=_('Books in your library')
            ):
        Feed.__init__(self, id_, updated, request_context, subtitle=subtitle)

        subc = partial(NAVCATALOG_ENTRY, request_context.url_for, updated)
        subcatalogs = [subc(_('By ')+title,
            _('Books sorted by ') + desc, q) for title, desc, q in
            categories]
        for x in subcatalogs:
            self.root.append(x)
        for library_id, library_name in sorted(iteritems(request_context.library_map), key=lambda item: sort_key(item[1])):
            id_ = 'calibre-library:' + library_id
            self.root.append(E.entry(
                TITLE(_('Library:') + ' ' + library_name),
                ID(id_),
                UPDATED(updated),
                E.content(_('Change calibre library to:') + ' ' + library_name, type='text'),
                NAVLINK(href=request_context.url_for('/opds', library_id=library_id))
            ))
# }}}


class NavFeed(Feed):
    # Feed with first/last/previous/next pagination links computed from an
    # Offsets object.

    def __init__(self, id_, updated, request_context, offsets, page_url, up_url, title=None):
        kwargs = {'up_link': up_url}
        kwargs['first_link'] = page_url
        kwargs['last_link'] = page_url+'&offset=%d'%offsets.last_offset
        if offsets.offset > 0:
            kwargs['previous_link'] = \
                page_url+'&offset=%d'%offsets.previous_offset
        if offsets.next_offset > -1:
            kwargs['next_link'] = \
                page_url+'&offset=%d'%offsets.next_offset
        if title:
            kwargs['title'] = title
        Feed.__init__(self, id_, updated, request_context, **kwargs)


class AcquisitionFeed(NavFeed):
    # Paginated feed of downloadable books.

    def __init__(self, id_, updated, request_context, items, offsets, page_url, up_url, title=None):
        NavFeed.__init__(self, id_, updated, request_context, offsets, page_url, up_url, title=title)
        for book_id in items:
            self.root.append(ACQUISITION_ENTRY(book_id, updated, request_context))


class CategoryFeed(NavFeed):
    # Paginated feed of category items; item counts are suppressed for search
    # results since they are not meaningful there.

    def __init__(self, items, which, id_, updated, request_context, offsets, page_url, up_url, title=None):
        NavFeed.__init__(self, id_, updated, request_context, offsets, page_url, up_url, title=title)
        ignore_count = False
        if which == 'search':
            ignore_count = True
        for item in items:
            self.root.append(CATALOG_ENTRY(
                item, item.category, request_context, updated, which,
                ignore_count=ignore_count, add_kind=which != item.category))


class CategoryGroupFeed(NavFeed):
    # Paginated feed of first-letter groups within a large category.

    def __init__(self, items, which, id_, updated, request_context, offsets, page_url, up_url, title=None):
        NavFeed.__init__(self, id_, updated, request_context, offsets, page_url, up_url, title=title)
        for item in items:
            self.root.append(CATALOG_GROUP_ENTRY(item, which, request_context, updated))


class RequestContext:
    # Per-request bundle of the library database, request data and server
    # context, with URL generation that preserves the selected library_id.

    def __init__(self, ctx, rd):
        self.db, self.library_id, self.library_map, self.default_library = get_library_data(ctx, rd)
        self.ctx, self.rd = ctx, rd

    def url_for(self, path, **kwargs):
        # Append library_id as a query parameter so feed links stay within
        # the currently selected library.
        lid = kwargs.pop('library_id', self.library_id)
        ans = self.ctx.url_for(path, **kwargs)
        q = {'library_id':lid}
        ans += '?' + urlencode(q)
        return ans

    def allowed_book_ids(self):
        return self.ctx.allowed_book_ids(self.rd, self.db)

    @property
    def outheaders(self):
        return self.rd.outheaders

    @property
    def opts(self):
        return self.ctx.opts

    def last_modified(self):
        return self.db.last_modified()

    def get_categories(self, report_parse_errors=False):
        return self.ctx.get_categories(self.rd, self.db, report_parse_errors=report_parse_errors)

    def search(self, query):
        return self.ctx.search(self.rd, self.db, query)


def get_acquisition_feed(rc, ids, offset, page_url, up_url, id_,
        sort_by='title', ascending=True, feed_title=None):
    '''
    Build the root element of a paginated acquisition feed for the books in
    ``ids``, sorted by ``sort_by``. Raises HTTPNotFound if ``ids`` is empty.
    '''
    if not ids:
        raise HTTPNotFound('No books found')
    with rc.db.safe_read_lock:
        sort_by = sanitize_sort_field_name(rc.db.field_metadata, sort_by)
        items = rc.db.multisort([(sort_by, ascending)], ids)
    max_items = rc.opts.max_opds_items
    offsets = Offsets(offset, max_items, len(items))
    items = items[offsets.offset:offsets.offset+max_items]
    lm = rc.last_modified()
    rc.outheaders['Last-Modified'] = http_date(timestampfromdt(lm))
    return AcquisitionFeed(id_, lm, rc, items, offsets, page_url, up_url, title=feed_title).root


def get_all_books(rc, which, page_url, up_url, offset=0):
    # Feed of all allowed books, sorted by title or by newest first.
    try:
        offset = int(offset)
    except Exception:
        raise HTTPNotFound('Not found')
    if which not in ('title', 'newest'):
        raise HTTPNotFound('Not found')
    sort = 'timestamp' if which == 'newest' else 'title'
    ascending = which == 'title'
    feed_title = {'newest':_('Newest'), 'title': _('Title')}.get(which, which)
    feed_title = default_feed_title + ' :: ' + _('By %s') % feed_title
    ids = rc.allowed_book_ids()
    return get_acquisition_feed(rc, ids, offset, page_url, up_url,
            id_='calibre-all:'+sort, sort_by=sort, ascending=ascending,
            feed_title=feed_title)


def get_navcatalog(request_context, which, page_url, up_url, offset=0):
    '''
    Feed for a single category: either the items themselves, or first-letter
    groups when the category has more than max_opds_ungrouped_items entries.
    '''
    categories = request_context.get_categories()
    if which not in categories:
        raise HTTPNotFound('Category %r not found'%which)

    items = categories[which]
    updated = request_context.last_modified()
    category_meta = request_context.db.field_metadata
    meta = category_meta.get(which, {})
    category_name = meta.get('name', which)
    feed_title = default_feed_title + ' :: ' + _('By %s') % category_name

    id_ = 'calibre-category-feed:'+which

    MAX_ITEMS = request_context.opts.max_opds_ungrouped_items

    if MAX_ITEMS > 0 and len(items) <= MAX_ITEMS:
        max_items = request_context.opts.max_opds_items
        offsets = Offsets(offset, max_items, len(items))
        items = list(items)[offsets.offset:offsets.offset+max_items]
        ans = CategoryFeed(items, which, id_, updated, request_context, offsets, page_url, up_url, title=feed_title)
    else:
        # Too many items: group them by first letter of their sort name
        Group = namedtuple('Group', 'text count')
        starts = set()
        for x in items:
            val = getattr(x, 'sort', x.name)
            if not val:
                val = 'A'
            starts.add(val[0].upper())
        category_groups = OrderedDict()
        for x in sorted(starts, key=sort_key):
            category_groups[x] = len([y for y in items if getattr(y, 'sort', y.name).upper().startswith(x)])
        items = [Group(x, y) for x, y in category_groups.items()]
        max_items = request_context.opts.max_opds_items
        offsets = Offsets(offset, max_items, len(items))
        items = items[offsets.offset:offsets.offset+max_items]
        ans = CategoryGroupFeed(items, which, id_, updated, request_context, offsets, page_url, up_url, title=feed_title)

    request_context.outheaders['Last-Modified'] = http_date(timestampfromdt(updated))

    return ans.root


@endpoint('/opds', postprocess=atom)
def opds(ctx, rd):
    # Top-level OPDS catalog listing the available sub-catalogs.
    rc = RequestContext(ctx, rd)
    db = rc.db
    try:
        categories = rc.get_categories(report_parse_errors=True)
    except ParseException as p:
        raise HTTPInternalServerError(p.msg)
    category_meta = db.field_metadata
    cats = [
        (_('Newest'), _('Date'), 'Onewest'),
        (_('Title'), _('Title'), 'Otitle'),
    ]

    def getter(x):
        try:
            return category_meta[x]['name'].lower()
        except KeyError:
            return x

    fm = rc.db.field_metadata
    for category in sorted(categories, key=lambda x: sort_key(getter(x))):
        if fm.is_ignorable_field(category) and not rc.ctx.is_field_displayable(category):
            continue
        if len(categories[category]) == 0:
            continue
        if category in ('formats', 'identifiers'):
            continue
        meta = category_meta.get(category, None)
        if meta is None:
            continue
        cats.append((meta['name'], meta['name'], 'N'+category))
    last_modified = db.last_modified()
    rd.outheaders['Last-Modified'] = http_date(timestampfromdt(last_modified))
    return TopLevel(last_modified, cats, rc).root


@endpoint('/opds/navcatalog/{which}', postprocess=atom)
def opds_navcatalog(ctx, rd, which):
    # Dispatch a navigation catalog request; ``which`` is hex-encoded and
    # prefixed with 'O' (sort order) or 'N' (category name).
    try:
        offset = int(rd.query.get('offset', 0))
    except Exception:
        raise HTTPNotFound('Not found')

    rc = RequestContext(ctx, rd)
    page_url = rc.url_for('/opds/navcatalog', which=which)
    up_url = rc.url_for('/opds')
    which = from_hex_unicode(which)
    type_ = which[0]
    which = which[1:]
    if type_ == 'O':
        return get_all_books(rc, which, page_url, up_url, offset=offset)
    elif type_ == 'N':
        return get_navcatalog(rc, which, page_url, up_url, offset=offset)
    raise HTTPNotFound('Not found')


@endpoint('/opds/category/{category}/{which}', postprocess=atom)
def opds_category(ctx, rd, category, which):
    # Acquisition feed for the books belonging to one category item.
    try:
        offset = int(rd.query.get('offset', 0))
    except Exception:
        raise HTTPNotFound('Not found')
    if not which or not category:
        raise HTTPNotFound('Not found')

    rc = RequestContext(ctx, rd)
    page_url = rc.url_for('/opds/category', which=which, category=category)
    up_url = rc.url_for('/opds/navcatalog', which=category)
    which, category = from_hex_unicode(which), from_hex_unicode(category)
    type_ = which[0]
    which = which[1:]
    if type_ == 'I':
        try:
            p = which.rindex(':')
            category = which[p+1:]
            which = which[:p]
            # This line will toss an exception for composite columns
            which = int(which)
        except Exception:
            # Might be a composite column, where we have the lookup key
            if not (category in rc.db.field_metadata and rc.db.field_metadata[category]['datatype'] == 'composite'):
                raise HTTPNotFound('Tag %r not found'%which)

    categories = rc.get_categories()
    if category not in categories:
        raise HTTPNotFound('Category %r not found'%which)

    if category == 'search':
        try:
            ids = rc.search('search:"%s"'%which)
        except Exception:
            raise HTTPNotFound('Search: %r not understood'%which)
        return get_acquisition_feed(rc, ids, offset, page_url, up_url, 'calibre-search:'+which)

    if type_ != 'I':
        raise HTTPNotFound('Non id categories not supported')

    q = category
    if q == 'news':
        q = 'tags'
    ids = rc.db.get_books_for_category(q, which) & rc.allowed_book_ids()
    sort_by = 'series' if category == 'series' else 'title'

    return get_acquisition_feed(rc, ids, offset, page_url, up_url, 'calibre-category:'+category+':'+str(which), sort_by=sort_by)


@endpoint('/opds/categorygroup/{category}/{which}', postprocess=atom)
def opds_categorygroup(ctx, rd, category, which):
    # Feed for the items of one first-letter group within a category.
    try:
        offset = int(rd.query.get('offset', 0))
    except Exception:
        raise HTTPNotFound('Not found')
    if not which or not category:
        raise HTTPNotFound('Not found')

    rc = RequestContext(ctx, rd)
    categories = rc.get_categories()
    page_url = rc.url_for('/opds/categorygroup', category=category, which=which)

    category = from_hex_unicode(category)
    if category not in categories:
        raise HTTPNotFound('Category %r not found'%which)
    category_meta = rc.db.field_metadata
    meta = category_meta.get(category, {})
    category_name = meta.get('name', which)
    which = from_hex_unicode(which)
    feed_title = default_feed_title + ' :: ' + (_('By {0} :: {1}').format(category_name, which))
    owhich = as_hex_unicode('N'+category)
    up_url = rc.url_for('/opds/navcatalog', which=owhich)
    items = categories[category]

    def belongs(x, which):
        return getattr(x, 'sort', x.name).lower().startswith(which.lower())

    items = [x for x in items if belongs(x, which)]
    if not items:
        raise HTTPNotFound('No items in group %r:%r'%(category, which))
    updated = rc.last_modified()
    id_ = 'calibre-category-group-feed:'+category+':'+which

    max_items = rc.opts.max_opds_items
    offsets = Offsets(offset, max_items, len(items))
    items = list(items)[offsets.offset:offsets.offset+max_items]

    rc.outheaders['Last-Modified'] = http_date(timestampfromdt(updated))

    return CategoryFeed(items, category, id_, updated, rc, offsets, page_url, up_url, title=feed_title).root


@endpoint('/opds/search/{query=""}', postprocess=atom)
def opds_search(ctx, rd, query):
    # Acquisition feed of search results. The query is re-extracted from the
    # original URI so that characters the router would mangle are preserved.
    try:
        offset = int(rd.query.get('offset', 0))
    except Exception:
        raise HTTPNotFound('Not found')

    rc = RequestContext(ctx, rd)
    if query:
        path = parse_uri(rd.request_original_uri, parse_query=False, unquote_func=unquote_plus)[1]
        query = path[-1]
    if isinstance(query, bytes):
        query = query.decode('utf-8')
    try:
        ids = rc.search(query)
    except Exception:
        raise HTTPNotFound('Search: %r not understood'%query)
    page_url = rc.url_for('/opds/search', query=query)
    return get_acquisition_feed(rc, ids, offset, page_url, rc.url_for('/opds'),
                                'calibre-search:'+query)