%PDF- %PDF-
Mini Shell

Mini Shell

Direktori : /lib/calibre/calibre/gui2/store/stores/
Upload File :
Create Path :
Current File : //lib/calibre/calibre/gui2/store/stores/manybooks_plugin.py

# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function, unicode_literals

store_version = 2  # Needed for dynamic plugin loading

__license__ = 'GPL 3'
__copyright__ = '2011, John Schember <john@nachtimwald.com>'
__docformat__ = 'restructuredtext en'

import mimetypes
from contextlib import closing

from lxml import etree

from calibre import browser
from calibre.gui2.store.basic_config import BasicStoreConfig
from calibre.gui2.store.opensearch_store import OpenSearchOPDSStore
from calibre.gui2.store.search_result import SearchResult
from calibre.utils.opensearch.description import Description
from calibre.utils.opensearch.query import Query


def search_manybooks(query, max_results=10, timeout=60, open_search_url='http://www.manybooks.net/opds/'):
    '''
    Search the ManyBooks OPDS catalogue and yield SearchResult objects.

    Manybooks uses a very strange opds feed. The opds
    main feed is structured like a stanza feed. The
    search result entries give very little information
    and requires you to go to a detail link. The detail
    link has the wrong type specified (text/html instead
    of application/atom+xml).

    :param query: the search terms as a single string.
    :param max_results: maximum number of results to yield.
    :param timeout: network timeout in seconds for the search request;
                    each detail-page fetch uses a quarter of this value.
    :param open_search_url: URL of the OpenSearch description document.
    '''

    description = Description(open_search_url)
    url_template = description.get_best_template()
    if not url_template:
        # No usable search template advertised; nothing we can do.
        return
    oquery = Query(url_template)

    # set up initial values
    oquery.searchTerms = query
    oquery.count = max_results
    url = oquery.url()

    counter = max_results
    br = browser()
    with closing(br.open(url, timeout=timeout)) as f:
        raw_data = f.read()
        raw_data = raw_data.decode('utf-8', 'replace')
        # recover=True tolerates the feed's malformed XML; external entity
        # resolution and network access are disabled for safety.
        doc = etree.fromstring(raw_data, parser=etree.XMLParser(recover=True, no_network=True, resolve_entities=False))
        for data in doc.xpath('//*[local-name() = "entry"]'):
            if counter <= 0:
                break

            counter -= 1

            s = SearchResult()

            # Despite the text/html type, this link points at the Atom
            # detail entry we need (the feed mislabels it).
            detail_links = data.xpath('./*[local-name() = "link" and @type = "text/html"]')
            if not detail_links:
                continue
            detail_link = detail_links[0]
            detail_href = detail_link.get('href')
            if not detail_href:
                continue

            s.detail_item = 'http://manybooks.net/titles/' + detail_href.split('tid=')[-1] + '.html'
            # These can have HTML inside of them. We are going to get them again later
            # just in case.
            s.title = ''.join(data.xpath('./*[local-name() = "title"]//text()')).strip()
            s.author = ', '.join(data.xpath('./*[local-name() = "author"]//text()')).strip()

            # Follow the detail link to get the rest of the info.
            with closing(br.open(detail_href, timeout=timeout/4)) as df:
                ddoc = etree.fromstring(df.read(), parser=etree.XMLParser(recover=True, no_network=True, resolve_entities=False))
                ddata = ddoc.xpath('//*[local-name() = "entry"][1]')
                if ddata:
                    ddata = ddata[0]

                    # This is the real title and author info we want. We got
                    # it previously just in case it's not specified here for some reason.
                    s.title = ''.join(ddata.xpath('./*[local-name() = "title"]//text()')).strip()
                    s.author = ', '.join(ddata.xpath('./*[local-name() = "author"]//text()')).strip()
                    if s.author.startswith(','):
                        s.author = s.author[1:]
                    if s.author.endswith(','):
                        s.author = s.author[:-1]

                    s.cover_url = ''.join(ddata.xpath('./*[local-name() = "link" and @rel = "http://opds-spec.org/thumbnail"][1]/@href')).strip()

                    # Map each acquisition link's MIME type to a file
                    # extension for the downloads dict.
                    for link in ddata.xpath('./*[local-name() = "link" and @rel = "http://opds-spec.org/acquisition"]'):
                        mime_type = link.get('type')  # renamed from 'type': avoid shadowing the builtin
                        href = link.get('href')
                        if mime_type:
                            ext = mimetypes.guess_extension(mime_type)
                            if ext:
                                ext = ext[1:].upper().strip()
                                s.downloads[ext] = href

            s.price = '$0.00'
            s.drm = SearchResult.DRM_UNLOCKED
            s.formats = 'EPUB, PDB (eReader, PalmDoc, zTXT, Plucker, iSilo), FB2, ZIP, AZW, MOBI, PRC, LIT, PKG, PDF, TXT, RB, RTF, LRF, TCR, JAR'

            yield s


class ManyBooksStore(BasicStoreConfig, OpenSearchOPDSStore):
    '''Store plugin for manybooks.net, backed by the module-level search helper.'''

    open_search_url = 'http://www.manybooks.net/opds/'
    web_url = 'http://manybooks.net'

    def search(self, query, max_results=10, timeout=60):
        # search_manybooks is itself a generator function, so returning its
        # generator directly is just as lazy as re-yielding each result here.
        return search_manybooks(
            query, max_results=max_results, timeout=timeout,
            open_search_url=self.open_search_url)


if __name__ == '__main__':
    # Command-line smoke test: search for the terms given as arguments.
    import sys
    terms = ' '.join(sys.argv[1:])
    for hit in search_manybooks(terms):
        print(hit)

Zerion Mini Shell 1.0