%PDF- %PDF-
| Direktori : /lib/calibre/calibre/ebooks/metadata/sources/ |
| Current File : //lib/calibre/calibre/ebooks/metadata/sources/test.py |
#!/usr/bin/env python3
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import absolute_import, division, print_function, unicode_literals
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import os, tempfile, time
from threading import Event
from calibre.customize.ui import all_metadata_plugins
from calibre import prints, sanitize_file_name
from calibre.ebooks.metadata import check_isbn
from calibre.ebooks.metadata.sources.base import create_log, get_cached_cover_urls
from calibre.ebooks.metadata.sources.prefs import msprefs
from polyglot.queue import Queue, Empty
def isbn_test(isbn):
isbn_ = check_isbn(isbn)
def test(mi):
misbn = check_isbn(mi.isbn)
if misbn and misbn == isbn_:
return True
prints('ISBN test failed. Expected: \'%s\' found \'%s\''%(isbn_, misbn))
return False
return test
def title_test(title, exact=False):
title = title.lower()
def test(mi):
mt = mi.title.lower()
if (exact and mt == title) or \
(not exact and title in mt):
return True
prints('Title test failed. Expected: \'%s\' found \'%s\''%(title, mt))
return False
return test
def authors_test(authors, subset=False):
authors = {x.lower() for x in authors}
def test(mi):
au = {x.lower() for x in mi.authors}
if msprefs['swap_author_names']:
def revert_to_fn_ln(a):
if ',' not in a:
return a
parts = a.split(',', 1)
t = parts[-1]
parts = parts[:-1]
parts.insert(0, t)
return ' '.join(parts)
au = {revert_to_fn_ln(x) for x in au}
if subset and authors.issubset(au):
return True
if au == authors:
return True
prints('Author test failed. Expected: \'%s\' found \'%s\''%(authors, au))
return False
return test
def tags_test(tags):
tags = {x.lower() for x in tags}
def test(mi):
t = {x.lower() for x in mi.tags}
if t == tags:
return True
prints('Tags test failed. Expected: \'%s\' found \'%s\''%(tags, t))
return False
return test
def series_test(series, series_index):
series = series.lower()
def test(mi):
ms = mi.series.lower() if mi.series else ''
if (ms == series) and (series_index == mi.series_index):
return True
if mi.series:
prints('Series test failed. Expected: \'%s [%d]\' found \'%s[%d]\''%
(series, series_index, ms, mi.series_index))
else:
prints('Series test failed. Expected: \'%s [%d]\' found no series'%
(series, series_index))
return False
return test
def comments_test(sentinel):
def test(mi):
comm = mi.comments.lower() if mi.comments else ''
if sentinel and sentinel.lower() in comm:
return True
prints('comments test failed. %s not in comments'%sentinel)
return False
return test
def pubdate_test(year, month, day):
def test(mi):
p = mi.pubdate
if p is not None and p.year == year and p.month == month and p.day == day:
return True
return False
return test
def init_test(tdir_name):
tdir = tempfile.gettempdir()
lf = os.path.join(tdir, tdir_name.replace(' ', '')+'_identify_test.txt')
log = create_log(open(lf, 'w'))
abort = Event()
return tdir, lf, log, abort
def dump_log(lf):
prints(open(lf, 'rb').read().decode('utf-8'))
def test_identify(tests): # {{{
'''
:param tests: List of 2-tuples. Each two tuple is of the form (args,
test_funcs). args is a dict of keyword arguments to pass to
the identify method. test_funcs are callables that accept a
Metadata object and return True iff the object passes the
test.
'''
from calibre.ebooks.metadata.sources.identify import identify
tdir, lf, log, abort = init_test('Full Identify')
prints('Log saved to', lf)
times = []
for kwargs, test_funcs in tests:
log('')
log('#'*80)
log('### Running test with:', kwargs)
log('#'*80)
prints('Running test with:', kwargs)
args = (log, abort)
start_time = time.time()
results = identify(*args, **kwargs)
total_time = time.time() - start_time
times.append(total_time)
if not results:
prints('identify failed to find any results')
break
prints('Found', len(results), 'matches:', end=' ')
prints('Smaller relevance means better match')
for i, mi in enumerate(results):
prints('*'*30, 'Relevance:', i, '*'*30)
if mi.rating:
mi.rating *= 2
prints(mi)
prints('\nCached cover URLs :',
[x[0].name for x in get_cached_cover_urls(mi)])
prints('*'*75, '\n\n')
possibles = []
for mi in results:
test_failed = False
for tfunc in test_funcs:
if not tfunc(mi):
test_failed = True
break
if not test_failed:
possibles.append(mi)
if not possibles:
prints('ERROR: No results that passed all tests were found')
prints('Log saved to', lf)
log.close()
dump_log(lf)
raise SystemExit(1)
if results[0] is not possibles[0]:
prints('Most relevant result failed the tests')
raise SystemExit(1)
log('\n\n')
prints('Average time per query', sum(times)/len(times))
prints('Full log is at:', lf)
# }}}
def test_identify_plugin(name, tests, modify_plugin=lambda plugin:None, # {{{
fail_missing_meta=True):
'''
:param name: Plugin name
:param tests: List of 2-tuples. Each two tuple is of the form (args,
test_funcs). args is a dict of keyword arguments to pass to
the identify method. test_funcs are callables that accept a
Metadata object and return True iff the object passes the
test.
'''
plugin = None
for x in all_metadata_plugins():
if x.name == name and 'identify' in x.capabilities:
plugin = x
break
modify_plugin(plugin)
prints('Testing the identify function of', plugin.name)
prints('Using extra headers:', plugin.browser.addheaders)
tdir, lf, log, abort = init_test(plugin.name)
prints('Log saved to', lf)
times = []
for kwargs, test_funcs in tests:
log('')
log('#'*80)
log('### Running test with:', kwargs)
log('#'*80)
prints('Running test with:', kwargs)
rq = Queue()
args = (log, rq, abort)
start_time = time.time()
plugin.running_a_test = True
try:
err = plugin.identify(*args, **kwargs)
finally:
plugin.running_a_test = False
total_time = time.time() - start_time
times.append(total_time)
if err is not None:
prints('identify returned an error for args', args)
prints(err)
break
results = []
while True:
try:
results.append(rq.get_nowait())
except Empty:
break
prints('Found', len(results), 'matches:', end=' ')
prints('Smaller relevance means better match')
results.sort(key=plugin.identify_results_keygen(
title=kwargs.get('title', None), authors=kwargs.get('authors',
None), identifiers=kwargs.get('identifiers', {})))
for i, mi in enumerate(results):
prints('*'*30, 'Relevance:', i, '*'*30)
if mi.rating:
mi.rating *= 2
prints(mi)
prints('\nCached cover URL :',
plugin.get_cached_cover_url(mi.identifiers))
prints('*'*75, '\n\n')
possibles = []
for mi in results:
test_failed = False
for tfunc in test_funcs:
if not tfunc(mi):
test_failed = True
break
if not test_failed:
possibles.append(mi)
if not possibles:
prints('ERROR: No results that passed all tests were found')
prints('Log saved to', lf)
log.close()
dump_log(lf)
raise SystemExit(1)
good = [x for x in possibles if plugin.test_fields(x) is
None]
if not good:
prints('Failed to find', plugin.test_fields(possibles[0]))
if fail_missing_meta:
raise SystemExit(1)
if results[0] is not possibles[0]:
prints('Most relevant result failed the tests')
raise SystemExit(1)
if 'cover' in plugin.capabilities:
rq = Queue()
mi = results[0]
plugin.download_cover(log, rq, abort, title=mi.title,
authors=mi.authors, identifiers=mi.identifiers)
results = []
while True:
try:
results.append(rq.get_nowait())
except Empty:
break
if not results and fail_missing_meta:
prints('Cover download failed')
raise SystemExit(1)
elif results:
cdata = results[0]
cover = os.path.join(tdir, plugin.name.replace(' ',
'')+'-%s-cover.jpg'%sanitize_file_name(mi.title.replace(' ',
'_')))
with open(cover, 'wb') as f:
f.write(cdata[-1])
prints('Cover downloaded to:', cover)
if len(cdata[-1]) < 10240:
prints('Downloaded cover too small')
raise SystemExit(1)
prints('Average time per query', sum(times)/len(times))
if os.stat(lf).st_size > 10:
prints('There were some errors/warnings, see log', lf)
# }}}