#!/usr/bin/env python3

__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'

import os
import re
import time
from collections import defaultdict
from contextlib import contextmanager
from functools import partial

from calibre import prints
from calibre.constants import filesystem_encoding, ismacos, iswindows
from calibre.ebooks import BOOK_EXTENSIONS
from calibre.utils.filenames import make_long_path_useable
from polyglot.builtins import itervalues


def splitext(path):
    key, ext = os.path.splitext(path)
    return key, ext[1:].lower()


def formats_ok(formats):
    return len(formats) > 0


def path_ok(path):
    return not os.path.isdir(path) and os.access(path, os.R_OK)


def compile_glob(pat):
    import fnmatch
    return re.compile(fnmatch.translate(pat), flags=re.I)


def compile_rule(rule):
    mt = rule['match_type']
    if 'with' in mt:
        q = icu_lower(rule['query'])
        if 'startswith' in mt:
            func = lambda filename: icu_lower(filename).startswith(q)
        else:
            func = lambda filename: icu_lower(filename).endswith(q)
    elif 'glob' in mt:
        q = compile_glob(rule['query'])
        func = lambda filename: q.match(filename) is not None
    else:
        q = re.compile(rule['query'])
        func = lambda filename: q.match(filename) is not None
    ans = func
    if mt.startswith('not_'):
        ans = lambda filename: not func(filename)
    return ans, rule['action'] == 'add'


def filter_filename(compiled_rules, filename):
    for q, action in compiled_rules:
        if q(filename):
            return action


_metadata_extensions = None


def metadata_extensions():
    # Set of all known book extensions + OPF (the OPF is used to read metadata,
    # but not actually added)
    global _metadata_extensions
    if _metadata_extensions is None:
        _metadata_extensions = frozenset(BOOK_EXTENSIONS) | {'opf'}
    return _metadata_extensions


if iswindows or ismacos:
    unicode_listdir = os.listdir
else:
    def unicode_listdir(root):
        root = root.encode(filesystem_encoding)
        for x in os.listdir(root):
            try:
                yield x.decode(filesystem_encoding)
            except UnicodeDecodeError:
                prints('Ignoring un-decodable file:', x)


def listdir(root, sort_by_mtime=False):
    items = (make_long_path_useable(os.path.join(root, x)) for x in unicode_listdir(root))
    if sort_by_mtime:
        def safe_mtime(x):
            try:
                return os.path.getmtime(x)
            except OSError:
                return time.time()
        items = sorted(items, key=safe_mtime)

    for path in items:
        if path_ok(path):
            yield path


def allow_path(path, ext, compiled_rules):
    ans = filter_filename(compiled_rules, os.path.basename(path))
    if ans is None:
        ans = ext in metadata_extensions()
    return ans


import_ctx = None


@contextmanager
def run_import_plugins_before_metadata(tdir, group_id=0):
    global import_ctx
    import_ctx = {'tdir': tdir, 'group_id': group_id, 'format_map': {}}
    yield import_ctx
    import_ctx = None


def run_import_plugins(formats):
    from calibre.ebooks.metadata.worker import run_import_plugins
    import_ctx['group_id'] += 1
    ans = run_import_plugins(formats, import_ctx['group_id'], import_ctx['tdir'])
    fm = import_ctx['format_map']
    for old_path, new_path in zip(formats, ans):
        new_path = make_long_path_useable(new_path)
        fm[new_path] = old_path
    return ans


def find_books_in_directory(dirpath, single_book_per_directory, compiled_rules=(), listdir_impl=listdir):
    dirpath = make_long_path_useable(os.path.abspath(dirpath))
    if single_book_per_directory:
        formats = {}
        for path in listdir_impl(dirpath):
            key, ext = splitext(path)
            if allow_path(path, ext, compiled_rules):
                formats[ext] = path
        if formats_ok(formats):
            yield list(itervalues(formats))
    else:
        books = defaultdict(dict)
        for path in listdir_impl(dirpath, sort_by_mtime=True):
            key, ext = splitext(path)
            if allow_path(path, ext, compiled_rules):
                books[icu_lower(key) if isinstance(key, str) else key.lower()][ext] = path

        for formats in itervalues(books):
            if formats_ok(formats):
                yield list(itervalues(formats))


def create_format_map(formats):
    format_map = {}
    for path in formats:
        ext = os.path.splitext(path)[1][1:].upper()
        if ext == 'OPF':
            continue
        format_map[ext] = path
    return format_map


def import_book_directory_multiple(db, dirpath, callback=None, added_ids=None, compiled_rules=(), add_duplicates=False):
    from calibre.ebooks.metadata.meta import metadata_from_formats

    duplicates = []
    for formats in find_books_in_directory(dirpath, False, compiled_rules=compiled_rules):
        mi = metadata_from_formats(formats)
        if mi.title is None:
            continue
        ids, dups = db.new_api.add_books([(mi, create_format_map(formats))], add_duplicates=add_duplicates)
        if dups:
            duplicates.append((mi, formats))
            continue
        book_id = next(iter(ids))
        if added_ids is not None:
            added_ids.add(book_id)
        if callable(callback):
            if callback(mi.title):
                break
    return duplicates


def import_book_directory(db, dirpath, callback=None, added_ids=None, compiled_rules=(), add_duplicates=False):
    from calibre.ebooks.metadata.meta import metadata_from_formats

    dirpath = os.path.abspath(dirpath)
    formats = None
    for formats in find_books_in_directory(dirpath, True, compiled_rules=compiled_rules):
        break
    if not formats:
        return
    mi = metadata_from_formats(formats)
    if mi.title is None:
        return
    ids, dups = db.new_api.add_books([(mi, create_format_map(formats))], add_duplicates=add_duplicates)
    if dups:
        return [(mi, formats)]
    book_id = next(iter(ids))
    if added_ids is not None:
        added_ids.add(book_id)
    if callable(callback):
        callback(mi.title)


def recursive_import(db, root, single_book_per_directory=True, callback=None, added_ids=None, compiled_rules=(), add_duplicates=False):
    root = os.path.abspath(root)
    duplicates = []
    for dirpath in os.walk(root):
        func = import_book_directory if single_book_per_directory else import_book_directory_multiple
        res = func(db, dirpath[0], callback=callback, added_ids=added_ids,
                   compiled_rules=compiled_rules, add_duplicates=add_duplicates)
        if res is not None:
            duplicates.extend(res)
        if callable(callback):
            if callback(''):
                break
    return duplicates


def cdb_find_in_dir(dirpath, single_book_per_directory, compiled_rules):
    return find_books_in_directory(dirpath, single_book_per_directory=single_book_per_directory,
            compiled_rules=compiled_rules, listdir_impl=partial(listdir, sort_by_mtime=True))


def cdb_recursive_find(root, single_book_per_directory=True, compiled_rules=()):
    root = os.path.abspath(root)
    for dirpath in os.walk(root):
        yield from cdb_find_in_dir(dirpath[0], single_book_per_directory, compiled_rules)


def add_catalog(cache, path, title, dbapi=None):
    from calibre.ebooks.metadata.book.base import Metadata
    from calibre.ebooks.metadata.meta import get_metadata
    from calibre.utils.date import utcnow

    fmt = os.path.splitext(path)[1][1:].lower()
    new_book_added = False
    with lopen(path, 'rb') as stream:
        with cache.write_lock:
            matches = cache._search('title:="{}" and tags:="{}"'.format(title.replace('"', '\\"'), _('Catalog')), None)
            db_id = None
            if matches:
                db_id = list(matches)[0]
            try:
                mi = get_metadata(stream, fmt)
                mi.authors = ['calibre']
            except:
                mi = Metadata(title, ['calibre'])
            mi.title, mi.authors = title, ['calibre']
            mi.author_sort = 'calibre'  # The MOBI/AZW3 format sets author sort to date
            mi.tags = [_('Catalog')]
            mi.pubdate = mi.timestamp = utcnow()
            if fmt == 'mobi':
                mi.cover, mi.cover_data = None, (None, None)
            if db_id is None:
                db_id = cache._create_book_entry(mi, apply_import_tags=False)
                new_book_added = True
            else:
                cache._set_metadata(db_id, mi)
        cache.add_format(db_id, fmt, stream, dbapi=dbapi)  # Can't keep write lock since post-import hooks might run

    return db_id, new_book_added


def add_news(cache, path, arg, dbapi=None):
    from calibre.ebooks.metadata.meta import get_metadata
    from calibre.utils.date import utcnow

    fmt = os.path.splitext(getattr(path, 'name', path))[1][1:].lower()
    stream = path if hasattr(path, 'read') else lopen(path, 'rb')
    stream.seek(0)
    mi = get_metadata(stream, fmt, use_libprs_metadata=False, force_read_metadata=True)
    # Force the author to calibre as the auto delete of old news checks for
    # both the author==calibre and the tag News
    mi.authors = ['calibre']
    stream.seek(0)

    with cache.write_lock:
        if mi.series_index is None:
            mi.series_index = cache._get_next_series_num_for(mi.series)
        mi.tags = [_('News')]
        if arg.get('add_title_tag'):
            mi.tags += [arg['title']]
        if arg.get('custom_tags'):
            mi.tags += arg['custom_tags']
        if mi.pubdate is None:
            mi.pubdate = utcnow()
        if mi.timestamp is None:
            mi.timestamp = utcnow()
        db_id = cache._create_book_entry(mi, apply_import_tags=False)
    cache.add_format(db_id, fmt, stream, dbapi=dbapi)  # Can't keep write lock since post-import hooks might run

    if not hasattr(path, 'read'):
        stream.close()
    return db_id
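

# Illustrative usage sketch: how the filename rules and the directory scanner
# above can be combined. The directory path and the rule dictionary below are
# hypothetical examples; the rule keys ('match_type', 'query', 'action') follow
# the shape expected by compile_rule(), where any action other than 'add'
# excludes matching files.
#
#   rules = tuple(compile_rule(r) for r in (
#       {'match_type': 'glob', 'query': '*.sample', 'action': 'ignore'},
#   ))
#   for formats in cdb_recursive_find('/path/to/books', single_book_per_directory=True, compiled_rules=rules):
#       print(formats)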