# Directory: /usr/lib/calibre/calibre/db/
# File: utils.py
#!/usr/bin/env python3
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'

import errno
import os
import re
import sys
from collections import OrderedDict, namedtuple
from locale import localeconv
from threading import Lock

from calibre import as_unicode, prints
from calibre.constants import cache_dir, get_windows_number_formats, iswindows, preferred_encoding
from calibre.utils.localization import canonicalize_lang
from polyglot.builtins import iteritems, itervalues, string_or_bytes


def force_to_bool(val):
    '''
    Coerce *val* to True, False or None.

    Accepts localized yes/no strings (via the ``_`` translation builtin),
    the literals 'true'/'false'/'yes'/'no', and integer strings (anything
    non-zero is True). Returns None when the value cannot be interpreted.
    '''
    if isinstance(val, (bytes, str)):
        if isinstance(val, bytes):
            val = val.decode(preferred_encoding, 'replace')
        try:
            val = icu_lower(val)
            if not val:
                val = None
            elif val in [_('yes'), _('checked'), 'true', 'yes']:
                val = True
            elif val in [_('no'), _('unchecked'), 'false', 'no']:
                val = False
            else:
                val = bool(int(val))
        except Exception:
            # Was a bare except:, which also swallowed KeyboardInterrupt and
            # SystemExit; narrow to Exception while keeping the best-effort
            # "unparseable means None" contract.
            val = None
    return val


_fuzzy_title_patterns = None


def fuzzy_title_patterns():
    '''Return the (compiled pattern, replacement) pairs used by fuzzy_title(),
    building and caching them on first use (the title-sort pattern import is
    deferred to avoid a circular import at module load time).'''
    global _fuzzy_title_patterns
    if _fuzzy_title_patterns is None:
        from calibre.ebooks.metadata import get_title_sort_pat
        _fuzzy_title_patterns = tuple(
            (re.compile(pat, re.IGNORECASE) if isinstance(pat, string_or_bytes) else pat, repl)
            for pat, repl in [
                (r'[\[\](){}<>\'";,:#]', ''),
                (get_title_sort_pat(), ''),
                (r'[-._]', ' '),
                (r'\s+', ' '),
            ])
    return _fuzzy_title_patterns


def fuzzy_title(title):
    '''Normalize *title* for fuzzy comparison: lower-case it, strip
    punctuation and leading articles, and collapse whitespace.'''
    title = icu_lower(title.strip())
    for pat, repl in fuzzy_title_patterns():
        title = pat.sub(repl, title)
    return title


def find_identical_books(mi, data):
    '''
    Find books already in the library that are "identical" to *mi*.

    Identical means: same set of authors (case-insensitively), fuzzily equal
    title, and (when *mi* specifies languages) matching languages.

    :param mi: A Metadata object (must have authors, title, languages)
    :param data: The 4-tuple (author_map, aid_map, title_map, lang_map) as
                 produced by the database backend
    :return: A set of matching book ids (possibly empty)
    '''
    author_map, aid_map, title_map, lang_map = data

    # Intersect the candidate book sets for every author; any unknown author
    # means no identical book can exist.
    found_books = None
    for a in mi.authors:
        author_ids = author_map.get(icu_lower(a))
        if author_ids is None:
            return set()
        books_by_author = {book_id for aid in author_ids for book_id in aid_map.get(aid, ())}
        if found_books is None:
            found_books = books_by_author
        else:
            found_books &= books_by_author
        if not found_books:
            return set()
    if not found_books:
        # mi.authors was empty; previously this fell through and raised
        # TypeError iterating None below.
        return set()

    # Keep only candidates whose title fuzzily matches
    ans = set()
    titleq = fuzzy_title(mi.title)
    for book_id in found_books:
        title = title_map.get(book_id, '')
        if fuzzy_title(title) == titleq:
            ans.add(book_id)

    # Filter by language, but only when mi actually specifies languages and
    # only against books that themselves have language data.
    langq = tuple(filter(lambda x: x and x != 'und', map(canonicalize_lang, mi.languages or ())))
    if not langq:
        return ans

    def lang_matches(book_id):
        book_langq = lang_map.get(book_id)
        return not book_langq or langq == book_langq

    return {book_id for book_id in ans if lang_matches(book_id)}


# One cached thumbnail on disk: its path, size in bytes, source timestamp and
# the (width, height) it was rendered at.
Entry = namedtuple('Entry', 'path size timestamp thumbnail_size')


class CacheError(Exception):
    pass


class ThumbnailCache:

    ' This is a persistent disk cache to speed up loading and resizing of covers '

    def __init__(self,
                 max_size=1024,              # The maximum disk space in MB
                 name='thumbnail-cache',     # The name of this cache (should be unique in location)
                 thumbnail_size=(100, 100),  # The size of the thumbnails, can be changed
                 location=None,              # The location for this cache, if None cache_dir() is used
                 test_mode=False,            # Used for testing
                 min_disk_cache=0):          # If the size is set less than or equal to this value, the cache is disabled.
        self.location = os.path.join(location or cache_dir(), name)
        if max_size <= min_disk_cache:
            max_size = 0
        self.max_size = int(max_size * (1024**2))  # bytes
        self.group_id = 'group'
        self.thumbnail_size = thumbnail_size
        self.size_changed = False
        self.lock = Lock()
        self.min_disk_cache = min_disk_cache
        if test_mode:
            # In test mode any logged error becomes a hard failure
            self.log = self.fail_on_error
        # NOTE: self.items/self.total_size are created lazily by _load_index()

    def log(self, *args, **kwargs):
        kwargs['file'] = sys.stderr
        prints(*args, **kwargs)

    def fail_on_error(self, *args, **kwargs):
        msg = ' '.join(args)
        raise CacheError(msg)

    def _do_delete(self, path):
        # Best-effort removal of a cached thumbnail file
        try:
            os.remove(path)
        except OSError as err:
            self.log('Failed to delete cached thumbnail file:', as_unicode(err))

    def _load_index(self):
        'Load the index, automatically removing incorrectly sized thumbnails and pruning to fit max_size'
        try:
            os.makedirs(self.location)
        except OSError as err:
            if err.errno != errno.EEXIST:
                self.log('Failed to make thumbnail cache dir:', as_unicode(err))
        self.total_size = 0
        self.items = OrderedDict()
        order = self._read_order()

        def listdir(*args):
            try:
                return os.listdir(os.path.join(*args))
            except OSError:
                return ()  # not a directory or no permission or whatever
        # Cache layout is location/<group>/<book_id % 100>/<entry-name>
        entries = ('/'.join((parent, subdir, entry))
                   for parent in listdir(self.location)
                   for subdir in listdir(self.location, parent)
                   for entry in listdir(self.location, parent, subdir))

        # Read (and consume) any pending invalidation records written while
        # the index was not loaded
        invalidate = set()
        try:
            with open(os.path.join(self.location, 'invalidate'), 'rb') as f:
                raw = f.read().decode('utf-8')
        except OSError as err:
            if getattr(err, 'errno', None) != errno.ENOENT:
                self.log('Failed to read thumbnail invalidate data:', as_unicode(err))
        else:
            try:
                os.remove(os.path.join(self.location, 'invalidate'))
            except OSError as err:
                self.log('Failed to remove thumbnail invalidate data:', as_unicode(err))
            else:
                def record(line):
                    # Each line is '<group_id> <book_id>'; malformed lines
                    # yield None, which never matches a real key
                    try:
                        uuid, book_id = line.partition(' ')[0::2]
                        book_id = int(book_id)
                        return (uuid, book_id)
                    except Exception:
                        return None
                invalidate = {record(x) for x in raw.splitlines()}

        items = []
        try:
            for entry in entries:
                try:
                    # Entry name is '<book_id>-<timestamp>-<size>-<w>x<h>'
                    uuid, name = entry.split('/')[0::2]
                    book_id, timestamp, size, thumbnail_size = name.split('-')
                    book_id, timestamp, size = int(book_id), float(timestamp), int(size)
                    thumbnail_size = tuple(map(int, thumbnail_size.partition('x')[0::2]))
                except (ValueError, TypeError, IndexError, KeyError, AttributeError):
                    continue  # not a cache entry, ignore
                key = (uuid, book_id)
                path = os.path.join(self.location, entry)
                if self.thumbnail_size == thumbnail_size and key not in invalidate:
                    items.append((key, Entry(path, size, timestamp, thumbnail_size)))
                    self.total_size += size
                else:
                    # Wrong size or explicitly invalidated: delete from disk
                    self._do_delete(path)
        except OSError as err:
            self.log('Failed to read thumbnail cache dir:', as_unicode(err))
        # Restore the persisted LRU order (unknown keys sort first)
        self.items = OrderedDict(sorted(items, key=lambda x: order.get(x[0], 0)))
        self._apply_size()

    def _invalidate_sizes(self):
        # Drop every entry rendered at a size other than the current one
        if self.size_changed:
            size = self.thumbnail_size
            remove = tuple(key for key, entry in iteritems(self.items) if size != entry.thumbnail_size)
            for key in remove:
                self._remove(key)
            self.size_changed = False

    def _remove(self, key):
        entry = self.items.pop(key, None)
        if entry is not None:
            self._do_delete(entry.path)
            self.total_size -= entry.size

    def _apply_size(self):
        # Evict least-recently-used entries until we fit in max_size
        while self.total_size > self.max_size and self.items:
            entry = self.items.popitem(last=False)[1]
            self._do_delete(entry.path)
            self.total_size -= entry.size

    def _write_order(self):
        if hasattr(self, 'items'):
            try:
                data = '\n'.join(group_id + ' ' + str(book_id) for (group_id, book_id) in self.items)
                # Use open() like the rest of this class (was lopen)
                with open(os.path.join(self.location, 'order'), 'wb') as f:
                    f.write(data.encode('utf-8'))
            except OSError as err:
                self.log('Failed to save thumbnail cache order:', as_unicode(err))

    def _read_order(self):
        order = {}
        try:
            # Use open() like the rest of this class (was lopen)
            with open(os.path.join(self.location, 'order'), 'rb') as f:
                for line in f.read().decode('utf-8').splitlines():
                    parts = line.split(' ', 1)
                    if len(parts) == 2:
                        order[(parts[0], int(parts[1]))] = len(order)
        except Exception as err:
            if getattr(err, 'errno', None) != errno.ENOENT:
                self.log('Failed to load thumbnail cache order:', as_unicode(err))
        return order

    def shutdown(self):
        with self.lock:
            self._write_order()

    def set_group_id(self, group_id):
        with self.lock:
            self.group_id = group_id

    def set_thumbnail_size(self, width, height):
        '''Change the thumbnail size. Returns True when the size actually
        changed; existing wrong-sized entries are removed lazily.'''
        new_size = (width, height)
        with self.lock:
            if new_size != self.thumbnail_size:
                self.thumbnail_size = new_size
                self.size_changed = True
                return True
        return False

    def insert(self, book_id, timestamp, data):
        '''Store thumbnail *data* (bytes) for *book_id* in the current group,
        evicting old entries if the cache grows past max_size.'''
        if self.max_size < len(data):
            return  # single item larger than the whole cache
        with self.lock:
            if not hasattr(self, 'total_size'):
                self._load_index()
            self._invalidate_sizes()
            ts = ('%.2f' % timestamp).replace('.00', '')
            path = '%s%s%s%s%d-%s-%d-%dx%d' % (
                self.group_id, os.sep, book_id % 100, os.sep,
                book_id, ts, len(data), self.thumbnail_size[0], self.thumbnail_size[1])
            path = os.path.join(self.location, path)
            key = (self.group_id, book_id)
            e = self.items.pop(key, None)
            self.total_size -= getattr(e, 'size', 0)
            try:
                with open(path, 'wb') as f:
                    f.write(data)
            except OSError as err:
                d = os.path.dirname(path)
                if not os.path.exists(d):
                    # Subdirectory did not exist yet; create it and retry once
                    try:
                        os.makedirs(d)
                        with open(path, 'wb') as f:
                            f.write(data)
                    except OSError as err:
                        self.log('Failed to write cached thumbnail:', path, as_unicode(err))
                        return self._apply_size()
                else:
                    self.log('Failed to write cached thumbnail:', path, as_unicode(err))
                    return self._apply_size()
            self.items[key] = Entry(path, len(data), timestamp, self.thumbnail_size)
            self.total_size += len(data)
            self._apply_size()

    def __len__(self):
        with self.lock:
            try:
                return len(self.items)
            except AttributeError:
                self._load_index()
                return len(self.items)

    def __contains__(self, book_id):
        with self.lock:
            try:
                return (self.group_id, book_id) in self.items
            except AttributeError:
                self._load_index()
                return (self.group_id, book_id) in self.items

    def __getitem__(self, book_id):
        '''Return (data, timestamp) for book_id in the current group, or
        (None, None) if not cached. Marks the entry as most recently used.'''
        with self.lock:
            if not hasattr(self, 'total_size'):
                self._load_index()
            self._invalidate_sizes()
            key = (self.group_id, book_id)
            entry = self.items.pop(key, None)
            if entry is None:
                return None, None
            if entry.thumbnail_size != self.thumbnail_size:
                # Stale size: remove from disk and report a miss
                try:
                    os.remove(entry.path)
                except OSError as err:
                    if getattr(err, 'errno', None) != errno.ENOENT:
                        self.log('Failed to remove cached thumbnail:', entry.path, as_unicode(err))
                self.total_size -= entry.size
                return None, None
            # Re-insert to move to the most-recently-used end
            self.items[key] = entry
            try:
                with open(entry.path, 'rb') as f:
                    data = f.read()
            except OSError as err:
                self.log('Failed to read cached thumbnail:', entry.path, as_unicode(err))
                return None, None
            return data, entry.timestamp

    def invalidate(self, book_ids):
        '''Mark the thumbnails for *book_ids* (in the current group) as stale.
        If the index is not loaded, a record is appended to the 'invalidate'
        file which _load_index() consumes on next startup.'''
        with self.lock:
            if hasattr(self, 'total_size'):
                for book_id in book_ids:
                    self._remove((self.group_id, book_id))
            elif os.path.exists(self.location):
                try:
                    raw = '\n'.join('%s %d' % (self.group_id, book_id) for book_id in book_ids)
                    with open(os.path.join(self.location, 'invalidate'), 'ab') as f:
                        # Terminate with a newline: without it, two successive
                        # appends fused their boundary records into one
                        # unparseable line, silently losing invalidations.
                        f.write(raw.encode('ascii') + b'\n')
                except OSError as err:
                    self.log('Failed to write invalidate thumbnail record:', as_unicode(err))

    @property
    def current_size(self):
        with self.lock:
            if not hasattr(self, 'total_size'):
                self._load_index()
            return self.total_size

    def empty(self):
        'Delete all cached thumbnails and the persisted order file.'
        with self.lock:
            try:
                os.remove(os.path.join(self.location, 'order'))
            except OSError:
                pass  # order file may not exist
            if not hasattr(self, 'total_size'):
                self._load_index()
            for entry in itervalues(self.items):
                self._do_delete(entry.path)
            self.total_size = 0
            self.items = OrderedDict()

    def __hash__(self):
        return id(self)

    def set_size(self, size_in_mb):
        'Change the maximum cache size (in MB); 0 or below min_disk_cache disables caching.'
        if size_in_mb <= self.min_disk_cache:
            size_in_mb = 0
        size_in_mb = max(0, size_in_mb)
        with self.lock:
            self.max_size = int(size_in_mb * (1024**2))
            if hasattr(self, 'total_size'):
                self._apply_size()


number_separators = None


def atof(string):
    '''Parse a float from *string* using the current locale's thousands and
    decimal separators (cached in the module-level ``number_separators``).'''
    # Python 2.x does not handle unicode number separators correctly, so we
    # have to implement our own
    global number_separators
    if number_separators is None:
        if iswindows:
            number_separators = get_windows_number_formats()
        else:
            lc = localeconv()
            t, d = lc['thousands_sep'], lc['decimal_point']
            if isinstance(t, bytes):
                t = t.decode('utf-8', 'ignore') or ','
            if isinstance(d, bytes):
                d = d.decode('utf-8', 'ignore') or '.'
            number_separators = t, d
    return float(string.replace(number_separators[1], '.').replace(number_separators[0], ''))


def type_safe_sort_key_function(keyfunc=None):
    '''Wrap *keyfunc* so the resulting key function never raises TypeError
    during sorting: any key that is not mutually comparable with the first
    key seen is replaced by that first key (i.e. sorts as equal to it).'''
    if keyfunc is None:
        keyfunc = lambda x: x
    sentinel = object()
    first_value = sentinel

    def key(x):
        nonlocal first_value
        ans = keyfunc(x)
        if first_value is sentinel:
            first_value = ans
        else:
            try:
                # Probe comparability in both directions
                ans < first_value
                first_value < ans
            except TypeError:
                ans = first_value
        return ans

    return key