#!/usr/bin/env python3

__license__   = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

# Imports {{{
import apsw
import errno
import glob
import hashlib
import json
import os
import shutil
import sys
import time
import uuid
from functools import partial

from calibre import as_unicode, force_unicode, isbytestring, prints
from calibre.constants import (
    filesystem_encoding, iswindows, plugins, preferred_encoding
)
from calibre.db import SPOOL_SIZE
from calibre.db.annotations import annot_db_data, unicode_normalize
from calibre.db.delete_service import delete_service
from calibre.db.errors import NoSuchFormat
from calibre.db.schema_upgrades import SchemaUpgrade
from calibre.db.tables import (
    AuthorsTable, CompositeTable, FormatsTable, IdentifiersTable,
    ManyToManyTable, ManyToOneTable, OneToOneTable, PathTable, RatingTable,
    SizeTable, UUIDTable
)
from calibre.ebooks.metadata import author_to_author_sort, title_sort
from calibre.library.field_metadata import FieldMetadata
from calibre.ptempfile import PersistentTemporaryFile, TemporaryFile
from calibre.utils import pickle_binary_string, unpickle_binary_string
from calibre.utils.config import from_json, prefs, to_json, tweaks
from calibre.utils.date import EPOCH, parse_date, utcfromtimestamp, utcnow
from calibre.utils.filenames import (
    WindowsAtomicFolderMove, ascii_filename, atomic_rename,
    copyfile_using_links, copytree_using_links, hardlink_file,
    is_case_sensitive, remove_dir_if_empty, samefile
)
from calibre.utils.formatter_functions import (
    compile_user_template_functions, formatter_functions,
    load_user_template_functions, unload_user_template_functions
)
from calibre.utils.icu import sort_key
from calibre.utils.img import save_cover_data_to
from polyglot.builtins import (
    cmp, iteritems, itervalues, native_string_type, reraise, string_or_bytes
)

# }}}


class FTSQueryError(ValueError):

    def __init__(self, query, sql_statement, apsw_error):
        ValueError.__init__(self, f'Failed to parse search query: {query} with error: {apsw_error}')
        self.query = query
        self.sql_statement = sql_statement


CUSTOM_DATA_TYPES = frozenset(('rating', 'text', 'comments', 'datetime',
    'int', 'float', 'bool', 'series', 'composite', 'enumeration'))
WINDOWS_RESERVED_NAMES = frozenset('CON PRN AUX NUL COM1 COM2 COM3 COM4 COM5 COM6 COM7 COM8 COM9 LPT1 LPT2 LPT3 LPT4 LPT5 LPT6 LPT7 LPT8 LPT9'.split())


class DynamicFilter:  # {{{

    'No longer used, present for legacy compatibility'

    def __init__(self, name):
        self.name = name
        self.ids = frozenset()

    def __call__(self, id_):
        return int(id_ in self.ids)

    def change(self, ids):
        self.ids = frozenset(ids)
# }}}


class DBPrefs(dict):  # {{{

    'Store preferences as key:value pairs in the db'

    def __init__(self, db):
        dict.__init__(self)
        self.db = db
        self.defaults = {}
        self.disable_setting = False
        self.load_from_db()

    def load_from_db(self):
        self.clear()
        for key, val in self.db.conn.get('SELECT key,val FROM preferences'):
            try:
                val = self.raw_to_object(val)
            except:
                prints('Failed to read value for:', key, 'from db')
                continue
            dict.__setitem__(self, key, val)

    def raw_to_object(self, raw):
        if not isinstance(raw, str):
            raw = raw.decode(preferred_encoding)
        return json.loads(raw, object_hook=from_json)

    def to_raw(self, val):
        # sort_keys=True is required so that the serialization of dictionaries is
        # not random, which is needed for the changed check in __setitem__
        return json.dumps(val, indent=2, default=to_json, sort_keys=True)

    def has_setting(self, key):
        return key in self

    def __getitem__(self, key):
        try:
            return dict.__getitem__(self, key)
        except KeyError:
            return self.defaults[key]

    def __delitem__(self, key):
        dict.__delitem__(self, key)
        self.db.execute('DELETE FROM preferences WHERE key=?', (key,))

    def __setitem__(self, key, val):
        if not self.disable_setting:
            raw = self.to_raw(val)
            do_set = False
            with self.db.conn:
                try:
                    dbraw = next(self.db.execute('SELECT id,val FROM preferences WHERE key=?', (key,)))
                except StopIteration:
                    dbraw = None
                if dbraw is None or dbraw[1] != raw:
                    if dbraw is None:
                        self.db.execute('INSERT INTO preferences (key,val) VALUES (?,?)', (key, raw))
                    else:
                        self.db.execute('UPDATE preferences SET val=? WHERE id=?', (raw, dbraw[0]))
                    do_set = True
            if do_set:
                dict.__setitem__(self, key, val)

    def set(self, key, val):
        self.__setitem__(key, val)

    def get_namespaced(self, namespace, key, default=None):
        key = 'namespaced:%s:%s'%(namespace, key)
        try:
            return dict.__getitem__(self, key)
        except KeyError:
            return default

    def set_namespaced(self, namespace, key, val):
        if ':' in key:
            raise KeyError('Colons are not allowed in keys')
        if ':' in namespace:
            raise KeyError('Colons are not allowed in the namespace')
        key = 'namespaced:%s:%s'%(namespace, key)
        self[key] = val

    def write_serialized(self, library_path):
        try:
            to_filename = os.path.join(library_path, 'metadata_db_prefs_backup.json')
            data = json.dumps(self, indent=2, default=to_json)
            if not isinstance(data, bytes):
                data = data.encode('utf-8')
            with open(to_filename, "wb") as f:
                f.write(data)
        except:
            import traceback
            traceback.print_exc()

    @classmethod
    def read_serialized(cls, library_path, recreate_prefs=False):
        from_filename = os.path.join(library_path, 'metadata_db_prefs_backup.json')
        with open(from_filename, "rb") as f:
            return json.load(f, object_hook=from_json)
# }}}

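# Illustrative sketch (not part of this module): DBPrefs behaves like a dict
# whose writes are mirrored into the ``preferences`` table as JSON. Assuming
# ``db`` is an initialized DB instance, hypothetical usage looks like:
#
#     db.prefs['categories_using_hierarchy'] = ['tags']   # persisted as JSON
#     db.prefs.set_namespaced('myplugin', 'option', 42)   # stored under 'namespaced:myplugin:option'
#     db.prefs.get_namespaced('myplugin', 'option')       # -> 42
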
# Extra collators {{{

def pynocase(one, two, encoding='utf-8'):
    if isbytestring(one):
        try:
            one = one.decode(encoding, 'replace')
        except:
            pass
    if isbytestring(two):
        try:
            two = two.decode(encoding, 'replace')
        except:
            pass
    return cmp(one.lower(), two.lower())


def _author_to_author_sort(x):
    if not x:
        return ''
    return author_to_author_sort(x.replace('|', ','))


def icu_collator(s1, s2):
    return cmp(sort_key(force_unicode(s1, 'utf-8')), sort_key(force_unicode(s2, 'utf-8')))

# }}}


# Unused aggregators {{{

def Concatenate(sep=','):
    '''String concatenation aggregator for sqlite'''

    def step(ctxt, value):
        if value is not None:
            ctxt.append(value)

    def finalize(ctxt):
        try:
            if not ctxt:
                return None
            return sep.join(ctxt)
        except Exception:
            import traceback
            traceback.print_exc()
            raise

    return ([], step, finalize)


def SortedConcatenate(sep=','):
    '''String concatenation aggregator for sqlite, sorted by supplied index'''

    def step(ctxt, ndx, value):
        if value is not None:
            ctxt[ndx] = value

    def finalize(ctxt):
        try:
            if len(ctxt) == 0:
                return None
            return sep.join(map(ctxt.get, sorted(ctxt)))
        except Exception:
            import traceback
            traceback.print_exc()
            raise

    return ({}, step, finalize)


def IdentifiersConcat():
    '''String concatenation aggregator for the identifiers map'''

    def step(ctxt, key, val):
        ctxt.append('%s:%s'%(key, val))

    def finalize(ctxt):
        try:
            return ','.join(ctxt)
        except Exception:
            import traceback
            traceback.print_exc()
            raise

    return ([], step, finalize)


def AumSortedConcatenate():
    '''String concatenation aggregator for the author sort map'''

    def step(ctxt, ndx, author, sort, link):
        if author is not None:
            ctxt[ndx] = ':::'.join((author, sort, link))

    def finalize(ctxt):
        try:
            keys = list(ctxt)
            l = len(keys)
            if l == 0:
                return None
            if l == 1:
                return ctxt[keys[0]]
            return ':#:'.join([ctxt[v] for v in sorted(keys)])
        except Exception:
            import traceback
            traceback.print_exc()
            raise

    return ({}, step, finalize)
# }}}

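# Illustrative sketch (assumption, not calibre code): each aggregator above is
# an apsw aggregate *factory* returning ``(context, step, final)``. With a
# bare apsw connection one could register and exercise one like:
#
#     conn = apsw.Connection(':memory:')
#     conn.createaggregatefunction('concat', Concatenate, 1)
#     conn.cursor().execute('CREATE TABLE t(x); INSERT INTO t VALUES ("a"), ("b")')
#     # SELECT concat(x) FROM t   ->   'a,b'
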
# Annotations {{{

def annotations_for_book(cursor, book_id, fmt, user_type='local', user='viewer'):
    for (data,) in cursor.execute(
        'SELECT annot_data FROM annotations WHERE book=? AND format=? AND user_type=? AND user=?',
        (book_id, fmt.upper(), user_type, user)
    ):
        try:
            yield json.loads(data)
        except Exception:
            pass


def save_annotations_for_book(cursor, book_id, fmt, annots_list, user_type='local', user='viewer'):
    data = []
    fmt = fmt.upper()
    for annot, timestamp_in_secs in annots_list:
        atype = annot['type'].lower()
        aid, text = annot_db_data(annot)
        if aid is None:
            continue
        data.append((book_id, fmt, user_type, user, timestamp_in_secs, aid, atype, json.dumps(annot), text))
    cursor.execute('INSERT OR IGNORE INTO annotations_dirtied (book) VALUES (?)', (book_id,))
    cursor.execute('DELETE FROM annotations WHERE book=? AND format=? AND user_type=? AND user=?',
                   (book_id, fmt, user_type, user))
    cursor.executemany(
        'INSERT OR REPLACE INTO annotations (book, format, user_type, user, timestamp, annot_id, annot_type, annot_data, searchable_text)'
        ' VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)', data)
# }}}


class Connection(apsw.Connection):  # {{{

    BUSY_TIMEOUT = 10000  # milliseconds

    def __init__(self, path):
        from calibre.utils.localization import get_lang
        from calibre_extensions.sqlite_extension import set_ui_language
        set_ui_language(get_lang())
        super().__init__(path)
        plugins.load_apsw_extension(self, 'sqlite_extension')
        self.setbusytimeout(self.BUSY_TIMEOUT)
        self.execute('pragma cache_size=-5000')
        self.execute('pragma temp_store=2')

        encoding = next(self.execute('pragma encoding'))[0]
        self.createcollation('PYNOCASE', partial(pynocase, encoding=encoding))

        self.createscalarfunction('title_sort', title_sort, 1)
        self.createscalarfunction('author_to_author_sort', _author_to_author_sort, 1)
        self.createscalarfunction('uuid4', lambda: str(uuid.uuid4()), 0)

        # Dummy functions for dynamically created filters
        self.createscalarfunction('books_list_filter', lambda x: 1, 1)
        self.createcollation('icucollate', icu_collator)

        # Legacy aggregators (never used) but present for backwards compat
        self.createaggregatefunction('sortconcat', SortedConcatenate, 2)
        self.createaggregatefunction('sortconcat_bar', partial(SortedConcatenate, sep='|'), 2)
        self.createaggregatefunction('sortconcat_amper', partial(SortedConcatenate, sep='&'), 2)
        self.createaggregatefunction('identifiers_concat', IdentifiersConcat, 2)
        self.createaggregatefunction('concat', Concatenate, 1)
        self.createaggregatefunction('aum_sortconcat', AumSortedConcatenate, 4)

    def create_dynamic_filter(self, name):
        f = DynamicFilter(name)
        self.createscalarfunction(name, f, 1)

    def get(self, *args, **kw):
        ans = self.cursor().execute(*args)
        if kw.get('all', True):
            return ans.fetchall()
        try:
            return next(ans)[0]
        except (StopIteration, IndexError):
            return None

    def execute(self, sql, bindings=None):
        cursor = self.cursor()
        return cursor.execute(sql, bindings)

    def executemany(self, sql, sequence_of_bindings):
        with self:  # Disable autocommit mode, for performance
            return self.cursor().executemany(sql, sequence_of_bindings)
# }}}


def set_global_state(backend):
    load_user_template_functions(
        backend.library_id, (), precompiled_user_functions=backend.get_user_template_functions())


def rmtree_with_retry(path, sleep_time=1):
    try:
        shutil.rmtree(path)
    except OSError as e:
        if e.errno == errno.ENOENT and not os.path.exists(path):
            return
        time.sleep(sleep_time)  # In case something has temporarily locked a file
        shutil.rmtree(path)

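# Illustrative sketch (assumption): Connection.get() is a small convenience
# wrapper over apsw cursors. Given a Connection ``conn``:
#
#     conn.get('SELECT key,val FROM preferences')          # list of row tuples
#     conn.get('SELECT uuid FROM library_id', all=False)   # single scalar, or None
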
class DB:

    PATH_LIMIT = 40 if iswindows else 100
    WINDOWS_LIBRARY_PATH_LIMIT = 75

    # Initialize database {{{

    def __init__(self, library_path, default_prefs=None, read_only=False,
                 restore_all_prefs=False, progress_callback=lambda x, y:True,
                 load_user_formatter_functions=True):
        self.is_closed = False
        try:
            if isbytestring(library_path):
                library_path = library_path.decode(filesystem_encoding)
        except:
            import traceback
            traceback.print_exc()

        self.field_metadata = FieldMetadata()

        self.library_path = os.path.abspath(library_path)
        self.dbpath = os.path.join(library_path, 'metadata.db')
        self.dbpath = os.environ.get('CALIBRE_OVERRIDE_DATABASE_PATH', self.dbpath)

        if iswindows and len(self.library_path) + 4*self.PATH_LIMIT + 10 > 259:
            raise ValueError(_(
                'Path to library ({0}) too long. It must be less than'
                ' {1} characters.').format(self.library_path, 259-4*self.PATH_LIMIT-10))
        exists = self._exists = os.path.exists(self.dbpath)
        if not exists:
            # Be more strict when creating new libraries as the old calculation
            # allowed for max path lengths of 265 chars.
            if (iswindows and len(self.library_path) > self.WINDOWS_LIBRARY_PATH_LIMIT):
                raise ValueError(_(
                    'Path to library too long. It must be less than'
                    ' %d characters.')%self.WINDOWS_LIBRARY_PATH_LIMIT)

        if read_only and os.path.exists(self.dbpath):
            # Work on only a copy of metadata.db to ensure that
            # metadata.db is not changed
            pt = PersistentTemporaryFile('_metadata_ro.db')
            pt.close()
            shutil.copyfile(self.dbpath, pt.name)
            self.dbpath = pt.name

        if not os.path.exists(os.path.dirname(self.dbpath)):
            os.makedirs(os.path.dirname(self.dbpath))

        self._conn = None
        if self.user_version == 0:
            self.initialize_database()

        if not os.path.exists(self.library_path):
            os.makedirs(self.library_path)
        self.is_case_sensitive = is_case_sensitive(self.library_path)

        SchemaUpgrade(self, self.library_path, self.field_metadata)

        # Guarantee that the library_id is set
        self.library_id

        # Fix legacy triggers and columns
        self.execute('''
        DROP TRIGGER IF EXISTS author_insert_trg;
        CREATE TEMP TRIGGER author_insert_trg
            AFTER INSERT ON authors
            BEGIN
            UPDATE authors SET sort=author_to_author_sort(NEW.name) WHERE id=NEW.id;
            END;
        DROP TRIGGER IF EXISTS author_update_trg;
        CREATE TEMP TRIGGER author_update_trg
            BEFORE UPDATE ON authors
            BEGIN
            UPDATE authors SET sort=author_to_author_sort(NEW.name)
            WHERE id=NEW.id AND name <> NEW.name;
            END;
        UPDATE authors SET sort=author_to_author_sort(name) WHERE sort IS NULL;
        ''')

        # Initialize_prefs must be called before initialize_custom_columns because
        # icc can set a pref.
        self.initialize_prefs(default_prefs, restore_all_prefs, progress_callback)
        self.initialize_custom_columns()
        self.initialize_tables()
        self.set_user_template_functions(compile_user_template_functions(
            self.prefs.get('user_template_functions', [])))
        if load_user_formatter_functions:
            set_global_state(self)

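    # Illustrative sketch (assumption, simplified): constructing the backend
    # against an existing library folder wires up everything above:
    #
    #     backend = DB('/path/to/Calibre Library', read_only=True)
    #     backend.read_tables()   # load metadata into the in-memory tables
    #     backend.close()
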
    def get_template_functions(self):
        return self._template_functions

    def get_user_template_functions(self):
        return self._user_template_functions

    def set_user_template_functions(self, user_formatter_functions):
        self._user_template_functions = user_formatter_functions
        self._template_functions = formatter_functions().get_builtins_and_aliases().copy()
        self._template_functions.update(user_formatter_functions)

    def initialize_prefs(self, default_prefs, restore_all_prefs, progress_callback):  # {{{
        self.prefs = DBPrefs(self)

        if default_prefs is not None and not self._exists:
            progress_callback(None, len(default_prefs))
            # Only apply default prefs to a new database
            for i, key in enumerate(default_prefs):
                # be sure that prefs not to be copied are listed below
                if restore_all_prefs or key not in frozenset(['news_to_be_synced']):
                    self.prefs[key] = default_prefs[key]
                    progress_callback(_('restored preference ') + key, i+1)
            if 'field_metadata' in default_prefs:
                fmvals = [f for f in default_prefs['field_metadata'].values() if f['is_custom']]
                progress_callback(None, len(fmvals))
                for i, f in enumerate(fmvals):
                    progress_callback(_('creating custom column ') + f['label'], i)
                    self.create_custom_column(f['label'], f['name'],
                            f['datatype'],
                            (f['is_multiple'] is not None and len(f['is_multiple']) > 0),
                            f['is_editable'], f['display'])

        defs = self.prefs.defaults
        defs['gui_restriction'] = defs['cs_restriction'] = ''
        defs['categories_using_hierarchy'] = []
        defs['column_color_rules'] = []
        defs['column_icon_rules'] = []
        defs['cover_grid_icon_rules'] = []
        defs['grouped_search_make_user_categories'] = []
        defs['similar_authors_search_key'] = 'authors'
        defs['similar_authors_match_kind'] = 'match_any'
        defs['similar_publisher_search_key'] = 'publisher'
        defs['similar_publisher_match_kind'] = 'match_any'
        defs['similar_tags_search_key'] = 'tags'
        defs['similar_tags_match_kind'] = 'match_all'
        defs['similar_series_search_key'] = 'series'
        defs['similar_series_match_kind'] = 'match_any'
        defs['book_display_fields'] = [
            ('title', False), ('authors', True), ('series', True),
            ('identifiers', True), ('tags', True), ('formats', True),
            ('path', True), ('publisher', False), ('rating', False),
            ('author_sort', False), ('sort', False), ('timestamp', False),
            ('uuid', False), ('comments', True), ('id', False),
            ('pubdate', False), ('last_modified', False), ('size', False),
            ('languages', False),
        ]
        defs['popup_book_display_fields'] = [('title', True)] + [(f[0], True) for f in defs['book_display_fields'] if f[0] != 'title']
        defs['qv_display_fields'] = [('title', True), ('authors', True), ('series', True)]
        defs['virtual_libraries'] = {}
        defs['virtual_lib_on_startup'] = defs['cs_virtual_lib_on_startup'] = ''
        defs['virt_libs_hidden'] = defs['virt_libs_order'] = ()
        defs['update_all_last_mod_dates_on_start'] = False
        defs['field_under_covers_in_grid'] = 'title'
        defs['cover_browser_title_template'] = '{title}'
        defs['cover_browser_subtitle_field'] = 'rating'
        defs['styled_columns'] = {}
        defs['edit_metadata_ignore_display_order'] = False

        # Migrate the bool tristate tweak
        defs['bools_are_tristate'] = \
                tweaks.get('bool_custom_columns_are_tristate', 'yes') == 'yes'
        if self.prefs.get('bools_are_tristate') is None:
            self.prefs.set('bools_are_tristate', defs['bools_are_tristate'])

        # Migrate column coloring rules
        if self.prefs.get('column_color_name_1', None) is not None:
            from calibre.library.coloring import migrate_old_rule
            old_rules = []
            for i in range(1, 6):
                col = self.prefs.get('column_color_name_%d' % i, None)
                templ = self.prefs.get('column_color_template_%d' % i, None)
                if col and templ:
                    try:
                        del self.prefs['column_color_name_%d' % i]
                        rules = migrate_old_rule(self.field_metadata, templ)
                        for templ in rules:
                            old_rules.append((col, templ))
                    except:
                        pass
            if old_rules:
                self.prefs['column_color_rules'] += old_rules

        # Migrate saved search and user categories to db preference scheme
        def migrate_preference(key, default):
            oldval = prefs[key]
            if oldval != default:
                self.prefs[key] = oldval
                prefs[key] = default
            if key not in self.prefs:
                self.prefs[key] = default

        migrate_preference('user_categories', {})
        migrate_preference('saved_searches', {})

        # migrate grouped_search_terms
        if self.prefs.get('grouped_search_terms', None) is None:
            try:
                ogst = tweaks.get('grouped_search_terms', {})
                ngst = {}
                for t in ogst:
                    ngst[icu_lower(t)] = ogst[t]
                self.prefs.set('grouped_search_terms', ngst)
            except:
                pass

        # migrate the gui_restriction preference to a virtual library
        gr_pref = self.prefs.get('gui_restriction', None)
        if gr_pref:
            virt_libs = self.prefs.get('virtual_libraries', {})
            virt_libs[gr_pref] = 'search:"' + gr_pref + '"'
            self.prefs['virtual_libraries'] = virt_libs
            self.prefs['gui_restriction'] = ''
            self.prefs['virtual_lib_on_startup'] = gr_pref

        # migrate the cs_restriction preference to a virtual library
        gr_pref = self.prefs.get('cs_restriction', None)
        if gr_pref:
            virt_libs = self.prefs.get('virtual_libraries', {})
            virt_libs[gr_pref] = 'search:"' + gr_pref + '"'
            self.prefs['virtual_libraries'] = virt_libs
            self.prefs['cs_restriction'] = ''
            self.prefs['cs_virtual_lib_on_startup'] = gr_pref

        # Rename any user categories with names that differ only in case
        user_cats = self.prefs.get('user_categories', [])
        catmap = {}
        for uc in user_cats:
            ucl = icu_lower(uc)
            if ucl not in catmap:
                catmap[ucl] = []
            catmap[ucl].append(uc)
        cats_changed = False
        for uc in catmap:
            if len(catmap[uc]) > 1:
                prints('found user category case overlap', catmap[uc])
                cat = catmap[uc][0]
                suffix = 1
                while icu_lower(cat + str(suffix)) in catmap:
                    suffix += 1
                prints('Renaming user category %s to %s'%(cat, cat+str(suffix)))
                user_cats[cat + str(suffix)] = user_cats[cat]
                del user_cats[cat]
                cats_changed = True
        if cats_changed:
            self.prefs.set('user_categories', user_cats)
    # }}}

    def initialize_custom_columns(self):  # {{{
        self.custom_columns_deleted = False
        with self.conn:
            # Delete previously marked custom columns
            for record in self.conn.get(
                    'SELECT id FROM custom_columns WHERE mark_for_delete=1'):
                num = record[0]
                table, lt = self.custom_table_names(num)
                self.execute('''\
                        DROP INDEX   IF EXISTS {table}_idx;
                        DROP INDEX   IF EXISTS {lt}_aidx;
                        DROP INDEX   IF EXISTS {lt}_bidx;
                        DROP TRIGGER IF EXISTS fkc_update_{lt}_a;
                        DROP TRIGGER IF EXISTS fkc_update_{lt}_b;
                        DROP TRIGGER IF EXISTS fkc_insert_{lt};
                        DROP TRIGGER IF EXISTS fkc_delete_{lt};
                        DROP TRIGGER IF EXISTS fkc_insert_{table};
                        DROP TRIGGER IF EXISTS fkc_delete_{table};
                        DROP VIEW    IF EXISTS tag_browser_{table};
                        DROP VIEW    IF EXISTS tag_browser_filtered_{table};
                        DROP TABLE   IF EXISTS {table};
                        DROP TABLE   IF EXISTS {lt};
                        '''.format(table=table, lt=lt)
                )
                self.prefs.set('update_all_last_mod_dates_on_start', True)
            self.execute('DELETE FROM custom_columns WHERE mark_for_delete=1')

        # Load metadata for custom columns
        self.custom_column_label_map, self.custom_column_num_map = {}, {}
        self.custom_column_num_to_label_map = {}
        triggers = []
        remove = []
        custom_tables = self.custom_tables
        for record in self.conn.get(
                'SELECT label,name,datatype,editable,display,normalized,id,is_multiple FROM custom_columns'):
            data = {
                    'label':record[0],
                    'name':record[1],
                    'datatype':record[2],
                    'editable':bool(record[3]),
                    'display':json.loads(record[4]),
                    'normalized':bool(record[5]),
                    'num':record[6],
                    'is_multiple':bool(record[7]),
                    }
            if data['display'] is None:
                data['display'] = {}
            # set up the is_multiple separator dict
            if data['is_multiple']:
                if data['display'].get('is_names', False):
                    seps = {'cache_to_list': '|', 'ui_to_list': '&', 'list_to_ui': ' & '}
                elif data['datatype'] == 'composite':
                    seps = {'cache_to_list': ',', 'ui_to_list': ',', 'list_to_ui': ', '}
                else:
                    seps = {'cache_to_list': '|', 'ui_to_list': ',', 'list_to_ui': ', '}
            else:
                seps = {}
            data['multiple_seps'] = seps

            table, lt = self.custom_table_names(data['num'])
            if table not in custom_tables or (data['normalized'] and lt not in custom_tables):
                remove.append(data)
                continue

            self.custom_column_num_map[data['num']] = \
                self.custom_column_label_map[data['label']] = data
            self.custom_column_num_to_label_map[data['num']] = data['label']

            # Create Foreign Key triggers
            if data['normalized']:
                trigger = 'DELETE FROM %s WHERE book=OLD.id;'%lt
            else:
                trigger = 'DELETE FROM %s WHERE book=OLD.id;'%table
            triggers.append(trigger)

        if remove:
            with self.conn:
                for data in remove:
                    prints('WARNING: Custom column %r not found, removing.' %
                            data['label'])
                    self.execute('DELETE FROM custom_columns WHERE id=?', (data['num'],))

        if triggers:
            with self.conn:
                self.execute('''\
                    CREATE TEMP TRIGGER custom_books_delete_trg
                        AFTER DELETE ON books
                        BEGIN
                        %s
                    END;
                    '''%(' \n'.join(triggers)))

        # Setup data adapters
        def adapt_text(x, d):
            if d['is_multiple']:
                if x is None:
                    return []
                if isinstance(x, (str, bytes)):
                    x = x.split(d['multiple_seps']['ui_to_list'])
                x = [y.strip() for y in x if y.strip()]
                x = [y.decode(preferred_encoding, 'replace') if not isinstance(y, str) else y for y in x]
                return [' '.join(y.split()) for y in x]
            else:
                return x if x is None or isinstance(x, str) else \
                        x.decode(preferred_encoding, 'replace')

        def adapt_datetime(x, d):
            if isinstance(x, (str, bytes)):
                if isinstance(x, bytes):
                    x = x.decode(preferred_encoding, 'replace')
                x = parse_date(x, assume_utc=False, as_utc=False)
            return x

        def adapt_bool(x, d):
            if isinstance(x, (str, bytes)):
                if isinstance(x, bytes):
                    x = x.decode(preferred_encoding, 'replace')
                x = x.lower()
                if x == 'true':
                    x = True
                elif x == 'false':
                    x = False
                elif x == 'none':
                    x = None
                else:
                    x = bool(int(x))
            return x

        def adapt_enum(x, d):
            v = adapt_text(x, d)
            if not v:
                v = None
            return v

        def adapt_number(x, d):
            if x is None:
                return None
            if isinstance(x, (str, bytes)):
                if isinstance(x, bytes):
                    x = x.decode(preferred_encoding, 'replace')
                if x.lower() == 'none':
                    return None
            if d['datatype'] == 'int':
                return int(x)
            return float(x)

        self.custom_data_adapters = {
                'float': adapt_number,
                'int': adapt_number,
                'rating':lambda x,d: x if x is None else min(10., max(0., float(x))),
                'bool': adapt_bool,
                'comments': lambda x,d: adapt_text(x, {'is_multiple':False}),
                'datetime': adapt_datetime,
                'text':adapt_text,
                'series':adapt_text,
                'enumeration': adapt_enum
        }

        # Create Tag Browser categories for custom columns
        for k in sorted(self.custom_column_label_map):
            v = self.custom_column_label_map[k]
            if v['normalized']:
                is_category = True
            else:
                is_category = False
            is_m = v['multiple_seps']
            tn = 'custom_column_{}'.format(v['num'])
            self.field_metadata.add_custom_field(label=v['label'],
                    table=tn, column='value', datatype=v['datatype'],
                    colnum=v['num'], name=v['name'], display=v['display'],
                    is_multiple=is_m, is_category=is_category,
                    is_editable=v['editable'], is_csp=False)
    # }}}

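    # Illustrative sketch (assumption): the adapters above coerce raw user
    # input into a column's storage type. For a multi-valued text column whose
    # metadata dict is ``d`` (with ui_to_list == ','):
    #
    #     adapt = self.custom_data_adapters['text']
    #     adapt('Alice, Bob', d)   # -> ['Alice', 'Bob']
    #     adapt(None, d)           # -> []
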
    def initialize_tables(self):  # {{{
        tables = self.tables = {}
        for col in ('title', 'sort', 'author_sort', 'series_index', 'comments',
                'timestamp', 'pubdate', 'uuid', 'path', 'cover',
                'last_modified'):
            metadata = self.field_metadata[col].copy()
            if col == 'comments':
                metadata['table'], metadata['column'] = 'comments', 'text'
            if not metadata['table']:
                metadata['table'], metadata['column'] = 'books', ('has_cover'
                        if col == 'cover' else col)
            if not metadata['column']:
                metadata['column'] = col
            tables[col] = (PathTable if col == 'path' else UUIDTable if col == 'uuid' else OneToOneTable)(col, metadata)

        for col in ('series', 'publisher'):
            tables[col] = ManyToOneTable(col, self.field_metadata[col].copy())

        for col in ('authors', 'tags', 'formats', 'identifiers', 'languages', 'rating'):
            cls = {
                    'authors':AuthorsTable,
                    'formats':FormatsTable,
                    'identifiers':IdentifiersTable,
                    'rating':RatingTable,
                  }.get(col, ManyToManyTable)
            tables[col] = cls(col, self.field_metadata[col].copy())

        tables['size'] = SizeTable('size', self.field_metadata['size'].copy())

        self.FIELD_MAP = {
            'id':0, 'title':1, 'authors':2, 'timestamp':3, 'size':4,
            'rating':5, 'tags':6, 'comments':7, 'series':8, 'publisher':9,
            'series_index':10, 'sort':11, 'author_sort':12, 'formats':13,
            'path':14, 'pubdate':15, 'uuid':16, 'cover':17, 'au_map':18,
            'last_modified':19, 'identifiers':20, 'languages':21,
        }

        for k,v in iteritems(self.FIELD_MAP):
            self.field_metadata.set_field_record_index(k, v, prefer_custom=False)

        base = max(itervalues(self.FIELD_MAP))

        for label_ in sorted(self.custom_column_label_map):
            data = self.custom_column_label_map[label_]
            label = self.field_metadata.custom_field_prefix + label_
            metadata = self.field_metadata[label].copy()
            link_table = self.custom_table_names(data['num'])[1]
            self.FIELD_MAP[data['num']] = base = base+1
            self.field_metadata.set_field_record_index(label_, base, prefer_custom=True)
            if data['datatype'] == 'series':
                # account for the series index column. Field_metadata knows that
                # the series index is one larger than the series. If you change
                # it here, be sure to change it there as well.
                self.FIELD_MAP[str(data['num'])+'_index'] = base = base+1
                self.field_metadata.set_field_record_index(label_+'_index', base, prefer_custom=True)

            if data['normalized']:
                if metadata['is_multiple']:
                    tables[label] = ManyToManyTable(label, metadata, link_table=link_table)
                else:
                    tables[label] = ManyToOneTable(label, metadata, link_table=link_table)
                if metadata['datatype'] == 'series':
                    # Create series index table
                    label += '_index'
                    metadata = self.field_metadata[label].copy()
                    metadata['column'] = 'extra'
                    metadata['table'] = link_table
                    tables[label] = OneToOneTable(label, metadata)
            else:
                if data['datatype'] == 'composite':
                    tables[label] = CompositeTable(label, metadata)
                else:
                    tables[label] = OneToOneTable(label, metadata)

        self.FIELD_MAP['ondevice'] = base = base+1
        self.field_metadata.set_field_record_index('ondevice', base, prefer_custom=False)
        self.FIELD_MAP['marked'] = base = base+1
        self.field_metadata.set_field_record_index('marked', base, prefer_custom=False)
        self.FIELD_MAP['series_sort'] = base = base+1
        self.field_metadata.set_field_record_index('series_sort', base, prefer_custom=False)
    # }}}

    @property
    def conn(self):
        if self._conn is None:
            self._conn = Connection(self.dbpath)
            self.is_closed = False
            if self._exists and self.user_version == 0:
                self._conn.close()
                os.remove(self.dbpath)
                self._conn = Connection(self.dbpath)
        return self._conn

    def execute(self, sql, bindings=None):
        try:
            return self.conn.cursor().execute(sql, bindings)
        except apsw.IOError:
            # This can happen if the computer was suspended see for example:
            # https://bugs.launchpad.net/bugs/1286522. Try to reopen the db
            if not self.conn.getautocommit():
                raise  # We are in a transaction, re-opening the db will fail anyway
            self.reopen(force=True)
            return self.conn.cursor().execute(sql, bindings)

    def executemany(self, sql, sequence_of_bindings):
        try:
            with self.conn:  # Disable autocommit mode, for performance
                return self.conn.cursor().executemany(sql, sequence_of_bindings)
        except apsw.IOError:
            # This can happen if the computer was suspended see for example:
            # https://bugs.launchpad.net/bugs/1286522. Try to reopen the db
            if not self.conn.getautocommit():
                raise  # We are in a transaction, re-opening the db will fail anyway
            self.reopen(force=True)
            with self.conn:  # Disable autocommit mode, for performance
                return self.conn.cursor().executemany(sql, sequence_of_bindings)

    def get(self, *args, **kw):
        ans = self.execute(*args)
        if kw.get('all', True):
            return ans.fetchall()
        try:
            return next(ans)[0]
        except (StopIteration, IndexError):
            return None

    def last_insert_rowid(self):
        return self.conn.last_insert_rowid()

    def custom_field_name(self, label=None, num=None):
        if label is not None:
            return self.field_metadata.custom_field_prefix + label
        return self.field_metadata.custom_field_prefix + self.custom_column_num_to_label_map[num]

    def custom_field_metadata(self, label=None, num=None):
        if label is not None:
            return self.custom_column_label_map[label]
        return self.custom_column_num_map[num]

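    # Illustrative sketch (assumption): FIELD_MAP assigns each field a column
    # index in the flat book record; custom columns are appended after the 22
    # standard columns, then ondevice/marked/series_sort. For example:
    #
    #     self.FIELD_MAP['title']      # -> 1
    #     self.FIELD_MAP['languages']  # -> 21, custom columns start at 22
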
    def set_custom_column_metadata(self, num, name=None, label=None, is_editable=None, display=None):
        changed = False
        if name is not None:
            self.execute('UPDATE custom_columns SET name=? WHERE id=?', (name, num))
            changed = True
        if label is not None:
            self.execute('UPDATE custom_columns SET label=? WHERE id=?', (label, num))
            changed = True
        if is_editable is not None:
            self.execute('UPDATE custom_columns SET editable=? WHERE id=?', (bool(is_editable), num))
            self.custom_column_num_map[num]['is_editable'] = bool(is_editable)
            changed = True
        if display is not None:
            self.execute('UPDATE custom_columns SET display=? WHERE id=?', (json.dumps(display), num))
            changed = True
        # Note: the caller is responsible for scheduling a metadata backup if necessary
        return changed

    def create_custom_column(self, label, name, datatype, is_multiple, editable=True, display={}):  # {{{
        import re
        if not label:
            raise ValueError(_('No label was provided'))
        if re.match(r'^\w*$', label) is None or not label[0].isalpha() or label.lower() != label:
            raise ValueError(_('The label must contain only lower case letters, digits and underscores, and start with a letter'))
        if datatype not in CUSTOM_DATA_TYPES:
            raise ValueError('%r is not a supported data type'%datatype)
        normalized  = datatype not in ('datetime', 'comments', 'int', 'bool',
                'float', 'composite')
        is_multiple = is_multiple and datatype in ('text', 'composite')
        self.execute(
                ('INSERT INTO '
                'custom_columns(label,name,datatype,is_multiple,editable,display,normalized)'
                'VALUES (?,?,?,?,?,?,?)'),
                (label, name, datatype, is_multiple, editable,
                    json.dumps(display), normalized))
        num = self.conn.last_insert_rowid()

        if datatype in ('rating', 'int'):
            dt = 'INT'
        elif datatype in ('text', 'comments', 'series', 'composite', 'enumeration'):
            dt = 'TEXT'
        elif datatype in ('float',):
            dt = 'REAL'
        elif datatype == 'datetime':
            dt = 'timestamp'
        elif datatype == 'bool':
            dt = 'BOOL'
        collate = 'COLLATE NOCASE' if dt == 'TEXT' else ''
        table, lt = self.custom_table_names(num)
        if normalized:
            if datatype == 'series':
                s_index = 'extra REAL,'
            else:
                s_index = ''
            lines = [
                '''\
                CREATE TABLE %s(
                    id    INTEGER PRIMARY KEY AUTOINCREMENT,
                    value %s NOT NULL %s,
                    UNIQUE(value));
                '''%(table, dt, collate),

                'CREATE INDEX %s_idx ON %s (value %s);'%(table, table, collate),

                '''\
                CREATE TABLE %s(
                    id      INTEGER PRIMARY KEY AUTOINCREMENT,
                    book    INTEGER NOT NULL,
                    value   INTEGER NOT NULL,
                    %s
                    UNIQUE(book, value)
                    );'''%(lt, s_index),

                'CREATE INDEX %s_aidx ON %s (value);'%(lt,lt),
                'CREATE INDEX %s_bidx ON %s (book);'%(lt,lt),

                '''\
                CREATE TRIGGER fkc_update_{lt}_a
                        BEFORE UPDATE OF book ON {lt}
                        BEGIN
                            SELECT CASE
                                WHEN (SELECT id from books WHERE id=NEW.book) IS NULL
                                THEN RAISE(ABORT, 'Foreign key violation: book not in books')
                            END;
                        END;
                CREATE TRIGGER fkc_update_{lt}_b
                        BEFORE UPDATE OF author ON {lt}
                        BEGIN
                            SELECT CASE
                                WHEN (SELECT id from {table} WHERE id=NEW.value) IS NULL
                                THEN RAISE(ABORT, 'Foreign key violation: value not in {table}')
                            END;
                        END;
                CREATE TRIGGER fkc_insert_{lt}
                        BEFORE INSERT ON {lt}
                        BEGIN
                            SELECT CASE
                                WHEN (SELECT id from books WHERE id=NEW.book) IS NULL
                                THEN RAISE(ABORT, 'Foreign key violation: book not in books')
                                WHEN (SELECT id from {table} WHERE id=NEW.value) IS NULL
                                THEN RAISE(ABORT, 'Foreign key violation: value not in {table}')
                            END;
                        END;
                CREATE TRIGGER fkc_delete_{lt}
                        AFTER DELETE ON {table}
                        BEGIN
                            DELETE FROM {lt} WHERE value=OLD.id;
                        END;

                CREATE VIEW tag_browser_{table} AS SELECT
                    id,
                    value,
                    (SELECT COUNT(id) FROM {lt} WHERE value={table}.id) count,
                    (SELECT AVG(r.rating)
                     FROM {lt},
                          books_ratings_link as bl,
                          ratings as r
                     WHERE {lt}.value={table}.id and bl.book={lt}.book and
                           r.id = bl.rating and r.rating <> 0) avg_rating,
                    value AS sort
                FROM {table};

                CREATE VIEW tag_browser_filtered_{table} AS SELECT
                    id,
                    value,
                    (SELECT COUNT({lt}.id) FROM {lt} WHERE value={table}.id AND
                    books_list_filter(book)) count,
                    (SELECT AVG(r.rating)
                     FROM {lt},
                          books_ratings_link as bl,
                          ratings as r
                     WHERE {lt}.value={table}.id AND bl.book={lt}.book AND
                           r.id = bl.rating AND r.rating <> 0 AND
                           books_list_filter(bl.book)) avg_rating,
                    value AS sort
                FROM {table};

                '''.format(lt=lt, table=table),
            ]
        else:
            lines = [
                '''\
                CREATE TABLE %s(
                    id    INTEGER PRIMARY KEY AUTOINCREMENT,
                    book  INTEGER,
                    value %s NOT NULL %s,
                    UNIQUE(book));
                '''%(table, dt, collate),

                'CREATE INDEX %s_idx ON %s (book);'%(table, table),

                '''\
                CREATE TRIGGER fkc_insert_{table}
                        BEFORE INSERT ON {table}
                        BEGIN
                            SELECT CASE
                                WHEN (SELECT id from books WHERE id=NEW.book) IS NULL
                                THEN RAISE(ABORT, 'Foreign key violation: book not in books')
                            END;
                        END;
                CREATE TRIGGER fkc_update_{table}
                        BEFORE UPDATE OF book ON {table}
                        BEGIN
                            SELECT CASE
                                WHEN (SELECT id from books WHERE id=NEW.book) IS NULL
                                THEN RAISE(ABORT, 'Foreign key violation: book not in books')
                            END;
                        END;
                '''.format(table=table),
            ]
        script = ' \n'.join(lines)
        self.execute(script)
        self.prefs.set('update_all_last_mod_dates_on_start', True)
        return num
    # }}}

    def delete_custom_column(self, label=None, num=None):
        data = self.custom_field_metadata(label, num)
        self.execute('UPDATE custom_columns SET mark_for_delete=1 WHERE id=?', (data['num'],))

    def close(self, force=False, unload_formatter_functions=True):
        if getattr(self, '_conn', None) is not None:
            if unload_formatter_functions:
                try:
                    unload_user_template_functions(self.library_id)
                except Exception:
                    pass
            self._conn.close(force)
            del self._conn
            self.is_closed = True

    def reopen(self, force=False):
        self.close(force=force, unload_formatter_functions=False)
        self._conn = None
        self.conn

    def dump_and_restore(self, callback=None, sql=None):
        import codecs
        from apsw import Shell
        from contextlib import closing
        if callback is None:
            callback = lambda x: x
        uv = int(self.user_version)

        with TemporaryFile(suffix='.sql') as fname:
            if sql is None:
                callback(_('Dumping database to SQL') + '...')
                with codecs.open(fname, 'wb', encoding='utf-8') as buf:
                    shell = Shell(db=self.conn, stdout=buf)
                    shell.process_command('.dump')
            else:
                with lopen(fname, 'wb') as buf:
                    buf.write(sql if isinstance(sql, bytes) else sql.encode('utf-8'))

            with TemporaryFile(suffix='_tmpdb.db', dir=os.path.dirname(self.dbpath)) as tmpdb:
                callback(_('Restoring database from SQL') + '...')
                with closing(Connection(tmpdb)) as conn:
                    shell = Shell(db=conn, encoding='utf-8')
                    shell.process_command('.read ' + fname.replace(os.sep, '/'))
                    conn.execute('PRAGMA user_version=%d;'%uv)

                self.close(unload_formatter_functions=False)
                try:
                    atomic_rename(tmpdb, self.dbpath)
                finally:
                    self.reopen()

    def vacuum(self):
        self.execute('VACUUM')

    @property
    def user_version(self):
        '''The user version of this database'''
        return self.conn.get('pragma user_version;', all=False)

    @user_version.setter
    def user_version(self, val):
        self.execute('pragma user_version=%d'%int(val))

    def initialize_database(self):
        metadata_sqlite = P('metadata_sqlite.sql', data=True,
                allow_user_override=False).decode('utf-8')
        cur = self.conn.cursor()
        cur.execute('BEGIN EXCLUSIVE TRANSACTION')
        try:
            cur.execute(metadata_sqlite)
        except:
            cur.execute('ROLLBACK')
        else:
            cur.execute('COMMIT')
        if self.user_version == 0:
            self.user_version = 1
    # }}}

    def normpath(self, path):
        path = os.path.abspath(os.path.realpath(path))
        if not self.is_case_sensitive:
            path = os.path.normcase(path).lower()
        return path

    def is_deletable(self, path):
        return path and not self.normpath(self.library_path).startswith(self.normpath(path))

    def rmtree(self, path):
        if self.is_deletable(path):
            rmtree_with_retry(path)

    def construct_path_name(self, book_id, title, author):
        '''
        Construct the directory name for this book based on its metadata.
        '''
        book_id = ' (%d)' % book_id
        l = self.PATH_LIMIT - (len(book_id) // 2) - 2
        author = ascii_filename(author)[:l]
        title  = ascii_filename(title.lstrip())[:l].rstrip()
        if not title:
            title = 'Unknown'[:l]
        try:
            while author[-1] in (' ', '.'):
                author = author[:-1]
        except IndexError:
            author = ''
        if not author:
            author = ascii_filename(_('Unknown'))
        if author.upper() in WINDOWS_RESERVED_NAMES:
            author += 'w'
        return f'{author}/{title}{book_id}'

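    # Illustrative sketch (assumption): for book id 7, title 'Emma' and author
    # 'Jane Austen', construct_path_name() yields a library-relative folder:
    #
    #     self.construct_path_name(7, 'Emma', 'Jane Austen')
    #     # -> 'Jane Austen/Emma (7)'  (components ASCII-sanitized and truncated)
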
    def construct_file_name(self, book_id, title, author, extlen):
        '''
        Construct the file name for this book based on its metadata.
        '''
        extlen = max(extlen, 14)  # 14 accounts for ORIGINAL_EPUB
        # The PATH_LIMIT on windows already takes into account the doubling
        # (it is used to enforce the total path length limit, individual path
        # components can be much longer than the total path length would allow on
        # windows).
        l = (self.PATH_LIMIT - (extlen // 2) - 2) if iswindows else ((self.PATH_LIMIT - extlen - 2) // 2)
        if l < 5:
            raise ValueError('Extension length too long: %d' % extlen)
        author = ascii_filename(author)[:l]
        title  = ascii_filename(title.lstrip())[:l].rstrip()
        if not title:
            title = 'Unknown'[:l]
        name   = title + ' - ' + author
        while name.endswith('.'):
            name = name[:-1]
        if not name:
            name = ascii_filename(_('Unknown'))
        return name

    # Database layer API {{{

    def custom_table_names(self, num):
        return 'custom_column_%d'%num, 'books_custom_column_%d_link'%num

    @property
    def custom_tables(self):
        return {x[0] for x in self.conn.get(
            'SELECT name FROM sqlite_master WHERE type="table" AND '
            '(name GLOB "custom_column_*" OR name GLOB "books_custom_column_*")')}

    @classmethod
    def exists_at(cls, path):
        return path and os.path.exists(os.path.join(path, 'metadata.db'))

    @property
    def library_id(self):
        '''The UUID for this library. As long as the user only operates on libraries with calibre, it will be unique'''
        if getattr(self, '_library_id_', None) is None:
            ans = self.conn.get('SELECT uuid FROM library_id', all=False)
            if ans is None:
                ans = str(uuid.uuid4())
                self.library_id = ans
            else:
                self._library_id_ = ans
        return self._library_id_

    @library_id.setter
    def library_id(self, val):
        self._library_id_ = str(val)
        self.execute('''
                DELETE FROM library_id;
                INSERT INTO library_id (uuid) VALUES (?);
                ''', (self._library_id_,))

    def last_modified(self):
        ''' Return last modified time as a UTC datetime object '''
        return utcfromtimestamp(os.stat(self.dbpath).st_mtime)

    def read_tables(self):
        '''
        Read all data from the db into the python in-memory tables
        '''
        with self.conn:  # Use a single transaction, to ensure nothing modifies the db while we are reading
            for table in itervalues(self.tables):
                try:
                    table.read(self)
                except:
                    prints('Failed to read table:', table.name)
                    import pprint
                    pprint.pprint(table.metadata)
                    raise

    def format_abspath(self, book_id, fmt, fname, path):
        path = os.path.join(self.library_path, path)
        fmt = ('.' + fmt.lower()) if fmt else ''
        fmt_path = os.path.join(path, fname+fmt)
        if os.path.exists(fmt_path):
            return fmt_path
        try:
            candidates = glob.glob(os.path.join(path, '*'+fmt))
        except:  # If path contains strange characters this throws an exc
            candidates = []
        if fmt and candidates and os.path.exists(candidates[0]):
            try:
                shutil.copyfile(candidates[0], fmt_path)
            except shutil.SameFileError:
                # some other process synced in the file since the last
                # os.path.exists()
                return candidates[0]
            return fmt_path

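    # Illustrative sketch (assumption): resolving a format's on-disk file
    # combines the library root, the book's folder and the stored file name:
    #
    #     p = self.format_abspath(7, 'EPUB', 'Emma - Jane Austen', 'Jane Austen/Emma (7)')
    #     # -> '<library>/Jane Austen/Emma (7)/Emma - Jane Austen.epub' if present,
    #     #    else None (after falling back to a glob for any '*.epub' candidate)
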
    def cover_abspath(self, book_id, path):
        path = os.path.join(self.library_path, path)
        fmt_path = os.path.join(path, 'cover.jpg')
        if os.path.exists(fmt_path):
            return fmt_path

    def apply_to_format(self, book_id, path, fname, fmt, func, missing_value=None):
        path = self.format_abspath(book_id, fmt, fname, path)
        if path is None:
            return missing_value
        with lopen(path, 'r+b') as f:
            return func(f)

    def format_hash(self, book_id, fmt, fname, path):
        path = self.format_abspath(book_id, fmt, fname, path)
        if path is None:
            raise NoSuchFormat('Record %d has no fmt: %s'%(book_id, fmt))
        sha = hashlib.sha256()
        with lopen(path, 'rb') as f:
            while True:
                raw = f.read(SPOOL_SIZE)
                sha.update(raw)
                if len(raw) < SPOOL_SIZE:
                    break
        return sha.hexdigest()

    def format_metadata(self, book_id, fmt, fname, path):
        path = self.format_abspath(book_id, fmt, fname, path)
        ans = {}
        if path is not None:
            stat = os.stat(path)
            ans['path'] = path
            ans['size'] = stat.st_size
            ans['mtime'] = utcfromtimestamp(stat.st_mtime)
        return ans

    def has_format(self, book_id, fmt, fname, path):
        return self.format_abspath(book_id, fmt, fname, path) is not None

    def is_format_accessible(self, book_id, fmt, fname, path):
        fpath = self.format_abspath(book_id, fmt, fname, path)
        return fpath and os.access(fpath, os.R_OK | os.W_OK)

    def rename_format_file(self, book_id, src_fname, src_fmt, dest_fname, dest_fmt, path):
        src_path = self.format_abspath(book_id, src_fmt, src_fname, path)
        dest_path = self.format_abspath(book_id, dest_fmt, dest_fname, path)
        atomic_rename(src_path, dest_path)
        return os.path.getsize(dest_path)

    def remove_formats(self, remove_map):
        paths = []
        for book_id, removals in iteritems(remove_map):
            for fmt, fname, path in removals:
                path = self.format_abspath(book_id, fmt, fname, path)
                if path is not None:
                    paths.append(path)
        try:
            delete_service().delete_files(paths, self.library_path)
        except:
            import traceback
            traceback.print_exc()

    def cover_last_modified(self, path):
        path = os.path.abspath(os.path.join(self.library_path, path, 'cover.jpg'))
        try:
            return utcfromtimestamp(os.stat(path).st_mtime)
        except OSError:
            pass  # Cover doesn't exist

    def copy_cover_to(self, path, dest, windows_atomic_move=None, use_hardlink=False, report_file_size=None):
        path = os.path.abspath(os.path.join(self.library_path, path, 'cover.jpg'))
        if windows_atomic_move is not None:
            if not isinstance(dest, string_or_bytes):
                raise Exception("Error, you must pass the dest as a path when"
                        " using windows_atomic_move")
            if os.access(path, os.R_OK) and dest and not samefile(dest, path):
                windows_atomic_move.copy_path_to(path, dest)
                return True
        else:
            if os.access(path, os.R_OK):
                try:
                    f = lopen(path, 'rb')
                except OSError:
                    time.sleep(0.2)
                    try:
                        f = lopen(path, 'rb')
                    except OSError as e:
                        # Ensure the path that caused this error is reported
                        raise Exception(f'Failed to open {path!r} with error: {e}')
                with f:
                    if hasattr(dest, 'write'):
                        if report_file_size is not None:
                            f.seek(0, os.SEEK_END)
                            report_file_size(f.tell())
                            f.seek(0)
                        shutil.copyfileobj(f, dest)
                        if hasattr(dest, 'flush'):
                            dest.flush()
                        return True
                    elif dest and not samefile(dest, path):
                        if use_hardlink:
                            try:
                                hardlink_file(path, dest)
                                return True
                            except:
                                pass
                        with lopen(dest, 'wb') as d:
                            shutil.copyfileobj(f, d)
                        return True
        return False

    def cover_or_cache(self, path, timestamp):
        path = os.path.abspath(os.path.join(self.library_path, path, 'cover.jpg'))
        try:
            stat = os.stat(path)
        except OSError:
            return False, None, None
        if abs(timestamp - stat.st_mtime) < 0.1:
            return True, None, None
        try:
            f = lopen(path, 'rb')
        except OSError:
            time.sleep(0.2)
            f = lopen(path, 'rb')
        with f:
            return True, f.read(), stat.st_mtime

    def compress_covers(self, path_map, jpeg_quality, progress_callback):
        cpath_map = {}
        if not progress_callback:
            progress_callback = lambda book_id, old_sz, new_sz: None
        for book_id, path in path_map.items():
            path = os.path.abspath(os.path.join(self.library_path, path, 'cover.jpg'))
            try:
                sz = os.path.getsize(path)
            except OSError:
                progress_callback(book_id, 0, 'ENOENT')
            else:
                cpath_map[book_id] = (path, sz)
        from calibre.db.covers import compress_covers
        compress_covers(cpath_map, jpeg_quality, progress_callback)

    def set_cover(self, book_id, path, data, no_processing=False):
        path = os.path.abspath(os.path.join(self.library_path, path))
        if not os.path.exists(path):
            os.makedirs(path)
        path = os.path.join(path, 'cover.jpg')
        if callable(getattr(data, 'save', None)):
            from calibre.gui2 import pixmap_to_data
            data = pixmap_to_data(data)
        elif callable(getattr(data, 'read', None)):
            data = data.read()
        if data is None:
            if os.path.exists(path):
                try:
                    os.remove(path)
                except OSError:
                    time.sleep(0.2)
                    os.remove(path)
        else:
            if no_processing:
                with lopen(path, 'wb') as f:
                    f.write(data)
            else:
                try:
                    save_cover_data_to(data, path)
                except OSError:
                    time.sleep(0.2)
                    save_cover_data_to(data, path)

    def copy_format_to(self, book_id, fmt, fname, path, dest,
                       windows_atomic_move=None, use_hardlink=False, report_file_size=None):
        path = self.format_abspath(book_id, fmt, fname, path)
        if path is None:
            return False
        if windows_atomic_move is not None:
            if not isinstance(dest, string_or_bytes):
                raise Exception("Error, you must pass the dest as a path when"
                        " using windows_atomic_move")
            if dest:
                if samefile(dest, path):
                    # Ensure that the file has the same case as dest
                    try:
                        if path != dest:
                            os.rename(path, dest)
                    except:
                        pass  # Nothing too catastrophic happened, the cases mismatch, that's all
                else:
                    windows_atomic_move.copy_path_to(path, dest)
        else:
            if hasattr(dest, 'write'):
                with lopen(path, 'rb') as f:
                    if report_file_size is not None:
                        f.seek(0, os.SEEK_END)
                        report_file_size(f.tell())
                        f.seek(0)
                    shutil.copyfileobj(f, dest)
                if hasattr(dest, 'flush'):
                    dest.flush()
            elif dest:
                if samefile(dest, path):
                    if not self.is_case_sensitive and path != dest:
                        # Ensure that the file has the same case as dest
                        try:
                            os.rename(path, dest)
                        except:
                            pass  # Nothing too catastrophic happened, the cases mismatch, that's all
                else:
                    if use_hardlink:
                        try:
                            hardlink_file(path, dest)
                            return True
                        except:
                            pass
                    with lopen(path, 'rb') as f, lopen(dest, 'wb') as d:
                        shutil.copyfileobj(f, d)
        return True

    def windows_check_if_files_in_use(self, paths):
        '''
        Raises an EACCES IOError if any of the files in the folder of book_id
        are opened in another program on windows.
        '''
        if iswindows:
            for path in paths:
                spath = os.path.join(self.library_path, *path.split('/'))
                wam = None
                if os.path.exists(spath):
                    try:
                        wam = WindowsAtomicFolderMove(spath)
                    finally:
                        if wam is not None:
                            wam.close_handles()

    def add_format(self, book_id, fmt, stream, title, author, path, current_name, mtime=None):
        fmt = ('.' + fmt.lower()) if fmt else ''
        fname = self.construct_file_name(book_id, title, author, len(fmt))
        path = os.path.join(self.library_path, path)
        dest = os.path.join(path, fname + fmt)
        if not os.path.exists(path):
            os.makedirs(path)
        size = 0

        if current_name is not None:
            old_path = os.path.join(path, current_name + fmt)
            if old_path != dest:
                # Ensure that the old format file is not orphaned, this can
                # happen if the algorithm in construct_file_name is changed.
                try:
                    # rename rather than remove, so that if something goes
                    # wrong in the rest of this function, at least the file is
                    # not deleted
                    os.rename(old_path, dest)
                except OSError as e:
                    if getattr(e, 'errno', None) != errno.ENOENT:
                        # Failing to rename the old format will at worst leave a
                        # harmless orphan, so log and ignore the error
                        import traceback
                        traceback.print_exc()

        if (not getattr(stream, 'name', False) or not samefile(dest, stream.name)):
            with lopen(dest, 'wb') as f:
                shutil.copyfileobj(stream, f)
                size = f.tell()
            if mtime is not None:
                os.utime(dest, (mtime, mtime))
        elif os.path.exists(dest):
            size = os.path.getsize(dest)
            if mtime is not None:
                os.utime(dest, (mtime, mtime))

        return size, fname

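    # Illustrative sketch (assumption): adding a format copies an open binary
    # stream into the book's folder and reports the stored size and file name:
    #
    #     with open('/tmp/emma.epub', 'rb') as src:   # hypothetical input file
    #         size, fname = self.add_format(7, 'EPUB', src, 'Emma', 'Jane Austen',
    #                                       'Jane Austen/Emma (7)', current_name=None)
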
    def update_path(self, book_id, title, author, path_field, formats_field):
        path = self.construct_path_name(book_id, title, author)
        current_path = path_field.for_book(book_id, default_value='')
        formats = formats_field.for_book(book_id, default_value=())
        try:
            extlen = max(len(fmt) for fmt in formats) + 1
        except ValueError:
            extlen = 10
        fname = self.construct_file_name(book_id, title, author, extlen)
        # Check if the metadata used to construct paths has changed
        changed = False
        for fmt in formats:
            name = formats_field.format_fname(book_id, fmt)
            if name and name != fname:
                changed = True
                break
        if path == current_path and not changed:
            return
        spath = os.path.join(self.library_path, *current_path.split('/'))
        tpath = os.path.join(self.library_path, *path.split('/'))

        source_ok = current_path and os.path.exists(spath)
        wam = WindowsAtomicFolderMove(spath) if iswindows and source_ok else None
        format_map = {}
        original_format_map = {}
        try:
            if not os.path.exists(tpath):
                os.makedirs(tpath)

            if source_ok:  # Migrate existing files
                dest = os.path.join(tpath, 'cover.jpg')
                self.copy_cover_to(current_path, dest,
                        windows_atomic_move=wam, use_hardlink=True)
                for fmt in formats:
                    dest = os.path.join(tpath, fname+'.'+fmt.lower())
                    format_map[fmt] = dest
                    ofmt_fname = formats_field.format_fname(book_id, fmt)
                    original_format_map[fmt] = os.path.join(spath, ofmt_fname+'.'+fmt.lower())
                    self.copy_format_to(book_id, fmt, ofmt_fname, current_path,
                                        dest, windows_atomic_move=wam, use_hardlink=True)
            # Update db to reflect new file locations
            for fmt in formats:
                formats_field.table.set_fname(book_id, fmt, fname, self)
            path_field.table.set_path(book_id, path, self)

            # Delete not needed files and directories
            if source_ok:
                if os.path.exists(spath):
                    if samefile(spath, tpath):
                        # The format filenames may have changed while the folder
                        # name remains the same
                        for fmt, opath in iteritems(original_format_map):
                            npath = format_map.get(fmt, None)
                            if npath and os.path.abspath(npath.lower()) != os.path.abspath(opath.lower()) and samefile(opath, npath):
                                # opath and npath are different hard links to the same file
                                os.unlink(opath)
                    else:
                        if wam is not None:
                            wam.delete_originals()
                        self.rmtree(spath)
                        parent = os.path.dirname(spath)
                        if len(os.listdir(parent)) == 0:
                            self.rmtree(parent)
        finally:
            if wam is not None:
                wam.close_handles()

        curpath = self.library_path
        c1, c2 = current_path.split('/'), path.split('/')
        if not self.is_case_sensitive and len(c1) == len(c2):
            # On case-insensitive systems, title and author renames that only
            # change case don't cause any changes to the directories in the file
            # system. This can lead to having the directory names not match the
            # title/author, which leads to trouble when libraries are copied to
            # a case-sensitive system. The following code attempts to fix this
            # by checking each segment. If they are different because of case,
            # then rename the segment. Note that the code above correctly
            # handles files in the directories, so no need to do them here.
            for oldseg, newseg in zip(c1, c2):
                if oldseg.lower() == newseg.lower() and oldseg != newseg:
                    try:
                        os.rename(os.path.join(curpath, oldseg), os.path.join(curpath, newseg))
                    except:
                        break  # Fail silently since nothing catastrophic has happened
                curpath = os.path.join(curpath, newseg)

    def write_backup(self, path, raw):
        path = os.path.abspath(os.path.join(self.library_path, path, 'metadata.opf'))
        try:
            with lopen(path, 'wb') as f:
                f.write(raw)
        except OSError:
            exc_info = sys.exc_info()
            try:
                os.makedirs(os.path.dirname(path))
            except OSError as err:
                if err.errno == errno.EEXIST:
                    # Parent directory already exists, re-raise original exception
                    reraise(*exc_info)
                raise
            finally:
                del exc_info
            with lopen(path, 'wb') as f:
                f.write(raw)

    def read_backup(self, path):
        path = os.path.abspath(os.path.join(self.library_path, path, 'metadata.opf'))
        with lopen(path, 'rb') as f:
            return f.read()

    def remove_books(self, path_map, permanent=False):
        self.executemany(
            'DELETE FROM books WHERE id=?', [(x,) for x in path_map])
        paths = {os.path.join(self.library_path, x) for x in itervalues(path_map) if x}
        paths = {x for x in paths if os.path.exists(x) and self.is_deletable(x)}
        if permanent:
            for path in paths:
                self.rmtree(path)
                remove_dir_if_empty(os.path.dirname(path), ignore_metadata_caches=True)
        else:
            delete_service().delete_books(paths, self.library_path)

    def add_custom_data(self, name, val_map, delete_first):
        if delete_first:
            self.execute('DELETE FROM books_plugin_data WHERE name=?', (name, ))
        self.executemany(
            'INSERT OR REPLACE INTO books_plugin_data (book, name, val) VALUES (?, ?, ?)',
            [(book_id, name, json.dumps(val, default=to_json))
                    for book_id, val in iteritems(val_map)])

    def get_custom_book_data(self, name, book_ids, default=None):
        book_ids = frozenset(book_ids)

        def safe_load(val):
            try:
                return json.loads(val, object_hook=from_json)
            except:
                return default

        if len(book_ids) == 1:
            bid = next(iter(book_ids))
            ans = {book_id:safe_load(val) for book_id, val in
                   self.execute('SELECT book, val FROM books_plugin_data WHERE book=? AND name=?', (bid, name))}
            return ans or {bid:default}

        ans = {}
        for book_id, val in self.execute(
            'SELECT book, val FROM books_plugin_data WHERE name=?', (name,)):
            if not book_ids or book_id in book_ids:
                val = safe_load(val)
                ans[book_id] = val
        return ans

    def delete_custom_book_data(self, name, book_ids):
        if book_ids:
            self.executemany('DELETE FROM books_plugin_data WHERE book=? AND name=?',
                             [(book_id, name) for book_id in book_ids])
        else:
            self.execute('DELETE FROM books_plugin_data WHERE name=?', (name,))

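    # Illustrative sketch (assumption): plugin data is stored per (book, name)
    # as JSON in the books_plugin_data table:
    #
    #     self.add_custom_data('myplugin', {7: {'score': 0.9}}, delete_first=False)
    #     self.get_custom_book_data('myplugin', {7})   # -> {7: {'score': 0.9}}
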
    def dirtied_books(self):
        for (book_id,) in self.execute('SELECT book FROM metadata_dirtied'):
            yield book_id

    def dirty_books(self, book_ids):
        self.executemany(
            'INSERT OR IGNORE INTO metadata_dirtied (book) VALUES (?)', ((x,) for x in book_ids))

    def mark_book_as_clean(self, book_id):
        self.execute('DELETE FROM metadata_dirtied WHERE book=?', (book_id,))

    def get_ids_for_custom_book_data(self, name):
        return frozenset(r[0] for r in self.execute('SELECT book FROM books_plugin_data WHERE name=?', (name,)))

    def annotations_for_book(self, book_id, fmt, user_type, user):
        yield from annotations_for_book(self.conn, book_id, fmt, user_type, user)

    def search_annotations(self,
        fts_engine_query, use_stemming, highlight_start, highlight_end,
        snippet_size, annotation_type, restrict_to_book_ids, restrict_to_user,
        ignore_removed=False
    ):
        fts_engine_query = unicode_normalize(fts_engine_query)
        fts_table = 'annotations_fts_stemmed' if use_stemming else 'annotations_fts'
        text = 'annotations.searchable_text'
        if highlight_start is not None and highlight_end is not None:
            if snippet_size is not None:
                text = 'snippet({fts_table}, 0, "{highlight_start}", "{highlight_end}", "…", {snippet_size})'.format(
                        fts_table=fts_table, highlight_start=highlight_start, highlight_end=highlight_end,
                        snippet_size=max(1, min(snippet_size, 64)))
            else:
                text = f'highlight({fts_table}, 0, "{highlight_start}", "{highlight_end}")'
        query = 'SELECT {0}.id, {0}.book, {0}.format, {0}.user_type, {0}.user, {0}.annot_data, {1} FROM {0} '
        query = query.format('annotations', text)
        query += ' JOIN {fts_table} ON annotations.id = {fts_table}.rowid'.format(fts_table=fts_table)
        query += f' WHERE {fts_table} MATCH ?'
        data = [fts_engine_query]
        if restrict_to_user:
            query += ' AND annotations.user_type = ? AND annotations.user = ?'
            data += list(restrict_to_user)
        if annotation_type:
            query += ' AND annotations.annot_type = ? '
            data.append(annotation_type)
        query += f' ORDER BY {fts_table}.rank '
        ls = json.loads
        try:
            for (rowid, book_id, fmt, user_type, user, annot_data, text) in self.execute(query, tuple(data)):
                if restrict_to_book_ids is not None and book_id not in restrict_to_book_ids:
                    continue
                try:
                    parsed_annot = ls(annot_data)
                except Exception:
                    continue
                if ignore_removed and parsed_annot.get('removed'):
                    continue
                yield {
                    'id': rowid,
                    'book_id': book_id,
                    'format': fmt,
                    'user_type': user_type,
                    'user': user,
                    'text': text,
                    'annotation': parsed_annot,
                }
        except apsw.SQLError as e:
            raise FTSQueryError(fts_engine_query, query, e)

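    # Illustrative sketch (assumption): full text search over annotations with
    # highlighted snippets, restricted to the local viewer user:
    #
    #     hits = self.search_annotations('"dark tower"', use_stemming=True,
    #             highlight_start='>>', highlight_end='<<', snippet_size=16,
    #             annotation_type='highlight', restrict_to_book_ids=None,
    #             restrict_to_user=('local', 'viewer'))
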
    def all_annotations_for_book(self, book_id, ignore_removed=False):
        for (fmt, user_type, user, data) in self.execute(
            'SELECT format, user_type, user, annot_data FROM annotations WHERE book=?', (book_id,)
        ):
            try:
                annot = json.loads(data)
            except Exception:
                continue  # skip rows whose annotation data is not valid JSON
            if not ignore_removed or not annot.get('removed'):
                yield {'format': fmt, 'user_type': user_type, 'user': user, 'annotation': annot}

    def delete_annotations(self, annot_ids):
        replacements = []
        removals = []
        now = utcnow()
        ts = now.isoformat()
        timestamp = (now - EPOCH).total_seconds()
        for annot_id in annot_ids:
            for (raw_annot_data, annot_type) in self.execute(
                'SELECT annot_data, annot_type FROM annotations WHERE id=?', (annot_id,)
            ):
                try:
                    annot_data = json.loads(raw_annot_data)
                except Exception:
                    removals.append((annot_id,))
                    continue
                now = utcnow()
                new_annot = {'removed': True, 'timestamp': ts, 'type': annot_type}
                uuid = annot_data.get('uuid')
                if uuid is not None:
                    new_annot['uuid'] = uuid
                else:
                    new_annot['title'] = annot_data['title']
                replacements.append((json.dumps(new_annot), timestamp, annot_id))
        if replacements:
            self.executemany('UPDATE annotations SET annot_data=?, timestamp=?, searchable_text="" WHERE id=?', replacements)
        if removals:
            self.executemany('DELETE FROM annotations WHERE id=?', removals)

    def update_annotations(self, annot_id_map):
        now = utcnow()
        ts = now.isoformat()
        timestamp = (now - EPOCH).total_seconds()
        with self.conn:
            for annot_id, annot in annot_id_map.items():
                atype = annot['type']
                aid, text = annot_db_data(annot)
                if aid is not None:
                    annot['timestamp'] = ts
                    self.execute(
                        'UPDATE annotations SET annot_data=?, timestamp=?, annot_type=?, searchable_text=?, annot_id=? WHERE id=?',
                        (json.dumps(annot), timestamp, atype, text, aid, annot_id))

    def all_annotations(self, restrict_to_user=None, limit=None, annotation_type=None,
                        ignore_removed=False, restrict_to_book_ids=None):
        ls = json.loads
        q = 'SELECT id, book, format, user_type, user, annot_data FROM annotations'
        data = []
        restrict_clauses = []
        if restrict_to_user is not None:
            data.extend(restrict_to_user)
            restrict_clauses.append(' user_type = ? AND user = ?')
        if annotation_type:
            data.append(annotation_type)
            restrict_clauses.append(' annot_type = ? ')
        if restrict_clauses:
            q += ' WHERE ' + ' AND '.join(restrict_clauses)
        q += ' ORDER BY timestamp DESC '
        count = 0
        for (rowid, book_id, fmt, user_type, user, annot_data) in self.execute(q, tuple(data)):
            if restrict_to_book_ids is not None and book_id not in restrict_to_book_ids:
                continue
            try:
                annot = ls(annot_data)
                atype = annot['type']
            except Exception:
                continue
            if ignore_removed and annot.get('removed'):
                continue
            text = ''
            if atype == 'bookmark':
                text = annot['title']
            elif atype == 'highlight':
                text = annot.get('highlighted_text') or ''
            yield {
                'id': rowid,
                'book_id': book_id,
                'format': fmt,
                'user_type': user_type,
                'user': user,
                'text': text,
                'annotation': annot,
            }
            count += 1
            if limit is not None and count >= limit:
                break

    def all_annotations(self, restrict_to_user=None, limit=None, annotation_type=None,
                        ignore_removed=False, restrict_to_book_ids=None):
        ls = json.loads
        q = 'SELECT id, book, format, user_type, user, annot_data FROM annotations'
        data = []
        restrict_clauses = []
        if restrict_to_user is not None:
            data.extend(restrict_to_user)
            restrict_clauses.append(' user_type = ? AND user = ?')
        if annotation_type:
            data.append(annotation_type)
            restrict_clauses.append(' annot_type = ? ')
        if restrict_clauses:
            q += ' WHERE ' + ' AND '.join(restrict_clauses)
        q += ' ORDER BY timestamp DESC '
        count = 0
        for (rowid, book_id, fmt, user_type, user, annot_data) in self.execute(q, tuple(data)):
            if restrict_to_book_ids is not None and book_id not in restrict_to_book_ids:
                continue
            try:
                annot = ls(annot_data)
                atype = annot['type']
            except Exception:
                continue
            if ignore_removed and annot.get('removed'):
                continue
            text = ''
            if atype == 'bookmark':
                text = annot['title']
            elif atype == 'highlight':
                text = annot.get('highlighted_text') or ''
            yield {
                'id': rowid,
                'book_id': book_id,
                'format': fmt,
                'user_type': user_type,
                'user': user,
                'text': text,
                'annotation': annot,
            }
            count += 1
            if limit is not None and count >= limit:
                break

    def all_annotation_users(self):
        return self.execute('SELECT DISTINCT user_type, user FROM annotations')

    def all_annotation_types(self):
        for x in self.execute('SELECT DISTINCT annot_type FROM annotations'):
            yield x[0]

    def set_annotations_for_book(self, book_id, fmt, annots_list, user_type='local', user='viewer'):
        try:
            with self.conn:  # Disable autocommit mode, for performance
                save_annotations_for_book(self.conn.cursor(), book_id, fmt, annots_list, user_type, user)
        except apsw.IOError:
            # This can happen if the computer was suspended, see for example:
            # https://bugs.launchpad.net/bugs/1286522. Try to reopen the db
            if not self.conn.getautocommit():
                raise  # We are in a transaction, re-opening the db will fail anyway
            self.reopen(force=True)
            with self.conn:  # Disable autocommit mode, for performance
                save_annotations_for_book(self.conn.cursor(), book_id, fmt, annots_list, user_type, user)

    def dirty_books_with_dirtied_annotations(self):
        with self.conn:
            self.execute('INSERT OR IGNORE INTO metadata_dirtied(book) SELECT book FROM annotations_dirtied;')
            changed = self.conn.changes() > 0
            if changed:
                self.execute('DELETE FROM annotations_dirtied')
        return changed

    def annotation_count_for_book(self, book_id):
        for (count,) in self.execute('''
            SELECT count(id) FROM annotations
            WHERE book=? AND json_extract(annot_data, "$.removed") IS NULL
        ''', (book_id,)):
            return count
        return 0

    def reindex_annotations(self):
        self.execute('''
            INSERT INTO {0}({0}) VALUES('rebuild');
            INSERT INTO {1}({1}) VALUES('rebuild');
        '''.format('annotations_fts', 'annotations_fts_stemmed'))

    def conversion_options(self, book_id, fmt):
        for (data,) in self.conn.get('SELECT data FROM conversion_options WHERE book=? AND format=?', (book_id, fmt.upper())):
            if data:
                try:
                    return unpickle_binary_string(bytes(data))
                except Exception:
                    pass

    def has_conversion_options(self, ids, fmt='PIPE'):
        ids = frozenset(ids)
        with self.conn:
            self.execute('DROP TABLE IF EXISTS conversion_options_temp; CREATE TEMP TABLE conversion_options_temp (id INTEGER PRIMARY KEY);')
            self.executemany('INSERT INTO conversion_options_temp VALUES (?)', [(x,) for x in ids])
            for (book_id,) in self.conn.get(
                    'SELECT book FROM conversion_options WHERE format=? AND book IN (SELECT id FROM conversion_options_temp)', (fmt.upper(),)):
                return True
            return False
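    # A minimal round-trip sketch for the conversion-options methods here and
    # below, assuming an open DB instance `db` and an existing book id; the
    # option payload shown is illustrative:
    #
    #   db.set_conversion_options({book_id: b'--output-profile tablet'}, 'EPUB')
    #   raw = db.conversion_options(book_id, 'epub')  # fmt is upper-cased
    #   db.delete_conversion_options([book_id], 'EPUB')
    #
    # set_conversion_options() serializes the value with pickle_binary_string()
    # and conversion_options() reverses that, so bytes go in and the same bytes
    # come back (or None when no options are stored for that book and format).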
    def delete_conversion_options(self, book_ids, fmt):
        self.executemany('DELETE FROM conversion_options WHERE book=? AND format=?',
                         [(book_id, fmt.upper()) for book_id in book_ids])

    def set_conversion_options(self, options, fmt):
        def map_data(x):
            if not isinstance(x, string_or_bytes):
                x = native_string_type(x)
            x = x.encode('utf-8') if isinstance(x, str) else x
            x = pickle_binary_string(x)
            return x
        options = [(book_id, fmt.upper(), map_data(data)) for book_id, data in iteritems(options)]
        self.executemany('INSERT OR REPLACE INTO conversion_options(book,format,data) VALUES (?,?,?)', options)

    def get_top_level_move_items(self, all_paths):
        items = set(os.listdir(self.library_path))
        paths = set(all_paths)
        paths.update({'metadata.db', 'metadata_db_prefs_backup.json'})
        path_map = {x: x for x in paths}
        if not self.is_case_sensitive:
            for x in items:
                path_map[x.lower()] = x
            items = {x.lower() for x in items}
            paths = {x.lower() for x in paths}
        items = items.intersection(paths)
        return items, path_map

    def move_library_to(self, all_paths, newloc, progress=(lambda item_name, item_count, total: None), abort=None):
        if not os.path.exists(newloc):
            os.makedirs(newloc)
        old_dirs, old_files = set(), set()
        items, path_map = self.get_top_level_move_items(all_paths)
        total = len(items) + 1
        for i, x in enumerate(items):
            if abort is not None and abort.is_set():
                return
            src = os.path.join(self.library_path, x)
            dest = os.path.join(newloc, path_map[x])
            if os.path.isdir(src):
                if os.path.exists(dest):
                    shutil.rmtree(dest)
                copytree_using_links(src, dest, dest_is_parent=False)
                old_dirs.add(src)
            else:
                if os.path.exists(dest):
                    os.remove(dest)
                copyfile_using_links(src, dest, dest_is_dir=False)
                old_files.add(src)
            x = path_map[x]
            if not isinstance(x, str):
                x = x.decode(filesystem_encoding, 'replace')
            progress(x, i+1, total)
        dbpath = os.path.join(newloc, os.path.basename(self.dbpath))
        odir = self.library_path
        self.conn.close()
        self.library_path, self.dbpath = newloc, dbpath
        if self._conn is not None:
            self._conn.close()
        self._conn = None
        for loc in old_dirs:
            try:
                rmtree_with_retry(loc)
            except OSError as e:
                if os.path.exists(loc):
                    prints('Failed to delete:', loc, 'with error:', as_unicode(e))
        for loc in old_files:
            try:
                os.remove(loc)
            except OSError as e:
                if e.errno != errno.ENOENT:
                    prints('Failed to delete:', loc, 'with error:', as_unicode(e))
        try:
            os.rmdir(odir)
        except OSError:
            pass
        self.conn  # Connect to the moved metadata.db
        progress(_('Completed'), total, total)

    def restore_book(self, book_id, path, formats):
        self.execute('UPDATE books SET path=? WHERE id=?', (path.replace(os.sep, '/'), book_id))
        vals = [(book_id, fmt, size, name) for fmt, size, name in formats]
        self.executemany('INSERT INTO data (book,format,uncompressed_size,name) VALUES (?,?,?,?)', vals)

    def backup_database(self, path):
        dest_db = apsw.Connection(path)
        with dest_db.backup('main', self.conn, 'main') as b:
            while not b.done:
                try:
                    b.step(100)
                except apsw.BusyError:
                    pass
        dest_db.cursor().execute('DELETE FROM metadata_dirtied; VACUUM;')
        dest_db.close()

    # }}}
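    # A minimal sketch of taking a live backup with backup_database() above,
    # assuming an open DB instance `db`; the destination path is illustrative:
    #
    #   db.backup_database('/tmp/metadata.db.backup')
    #
    # apsw's online-backup API copies the database 100 pages per step, so the
    # library remains usable while the copy runs; apsw.BusyError steps are
    # simply retried, and the finished copy then has metadata_dirtied cleared
    # and is VACUUMed.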