%PDF- %PDF-
Direktori : /lib/calibre/calibre/db/ |
Current File : //lib/calibre/calibre/db/write.py |
#!/usr/bin/env python3


__license__   = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import re
from functools import partial
from datetime import datetime

from polyglot.builtins import iteritems, itervalues
from calibre.constants import preferred_encoding
from calibre.ebooks.metadata import author_to_author_sort, title_sort
from calibre.utils.date import (
    parse_only_date, parse_date, UNDEFINED_DATE, isoformat, is_date_undefined)
from calibre.utils.localization import canonicalize_lang
from calibre.utils.icu import strcmp

# NOTE(review): ``icu_lower`` and ``_`` are used below but not imported in this
# module; presumably they are installed as builtins elsewhere -- confirm.

# Sentinel used as the default for ``dict.get`` so that "no cached value" never
# compares equal to a real value (including None).
missing = object()

# Convert data into values suitable for the db {{{


def sqlite_datetime(x):
    '''
    Return ``isoformat(x, sep=' ')`` when x is a ``datetime``; any other value
    is returned unchanged.
    '''
    return isoformat(x, sep=' ') if isinstance(x, datetime) else x


def single_text(x):
    '''
    Normalize a single text value.

    None is passed through. Values that are not ``str`` are decoded with
    ``preferred_encoding`` (errors replaced). The text is stripped and an
    empty result is converted to None.
    '''
    if x is None:
        return x
    if not isinstance(x, str):
        x = x.decode(preferred_encoding, 'replace')
    x = x.strip()
    return x if x else None


# Matches "<name> [<number>]" at the end of the string: group 1 is the name,
# group 2 the bracketed run of digits and dots.
series_index_pat = re.compile(r'(.*)\s+\[([.0-9]+)\]$')


def get_series_values(val):
    '''
    Split a value of the form ``'Name [1.5]'`` into ``(name, index)``.

    :return: ``(stripped name, float index)`` when val ends with a bracketed
        number that ``float()`` accepts, otherwise ``(val, None)``. A falsy
        val is returned as ``(val, None)`` unchanged.
    '''
    if not val:
        return (val, None)
    match = series_index_pat.match(val.strip())
    if match is not None:
        idx = match.group(2)
        try:
            idx = float(idx)
            return (match.group(1).strip(), idx)
        except ValueError:
            # The pattern also admits strings such as '1.2.3' or '.', which
            # float() rejects; treat those as "no index". Only ValueError is
            # caught so that KeyboardInterrupt/SystemExit still propagate.
            pass
    return (val, None)


def multiple_text(sep, ui_sep, x):
    '''
    Convert x into a tuple of cleaned-up text items.

    :param sep: Separator used to split x when x is a single str/bytes value.
    :param ui_sep: Separator that must not appear inside an individual item;
        occurrences are replaced by ``','`` when the stripped ui_sep is ``';'``
        and by ``';'`` otherwise.
    :param x: A str, bytes or iterable of str/bytes items. Falsy -> ``()``.
    :return: Tuple of items with surrounding whitespace removed, internal runs
        of whitespace collapsed to one space and empty items dropped.
    '''
    if not x:
        return ()
    if isinstance(x, bytes):
        x = x.decode(preferred_encoding, 'replace')
    if isinstance(x, str):
        x = x.split(sep)
    else:
        x = (y.decode(preferred_encoding, 'replace') if isinstance(y, bytes)
             else y for y in x)
    ui_sep = ui_sep.strip()
    repsep = ',' if ui_sep == ';' else ';'
    x = (y.strip().replace(ui_sep, repsep) for y in x if y.strip())
    return tuple(' '.join(y.split()) for y in x if y)


def adapt_datetime(x):
    '''
    Convert x to a datetime value. str/bytes are parsed with ``parse_date``
    (``assume_utc=False, as_utc=False``); a truthy result for which
    ``is_date_undefined`` is true is replaced by ``UNDEFINED_DATE``. Falsy
    values (such as None) are returned as they are.
    '''
    if isinstance(x, (str, bytes)):
        x = parse_date(x, assume_utc=False, as_utc=False)
    if x and is_date_undefined(x):
        x = UNDEFINED_DATE
    return x


def adapt_date(x):
    '''
    Convert x to a date value. str/bytes are parsed with ``parse_only_date``;
    None and undefined dates both become ``UNDEFINED_DATE``.
    '''
    if isinstance(x, (str, bytes)):
        x = parse_only_date(x)
    if x is None or is_date_undefined(x):
        x = UNDEFINED_DATE
    return x


def adapt_number(typ, x):
    '''
    Convert x with the callable typ (for example ``int`` or ``float``).

    :return: None when x is None, an empty string or the text ``'none'`` (any
        case); otherwise ``typ(x)``. bytes are decoded first. Whatever typ
        raises for an unparsable value propagates to the caller.
    '''
    if x is None:
        return None
    if isinstance(x, (str, bytes)):
        if isinstance(x, bytes):
            x = x.decode(preferred_encoding, 'replace')
        if not x or x.lower() == 'none':
            return None
    return typ(x)


def adapt_bool(x):
    '''
    Convert x to True, False or None.

    Text is compared case-insensitively: ``'true'`` -> True, ``'false'`` ->
    False, ``'none'`` or ``''`` -> None, anything else is passed through
    ``bool(int(x))`` (so non-numeric text raises ValueError). Non-text values
    are converted with ``bool()`` unless they are None.
    '''
    if isinstance(x, (str, bytes)):
        if isinstance(x, bytes):
            x = x.decode(preferred_encoding, 'replace')
        x = x.lower()
        if x == 'true':
            x = True
        elif x == 'false':
            x = False
        elif x == 'none' or x == '':
            x = None
        else:
            x = bool(int(x))
    return x if x is None else bool(x)


def adapt_languages(to_tuple, x):
    '''
    Convert x into a tuple of canonical language codes.

    :param to_tuple: Callable that turns x into an iterable of language names.
    :return: Result of ``canonicalize_lang`` for every item, in input order,
        skipping unrecognized items, duplicates and the codes
        ``und``/``zxx``/``mis``/``mul``.
    '''
    ans = []
    for lang in to_tuple(x):
        lc = canonicalize_lang(lang)
        if not lc or lc in ans or lc in ('und', 'zxx', 'mis', 'mul'):
            continue
        ans.append(lc)
    return tuple(ans)


def clean_identifier(typ, val):
    '''
    Return the ``(typ, val)`` pair cleaned up for storage: typ is lower-cased,
    stripped and has ``:`` and ``,`` removed; val is stripped and has ``,``
    replaced by ``|``. None is treated as the empty string for both.
    '''
    typ = icu_lower(typ or '').strip().replace(':', '').replace(',', '')
    val = (val or '').strip().replace(',', '|')
    return typ, val


def adapt_identifiers(to_tuple, x):
    '''
    Convert x into a ``{type: value}`` dict of cleaned identifiers.

    :param to_tuple: Callable used, when x is not already a dict, to split x
        into ``'type:value'`` strings.
    :return: Dict containing only entries whose cleaned type and value are
        both non-empty (see :func:`clean_identifier`).
    '''
    if not isinstance(x, dict):
        # partition(':')[0::2] yields (text before first ':', text after it)
        x = {k: v for k, v in (y.partition(':')[0::2] for y in to_tuple(x))}
    ans = {}
    for k, v in iteritems(x):
        k, v = clean_identifier(k, v)
        if k and v:
            ans[k] = v
    return ans


def adapt_series_index(x):
    '''
    Convert x to a float series index, using 1.0 when x adapts to None.
    '''
    ret = adapt_number(float, x)
    return 1.0 if ret is None else ret


def get_adapter(name, metadata):
    '''
    Return the callable that converts a user supplied value for the field
    called name into the form the writer functions in this module store.

    :param name: Field name. Several names (``title``, ``author_sort``,
        ``authors``, ``timestamp``, ``last_modified``, ``series_index``,
        ``languages``, ``identifiers``) wrap or replace the adapter chosen
        from the datatype.
    :param metadata: Field metadata mapping; ``'datatype'`` and
        ``'is_multiple'`` are read here.
    '''
    dt = metadata['datatype']
    if dt == 'text':
        if metadata['is_multiple']:
            m = metadata['is_multiple']
            ans = partial(multiple_text, m['ui_to_list'], m['list_to_ui'])
        else:
            ans = single_text
    elif dt == 'series':
        ans = single_text
    elif dt == 'datetime':
        ans = adapt_date if name == 'pubdate' else adapt_datetime
    elif dt == 'int':
        ans = partial(adapt_number, int)
    elif dt == 'float':
        ans = partial(adapt_number, float)
    elif dt == 'bool':
        ans = adapt_bool
    elif dt == 'comments':
        ans = single_text
    elif dt == 'rating':
        def ans(x):
            # None and 0 both mean "no rating"; everything else is clamped to
            # the range 0..10.
            if x in {None, 0}:
                return None
            num = adapt_number(int, x)
            # adapt_number() returns None for '' and 'none'. Comparing None
            # with an int raises TypeError on Python 3, so treat that input as
            # "no rating" as well.
            return None if num is None else min(10, max(0, num))
    elif dt == 'enumeration':
        ans = single_text
    elif dt == 'composite':
        ans = lambda x: x

    if name == 'title':
        return lambda x: ans(x) or _('Unknown')
    if name == 'author_sort':
        return lambda x: ans(x) or ''
    if name == 'authors':
        # '|' in an author name is turned back into ','; an empty result
        # becomes the single author "Unknown".
        return lambda x: tuple(y.replace('|', ',') for y in ans(x)) or (_('Unknown'),)
    if name in {'timestamp', 'last_modified'}:
        return lambda x: ans(x) or UNDEFINED_DATE
    if name == 'series_index':
        return adapt_series_index
    if name == 'languages':
        return partial(adapt_languages, ans)
    if name == 'identifiers':
        return partial(adapt_identifiers, ans)

    return ans
# }}}

# One-One fields {{{


def one_one_in_books(book_id_val_map, db, field, *args):
    '''
    Set a one-one field in the books table.

    :param book_id_val_map: Mapping of book id to the new (adapted) value.
    :param db: Object providing ``executemany``.
    :param field: Field whose ``table.book_col_map`` cache and
        ``metadata['column']`` are used.
    :return: Set of book ids that were written.
    '''
    # Ignore those items whose value is the same as the current value
    # We can't do this for the cover because the file might change without
    # the presence-of-cover flag changing
    if field.name != 'cover':
        g = field.table.book_col_map.get
        book_id_val_map = {k: v for k, v in book_id_val_map.items()
                           if v != g(k, missing)}
    if book_id_val_map:
        sequence = ((sqlite_datetime(v), k) for k, v in book_id_val_map.items())
        db.executemany(
            'UPDATE books SET %s=? WHERE id=?'%field.metadata['column'], sequence)
        field.table.book_col_map.update(book_id_val_map)
    return set(book_id_val_map)


def set_uuid(book_id_val_map, db, field, *args):
    '''
    Set the uuid field: update the table's uuid cache first, then write the
    values with :func:`one_one_in_books`. Returns the set of changed book ids.
    '''
    field.table.update_uuid_cache(book_id_val_map)
    return one_one_in_books(book_id_val_map, db, field, *args)


def set_title(book_id_val_map, db, field, *args):
    '''
    Set the title field and, for every book whose title actually changed,
    the title sort field as well. Returns the set of changed book ids.
    '''
    ans = one_one_in_books(book_id_val_map, db, field, *args)
    # Set the title sort field if the title changed
    field.title_sort_field.writer.set_books(
        {k: title_sort(v) for k, v in book_id_val_map.items() if k in ans}, db)
    return ans


def one_one_in_other(book_id_val_map, db, field, *args):
    '''
    Set a one-one field in the non-books table, like comments.

    A value of None deletes the row for that book; any other value is written
    with ``INSERT OR REPLACE``. Returns the set of book ids whose value
    differed from the cached one.
    '''
    # Ignore those items whose value is the same as the current value
    g = field.table.book_col_map.get
    book_id_val_map = {k: v for k, v in iteritems(book_id_val_map) if v != g(k, missing)}
    deleted = tuple((k,) for k, v in iteritems(book_id_val_map) if v is None)
    if deleted:
        db.executemany('DELETE FROM %s WHERE book=?'%field.metadata['table'],
                       deleted)
        for book_id in deleted:
            # deleted holds 1-tuples (SQL bindings), hence the [0]
            field.table.book_col_map.pop(book_id[0], None)
    updated = {k: v for k, v in iteritems(book_id_val_map) if v is not None}
    if updated:
        db.executemany('INSERT OR REPLACE INTO %s(book,%s) VALUES (?,?)'%(
            field.metadata['table'], field.metadata['column']),
            ((k, sqlite_datetime(v)) for k, v in iteritems(updated)))
        field.table.book_col_map.update(updated)
    return set(book_id_val_map)


def custom_series_index(book_id_val_map, db, field, *args):
    '''
    Set the index of a custom series field.

    A None index is stored as 1.0. Books that currently have no series
    (``field.series_field.ids_for_book`` is empty) are removed from this
    field's table instead of being updated.

    :return: Set of book ids for which a row was updated.
    '''
    series_field = field.series_field
    sequence = []
    for book_id, sidx in book_id_val_map.items():
        ids = series_field.ids_for_book(book_id)
        if sidx is None:
            sidx = 1.0
        if ids:
            if field.table.book_col_map.get(book_id, missing) != sidx:
                sequence.append((sidx, book_id, ids[0]))
                field.table.book_col_map[book_id] = sidx
        else:
            # the series has been deleted from the book, which means no row for
            # it exists in the series table. The series_index value should be
            # removed from the in-memory table as well, to ensure this book
            # sorts the same as other books with no series.
            field.table.remove_books((book_id,), db)
    if sequence:
        db.executemany('UPDATE %s SET %s=? WHERE book=? AND value=?'%(
            field.metadata['table'], field.metadata['column']), sequence)
    return {s[1] for s in sequence}
# }}}

# Many-One fields {{{


def safe_lower(x):
    '''
    Return ``icu_lower(x)``, or x unchanged when lower-casing fails with
    TypeError, ValueError, KeyError or AttributeError.
    '''
    try:
        return icu_lower(x)
    except (TypeError, ValueError, KeyError, AttributeError):
        return x


def get_db_id(val, db, m, table, kmap, rid_map, allow_case_change,
              case_changes, val_map, is_authors=False):
    '''
    Get the db id for the value val. If val does not exist in the db it is
    inserted into the db.

    Nothing is returned; the result is recorded in the passed-in mappings:

    :param kmap: Callable producing the lookup key for a value.
    :param rid_map: Mapping of lookup key to item id; extended on insert.
    :param case_changes: Receives ``item_id -> val`` when the item exists, val
        differs from the stored text and allow_case_change is true.
    :param val_map: Receives ``val -> item_id``.
    :param is_authors: When true the value is inserted into the authors table
        (with ``,`` stored as ``|`` and a computed sort value) and the table's
        ``asort_map``/``alink_map`` caches are filled in.
    '''
    kval = kmap(val)
    item_id = rid_map.get(kval, None)
    if item_id is None:
        if is_authors:
            aus = author_to_author_sort(val)
            db.execute('INSERT INTO authors(name,sort) VALUES (?,?)',
                       (val.replace(',', '|'), aus))
        else:
            db.execute('INSERT INTO %s(%s) VALUES (?)'%(
                m['table'], m['column']), (val,))
        item_id = rid_map[kval] = db.last_insert_rowid()
        table.id_map[item_id] = val
        table.col_book_map[item_id] = set()
        if is_authors:
            table.asort_map[item_id] = aus
            table.alink_map[item_id] = ''
    elif allow_case_change and val != table.id_map[item_id]:
        case_changes[item_id] = val
    val_map[val] = item_id


def change_case(case_changes, dirtied, db, table, m, is_authors=False):
    '''
    Write the new text for every item in case_changes (``item_id -> val``) to
    the db and the table's ``id_map``, and add every book that uses one of
    those items to the set dirtied. For authors the cached author sort is
    recomputed as well.
    '''
    if is_authors:
        vals = ((val.replace(',', '|'), item_id) for item_id, val in
                iteritems(case_changes))
    else:
        vals = ((val, item_id) for item_id, val in iteritems(case_changes))
    db.executemany(
        'UPDATE %s SET %s=? WHERE id=?'%(m['table'], m['column']), vals)
    for item_id, val in iteritems(case_changes):
        table.id_map[item_id] = val
        dirtied.update(table.col_book_map[item_id])
        if is_authors:
            table.asort_map[item_id] = author_to_author_sort(val)


def many_one(book_id_val_map, db, field, allow_case_change, *args):
    '''
    Set a field in which each book has at most one item and an item can be
    shared by many books.

    :param book_id_val_map: Mapping of book id to the new value; None removes
        the book's item.
    :param allow_case_change: Passed to :func:`get_db_id`; when true, a value
        that matches an existing item but differs from its stored text
        replaces that text.
    :return: Set of book ids affected, including books touched only by a
        change to an item's stored text.
    '''
    dirtied = set()
    m = field.metadata
    table = field.table
    dt = m['datatype']
    is_custom_series = dt == 'series' and table.name.startswith('#')

    # Map values to db ids, including any new values
    kmap = safe_lower if dt in {'text', 'series'} else lambda x: x
    rid_map = {kmap(item): item_id for item_id, item in iteritems(table.id_map)}
    if len(rid_map) != len(table.id_map):
        # table has some entries that differ only in case, fix it
        table.fix_case_duplicates(db)
        rid_map = {kmap(item): item_id for item_id, item in iteritems(table.id_map)}
    # Pre-seeded so that a None value maps to "no item" below
    val_map = {None: None}
    case_changes = {}
    for val in itervalues(book_id_val_map):
        if val is not None:
            get_db_id(val, db, m, table, kmap, rid_map, allow_case_change,
                      case_changes, val_map)

    if case_changes:
        change_case(case_changes, dirtied, db, table, m)

    book_id_item_id_map = {k: val_map[v] for k, v in iteritems(book_id_val_map)}

    # Ignore those items whose value is the same as the current value
    book_id_item_id_map = {k: v for k, v in iteritems(book_id_item_id_map)
                           if v != table.book_col_map.get(k, None)}
    dirtied |= set(book_id_item_id_map)

    # Update the book->col and col->book maps
    deleted = set()
    updated = {}
    for book_id, item_id in iteritems(book_id_item_id_map):
        old_item_id = table.book_col_map.get(book_id, None)
        if old_item_id is not None:
            table.col_book_map[old_item_id].discard(book_id)
        if item_id is None:
            table.book_col_map.pop(book_id, None)
            deleted.add(book_id)
        else:
            table.book_col_map[book_id] = item_id
            table.col_book_map[item_id].add(book_id)
            updated[book_id] = item_id

    # Update the db link table
    if deleted:
        db.executemany('DELETE FROM %s WHERE book=?'%table.link_table,
                       ((k,) for k in deleted))
    if updated:
        # Two statements per book: drop the old link, then insert the new one.
        # Custom series link tables additionally get extra set to 1.0.
        sql = (
            'DELETE FROM {0} WHERE book=?; INSERT INTO {0}(book,{1},extra) VALUES(?, ?, 1.0)'
            if is_custom_series else
            'DELETE FROM {0} WHERE book=?; INSERT INTO {0}(book,{1}) VALUES(?, ?)'
        )
        db.executemany(sql.format(table.link_table, m['link_column']),
                       ((book_id, book_id, item_id) for book_id, item_id in
                        iteritems(updated)))

    # Remove no longer used items
    remove = {item_id for item_id in table.id_map if not
              table.col_book_map.get(item_id, False)}
    if remove:
        db.executemany('DELETE FROM %s WHERE id=?'%m['table'],
                       ((item_id,) for item_id in remove))
        for item_id in remove:
            del table.id_map[item_id]
            table.col_book_map.pop(item_id, None)

    return dirtied
# }}}

# Many-Many fields {{{


def uniq(vals, kmap=lambda x: x):
    '''
    Remove all duplicates from vals, while preserving order. kmap must be a
    callable that returns a hashable value for every item in vals. Two items
    are duplicates when kmap returns equal keys for them; the first one is
    kept. A falsy vals gives an empty tuple.
    '''
    vals = vals or ()
    lvals = (kmap(x) for x in vals)
    seen = set()
    seen_add = seen.add
    # seen_add() returns None, so "not seen_add(k)" is always true and only
    # serves to record k the first time it is encountered.
    return tuple(x for x, k in zip(vals, lvals) if k not in seen and not seen_add(k))


def many_many(book_id_val_map, db, field, allow_case_change, *args):
    '''
    Set a field in which each book can have several items and each item can
    belong to several books.

    :param book_id_val_map: Mapping of book id to an iterable of values; an
        empty/falsy iterable removes all items from the book.
    :param allow_case_change: Passed to :func:`get_db_id`; when true, a value
        that matches an existing item but differs from its stored text
        replaces that text.
    :return: Set of book ids affected, including books touched only by a
        change to an item's stored text.
    '''
    dirtied = set()
    m = field.metadata
    table = field.table
    dt = m['datatype']
    is_authors = field.name == 'authors'

    # Map values to db ids, including any new values
    kmap = safe_lower if dt == 'text' else lambda x: x
    rid_map = {kmap(item): item_id for item_id, item in iteritems(table.id_map)}
    if len(rid_map) != len(table.id_map):
        # table has some entries that differ only in case, fix it
        table.fix_case_duplicates(db)
        rid_map = {kmap(item): item_id for item_id, item in iteritems(table.id_map)}
    val_map = {}
    case_changes = {}
    book_id_val_map = {k: uniq(vals, kmap) for k, vals in iteritems(book_id_val_map)}
    for vals in itervalues(book_id_val_map):
        for val in vals:
            get_db_id(val, db, m, table, kmap, rid_map, allow_case_change,
                      case_changes, val_map, is_authors=is_authors)

    if case_changes:
        change_case(case_changes, dirtied, db, table, m, is_authors=is_authors)
        if is_authors:
            for item_id, val in iteritems(case_changes):
                for book_id in table.col_book_map[item_id]:
                    current_sort = field.db_author_sort_for_book(book_id)
                    new_sort = field.author_sort_for_book(book_id)
                    if strcmp(current_sort, new_sort) == 0:
                        # The sort strings differ only by case, update the db
                        # sort
                        field.author_sort_field.writer.set_books({book_id: new_sort}, db)

    book_id_item_id_map = {k: tuple(val_map[v] for v in vals)
                           for k, vals in book_id_val_map.items()}

    # Ignore those items whose value is the same as the current value
    g = table.book_col_map.get
    not_set = ()
    book_id_item_id_map = {k: v for k, v in book_id_item_id_map.items() if v != g(k, not_set)}
    dirtied |= set(book_id_item_id_map)

    # Update the book->col and col->book maps
    deleted = set()
    updated = {}
    for book_id, item_ids in iteritems(book_id_item_id_map):
        old_item_ids = table.book_col_map.get(book_id, None)
        if old_item_ids:
            for old_item_id in old_item_ids:
                table.col_book_map[old_item_id].discard(book_id)
        if item_ids:
            table.book_col_map[book_id] = item_ids
            for item_id in item_ids:
                table.col_book_map[item_id].add(book_id)
            updated[book_id] = item_ids
        else:
            table.book_col_map.pop(book_id, None)
            deleted.add(book_id)

    # Update the db link table
    if deleted:
        db.executemany('DELETE FROM %s WHERE book=?'%table.link_table,
                       ((k,) for k in deleted))
    if updated:
        vals = (
            (book_id, val) for book_id, vals in iteritems(updated)
            for val in vals
        )
        db.executemany('DELETE FROM %s WHERE book=?'%table.link_table,
                       ((k,) for k in updated))
        db.executemany('INSERT INTO {}(book,{}) VALUES(?, ?)'.format(
            table.link_table, m['link_column']), vals)
        if is_authors:
            # The set of authors changed, so the book's author sort must be
            # regenerated as well.
            aus_map = {book_id: field.author_sort_for_book(book_id) for book_id
                       in updated}
            field.author_sort_field.writer.set_books(aus_map, db)

    # Remove no longer used items
    remove = {item_id for item_id in table.id_map if not
              table.col_book_map.get(item_id, False)}
    if remove:
        db.executemany('DELETE FROM %s WHERE id=?'%m['table'],
                       ((item_id,) for item_id in remove))
        for item_id in remove:
            del table.id_map[item_id]
            table.col_book_map.pop(item_id, None)
            if is_authors:
                table.asort_map.pop(item_id, None)
                table.alink_map.pop(item_id, None)

    return dirtied

# }}}


def identifiers(book_id_val_map, db, field, *args):  # {{{
    '''
    Set the identifiers field.

    :param book_id_val_map: Mapping of book id to a ``{type: value}`` dict
        that replaces all identifiers of that book.
    :return: Set of book ids whose identifiers differed from the cached ones.
    '''
    # Ignore those items whose value is the same as the current value
    g = field.table.book_col_map.get
    book_id_val_map = {k: v for k, v in book_id_val_map.items() if v != g(k, missing)}

    table = field.table
    updates = set()
    for book_id, identifiers in iteritems(book_id_val_map):
        if book_id not in table.book_col_map:
            table.book_col_map[book_id] = {}
        current_ids = table.book_col_map[book_id]
        # Identifier types the book has now but that are absent from the new
        # value are dropped from both in-memory maps.
        remove_keys = set(current_ids) - set(identifiers)
        for key in remove_keys:
            table.col_book_map.get(key, set()).discard(book_id)
            current_ids.pop(key, None)
        current_ids.update(identifiers)
        for key, val in iteritems(identifiers):
            if key not in table.col_book_map:
                table.col_book_map[key] = set()
            table.col_book_map[key].add(book_id)
            updates.add((book_id, key, val))
    # All existing rows of the changed books are deleted, then re-inserted
    db.executemany('DELETE FROM identifiers WHERE book=?',
                   ((x,) for x in book_id_val_map))
    if updates:
        db.executemany('INSERT OR REPLACE INTO identifiers (book, type, val) VALUES (?, ?, ?)',
                       tuple(updates))
    return set(book_id_val_map)
# }}}


def dummy(book_id_val_map, *args):
    '''
    Writer function for fields that are not written by this module: does
    nothing and reports that no books changed.
    '''
    return set()


class Writer:
    '''
    Writes values of a single field.

    On construction the value adapter (see :func:`get_adapter`) and the
    function that performs the actual write are selected from the field's
    name and metadata; :meth:`set_books` then applies them.
    '''

    def __init__(self, field):
        self.adapter = get_adapter(field.name, field.metadata)
        self.name = field.name
        self.field = field
        dt = field.metadata['datatype']
        # Predicate applied to the raw (un-adapted) value; values for which it
        # is false are silently skipped by set_books().
        self.accept_vals = lambda x: True
        if dt == 'composite' or field.name in {
                'id', 'size', 'path', 'formats', 'news'}:
            self.set_books_func = dummy
        elif self.name[0] == '#' and self.name.endswith('_index'):
            self.set_books_func = custom_series_index
        elif self.name == 'identifiers':
            self.set_books_func = identifiers
        elif self.name == 'uuid':
            self.set_books_func = set_uuid
        elif self.name == 'title':
            self.set_books_func = set_title
        elif field.is_many_many:
            self.set_books_func = many_many
        elif field.is_many:
            self.set_books_func = (self.set_books_for_enum if dt ==
                                   'enumeration' else many_one)
        else:
            self.set_books_func = (one_one_in_books if field.metadata['table']
                                   == 'books' else one_one_in_other)
            # NOTE(review): nesting of this check inside the else branch was
            # reconstructed from whitespace-collapsed source. With this
            # nesting 'uuid' never reaches it because an earlier branch
            # handles that name -- confirm against the upstream file.
            if self.name in {'timestamp', 'uuid', 'sort'}:
                self.accept_vals = bool

    def set_books(self, book_id_val_map, db, allow_case_change=True):
        '''
        Adapt and write the values in book_id_val_map (book id -> raw value).

        Values rejected by ``self.accept_vals`` are dropped before adapting.

        :return: Set of book ids reported as changed by the write function;
            an empty set when no values remain.
        '''
        book_id_val_map = {k: self.adapter(v) for k, v in
                           iteritems(book_id_val_map) if self.accept_vals(v)}
        if not book_id_val_map:
            return set()
        dirtied = self.set_books_func(book_id_val_map, db, self.field,
                                      allow_case_change)
        return dirtied

    def set_books_for_enum(self, book_id_val_map, db, field,
                           allow_case_change):
        '''
        Write function for enumeration fields: keeps only None and values
        listed in ``field.metadata['display']['enum_values']``, then delegates
        to :func:`many_one` with case changes disabled (the allow_case_change
        argument is ignored).
        '''
        allowed = set(field.metadata['display']['enum_values'])
        book_id_val_map = {k: v for k, v in iteritems(book_id_val_map) if v is
                           None or v in allowed}
        if not book_id_val_map:
            return set()
        return many_one(book_id_val_map, db, field, False)