#!/usr/bin/env python3
__license__ = 'GPL v3'
__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import numbers
from datetime import datetime, timedelta
from collections import defaultdict
from calibre.utils.date import parse_date, UNDEFINED_DATE, utc_tz
from calibre.ebooks.metadata import author_to_author_sort
from polyglot.builtins import iteritems, itervalues
from calibre_extensions.speedup import parse_date as _c_speedup
def c_parse(val):
    '''
    Parse a date value read from the database into a timezone-aware datetime.

    Tries the fast C parser first, falling back to the slower python
    parse_date(). Returns UNDEFINED_DATE when the value cannot be parsed.
    '''
    try:
        year, month, day, hour, minutes, seconds, tzsecs = _c_speedup(val)
    except (AttributeError, TypeError):
        # If a value like 2001 is stored in the column, apsw will return it as
        # an int
        if isinstance(val, numbers.Number):
            return datetime(int(val), 1, 3, tzinfo=utc_tz)
        if val is None:
            return UNDEFINED_DATE
    except Exception:
        # Note: was a bare except, which also swallowed KeyboardInterrupt and
        # SystemExit. Any other parse failure falls through to parse_date().
        pass
    else:
        try:
            ans = datetime(year, month, day, hour, minutes, seconds, tzinfo=utc_tz)
            if tzsecs != 0:
                ans -= timedelta(seconds=tzsecs)
        except OverflowError:
            ans = UNDEFINED_DATE
        return ans
    try:
        return parse_date(val, as_utc=True, assume_utc=True)
    except (ValueError, TypeError):
        return UNDEFINED_DATE
ONE_ONE, MANY_ONE, MANY_MANY = range(3)
null = object()
class Table:

    '''Base class for the in-memory representation of a single db table.'''

    def __init__(self, name, metadata, link_table=None):
        self.name = name
        self.metadata = metadata
        self.sort_alpha = metadata.get('is_multiple', False) and metadata.get('display', {}).get('sort_alpha', False)
        # self.unserialize() maps values from the db to python objects
        if name == 'authors':
            # Legacy: commas in author names are stored as pipes in the db
            self.unserialize = lambda x: x.replace('|', ',') if x else ''
        else:
            deserializers = {'datetime': c_parse, 'bool': bool}
            self.unserialize = deserializers.get(metadata['datatype'], None)
        self.link_table = link_table or ('books_%s_link' % self.metadata['table'])

    def remove_books(self, book_ids, db):
        '''Remove the specified books. Returns the set of removed items.'''
        return set()

    def fix_link_table(self, db):
        '''Repair link table inconsistencies. No-op in the base class.'''

    def fix_case_duplicates(self, db):
        '''If this table contains entries that differ only by case, then merge
        those entries. This can happen in databases created with old versions
        of calibre and non-ascii values, since sqlite's NOCASE only works with
        ascii text.'''
class VirtualTable(Table):

    '''
    A dummy table used for fields that only exist in memory like ondevice
    '''

    def __init__(self, name, table_type=ONE_ONE, datatype='text'):
        self.table_type = table_type
        Table.__init__(self, name, {'datatype': datatype, 'table': name})
class OneToOneTable(Table):

    '''
    Represents data that is unique per book (it may not actually be unique) but
    each item is assigned to a book in a one-to-one mapping. For example: uuid,
    timestamp, size, etc.
    '''

    table_type = ONE_ONE

    def read(self, db):
        '''Populate book_col_map (book_id -> value) from the db.'''
        table, column = self.metadata['table'], self.metadata['column']
        idcol = 'id' if table == 'books' else 'book'
        rows = db.execute(f'SELECT {idcol}, {column} FROM {table}')
        if self.unserialize is not None:
            deserialize = self.unserialize
            self.book_col_map = {book_id: deserialize(val) for book_id, val in rows}
            return
        try:
            self.book_col_map = dict(rows)
        except UnicodeDecodeError:
            # The db is damaged, try to work around it by ignoring
            # failures to decode utf-8
            rows = db.execute(f'SELECT {idcol}, cast({column} as blob) FROM {table}')
            self.book_col_map = {book_id: bytes(raw).decode('utf-8', 'replace') for book_id, raw in rows}

    def remove_books(self, book_ids, db):
        '''Drop the given books from the in-memory map, returning their values.'''
        removed = set()
        for book_id in book_ids:
            val = self.book_col_map.pop(book_id, null)
            if val is not null:
                removed.add(val)
        return removed
class PathTable(OneToOneTable):

    def set_path(self, book_id, path, db):
        '''Set the storage path for book_id, both in memory and in the db.'''
        self.book_col_map[book_id] = path
        db.execute('UPDATE books SET path=? WHERE id=?', (path, book_id))
class SizeTable(OneToOneTable):

    def read(self, db):
        '''Map every book to the size of its largest format (NULL if none).'''
        self.book_col_map = dict(db.execute(
            'SELECT books.id, (SELECT MAX(uncompressed_size) FROM data '
            'WHERE data.book=books.id) FROM books'))

    def update_sizes(self, size_map):
        '''Merge freshly computed sizes into the cached map.'''
        self.book_col_map.update(size_map)
class UUIDTable(OneToOneTable):

    def read(self, db):
        OneToOneTable.read(self, db)
        # Reverse map for fast uuid -> book_id lookups
        self.uuid_to_id_map = {uuid: book_id for book_id, uuid in iteritems(self.book_col_map)}

    def update_uuid_cache(self, book_id_val_map):
        '''Refresh the reverse map for books whose uuid has changed.'''
        for book_id, uuid in iteritems(book_id_val_map):
            old_uuid = self.book_col_map.get(book_id, None)
            self.uuid_to_id_map.pop(old_uuid, None)  # discard old uuid
            self.uuid_to_id_map[uuid] = book_id

    def remove_books(self, book_ids, db):
        removed = set()
        for book_id in book_ids:
            uuid = self.book_col_map.pop(book_id, null)
            if uuid is not null:
                self.uuid_to_id_map.pop(uuid, None)
                removed.add(uuid)
        return removed

    def lookup_by_uuid(self, uuid):
        '''Return the book id for uuid, or None if unknown.'''
        return self.uuid_to_id_map.get(uuid, None)
class CompositeTable(OneToOneTable):

    '''
    Represents a composite custom column, whose values are computed from a
    template rather than stored in the database.
    '''

    def read(self, db):
        '''Read display settings from metadata; there are no db rows to read
        since composite values are computed on demand.'''
        self.book_col_map = {}
        d = self.metadata['display']
        # BUG FIX: was `['composite_template']` (a one-element list literal),
        # which discarded the actual template. Read it from the display dict.
        self.composite_template = d['composite_template']
        self.contains_html = d.get('contains_html', False)
        self.make_category = d.get('make_category', False)
        self.composite_sort = d.get('composite_sort', False)
        self.use_decorations = d.get('use_decorations', False)

    def remove_books(self, book_ids, db):
        # Nothing is stored per-book for composite columns
        return set()
class ManyToOneTable(Table):

    '''
    Represents data where one data item can map to many books, for example:
    series or publisher.
    Each book however has only one value for data of this type.
    '''

    table_type = MANY_ONE

    def read(self, db):
        '''Load all data into the three in-memory maps:
        id_map: item_id -> value
        col_book_map: item_id -> set of book ids
        book_col_map: book_id -> item_id
        '''
        self.id_map = {}
        self.col_book_map = defaultdict(set)
        self.book_col_map = {}
        self.read_id_maps(db)
        self.read_maps(db)

    def read_id_maps(self, db):
        '''Populate id_map (item_id -> value) from the item table.'''
        query = db.execute('SELECT id, {} FROM {}'.format(
            self.metadata['column'], self.metadata['table']))
        if self.unserialize is None:
            self.id_map = dict(query)
        else:
            us = self.unserialize
            self.id_map = {book_id:us(val) for book_id, val in query}

    def read_maps(self, db):
        '''Populate col_book_map and book_col_map from the link table.'''
        cbm = self.col_book_map
        bcm = self.book_col_map
        for book, item_id in db.execute(
                'SELECT book, {} FROM {}'.format(
                    self.metadata['link_column'], self.link_table)):
            cbm[item_id].add(book)
            bcm[book] = item_id

    def fix_link_table(self, db):
        '''Delete link table rows that reference items that no longer exist.'''
        linked_item_ids = {item_id for item_id in itervalues(self.book_col_map)}
        extra_item_ids = linked_item_ids - set(self.id_map)
        if extra_item_ids:
            for item_id in extra_item_ids:
                book_ids = self.col_book_map.pop(item_id, ())
                for book_id in book_ids:
                    self.book_col_map.pop(book_id, None)
            db.executemany('DELETE FROM {} WHERE {}=?'.format(
                self.link_table, self.metadata['link_column']), tuple((x,) for x in extra_item_ids))

    def fix_case_duplicates(self, db):
        # Group item ids by case-folded value; a group with more than one
        # member holds case-only duplicates which are merged into the item
        # with the smallest id. icu_lower is a calibre-injected builtin.
        case_map = defaultdict(set)
        for item_id, val in iteritems(self.id_map):
            case_map[icu_lower(val)].add(item_id)
        for v in itervalues(case_map):
            if len(v) > 1:
                main_id = min(v)
                v.discard(main_id)
                # Re-point all books at the surviving item
                for item_id in v:
                    self.id_map.pop(item_id, None)
                    books = self.col_book_map.pop(item_id, set())
                    for book_id in books:
                        self.book_col_map[book_id] = main_id
                db.executemany('UPDATE {0} SET {1}=? WHERE {1}=?'.format(
                    self.link_table, self.metadata['link_column']),
                    tuple((main_id, x) for x in v))
                db.executemany('DELETE FROM {} WHERE id=?'.format(self.metadata['table']),
                    tuple((x,) for x in v))

    def remove_books(self, book_ids, db):
        '''Remove the given books; items left with no books are deleted.
        Returns the set of deleted item ids.'''
        clean = set()
        for book_id in book_ids:
            item_id = self.book_col_map.pop(book_id, None)
            if item_id is not None:
                try:
                    self.col_book_map[item_id].discard(book_id)
                except KeyError:
                    # Maps are inconsistent (item missing from col_book_map);
                    # treat the item as orphaned and delete it from id_map.
                    # NOTE(review): col_book_map is a defaultdict after read(),
                    # so this branch presumably only fires if it was replaced
                    # by a plain dict elsewhere — confirm.
                    if self.id_map.pop(item_id, null) is not null:
                        clean.add(item_id)
                else:
                    if not self.col_book_map[item_id]:
                        # No books reference this item any more, delete it
                        del self.col_book_map[item_id]
                        if self.id_map.pop(item_id, null) is not null:
                            clean.add(item_id)
        if clean:
            db.executemany(
                'DELETE FROM {} WHERE id=?'.format(self.metadata['table']),
                [(x,) for x in clean])
        return clean

    def remove_items(self, item_ids, db, restrict_to_book_ids=None):
        '''Remove the given items entirely, or — when restrict_to_book_ids is
        given (a virtual library restriction) — only unlink them from those
        books. Returns the set of affected book ids.'''
        affected_books = set()
        if restrict_to_book_ids is not None:
            items_to_process_normally = set()
            # Check if all the books with the item are in the restriction. If
            # so, process them normally
            for item_id in item_ids:
                books_to_process = self.col_book_map.get(item_id, set())
                books_not_to_delete = books_to_process - restrict_to_book_ids
                if books_not_to_delete:
                    # Some books not in restriction. Must do special processing
                    books_to_delete = books_to_process & restrict_to_book_ids
                    # remove the books from the old id maps
                    self.col_book_map[item_id] = books_not_to_delete
                    for book_id in books_to_delete:
                        self.book_col_map.pop(book_id, None)
                    if books_to_delete:
                        # Delete links to the affected books from the link table. As
                        # this is a many-to-one mapping we know that we can delete
                        # links without checking the item ID
                        db.executemany(
                            f'DELETE FROM {self.link_table} WHERE book=?', tuple((x,) for x in books_to_delete))
                        affected_books |= books_to_delete
                else:
                    # Process normally any items where the VL was not significant
                    items_to_process_normally.add(item_id)
            if items_to_process_normally:
                affected_books |= self.remove_items(items_to_process_normally, db)
            return affected_books
        # Unrestricted removal: drop the items and all their links
        for item_id in item_ids:
            val = self.id_map.pop(item_id, null)
            if val is null:
                continue
            book_ids = self.col_book_map.pop(item_id, set())
            for book_id in book_ids:
                self.book_col_map.pop(book_id, None)
            affected_books.update(book_ids)
        item_ids = tuple((x,) for x in item_ids)
        db.executemany('DELETE FROM {} WHERE {}=?'.format(self.link_table, self.metadata['link_column']), item_ids)
        db.executemany('DELETE FROM {} WHERE id=?'.format(self.metadata['table']), item_ids)
        return affected_books

    def rename_item(self, item_id, new_name, db):
        '''Rename item_id to new_name. If an item with that name already
        exists (case-insensitively), merge into it instead. Returns
        (affected_books, new_id).'''
        rmap = {icu_lower(v):k for k, v in iteritems(self.id_map)}
        existing_item = rmap.get(icu_lower(new_name), None)
        table, col, lcol = self.metadata['table'], self.metadata['column'], self.metadata['link_column']
        affected_books = self.col_book_map.get(item_id, set())
        new_id = item_id
        if existing_item is None or existing_item == item_id:
            # A simple rename will do the trick
            self.id_map[item_id] = new_name
            db.execute(f'UPDATE {table} SET {col}=? WHERE id=?', (new_name, item_id))
        else:
            # We have to replace
            new_id = existing_item
            self.id_map.pop(item_id, None)
            books = self.col_book_map.pop(item_id, set())
            for book_id in books:
                self.book_col_map[book_id] = existing_item
            self.col_book_map[existing_item].update(books)
            # For custom series this means that the series index can
            # potentially have duplicates/be incorrect, but there is no way to
            # handle that in this context.
            # NOTE(review): two statements in one execute() call — relies on
            # the db wrapper (apsw) executing multi-statement SQL.
            db.execute('UPDATE {0} SET {1}=? WHERE {1}=?; DELETE FROM {2} WHERE id=?'.format(
                self.link_table, lcol, table), (existing_item, item_id, item_id))
        return affected_books, new_id
class RatingTable(ManyToOneTable):

    def read_id_maps(self, db):
        ManyToOneTable.read_id_maps(self, db)
        # Ensure there are no records with rating=0 in the table. These should
        # be represented as rating:None instead.
        zero_rated = {item_id for item_id, rating in iteritems(self.id_map) if rating == 0}
        if not zero_rated:
            return
        self.id_map = {item_id: rating for item_id, rating in iteritems(self.id_map) if rating != 0}
        db.executemany(
            'DELETE FROM {} WHERE {}=?'.format(self.link_table, self.metadata['link_column']),
            tuple((x,) for x in zero_rated))
        db.execute('DELETE FROM {} WHERE {}=0'.format(
            self.metadata['table'], self.metadata['column']))
class ManyToManyTable(ManyToOneTable):

    '''
    Represents data that has a many-to-many mapping with books. i.e. each book
    can have more than one value and each value can be mapped to more than one
    book. For example: tags or authors.
    '''

    table_type = MANY_MANY
    # ORDER BY id preserves link insertion order, so each book's item tuple
    # keeps a stable order
    selectq = 'SELECT book, {0} FROM {1} ORDER BY id'
    # Subclasses (e.g. FormatsTable) can disable deleting orphaned items
    do_clean_on_remove = True

    def read_maps(self, db):
        '''Populate col_book_map (item_id -> set of books) and book_col_map
        (book_id -> ordered tuple of item_ids) from the link table.'''
        bcm = defaultdict(list)
        cbm = self.col_book_map
        for book, item_id in db.execute(
                self.selectq.format(self.metadata['link_column'], self.link_table)):
            cbm[item_id].add(book)
            bcm[book].append(item_id)
        self.book_col_map = {k:tuple(v) for k, v in iteritems(bcm)}

    def fix_link_table(self, db):
        '''Delete link table rows that reference items that no longer exist.'''
        linked_item_ids = {item_id for item_ids in itervalues(self.book_col_map) for item_id in item_ids}
        extra_item_ids = linked_item_ids - set(self.id_map)
        if extra_item_ids:
            for item_id in extra_item_ids:
                book_ids = self.col_book_map.pop(item_id, ())
                for book_id in book_ids:
                    # Rebuild the book's tuple without any orphaned item ids
                    self.book_col_map[book_id] = tuple(iid for iid in self.book_col_map.pop(book_id, ()) if iid not in extra_item_ids)
            db.executemany('DELETE FROM {} WHERE {}=?'.format(
                self.link_table, self.metadata['link_column']), tuple((x,) for x in extra_item_ids))

    def remove_books(self, book_ids, db):
        '''Remove the given books; items left with no books are deleted
        (unless do_clean_on_remove is False). Returns deleted item ids.'''
        clean = set()
        for book_id in book_ids:
            item_ids = self.book_col_map.pop(book_id, ())
            for item_id in item_ids:
                try:
                    self.col_book_map[item_id].discard(book_id)
                except KeyError:
                    # Maps inconsistent: item missing from col_book_map.
                    # NOTE(review): col_book_map is a defaultdict after read(),
                    # so this presumably only fires if replaced by a plain dict.
                    if self.id_map.pop(item_id, null) is not null:
                        clean.add(item_id)
                else:
                    if not self.col_book_map[item_id]:
                        # No books reference this item any more
                        del self.col_book_map[item_id]
                        if self.id_map.pop(item_id, null) is not null:
                            clean.add(item_id)
        if clean and self.do_clean_on_remove:
            db.executemany(
                'DELETE FROM {} WHERE id=?'.format(self.metadata['table']),
                [(x,) for x in clean])
        return clean

    def remove_items(self, item_ids, db, restrict_to_book_ids=None):
        '''Remove the given items entirely, or — when restrict_to_book_ids is
        given (a virtual library restriction) — only unlink them from those
        books. Returns the set of affected book ids.'''
        affected_books = set()
        if restrict_to_book_ids is not None:
            items_to_process_normally = set()
            # Check if all the books with the item are in the restriction. If
            # so, process them normally
            for item_id in item_ids:
                books_to_process = self.col_book_map.get(item_id, set())
                books_not_to_delete = books_to_process - restrict_to_book_ids
                if books_not_to_delete:
                    # Some books not in restriction. Must do special processing
                    books_to_delete = books_to_process & restrict_to_book_ids
                    # remove the books from the old id maps
                    self.col_book_map[item_id] = books_not_to_delete
                    for book_id in books_to_delete:
                        self.book_col_map[book_id] = tuple(
                            x for x in self.book_col_map.get(book_id, ()) if x != item_id)
                    affected_books |= books_to_delete
                else:
                    items_to_process_normally.add(item_id)
            # Delete book/item pairs from the link table. We don't need to do
            # anything with the main table because books with the old ID are
            # still in the library.
            db.executemany('DELETE FROM {} WHERE {}=? and {}=?'.format(
                self.link_table, 'book', self.metadata['link_column']),
                [(b, i) for b in affected_books for i in item_ids])
            # Take care of any items where the VL was not significant
            if items_to_process_normally:
                affected_books |= self.remove_items(items_to_process_normally, db)
            return affected_books
        # Unrestricted removal: drop the items and all their links
        for item_id in item_ids:
            val = self.id_map.pop(item_id, null)
            if val is null:
                continue
            book_ids = self.col_book_map.pop(item_id, set())
            for book_id in book_ids:
                self.book_col_map[book_id] = tuple(x for x in self.book_col_map.get(book_id, ()) if x != item_id)
            affected_books.update(book_ids)
        item_ids = tuple((x,) for x in item_ids)
        db.executemany('DELETE FROM {} WHERE {}=?'.format(self.link_table, self.metadata['link_column']), item_ids)
        db.executemany('DELETE FROM {} WHERE id=?'.format(self.metadata['table']), item_ids)
        return affected_books

    def rename_item(self, item_id, new_name, db):
        '''Rename item_id to new_name. If an item with that name already
        exists (case-insensitively), merge into it instead. Returns
        (affected_books, new_id).'''
        rmap = {icu_lower(v):k for k, v in iteritems(self.id_map)}
        existing_item = rmap.get(icu_lower(new_name), None)
        table, col, lcol = self.metadata['table'], self.metadata['column'], self.metadata['link_column']
        affected_books = self.col_book_map.get(item_id, set())
        new_id = item_id
        if existing_item is None or existing_item == item_id:
            # A simple rename will do the trick
            self.id_map[item_id] = new_name
            db.execute(f'UPDATE {table} SET {col}=? WHERE id=?', (new_name, item_id))
        else:
            # We have to replace
            new_id = existing_item
            self.id_map.pop(item_id, None)
            books = self.col_book_map.pop(item_id, set())
            # Replacing item_id with existing_item could cause the same id to
            # appear twice in the book list. Handle that by removing existing
            # item from the book list before replacing.
            for book_id in books:
                self.book_col_map[book_id] = tuple((existing_item if x == item_id else x) for x in self.book_col_map.get(book_id, ()) if x != existing_item)
            self.col_book_map[existing_item].update(books)
            # Remove the links that would become duplicates before re-pointing
            db.executemany(f'DELETE FROM {self.link_table} WHERE book=? AND {lcol}=?', [
                (book_id, existing_item) for book_id in books])
            # NOTE(review): two statements in one execute() call — relies on
            # the db wrapper (apsw) executing multi-statement SQL.
            db.execute('UPDATE {0} SET {1}=? WHERE {1}=?; DELETE FROM {2} WHERE id=?'.format(
                self.link_table, lcol, table), (existing_item, item_id, item_id))
        return affected_books, new_id

    def fix_case_duplicates(self, db):
        from calibre.db.write import uniq
        # Group item ids by case-folded value; merge each group into the item
        # with the smallest id. icu_lower is a calibre-injected builtin.
        case_map = defaultdict(set)
        for item_id, val in iteritems(self.id_map):
            case_map[icu_lower(val)].add(item_id)
        for v in itervalues(case_map):
            if len(v) > 1:
                done_books = set()
                main_id = min(v)
                v.discard(main_id)
                for item_id in v:
                    self.id_map.pop(item_id, None)
                    books = self.col_book_map.pop(item_id, set())
                    for book_id in books:
                        # Each book's tuple is rewritten at most once
                        if book_id in done_books:
                            continue
                        done_books.add(book_id)
                        orig = self.book_col_map.get(book_id, ())
                        if not orig:
                            continue
                        # Replace all duplicate ids with main_id, dropping any
                        # resulting repeats while preserving order
                        vals = uniq(tuple(main_id if x in v else x for x in orig))
                        self.book_col_map[book_id] = vals
                        if len(orig) == len(vals):
                            # We have a simple replacement
                            db.executemany(
                                'UPDATE {0} SET {1}=? WHERE {1}=? AND book=?'.format(
                                    self.link_table, self.metadata['link_column']),
                                tuple((main_id, x, book_id) for x in v))
                        else:
                            # duplicates
                            db.execute(f'DELETE FROM {self.link_table} WHERE book=?', (book_id,))
                            db.executemany(
                                'INSERT INTO {} (book,{}) VALUES (?,?)'.format(self.link_table, self.metadata['link_column']),
                                tuple((book_id, x) for x in vals))
                db.executemany('DELETE FROM {} WHERE id=?'.format(self.metadata['table']),
                    tuple((x,) for x in v))
class AuthorsTable(ManyToManyTable):

    def read_id_maps(self, db):
        '''Load author names, sort values and links from the authors table.'''
        self.alink_map = {}
        self.asort_map = {}
        self.id_map = {}
        decode = self.unserialize
        for author_id, name, sort, link in db.execute(
                'SELECT id, name, sort, link FROM authors'):
            name = decode(name)
            self.id_map[author_id] = name
            # Fall back to a computed sort when the db has no explicit one
            self.asort_map[author_id] = (sort or author_to_author_sort(name))
            self.alink_map[author_id] = link

    def set_sort_names(self, aus_map, db):
        '''Update author sort strings, returning the map of changed entries.'''
        changed = {}
        for author_id, aus in iteritems(aus_map):
            aus = (aus or '').strip()
            if aus != self.asort_map.get(author_id, None):
                changed[author_id] = aus
        self.asort_map.update(changed)
        db.executemany('UPDATE authors SET sort=? WHERE id=?',
                       [(v, k) for k, v in iteritems(changed)])
        return changed

    def set_links(self, link_map, db):
        '''Update author links, returning the map of changed entries.'''
        changed = {}
        for author_id, link in iteritems(link_map):
            link = (link or '').strip()
            if link != self.alink_map.get(author_id, None):
                changed[author_id] = link
        self.alink_map.update(changed)
        db.executemany('UPDATE authors SET link=? WHERE id=?',
                       [(v, k) for k, v in iteritems(changed)])
        return changed

    def remove_books(self, book_ids, db):
        removed = ManyToManyTable.remove_books(self, book_ids, db)
        # Also drop cached sort/link data for authors that were deleted
        for author_id in removed:
            self.alink_map.pop(author_id, None)
            self.asort_map.pop(author_id, None)
        return removed

    def rename_item(self, item_id, new_name, db):
        ret = ManyToManyTable.rename_item(self, item_id, new_name, db)
        if item_id in self.id_map:
            # Was a simple rename, update the author sort value
            self.set_sort_names({item_id: author_to_author_sort(new_name)}, db)
        else:
            # Author was merged into an existing one; drop its cached data
            self.alink_map.pop(item_id, None)
            self.asort_map.pop(item_id, None)
        return ret

    def remove_items(self, item_ids, db, restrict_to_book_ids=None):
        raise NotImplementedError('Direct removal of authors is not allowed')
class FormatsTable(ManyToManyTable):

    # Format rows live in the data table and are not deleted just because no
    # book references a format name any more
    do_clean_on_remove = False

    def read_id_maps(self, db):
        # Formats have no separate item table; the format name is the item id
        pass

    def fix_case_duplicates(self, db):
        # Format names are normalized to upper case in read_maps(), so
        # case-only duplicates cannot occur
        pass

    def read_maps(self, db):
        '''Read per-book format data from the data table into:
        fname_map: book_id -> {fmt: filename}
        size_map: book_id -> {fmt: uncompressed size}
        col_book_map: fmt -> set of book ids
        book_col_map: book_id -> sorted tuple of fmts
        '''
        self.fname_map = fnm = defaultdict(dict)
        self.size_map = sm = defaultdict(dict)
        self.col_book_map = cbm = defaultdict(set)
        bcm = defaultdict(list)
        for book, fmt, name, sz in db.execute('SELECT book, format, name, uncompressed_size FROM data'):
            if fmt is not None:
                # Format names are case-insensitive, stored upper case
                fmt = fmt.upper()
                cbm[fmt].add(book)
                bcm[book].append(fmt)
                fnm[book][fmt] = name
                sm[book][fmt] = sz
        self.book_col_map = {k:tuple(sorted(v)) for k, v in iteritems(bcm)}

    def remove_books(self, book_ids, db):
        '''Remove all format data for the given books.'''
        clean = ManyToManyTable.remove_books(self, book_ids, db)
        for book_id in book_ids:
            self.fname_map.pop(book_id, None)
            self.size_map.pop(book_id, None)
        return clean

    def set_fname(self, book_id, fmt, fname, db):
        '''Set the stored filename for format fmt of book_id.'''
        self.fname_map[book_id][fmt] = fname
        db.execute('UPDATE data SET name=? WHERE book=? AND format=?',
            (fname, book_id, fmt))

    def remove_formats(self, formats_map, db):
        '''Remove formats ({book_id: iterable of fmts}). Returns a map of
        book_id -> largest remaining format size (0 if none left).'''
        for book_id, fmts in iteritems(formats_map):
            self.book_col_map[book_id] = [fmt for fmt in self.book_col_map.get(book_id, []) if fmt not in fmts]
            for m in (self.fname_map, self.size_map):
                m[book_id] = {k:v for k, v in iteritems(m[book_id]) if k not in fmts}
            for fmt in fmts:
                try:
                    self.col_book_map[fmt].discard(book_id)
                except KeyError:
                    # fmt not tracked; nothing to do
                    pass
        db.executemany('DELETE FROM data WHERE book=? AND format=?',
            [(book_id, fmt) for book_id, fmts in iteritems(formats_map) for fmt in fmts])

        def zero_max(book_id):
            # Largest remaining format size; max() raises ValueError when the
            # book has no formats left, which we map to 0
            try:
                return max(itervalues(self.size_map[book_id]))
            except ValueError:
                return 0

        return {book_id:zero_max(book_id) for book_id in formats_map}

    def remove_items(self, item_ids, db):
        raise NotImplementedError('Cannot delete a format directly')

    def rename_item(self, item_id, new_name, db):
        raise NotImplementedError('Cannot rename formats')

    def update_fmt(self, book_id, fmt, fname, size, db):
        '''Add or replace format fmt for book_id. Returns the new largest
        format size for the book.'''
        # Move fmt to the end of the book's format list
        fmts = list(self.book_col_map.get(book_id, []))
        try:
            fmts.remove(fmt)
        except ValueError:
            pass
        fmts.append(fmt)
        self.book_col_map[book_id] = tuple(fmts)
        try:
            self.col_book_map[fmt].add(book_id)
        except KeyError:
            # col_book_map is normally a defaultdict; this guards the case
            # where it is a plain dict without the fmt key
            self.col_book_map[fmt] = {book_id}
        self.fname_map[book_id][fmt] = fname
        self.size_map[book_id][fmt] = size
        db.execute('INSERT OR REPLACE INTO data (book,format,uncompressed_size,name) VALUES (?,?,?,?)',
            (book_id, fmt, size, fname))
        return max(itervalues(self.size_map[book_id]))
class IdentifiersTable(ManyToManyTable):

    def read_id_maps(self, db):
        # Identifiers have no separate item table; the type string is the id
        pass

    def fix_case_duplicates(self, db):
        pass

    def read_maps(self, db):
        '''Populate book_col_map (book -> {type: val}) and col_book_map
        (type -> set of books) from the identifiers table.'''
        self.book_col_map = defaultdict(dict)
        self.col_book_map = defaultdict(set)
        for book, typ, val in db.execute('SELECT book, type, val FROM identifiers'):
            if typ is not None and val is not None:
                self.col_book_map[typ].add(book)
                self.book_col_map[book][typ] = val

    def remove_books(self, book_ids, db):
        '''Drop identifier data for the given books. Returns the identifier
        types that no longer have any books.'''
        orphaned = set()
        for book_id in book_ids:
            for typ in self.book_col_map.pop(book_id, {}):
                try:
                    self.col_book_map[typ].discard(book_id)
                except KeyError:
                    orphaned.add(typ)
                else:
                    if not self.col_book_map[typ]:
                        del self.col_book_map[typ]
                        orphaned.add(typ)
        return orphaned

    def remove_items(self, item_ids, db):
        raise NotImplementedError('Direct deletion of identifiers is not implemented')

    def rename_item(self, item_id, new_name, db):
        raise NotImplementedError('Cannot rename identifiers')

    def all_identifier_types(self):
        '''Return the set of identifier types that have at least one book.'''
        return frozenset(typ for typ, books in iteritems(self.col_book_map) if books)