%PDF- %PDF-
| Direktori : /lib/calibre/calibre/db/ |
| Current File : //lib/calibre/calibre/db/categories.py |
#!/usr/bin/env python3
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import copy
from functools import partial
from polyglot.builtins import iteritems, native_string_type
from calibre.ebooks.metadata import author_to_author_sort
from calibre.utils.config_base import tweaks, prefs
from calibre.utils.icu import sort_key, collation_order
CATEGORY_SORTS = ('name', 'popularity', 'rating') # This has to be a tuple not a set
class Tag:
__slots__ = ('name', 'original_name', 'id', 'count', 'state', 'is_hierarchical',
'is_editable', 'is_searchable', 'id_set', 'avg_rating', 'sort',
'use_sort_as_name', 'category', 'search_expression', 'original_categories')
def __init__(self, name, id=None, count=0, state=0, avg=0, sort=None,
category=None, id_set=None, search_expression=None,
is_editable=True, is_searchable=True, use_sort_as_name=False,
original_categories=None):
self.name = self.original_name = name
self.id = id
self.count = count
self.state = state
self.is_hierarchical = ''
self.is_editable = is_editable
self.is_searchable = is_searchable
self.id_set = id_set if id_set is not None else set()
self.avg_rating = avg/2.0 if avg is not None else 0
self.sort = sort
self.use_sort_as_name = use_sort_as_name
self.category = category
self.search_expression = search_expression
self.original_categories = None
@property
def string_representation(self):
return '%s:%s:%s:%s:%s:%s'%(self.name, self.count, self.id, self.state,
self.category, self.original_categories)
def __str__(self):
return self.string_representation
def __repr__(self):
return native_string_type(self)
__calibre_serializable__ = True
def as_dict(self):
return {k: getattr(self, k) for k in self.__slots__}
@classmethod
def from_dict(cls, d):
ans = cls('')
for k in cls.__slots__:
setattr(ans, k, d[k])
return ans
def find_categories(field_metadata):
for category, cat in field_metadata.iter_items():
if (cat['is_category'] and cat['kind'] not in {'user', 'search'}):
yield (category, cat['is_multiple'].get('cache_to_list', None), False)
elif (cat['datatype'] == 'composite' and
cat['display'].get('make_category', False)):
yield (category, cat['is_multiple'].get('cache_to_list', None), True)
def create_tag_class(category, fm):
cat = fm[category]
dt = cat['datatype']
is_editable = category not in {'news', 'rating', 'languages', 'formats',
'identifiers'} and dt != 'composite'
if (tweaks['categories_use_field_for_author_name'] == 'author_sort' and
(category == 'authors' or
(cat['display'].get('is_names', False) and
cat['is_custom'] and cat['is_multiple'] and
dt == 'text'))):
use_sort_as_name = True
else:
use_sort_as_name = False
return partial(Tag, use_sort_as_name=use_sort_as_name,
is_editable=is_editable, category=category)
def clean_user_categories(dbcache):
user_cats = dbcache.pref('user_categories', {})
new_cats = {}
for k in user_cats:
comps = [c.strip() for c in k.split('.') if c.strip()]
if len(comps) == 0:
i = 1
while True:
if str(i) not in user_cats:
new_cats[str(i)] = user_cats[k]
break
i += 1
else:
new_cats['.'.join(comps)] = user_cats[k]
try:
if new_cats != user_cats:
dbcache.set_pref('user_categories', new_cats)
except:
pass
return new_cats
numeric_collation = prefs['numeric_collation']
def sort_key_for_name_and_first_letter(x):
v1 = icu_upper(x.sort or x.name)
v2 = v1 or ' '
# The idea is that '9999999999' is larger than any digit so all digits
# will sort in front. Non-digits will sort according to their ICU first letter
c = v2[0]
return (c if numeric_collation and c.isdigit() else '9999999999',
collation_order(v2), sort_key(v1))
category_sort_keys = {True:{}, False: {}}
category_sort_keys[True]['popularity'] = category_sort_keys[False]['popularity'] = \
lambda x:(-getattr(x, 'count', 0), sort_key(x.sort or x.name))
category_sort_keys[True]['rating'] = category_sort_keys[False]['rating'] = \
lambda x:(-getattr(x, 'avg_rating', 0.0), sort_key(x.sort or x.name))
category_sort_keys[True]['name'] = \
sort_key_for_name_and_first_letter
category_sort_keys[False]['name'] = \
lambda x:sort_key(x.sort or x.name)
def get_categories(dbcache, sort='name', book_ids=None, first_letter_sort=False):
if sort not in CATEGORY_SORTS:
raise ValueError('sort ' + sort + ' not a valid value')
fm = dbcache.field_metadata
book_rating_map = dbcache.fields['rating'].book_value_map
lang_map = dbcache.fields['languages'].book_value_map
categories = {}
book_ids = frozenset(book_ids) if book_ids else book_ids
pm_cache = {}
def get_metadata(book_id):
ans = pm_cache.get(book_id)
if ans is None:
ans = pm_cache[book_id] = dbcache._get_proxy_metadata(book_id)
return ans
bids = None
first_letter_sort = bool(first_letter_sort)
for category, is_multiple, is_composite in find_categories(fm):
tag_class = create_tag_class(category, fm)
sort_on, reverse = sort, False
if is_composite:
if bids is None:
bids = dbcache._all_book_ids() if book_ids is None else book_ids
cats = dbcache.fields[category].get_composite_categories(
tag_class, book_rating_map, bids, is_multiple, get_metadata)
elif category == 'news':
cats = dbcache.fields['tags'].get_news_category(tag_class, book_ids)
else:
cat = fm[category]
brm = book_rating_map
dt = cat['datatype']
if dt == 'rating':
if category != 'rating':
brm = dbcache.fields[category].book_value_map
if sort_on == 'name':
sort_on, reverse = 'rating', True
cats = dbcache.fields[category].get_categories(
tag_class, brm, lang_map, book_ids)
if (category != 'authors' and dt == 'text' and
cat['is_multiple'] and cat['display'].get('is_names', False)):
for item in cats:
item.sort = author_to_author_sort(item.sort)
cats.sort(key=category_sort_keys[first_letter_sort][sort_on], reverse=reverse)
categories[category] = cats
# Needed for legacy databases that have multiple ratings that
# map to n stars
for r in categories['rating']:
for x in tuple(categories['rating']):
if r.name == x.name and r.id != x.id:
r.id_set |= x.id_set
r.count = len(r.id_set)
categories['rating'].remove(x)
break
# User categories
user_categories = clean_user_categories(dbcache).copy()
# First add any grouped search terms to the user categories
muc = dbcache.pref('grouped_search_make_user_categories', [])
gst = dbcache.pref('grouped_search_terms', {})
for c in gst:
if c not in muc:
continue
uc = []
for sc in gst[c]:
for t in categories.get(sc, ()):
uc.append([t.name, sc, 0])
user_categories[c] = uc
if user_categories:
# We want to use same node in the user category as in the source
# category. To do that, we need to find the original Tag node. There is
# a time/space tradeoff here. By converting the tags into a map, we can
# do the verification in the category loop much faster, at the cost of
# temporarily duplicating the categories lists.
taglist = {}
for c, items in iteritems(categories):
taglist[c] = dict(map(lambda t:(icu_lower(t.name), t), items))
# Add the category values to the user categories
for user_cat in sorted(user_categories, key=sort_key):
items = []
names_seen = {}
user_cat_is_gst = user_cat in gst
for name, label, ign in user_categories[user_cat]:
n = icu_lower(name)
if label in taglist and n in taglist[label]:
if user_cat_is_gst:
# for gst items, make copy and consolidate the tags by name.
if n in names_seen:
# We must combine this node into a previous one with
# the same name ignoring case. As part of the process,
# remember the source categories and correct the
# average rating
t = names_seen[n]
other_tag = taglist[label][n]
t.id_set |= other_tag.id_set
t.count = len(t.id_set)
t.original_categories.add(other_tag.category)
total_rating = 0
count = 0
for id_ in t.id_set:
rating = book_rating_map.get(id_, 0)
if rating:
total_rating += rating/2
count += 1
if total_rating and count:
t.avg_rating = total_rating/count
else:
# Must deepcopy so we don't share the id_set between nodes
t = copy.deepcopy(taglist[label][n])
t.original_categories = {t.category}
names_seen[n] = t
items.append(t)
else:
items.append(taglist[label][n])
# else: do nothing, to not include nodes w zero counts
cat_name = '@' + user_cat # add the '@' to avoid name collision
items.sort(key=category_sort_keys[False][sort])
categories[cat_name] = items
# ### Finally, the saved searches category ####
items = []
queries = dbcache._search_api.saved_searches.queries
for srch in sorted(queries, key=sort_key):
items.append(Tag(srch, sort=srch, search_expression=queries[srch],
category='search', is_editable=False))
if len(items):
categories['search'] = items
return categories