%PDF- %PDF-
Mini Shell

Mini Shell

Direktori : /lib/calibre/calibre/utils/
Upload File :
Create Path :
Current File : //lib/calibre/calibre/utils/icu.py

#!/usr/bin/env python3

__license__   = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

# Setup code {{{
import codecs
import sys

from calibre.utils.config_base import tweaks, prefs
from calibre_extensions import icu as _icu
from polyglot.builtins import cmp

_locale = _collator = _primary_collator = _sort_collator = _non_numeric_sort_collator = _numeric_collator = _case_sensitive_collator = None
cmp

_none = ''
_none2 = b''
_cmap = {}

icu_unicode_version = getattr(_icu, 'unicode_version', None)
_nmodes = {m:getattr(_icu, m) for m in ('NFC', 'NFD', 'NFKC', 'NFKD')}

# Ensure that the python internal filesystem and default encodings are not ASCII


def is_ascii(name):
    try:
        return codecs.lookup(name).name == b'ascii'
    except (TypeError, LookupError):
        return True


try:
    if is_ascii(sys.getdefaultencoding()):
        _icu.set_default_encoding(b'utf-8')
except:
    import traceback
    traceback.print_exc()

try:
    if is_ascii(sys.getfilesystemencoding()):
        _icu.set_filesystem_encoding(b'utf-8')
except:
    import traceback
    traceback.print_exc()
del is_ascii


def collator():
    global _collator, _locale
    if _collator is None:
        if _locale is None:
            from calibre.utils.localization import get_lang
            if tweaks['locale_for_sorting']:
                _locale = tweaks['locale_for_sorting']
            else:
                _locale = get_lang()
        try:
            _collator = _icu.Collator(_locale)
        except Exception as e:
            print(f'Failed to load collator for locale: {_locale!r} with error {e!r}, using English')
            _collator = _icu.Collator('en')
    return _collator


def change_locale(locale=None):
    global _locale, _collator, _primary_collator, _sort_collator, _numeric_collator, _case_sensitive_collator, _non_numeric_sort_collator
    _collator = _primary_collator = _sort_collator = _numeric_collator = _case_sensitive_collator = _non_numeric_sort_collator = None
    _locale = locale


def primary_collator():
    'Ignores case differences and accented characters'
    global _primary_collator
    if _primary_collator is None:
        _primary_collator = collator().clone()
        _primary_collator.strength = _icu.UCOL_PRIMARY
    return _primary_collator


def sort_collator():
    'Ignores case differences and recognizes numbers in strings (if the tweak is set)'
    global _sort_collator
    if _sort_collator is None:
        _sort_collator = collator().clone()
        _sort_collator.strength = _icu.UCOL_SECONDARY
        _sort_collator.numeric = prefs['numeric_collation']
    return _sort_collator


def non_numeric_sort_collator():
    'Ignores case differences only'
    global _non_numeric_sort_collator
    if _non_numeric_sort_collator is None:
        _non_numeric_sort_collator = collator().clone()
        _non_numeric_sort_collator.strength = _icu.UCOL_SECONDARY
        _non_numeric_sort_collator.numeric = False
    return _non_numeric_sort_collator


def numeric_collator():
    'Uses natural sorting for numbers inside strings so something2 will sort before something10'
    global _numeric_collator
    if _numeric_collator is None:
        _numeric_collator = collator().clone()
        _numeric_collator.strength = _icu.UCOL_SECONDARY
        _numeric_collator.numeric = True
    return _numeric_collator


def case_sensitive_collator():
    'Always sorts upper case letter before lower case'
    global _case_sensitive_collator
    if _case_sensitive_collator is None:
        _case_sensitive_collator = collator().clone()
        _case_sensitive_collator.numeric = sort_collator().numeric
        _case_sensitive_collator.upper_first = True
    return _case_sensitive_collator


def make_sort_key_func(collator_function, func_name='sort_key'):
    func = None

    def sort_key(a):
        nonlocal func
        if func is None:
            func = getattr(collator_function(), func_name)

        try:
            return func(a)
        except TypeError:
            if isinstance(a, bytes):
                try:
                    a = a.decode(sys.getdefaultencoding())
                except ValueError:
                    return a
                return func(a)
        return b''

    return sort_key


def make_two_arg_func(collator_function, func_name='strcmp'):
    func = None

    def two_args(a, b):
        nonlocal func
        if func is None:
            func = getattr(collator_function(), func_name)

        try:
            return func(a, b)
        except TypeError:
            if isinstance(a, bytes):
                try:
                    a = a.decode(sys.getdefaultencoding())
                except Exception:
                    return cmp(a, b)
            elif a is None:
                a = ''
            if isinstance(b, bytes):
                try:
                    b = b.decode(sys.getdefaultencoding())
                except Exception:
                    return cmp(a, b)
            elif b is None:
                b = ''
            return func(a, b)

    return two_args


def make_change_case_func(which):

    def change_case(x):
        try:
            try:
                return _icu.change_case(x, which, _locale)
            except NotImplementedError:
                pass
            collator()  # sets _locale
            return _icu.change_case(x, which, _locale)
        except TypeError:
            if isinstance(x, bytes):
                try:
                    x = x.decode(sys.getdefaultencoding())
                except ValueError:
                    return x
                return _icu.change_case(x, which, _locale)
            raise
    return change_case
# }}}


# ################ The string functions ########################################
sort_key = make_sort_key_func(sort_collator)
numeric_sort_key = make_sort_key_func(numeric_collator)
primary_sort_key = make_sort_key_func(primary_collator)
case_sensitive_sort_key = make_sort_key_func(case_sensitive_collator)
collation_order = make_sort_key_func(sort_collator, 'collation_order')
collation_order_for_partitioning = make_sort_key_func(non_numeric_sort_collator, 'collation_order')

strcmp = make_two_arg_func(sort_collator)
case_sensitive_strcmp = make_two_arg_func(case_sensitive_collator)
primary_strcmp = make_two_arg_func(primary_collator)
upper = make_change_case_func(_icu.UPPER_CASE)
lower = make_change_case_func(_icu.LOWER_CASE)
title_case = make_change_case_func(_icu.TITLE_CASE)


def capitalize(x):
    try:
        return upper(x[0]) + lower(x[1:])
    except (IndexError, TypeError, AttributeError):
        return x


try:
    swapcase = _icu.swap_case
except AttributeError:  # For people running from source
    swapcase = lambda x:x.swapcase()

find = make_two_arg_func(collator, 'find')
primary_find = make_two_arg_func(primary_collator, 'find')
contains = make_two_arg_func(collator, 'contains')
primary_contains = make_two_arg_func(primary_collator, 'contains')
startswith = make_two_arg_func(collator, 'startswith')
primary_startswith = make_two_arg_func(primary_collator, 'startswith')
safe_chr = _icu.chr
ord_string = _icu.ord_string


def character_name(string):
    try:
        return _icu.character_name(str(string)) or None
    except (TypeError, ValueError, KeyError):
        pass


def character_name_from_code(code):
    try:
        return _icu.character_name_from_code(code) or ''
    except (TypeError, ValueError, KeyError):
        return ''


def normalize(text, mode='NFC'):
    # This is very slightly slower than using unicodedata.normalize, so stick with
    # that unless you have very good reasons not too. Also, it's speed
    # decreases on wide python builds, where conversion to/from ICU's string
    # representation is slower.
    return _icu.normalize(_nmodes[mode], str(text))


def contractions(col=None):
    global _cmap
    col = col or _collator
    if col is None:
        col = collator()
    ans = _cmap.get(collator, None)
    if ans is None:
        ans = col.contractions()
        ans = frozenset(filter(None, ans))
        _cmap[col] = ans
    return ans


def partition_by_first_letter(items, reverse=False, key=lambda x:x):
    # Build a list of 'equal' first letters by noticing changes
    # in ICU's 'ordinal' for the first letter.
    from collections import OrderedDict
    items = sorted(items, key=lambda x:sort_key(key(x)), reverse=reverse)
    ans = OrderedDict()
    last_c, last_ordnum = ' ', 0
    for item in items:
        c = icu_upper(key(item) or ' ')
        ordnum, ordlen = collation_order(c)
        if last_ordnum != ordnum:
            last_c = c[0:1]
            last_ordnum = ordnum
        try:
            ans[last_c].append(item)
        except KeyError:
            ans[last_c] = [item]
    return ans


# Return the number of unicode codepoints in a string
string_length = len

# Return the number of UTF-16 codepoints in a string
utf16_length = _icu.utf16_length

################################################################################

if __name__ == '__main__':
    from calibre.utils.icu_test import run
    run(verbosity=4)

Zerion Mini Shell 1.0