# %PDF- %PDF-
# Mini Shell
#
# Mini Shell
#
# Direktori : /lib/calibre/calibre/utils/
# Upload File :
# Create Path :
# Current File : //lib/calibre/calibre/utils/icu_test.py

#!/usr/bin/env python3


__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'

import unittest, sys
from contextlib import contextmanager

import calibre.utils.icu as icu
from polyglot.builtins import iteritems, cmp


@contextmanager
def make_collation_func(name, locale, numeric=True, maker=icu.make_sort_key_func, func='strcmp'):
    """Context manager yielding a collation callable bound to ``locale``.

    :param name: Label for the generated function; not used by this helper.
    :param locale: Locale identifier passed to ``icu._icu.Collator``.
    :param numeric: Value assigned to the collator's ``numeric`` attribute.
    :param maker: Factory called as ``maker(collator_factory, func)``; its
        return value is what the ``with`` statement receives.
    :param func: Second argument forwarded to ``maker``.
    """
    def build_collator():
        # Zero-argument factory handed to ``maker``.
        collator = icu._icu.Collator(locale)
        collator.numeric = numeric
        return collator

    collation_callable = maker(build_collator, func)
    yield collation_callable


class TestICU(unittest.TestCase):
    """Tests for the ``calibre.utils.icu`` wrappers and the word break iterator.

    ``'\\U0001f431'`` (CAT FACE) is used throughout as a sample character that
    needs two UTF-16 code units, see ``test_roundtrip``.
    """

    # Short aliases used throughout the tests.
    ae = unittest.TestCase.assertEqual
    ane= unittest.TestCase.assertNotEqual

    def setUp(self):
        # Every test starts from the same locale.
        icu.change_locale('en')

    def _assert_adjacent_orders(self, collator, groups):
        """Compare the collation order of neighbouring strings.

        :param collator: Object providing ``collation_order(string)``, which
            returns a pair whose first item is the order value.
        :param groups: Iterable of ``(assert_func, strings)`` pairs.
            ``assert_func(previous_order, order, message)`` is invoked for every
            pair of adjacent entries in ``strings``.
        """
        for assert_func, items in groups:
            last = None
            for x in items:
                order, _ = collator.collation_order(x)
                if last is not None:
                    assert_func(last, order, f'Order for {x} not correct: {last} != {order}')
                last = order

    def test_sorting(self):
        """Test the various sorting APIs."""
        german = '''Sonntag Montag Dienstag Januar Februar März Fuße Fluße Flusse flusse fluße flüße flüsse'''.split()
        german_good = '''Dienstag Februar flusse Flusse fluße Fluße flüsse flüße Fuße Januar März Montag Sonntag'''.split()
        french = '''dimanche lundi mardi janvier février mars déjà Meme deja même dejà bpef bœg Boef Mémé bœf boef bnef pêche pèché pêché pêche pêché'''.split()
        french_good = '''bnef boef Boef bœf bœg bpef deja dejà déjà dimanche février janvier lundi mardi mars Meme Mémé même pèché pêche pêche pêché pêché'''.split()  # noqa

        # Test corner cases
        sort_key = icu.sort_key
        s = '\U0001f431'
        self.ae(sort_key(s), sort_key(s.encode(sys.getdefaultencoding())), 'UTF-8 encoded object not correctly decoded to generate sort key')
        # The bytes must actually go through sort_key(); comparing the encoded
        # value with itself could never fail.
        undecodable = s.encode('utf-16')
        self.ae(undecodable, sort_key(undecodable), 'Undecodable bytestring not returned as itself')
        self.ae(b'', sort_key(None))
        self.ae(0, icu.strcmp(None, b''))
        self.ae(0, icu.strcmp(s, s.encode(sys.getdefaultencoding())))

        # Test locales: the sort key function and the two-argument comparison
        # function built for the same locale must agree on every pair.
        with make_collation_func('dsk', 'de', maker=icu.make_sort_key_func, func='sort_key') as dsk:
            self.ae(german_good, sorted(german, key=dsk))
            with make_collation_func('dcmp', 'de', maker=icu.make_two_arg_func, func='strcmp') as dcmp:
                for x in german:
                    for y in german:
                        self.ae(cmp(dsk(x), dsk(y)), dcmp(x, y))

        with make_collation_func('fsk', 'fr', maker=icu.make_sort_key_func, func='sort_key') as fsk:
            self.ae(french_good, sorted(french, key=fsk))
            with make_collation_func('fcmp', 'fr', maker=icu.make_two_arg_func) as fcmp:
                for x in french:
                    for y in french:
                        self.ae(cmp(fsk(x), fsk(y)), fcmp(x, y))

        with make_collation_func('ssk', 'es', maker=icu.make_sort_key_func, func='sort_key') as ssk:
            self.ane(ssk('peña'), ssk('pena'))
            with make_collation_func('scmp', 'es', maker=icu.make_two_arg_func) as scmp:
                self.ane(0, scmp('pena', 'peña'))

        for k, v in iteritems({'pèché': 'peche', 'flüße':'Flusse', 'Štepánek':'ŠtepaneK'}):
            self.ae(0, icu.primary_strcmp(k, v))

        # Test different types of collation
        self.ae(icu.primary_sort_key('Aä'), icu.primary_sort_key('aa'))
        self.assertLess(icu.numeric_sort_key('something 2'), icu.numeric_sort_key('something 11'))
        self.assertLess(icu.case_sensitive_sort_key('A'), icu.case_sensitive_sort_key('a'))
        self.ae(0, icu.strcmp('a', 'A'))
        self.ae(cmp('a', 'A'), icu.case_sensitive_strcmp('a', 'A'))
        self.ae(0, icu.primary_strcmp('ä', 'A'))

    def test_change_case(self):
        """Test the various ways of changing the case."""
        from calibre.utils.titlecase import titlecase
        # Test corner cases
        self.ae('A', icu.upper(b'a'))
        for x in ('', None, False, 1):
            self.ae(x, icu.capitalize(x))

        for x in ('a', 'Alice\'s code', 'macdonald\'s machIne', '02 the wars'):
            self.ae(icu.upper(x), x.upper())
            self.ae(icu.lower(x), x.lower())
            # ICU's title case algorithm is different from ours, when there are
            # capitals inside words
            self.ae(icu.title_case(x), titlecase(x).replace('machIne', 'Machine'))
            self.ae(icu.capitalize(x), x[0].upper() + x[1:].lower())
            self.ae(icu.swapcase(x), x.swapcase())

    def test_find(self):
        """Test searching for substrings."""
        self.ae((1, 1), icu.find(b'a', b'1ab'))
        self.ae((1, 1), icu.find('\U0001f431', 'x\U0001f431x'))
        self.ae((1, 1), icu.find('y', '\U0001f431y'))
        self.ae((0, 4), icu.primary_find('pena', 'peña'))
        for k, v in iteritems({'pèché': 'peche', 'flüße':'Flusse', 'Štepánek':'ŠtepaneK'}):
            self.ae((1, len(k)), icu.primary_find(v, ' ' + k), f'Failed to find {v} in {k}')
        self.assertTrue(icu.startswith(b'abc', b'ab'))
        self.assertTrue(icu.startswith('abc', 'abc'))
        self.assertFalse(icu.startswith('xyz', 'a'))
        self.assertTrue(icu.startswith('xxx', ''))
        self.assertTrue(icu.primary_startswith('pena', 'peña'))
        self.assertTrue(icu.contains('\U0001f431', '\U0001f431'))
        self.assertTrue(icu.contains('something', 'some other something else'))
        self.assertTrue(icu.contains('', 'a'))
        self.assertTrue(icu.contains('', ''))
        self.assertFalse(icu.contains('xxx', 'xx'))
        self.assertTrue(icu.primary_contains('pena', 'peña'))

    def test_collation_order(self):
        """Testing collation ordering."""
        self._assert_adjacent_orders(icu.numeric_collator(), [
            (self.ae,  ('Šaa', 'Smith', 'Solženicyn', 'Štepánek')),
            (self.ae,  ('11', '011')),
            (self.ane, ('2', '1')),
            (self.ae,  ('100 Smith', '0100 Smith')),
        ])

        self.ae(dict(icu.partition_by_first_letter(['A1', '', 'a1', '\U0001f431', '\U0001f431x'])),
                {' ':[''], 'A':['A1', 'a1'], '\U0001f431':['\U0001f431', '\U0001f431x']})

    def test_collation_order_for_partitioning(self):
        """Testing collation ordering for partitioning."""
        self._assert_adjacent_orders(icu.non_numeric_sort_collator(), [
            (self.ae, ('Smith', 'Šaa', 'Solženicyn', 'Štepánek')),
            (self.ane, ('11', '011')),
            (self.ae, ('102 Smith', '100 Smith')),
            (self.ane, ('100 Smith', '0100 Smith')),
        ])

        self.ae(dict(icu.partition_by_first_letter(['A1', '', 'a1', '\U0001f431', '\U0001f431x'])),
                {' ':[''], 'A':['A1', 'a1'], '\U0001f431':['\U0001f431', '\U0001f431x']})

    def test_roundtrip(self):
        """Test roundtripping."""
        for r in ('xxx\0\u2219\U0001f431xxx', '\0', '', 'simple'):
            self.ae(r, icu._icu.roundtrip(r))
        # Lone surrogates are expected to come back as U+FFFD.
        self.ae(icu._icu.roundtrip('\ud8e81'), '\ufffd1')
        self.ae(icu._icu.roundtrip('\udc01\ud8e8'), '\ufffd\ufffd')
        for x, l in [('', 0), ('a', 1), ('\U0001f431', 1)]:
            self.ae(icu._icu.string_length(x), l)
        for x, l in [('', 0), ('a', 1), ('\U0001f431', 2)]:
            self.ae(icu._icu.utf16_length(x), l)
        self.ae(icu._icu.chr(0x1f431), '\U0001f431')
        self.ae(icu._icu.ord_string('abc'*100), tuple(map(ord, 'abc'*100)))
        self.ae(icu._icu.ord_string('\U0001f431'), (0x1f431,))

    def test_character_name(self):
        """Test character naming."""
        from calibre.utils.unicode_names import character_name_from_code
        for q, e in {
                '\U0001f431': 'CAT FACE'
                }.items():
            self.ae(icu.character_name(q), e)
            self.ae(character_name_from_code(icu.ord_string(q)[0]), e)

    # Skipped via decorator so the body below is not dead code after a
    # skipTest() call.
    @unittest.skip('Skipping as this depends too much on ICU version')
    def test_contractions(self):
        """Test contractions."""
        c = icu._icu.Collator('cs')
        self.ae(icu.contractions(c), frozenset({'Z\u030c', 'z\u030c', 'Ch',
            'C\u030c', 'ch', 'cH', 'c\u030c', 's\u030c', 'r\u030c', 'CH',
            'S\u030c', 'R\u030c'}))

    def test_break_iterator(self):
        """Test the break iterator."""
        from calibre.spell.break_iterator import split_into_words as split, index_of, split_into_words_and_positions, count_words
        for q in ('one two three', ' one two three', 'one\ntwo  three ', ):
            self.ae(split(q), ['one', 'two', 'three'], f'Failed to split: {q!r}')
        self.ae(split('I I\'m'), ['I', "I'm"])
        self.ae(split('out-of-the-box'), ['out-of-the-box'])
        self.ae(split('-one two-'), ['-one', 'two-'])
        self.ae(split('-one a-b-c-d e'), ['-one', 'a-b-c-d', 'e'])
        self.ae(split('-one -a-b-c-d- e'), ['-one', '-a-b-c-d-', 'e'])
        self.ae(split_into_words_and_positions('one \U0001f431 three'), [(0, 3), (6, 5)])
        self.ae(count_words('a b c d e f'), 6)
        # (needle, haystack, expected index); -1 means "not found".
        for needle, haystack, pos in (
                ('word', 'a word b', 2),
                ('word', 'a word', 2),
                ('one-two', 'a one-two punch', 2),
                ('one-two', 'one-two punch', 0),
                ('one-two', 'one-two', 0),
                ('one', 'one-two one', 8),
                ('one-two', 'one-two-three one-two', 14),
                ('one', 'onet one', 5),
                ('two', 'one-two two', 8),
                ('two', 'two-one two', 8),
                ('-two', 'one-two -two', 8),
                ('-two', 'two', -1),
                ('i', 'i', 0),
                ('i', 'six i', 4),
                ('i', '', -1), ('', '', -1), ('', 'i', -1),
                ('i', 'six clicks', -1),
                ('i', '\U0001f431 i', 2),
                ('-a', 'b -a', 2),
                ('a-', 'a-b a- d', 4),
                ('-a-', 'b -a -a-', 5),
                ('-a-', '-a-', 0),
                ('-a-', 'a-', -1),
                ('-a-', '-a', -1),
                ('-a-', 'a', -1),
                ('a-', 'a-', 0),
                ('-a', '-a', 0),
                ('a-b-c-', 'a-b-c-d', -1),
                ('a-b-c-', 'a-b-c-.', 0),
                ('a-b-c-', 'a-b-c-d a-b-c- d', 8),
        ):
            fpos = index_of(needle, haystack)
            self.ae(pos, fpos, f'Failed to find index of {needle!r} in {haystack!r} ({pos} != {fpos})')


def find_tests():
    """Return a test suite containing every test method of ``TestICU``."""
    loader = unittest.defaultTestLoader
    suite = loader.loadTestsFromTestCase(TestICU)
    return suite


class TestRunner(unittest.main):
    """``unittest.main`` variant that always runs the suite from ``find_tests()``."""

    def createTests(self, from_discovery=False, Loader=None):
        """Install the suite returned by ``find_tests()`` as ``self.test``.

        ``from_discovery`` and ``Loader`` mirror the optional parameters of the
        base-class method so this override stays call-compatible with it; both
        are ignored because the suite is fixed.
        """
        self.test = find_tests()


def run(verbosity=4):
    """Run the ICU tests with the given ``verbosity`` without exiting the interpreter."""
    runner_options = {'verbosity': verbosity, 'exit': False}
    TestRunner(**runner_options)


def test_build():
    """Run the ICU tests quietly and raise ``SystemExit(1)`` if any of them fail.

    Output is buffered, the run stops at the first failure, and only the
    program name from ``sys.argv`` is passed on to the runner.
    """
    program = TestRunner(
        verbosity=0,
        buffer=True,
        catchbreak=True,
        failfast=True,
        argv=sys.argv[:1],
        exit=False,
    )
    outcome = program.result
    if outcome.wasSuccessful():
        return
    raise SystemExit(1)


# When executed directly as a script, run the tests with verbosity 4.
if __name__ == '__main__':
    run(verbosity=4)

# Zerion Mini Shell 1.0