%PDF- %PDF-
| Direktori : /lib/calibre/calibre/spell/ |
| Current File : //lib/calibre/calibre/spell/break_iterator.py |
#!/usr/bin/env python3
__license__ = 'GPL v3'
__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
from threading import Lock
from calibre.utils.icu import _icu
from calibre.utils.localization import lang_as_iso639_1
# Cache of break iterator objects created by get_iterator(), keyed by the
# language value that was passed in.
_iterators = {}
# Held by the text-processing functions in this module for the whole time
# they configure and query an iterator taken from the cache above.
_lock = Lock()
def get_iterator(lang):
    """Return the break iterator cached for *lang*, creating it on first use.

    A new iterator is built with ``_icu.UBRK_WORD`` and the result of
    ``lang_as_iso639_1(lang)``, falling back to *lang* itself when that
    result is falsy. The new iterator is stored in ``_iterators`` under
    *lang* before being returned.

    This function does no locking of its own.
    """
    cached = _iterators.get(lang)
    if cached is not None:
        return cached
    created = _icu.BreakIterator(
        _icu.UBRK_WORD,
        lang_as_iso639_1(lang) or lang,
    )
    _iterators[lang] = created
    return created
def split_into_words(text, lang='en'):
    """Return a list of the pieces of *text* reported by the iterator for *lang*.

    The iterator's ``split2()`` yields pairs that are used here as
    ``(start, length)``; each pair is turned into the slice
    ``text[start:start + length]``. The work is done while holding ``_lock``.
    """
    with _lock:
        breaker = get_iterator(lang)
        breaker.set_text(text)
        words = []
        for start, length in breaker.split2():
            words.append(text[start:start + length])
        return words
def split_into_words_and_positions(text, lang='en'):
    """Return the raw result of ``split2()`` for *text* using the iterator for *lang*.

    The iterator is loaded with *text* and queried while holding ``_lock``;
    the value returned by ``split2()`` is passed back unchanged.
    """
    with _lock:
        breaker = get_iterator(lang)
        breaker.set_text(text)
        spans = breaker.split2()
        return spans
def index_of(needle, haystack, lang='en'):
    """Return ``index(needle)`` from the iterator for *lang* loaded with *haystack*.

    The iterator is given *haystack* via ``set_text`` and then asked for
    the index of *needle*, all while holding ``_lock``. The result of the
    iterator's ``index`` method is returned unchanged.
    """
    with _lock:
        breaker = get_iterator(lang)
        breaker.set_text(haystack)
        position = breaker.index(needle)
        return position
def count_words(text, lang='en'):
    """Return ``count_words()`` from the iterator for *lang* loaded with *text*.

    The iterator is given *text* via ``set_text`` and queried while holding
    ``_lock``; its result is returned unchanged.
    """
    with _lock:
        breaker = get_iterator(lang)
        breaker.set_text(text)
        total = breaker.count_words()
        return total