%PDF- %PDF-
Mini Shell

Mini Shell

Direktori : /proc/thread-self/root/usr/lib/calibre/calibre/utils/hyphenation/
Upload File :
Create Path :
Current File : //proc/thread-self/root/usr/lib/calibre/calibre/utils/hyphenation/hyphenate.py

#!/usr/bin/env python3
# License: GPL v3 Copyright: 2019, Kovid Goyal <kovid at kovidgoyal.net>


import os

import regex

from calibre.utils.hyphenation.dictionaries import (
    dictionary_name_for_locale, path_to_dictionary
)
from polyglot.functools import lru_cache

REGEX_FLAGS = regex.VERSION1 | regex.WORD | regex.FULLCASE | regex.UNICODE


@lru_cache()
def dictionary_for_locale(locale):
    name = dictionary_name_for_locale(locale)
    if name is not None:
        from calibre_extensions import hyphen
        path = path_to_dictionary(name)
        fd = os.open(path, getattr(os, 'O_BINARY', 0) | os.O_RDONLY)
        return hyphen.load_dictionary(fd)


def add_soft_hyphens(word, dictionary, hyphen_char='\u00ad'):
    word = str(word)
    if len(word) > 99 or '=' in word:
        return word
    q = word
    q = q.replace(hyphen_char, '')
    if len(q) < 4:
        return word
    lq = q.lower()  # the hyphen library needs lowercase words to work
    from calibre_extensions import hyphen
    try:
        ans = hyphen.simple_hyphenate(dictionary, lq)
    except ValueError:
        # Can happen if the word requires non-standard hyphenation (i.e.
        # replacements)
        return word
    parts = ans.split('=')
    if len(parts) == 1:
        return word
    if lq != q:
        aparts = []
        pos = 0
        for p in parts:
            lp = len(p)
            aparts.append(q[pos:pos+lp])
            pos += lp
        parts = aparts
    return hyphen_char.join(parts)


tags_not_to_hyphenate = frozenset((
    'video', 'audio', 'script', 'code', 'pre', 'img', 'br', 'samp', 'kbd',
    'var', 'abbr', 'acronym', 'sub', 'sup', 'button', 'option', 'label',
    'textarea', 'input', 'math', 'svg', 'style', 'title', 'head'
))


def barename(x):
    return x.split('}', 1)[-1]


def words_pat():
    ans = getattr(words_pat, 'ans', None)
    if ans is None:
        ans = words_pat.ans = regex.compile(r'\w+', REGEX_FLAGS)
    return ans


def add_soft_hyphens_to_words(words, dictionary, hyphen_char='\u00ad'):
    pos = 0
    parts = []
    for m in words_pat().finditer(words):
        word = m.group()
        if m.start() > pos:
            parts.append(words[pos:m.start()])
        parts.append(add_soft_hyphens(word, dictionary, hyphen_char))
        pos = m.end()
    if pos < len(words):
        parts.append(words[pos:])
    return ''.join(parts)


def add_to_tag(stack, elem, locale, hyphen_char):
    name = barename(elem.tag)
    if name in tags_not_to_hyphenate:
        return
    tl = elem.get('lang') or elem.get('{http://www.w3.org/XML/1998/namespace}lang') or locale
    dictionary = dictionary_for_locale(tl)
    if dictionary is not None and elem.text and not elem.text.isspace():
        elem.text = add_soft_hyphens_to_words(elem.text, dictionary, hyphen_char)
    for child in elem:
        if dictionary is not None and child.tail and not child.tail.isspace():
            child.tail = add_soft_hyphens_to_words(child.tail, dictionary, hyphen_char)
        if not callable(getattr(child, 'tag', None)):
            stack.append((child, tl))


def add_soft_hyphens_to_html(root, locale='en', hyphen_char='\u00ad'):
    stack = [(root, locale)]
    while stack:
        elem, locale = stack.pop()
        add_to_tag(stack, elem, locale, hyphen_char)


def remove_soft_hyphens_from_html(root, hyphen_char='\u00ad'):
    for elem in root.iterdescendants():
        if elem.tail:
            elem.tail = elem.tail.replace(hyphen_char, '')
        text = getattr(elem, 'text', None)
        if text:
            elem.text = elem.text.replace(hyphen_char, '')

Zerion Mini Shell 1.0