%PDF- %PDF-
Direktori : /lib/calibre/calibre/utils/ |
Current File : //lib/calibre/calibre/utils/localization.py |
#!/usr/bin/env python3 __license__ = 'GPL v3' __copyright__ = '2009, Kovid Goyal <kovid@kovidgoyal.net>' __docformat__ = 'restructuredtext en' import os, locale, re, io from gettext import GNUTranslations, NullTranslations from polyglot.builtins import iteritems _available_translations = None def available_translations(): global _available_translations if _available_translations is None: stats = P('localization/stats.calibre_msgpack', allow_user_override=False) if os.path.exists(stats): from calibre.utils.serialize import msgpack_loads with open(stats, 'rb') as f: stats = msgpack_loads(f.read()) else: stats = {} _available_translations = [x for x in stats if stats[x] > 0.1] return _available_translations def get_system_locale(): from calibre.constants import iswindows, ismacos lang = None if iswindows: try: from calibre.constants import get_windows_user_locale_name lang = get_windows_user_locale_name() lang = lang.strip() if not lang: lang = None except: pass # Windows XP does not have the GetUserDefaultLocaleName fn elif ismacos: from calibre_extensions.usbobserver import user_locale try: lang = user_locale() or None except Exception: # Fallback to environment vars if something bad happened import traceback traceback.print_exc() if lang is None: try: envvars = ['LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LC_MESSAGES', 'LANG'] lang = locale.getdefaultlocale(envvars)[0] # lang is None in two cases: either the environment variable is not # set or it's "C". Stop looking for a language in the latter case. if lang is None: for var in envvars: if os.environ.get(var) == 'C': lang = 'en_US' break except: pass # This happens on Ubuntu apparently if lang is None and 'LANG' in os.environ: # Needed for OS X try: lang = os.environ['LANG'] except: pass if lang: lang = lang.replace('-', '_') lang = '_'.join(lang.split('_')[:2]) return lang def sanitize_lang(lang): if lang: match = re.match('[a-z]{2,3}(_[A-Z]{2}){0,1}', lang) if match: lang = match.group() if lang == 'zh': lang = 'zh_CN' if not lang: lang = 'en' return lang def get_lang(): 'Try to figure out what language to display the interface in' from calibre.utils.config_base import prefs lang = prefs['language'] lang = os.environ.get('CALIBRE_OVERRIDE_LANG', lang) if lang: return lang try: lang = get_system_locale() except: import traceback traceback.print_exc() lang = None return sanitize_lang(lang) def is_rtl(): return get_lang()[:2].lower() in {'he', 'ar'} def get_lc_messages_path(lang): hlang = None if zf_exists(): if lang in available_translations(): hlang = lang else: xlang = lang.split('_')[0].lower() if xlang in available_translations(): hlang = xlang return hlang def zf_exists(): return os.path.exists(P('localization/locales.zip', allow_user_override=False)) _lang_trans = None def get_all_translators(): from zipfile import ZipFile with ZipFile(P('localization/locales.zip', allow_user_override=False), 'r') as zf: for lang in available_translations(): mpath = get_lc_messages_path(lang) if mpath is not None: buf = io.BytesIO(zf.read(mpath + '/messages.mo')) yield lang, GNUTranslations(buf) def get_single_translator(mpath, which='messages'): from zipfile import ZipFile with ZipFile(P('localization/locales.zip', allow_user_override=False), 'r') as zf: path = f'{mpath}/{which}.mo' data = zf.read(path) buf = io.BytesIO(data) try: return GNUTranslations(buf) except Exception as e: import traceback traceback.print_exc() import hashlib sig = hashlib.sha1(data).hexdigest() raise ValueError('Failed to load translations for: {} (size: {} and signature: {}) with error: {}'.format( path, len(data), sig, e)) def get_iso639_translator(lang): lang = sanitize_lang(lang) mpath = get_lc_messages_path(lang) if lang else None return get_single_translator(mpath, 'iso639') if mpath else None def get_translator(bcp_47_code): parts = bcp_47_code.replace('-', '_').split('_')[:2] parts[0] = lang_as_iso639_1(parts[0].lower()) or 'en' if len(parts) > 1: parts[1] = parts[1].upper() lang = '_'.join(parts) lang = {'pt':'pt_BR', 'zh':'zh_CN'}.get(lang, lang) available = available_translations() found = True if lang == 'en' or lang.startswith('en_'): return found, lang, NullTranslations() if lang not in available: lang = {'pt':'pt_BR', 'zh':'zh_CN'}.get(parts[0], parts[0]) if lang not in available: lang = get_lang() if lang not in available: lang = 'en' found = False if lang == 'en': return True, lang, NullTranslations() return found, lang, get_single_translator(lang) lcdata = { 'abday': ('Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat'), 'abmon': ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'), 'd_fmt': '%m/%d/%Y', 'd_t_fmt': '%a %d %b %Y %r %Z', 'day': ('Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'), 'mon': ('January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December'), 'noexpr': '^[nN].*', 'radixchar': '.', 't_fmt': '%r', 't_fmt_ampm': '%I:%M:%S %p', 'thousep': ',', 'yesexpr': '^[yY].*' } def load_po(path): from calibre.translations.msgfmt import make buf = io.BytesIO() try: make(path, buf) except Exception: print(('Failed to compile translations file: %s, ignoring') % path) buf = None else: buf = io.BytesIO(buf.getvalue()) return buf def translator_for_lang(lang): t = buf = iso639 = lcdata = None if 'CALIBRE_TEST_TRANSLATION' in os.environ: buf = load_po(os.path.expanduser(os.environ['CALIBRE_TEST_TRANSLATION'])) mpath = get_lc_messages_path(lang) if buf is None and mpath and os.access(mpath + '.po', os.R_OK): buf = load_po(mpath + '.po') if mpath is not None: from zipfile import ZipFile with ZipFile(P('localization/locales.zip', allow_user_override=False), 'r') as zf: if buf is None: buf = io.BytesIO(zf.read(mpath + '/messages.mo')) if mpath == 'nds': mpath = 'de' isof = mpath + '/iso639.mo' try: iso639 = io.BytesIO(zf.read(isof)) except: pass # No iso639 translations for this lang if buf is not None: from calibre.utils.serialize import msgpack_loads try: lcdata = msgpack_loads(zf.read(mpath + '/lcdata.calibre_msgpack')) except: pass # No lcdata if buf is not None: try: t = GNUTranslations(buf) except Exception: import traceback traceback.print_exc() t = None if iso639 is not None: try: iso639 = GNUTranslations(iso639) except Exception: iso639 = None else: if t is not None: t.add_fallback(iso639) if t is None: t = NullTranslations() return {'translator': t, 'iso639_translator': iso639, 'lcdata': lcdata} def set_translators(): global _lang_trans, lcdata # To test different translations invoke as # CALIBRE_OVERRIDE_LANG=de_DE.utf8 program lang = get_lang() if lang: q = translator_for_lang(lang) t = q['translator'] _lang_trans = q['iso639_translator'] if q['lcdata']: lcdata = q['lcdata'] else: t = NullTranslations() try: set_translators.lang = t.info().get('language') except Exception: pass t.install(names=('ngettext',)) # Now that we have installed a translator, we have to retranslate the help # for the global prefs object as it was instantiated in get_lang(), before # the translator was installed. from calibre.utils.config_base import prefs prefs.retranslate_help() set_translators.lang = None _iso639 = None _extra_lang_codes = { 'pt_BR' : _('Brazilian Portuguese'), 'en_GB' : _('English (United Kingdom)'), 'zh_CN' : _('Simplified Chinese'), 'zh_TW' : _('Traditional Chinese'), 'en' : _('English'), 'en_US' : _('English (United States)'), 'en_AR' : _('English (Argentina)'), 'en_AU' : _('English (Australia)'), 'en_JP' : _('English (Japan)'), 'en_DE' : _('English (Germany)'), 'en_BG' : _('English (Bulgaria)'), 'en_EG' : _('English (Egypt)'), 'en_NZ' : _('English (New Zealand)'), 'en_CA' : _('English (Canada)'), 'en_GR' : _('English (Greece)'), 'en_IN' : _('English (India)'), 'en_NP' : _('English (Nepal)'), 'en_TH' : _('English (Thailand)'), 'en_TR' : _('English (Turkey)'), 'en_CY' : _('English (Cyprus)'), 'en_CZ' : _('English (Czech Republic)'), 'en_PH' : _('English (Philippines)'), 'en_PK' : _('English (Pakistan)'), 'en_PL' : _('English (Poland)'), 'en_HR' : _('English (Croatia)'), 'en_HU' : _('English (Hungary)'), 'en_ID' : _('English (Indonesia)'), 'en_IL' : _('English (Israel)'), 'en_RU' : _('English (Russia)'), 'en_SG' : _('English (Singapore)'), 'en_YE' : _('English (Yemen)'), 'en_IE' : _('English (Ireland)'), 'en_CN' : _('English (China)'), 'en_TW' : _('English (Taiwan)'), 'en_ZA' : _('English (South Africa)'), 'es_PY' : _('Spanish (Paraguay)'), 'es_UY' : _('Spanish (Uruguay)'), 'es_AR' : _('Spanish (Argentina)'), 'es_CR' : _('Spanish (Costa Rica)'), 'es_MX' : _('Spanish (Mexico)'), 'es_CU' : _('Spanish (Cuba)'), 'es_CL' : _('Spanish (Chile)'), 'es_EC' : _('Spanish (Ecuador)'), 'es_HN' : _('Spanish (Honduras)'), 'es_VE' : _('Spanish (Venezuela)'), 'es_BO' : _('Spanish (Bolivia)'), 'es_NI' : _('Spanish (Nicaragua)'), 'es_CO' : _('Spanish (Colombia)'), 'de_AT' : _('German (Austria)'), 'fr_BE' : _('French (Belgium)'), 'nl' : _('Dutch (Netherlands)'), 'nl_BE' : _('Dutch (Belgium)'), 'und' : _('Unknown') } if False: # Extra strings needed for Qt # NOTE: Ante Meridian (i.e. like 10:00 AM) _('AM') # NOTE: Post Meridian (i.e. like 10:00 PM) _('PM') # NOTE: Ante Meridian (i.e. like 10:00 am) _('am') # NOTE: Post Meridian (i.e. like 10:00 pm) _('pm') _('&Copy') _('Select All') _('Copy Link') _('&Select All') _('Copy &Link Location') _('&Undo') _('&Redo') _('Cu&t') _('&Paste') _('Paste and Match Style') _('Directions') _('Left to Right') _('Right to Left') _('Fonts') _('&Step up') _('Step &down') _('Close without Saving') _('Close Tab') _lcase_map = {} for k in _extra_lang_codes: _lcase_map[k.lower()] = k def _load_iso639(): global _iso639 if _iso639 is None: ip = P('localization/iso639.calibre_msgpack', allow_user_override=False, data=True) from calibre.utils.serialize import msgpack_loads _iso639 = msgpack_loads(ip) if 'by_3' not in _iso639: _iso639['by_3'] = _iso639['by_3t'] return _iso639 def get_iso_language(lang_trans, lang): iso639 = _load_iso639() ans = lang lang = lang.split('_')[0].lower() if len(lang) == 2: ans = iso639['by_2'].get(lang, ans) elif len(lang) == 3: if lang in iso639['by_3']: ans = iso639['by_3'][lang] return lang_trans(ans) def get_language(lang, gettext_func=None): translate = gettext_func or _ lang = _lcase_map.get(lang, lang) if lang in _extra_lang_codes: # The translator was not active when _extra_lang_codes was defined, so # re-translate return translate(_extra_lang_codes[lang]) if gettext_func is None: gettext_func = getattr(_lang_trans, 'gettext', translate) return get_iso_language(gettext_func, lang) def calibre_langcode_to_name(lc, localize=True): iso639 = _load_iso639() translate = _ if localize else lambda x: x try: return translate(iso639['by_3'][lc]) except: pass return lc def canonicalize_lang(raw): if not raw: return None if not isinstance(raw, str): raw = raw.decode('utf-8', 'ignore') raw = raw.lower().strip() if not raw: return None raw = raw.replace('_', '-').partition('-')[0].strip() if not raw: return None iso639 = _load_iso639() m2to3 = iso639['2to3'] if len(raw) == 2: ans = m2to3.get(raw, None) if ans is not None: return ans elif len(raw) == 3: if raw in iso639['by_3']: return raw return iso639['name_map'].get(raw, None) _lang_map = None def lang_map(): ' Return mapping of ISO 639 3 letter codes to localized language names ' iso639 = _load_iso639() translate = _ global _lang_map if _lang_map is None: _lang_map = {k:translate(v) for k, v in iteritems(iso639['by_3'])} return _lang_map def lang_map_for_ui(): ans = getattr(lang_map_for_ui, 'ans', None) if ans is None: ans = lang_map().copy() for x in ('zxx', 'mis', 'mul'): ans.pop(x, None) lang_map_for_ui.ans = ans return ans def langnames_to_langcodes(names): ''' Given a list of localized language names return a mapping of the names to 3 letter ISO 639 language codes. If a name is not recognized, it is mapped to None. ''' iso639 = _load_iso639() translate = _ ans = {} names = set(names) for k, v in iteritems(iso639['by_3']): tv = translate(v) if tv in names: names.remove(tv) ans[tv] = k if not names: break for x in names: ans[x] = None return ans def lang_as_iso639_1(name_or_code): code = canonicalize_lang(name_or_code) if code is not None: iso639 = _load_iso639() return iso639['3to2'].get(code, None) _udc = None def get_udc(): global _udc if _udc is None: from calibre.ebooks.unihandecode import Unihandecoder _udc = Unihandecoder(lang=get_lang()) return _udc def user_manual_stats(): stats = getattr(user_manual_stats, 'stats', None) if stats is None: import json try: stats = json.loads(P('user-manual-translation-stats.json', allow_user_override=False, data=True)) except OSError: stats = {} user_manual_stats.stats = stats return stats def lang_code_for_user_manual(): lc = lang_as_iso639_1(get_lang()) if lc == 'en': return '' stats = user_manual_stats() if stats.get(lc, 0) < 0.3: return '' return lc def localize_user_manual_link(url): lc = lang_code_for_user_manual() if not lc: return url from polyglot.urllib import urlparse, urlunparse parts = urlparse(url) path = re.sub(r'/generated/[a-z]+/', '/generated/%s/' % lc, parts.path or '') path = f'/{lc}{path}' parts = list(parts) parts[2] = path return urlunparse(parts) def website_languages(): stats = getattr(website_languages, 'stats', None) if stats is None: try: stats = frozenset(P('localization/website-languages.txt', allow_user_override=False, data=True).decode('utf-8').split()) except OSError: stats = frozenset() website_languages.stats = stats return stats def localize_website_link(url): lc = lang_as_iso639_1(get_lang()) langs = website_languages() if lc == 'en' or lc not in langs: return url from polyglot.urllib import urlparse, urlunparse parts = urlparse(url) path = f'/{lc}{parts.path}' parts = list(parts) parts[2] = path return urlunparse(parts)