%PDF- %PDF-
Direktori : /usr/lib/calibre/calibre/ebooks/docx/ |
Current File : //usr/lib/calibre/calibre/ebooks/docx/fonts.py |
#!/usr/bin/env python3 __license__ = 'GPL v3' __copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>' import os, re from collections import namedtuple from calibre.ebooks.docx.block_styles import binary_property, inherit from calibre.utils.filenames import ascii_filename from calibre.utils.fonts.scanner import font_scanner, NoFonts from calibre.utils.fonts.utils import panose_to_css_generic_family, is_truetype_font from calibre.utils.icu import ord_string from polyglot.builtins import codepoint_to_chr, iteritems Embed = namedtuple('Embed', 'name key subsetted') def has_system_fonts(name): try: return bool(font_scanner.fonts_for_family(name)) except NoFonts: return False def get_variant(bold=False, italic=False): return {(False, False):'Regular', (False, True):'Italic', (True, False):'Bold', (True, True):'BoldItalic'}[(bold, italic)] def find_fonts_matching(fonts, style='normal', stretch='normal'): for font in fonts: if font['font-style'] == style and font['font-stretch'] == stretch: yield font def weight_key(font): w = font['font-weight'] try: return abs(int(w) - 400) except Exception: return abs({'normal': 400, 'bold': 700}.get(w, 1000000) - 400) def get_best_font(fonts, style, stretch): try: return sorted(find_fonts_matching(fonts, style, stretch), key=weight_key)[0] except Exception: pass class Family: def __init__(self, elem, embed_relationships, XPath, get): self.name = self.family_name = get(elem, 'w:name') self.alt_names = tuple(get(x, 'w:val') for x in XPath('./w:altName')(elem)) if self.alt_names and not has_system_fonts(self.name): for x in self.alt_names: if has_system_fonts(x): self.family_name = x break self.embedded = {} for x in ('Regular', 'Bold', 'Italic', 'BoldItalic'): for y in XPath('./w:embed%s[@r:id]' % x)(elem): rid = get(y, 'r:id') key = get(y, 'w:fontKey') subsetted = get(y, 'w:subsetted') in {'1', 'true', 'on'} if rid in embed_relationships: self.embedded[x] = Embed(embed_relationships[rid], key, subsetted) self.generic_family = 'auto' for x in XPath('./w:family[@w:val]')(elem): self.generic_family = get(x, 'w:val', 'auto') ntt = binary_property(elem, 'notTrueType', XPath, get) self.is_ttf = ntt is inherit or not ntt self.panose1 = None self.panose_name = None for x in XPath('./w:panose1[@w:val]')(elem): try: v = get(x, 'w:val') v = tuple(int(v[i:i+2], 16) for i in range(0, len(v), 2)) except (TypeError, ValueError, IndexError): pass else: self.panose1 = v self.panose_name = panose_to_css_generic_family(v) self.css_generic_family = {'roman':'serif', 'swiss':'sans-serif', 'modern':'monospace', 'decorative':'fantasy', 'script':'cursive'}.get(self.generic_family, None) self.css_generic_family = self.css_generic_family or self.panose_name or 'serif' SYMBOL_MAPS = { # {{{ 'Wingdings': (' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '๐', 'โ', 'โ', '๐', '๐ญ', '๐ฎ', '๐ฏ', '๐ฟ', 'โ', '๐', '๐', '๐ช', '๐ซ', '๐ฌ', '๐ญ', '๐', '๐', '๐', '๐', '๐', '๐', 'โณ', '๐ฎ', '๐ฐ', '๐ฒ', '๐ณ', '๐ด', '๐ซ', '๐ฌ', 'โ', 'โ', '๐', 'โ', '๐', '๐', '๐', 'โ', 'โ', 'โ', '๐', '๐', 'โบ', '๐', 'โน', '๐ฃ', '๐ฑ', '๐ณ', '๐ฑ', 'โ', 'โผ', '๐ข', 'โ', '๐', 'โ', '๐', 'โ ', 'โก', 'โช', 'โฏ', '๐', 'โธ', 'โ', 'โ', 'โ', 'โ', 'โ', 'โ', 'โ', 'โ', 'โ', 'โ', 'โ', 'โ', '๐ฐ', '๐ต', 'โซ', '๐พ', 'โผ', '๐', '๐', 'โ', 'โ', '๐', 'โงซ', 'โ', 'โ', '๐', 'โง', 'โฎน', 'โ', '๐ต', '๐ถ', '๐ถ', '๐ท', ' ', '๐', 'โ', 'โ', 'โ', 'โ', 'โ', 'โ ', 'โ', 'โ', 'โ', 'โ', '๐', 'โ', 'โ', 'โ', 'โ', 'โ', 'โ', 'โ', 'โ', 'โ', 'โ', '๐ข', '๐ ', '๐ก', '๐ฃ', '๐ฆ', '๐ค', '๐ฅ', '๐ง', 'โ', 'โข', 'โฌ', 'โญ', '๐', '๐', '๐', '๐', '๐ฟ', 'โช', '๐', '๐', '๐', 'โ ', '๐', '๐', '๐', '๐', 'โฏ', 'โ', 'โฏ', 'โฏ', 'โฏ', 'โช', 'โฐ', '๐', '๐', '๐', '๐', '๐', '๐', '๐', '๐', '๐', '๐', '๐', '๐', 'โฎฐ', 'โฎฑ', 'โฎฒ', 'โฎณ', 'โฎด', 'โฎต', 'โฎถ', 'โฎท', '๐ช', '๐ซ', '๐', '๐', '๐', '๐', '๐', '๐', '๐', '๐', 'โซ', 'โฆ', 'โฎ', 'โฎ', 'โฎ', 'โฎ', 'โฎ', 'โฎ', 'โฎ', 'โฎ', '๐กจ', '๐กช', '๐กฉ', '๐กซ', '๐กฌ', '๐กญ', '๐กฏ', '๐กฎ', '๐กธ', '๐กบ', '๐กน', '๐กป', '๐กผ', '๐กฝ', '๐กฟ', '๐กพ', 'โฆ', 'โจ', 'โง', 'โฉ', 'โฌ', 'โณ', 'โฌ', 'โฌ', 'โฌ', 'โฌ', '๐ขฌ', '๐ขญ', '๐ถ', 'โ', '๐ท', '๐น', ' '), # noqa 'Wingdings 2': (' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '๐', '๐', '๐', '๐', 'โ', 'โ', '๐พ', '๐ฝ', '๐ ', '๐', '๐', '๐', '๐', '๐', '๐', '๐', '๐', '๐', '๐', '๐', '๐ต', '๐ถ', '๐ท', '๐ธ', '๐ญ', '๐ฏ', '๐ฑ', '๐', '๐', '๐', '๐', '๐', '๐', '๐', '๐', '๐', '๐', '๐', '๐', '๐ ', '๐ก', '๐', '๐', '๐ข', '๐ฃ', '๐', '๐ด', '๐ธ', '๐ต', 'โ', 'โฎฝ', 'โ', 'โฎพ', 'โฎฟ', '๐', 'โฆธ', '๐ฑ', '๐ด', '๐ฒ', '๐ณ', 'โฝ', '๐น', '๐บ', '๐ป', '๐ฆ', '๐ค', '๐ฅ', '๐ง', '๐', '๐', '๐', '๐', 'โช', 'โ ', 'โก', 'โข', 'โฃ', 'โค', 'โฅ', 'โฆ', 'โง', 'โจ', 'โฉ', 'โฟ', 'โถ', 'โท', 'โธ', 'โน', 'โบ', 'โป', 'โผ', 'โฝ', 'โพ', 'โฟ', ' ', 'โ', '๐', 'โฝ', 'โพ', 'โธฟ', 'โ', '๐', '๐', '๐', '๐', '๐', '๐ ', '๐ก', '๐ข', '๐ฃ', '๐ค', '๐ฅ', '๐ฆ', '๐ง', '๐จ', '๐ฉ', 'โ ', '๐', 'โฆ', 'โ', 'โ', '๐ ', '๐', '๐', 'โ', 'โฆฟ', '๐', '๐', 'โพ', 'โ ', 'โก', '๐', '๐', '๐', '๐', 'โฃ', '๐', '๐', '๐', '๐', 'โฌฉ', 'โฌฅ', 'โ', '๐', 'โ', '๐', '๐', '๐', '๐', 'โฌช', 'โฌง', 'โ', '๐ ', 'โ', 'โ', 'โฏ', 'โฏ', 'โฏ', 'โฏ', 'โฌ', 'โฏ', 'โฌฃ', 'โฌข', 'โฏ', 'โฏ', '๐ก', '๐ข', '๐ฃ', '๐ค', '๐ฅ', '๐ฆ', '๐ง', '๐จ', '๐ฉ', '๐ช', '๐ซ', '๐ฌ', '๐ญ', '๐ฎ', '๐ฏ', '๐ฐ', '๐ฑ', '๐ฒ', '๐ณ', '๐ด', '๐ต', '๐ถ', '๐ท', '๐ธ', '๐น', '๐บ', '๐ป', '๐ผ', '๐ฝ', '๐พ', '๐ฟ', '๐', '๐', '๐', '๐', '๐', '๐', 'โถ', '๐', '๐', '๐', '๐', 'โน', '๐', '๐', 'โฏ', '๐', '๐', 'โฏ', 'โฏ', 'โป', 'โ', ' ', ' ', ' ', ' ', ' ', ' ',), # noqa 'Wingdings 3': (' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', 'โญ ', 'โญข', 'โญก', 'โญฃ', 'โญค', 'โญฅ', 'โญง', 'โญฆ', 'โญฐ', 'โญฒ', 'โญฑ', 'โญณ', 'โญถ', 'โญธ', 'โญป', 'โญฝ', 'โญค', 'โญฅ', 'โญช', 'โญฌ', 'โญซ', 'โญญ', 'โญ', 'โฎ ', 'โฎก', 'โฎข', 'โฎฃ', 'โฎค', 'โฎฅ', 'โฎฆ', 'โฎง', 'โฎ', 'โฎ', 'โฎ', 'โฎ', 'โฎ', 'โฎ', 'โญพ', 'โญฟ', 'โฎ', 'โฎ', 'โฎ ', 'โฎ', 'โฎ', 'โฎ', 'โฎ', 'โฎ', 'โญฎ', 'โญฏ', 'โ', 'โค', 'โ', 'โฅ', 'โฃ', 'โฝ', 'โช', 'โฎธ', '๐ข ', '๐ขก', '๐ขข', '๐ขฃ', '๐ขค', '๐ขฅ', '๐ขฆ', '๐ขง', '๐ขจ', '๐ขฉ', '๐ขช', '๐ขซ', '๐ก', '๐ก', '๐ก', '๐ก', '๐ก', '๐ก', '๐ก', '๐ก', '๐ก', '๐ก', 'โฒ', 'โผ', 'โณ', 'โฝ', 'โ', 'โถ', 'โ', 'โท', 'โฃ', 'โข', 'โค', 'โฅ', '๐', '๐', '๐', ' ', '๐', 'โฏ ', 'โฏ', 'โฏ', 'โฏ', 'โฎ', 'โฎ', 'โฎ', 'โฎ', '๐ ', '๐ ', '๐ ', '๐ ', '๐ ', '๐ ', '๐ ', '๐ ', '๐ ', '๐ ', '๐ ', '๐ ', '๐ ', '๐ ', '๐ ', '๐ ', '๐ ', '๐ ', '๐ ', '๐ ', '๐ ', '๐ ', '๐ ', '๐ ', '๐ ', '๐ ', '๐ ', '๐ ', '๐ ', '๐ ข', '๐ ค', '๐ ฆ', '๐ จ', '๐ ช', '๐ ฌ', '๐ข', '๐ข', '๐ข', '๐ข', '๐ ฎ', '๐ ฐ', '๐ ฒ', '๐ ด', '๐ ถ', '๐ ธ', '๐ บ', '๐ น', '๐ ป', '๐ข', '๐ข', '๐ข', '๐ข', '๐ ผ', '๐ พ', '๐ ฝ', '๐ ฟ', '๐ก', '๐ก', '๐ก', '๐ก', '๐ก', '๐ก', '๐ก ', '๐ก', 'โฎจ', 'โฎฉ', 'โฎช', 'โฎซ', 'โฎฌ', 'โฎญ', 'โฎฎ', 'โฎฏ', '๐ก ', '๐กข', '๐กก', '๐กฃ', '๐กค', '๐กฅ', '๐กง', '๐กฆ', '๐กฐ', '๐กฒ', '๐กฑ', '๐กณ', '๐กด', '๐กต', '๐กท', '๐กถ', '๐ข', '๐ข', '๐ข', '๐ข', '๐ข', '๐ข ', '๐ข', '๐ข', '๐ข', '๐ข', '๐ข', '๐ข', '๐ข', '๐ข', '๐ข', '๐ข', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ',), # noqa 'Webdings': (' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '๐ท', '๐ธ', '๐ฒ', '๐ถ', '๐', '๐', '๐', '๐จ', '๐ฉ', '๐ฐ', '๐ฑ', '๐ถ', '๐', '๐พ', '๐ผ', '๐', '๐', '๐', 'โด', 'โต', 'โถ', 'โท', 'โช', 'โฉ', 'โฎ', 'โญ', 'โธ', 'โน', 'โบ', '๐', '๐ณ', '๐ ', '๐', '๐', '๐', '๐', '๐', '๐ญ', '๐', '๐ ', '๐', '๐', '๐ฃ', '๐', '๐', '๐', '๐', '๐', '๐', '๐ค', '๐', '๐ณ', '๐ฌ', '๐ซ', '๐จ', '๐', '๐', '๐', '๐ฌ', '๐ฝ', '๐ญ', '๐ช', '๐ซ', 'โฎ', 'โ', '๐ฒ', 'โฌ', '๐ก', '๐ฆ', '๐ฑ', 'โฌ', '๐', '๐', '๐ฉ', '๐ฐ', '๐', '๐ด', 'โฌค', '๐ฅ', '๐', '๐', '๐', 'โ', '๐ฒ', '๐', '๐', 'โณ', 'โฆธ', 'โ', '๐ญ', '๐ฎ', 'โ', '๐ฏ', '๐ฒ', ' ', '๐น', '๐บ', '๐', '๐', '๐ผ', '๐ฝ', '๐', 'โท', '๐', '๐', '๐', '๐', '๐', '๐', '๐', '๐ ', '๐ข', '๐ ', '๐ท', '๐ฃ', '๐ช', '๐ก', '๐ข', '๐ฃ', 'โฏ', '๐', '๐ ', '๐', '๐', '๐น', '๐บ', '๐ป', '๐ต', '๐ฐ', '๐ฝ', '๐พ', '๐', '๐', '๐', '๐ฎ', '๐', '๐', '๐', '๐', '๐', '๐ผ', '๐ญ', '๐', '๐', '๐', '๐ง', '๐ฟ', '๐', '๐ท', '๐', '๐ฌ', '๐ฝ', '๐น', '๐พ', '๐ป', '๐', '๐', '๐บ', '๐ป', '๐ฅ', '๐ฆ', '๐ง', '๐น', '๐ฎ', '๐ฎ', '๐ป', '๐ผ', '๐', '๐', '๐จ', '๐ฉ', '๐ฟ', '๐ช', '๐', '๐', '๐', '๐', '๐ฅ', '๐ค', '๐ณ', '๐ฃ', '๐ค', '๐ฅ', '๐ฆ', 'โ', '๐จ', '๐ง', '๐ฉ', '๐ช', '๐ฌ', '๐ซ', '๐', '๐ก', '๐', '๐', '๐ฝ', '๐ธ', '๐', '๐', 'โ ', 'โฟ', '๐', '๐', '๐', '๐ค', '๐ฅ', '๐ฆ', '๐ง', '๐ช', '๐ฟ', '๐ฆ', '๐', '๐', '๐', '๐ฌ', '๐ฎ', '๐ญ', '๐ฏ', '๐บ', '๐', '๐', '๐', '๐',), # noqa 'Symbol': (' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '!', 'โ', '#', 'โ', '%', '&', 'โ', '(', ')', '*', '+', ',', 'โ', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?', 'โ ', 'ฮ', 'ฮ', 'ฮง', 'ฮ', 'ฮ', 'ฮฆ', 'ฮ', 'ฮ', 'ฮ', 'ฯ', 'ฮ', 'ฮ', 'ฮ', 'ฮ', 'ฮ', 'ฮ ', 'ฮ', 'ฮก', 'ฮฃ', 'ฮค', 'ฮฅ', 'ฯ', 'ฮฉ', 'ฮ', 'ฮจ', 'ฮ', '[', 'โด', ']', 'โฅ', '_', '๏ฃฅ', 'ฮฑ', 'ฮฒ', 'ฯ', 'ฮด', 'ฮต', 'ฯ', 'ฮณ', 'ฮท', 'ฮน', 'ฯ', 'ฮป', 'ฮผ', 'ฮฝ', 'ฮพ', 'ฮฟ', 'ฯ', 'ฮธ', 'ฯ', 'ฯ', 'ฯ', 'ฯ ', 'ฯ', 'ฯ', 'ฮพ', 'ฯ', 'ฮถ', '{', '|', '}', '~', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', 'โฌ', 'ฯ', 'โฒ', 'โค', 'โ', 'โ', 'ฦ', 'โฃ', 'โฅ', 'โฆ', 'โ ', 'โ', 'โ', 'โ', 'โ', 'โ', 'ยฐ', 'ยฑ', 'โณ', 'โฅ', 'ร', 'โ', 'โ', 'โข', 'รท', 'โ ', 'โก', 'โ', 'โฆ', 'โ', 'โฏ', 'โฒ', 'โต', 'โ', 'โ', 'โ', 'โ', 'โ', 'โ ', 'โฉ', 'โช', 'โ', 'โ', 'โ', 'โ', 'โ', 'โ', 'โ', 'โ ', 'โ', 'ยฎ', 'ยฉ', 'โข', 'โ', 'โ', 'โ ', 'ยฌ', 'โฆ', 'โง', 'โ', 'โ', 'โ', 'โ', 'โ', 'โ', 'ใ', 'ยฎ', 'ยฉ', 'โข', 'โ', 'โ', 'โ', 'โ', 'โก', 'โข', 'โฃ', 'โง', 'โจ', 'โฉ', 'โช', ' ', 'ใ', 'โซ', 'โ ', 'โฎ', 'โก', 'โ', 'โ', 'โ ', 'โค', 'โฅ', 'โฆ', 'โช', 'โซ', 'โฌ', ' ',), # noqa } # }}} SYMBOL_FONT_NAMES = frozenset(n.lower() for n in SYMBOL_MAPS) def is_symbol_font(family): try: return family.lower() in SYMBOL_FONT_NAMES except AttributeError: return False def do_map(m, points): base = 0xf000 limit = len(m) + base for p in points: if base < p < limit: yield m[p - base] else: yield codepoint_to_chr(p) def map_symbol_text(text, font): m = SYMBOL_MAPS[font] if isinstance(text, bytes): text = text.decode('utf-8') return ''.join(do_map(m, ord_string(text))) class Fonts: def __init__(self, namespace): self.namespace = namespace self.fonts = {} self.used = set() def __call__(self, root, embed_relationships, docx, dest_dir): for elem in self.namespace.XPath('//w:font[@w:name]')(root): self.fonts[self.namespace.get(elem, 'w:name')] = Family(elem, embed_relationships, self.namespace.XPath, self.namespace.get) def family_for(self, name, bold=False, italic=False): f = self.fonts.get(name, None) if f is None: return 'serif' variant = get_variant(bold, italic) self.used.add((name, variant)) name = f.name if variant in f.embedded else f.family_name if is_symbol_font(name): return name return '"{}", {}'.format(name.replace('"', ''), f.css_generic_family) def embed_fonts(self, dest_dir, docx): defs = [] dest_dir = os.path.join(dest_dir, 'fonts') for name, variant in self.used: f = self.fonts[name] if variant in f.embedded: if not os.path.exists(dest_dir): os.mkdir(dest_dir) fname = self.write(name, dest_dir, docx, variant) if fname is not None: d = {'font-family':'"%s"' % name.replace('"', ''), 'src': 'url("fonts/%s")' % fname} if 'Bold' in variant: d['font-weight'] = 'bold' if 'Italic' in variant: d['font-style'] = 'italic' d = [f'{k}: {v}' for k, v in iteritems(d)] d = ';\n\t'.join(d) defs.append('@font-face {\n\t%s\n}\n' % d) return '\n'.join(defs) def write(self, name, dest_dir, docx, variant): f = self.fonts[name] ef = f.embedded[variant] raw = docx.read(ef.name) prefix = raw[:32] if ef.key: key = re.sub(r'[^A-Fa-f0-9]', '', ef.key) key = bytearray(reversed(tuple(int(key[i:i+2], 16) for i in range(0, len(key), 2)))) prefix = bytearray(prefix) prefix = bytes(bytearray(prefix[i]^key[i % len(key)] for i in range(len(prefix)))) if not is_truetype_font(prefix): return None ext = 'otf' if prefix.startswith(b'OTTO') else 'ttf' fname = ascii_filename(f'{name} - {variant}.{ext}').replace(' ', '_').replace('&', '_') with open(os.path.join(dest_dir, fname), 'wb') as dest: dest.write(prefix) dest.write(raw[32:]) return fname