%PDF- %PDF-
| Direktori : /usr/lib/calibre/calibre/utils/fonts/sfnt/ |
| Current File : //usr/lib/calibre/calibre/utils/fonts/sfnt/subset.py |
#!/usr/bin/env python3
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import traceback
from collections import OrderedDict
from operator import itemgetter
from functools import partial
from calibre.utils.icu import safe_chr, ord_string
from calibre.utils.fonts.sfnt.container import Sfnt
from calibre.utils.fonts.sfnt.errors import UnsupportedFont, NoGlyphs
from polyglot.builtins import iteritems, itervalues
# TrueType outlines {{{
def resolve_glyphs(loca, glyf, character_map, extra_glyphs):
unresolved_glyphs = set(itervalues(character_map)) | extra_glyphs
unresolved_glyphs.add(0) # We always want the .notdef glyph
resolved_glyphs = {}
while unresolved_glyphs:
glyph_id = unresolved_glyphs.pop()
try:
offset, length = loca.glyph_location(glyph_id)
except (IndexError, ValueError, KeyError, TypeError):
continue
glyph = glyf.glyph_data(offset, length)
resolved_glyphs[glyph_id] = glyph
for gid in glyph.glyph_indices:
if gid not in resolved_glyphs:
unresolved_glyphs.add(gid)
return OrderedDict(sorted(iteritems(resolved_glyphs), key=itemgetter(0)))
def subset_truetype(sfnt, character_map, extra_glyphs):
loca = sfnt[b'loca']
glyf = sfnt[b'glyf']
try:
head, maxp = sfnt[b'head'], sfnt[b'maxp']
except KeyError:
raise UnsupportedFont('This font does not contain head and/or maxp tables')
loca.load_offsets(head, maxp)
resolved_glyphs = resolve_glyphs(loca, glyf, character_map, extra_glyphs)
if not resolved_glyphs or set(resolved_glyphs) == {0}:
raise NoGlyphs('This font has no glyphs for the specified character '
'set, subsetting it is pointless')
# Keep only character codes that have resolved glyphs
for code, glyph_id in tuple(iteritems(character_map)):
if glyph_id not in resolved_glyphs:
del character_map[code]
# Update the glyf table
glyph_offset_map = glyf.update(resolved_glyphs)
# Update the loca table
loca.subset(glyph_offset_map)
head.index_to_loc_format = 0 if loca.fmt == 'H' else 1
head.update()
maxp.num_glyphs = len(loca.offset_map) - 1
# }}}
def subset_postscript(sfnt, character_map, extra_glyphs):
cff = sfnt[b'CFF ']
cff.decompile()
cff.subset(character_map, extra_glyphs)
def do_warn(warnings, *args):
for arg in args:
for line in arg.splitlines():
if warnings is None:
print(line)
else:
warnings.append(line)
if warnings is None:
print()
else:
warnings.append('')
def pdf_subset(sfnt, glyphs):
for tag in tuple(sfnt.tables):
if tag not in {b'hhea', b'head', b'hmtx', b'maxp',
b'OS/2', b'post', b'cvt ', b'fpgm', b'glyf', b'loca',
b'prep', b'CFF ', b'VORG'}:
# Remove non core tables since they are unused in PDF rendering
del sfnt[tag]
if b'loca' in sfnt and b'glyf' in sfnt:
# TrueType Outlines
subset_truetype(sfnt, {}, glyphs)
elif b'CFF ' in sfnt:
# PostScript Outlines
subset_postscript(sfnt, {}, glyphs)
else:
raise UnsupportedFont('This font does not contain TrueType '
'or PostScript outlines')
def safe_ord(x):
return ord_string(str(x))[0]
def subset(raw, individual_chars, ranges=(), warnings=None):
warn = partial(do_warn, warnings)
chars = set()
for ic in individual_chars:
try:
chars.add(safe_ord(ic))
except ValueError:
continue
for r in ranges:
chars |= set(range(safe_ord(r[0]), safe_ord(r[1])+1))
# Always add the space character for ease of use from the command line
if safe_ord(' ') not in chars:
chars.add(safe_ord(' '))
sfnt = Sfnt(raw)
old_sizes = sfnt.sizes()
# Remove the Digital Signature table since it is useless in a subset
# font anyway
sfnt.pop(b'DSIG', None)
# Remove non core tables as they aren't likely to be used by renderers
# anyway
core_tables = {b'cmap', b'hhea', b'head', b'hmtx', b'maxp', b'name',
b'OS/2', b'post', b'cvt ', b'fpgm', b'glyf', b'loca', b'prep',
b'CFF ', b'VORG', b'EBDT', b'EBLC', b'EBSC', b'BASE', b'GSUB',
b'GPOS', b'GDEF', b'JSTF', b'gasp', b'hdmx', b'kern', b'LTSH',
b'PCLT', b'VDMX', b'vhea', b'vmtx', b'MATH'}
for tag in list(sfnt):
if tag not in core_tables:
del sfnt[tag]
try:
cmap = sfnt[b'cmap']
except KeyError:
raise UnsupportedFont('This font has no cmap table')
# Get mapping of chars to glyph ids for all specified chars
character_map = cmap.get_character_map(chars)
extra_glyphs = set()
if b'GSUB' in sfnt:
# Parse all substitution rules to ensure that glyphs that can be
# substituted for the specified set of glyphs are not removed
gsub = sfnt[b'GSUB']
try:
gsub.decompile()
extra_glyphs = gsub.all_substitutions(itervalues(character_map))
except UnsupportedFont as e:
warn('Usupported GSUB table: %s'%e)
except Exception:
warn('Failed to decompile GSUB table:', traceback.format_exc())
if b'loca' in sfnt and b'glyf' in sfnt:
# TrueType Outlines
subset_truetype(sfnt, character_map, extra_glyphs)
elif b'CFF ' in sfnt:
# PostScript Outlines
subset_postscript(sfnt, character_map, extra_glyphs)
else:
raise UnsupportedFont('This font does not contain TrueType '
'or PostScript outlines')
# Restrict the cmap table to only contain entries for the resolved glyphs
cmap.set_character_map(character_map)
if b'kern' in sfnt:
try:
sfnt[b'kern'].restrict_to_glyphs(frozenset(itervalues(character_map)))
except UnsupportedFont as e:
warn('kern table unsupported, ignoring: %s'%e)
except Exception:
warn('Subsetting of kern table failed, ignoring:',
traceback.format_exc())
raw, new_sizes = sfnt()
return raw, old_sizes, new_sizes
# CLI {{{
def option_parser():
import textwrap
from calibre.utils.config import OptionParser
parser = OptionParser(usage=textwrap.dedent('''\
%prog [options] input_font_file output_font_file characters_to_keep
Subset the specified font, keeping only the glyphs for the characters in
characters_to_keep. characters_to_keep is a comma separated list of characters of
the form: a,b,c,A-Z,0-9,xyz
You can specify ranges in the list of characters, as shown above.
'''))
parser.add_option('-c', '--codes', default=False, action='store_true',
help='If specified, the list of characters is interpreted as '
'numeric unicode codes instead of characters. So to specify the '
'characters a,b you would use 97,98 or U+0061,U+0062')
parser.prog = 'subset-font'
return parser
def print_stats(old_stats, new_stats):
from calibre import prints
prints('========= Table comparison (original vs. subset) =========')
prints('Table', ' ', '%10s'%'Size', ' ', 'Percent', ' ', '%10s'%'New Size',
' New Percent')
prints('='*80)
old_total = sum(itervalues(old_stats))
new_total = sum(itervalues(new_stats))
tables = sorted(old_stats, key=lambda x:old_stats[x],
reverse=True)
for table in tables:
osz = old_stats[table]
op = osz/old_total * 100
nsz = new_stats.get(table, 0)
np = nsz/new_total * 100
suffix = ' | same size'
if nsz != osz:
suffix = ' | reduced to %.1f %%'%(nsz/osz * 100)
prints('%4s'%table, ' ', '%10s'%osz, ' ', '%5.1f %%'%op, ' ',
'%10s'%nsz, ' ', '%5.1f %%'%np, suffix)
prints('='*80)
def main(args):
import sys, time
from calibre import prints
parser = option_parser()
opts, args = parser.parse_args(args)
if len(args) < 4 or len(args) > 4:
parser.print_help()
raise SystemExit(1)
iff, off, chars = args[1:]
with open(iff, 'rb') as f:
orig = f.read()
chars = [x for x in chars.split(',')]
individual, ranges = set(), set()
def not_single(c):
if len(c) > 1:
prints(c, 'is not a single character', file=sys.stderr)
raise SystemExit(1)
def conv_code(c):
if c.upper()[:2] in ('U+', '0X'):
c = int(c[2:], 16)
return safe_chr(int(c))
for c in chars:
if '-' in c:
parts = tuple(x.strip() for x in c.split('-'))
if len(parts) != 2:
prints('Invalid range:', c, file=sys.stderr)
raise SystemExit(1)
if opts.codes:
parts = tuple(map(conv_code, parts))
for i in parts:
not_single(i)
ranges.add(parts)
else:
if opts.codes:
c = conv_code(c)
not_single(c)
individual.add(c)
st = time.time()
sf, old_stats, new_stats = subset(orig, individual, ranges)
taken = time.time() - st
reduced = (len(sf)/len(orig)) * 100
def sz(x):
return '%gKB'%(len(x)/1024.)
print_stats(old_stats, new_stats)
prints('Original size:', sz(orig), 'Subset size:', sz(sf), 'Reduced to: %g%%'%(reduced))
prints('Subsetting took %g seconds'%taken)
with open(off, 'wb') as f:
f.write(sf)
prints('Subset font written to:', off)
if __name__ == '__main__':
try:
import init_calibre
init_calibre
except ImportError:
pass
import sys
main(sys.argv)
# }}}
# Tests {{{
def test_mem():
from calibre.utils.mem import memory
import gc
gc.collect()
start_mem = memory()
raw = P('fonts/liberation/LiberationSerif-Regular.ttf', data=True)
calls = 1000
for i in range(calls):
subset(raw, (), (('a', 'z'),))
del raw
for i in range(3):
gc.collect()
print('Leaked memory per call:', (memory() - start_mem)/calls*1024, 'KB')
def test():
raw = P('fonts/liberation/LiberationSerif-Regular.ttf', data=True)
sf, old_stats, new_stats = subset(raw, {'a', 'b', 'c'}, ())
if len(sf) > 0.3 * len(raw):
raise Exception('Subsetting failed')
def all():
from calibre.utils.fonts.scanner import font_scanner
failed = []
unsupported = []
warnings = {}
total = 0
averages = []
for family in font_scanner.find_font_families():
for font in font_scanner.fonts_for_family(family):
raw = font_scanner.get_font_data(font)
print('Subsetting', font['full_name'], end='\t')
total += 1
try:
w = []
sf, old_stats, new_stats = subset(raw, {'a', 'b', 'c'},
(), w)
if w:
warnings[font['full_name'] + ' (%s)'%font['path']] = w
except NoGlyphs:
print('No glyphs!')
continue
except UnsupportedFont as e:
unsupported.append((font['full_name'], font['path'], str(e)))
print('Unsupported!')
continue
except Exception as e:
print('Failed!')
failed.append((font['full_name'], font['path'], str(e)))
else:
averages.append(sum(itervalues(new_stats))/sum(itervalues(old_stats)) * 100)
print('Reduced to:', '%.1f'%averages[-1] , '%')
if unsupported:
print('\n\nUnsupported:')
for name, path, err in unsupported:
print(name, path, err)
print()
if warnings:
print('\n\nWarnings:')
for name, w in iteritems(warnings):
if w:
print(name)
print('', '\n\t'.join(w), sep='\t')
if failed:
print('\n\nFailures:')
for name, path, err in failed:
print(name, path, err)
print()
print('Average reduction to: %.1f%%'%(sum(averages)/len(averages)))
print('Total:', total, 'Unsupported:', len(unsupported), 'Failed:',
len(failed), 'Warnings:', len(warnings))
# }}}