%PDF- %PDF-
| Direktori : /proc/thread-self/root/usr/lib/calibre/calibre/ebooks/docx/ |
| Current File : //proc/thread-self/root/usr/lib/calibre/calibre/ebooks/docx/numbering.py |
#!/usr/bin/env python3
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import re, string
from collections import Counter, defaultdict
from functools import partial
from lxml.html.builder import OL, UL, SPAN
from calibre.ebooks.docx.block_styles import ParagraphStyle
from calibre.ebooks.docx.char_styles import RunStyle, inherit
from calibre.ebooks.metadata import roman
from polyglot.builtins import iteritems
# Translation table from the value of a w:numFmt element to the CSS
# list-style-type used to render it. Formats that are not listed here are
# rendered as 'decimal' by the code that consults this table.
STYLE_MAP = {
    # Japanese / Hebrew orderings
    'aiueo': 'hiragana',
    'aiueoFullWidth': 'hiragana',
    'hebrew1': 'hebrew',
    'iroha': 'katakana-iroha',
    'irohaFullWidth': 'katakana-iroha',

    # Latin letters and roman numerals
    'lowerLetter': 'lower-alpha',
    'lowerRoman': 'lower-roman',
    'none': 'none',
    'upperLetter': 'upper-alpha',
    'upperRoman': 'upper-roman',

    # Remaining numeric systems
    'chineseCounting': 'cjk-ideographic',
    'decimalZero': 'decimal-leading-zero',
}
def alphabet(val, lower=True):
    """Return the single ASCII letter that stands for list counter ``val``.

    Counting is 1-based (``1`` gives the first letter) and wraps around
    after the last letter of the alphabet. ``lower`` chooses between the
    lowercase and the uppercase alphabet.
    """
    letters = string.ascii_uppercase
    if lower:
        letters = string.ascii_lowercase
    # abs() keeps the index non-negative for counters smaller than one
    offset = abs(val - 1)
    return letters[offset % len(letters)]
# Functions that turn an integer list counter into the text shown for it,
# keyed by CSS list-style-type. Types without an entry are formatted as plain
# decimal numbers by the code that looks them up.
alphabet_map = {
    'lower-alpha': alphabet,
    'upper-alpha': partial(alphabet, lower=False),
    'lower-roman': lambda x: roman(x).lower(),
    'upper-roman': roman,
    # Pad to two digits (01 ... 09, 10, 11 ...). Unconditionally prefixing a
    # zero turned ten into '010'.
    'decimal-leading-zero': lambda x: '%02d' % x,
}
class Level:
    """Formatting information for a single level of a list.

    A new instance describes an un-numbered level with the ``decimal``
    format and a start value of 0. When ``lvl`` (a ``w:lvl`` element) is
    given, the settings found in it are read immediately.
    """

    def __init__(self, namespace, lvl=None):
        self.namespace = namespace
        # Counter handling
        self.start = 0
        self.restart = None
        # Appearance of the marker
        self.fmt = 'decimal'
        self.is_numbered = False
        self.num_template = None
        self.bullet_template = None
        self.pic_id = None
        # Styles attached to the level
        self.para_link = None
        self.character_style = None
        self.paragraph_style = None
        if lvl is not None:
            self.read_from_xml(lvl)

    def copy(self):
        """Return a new Level with the same settings.

        The attribute values themselves are shared with the original, not
        duplicated.
        """
        names = (
            'restart', 'pic_id', 'start', 'fmt', 'para_link',
            'paragraph_style', 'character_style', 'is_numbered',
            'num_template', 'bullet_template',
        )
        duplicate = Level(self.namespace)
        for name in names:
            setattr(duplicate, name, getattr(self, name))
        return duplicate

    def format_template(self, counter, ilvl, template):
        """Expand the ``%N`` placeholders of ``template``.

        ``%N`` refers to level ``N - 1``. A placeholder for a level deeper
        than ``ilvl``, or for a level that has no entry in ``counter``, is
        replaced by the empty string. Trailing whitespace is stripped from
        the result and a no-break space is appended.
        """
        def expand(match):
            level_index = int(match.group(1)) - 1
            if level_index > ilvl or level_index not in counter:
                return ''
            number = counter[level_index]
            if level_index != ilvl:
                # Counters of the other levels are shown one lower than
                # their stored value
                number -= 1
            render = alphabet_map.get(self.fmt, lambda n: '%d' % n)
            return render(number)

        expanded = re.sub(r'%(\d+)', expand, template)
        return expanded.rstrip() + '\xa0'

    def read_from_xml(self, lvl, override=False):
        """Update this level from the children of the ``w:lvl`` element ``lvl``.

        Only settings that are present in ``lvl`` are changed. ``override``
        is accepted for the benefit of callers but is not consulted here.
        """
        XPath, get = self.namespace.XPath, self.namespace.get

        def read_int(expr, attr_name):
            # Store the integer w:val of every match; values that are not
            # integers leave the attribute as it was
            for node in XPath(expr)(lvl):
                try:
                    setattr(self, attr_name, int(get(node, 'w:val')))
                except (TypeError, ValueError):
                    pass

        read_int('./w:lvlRestart[@w:val]', 'restart')
        read_int('./w:start[@w:val]', 'start')

        for run_props in XPath('./w:rPr')(lvl):
            run_style = RunStyle(self.namespace, run_props)
            if self.character_style is None:
                self.character_style = run_style
            else:
                self.character_style.update(run_style)

        level_text = None
        for text_node in XPath('./w:lvlText[@w:val]')(lvl):
            level_text = get(text_node, 'w:val')

        # The number format is interpreted after the run properties and the
        # level text, because the bullet handling below depends on both
        for fmt_node in XPath('./w:numFmt[@w:val]')(lvl):
            num_fmt = get(fmt_node, 'w:val')
            if num_fmt != 'bullet':
                self.is_numbered = True
                self.fmt = STYLE_MAP.get(num_fmt, 'decimal')
                # A template of the plain form "%N." is not stored
                if level_text and re.match(r'%\d+\.$', level_text) is None:
                    self.num_template = level_text
                continue

            self.is_numbered = False
            cs = self.character_style
            is_standard_bullet = level_text in {'\uf0a7', 'o'} or (
                cs is not None and cs.font_family is not inherit and
                cs.font_family.lower() in {'wingdings', 'symbol'})
            if is_standard_bullet:
                self.fmt = {'\uf0a7': 'square', 'o': 'circle'}.get(level_text, 'disc')
            else:
                # Any other text is kept and used as the bullet itself
                self.bullet_template = level_text
            for pic_node in XPath('./w:lvlPicBulletId[@w:val]')(lvl):
                self.pic_id = get(pic_node, 'w:val')

        for link_node in XPath('./w:pStyle[@w:val]')(lvl):
            self.para_link = get(link_node, 'w:val')

        for para_props in XPath('./w:pPr')(lvl):
            para_style = ParagraphStyle(self.namespace, para_props)
            if self.paragraph_style is None:
                self.paragraph_style = para_style
            else:
                self.paragraph_style.update(para_style)

    def css(self, images, pic_map, rid_map):
        """Return the CSS properties of the list made from this level.

        ``list-style-type`` is always present. ``list-style-image`` is added
        when this level has a picture bullet id that ``pic_map`` resolves and
        ``images.generate_filename`` succeeds for it; any exception raised by
        that call simply leaves the image out.
        """
        rules = {'list-style-type': self.fmt}
        if not self.pic_id:
            return rules
        rid = pic_map.get(self.pic_id, None)
        if not rid:
            return rules
        try:
            fname = images.generate_filename(rid, rid_map=rid_map, max_width=20, max_height=20)
        except Exception:
            return rules
        rules['list-style-image'] = 'url("images/%s")' % fname
        return rules

    def char_css(self):
        """Return the CSS of the character style without ``font-family``.

        An empty dict is returned when there is no character style. Note
        that ``font-family`` is removed from the very dict that the
        character style hands out, not from a copy of it.
        """
        try:
            rules = self.character_style.css
        except AttributeError:
            return {}
        rules.pop('font-family', None)
        return rules
class NumberingDefinition:
    """The levels that make up one numbering definition.

    ``levels`` maps the integer indent level to its Level object. When
    ``parent`` is given, every ``w:lvl`` child of it is read; ``an_id`` is
    remembered as ``abstract_numbering_definition_id``.
    """

    def __init__(self, namespace, parent=None, an_id=None):
        self.namespace = namespace
        self.abstract_numbering_definition_id = an_id
        self.levels = {}
        XPath, get = self.namespace.XPath, self.namespace.get
        if parent is None:
            return
        for node in XPath('./w:lvl')(parent):
            try:
                index = int(get(node, 'w:ilvl', 0))
            except (TypeError, ValueError):
                # An unusable w:ilvl is treated as level 0
                index = 0
            self.levels[index] = Level(namespace, node)

    def copy(self):
        """Return a new definition with the same id and a copy of each level."""
        duplicate = NumberingDefinition(
            self.namespace, an_id=self.abstract_numbering_definition_id)
        for index, level in iteritems(self.levels):
            duplicate.levels[index] = level.copy()
        return duplicate
class Numbering:
    """Reads the numbering definitions of a document and turns numbered
    paragraphs into list markup.

    Calling the instance reads the definitions; :meth:`apply_markup` then
    converts the paragraphs.
    """

    def __init__(self, namespace):
        self.namespace = namespace
        self.definitions = {}  # abstract numbering id -> NumberingDefinition
        self.instances = {}    # numbering id -> NumberingDefinition
        self.counters = defaultdict(Counter)  # abstract numbering id -> level counters
        self.starts = {}       # numbering id -> {level: start value}
        self.pic_map = {}      # picture bullet id -> relationship id
        # Replaced in __call__; set here so the attribute always exists
        self.rid_map = {}

    def __call__(self, root, styles, rid_map):
        """Read all numbering style definitions.

        :param root: element whose children are the ``w:numPicBullet``,
            ``w:abstractNum`` and ``w:num`` elements
        :param styles: object whose ``numbering_style_links`` maps a style
            link name to a numbering id
        :param rid_map: stored as ``self.rid_map`` and later handed to the
            image handling when picture bullets are resolved
        """
        XPath, get = self.namespace.XPath, self.namespace.get
        self.rid_map = rid_map

        for npb in XPath('./w:numPicBullet[@w:numPicBulletId]')(root):
            npbid = get(npb, 'w:numPicBulletId')
            for idata in XPath('descendant::v:imagedata[@r:id]')(npb):
                rid = get(idata, 'r:id')
                self.pic_map[npbid] = rid

        # Abstract definitions that only point at a numbering style cannot be
        # built until the instances they refer to exist
        lazy_load = {}
        for an in XPath('./w:abstractNum[@w:abstractNumId]')(root):
            an_id = get(an, 'w:abstractNumId')
            nsl = XPath('./w:numStyleLink[@w:val]')(an)
            if nsl:
                lazy_load[an_id] = get(nsl[0], 'w:val')
            else:
                nd = NumberingDefinition(self.namespace, an, an_id=an_id)
                self.definitions[an_id] = nd

        def create_instance(n, definition):
            # Build the definition used by the w:num element n: a copy of
            # the abstract definition with the w:lvlOverride children applied
            nd = definition.copy()
            start_overrides = {}
            for lo in XPath('./w:lvlOverride')(n):
                try:
                    ilvl = int(get(lo, 'w:ilvl'))
                except (ValueError, TypeError):
                    ilvl = None
                for so in XPath('./w:startOverride[@w:val]')(lo):
                    try:
                        start_override = int(get(so, 'w:val'))
                    except (TypeError, ValueError):
                        pass
                    else:
                        start_overrides[ilvl] = start_override
                for lvl in XPath('./w:lvl')(lo)[:1]:
                    if ilvl is None:
                        # Fall back to the level named by the w:lvl element.
                        # Levels are keyed by integer, so convert it.
                        try:
                            ilvl = int(get(lvl, 'w:ilvl'))
                        except (TypeError, ValueError):
                            ilvl = None
                    if ilvl is None:
                        # Nothing identifies the level being overridden
                        continue
                    alvl = nd.levels.get(ilvl, None)
                    if alvl is None:
                        # The override introduces a level the abstract
                        # definition lacks; store it so it takes effect
                        alvl = nd.levels[ilvl] = Level(self.namespace)
                    alvl.read_from_xml(lvl, override=True)
            # Start overrides are applied last, each level receiving its own
            # value rather than the last one that happened to be parsed
            for ilvl, so in start_overrides.items():
                try:
                    nd.levels[ilvl].start = so
                except KeyError:
                    pass
            return nd

        next_pass = {}
        for n in XPath('./w:num[@w:numId]')(root):
            an_id = None
            num_id = get(n, 'w:numId')
            for an in XPath('./w:abstractNumId[@w:val]')(n):
                an_id = get(an, 'w:val')
            d = self.definitions.get(an_id, None)
            if d is None:
                # Possibly a lazily loaded definition, retried below
                next_pass[num_id] = (an_id, n)
                continue
            self.instances[num_id] = create_instance(n, d)

        numbering_links = styles.numbering_style_links
        for an_id, style_link in lazy_load.items():
            try:
                linked = self.instances[numbering_links[style_link]]
            except KeyError:
                # Dangling style link: leave this definition undefined
                # instead of aborting the whole read
                continue
            self.definitions[an_id] = linked.copy()

        for num_id, (an_id, n) in next_pass.items():
            d = self.definitions.get(an_id, None)
            if d is not None:
                self.instances[num_id] = create_instance(n, d)

        for num_id, d in self.instances.items():
            self.starts[num_id] = {lvl: d.levels[lvl].start for lvl in d.levels}

    def get_pstyle(self, num_id, style_id):
        """Return the level of instance ``num_id`` that is linked to the
        paragraph style ``style_id``, or None when there is none."""
        d = self.instances.get(num_id, None)
        if d is not None:
            for ilvl, lvl in d.levels.items():
                if lvl.para_link == style_id:
                    return ilvl
        return None

    def get_para_style(self, num_id, lvl):
        """Return the paragraph style of level ``lvl`` of instance
        ``num_id``, or None when the instance or level is unknown."""
        d = self.instances.get(num_id, None)
        if d is not None:
            lvl = d.levels.get(lvl, None)
            return getattr(lvl, 'paragraph_style', None)
        return None

    def update_counter(self, counter, levelnum, levels):
        """Advance ``counter`` after an item at level ``levelnum``.

        The counter of ``levelnum`` is incremented. Every level whose
        ``restart`` equals ``levelnum + 1`` is reset to its start value, as
        is the directly following level when it has no ``restart`` set.
        """
        counter[levelnum] += 1
        for ilvl, lvl in levels.items():
            restart = lvl.restart
            if (restart is None and ilvl == levelnum + 1) or restart == levelnum + 1:
                counter[ilvl] = lvl.start

    def apply_markup(self, items, body, styles, object_map, images):
        """Convert numbered paragraphs into list markup inside ``body``.

        :param items: iterable of ``(paragraph element, numbering id, level)``
        :param body: element that contains the paragraphs
        :param styles: provides ``register()`` for CSS rules and
            ``para_cache`` for the block styles of paragraphs
        :param object_map: maps a generated element to the object used as
            key into ``styles.para_cache``
        :param images: passed on to ``Level.css()`` for picture bullets
        """
        seen_instances = set()
        for p, num_id, ilvl in items:
            d = self.instances.get(num_id, None)
            if d is not None:
                lvl = d.levels.get(ilvl, None)
                if lvl is not None:
                    # Counters are shared by all instances of one abstract
                    # definition; the first use of an instance resets the
                    # level to that instance's start value
                    an_id = d.abstract_numbering_definition_id
                    counter = self.counters[an_id]
                    if ilvl not in counter or num_id not in seen_instances:
                        counter[ilvl] = self.starts[num_id][ilvl]
                    seen_instances.add(num_id)
                    p.tag = 'li'
                    p.set('value', '%s' % counter[ilvl])
                    # Temporary attributes, removed again in commit()
                    p.set('list-lvl', str(ilvl))
                    p.set('list-id', num_id)
                    if lvl.num_template is not None:
                        val = lvl.format_template(counter, ilvl, lvl.num_template)
                        p.set('list-template', val)
                    elif lvl.bullet_template is not None:
                        val = lvl.format_template(counter, ilvl, lvl.bullet_template)
                        p.set('list-template', val)
                    self.update_counter(counter, ilvl, d.levels)

        templates = {}

        def commit(current_run):
            # Wrap a run of adjacent <li> elements that share list id and
            # level in a single <ol>/<ul>, then empty the run
            if not current_run:
                return
            start = current_run[0]
            parent = start.getparent()
            idx = parent.index(start)

            d = self.instances[start.get('list-id')]
            ilvl = int(start.get('list-lvl'))
            lvl = d.levels[ilvl]
            lvlid = start.get('list-id') + start.get('list-lvl')
            has_template = 'list-template' in start.attrib
            wrap = (OL if lvl.is_numbered or has_template else UL)('\n\t')
            if has_template:
                # Marked so that the list is converted into a table below
                wrap.set('lvlid', lvlid)
            else:
                wrap.set('class', styles.register(lvl.css(images, self.pic_map, self.rid_map), 'list'))
            ccss = lvl.char_css()
            if ccss:
                ccss = styles.register(ccss, 'bullet')
            parent.insert(idx, wrap)
            last_val = None
            for child in current_run:
                wrap.append(child)
                child.tail = '\n\t'
                if has_template:
                    # Move the item's content into one span and put the
                    # marker text into another span in front of it
                    span = SPAN()
                    span.text = child.text
                    child.text = None
                    for gc in child:
                        span.append(gc)
                    child.append(span)
                    span = SPAN(child.get('list-template'))
                    if ccss:
                        span.set('class', ccss)
                    last = templates.get(lvlid, '')
                    if span.text and len(span.text) > len(last):
                        templates[lvlid] = span.text
                    child.insert(0, span)
                for attr in ('list-lvl', 'list-id', 'list-template'):
                    child.attrib.pop(attr, None)
                # Drop the explicit value where it merely continues the
                # sequence, starts at one, or the list is unordered
                val = int(child.get('value'))
                if last_val == val - 1 or wrap.tag == 'ul' or (last_val is None and val == 1):
                    child.attrib.pop('value')
                last_val = val
            current_run[-1].tail = '\n'
            del current_run[:]

        parents = set()
        for child in body.iterdescendants('li'):
            parents.add(child.getparent())

        for parent in parents:
            current_run = []
            for child in parent:
                if child.tag == 'li':
                    if current_run:
                        last = current_run[-1]
                        if (last.get('list-id'), last.get('list-lvl')) != (child.get('list-id'), child.get('list-lvl')):
                            commit(current_run)
                    current_run.append(child)
                else:
                    commit(current_run)
            commit(current_run)

        # Convert the list items that use custom text for bullets into tables
        # so that they display correctly
        for wrap in body.xpath('//ol[@lvlid]'):
            wrap.attrib.pop('lvlid')
            wrap.tag = 'div'
            wrap.set('style', 'display:table')
            for i, li in enumerate(wrap.iterchildren('li')):
                li.tag = 'div'
                li.attrib.pop('value', None)
                li.set('style', 'display:table-row')
                obj = object_map[li]
                bs = styles.para_cache[obj]
                if i == 0:
                    # The left margin of the first item becomes the padding
                    # of the whole table
                    wrap.set('style', 'display:table; padding-left:%s' %
                             bs.css.get('margin-left', '0'))
                bs.css.pop('margin-left', None)
                for child in li:
                    child.set('style', 'display:table-cell')