# NOTE: file-browser residue ("%PDF- %PDF-" banner and path listing), kept only as a comment.
# Directory    : /lib/calibre/calibre/ebooks/docx/
# Current file : //lib/calibre/calibre/ebooks/docx/names.py
#!/usr/bin/env python3
__license__ = 'GPL v3'
__copyright__ = '2013, Kovid Goyal <kovid at kovidgoyal.net>'
import re
from lxml.etree import XPath as X
from calibre.utils.filenames import ascii_text
from polyglot.builtins import iteritems
# Names {{{
# Symbolic name -> URI table in its schemas.openxmlformats.org form.
# STRICT_NAMES is derived from this table by prefix substitution.
TRANSITIONAL_NAMES = {
    'DOCUMENT': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument',
    'DOCPROPS': 'http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties',
    'APPPROPS': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties',
    'STYLES': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles',
    'NUMBERING': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/numbering',
    'FONTS': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable',
    'EMBEDDED_FONT': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/font',
    'IMAGES': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/image',
    'LINKS': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink',
    'FOOTNOTES': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes',
    'ENDNOTES': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/endnotes',
    'THEMES': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme',
    'SETTINGS': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/settings',
    'WEB_SETTINGS': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/webSettings',
}
# Same keys as TRANSITIONAL_NAMES, with the officeDocument/2006 URI prefix
# swapped for its purl.oclc.org counterpart. Values that do not contain the
# prefix are carried over unchanged.
STRICT_NAMES = {
    rel_name: rel_uri.replace(
        'http://schemas.openxmlformats.org/officeDocument/2006',
        'http://purl.oclc.org/ooxml/officeDocument',
    )
    for rel_name, rel_uri in iteritems(TRANSITIONAL_NAMES)
}
# XML namespace prefix -> namespace URI table in its transitional form.
# STRICT_NAMESPACES is derived from this table by prefix substitution.
TRANSITIONAL_NAMESPACES = {
    'mo': 'http://schemas.microsoft.com/office/mac/office/2008/main',
    'o': 'urn:schemas-microsoft-com:office:office',
    # 've' and 'mc' are two prefixes bound to the same URI
    've': 'http://schemas.openxmlformats.org/markup-compatibility/2006',
    'mc': 'http://schemas.openxmlformats.org/markup-compatibility/2006',

    # Text Content
    'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main',
    'w10': 'urn:schemas-microsoft-com:office:word',
    'wne': 'http://schemas.microsoft.com/office/word/2006/wordml',
    'xml': 'http://www.w3.org/XML/1998/namespace',

    # Drawing
    'a': 'http://schemas.openxmlformats.org/drawingml/2006/main',
    'm': 'http://schemas.openxmlformats.org/officeDocument/2006/math',
    'mv': 'urn:schemas-microsoft-com:mac:vml',
    'pic': 'http://schemas.openxmlformats.org/drawingml/2006/picture',
    'v': 'urn:schemas-microsoft-com:vml',
    'wp': 'http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing',

    # Properties (core and extended)
    'cp': 'http://schemas.openxmlformats.org/package/2006/metadata/core-properties',
    'dc': 'http://purl.org/dc/elements/1.1/',
    'ep': 'http://schemas.openxmlformats.org/officeDocument/2006/extended-properties',
    'xsi': 'http://www.w3.org/2001/XMLSchema-instance',

    # Content Types
    'ct': 'http://schemas.openxmlformats.org/package/2006/content-types',

    # Package Relationships
    'r': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships',
    'pr': 'http://schemas.openxmlformats.org/package/2006/relationships',

    # Dublin Core document properties
    'dcmitype': 'http://purl.org/dc/dcmitype/',
    'dcterms': 'http://purl.org/dc/terms/',
}
# Same prefixes as TRANSITIONAL_NAMESPACES, with the officeDocument,
# wordprocessingml and drawingml "/2006" URI prefixes swapped for their
# purl.oclc.org counterparts. URIs containing none of these prefixes are
# carried over unchanged.
STRICT_NAMESPACES = {
    prefix: (
        uri
        .replace(
            'http://schemas.openxmlformats.org/officeDocument/2006',
            'http://purl.oclc.org/ooxml/officeDocument')
        .replace(
            'http://schemas.openxmlformats.org/wordprocessingml/2006',
            'http://purl.oclc.org/ooxml/wordprocessingml')
        .replace(
            'http://schemas.openxmlformats.org/drawingml/2006',
            'http://purl.oclc.org/ooxml/drawingml')
    )
    for prefix, uri in iteritems(TRANSITIONAL_NAMESPACES)
}
# }}}
def barename(x):
    """Return the text of *x* after its last '}'.

    When *x* contains no '}', the whole string is returned.
    """
    _, _, local_name = x.rpartition('}')
    return local_name
def XML(x):
    """Return *x* qualified with the 'xml' namespace URI, as '{uri}x'."""
    xml_uri = TRANSITIONAL_NAMESPACES['xml']
    return '{{{}}}{}'.format(xml_uri, x)
def generate_anchor(name, existing):
    """Build an anchor id from *name* that is not already in *existing*.

    The base id is 'id_' followed by the ASCII form of *name* with every
    character outside [0-9a-zA-Z_] removed and leading underscores stripped.
    If that id is in *existing*, '_1', '_2', ... is appended until an unused
    id is found. *existing* is only tested for membership, never modified.
    """
    cleaned = re.sub(r'[^0-9a-zA-Z_]', '', ascii_text(name))
    base = 'id_' + cleaned.lstrip('_')
    candidate = base
    suffix = 0
    while candidate in existing:
        suffix += 1
        candidate = '%s_%d' % (base, suffix)
    return candidate
class DOCXNamespace:
    """Namespace-aware helpers for querying and building DOCX XML trees.

    Each instance carries its own copy of a prefix -> URI map
    (``namespaces``), a symbolic name -> URI map (``names``) and a cache of
    compiled XPath expressions (``xpath_cache``).
    """

    def __init__(self, transitional=True):
        """Use the transitional tables by default, the strict ones otherwise.

        The module-level tables are copied, so changes made on the instance
        do not affect them.
        """
        if transitional:
            namespace_table, name_table = TRANSITIONAL_NAMESPACES, TRANSITIONAL_NAMES
        else:
            namespace_table, name_table = STRICT_NAMESPACES, STRICT_NAMES
        self.xpath_cache = {}
        self.namespaces = namespace_table.copy()
        self.names = name_table.copy()

    def XPath(self, expr):
        """Return a compiled XPath for *expr*, bound to this instance's prefixes.

        Compiled expressions are cached per instance, keyed by *expr*.
        """
        compiled = self.xpath_cache.get(expr, None)
        if compiled is not None:
            return compiled
        compiled = X(expr, namespaces=self.namespaces)
        self.xpath_cache[expr] = compiled
        return compiled

    def is_tag(self, x, q):
        """Return True if *x* has the tag named by the prefixed name *q*.

        *x* may be an object with a ``tag`` attribute or the tag value
        itself. *q* has the form 'prefix:name'.
        """
        tag = getattr(x, 'tag', x)
        ns, _, name = q.partition(':')
        expected = f'{{{self.namespaces.get(ns, None)}}}{name}'
        return expected == tag

    def expand(self, name, sep=':'):
        """Convert 'prefix<sep>local' into '{uri}local'.

        When either side of *sep* is empty (including when *sep* is absent
        from *name*), the non-empty part is returned unchanged. Raises
        KeyError if the prefix is not in ``self.namespaces``.
        """
        ns, _, tag = name.partition(sep)
        if not ns:
            return tag
        if not tag:
            return ns
        return f'{{{self.namespaces[ns]}}}{tag}'

    def get(self, x, attr, default=None):
        """Look up the prefixed attribute *attr* in ``x.attrib``.

        Returns *default* when the attribute is absent.
        """
        attributes = x.attrib
        return attributes.get(self.expand(attr), default)

    def ancestor(self, elem, name):
        """Return the nearest ancestor of *elem* matching *name*, or None."""
        finder = self.XPath('ancestor::%s[1]' % name)
        try:
            return finder(elem)[0]
        except IndexError:
            # Empty result: no such ancestor exists
            return None

    def children(self, elem, *args):
        """Return the children of *elem* matching any of the names in *args*."""
        expr = '|'.join(['child::%s' % step for step in args])
        return self.XPath(expr)(elem)

    def descendants(self, elem, *args):
        """Return the descendants of *elem* matching any of the names in *args*."""
        expr = '|'.join(['descendant::%s' % step for step in args])
        return self.XPath(expr)(elem)

    def makeelement(self, root, tag, append=True, **attrs):
        """Create an element via ``root.makeelement`` and optionally append it.

        *tag* is a 'prefix:name' string. Attribute keywords use '_' as the
        prefix separator (split at the first underscore), e.g. ``w_val`` for
        'w:val'. The new element is appended to *root* unless *append* is
        false, and is returned either way.
        """
        qualified_tag = self.expand(tag)
        qualified_attrs = {}
        for key, value in iteritems(attrs):
            qualified_attrs[self.expand(key, sep='_')] = value
        element = root.makeelement(qualified_tag, **qualified_attrs)
        if append:
            root.append(element)
        return element