%PDF- %PDF-
| Direktori : /lib/calibre/calibre/ebooks/metadata/ |
| Current File : //lib/calibre/calibre/ebooks/metadata/docx.py |
#!/usr/bin/env python3
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid at kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
from io import BytesIO
from calibre.ebooks.docx.container import DOCX
from calibre.utils.xml_parse import safe_xml_fromstring
from calibre.ebooks.docx.writer.container import update_doc_props, xml2str
from calibre.utils.imghdr import identify
def get_cover(docx):
    '''
    Return the first embedded image in *docx* that looks like a cover.

    Images are located through the first ``a:blip`` (``r:embed``) or
    ``v:imagedata`` (``r:id``) descendant of every ``w:drawing`` / ``w:pict``
    element in the main document. The relationship id is resolved through
    ``docx.document_relationships[0]`` and the target is read from the
    container.

    An image is accepted when its height/width ratio lies in [0.8, 1.8] and it
    has at least 160000 pixels (for example 400x400).

    :param docx: An opened DOCX container object.
    :return: A ``(fmt, raw)`` tuple for the first acceptable image, or ``None``
        when no image qualifies.
    '''
    doc = docx.document
    get = docx.namespace.get
    images = docx.namespace.XPath(
        '//*[name()="w:drawing" or name()="w:pict"]/descendant::*[(name()="a:blip" and @r:embed) or (name()="v:imagedata" and @r:id)][1]')
    rid_map = docx.document_relationships[0]
    for image in images(doc):
        rid = get(image, 'r:embed') or get(image, 'r:id')
        if rid not in rid_map:
            continue
        try:
            raw = docx.read(rid_map[rid])
            fmt, width, height = identify(raw)
        except Exception:
            # Unreadable or unrecognised image: best effort, try the next one.
            continue
        # Reject non-positive dimensions. A zero width used to slip through
        # the old ``< 0`` check and raise ZeroDivisionError below, aborting the
        # whole search instead of just skipping this image.
        if width <= 0 or height <= 0:
            continue
        if 0.8 <= height/width <= 1.8 and height*width >= 160000:
            return (fmt, raw)
    return None
def get_metadata(stream):
    '''
    Read metadata, and a cover if one can be found, from a DOCX file.

    :param stream: A seekable binary file-like object containing the DOCX
        data. It is rewound to position 0 before this function returns or
        raises.
    :return: The metadata object exposed by the container's ``metadata``
        attribute. When :func:`get_cover` finds a suitable image, its
        ``(fmt, raw)`` tuple is stored in ``cover_data``.
    '''
    c = DOCX(stream, extract=False)
    try:
        mi = c.metadata
        try:
            cdata = get_cover(c)
        except Exception:
            # Cover extraction is best effort: report the failure but still
            # return the textual metadata.
            cdata = None
            import traceback
            traceback.print_exc()
    finally:
        # Always release the container and rewind the stream, even when
        # reading the metadata itself fails.
        c.close()
        stream.seek(0)
    if cdata is not None:
        mi.cover_data = cdata
    return mi
def set_metadata(stream, mi):
    '''
    Write the metadata in *mi* into the DOCX file in *stream*, in place.

    The first properties part named by the container is updated through
    ``update_doc_props``. If the second properties part can be read, its
    ``Company`` element (``ep`` namespace) is replaced by one holding
    ``mi.publisher``. Both parts are then written back with ``safe_replace``.

    :param stream: A seekable binary file-like object containing the DOCX
        data; it is modified by ``safe_replace``.
    :param mi: Metadata object supplying the new values.
    '''
    from calibre.utils.zipfile import safe_replace

    container = DOCX(stream, extract=False)
    # presumably the core and the extended (app) properties parts -- the
    # names come straight from the container
    doc_props_name, app_props_name = container.get_document_properties_names()
    doc_props_raw = container.read(doc_props_name)
    try:
        app_props_raw = container.read(app_props_name)
    except Exception:
        # The second part is treated as optional; carry on without it.
        app_props_raw = None

    doc_props_root = safe_xml_fromstring(doc_props_raw)
    update_doc_props(doc_props_root, mi, container.namespace)

    extra = {}
    if app_props_raw is not None:
        app_props_root = safe_xml_fromstring(app_props_raw)
        ep_namespace = container.namespace.namespaces['ep']
        company = app_props_root.makeelement('{%s}Company' % ep_namespace)
        # Drop every existing Company child before appending the new one.
        stale = [el for el in app_props_root if el.tag == company.tag]
        for el in stale:
            app_props_root.remove(el)
        company.text = mi.publisher
        app_props_root.append(company)
        extra[app_props_name] = BytesIO(xml2str(app_props_root))

    stream.seek(0)
    new_doc_props = BytesIO(xml2str(doc_props_root))
    safe_replace(stream, doc_props_name, new_doc_props, extra_replacements=extra)
if __name__ == '__main__':
    # Script entry: print the metadata of the DOCX file named by the last
    # command line argument.
    import sys
    docx_path = sys.argv[-1]
    with open(docx_path, 'rb') as docx_file:
        print(get_metadata(docx_file))