%PDF- %PDF-
Mini Shell

Mini Shell

Direktori : /lib/calibre/calibre/utils/podofo/
Upload File :
Create Path :
Current File : //lib/calibre/calibre/utils/podofo/__init__.py

#!/usr/bin/env python3
# License: GPLv3 Copyright: 2009, Kovid Goyal <kovid at kovidgoyal.net>


import os
import shutil
import sys

from calibre.constants import preferred_encoding
from calibre.ebooks.metadata import authors_to_string
from calibre.ptempfile import TemporaryDirectory
from calibre.utils.ipc.simple_worker import WorkerError, fork_job


def get_podofo():
    '''
    Return the ``podofo`` module from ``calibre_extensions``.

    The import is performed at call time rather than at module import time.
    '''
    from calibre_extensions import podofo as podofo_module
    return podofo_module


def prep(val):
    '''
    Normalize a metadata value to a stripped ``str``.

    Any falsy value yields the empty string. A value that is not a ``str`` is
    decoded using ``preferred_encoding`` with the ``'replace'`` error handler.
    The result has leading and trailing whitespace removed.
    '''
    if not val:
        return ''
    text = val if isinstance(val, str) else val.decode(preferred_encoding, 'replace')
    return text.strip()


def set_metadata(stream, mi):
    '''
    Update the PDF held in ``stream`` with the metadata from ``mi``.

    The stream is copied to ``input.pdf`` inside a temporary directory and
    ``set_metadata_`` is run on it through ``fork_job``. If the job reports a
    change and the resulting ``output.pdf`` is larger than 100 bytes, the
    contents of ``stream`` are replaced with it. The stream is rewound to the
    start before returning.

    :param stream: Readable, writable, seekable binary file-like object.
    :param mi: Metadata object; ``title``, ``authors``, ``book_producer`` and
        ``tags`` are read from it and it is passed to ``metadata_to_xmp_packet``.
    :raises Exception: If the job fails with a ``WorkerError``.
    '''
    with TemporaryDirectory('_podofo_set_metadata') as tdir:
        input_path = os.path.join(tdir, 'input.pdf')
        with open(input_path, 'wb') as src_copy:
            shutil.copyfileobj(stream, src_copy)
        from calibre.ebooks.metadata.xmp import metadata_to_xmp_packet
        xmp_packet = metadata_to_xmp_packet(mi)

        job_args = (tdir, mi.title, mi.authors, mi.book_producer, mi.tags, xmp_packet)
        try:
            touched = fork_job('calibre.utils.podofo', 'set_metadata_', job_args)['result']
        except WorkerError as e:
            raise Exception('Failed to set PDF metadata in (%s): %s'%(mi.title, e.orig_tb))

        if touched:
            output_path = os.path.join(tdir, 'output.pdf')
            with open(output_path, 'rb') as updated:
                # Seeking to the end reports the file size; tiny outputs are
                # ignored and the stream is left as it was
                size = updated.seek(0, os.SEEK_END)
                if size > 100:
                    updated.seek(0)
                    stream.seek(0)
                    stream.truncate()
                    shutil.copyfileobj(updated, stream)
                    stream.flush()
    stream.seek(0)


def set_metadata_implementation(pdf_doc, title, authors, bkp, tags, xmp_packet):
    '''
    Apply metadata to an open PDF document object.

    ``title``, the author string and ``bkp`` (written to both ``creator`` and
    ``producer``) are only written when they are non-empty and differ from the
    current value. Keywords are written whenever they differ, even when empty.
    The XMP packet is merged with any packet already in the document and then
    written. Errors while setting keywords or XMP are ignored.

    :return: True if any field or the XMP packet was written, else False.
    '''
    def assign(attr, value, skip_empty=True):
        # Write value to pdf_doc.<attr> when it differs from the current
        # value; report whether a write happened
        if skip_empty and not value:
            return False
        if value != getattr(pdf_doc, attr):
            setattr(pdf_doc, attr, value)
            return True
        return False

    touched = assign('title', prep(title))
    touched = assign('author', prep(authors_to_string(authors))) or touched

    producer_name = prep(bkp)
    for attr in ('creator', 'producer'):
        touched = assign(attr, producer_name) or touched

    try:
        keywords = prep(', '.join(filter(None, (t.strip() for t in tags))))
        touched = assign('keywords', keywords, skip_empty=False) or touched
    except Exception:
        pass

    try:
        existing_packet = pdf_doc.get_xmp_metadata()
        if existing_packet:
            from calibre.ebooks.metadata.xmp import merge_xmp_packet
            xmp_packet = merge_xmp_packet(existing_packet, xmp_packet)
        pdf_doc.set_xmp_metadata(xmp_packet)
    except Exception:
        pass
    else:
        touched = True
    return touched


def set_metadata_(tdir, title, authors, bkp, tags, xmp_packet):
    '''
    Job entry point: set metadata on ``input.pdf`` located in ``tdir``.

    Changes the current working directory to ``tdir``, opens ``input.pdf``,
    applies the metadata and, only if something was changed, saves the result
    as ``output.pdf`` in the same directory.

    :return: The value returned by ``set_metadata_implementation``.
    '''
    pdf_module = get_podofo()
    # The relative file names below resolve against tdir
    os.chdir(tdir)
    doc = pdf_module.PDFDoc()
    doc.open('input.pdf')

    modified = set_metadata_implementation(doc, title, authors, bkp, tags, xmp_packet)
    if modified:
        doc.save('output.pdf')

    return modified


def get_xmp_metadata(path):
    '''
    Load the PDF file at ``path`` and return the result of calling
    ``get_xmp_metadata()`` on the loaded document.
    '''
    pdf_module = get_podofo()
    doc = pdf_module.PDFDoc()
    with open(path, 'rb') as pdf_file:
        doc_bytes = pdf_file.read()
    doc.load(doc_bytes)
    xmp = doc.get_xmp_metadata()
    return xmp


def get_outline(path=None):
    '''
    Load the PDF file at ``path`` and return the ``'children'`` entry of the
    document's outline.

    When ``path`` is None the last command line argument is used instead.
    '''
    pdf_path = sys.argv[-1] if path is None else path
    pdf_module = get_podofo()
    doc = pdf_module.PDFDoc()
    with open(pdf_path, 'rb') as pdf_file:
        doc_bytes = pdf_file.read()
    doc.load(doc_bytes)
    outline = doc.get_outline()
    return outline['children']


def get_image_count(path):
    '''
    Load the PDF file at ``path`` and return the result of calling
    ``image_count()`` on the loaded document.
    '''
    pdf_module = get_podofo()
    doc = pdf_module.PDFDoc()
    with open(path, 'rb') as pdf_file:
        doc_bytes = pdf_file.read()
    doc.load(doc_bytes)
    count = doc.image_count()
    return count


def list_fonts(pdf_doc):
    '''
    Return a dict mapping each font's ``'Reference'`` value to the font entry,
    for every entry returned by ``pdf_doc.list_fonts()``.

    If two entries share a reference, the later one wins.
    '''
    by_reference = {}
    for font in pdf_doc.list_fonts():
        by_reference[font['Reference']] = font
    return by_reference


def remove_unused_fonts(pdf_doc):
    '''
    Call ``pdf_doc.remove_unused_fonts()`` and return its result.
    '''
    outcome = pdf_doc.remove_unused_fonts()
    return outcome


def test_remove_unused_fonts(src):
    '''
    Manual test helper: remove unused fonts from the PDF at ``src`` and save
    the result next to it with ``-removed.pdf`` in place of the extension.

    :param src: Path to the PDF file to process.
    '''
    podofo = get_podofo()
    p = podofo.PDFDoc()
    p.open(src)
    remove_unused_fonts(p)
    # splitext only strips an extension from the last path component, so a
    # path without an extension (or with a dot in a directory name) still
    # yields a destination beside the source file
    dest = os.path.splitext(src)[0] + '-removed.pdf'
    p.save(dest)
    print('Modified pdf saved to:', dest)


def dedup_type3_fonts(pdf_doc):
    '''
    Call ``pdf_doc.dedup_type3_fonts()`` and return its result.
    '''
    outcome = pdf_doc.dedup_type3_fonts()
    return outcome


def test_dedup_type3_fonts(src):
    '''
    Manual test helper: de-duplicate Type3 fonts in the PDF at ``src`` and
    save the result next to it with ``-removed.pdf`` in place of the extension.

    :param src: Path to the PDF file to process.
    '''
    podofo = get_podofo()
    p = podofo.PDFDoc()
    p.open(src)
    num = dedup_type3_fonts(p)
    # splitext only strips an extension from the last path component, so a
    # path without an extension (or with a dot in a directory name) still
    # yields a destination beside the source file
    dest = os.path.splitext(src)[0] + '-removed.pdf'
    p.save(dest)
    print(f'Modified pdf with {num} glyphs removed saved to:', dest)


def test_list_fonts(src):
    '''
    Manual test helper: load the PDF at ``src`` and pretty-print the mapping
    returned by ``list_fonts`` for it.
    '''
    from pprint import pprint
    pdf_module = get_podofo()
    doc = pdf_module.PDFDoc()
    with open(src, 'rb') as pdf_file:
        doc_bytes = pdf_file.read()
    doc.load(doc_bytes)
    font_map = list_fonts(doc)
    pprint(font_map)


def test_save_to(src, dest):
    '''
    Manual test helper: load the PDF at ``src``, write it to ``dest`` via
    ``save_to_fileobj`` and print the output file position after writing.
    '''
    pdf_module = get_podofo()
    doc = pdf_module.PDFDoc()
    with open(src, 'rb') as pdf_file:
        doc_bytes = pdf_file.read()
    doc.load(doc_bytes)
    with open(dest, 'wb') as sink:
        doc.save_to_fileobj(sink)
        written = sink.tell()
        print('Wrote PDF of size:', written)


def test_podofo():
    '''
    Self test for the podofo extension.

    Loads a small embedded PDF, sets the title, author and an XMP packet,
    serializes the document to memory, writes it to a temporary file, reopens
    that file and checks that the title, author and XMP packet are present.

    :raises ValueError: If the reopened document does not have the expected
        title/author or has no XMP packet.
    '''
    import tempfile
    from io import BytesIO
    from calibre.ebooks.metadata.book.base import Metadata
    from calibre.ebooks.metadata.xmp import metadata_to_xmp_packet
    # The literal below must stay on a single physical line: a bytes literal
    # in single quotes cannot contain a raw newline
    # {{{
    raw = b"%PDF-1.1\n%\xe2\xe3\xcf\xd3\n1 0 obj<</Type/Catalog/Metadata 6 0 R/Pages 2 0 R>>\nendobj\n2 0 obj<</Type/Pages/Count 1/Kids[ 3 0 R]/MediaBox[ 0 0 300 144]>>\nendobj\n3 0 obj<</Type/Page/Contents 4 0 R/Parent 2 0 R/Resources<</Font<</F1<</Type/Font/BaseFont/Times-Roman/Subtype/Type1>>>>>>>>\nendobj\n4 0 obj<</Length 55>>\nstream\n  BT\n    /F1 18 Tf\n    0 0 Td\n    (Hello World) Tj\n  ET\nendstream\nendobj\n5 0 obj<</Author(\xfe\xff\x00U\x00n\x00k\x00n\x00o\x00w\x00n)/CreationDate(D:20140919134038+05'00')/Producer(PoDoFo - http://podofo.sf.net)/Title(\xfe\xff\x00n\x00e\x00w\x00t)>>\nendobj\n6 0 obj<</Type/Metadata/Filter/FlateDecode/Length 584/Subtype/XML>>\nstream\nx\x9c\xed\x98\xcd\xb2\x930\x14\xc7\xf7}\n&.\x1d\x1ahoGa\x80\x8e\xb6\xe3x\x17ua\xaf\xe3\xd2\t\xc9i\x1b\x0b\x81&a\xc0\xfbj.|$_\xc1\xd0r\xe9\xb7V\x9d\xbb\x83\x15\x9c\x9c\xff\xff\x97\x8fs\xb2 \x18W9\xa1k\xd0V\x0cK.B\xf4\xf3\xfb\x0fdq\x16\xa2\xcf\xa3\x993\xcb'\xb0\xe2\xef\x1f%\xcc\x1f?<\xd0\xc75\xf5\x18\x1aG\xbd\xa0\xf2\xab4OA\x13\xabJ\x13\xa1\xfc*D\x84e1\xf8\xe6\xbd\x0ec\x14\xf5,+\x90l\xe1\x7f\x9c\xbek\x92\xccW\x88VZ\xe7>\xc6eY\xf6\xcba?\x93K\xecz\x9e\x87\x9d\x01\x1e\x0cl\x93a\xaboB\x93\xca\x16\xea\xc5\xd6\xa3q\x99\x82\xa2\x92\xe7\x9ag\xa2qc\xb45\xcb\x0b\x99l\xad\x18\xc5\x90@\nB+\xec\xf6]\x8c\xacZK\xe2\xac\xd0!j\xec\x8c!\xa3>\xdb\xfb=\x85\x1b\xd2\x9bD\xef#M,\xe15\xd4O\x88X\x86\xa8\xb2\x19,H\x91h\x14\x05x7z`\x81O<\x02|\x99VOBs\x9d\xc0\x7f\xe0\x05\x94\xfa\xd6)\x1c\xb1jx^\xc4\tW+\x90'\x13xK\x96\xf8Hy\x96X\xabU\x11\x7f\x05\xaa\xff\xa4=I\xab\x95T\x02\xd1\xd9)u\x0e\x9b\x0b\xcb\x8e>\x89\xb5\xc8Jqm\x91\x07\xaa-\xee\xc8{\x972=\xdd\xfa+\xe5d\xea\xb9\xad'\xa1\xfa\xdbj\xee\xd3,\xc5\x15\xc9M-9\xa6\x96\xdaD\xce6Wr\xd3\x1c\xdf3S~|\xc1A\xe2MA\x92F{\xb1\x0eM\xba?3\xdd\xc2\x88&S\xa2!\x1a8\xee\x9d\xedx\xb6\xeb=\xb8C\xff\xce\xf1\x87\xaf\xfb\xde\xe0\xd5\xc8\xf3^:#\x7f\xe8\x04\xf8L\xf2\x0fK\xcd%W\xe9\xbey\xea/\xa5\x89`D\xb2m\x17\t\x92\x822\xb7\x02(\x1c\x13\xc5)\x1e\x9c-\x01\xff\x1e\xc0\x16\xd5\xe5\r\xaaG\xcc\x8e\x0c\xff\xca\x8e\x92\x84\xc7\x12&\x93\xd6\xb3\x89\xd8\x10g\xd9\xfai\xe7\xedv\xde6-\x94\xceR\x9bfI\x91\n\x85\x8e}nu9\x91\xcd\xefo\xc6+\x90\x1c\x94\xcd\x05\x83\xea\xca\xd17\x16\xbb\xb6\xfc\xa22\xa9\x9bn\xbe0p\xfd\x88wAs\xc3\x9a+\x19\xb7w\xf2a#=\xdf\xd3A:H\x07\xe9 \x1d\xa4\x83t\x90\x0e\xd2A:H\x07yNH/h\x7f\xd6\x80`!*\xd18\xfa\x05\x94\x80P\xb0\nendstream\nendobj\nxref\n0 7\n0000000000 65535 f \n0000000015 00000 n \n0000000074 00000 n \n0000000148 00000 n \n0000000280 00000 n \n0000000382 00000 n \n0000000522 00000 n \ntrailer\n<</ID[<4D028D512DEBEFD964756764AD8FF726><4D028D512DEBEFD964756764AD8FF726>]/Info 5 0 R/Root 1 0 R/Size 7>>\nstartxref\n1199\n%%EOF\n"  # noqa
    # }}}
    mi = Metadata('title1', ['author1'])
    xmp_packet = metadata_to_xmp_packet(mi)
    podofo = get_podofo()
    p = podofo.PDFDoc()
    p.load(raw)
    p.title = mi.title
    p.author = mi.authors[0]
    p.set_xmp_metadata(xmp_packet)
    buf = BytesIO()
    p.save_to_fileobj(buf)
    raw = buf.getvalue()
    # delete=False so the file can be reopened by name after the handle is
    # closed; it is removed explicitly in the finally clause below
    with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as f:
        f.write(raw)
    try:
        p = podofo.PDFDoc()
        p.open(f.name)
        if (p.title, p.author) != (mi.title, mi.authors[0]):
            raise ValueError('podofo failed to set title and author in Info dict {} != {}'.format(
                (p.title, p.author), (mi.title, mi.authors[0])))
        if not p.get_xmp_metadata():
            raise ValueError('podofo failed to write XMP packet')
        # Drop the document object before the temporary file is removed
        del p
    finally:
        os.remove(f.name)


if __name__ == '__main__':
    # Run as a script: read the XMP metadata of the PDF named by the last
    # command line argument (the result is not printed)
    pdf_path = sys.argv[-1]
    get_xmp_metadata(pdf_path)

Zerion Mini Shell 1.0