%PDF- %PDF-
Direktori : /usr/lib/calibre/calibre/ebooks/conversion/plugins/ |
Current File : //usr/lib/calibre/calibre/ebooks/conversion/plugins/djvu_input.py |
__license__ = 'GPL 3' __copyright__ = '2011, Anthon van der Neut <anthon@mnt.org>' __docformat__ = 'restructuredtext en' import os from io import BytesIO from calibre.customize.conversion import InputFormatPlugin class DJVUInput(InputFormatPlugin): name = 'DJVU Input' author = 'Anthon van der Neut' description = _('Convert OCR-ed DJVU files (.djvu) to HTML') file_types = {'djvu', 'djv'} commit_name = 'djvu_input' def convert(self, stream, options, file_ext, log, accelerators): from calibre.ebooks.txt.processor import convert_basic stdout = BytesIO() from calibre.ebooks.djvu.djvu import DJVUFile x = DJVUFile(stream) x.get_text(stdout) raw_text = stdout.getvalue() if not raw_text: raise ValueError('The DJVU file contains no text, only images, probably page scans.' ' calibre only supports conversion of DJVU files with actual text in them.') html = convert_basic(raw_text.replace(b"\n", b' ').replace( b'\037', b'\n\n')) # Run the HTMLized text through the html processing plugin. from calibre.customize.ui import plugin_for_input_format html_input = plugin_for_input_format('html') for opt in html_input.options: setattr(options, opt.option.name, opt.recommended_value) options.input_encoding = 'utf-8' base = os.getcwd() htmlfile = os.path.join(base, 'index.html') c = 0 while os.path.exists(htmlfile): c += 1 htmlfile = os.path.join(base, 'index%d.html'%c) with open(htmlfile, 'wb') as f: f.write(html.encode('utf-8')) odi = options.debug_pipeline options.debug_pipeline = None # Generate oeb from html conversion. with open(htmlfile, 'rb') as f: oeb = html_input.convert(f, options, 'html', log, {}) options.debug_pipeline = odi os.remove(htmlfile) # Set metadata from file. from calibre.customize.ui import get_file_type_metadata from calibre.ebooks.oeb.transforms.metadata import meta_info_to_oeb_metadata mi = get_file_type_metadata(stream, file_ext) meta_info_to_oeb_metadata(mi, oeb.metadata, log) return oeb