%PDF- %PDF-
Direktori : /usr/lib/calibre/calibre/ebooks/metadata/ |
Current File : //usr/lib/calibre/calibre/ebooks/metadata/archive.py |
#!/usr/bin/env python3
"""File type plugins and helpers for e-books and comics stored in archives.

Provides import-time plugins that pull a single book out of a ZIP/RAR/KPF
container, plus helpers that read ComicBookInfo metadata from the archive
comment of CBZ/CBR files.
"""

__license__   = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'

import os
import shutil
from contextlib import closing

from calibre.customize import FileTypePlugin
from calibre.utils.localization import canonicalize_lang

# Image extensions that mark an archive as a comic when nothing else is in it.
_COMIC_EXTENSIONS = frozenset({'jpg', 'jpeg', 'png'})

# Extensions of single archive members that ArchiveExtract will pull out.
_EXTRACTABLE_EXTENSIONS = frozenset({
    'lit', 'epub', 'mobi', 'prc', 'rtf', 'pdf', 'mp3', 'pdb', 'azw',
    'azw1', 'azw3', 'fb2', 'docx', 'doc', 'odt',
})


def is_comic(list_of_names):
    """Return True if every name with an extension is a jpg/jpeg/png image.

    Names without a ``.`` and any ``thumbs.db`` entry are ignored. An empty
    list (or one with only ignored names) therefore also yields True.

    :param list_of_names: iterable of archive member names.
    """
    extensions = {
        x.rpartition('.')[-1].lower() for x in list_of_names
        if '.' in x and x.lower().rpartition('/')[-1] != 'thumbs.db'
    }
    return not (extensions - _COMIC_EXTENSIONS)


def archive_type(stream):
    """Sniff the first four bytes of *stream* to identify the archive format.

    The stream position is restored afterwards on a best-effort basis.

    :param stream: a binary file-like object.
    :return: ``'zip'``, ``'rar'`` or ``None`` if neither signature matches.
    """
    from calibre.utils.zipfile import stringFileHeader
    try:
        pos = stream.tell()
    except Exception:
        # Streams that cannot report a position are rewound to the start.
        pos = 0
    id_ = stream.read(4)
    ans = None
    if id_ == stringFileHeader:
        ans = 'zip'
    elif id_.startswith(b'Rar'):
        ans = 'rar'
    try:
        stream.seek(pos)
    except Exception:
        # Deliberately best-effort: a non-seekable stream is left where it is.
        pass
    return ans


class KPFExtract(FileTypePlugin):
    """Import plugin that replaces a KPF file with the DOCX it contains."""

    name = 'KPF Extract'
    author = 'Kovid Goyal'
    description = _('Extract the source DOCX file from Amazon Kindle Create KPF files.'
            ' Note this will not contain any edits made in the Kindle Create program itself.')
    file_types = {'kpf'}
    supported_platforms = ['windows', 'osx', 'linux']
    on_import = True

    def run(self, archive):
        """Extract the first ``.docx`` member of *archive* to a temporary file.

        :param archive: path to the KPF (ZIP) file.
        :return: path of the extracted DOCX, or *archive* unchanged when it
            contains no ``.docx`` member.
        """
        from calibre.utils.zipfile import ZipFile
        with ZipFile(archive, 'r') as zf:
            fnames = zf.namelist()
            candidates = [x for x in fnames if x.lower().endswith('.docx')]
            if not candidates:
                return archive
            of = self.temporary_file('_kpf_extract.docx')
            with closing(of):
                of.write(zf.read(candidates[0]))
        return of.name


def _is_candidate_member(fname):
    """Return True if archive member *fname* should count as real content.

    Filters out ``thumbs.db``, names without an extension, ``.diz``/``.nfo``
    files and anything under a ``__MACOSX`` directory.
    """
    bn = os.path.basename(fname).lower()
    if bn == 'thumbs.db':
        return False
    if '.' not in bn:
        return False
    if bn.rpartition('.')[-1] in {'diz', 'nfo'}:
        return False
    if '__MACOSX' in fname.split('/'):
        return False
    return True


class ArchiveExtract(FileTypePlugin):
    """Import plugin that unwraps a lone e-book from a ZIP/RAR archive."""

    name = 'Archive Extract'
    author = 'Kovid Goyal'
    description = _('Extract common e-book formats from archive files '
        '(ZIP/RAR). Also try to autodetect if they are actually '
        'CBZ/CBR files.')
    file_types = {'zip', 'rar'}
    supported_platforms = ['windows', 'osx', 'linux']
    on_import = True

    def run(self, archive):
        """Return the path of the file that should be imported for *archive*.

        * If the archive holds only images it is copied to a temporary
          ``.cbz``/``.cbr`` file and that path is returned.
        * If it holds exactly one member with a supported e-book extension,
          that member is extracted to a temporary file.
        * Otherwise *archive* is returned unchanged.

        :param archive: path to a ``.zip`` or ``.rar`` file.
        """
        from calibre.utils.zipfile import ZipFile
        is_rar = archive.lower().endswith('.rar')
        zf = None
        if is_rar:
            from calibre.utils.unrar import extract_member, names
            fnames = list(names(archive))
        else:
            zf = ZipFile(archive, 'r')

        try:
            if zf is not None:
                fnames = zf.namelist()
            fnames = [x for x in fnames if _is_candidate_member(x)]

            if is_comic(fnames):
                ext = '.cbr' if is_rar else '.cbz'
                of = self.temporary_file('_archive_extract'+ext)
                # Stream the copy so large comics are not held in memory, and
                # make sure the temp file is closed even if the copy fails.
                with closing(of), open(archive, 'rb') as f:
                    shutil.copyfileobj(f, of)
                return of.name

            if len(fnames) != 1:
                return archive
            fname = fnames[0]
            ext = os.path.splitext(fname)[1][1:]
            if ext.lower() not in _EXTRACTABLE_EXTENSIONS:
                return archive

            of = self.temporary_file('_archive_extract.'+ext)
            with closing(of):
                if is_rar:
                    data = extract_member(archive, match=None, name=fname)[1]
                    of.write(data)
                else:
                    of.write(zf.read(fname))
            return of.name
        finally:
            # The ZipFile was opened here from a path, so it is ours to close.
            if zf is not None:
                zf.close()


def get_comic_book_info(d, mi, series_index='volume'):
    """Copy fields from a ComicBookInfo dictionary onto a metadata object.

    :param d: the decoded ComicBookInfo mapping.
    :param mi: metadata object whose attributes (``series``, ``series_index``,
        ``languages``, ``rating``, ``title``, ``publisher``, ``tags``,
        ``authors``, ``comments``, ``pubdate``) are set when present in *d*.
    :param series_index: key to prefer for the series index, ``'volume'`` or
        ``'issue'``; the other one is used as a fallback.
    """
    # See http://code.google.com/p/comicbookinfo/wiki/Example
    series = d.get('series', '')
    if series.strip():
        mi.series = series
        si = d.get(series_index, None)
        if si is None:
            si = d.get('issue' if series_index == 'volume' else 'volume', None)
        if si is not None:
            try:
                mi.series_index = float(si)
            except Exception:
                mi.series_index = 1

    language = d.get('language', None)
    if language:
        # Read the same key that was tested; the old code looked up 'lang'.
        lang = canonicalize_lang(language)
        if lang:
            mi.languages = [lang]

    rating = d.get('rating', -1)
    # Ignore null / non-numeric ratings instead of raising on the comparison.
    if isinstance(rating, (int, float)) and rating > -1:
        mi.rating = rating

    for x in ('title', 'publisher'):
        y = d.get(x, '').strip()
        if y:
            setattr(mi, x, y)

    tags = d.get('tags', [])
    if tags:
        mi.tags = tags

    authors = []
    for credit in d.get('credits', []):
        if credit.get('role', '') in ('Writer', 'Artist', 'Cartoonist',
                'Creator'):
            x = credit.get('person', '')
            if x:
                # "Last, First" -> "First Last"
                x = ' '.join(reversed(x.split(', ')))
                authors.append(x)
    if authors:
        mi.authors = authors

    comments = d.get('comments', '')
    if comments and comments.strip():
        mi.comments = comments.strip()

    pubm, puby = d.get('publicationMonth', None), d.get('publicationYear', None)
    if puby is not None:
        from calibre.utils.date import parse_only_date
        from datetime import date
        try:
            # No day is read from the data, so mid-month (and June when the
            # month is missing) is used.
            dt = date(puby, 6 if pubm is None else pubm, 15)
            dt = parse_only_date(str(dt))
            mi.pubdate = dt
        except Exception:
            # Deliberately best-effort: an unusable date is simply skipped.
            pass


def parse_comic_comment(comment, series_index='volume'):
    """Parse a JSON archive comment into a metadata object.

    Only the first top-level key starting with ``ComicBookInfo`` is used.

    :param comment: JSON text taken from the archive comment.
    :param series_index: forwarded to :func:`get_comic_book_info`.
    :return: a ``MetaInformation`` instance (empty if nothing matched).
    """
    # See http://code.google.com/p/comicbookinfo/wiki/Example
    from calibre.ebooks.metadata import MetaInformation
    import json
    mi = MetaInformation(None, None)
    m = json.loads(comment)
    if isinstance(m, dict):
        for cat in m:
            if cat.startswith('ComicBookInfo'):
                get_comic_book_info(m[cat], mi, series_index=series_index)
                break
    return mi


def get_comic_metadata(stream, stream_type, series_index='volume'):
    """Read ComicBookInfo metadata from the comment of a CBZ/CBR stream.

    :param stream: the comic archive, passed to ``ZipFile`` or the unrar
        ``comment`` helper.
    :param stream_type: ``'cbz'`` or ``'cbr'``; any other value yields empty
        metadata.
    :param series_index: forwarded to :func:`parse_comic_comment`.
    """
    comment = None
    if stream_type == 'cbz':
        from calibre.utils.zipfile import ZipFile
        zf = ZipFile(stream)
        comment = zf.comment
    elif stream_type == 'cbr':
        from calibre.utils.unrar import comment as get_comment
        comment = get_comment(stream)

    return parse_comic_comment(comment or b'{}', series_index=series_index)