%PDF- %PDF-
| Direktori : /lib/calibre/calibre/ebooks/metadata/ |
| Current File : //lib/calibre/calibre/ebooks/metadata/meta.py |
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
import os, regex, collections
from calibre.utils.config import prefs
from calibre.constants import filesystem_encoding
from calibre.ebooks.metadata.opf2 import OPF
from calibre import isbytestring
from calibre.customize.ui import get_file_type_metadata, set_file_type_metadata
from calibre.ebooks.metadata import MetaInformation, string_to_authors
# Rank of each file type as a metadata source. Extensions later in the tuple
# get a larger rank; metadata from a higher-ranked type should be applied on
# top of metadata from a lower-ranked one. Unlisted extensions rank 0.
METADATA_PRIORITIES = collections.defaultdict(lambda: 0)
for priority, ext in enumerate(
    (
        'html', 'htm', 'xhtml', 'xhtm',
        'rtf', 'fb2', 'pdf', 'prc', 'odt',
        'epub', 'lit', 'lrx', 'lrf', 'mobi',
        'azw', 'azw3', 'azw1', 'rb', 'imp', 'snb',
    ),
    start=1,
):
    METADATA_PRIORITIES[ext] = priority
def path_to_ext(path):
    """Return the lower-cased extension of *path*, without the leading dot.

    A path with no extension yields an empty string.
    """
    _root, suffix = os.path.splitext(path)
    return suffix[1:].lower()
def metadata_from_formats(formats, force_read_metadata=False, pattern=None):
    """Return metadata for one book that is available as several files.

    The real work is done by ``_metadata_from_formats``. If that fails for
    any ordinary reason, the metadata is instead derived from the filename of
    the first entry in *formats*.

    :param formats: paths of the files that make up the book.
    :param force_read_metadata: forwarded to ``_metadata_from_formats``.
    :param pattern: compiled filename pattern, forwarded to
        ``_metadata_from_formats`` and used as ``pat`` for the fallback.
    :return: the metadata object produced by either code path.
    """
    try:
        return _metadata_from_formats(formats, force_read_metadata, pattern)
    except Exception:
        # Best-effort fallback. ``Exception`` (rather than a bare ``except``)
        # lets KeyboardInterrupt and SystemExit propagate instead of being
        # silently turned into filename-derived metadata.
        mi = metadata_from_filename(list(formats)[0], pat=pattern)
        if not mi.authors:
            mi.authors = [_('Unknown')]
        return mi
def _metadata_from_formats(formats, force_read_metadata=False, pattern=None):
    """Merge the metadata read from every file in *formats*.

    *formats* is sorted in place by ``METADATA_PRIORITIES`` so that files of
    higher-priority types are merged last. If one of the files is an OPF whose
    metadata has a title, that metadata is returned on its own. Otherwise each
    file is read with ``get_metadata`` and folded into one result; files that
    fail to read are skipped. Merging stops early as soon as the result
    carries an ``application_id``.

    :return: the merged metadata, with title and authors defaulting to the
        ``_('Unknown')`` placeholder when nothing better was found.
    """
    merged = MetaInformation(None, None)
    formats.sort(key=lambda x: METADATA_PRIORITIES[path_to_ext(x)])
    extensions = [path_to_ext(p) for p in formats]

    if 'opf' in extensions:
        opf_mi = opf_metadata(formats[extensions.index('opf')])
        if opf_mi is not None and opf_mi.title:
            return opf_mi

    for path, ext in zip(formats, extensions):
        with lopen(path, 'rb') as stream:
            try:
                merged.smart_update(
                    get_metadata(
                        stream,
                        stream_type=ext,
                        use_libprs_metadata=True,
                        force_read_metadata=force_read_metadata,
                        pattern=pattern,
                    )
                )
            except Exception:
                # An unreadable file must not prevent the others from
                # contributing; move on to the next one.
                pass
            else:
                if getattr(merged, 'application_id', None) is not None:
                    return merged

    if not merged.title:
        merged.title = _('Unknown')
    if not merged.authors:
        merged.authors = [_('Unknown')]
    return merged
def get_metadata(stream, stream_type='lrf', use_libprs_metadata=False,
                 force_read_metadata=False, pattern=None):
    """Read metadata from *stream* and restore its position afterwards.

    Thin wrapper around ``_get_metadata``: the stream offset is recorded
    before the call (0 when the stream has no ``tell``) and, if the stream
    supports ``seek``, restored whether or not the read succeeds.

    All arguments are forwarded unchanged to ``_get_metadata``, whose return
    value is returned.
    """
    start = stream.tell() if hasattr(stream, 'tell') else 0
    try:
        return _get_metadata(
            stream, stream_type, use_libprs_metadata,
            force_read_metadata, pattern,
        )
    finally:
        if hasattr(stream, 'seek'):
            stream.seek(start)
def _get_metadata(stream, stream_type, use_libprs_metadata,
                  force_read_metadata=False, pattern=None):
    """Build metadata for *stream* from its filename, contents and sidecar OPF.

    Sources are layered in this order, each later one updating the earlier:

    1. the filename of the stream (via ``metadata_from_filename``),
    2. metadata embedded in the file (via ``get_file_type_metadata``), read
       only when *force_read_metadata* is true or the ``read_file_metadata``
       preference is set,
    3. a readable ``<name-without-extension>.opf`` next to the stream's file.

    :param stream: file-like object; its ``name`` attribute, when present, is
        used for the filename and for locating the sidecar OPF.
    :param stream_type: file extension; lower-cased and mapped onto the
        canonical reader type for its family (html / mobi / odt).
    :param use_libprs_metadata: when true and the sidecar OPF carries an
        ``application_id``, that OPF metadata is returned as-is.
    :param force_read_metadata: read embedded metadata regardless of prefs.
    :param pattern: compiled filename pattern passed to
        ``metadata_from_filename``.
    :return: the combined metadata object.
    """
    if stream_type:
        stream_type = stream_type.lower()
    # Collapse related extensions onto one canonical type. 'htm' was missing
    # here (the tuple listed 'html' twice), so .htm files were never
    # normalized to 'html'.
    if stream_type in ('html', 'htm', 'xhtml', 'xhtm', 'xml'):
        stream_type = 'html'
    if stream_type in ('mobi', 'prc', 'azw'):
        stream_type = 'mobi'
    if stream_type in ('odt', 'ods', 'odp', 'odg', 'odf'):
        stream_type = 'odt'

    opf = None
    if hasattr(stream, 'name'):
        c = os.path.splitext(stream.name)[0]+'.opf'
        if os.access(c, os.R_OK):
            opf = opf_metadata(os.path.abspath(c))

    if use_libprs_metadata and getattr(opf, 'application_id', None) is not None:
        return opf

    name = os.path.basename(getattr(stream, 'name', ''))
    # The fallback pattern matches the default filename format produced by calibre
    base = metadata_from_filename(name, pat=pattern, fallback_pat=regex.compile(
        r'^(?P<title>.+) - (?P<author>[^-]+)$', flags=regex.UNICODE | regex.VERSION1 | regex.FULLCASE))
    if not base.authors:
        base.authors = [_('Unknown')]
    if not base.title:
        base.title = _('Unknown')

    mi = MetaInformation(None, None)
    if force_read_metadata or prefs['read_file_metadata']:
        mi = get_file_type_metadata(stream, stream_type)
    base.smart_update(mi)
    if opf is not None:
        base.smart_update(opf)

    return base
def set_metadata(stream, mi, stream_type='lrf', report_error=None):
    """Write *mi* into *stream* by delegating to ``set_file_type_metadata``.

    A non-empty *stream_type* is lower-cased first; *report_error* is passed
    through unchanged.
    """
    normalized_type = stream_type.lower() if stream_type else stream_type
    set_file_type_metadata(stream, mi, normalized_type, report_error=report_error)
def _swap_author_name(author):
    """Exchange the two halves of *author* ('Last, First' -> 'First Last').

    The name is split once on the first comma if there is one, otherwise once
    on whitespace. The halves are stripped so a comma-separated name does not
    come back with a leading space.
    """
    if ',' in author:
        parts = author.split(',', 1)
    else:
        parts = author.split(None, 1)
    parts.reverse()
    return ' '.join(p.strip() for p in parts if p.strip())


def metadata_from_filename(name, pat=None, fallback_pat=None):
    """Extract metadata from a filename using a regular expression.

    The extension is removed and underscores are replaced by spaces before
    matching. The named groups ``title``, ``author``, ``series``,
    ``series_index``, ``isbn``, ``publisher``, ``published`` and ``comments``
    are copied into the result when the matching pattern defines them.

    :param name: filename (str, or bytes in the filesystem encoding).
    :param pat: compiled pattern to search with. When ``None`` the
        ``filename_pattern`` preference is compiled, falling back to a
        built-in "title - author" pattern if it cannot be compiled.
    :param fallback_pat: optional compiled pattern tried when *pat* does not
        match.
    :return: a metadata object; its title is the cleaned filename when no
        title could be extracted.
    """
    if isbytestring(name):
        name = name.decode(filesystem_encoding, 'replace')
    name = name.rpartition('.')[0]
    mi = MetaInformation(None, None)

    if pat is None:
        # Try the user's pattern under both regex dialects before giving up
        # and using the built-in default.
        for version in (regex.VERSION1, regex.VERSION0):
            try:
                pat = regex.compile(
                    prefs.get('filename_pattern'),
                    flags=regex.UNICODE | version | regex.FULLCASE)
                break
            except Exception:
                continue
        else:
            pat = regex.compile(
                '(?P<title>.+) - (?P<author>[^_]+)',
                flags=regex.UNICODE | regex.VERSION0 | regex.FULLCASE)

    name = name.replace('_', ' ')
    match = pat.search(name)
    if match is None and fallback_pat is not None:
        match = fallback_pat.search(name)

    if match is not None:
        # match.group() raises IndexError for a group name the pattern does
        # not define; each field is therefore handled independently.
        try:
            mi.title = match.group('title')
        except IndexError:
            pass
        try:
            aus = string_to_authors(match.group('author'))
            if aus:
                mi.authors = aus
                if prefs['swap_author_names'] and mi.authors:
                    mi.authors = [_swap_author_name(x) for x in mi.authors]
        except (IndexError, ValueError):
            pass
        try:
            mi.series = match.group('series')
        except IndexError:
            pass
        try:
            mi.series_index = float(match.group('series_index'))
        except (IndexError, ValueError, TypeError):
            # TypeError: the group exists but did not participate (None).
            pass
        try:
            mi.isbn = match.group('isbn')
        except (IndexError, ValueError):
            pass
        try:
            mi.publisher = match.group('publisher')
        except (IndexError, ValueError):
            pass
        try:
            pubdate = match.group('published')
            if pubdate:
                from calibre.utils.date import parse_only_date
                mi.pubdate = parse_only_date(pubdate)
        except Exception:
            # Date parsing is best effort, but must not swallow
            # KeyboardInterrupt/SystemExit as a bare ``except`` would.
            pass
        try:
            mi.comments = match.group('comments')
        except (IndexError, ValueError):
            pass

    if mi.is_null('title'):
        mi.title = name
    return mi
def opf_metadata(opfpath):
    """Load metadata (and cover image data, if readable) from an OPF file.

    :param opfpath: path to the OPF file, or an open file-like object. For a
        file-like object its ``name`` attribute (or the current working
        directory when it has none) is used to derive the base directory.
    :return: the metadata object when the OPF has an ``application_id``;
        ``None`` when it has none or when parsing fails (the traceback is
        printed in that case).
    """
    if hasattr(opfpath, 'read'):
        stream = opfpath
        owns_stream = False
        opfpath = getattr(stream, 'name', os.getcwd())
    else:
        stream = open(opfpath, 'rb')
        owns_stream = True
    try:
        opf = OPF(stream, os.path.dirname(opfpath))
        if opf.application_id is not None:
            mi = opf.to_book_metadata()
            if hasattr(opf, 'cover') and opf.cover:
                cpath = os.path.join(os.path.dirname(opfpath), opf.cover)
                if os.access(cpath, os.R_OK):
                    fmt = cpath.rpartition('.')[-1]
                    with open(cpath, 'rb') as cover_file:
                        data = cover_file.read()
                    mi.cover_data = (fmt, data)
            return mi
    except Exception:
        import traceback
        traceback.print_exc()
    finally:
        # Only close what was opened here; a caller-supplied stream stays
        # open for the caller.
        if owns_stream:
            stream.close()
    return None
def forked_read_metadata(original_path, tdir):
    """Read metadata for *original_path* and write the results into *tdir*.

    The file is first passed through ``run_import_plugins``. The following
    files are then written to *tdir*:

    * ``file_changed_by_plugins``: absolute path of the plugin output, only
      when the plugins returned a different path,
    * ``size.txt``: size in bytes of the file that was read,
    * ``cover.jpg``: cover image data, when the metadata has any,
    * ``metadata.opf``: the metadata serialized by ``metadata_to_opf``.
    """
    from calibre.ebooks.metadata.opf2 import metadata_to_opf
    from calibre.ebooks.metadata.worker import run_import_plugins

    def in_tdir(filename):
        return os.path.join(tdir, filename)

    path = run_import_plugins((original_path,), os.getpid(), tdir)[0]
    if path != original_path:
        with lopen(in_tdir('file_changed_by_plugins'), 'w') as marker:
            marker.write(os.path.abspath(path))

    with lopen(path, 'rb') as book:
        fmt = os.path.splitext(path)[1][1:].lower()
        # Seek to the end to learn the size, then rewind for the reader.
        book.seek(0, os.SEEK_END)
        size = book.tell()
        with lopen(in_tdir('size.txt'), 'wb') as size_file:
            size_file.write(str(size).encode('ascii'))
        book.seek(0)
        mi = get_metadata(book, fmt)

    if mi.cover_data and mi.cover_data[1]:
        with lopen(in_tdir('cover.jpg'), 'wb') as cover_file:
            cover_file.write(mi.cover_data[1])
        # The image now lives on disk; reference it by name instead.
        mi.cover_data = (None, None)
        mi.cover = 'cover.jpg'

    opf_bytes = metadata_to_opf(mi, default_lang='und')
    with lopen(in_tdir('metadata.opf'), 'wb') as opf_file:
        opf_file.write(opf_bytes)