%PDF- %PDF-
| Direktori : /lib/calibre/calibre/library/ |
| Current File : //lib/calibre/calibre/library/check_library.py |
#!/usr/bin/env python3
__license__ = 'GPL v3'
__copyright__ = '2010, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import re, os, traceback, fnmatch
from calibre import isbytestring
from calibre.constants import filesystem_encoding
from calibre.ebooks import BOOK_EXTENSIONS
from polyglot.builtins import iteritems
# Extensions (taken from BOOK_EXTENSIONS) that CheckLibrary.is_ebook_file()
# accepts as book formats.
EBOOK_EXTENSIONS = frozenset(BOOK_EXTENSIONS)
# File names that are expected inside a book folder; process_book() subtracts
# them before reporting unknown files or extra formats.
NORMALS = frozenset({'metadata.opf', 'cover.jpg'})
# Entries in the library root that scan_library() skips instead of treating
# them as author folders.
IGNORE_AT_TOP_LEVEL = frozenset({'metadata.db', 'metadata_db_prefs_backup.json', 'metadata_pre_restore.db'})
'''
Checks fields:
- name of array containing info
- user-readable name of info
- can be deleted (can be checked)
- can be fixed. In this case, the name of the fix method is derived from the
array name
'''
# One 4-tuple per check, in the field order described by the string above.
# The first element of each tuple is the name of a list attribute created in
# CheckLibrary.__init__.
# NOTE(review): _() is not imported in this file; presumably it is the
# translation function installed as a builtin by the application -- confirm.
CHECKS = [('invalid_titles', _('Invalid titles'), True, False),
('extra_titles', _('Extra titles'), True, False),
('invalid_authors', _('Invalid authors'), True, False),
('extra_authors', _('Extra authors'), True, False),
('missing_formats', _('Missing book formats'), False, True),
('extra_formats', _('Extra book formats'), True, False),
('extra_files', _('Unknown files in books'), True, False),
('missing_covers', _('Missing cover files'), False, True),
('extra_covers', _('Cover files not in database'), True, True),
('failed_folders', _('Folders raising exception'), False, False)
]
class CheckLibrary:
    """Compare the folders and files of a library on disk with its database.

    After :meth:`scan_library` has run, the problems found are available in
    the list attributes created in ``__init__`` (``invalid_titles``,
    ``extra_titles``, ``invalid_authors``, ``extra_authors``,
    ``missing_formats``, ``extra_formats``, ``extra_files``,
    ``missing_covers``, ``extra_covers`` and ``failed_folders``).

    Every list except ``failed_folders`` holds ``(name, path, book_id)``
    tuples, where ``path`` is relative to the library folder and ``book_id``
    is ``0`` when no database id is associated with the entry.
    ``failed_folders`` holds ``(folder, traceback_text, [])`` tuples.
    """

    def __init__(self, library_path, db):
        """Snapshot the ids and paths known to *db* for later comparison.

        :param library_path: Library folder; bytes are decoded with
            ``filesystem_encoding`` and the path is made absolute.
        :param db: Database object. This class uses its ``is_case_sensitive``
            attribute and its ``all_authors()``, ``all_ids()``, ``path()``,
            ``format_files()`` and ``has_cover()`` methods.
        """
        if isbytestring(library_path):
            library_path = library_path.decode(filesystem_encoding)
        self.src_library_path = os.path.abspath(library_path)
        self.db = db

        self.is_case_sensitive = db.is_case_sensitive

        self.all_authors = frozenset(x[1] for x in db.all_authors())
        self.all_ids = frozenset(db.all_ids())
        self.all_dbpaths = frozenset(self.dbpath(id_) for id_ in self.all_ids)
        # Lower-cased copy used for comparisons when the library is not
        # case sensitive.
        self.all_lc_dbpaths = frozenset(f.lower() for f in self.all_dbpaths)

        # A book folder name must end in " (<digits>)"; the digits are the id.
        self.db_id_regexp = re.compile(r'^.* \((\d+)\)$')

        self.dirs = []
        self.book_dirs = []

        self.potential_authors = {}
        self.invalid_authors = []
        self.extra_authors = []

        self.invalid_titles = []
        self.extra_titles = []

        self.unknown_book_files = []
        self.missing_formats = []
        self.extra_formats = []
        self.extra_files = []

        self.missing_covers = []
        self.extra_covers = []

        self.failed_folders = []

    def dbpath(self, id_):
        """Return the path the database stores for the book with id *id_*."""
        return self.db.path(id_, index_is_id=True)

    @property
    def errors_occurred(self):
        """Truthy when a folder could not be processed.

        ``mismatched_dirs``, ``conflicting_custom_cols`` and
        ``failed_restores`` are never set by this class, so they are read with
        ``getattr`` and only count when something else has attached them.
        Reading them directly raised ``AttributeError`` whenever
        ``failed_folders`` was empty.
        """
        return (
            self.failed_folders
            or getattr(self, 'mismatched_dirs', None)
            or getattr(self, 'conflicting_custom_cols', None)
            or getattr(self, 'failed_restores', None)
        )

    def ignore_name(self, filename):
        """Return True if *filename* matches any pattern in ``ignore_names``.

        ``ignore_names`` is set by :meth:`scan_library`; patterns are matched
        with :func:`fnmatch.fnmatch`.
        """
        return any(
            fnmatch.fnmatch(filename, filespec) for filespec in self.ignore_names
        )

    def scan_library(self, name_ignores, extension_ignores):
        """Walk the library folder and fill the result lists.

        :param name_ignores: Iterable of fnmatch patterns; matching author and
            title folder names are skipped.
        :param extension_ignores: Iterable of extensions without the leading
            dot; files with these extensions are ignored inside book folders
            (``cover.jpg`` is never ignored).
        """
        self.ignore_names = frozenset(name_ignores)
        self.ignore_ext = frozenset('.' + e for e in extension_ignores)

        lib = self.src_library_path
        for auth_dir in os.listdir(lib):
            if self.ignore_name(auth_dir) or auth_dir in IGNORE_AT_TOP_LEVEL:
                continue
            auth_path = os.path.join(lib, auth_dir)
            # First check: author must be a directory
            if not os.path.isdir(auth_path):
                self.invalid_authors.append((auth_dir, auth_dir, 0))
                continue

            self.potential_authors[auth_dir] = {}

            # Look for titles in the author directories
            found_titles = False
            try:
                for title_dir in os.listdir(auth_path):
                    if self.ignore_name(title_dir):
                        continue
                    title_path = os.path.join(auth_path, title_dir)
                    db_path = os.path.join(auth_dir, title_dir)
                    m = self.db_id_regexp.search(title_dir)
                    # Second check: title must have an ID and must be a
                    # directory
                    if m is None or not os.path.isdir(title_path):
                        self.invalid_titles.append((auth_dir, db_path, 0))
                        continue

                    id_ = m.group(1)
                    # Third check: the id_ must be in the DB and the paths
                    # must match (ignoring case when the library is not case
                    # sensitive)
                    if self.is_case_sensitive:
                        path_known = db_path in self.all_dbpaths
                    else:
                        path_known = db_path.lower() in self.all_lc_dbpaths
                    if int(id_) not in self.all_ids or not path_known:
                        self.extra_titles.append((title_dir, db_path, 0))
                        continue

                    # Record the book to check its formats
                    self.book_dirs.append((db_path, title_dir, id_))
                    found_titles = True
            except Exception:
                traceback.print_exc()
                # Sort-of check: exception processing directory
                self.failed_folders.append(
                    (auth_dir, traceback.format_exc(), []))

            # Fourth check: author directories that contain no titles
            if not found_titles:
                self.extra_authors.append((auth_dir, auth_dir, 0))

        for book_info in self.book_dirs:
            try:
                self.process_book(lib, book_info)
            except Exception:
                traceback.print_exc()
                # Sort-of check: exception processing directory. Report the
                # folder of the book that actually failed rather than
                # whatever folder the walk above happened to visit last.
                self.failed_folders.append(
                    (os.path.join(lib, book_info[0]),
                     traceback.format_exc(), []))

        # Check for formats and covers in db for book dirs that are gone
        for id_ in self.all_ids:
            path = self.dbpath(id_)
            if os.path.exists(os.path.join(lib, path)):
                continue
            title_dir = os.path.basename(path)
            book_formats = frozenset(
                self.db.format_files(id_, index_is_id=True))
            for fmt in book_formats:
                self.missing_formats.append(
                    (title_dir,
                     os.path.join(path, fmt[0] + '.' + fmt[1].lower()), id_))
            if self.db.has_cover(id_):
                self.missing_covers.append(
                    (title_dir, os.path.join(path, 'cover.jpg'), id_))

    def is_ebook_file(self, filename):
        """Return True if *filename* has a known book-format extension.

        The extension is compared in lower case and a leading ``original_``
        is stripped first, so ``x.original_epub`` is judged as ``epub``.
        """
        ext = os.path.splitext(filename)[1]
        if not ext:
            return False
        ext = ext[1:].lower()
        if ext.startswith('original_'):
            ext = ext[len('original_'):]
        return ext in EBOOK_EXTENSIONS

    def process_book(self, lib, book_info):
        """Compare one book folder with the formats and cover in the database.

        :param lib: Absolute path of the library folder.
        :param book_info: ``(db_path, title_dir, book_id)`` as recorded in
            ``book_dirs``; ``book_id`` is converted with ``int()``.

        Appends to ``extra_files``, ``missing_formats``, ``extra_formats``,
        ``missing_covers`` and ``extra_covers``. Exceptions from
        ``os.listdir`` or the database propagate to the caller.
        """
        (db_path, title_dir, book_id) = book_info
        filenames = frozenset(
            f for f in os.listdir(os.path.join(lib, db_path))
            if os.path.splitext(f)[1] not in self.ignore_ext
            or f == 'cover.jpg')
        book_id = int(book_id)
        formats = frozenset(filter(self.is_ebook_file, filenames))
        book_formats = frozenset(
            x[0] + '.' + x[1].lower()
            for x in self.db.format_files(book_id, index_is_id=True))

        def report(target, name):
            # All result lists share the (title, library-relative path, id)
            # layout.
            target.append((title_dir, os.path.join(db_path, name), book_id))

        if self.is_case_sensitive:
            unknowns = frozenset(filenames - formats - NORMALS)
            missing = book_formats - formats

            # Check: any books that aren't formats or normally there?
            for fn in unknowns:
                if fn in missing:  # An unknown format correctly registered
                    continue
                report(self.extra_files, fn)

            # Check: any book formats that should be there?
            for fn in missing:
                if fn in unknowns:  # An unknown format correctly registered
                    continue
                report(self.missing_formats, fn)

            # Check: any book formats that shouldn't be there?
            for fn in formats - book_formats - NORMALS:
                report(self.extra_formats, fn)
        else:
            def lc_map(fnames, fset):
                # Map lower-cased name -> original name for every name whose
                # lower-cased form is in fset.
                return {f.lower(): f for f in fnames if f.lower() in fset}

            filenames_lc = frozenset(f.lower() for f in filenames)
            formats_lc = frozenset(f.lower() for f in formats)
            unknowns = frozenset(filenames_lc - formats_lc - NORMALS)
            book_formats_lc = frozenset(f.lower() for f in book_formats)
            missing = book_formats_lc - formats_lc

            # Check: any books that aren't formats or normally there?
            for lcfn, ccfn in lc_map(filenames, unknowns).items():
                if lcfn in missing:  # An unknown format correctly registered
                    continue
                report(self.extra_files, ccfn)

            # Check: any book formats that should be there?
            for lcfn, ccfn in lc_map(book_formats, missing).items():
                if lcfn in unknowns:  # An unknown format correctly registered
                    continue
                report(self.missing_formats, ccfn)

            # Check: any book formats that shouldn't be there? Report the
            # name as it exists on disk, like the two loops above, instead of
            # its lower-cased form.
            extra = formats_lc - book_formats_lc - NORMALS
            for ccfn in lc_map(formats, extra).values():
                report(self.extra_formats, ccfn)

        # check cached has_cover
        has_cover_file = 'cover.jpg' in filenames
        if self.db.has_cover(book_id):
            if not has_cover_file:
                report(self.missing_covers, 'cover.jpg')
        elif has_cover_file:
            report(self.extra_covers, 'cover.jpg')