#!/usr/bin/env python3
# License: GPL v3 Copyright: 2018, Kovid Goyal <kovid at kovidgoyal.net>
# calibre/gui2/viewer/convert_book.py

import errno
import json
import os
import tempfile
import time
from hashlib import sha1
from itertools import count

from calibre import walk
from calibre.constants import cache_dir, iswindows
from calibre.ptempfile import TemporaryFile
from calibre.srv.render_book import RENDER_VERSION
from calibre.utils.filenames import rmtree
from calibre.utils.ipc.simple_worker import start_pipe_worker
from calibre.utils.lock import ExclusiveFile
from calibre.utils.serialize import msgpack_dumps
from calibre.utils.short_uuid import uuid4
from polyglot.builtins import as_bytes, as_unicode, iteritems

DAY = 24 * 3600
VIEWER_VERSION = 1
td_counter = count()


def book_cache_dir():
    return getattr(book_cache_dir, 'override', os.path.join(cache_dir(), 'ev2'))


def cache_lock():
    return ExclusiveFile(os.path.join(book_cache_dir(), 'metadata.json'))


def book_hash(path, size, mtime):
    # The cache key covers the book's identity (path, size, mtime) and the
    # renderer/viewer versions, so changing any of them invalidates the cache
    path = os.path.normcase(os.path.abspath(path))
    raw = json.dumps((path, size, mtime, RENDER_VERSION, VIEWER_VERSION))
    if not isinstance(raw, bytes):
        raw = raw.encode('utf-8')
    return as_unicode(sha1(raw).hexdigest())


def safe_makedirs(path):
    try:
        os.makedirs(path)
    except OSError as err:
        if err.errno != errno.EEXIST:
            raise
    return path


def robust_rmtree(x):
    retries = 2 if iswindows else 1  # retry on windows to get around the idiotic mandatory file locking
    for i in range(retries):
        try:
            try:
                rmtree(x)
            except UnicodeDecodeError:
                rmtree(as_bytes(x))
            return True
        except OSError:
            time.sleep(0.1)
    return False


def robust_rename(a, b):
    retries = 20 if iswindows else 1  # retry on windows to get around the idiotic mandatory file locking
    for i in range(retries):
        try:
            os.rename(a, b)
            return True
        except OSError:
            time.sleep(0.1)
    return False


def clear_temp(temp_path):
    # Remove leftover temporary conversion directories older than a day
    now = time.time()
    for x in os.listdir(temp_path):
        x = os.path.join(temp_path, x)
        mtime = os.path.getmtime(x)
        if now - mtime > DAY:
            robust_rmtree(x)


def expire_cache(path, instances, max_age):
    now = time.time()
    remove = [x for x in instances if now - x['atime'] > max_age and x['status'] == 'finished']
    for instance in remove:
        if robust_rmtree(os.path.join(path, instance['path'])):
            instances.remove(instance)


def expire_old_versions(path, instances):
    # Remove all finished instances except the most recently accessed one,
    # yielding each instance whose directory was successfully deleted
    instances = filter(lambda x: x['status'] == 'finished', instances)
    remove = sorted(instances, key=lambda x: x['atime'], reverse=True)[1:]
    for instance in remove:
        if robust_rmtree(os.path.join(path, instance['path'])):
            yield instance


def expire_cache_and_temp(temp_path, finished_path, metadata, max_age, force_expire):
    now = time.time()
    if now - metadata['last_clear_at'] < DAY and max_age >= 0 and not force_expire:
        return
    clear_temp(temp_path)
    entries = metadata['entries']
    path_key_map = {}
    for key, instances in tuple(entries.items()):
        if instances:
            expire_cache(finished_path, instances, max_age)
            if not instances:
                del entries[key]
            else:
                for x in instances:
                    book_path = x.get('book_path')
                    if book_path:
                        path_key_map.setdefault(book_path, []).append(key)
    # A book that was modified in place accumulates several keys pointing at
    # the same file path; keep only the newest finished conversion for it
    for keys in path_key_map.values():
        instances = []
        for key in keys:
            instances += entries.get(key, [])
        if len(instances) > 1:
            removed = tuple(expire_old_versions(finished_path, instances))
            if removed:
                for r in removed:
                    rkey = r['key']
                    if rkey in entries:
                        try:
                            entries[rkey].remove(r)
                        except ValueError:
                            pass
                        if not entries[rkey]:
                            del entries[rkey]
    metadata['last_clear_at'] = now
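
# On-disk layout, as implied by the code in this module (the 'f' and 't'
# sub-directories are created in prepare_book() below):
#
#   <cache_dir>/ev2/metadata.json   JSON index, doubling as the lock file:
#                                   {'entries': {key: [instance, ...]}, 'last_clear_at': timestamp}
#   <cache_dir>/ev2/f/<dir>         finished (rendered) books
#   <cache_dir>/ev2/t/<dir>         in-progress conversion directories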
def prepare_convert(temp_path, key, st, book_path):
    tdir = tempfile.mkdtemp(dir=temp_path, prefix=f'c{next(td_counter)}-')
    now = time.time()
    return {
        'path': os.path.basename(tdir),
        'id': uuid4(),
        'status': 'working',
        'mtime': now,
        'atime': now,
        'key': key,
        'file_mtime': st.st_mtime,
        'file_size': st.st_size,
        'cache_size': 0,
        'book_path': book_path,
    }


class ConversionFailure(ValueError):

    def __init__(self, book_path, worker_output):
        self.book_path = book_path
        self.worker_output = worker_output
        ValueError.__init__(
            self, f'Failed to convert book: {book_path} with error:\n{worker_output}')


running_workers = []


def clean_running_workers():
    for p in running_workers:
        if p.poll() is None:
            p.kill()
    del running_workers[:]


def do_convert(path, temp_path, key, instance):
    tdir = os.path.join(temp_path, instance['path'])
    p = None
    try:
        with TemporaryFile('log.txt') as logpath:
            with open(logpath, 'w+b') as logf:
                p = start_pipe_worker(
                    'from calibre.srv.render_book import viewer_main; viewer_main()',
                    stdout=logf, stderr=logf)
                running_workers.append(p)
                p.stdin.write(msgpack_dumps((
                    path, tdir,
                    {'size': instance['file_size'], 'mtime': instance['file_mtime'], 'hash': key},
                )))
                p.stdin.close()
            if p.wait() != 0:
                with open(logpath, 'rb') as logf:
                    worker_output = logf.read().decode('utf-8', 'replace')
                raise ConversionFailure(path, worker_output)
    finally:
        try:
            running_workers.remove(p)
        except Exception:
            pass
    size = 0
    for f in walk(tdir):
        size += os.path.getsize(f)
    instance['cache_size'] = size


def save_metadata(metadata, f):
    f.seek(0)
    f.truncate()
    f.write(as_bytes(json.dumps(metadata, indent=2)))


def prepare_book(path, convert_func=do_convert, max_age=30 * DAY, force=False,
                 prepare_notify=None, force_expire=False):
    st = os.stat(path)
    key = book_hash(path, st.st_size, st.st_mtime)
    finished_path = safe_makedirs(os.path.join(book_cache_dir(), 'f'))
    temp_path = safe_makedirs(os.path.join(book_cache_dir(), 't'))
    with cache_lock() as f:
        try:
            metadata = json.loads(f.read())
        except ValueError:
            metadata = {'entries': {}, 'last_clear_at': 0}
        entries = metadata['entries']
        instances = entries.setdefault(key, [])
        for instance in tuple(instances):
            if instance['status'] == 'finished':
                if force:
                    robust_rmtree(os.path.join(finished_path, instance['path']))
                    instances.remove(instance)
                else:
                    instance['atime'] = time.time()
                    save_metadata(metadata, f)
                    return os.path.join(finished_path, instance['path'])
        if prepare_notify:
            prepare_notify()
        instance = prepare_convert(temp_path, key, st, path)
        instances.append(instance)
        save_metadata(metadata, f)
    # The conversion runs without holding the cache lock, so other viewer
    # instances are not blocked while this book is rendered
    convert_func(path, temp_path, key, instance)
    src_path = os.path.join(temp_path, instance['path'])
    with cache_lock() as f:
        ans = tempfile.mkdtemp(dir=finished_path, prefix=f'c{next(td_counter)}-')
        instance['path'] = os.path.basename(ans)
        try:
            metadata = json.loads(f.read())
        except ValueError:
            metadata = {'entries': {}, 'last_clear_at': 0}
        entries = metadata['entries']
        instances = entries.setdefault(key, [])
        os.rmdir(ans)
        if not robust_rename(src_path, ans):
            raise Exception((
                'Failed to rename: "{}" to "{}" probably some software such as an antivirus or file sync program'
                ' running on your computer has locked the files'
            ).format(src_path, ans))
        instance['status'] = 'finished'
        for q in instances:
            if q['id'] == instance['id']:
                q.update(instance)
                break
        expire_cache_and_temp(temp_path, finished_path, metadata, max_age, force_expire)
        save_metadata(metadata, f)
    return ans
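
# Minimal usage sketch (hypothetical path; the real viewer supplies its own
# prepare_notify callback and relies on the default do_convert worker):
#
#   unpacked = prepare_book('/path/to/book.epub')  # renders, or reuses the cache
#   # ...serve the viewer from the files in `unpacked`...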
def update_book(path, old_stat, name_data_map=None):
    # Re-key a finished conversion after the book file was modified in place,
    # optionally overwriting individual files in the cached directory, so the
    # cache stays valid without a full re-conversion
    old_key = book_hash(path, old_stat.st_size, old_stat.st_mtime)
    finished_path = safe_makedirs(os.path.join(book_cache_dir(), 'f'))
    with cache_lock() as f:
        st = os.stat(path)
        new_key = book_hash(path, st.st_size, st.st_mtime)
        if old_key == new_key:
            return
        try:
            metadata = json.loads(f.read())
        except ValueError:
            metadata = {'entries': {}, 'last_clear_at': 0}
        entries = metadata['entries']
        instances = entries.get(old_key)
        if not instances:
            return
        for instance in tuple(instances):
            if instance['status'] == 'finished':
                entries.setdefault(new_key, []).append(instance)
                instances.remove(instance)
                if not instances:
                    del entries[old_key]
                instance['file_mtime'] = st.st_mtime
                instance['file_size'] = st.st_size
                if name_data_map:
                    for name, data in iteritems(name_data_map):
                        with open(os.path.join(finished_path, instance['path'], name), 'wb') as f2:
                            f2.write(data)
                save_metadata(metadata, f)
                return
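
# Sketch of the update flow (hypothetical names; it mirrors the test below):
#
#   st = os.stat(book_path)                # stat *before* modifying the file
#   # ...modify the book file in place...
#   update_book(book_path, st, name_data_map={'file.html': b'new data'})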
def find_tests():
    import unittest

    class TestViewerCache(unittest.TestCase):
        ae = unittest.TestCase.assertEqual

        def setUp(self):
            self.tdir = tempfile.mkdtemp()
            book_cache_dir.override = os.path.join(self.tdir, 'ev2')

        def tearDown(self):
            rmtree(self.tdir)
            del book_cache_dir.override

        def test_viewer_cache(self):
            def convert_mock(path, temp_path, key, instance):
                self.ae(instance['status'], 'working')
                self.ae(instance['key'], key)
                open(os.path.join(temp_path, instance['path'], 'sentinel'), 'wb').write(b'test')

            def set_data(x):
                if not isinstance(x, bytes):
                    x = x.encode('utf-8')
                with open(book_src, 'wb') as f:
                    f.write(x)

            book_src = os.path.join(self.tdir, 'book.epub')
            set_data('a')
            path = prepare_book(book_src, convert_func=convert_mock)
            self.ae(open(os.path.join(path, 'sentinel'), 'rb').read(), b'test')

            # Test that opening the same book uses the cache
            second_path = prepare_book(book_src, convert_func=convert_mock)
            self.ae(path, second_path)

            # Test that changing the book updates the cache
            set_data('bc')
            third_path = prepare_book(book_src, convert_func=convert_mock)
            self.assertNotEqual(path, third_path)

            # Test force reload
            fourth_path = prepare_book(book_src, convert_func=convert_mock)
            self.ae(third_path, fourth_path)
            fourth_path = prepare_book(book_src, convert_func=convert_mock, force=True)
            self.assertNotEqual(third_path, fourth_path)

            # Test cache expiry
            set_data('bcd')
            prepare_book(book_src, convert_func=convert_mock, max_age=-1000)
            self.ae([], os.listdir(os.path.join(book_cache_dir(), 'f')))

            # Test modifying a book and opening it repeatedly leaves only
            # a single entry for it in the cache
            opath = prepare_book(book_src, convert_func=convert_mock, force_expire=True)
            finished_entries = os.listdir(os.path.join(book_cache_dir(), 'f'))
            self.ae(len(finished_entries), 1)
            set_data('bcde' * 4096)
            npath = prepare_book(book_src, convert_func=convert_mock, force_expire=True)
            new_finished_entries = os.listdir(os.path.join(book_cache_dir(), 'f'))
            self.ae(len(new_finished_entries), 1)
            self.assertNotEqual(opath, npath)
            set_data('bcdef')
            prepare_book(book_src, convert_func=convert_mock, max_age=-1000, force_expire=True)
            self.ae([], os.listdir(os.path.join(book_cache_dir(), 'f')))
            with cache_lock() as f:
                metadata = json.loads(f.read())
            self.assertEqual(metadata['entries'], {})

            # Test updating cached book
            book_src = os.path.join(self.tdir, 'book2.epub')
            set_data('bb')
            path = prepare_book(book_src, convert_func=convert_mock)
            self.ae(open(os.path.join(path, 'sentinel'), 'rb').read(), b'test')
            bs = os.stat(book_src)
            set_data('cde')
            update_book(book_src, bs, name_data_map={'sentinel': b'updated'})
            self.ae(open(os.path.join(path, 'sentinel'), 'rb').read(), b'updated')
            self.ae(1, len(os.listdir(os.path.join(book_cache_dir(), 'f'))))
            with cache_lock() as f:
                metadata = json.loads(f.read())
            self.ae(len(metadata['entries']), 1)
            entry = list(metadata['entries'].values())[0]
            self.ae(len(entry), 1)
            entry = entry[0]
            st = os.stat(book_src)
            self.ae(entry['file_size'], st.st_size)
            self.ae(entry['file_mtime'], st.st_mtime)

    return unittest.defaultTestLoader.loadTestsFromTestCase(TestViewerCache)
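
# Optional convenience runner, a sketch only: calibre's own test harness
# collects these tests via find_tests()
if __name__ == '__main__':
    import unittest
    unittest.TextTestRunner(verbosity=2).run(find_tests())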