# (extraction artifact — the following three lines were not part of the original script)
# %PDF- %PDF-
# Direktori : /data/old/usr/bin/
# Current File : //data/old/usr/bin/urlwatch
#!/usr/bin/python3.4
# -*- coding: utf-8 -*-
#
# This file is part of urlwatch (https://thp.io/2008/urlwatch/).
# Copyright (c) 2008-2016 Thomas Perl <thp.io/about>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# File and folder paths
import sys
import os.path
import os
import shutil
import requests
import socket
import argparse
import logging
import imp
import subprocess
import concurrent.futures

# Duplicated here
pkgname = 'urlwatch'

urlwatch_dir = os.path.expanduser(os.path.join('~', '.' + pkgname))
urls_txt = os.path.join(urlwatch_dir, 'urls.txt')
urls_yaml = os.path.join(urlwatch_dir, 'urls.yaml')
config_yaml = os.path.join(urlwatch_dir, 'urlwatch.yaml')
cache_dir = os.path.join(urlwatch_dir, 'cache')
cache_db = os.path.join(urlwatch_dir, 'cache.db')
hooks_py = os.path.join(urlwatch_dir, 'hooks.py')

# Check if we are installed in the system already
(prefix, bindir) = os.path.split(os.path.dirname(os.path.abspath(sys.argv[0])))

if bindir == 'bin':
    # Installed system-wide
    examples_dir = os.path.join(prefix, 'share', pkgname, 'examples')
else:
    # Assume we are not yet installed
    sys.path.insert(0, os.path.join(prefix, bindir, 'lib'))
    examples_dir = os.path.join(prefix, bindir, 'share', pkgname, 'examples')

urls_yaml_example = os.path.join(examples_dir, 'urls.yaml.example')
hooks_py_example = os.path.join(examples_dir, 'hooks.py.example')

# Code section

import urlwatch

from urlwatch.handler import JobState, Report
from urlwatch.storage import UrlsYaml, UrlsTxt, ConfigStorage, CacheDirStorage, CacheMiniDBStorage
from urlwatch.jobs import JobBase, NotModifiedError
from urlwatch.filters import FilterBase
from urlwatch.reporters import ReporterBase

# One minute (=60 seconds) timeout for each request to avoid hanging
socket.setdefaulttimeout(60)

logger = logging.getLogger(pkgname)

MAX_WORKERS = 10


def run_parallel(func, items):
    """Apply func to every item on a thread pool, yielding results as they finish.

    Results are yielded in completion order, not input order.  The first
    exception raised by any worker is re-raised in the caller.  The executor
    is used as a context manager so its worker threads are always cleaned up
    (the original leaked the pool on early exit).
    """
    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        futures = (executor.submit(func, item) for item in items)
        for future in concurrent.futures.as_completed(futures):
            exception = future.exception()
            if exception is not None:
                raise exception
            yield future.result()


def show_features():
    """Print the supported job, filter and reporter types; return exit code 0."""
    print()
    print('Supported jobs:\n')
    print(JobBase.job_documentation())
    print('Supported filters:\n')
    print(FilterBase.filter_documentation())
    print()
    print('Supported reporters:\n')
    print(ReporterBase.reporter_documentation())
    print()
    return 0


def _get_editor():
    """Return the user's editor command from $EDITOR or $VISUAL, or None if unset."""
    editor = os.environ.get('EDITOR', None)
    if editor is None:
        editor = os.environ.get('VISUAL', None)
    return editor


def edit_yaml(yaml_file, parser, example_file=None):
    """Let the user edit a YAML file, validating the result before installing it.

    A working copy ("<name>.edit<ext>") is edited; if `parser` can load the
    edited copy it replaces `yaml_file`, otherwise the original is kept and
    the user's changes stay in the working copy.  Returns a process exit code
    (0 on success, 1 on failure).
    """
    editor = _get_editor()
    if editor is None:
        print('Please set $VISUAL or $EDITOR.')
        return 1

    fn_base, fn_ext = os.path.splitext(yaml_file)
    yaml_edit = fn_base + '.edit' + fn_ext
    try:
        if os.path.exists(yaml_file):
            shutil.copy(yaml_file, yaml_edit)
        elif example_file is not None:
            shutil.copy(example_file, yaml_edit)

        subprocess.check_call([editor, yaml_edit])
        # Check if we can still parse it
        if parser is not None:
            parser(yaml_edit).load()

        os.rename(yaml_edit, yaml_file)
        print('Saving edit changes in', yaml_file)
    except Exception as e:
        print('Parsing failed:')
        print('======')
        print(e)
        print('======')
        print('')
        print('The file', yaml_file, 'was NOT updated.')
        print('Your changes have been saved in', yaml_edit)
        return 1

    return 0


def edit_hooks(hooks_file, example_file):
    """Let the user edit the hooks module, importing it to validate syntax.

    Works like edit_yaml(), but validation is done by loading the edited copy
    as a Python module.  Returns a process exit code (0 on success, 1 on
    failure).
    """
    editor = _get_editor()
    if editor is None:
        print('Please set $VISUAL or $EDITOR.')
        return 1

    fn_base, fn_ext = os.path.splitext(hooks_file)
    hooks_edit = fn_base + '.edit' + fn_ext
    try:
        if os.path.exists(hooks_file):
            shutil.copy(hooks_file, hooks_edit)
        else:
            shutil.copy(example_file, hooks_edit)

        subprocess.check_call([editor, hooks_edit])
        imp.load_source('hooks', hooks_edit)

        os.rename(hooks_edit, hooks_file)
        print('Saving edit changes in', hooks_file)
    except Exception as e:
        print('Parsing failed:')
        print('======')
        print(e)
        print('======')
        print('')
        print('The file', hooks_file, 'was NOT updated.')
        print('Your changes have been saved in', hooks_edit)
        return 1

    return 0


def list_urls(jobs, verbose):
    """Print all jobs (1-based index); verbose mode shows the full repr."""
    for idx, job in enumerate(jobs):
        if verbose:
            print('%d: %s' % (idx + 1, repr(job)))
        else:
            pretty_name = job.pretty_name()
            location = job.get_location()
            if pretty_name != location:
                print('%d: %s (%s)' % (idx + 1, pretty_name, location))
            else:
                print('%d: %s' % (idx + 1, pretty_name))
    return 0


def modify_urls(jobs, urls, add, delete):
    """Add and/or delete a job, then persist the list to `urls` if anything matched.

    `delete` may be a 1-based index or an exact job location; `add` is a
    comma-separated "key=value" spec.  Returns a process exit code (always 0).
    """
    save = True

    if delete is not None:
        try:
            index = int(delete) - 1
            try:
                job = jobs.pop(index)
                print('Removed %r' % (job,))
            except IndexError:
                print('Not found: %r' % (index,))
                save = False
        except ValueError:
            # Not an integer -- treat it as a job location instead
            job = next((job for job in jobs if job.get_location() == delete), None)
            try:
                jobs.remove(job)
                print('Removed %r' % (job,))
            except ValueError:
                print('Not found: %r' % (delete,))
                save = False

    if add is not None:
        # maxsplit=1 so that values may themselves contain '=' characters;
        # the original split('=', 2) crashed on e.g. "url=http://x?a=b=c"
        d = {k: v for k, v in (item.split('=', 1) for item in add.split(','))}
        job = JobBase.unserialize(d)
        print('Adding %r' % (job,))
        jobs.append(job)

    if save:
        print('Saving updated list to %r' % (urls,))
        UrlsYaml(urls).save(jobs)

    return 0


def main(args):
    """Entry point: migrate 1.x data, handle CLI sub-commands, then run all jobs."""
    if args.verbose:
        root_logger = logging.getLogger('')
        console = logging.StreamHandler()
        console.setFormatter(logging.Formatter('%(asctime)s %(module)s %(levelname)s: %(message)s'))
        root_logger.addHandler(console)
        root_logger.setLevel(logging.DEBUG)
        root_logger.info('turning on verbose logging mode')

    logger.info('Using %s as URLs file', args.urls)
    logger.info('Using %s for hooks', args.hooks)
    logger.info('Using %s as cache directory', args.cache)

    if not os.path.isdir(urlwatch_dir):
        os.makedirs(urlwatch_dir)
        print("""
Created directory: {urlwatch_dir}
""".format(urlwatch_dir=urlwatch_dir))

    if not os.path.exists(args.config):
        ConfigStorage.write_default_config(args.config)
        print("""
A default config has been written to {config_yaml}.
Use "{pkgname} --edit-config" to customize it.
""".format(config_yaml=args.config, pkgname=pkgname))

    if args.edit_config:
        sys.exit(edit_yaml(args.config, ConfigStorage))

    config = ConfigStorage(args.config)

    # Migrate urlwatch 1.x URLs to urlwatch 2.x
    if os.path.isfile(urls_txt) and not os.path.isfile(args.urls):
        print("""
Migrating URLs: {urls_txt} -> {urls_yaml}
Use "{pkgname} --edit" to customize it.
""".format(urls_txt=urls_txt, urls_yaml=args.urls, pkgname=pkgname))
        UrlsYaml(args.urls).save(UrlsTxt(urls_txt).load_secure())
        os.rename(urls_txt, urls_txt + '.migrated')

    if not os.path.isfile(args.urls) and not args.edit and not args.add:
        print("""
You need to create {urls_yaml} in order to use {pkgname}.
Use "{pkgname} --edit" to open the file with your editor.
""".format(urls_yaml=args.urls, pkgname=pkgname))
        sys.exit(1)

    if os.path.exists(args.hooks):
        imp.load_source('hooks', args.hooks)

    if os.path.isfile(args.urls):
        jobs = UrlsYaml(args.urls).load_secure()
    else:
        # logger.warn() is deprecated; warning() is the supported spelling
        logger.warning('No jobs file found')
        jobs = []

    # Migrate urlwatch 1.x cache to urlwatch 2.x
    if not os.path.isfile(args.cache) and os.path.isdir(cache_dir):
        print("""
Migrating cache: {cache_dir} -> {cache_db}
""".format(cache_dir=cache_dir, cache_db=args.cache))
        cache_storage = CacheMiniDBStorage(args.cache)
        old_cache_storage = CacheDirStorage(cache_dir)
        cache_storage.restore(old_cache_storage.backup())
        cache_storage.gc([job.get_guid() for job in jobs])
        os.rename(cache_dir, cache_dir + '.migrated')
    else:
        cache_storage = CacheMiniDBStorage(args.cache)

    report = Report(config.config)

    if args.features:
        sys.exit(show_features())

    if args.gc_cache:
        cache_storage.gc([job.get_guid() for job in jobs])
        sys.exit(0)

    if args.edit:
        sys.exit(edit_yaml(args.urls, UrlsYaml, urls_yaml_example))

    if args.edit_hooks:
        sys.exit(edit_hooks(args.hooks, hooks_py_example))

    if args.list:
        sys.exit(list_urls(jobs, args.verbose))

    if args.add is not None or args.delete is not None:
        sys.exit(modify_urls(jobs, args.urls, args.add, args.delete))

    logger.debug('Processing %d jobs', len(jobs))
    for job_state in run_parallel(lambda job_state: job_state.process(),
                                  (JobState(cache_storage, job) for job in jobs)):
        logger.debug('Job finished: %s', job_state.job)

        if job_state.exception is not None:
            if isinstance(job_state.exception, NotModifiedError):
                logger.info('Job %s has not changed (HTTP 304)', job_state.job)
                report.unchanged(job_state)
            elif isinstance(job_state.exception, requests.exceptions.RequestException):
                # Instead of a full traceback, just show the HTTP error
                job_state.traceback = str(job_state.exception)
                report.error(job_state)
            else:
                report.error(job_state)
        elif job_state.old_data is not None:
            if job_state.old_data.splitlines() != job_state.new_data.splitlines():
                report.changed(job_state)
                job_state.save()
            else:
                report.unchanged(job_state)
        else:
            report.new(job_state)
            job_state.save()

    # Output everything
    report.finish()

    # Close cache
    cache_storage.close()


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description=urlwatch.__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--version', action='version', version='%(prog)s {}'.format(urlwatch.__version__))
    parser.add_argument('-v', '--verbose', action='store_true', help='show debug output')

    group = parser.add_argument_group('files and directories')
    group.add_argument('--urls', metavar='FILE', help='read job list (URLs) from FILE', default=urls_yaml)
    group.add_argument('--config', metavar='FILE', help='read configuration from FILE', default=config_yaml)
    group.add_argument('--hooks', metavar='FILE', help='use FILE as hooks.py module', default=hooks_py)
    group.add_argument('--cache', metavar='FILE', help='use FILE as cache database', default=cache_db)

    group = parser.add_argument_group('job list management')
    group.add_argument('--list', action='store_true', help='list jobs')
    group.add_argument('--add', metavar='JOB', help='add job (key1=value1,key2=value2,...)')
    group.add_argument('--delete', metavar='JOB', help='delete job by location or index')

    group = parser.add_argument_group('interactive commands ($EDITOR/$VISUAL)')
    # fixed help-string typo: was 'edit URL/job listL'
    group.add_argument('--edit', action='store_true', help='edit URL/job list')
    group.add_argument('--edit-config', action='store_true', help='edit configuration file')
    group.add_argument('--edit-hooks', action='store_true', help='edit hooks script')

    group = parser.add_argument_group('miscellaneous')
    group.add_argument('--features', action='store_true', help='list supported jobs/filters/reporters')
    group.add_argument('--gc-cache', action='store_true', help='remove old cache entries')

    main(parser.parse_args())