%PDF- %PDF-
| Direktori : /data/old/usr/bin/ |
| Current File : //data/old/usr/bin/urlwatch |
#!/usr/bin/python3.4
# -*- coding: utf-8 -*-
#
# This file is part of urlwatch (https://thp.io/2008/urlwatch/).
# Copyright (c) 2008-2016 Thomas Perl <thp.io/about>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# File and folder paths
import sys
import os.path
import os
import shutil
import requests
import socket
import argparse
import logging
import imp
import subprocess
import concurrent.futures
# File and folder paths (duplicated here)
pkgname = 'urlwatch'
# Per-user directory: ~/.urlwatch
urlwatch_dir = os.path.expanduser(os.path.join('~', '.' + pkgname))
# Every other location is a direct child of urlwatch_dir; the names on the
# left pair up, in order, with the leaf names on the right.
(urls_txt, urls_yaml, config_yaml,
 cache_dir, cache_db, hooks_py) = (
    os.path.join(urlwatch_dir, leaf)
    for leaf in ('urls.txt', 'urls.yaml', 'urlwatch.yaml',
                 'cache', 'cache.db', 'hooks.py')
)
# Decide where the example files live, based on the directory that holds
# the script named in sys.argv[0].
prefix, bindir = os.path.split(os.path.dirname(os.path.abspath(sys.argv[0])))
if bindir != 'bin':
    # Assume we are not yet installed: make the "lib" directory next to the
    # script importable and use the examples stored beside it.
    sys.path.insert(0, os.path.join(prefix, bindir, 'lib'))
    examples_dir = os.path.join(prefix, bindir, 'share', pkgname, 'examples')
else:
    # Installed system-wide: examples are under <prefix>/share/<pkgname>
    examples_dir = os.path.join(prefix, 'share', pkgname, 'examples')

hooks_py_example = os.path.join(examples_dir, 'hooks.py.example')
urls_yaml_example = os.path.join(examples_dir, 'urls.yaml.example')
# Code section
import urlwatch
from urlwatch.handler import JobState, Report
from urlwatch.storage import UrlsYaml, UrlsTxt, ConfigStorage, CacheDirStorage, CacheMiniDBStorage
from urlwatch.jobs import JobBase, NotModifiedError
from urlwatch.filters import FilterBase
from urlwatch.reporters import ReporterBase
# One minute (=60 seconds) timeout for each request to avoid hanging.
# This sets the default timeout for sockets created from here on.
socket.setdefaulttimeout(60)
# Logger used by this script, named after the package
logger = logging.getLogger(pkgname)
# Upper bound on the number of worker threads used by run_parallel()
MAX_WORKERS = 10


def run_parallel(func, items):
    """Call ``func(item)`` for every item on a thread pool and yield the results.

    This is a generator. Results are yielded as the calls complete, so
    their order does not have to match the order of ``items``.

    :param func: callable taking a single item
    :param items: iterable of items; all of them are submitted up front
    :raises Exception: whatever ``func`` raised, re-raised when the
        corresponding future is reached
    """
    # The "with" block shuts the pool down and joins its worker threads as
    # soon as the generator is exhausted, closed early or aborted by an
    # exception, instead of leaving the threads around until interpreter
    # exit. On an error this waits for the calls that are already running.
    with concurrent.futures.ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
        futures = [executor.submit(func, item) for item in items]
        for future in concurrent.futures.as_completed(futures):
            # result() re-raises the exception of a failed call
            yield future.result()
def show_features():
    """Print the documentation of supported jobs, filters and reporters.

    The text comes from JobBase.job_documentation(),
    FilterBase.filter_documentation() and
    ReporterBase.reporter_documentation().

    :return: always 0 (used as the process exit status by the caller)
    """
    print()
    # Jobs and filters are printed back to back, without a blank line between
    for heading, describe in (
        ('Supported jobs:\n', JobBase.job_documentation),
        ('Supported filters:\n', FilterBase.filter_documentation),
    ):
        print(heading)
        print(describe())
    print()

    print('Supported reporters:\n')
    print(ReporterBase.reporter_documentation())
    print()

    return 0
def edit_yaml(yaml_file, parser, example_file=None):
    """Edit a YAML file in the user's editor and keep the result if it loads.

    A working copy named ``<base>.edit<ext>`` is created from ``yaml_file``
    (or from ``example_file`` when ``yaml_file`` does not exist), opened in
    the editor and, if ``parser(copy).load()`` succeeds, moved over
    ``yaml_file``. On any failure the original file is left alone and the
    working copy is kept.

    :param yaml_file: path of the file to edit
    :param parser: callable taking a filename and returning an object with
        a ``load()`` method, or None to skip validation
    :param example_file: optional template used when ``yaml_file`` is missing
    :return: 0 on success, 1 if no editor is configured or any step failed
    """
    # Local import: shlex is not among this file's top-level imports
    import shlex

    # $EDITOR wins over $VISUAL; an empty variable counts as unset
    editor = os.environ.get('EDITOR') or os.environ.get('VISUAL')
    if not editor:
        print('Please set $VISUAL or $EDITOR.')
        return 1

    fn_base, fn_ext = os.path.splitext(yaml_file)
    yaml_edit = fn_base + '.edit' + fn_ext
    try:
        if os.path.exists(yaml_file):
            shutil.copy(yaml_file, yaml_edit)
        elif example_file is not None:
            shutil.copy(example_file, yaml_edit)
        # The editor setting may carry arguments (e.g. "emacs -nw"), so it
        # is split into words instead of being used as one program name
        subprocess.check_call(shlex.split(editor) + [yaml_edit])
        # Check if we can still parse it
        if parser is not None:
            parser(yaml_edit).load()
        # os.replace() overwrites an existing target on every platform
        os.replace(yaml_edit, yaml_file)
        print('Saving edit changes in', yaml_file)
    except Exception as e:
        print('Parsing failed:')
        print('======')
        print(e)
        print('======')
        print('')
        print('The file', yaml_file, 'was NOT updated.')
        print('Your changes have been saved in', yaml_edit)
        return 1
    return 0
def edit_hooks(hooks_file, example_file):
    """Edit the hooks script in the user's editor and keep it if it loads.

    A working copy named ``<base>.edit<ext>`` is created from ``hooks_file``
    (or from ``example_file`` when ``hooks_file`` does not exist), opened in
    the editor and then loaded with ``imp.load_source``. Loading runs the
    top-level code of the edited file. If that succeeds, the copy is moved
    over ``hooks_file``; on any failure the original is left alone and the
    working copy is kept.

    :param hooks_file: path of the hooks script to edit
    :param example_file: template used when ``hooks_file`` is missing
    :return: 0 on success, 1 if no editor is configured or any step failed
    """
    # Local import: shlex is not among this file's top-level imports
    import shlex

    # $EDITOR wins over $VISUAL; an empty variable counts as unset
    editor = os.environ.get('EDITOR') or os.environ.get('VISUAL')
    if not editor:
        print('Please set $VISUAL or $EDITOR.')
        return 1

    fn_base, fn_ext = os.path.splitext(hooks_file)
    hooks_edit = fn_base + '.edit' + fn_ext
    try:
        if os.path.exists(hooks_file):
            shutil.copy(hooks_file, hooks_edit)
        else:
            shutil.copy(example_file, hooks_edit)
        # The editor setting may carry arguments (e.g. "emacs -nw"), so it
        # is split into words instead of being used as one program name
        subprocess.check_call(shlex.split(editor) + [hooks_edit])
        # Check that the edited script can still be loaded
        imp.load_source('hooks', hooks_edit)
        # os.replace() overwrites an existing target on every platform
        os.replace(hooks_edit, hooks_file)
        print('Saving edit changes in', hooks_file)
    except Exception as e:
        print('Parsing failed:')
        print('======')
        print(e)
        print('======')
        print('')
        print('The file', hooks_file, 'was NOT updated.')
        print('Your changes have been saved in', hooks_edit)
        return 1
    return 0
def list_urls(jobs, verbose):
    """Print a numbered list of jobs, one per line, counting from 1.

    :param jobs: iterable of job objects
    :param verbose: if true, show ``repr(job)``; otherwise show the job's
        ``pretty_name()``, followed by its ``get_location()`` in
        parentheses when the two differ
    :return: always 0 (used as the process exit status by the caller)
    """
    for number, job in enumerate(jobs, 1):
        if verbose:
            description = repr(job)
        else:
            description = job.pretty_name()
            location = job.get_location()
            if description != location:
                description = '%s (%s)' % (description, location)
        print('%d: %s' % (number, description))
    return 0
def modify_urls(jobs, urls, add, delete):
    """Remove and/or add a job, then save the job list.

    :param jobs: list of jobs; modified in place
    :param urls: filename the updated list is written to via UrlsYaml
    :param add: job description ``key1=value1,key2=value2,...`` or None
    :param delete: 1-based index (as a string) or location of the job to
        remove, or None
    :return: always 0 (used as the process exit status by the caller)
    :raises ValueError: if an item of ``add`` contains no "="

    Nothing is saved when the job to delete was not found.
    """
    save = True
    if delete is not None:
        try:
            position = int(delete)
        except ValueError:
            # Not a number: look the job up by its location
            job = next((job for job in jobs if job.get_location() == delete), None)
            if job is None:
                print('Not found: %r' % (delete,))
                save = False
            else:
                jobs.remove(job)
                print('Removed %r' % (job,))
        else:
            # Only 1..len(jobs) is valid. Without the range check, 0 and
            # negative numbers would remove a job counted from the end.
            if 1 <= position <= len(jobs):
                job = jobs.pop(position - 1)
                print('Removed %r' % (job,))
            else:
                print('Not found: %r' % (position,))
                save = False

    if add is not None:
        # Split on the first "=" only, so that values may contain "="
        # themselves (e.g. URLs with a query string)
        d = dict(item.split('=', 1) for item in add.split(','))
        job = JobBase.unserialize(d)
        print('Adding %r' % (job,))
        jobs.append(job)

    if save:
        print('Saving updated list to %r' % (urls,))
        UrlsYaml(urls).save(jobs)
    return 0
def main(args):
    """Run urlwatch with the parsed command-line options in ``args``.

    Steps, in order: set up verbose logging if requested; create the
    urlwatch directory and a default configuration if they are missing;
    migrate an old urls.txt and an old cache directory; load the hooks file
    and the job list; handle the one-shot commands (--features, --gc-cache,
    --edit, --edit-hooks, --list, --add/--delete); otherwise process all
    jobs in parallel, feed the outcome into a Report and close the cache.

    :param args: namespace produced by the argument parser of this script
    :raises SystemExit: for every one-shot command, and with status 1 when
        no job list exists and neither --edit nor --add was given
    """
    if args.verbose:
        root_logger = logging.getLogger('')
        console = logging.StreamHandler()
        console.setFormatter(logging.Formatter('%(asctime)s %(module)s %(levelname)s: %(message)s'))
        root_logger.addHandler(console)
        root_logger.setLevel(logging.DEBUG)
        root_logger.info('turning on verbose logging mode')

    logger.info('Using %s as URLs file', args.urls)
    logger.info('Using %s for hooks', args.hooks)
    logger.info('Using %s as cache directory', args.cache)

    if not os.path.isdir(urlwatch_dir):
        os.makedirs(urlwatch_dir)
        print("""
Created directory: {urlwatch_dir}
""".format(urlwatch_dir=urlwatch_dir))

    if not os.path.exists(args.config):
        ConfigStorage.write_default_config(args.config)
        print("""
A default config has been written to {config_yaml}.
Use "{pkgname} --edit-config" to customize it.
""".format(config_yaml=args.config, pkgname=pkgname))

    # Handled before the configuration is loaded below
    if args.edit_config:
        sys.exit(edit_yaml(args.config, ConfigStorage))

    config = ConfigStorage(args.config)

    # Migrate urlwatch 1.x URLs to urlwatch 2.x
    if os.path.isfile(urls_txt) and not os.path.isfile(args.urls):
        print("""
Migrating URLs: {urls_txt} -> {urls_yaml}
Use "{pkgname} --edit" to customize it.
""".format(urls_txt=urls_txt, urls_yaml=args.urls, pkgname=pkgname))
        UrlsYaml(args.urls).save(UrlsTxt(urls_txt).load_secure())
        os.rename(urls_txt, urls_txt + '.migrated')

    if not os.path.isfile(args.urls) and not args.edit and not args.add:
        print("""
You need to create {urls_yaml} in order to use {pkgname}.
Use "{pkgname} --edit" to open the file with your editor.
""".format(urls_yaml=args.urls, pkgname=pkgname))
        sys.exit(1)

    # Loading the hooks file runs its top-level code
    if os.path.exists(args.hooks):
        imp.load_source('hooks', args.hooks)

    if os.path.isfile(args.urls):
        jobs = UrlsYaml(args.urls).load_secure()
    else:
        # Logger.warn() is a deprecated alias; warning() is the supported name
        logger.warning('No jobs file found')
        jobs = []

    # Migrate urlwatch 1.x cache to urlwatch 2.x
    if not os.path.isfile(args.cache) and os.path.isdir(cache_dir):
        print("""
Migrating cache: {cache_dir} -> {cache_db}
""".format(cache_dir=cache_dir, cache_db=args.cache))
        cache_storage = CacheMiniDBStorage(args.cache)
        old_cache_storage = CacheDirStorage(cache_dir)
        cache_storage.restore(old_cache_storage.backup())
        cache_storage.gc([job.get_guid() for job in jobs])
        os.rename(cache_dir, cache_dir + '.migrated')
    else:
        cache_storage = CacheMiniDBStorage(args.cache)

    report = Report(config.config)

    # One-shot commands: each of them ends the program
    if args.features:
        sys.exit(show_features())

    if args.gc_cache:
        cache_storage.gc([job.get_guid() for job in jobs])
        sys.exit(0)

    if args.edit:
        sys.exit(edit_yaml(args.urls, UrlsYaml, urls_yaml_example))

    if args.edit_hooks:
        sys.exit(edit_hooks(args.hooks, hooks_py_example))

    if args.list:
        sys.exit(list_urls(jobs, args.verbose))

    if args.add is not None or args.delete is not None:
        sys.exit(modify_urls(jobs, args.urls, args.add, args.delete))

    logger.debug('Processing %d jobs', len(jobs))
    for job_state in run_parallel(lambda job_state: job_state.process(),
                                  (JobState(cache_storage, job) for job in jobs)):
        logger.debug('Job finished: %s', job_state.job)

        if job_state.exception is not None:
            if isinstance(job_state.exception, NotModifiedError):
                logger.info('Job %s has not changed (HTTP 304)', job_state.job)
                report.unchanged(job_state)
            elif isinstance(job_state.exception, requests.exceptions.RequestException):
                # Instead of a full traceback, just show the HTTP error
                job_state.traceback = str(job_state.exception)
                report.error(job_state)
            else:
                report.error(job_state)
        elif job_state.old_data is not None:
            # Compare line by line, so differing line endings alone do not
            # count as a change
            if job_state.old_data.splitlines() != job_state.new_data.splitlines():
                report.changed(job_state)
                job_state.save()
            else:
                report.unchanged(job_state)
        else:
            # No previous data for this job
            report.new(job_state)
            job_state.save()

    # Output everything
    report.finish()

    # Close cache
    cache_storage.close()
if __name__ == '__main__':
    # Build the command-line interface and hand the parsed options to main()
    parser = argparse.ArgumentParser(description=urlwatch.__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('--version', action='version', version='%(prog)s {}'.format(urlwatch.__version__))
    parser.add_argument('-v', '--verbose', action='store_true', help='show debug output')

    # Locations default to the files inside the per-user urlwatch directory
    group = parser.add_argument_group('files and directories')
    group.add_argument('--urls', metavar='FILE', help='read job list (URLs) from FILE', default=urls_yaml)
    group.add_argument('--config', metavar='FILE', help='read configuration from FILE', default=config_yaml)
    group.add_argument('--hooks', metavar='FILE', help='use FILE as hooks.py module', default=hooks_py)
    group.add_argument('--cache', metavar='FILE', help='use FILE as cache database', default=cache_db)

    group = parser.add_argument_group('job list management')
    group.add_argument('--list', action='store_true', help='list jobs')
    group.add_argument('--add', metavar='JOB', help='add job (key1=value1,key2=value2,...)')
    group.add_argument('--delete', metavar='JOB', help='delete job by location or index')

    group = parser.add_argument_group('interactive commands ($EDITOR/$VISUAL)')
    group.add_argument('--edit', action='store_true', help='edit URL/job list')
    group.add_argument('--edit-config', action='store_true', help='edit configuration file')
    group.add_argument('--edit-hooks', action='store_true', help='edit hooks script')

    group = parser.add_argument_group('miscellaneous')
    group.add_argument('--features', action='store_true', help='list supported jobs/filters/reporters')
    group.add_argument('--gc-cache', action='store_true', help='remove old cache entries')

    main(parser.parse_args())