%PDF- %PDF-
Direktori : /backups/router/usr/local/opnsense/scripts/unbound/blocklists/ |
Current File : //backups/router/usr/local/opnsense/scripts/unbound/blocklists/__init__.py |
#!/usr/local/bin/python3 """ Copyright (c) 2023 Deciso B.V. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ import os import requests import syslog import re import glob import importlib import sys import fcntl import ujson import time from configparser import ConfigParser class BaseBlocklistHandler: def __init__(self, config=None): self.config = config self.cnf = None self.priority = 0 self.cache_ttl = 72000 self.cur_bl_location = '/var/unbound/data/dnsbl.json' self.domain_pattern = re.compile( r'^(\*\.){,1}(([\da-zA-Z_])([_\w-]{,62})\.){,127}(([\da-zA-Z])[_\w-]{,61})' r'?([\da-zA-Z]\.((xn\-\-[a-zA-Z\d]+)|([a-zA-Z\d]{2,})))$' ) self._load_config() def get_config(self): """ Get statically defined configuration options. """ pass def get_blocklist(self): """ Overridden by derived classes to produce a formatted blocklist. Returns a dictionary with domains as keys and a dictionary of metadata as values """ pass def _blocklist_reader(self, uri): """ Used by a derived class to define a caching and/or download routine. """ pass def _blocklists_in_config(self): """ Generator for derived classes to iterate over configured blocklists. """ pass def _load_config(self): """ Load a configuration file. """ if os.path.exists(self.config): self.cnf = ConfigParser() self.cnf.read(self.config) def _domains_in_blocklist(self, blocklist): """ Generator for derived classes to iterate over cached/downloaded domains. """ for line in self._blocklist_reader(blocklist): # cut line into parts before comment marker (if any) tmp = line.split('#')[0].split() entry = None while tmp: entry = tmp.pop(-1) if entry not in ['127.0.0.1', '0.0.0.0']: break if entry: yield entry.lower() def _uri_reader(self, uri): """ Takes a URI and yields domain entries. """ req_opts = { 'url': uri, 'timeout': 30, 'stream': True } req = requests.get(**req_opts) if req.status_code >= 200 and req.status_code <= 299: req.raw.decode_content = True prev_chop = '' while True: chop = req.raw.read(1024).decode() if not chop: if prev_chop: yield prev_chop break else: parts = (prev_chop + chop).split('\n') if parts[-1] != "\n": prev_chop = parts.pop() else: prev_chop = '' for part in parts: yield part else: raise Exception( 'blocklist download : unable to download file from %s (status_code: %d)' % (uri, req.status_code) ) class BlocklistParser: def __init__(self): # check for a running download process, this may take a while so it's better to check... try: lck = open('/tmp/unbound-download_blocklists.tmp', 'w+') fcntl.flock(lck, fcntl.LOCK_EX | fcntl.LOCK_NB) except IOError: # already running, exit status 99 sys.exit(99) syslog.openlog('unbound', facility=syslog.LOG_LOCAL4) self.handlers = list() self._register_handlers() self.startup_time = time.time() def _register_handlers(self): handlers = list() for filename in glob.glob("%s/*.py" % os.path.dirname(__file__)): importlib.import_module(".%s" % os.path.splitext(os.path.basename(filename))[0], __name__) for module_name in dir(sys.modules[__name__]): for attribute_name in dir(getattr(sys.modules[__name__], module_name)): cls = getattr(getattr(sys.modules[__name__], module_name), attribute_name) if isinstance(cls, type) and issubclass(cls, BaseBlocklistHandler)\ and cls not in (BaseBlocklistHandler,): handlers.append(cls()) self.handlers = handlers def _get_config(self): cfg = {} for handler in self.handlers: tmp = handler.get_config() if tmp: cfg = tmp | cfg return cfg def _merge_results(self, blocklists): """ Take output of all the handlers and merge based on each handlers' priority. The default handler has highest priority """ if len(blocklists) == 1: return next(iter(blocklists.values())) blocklists = dict(sorted(blocklists.items(), reverse=True)) first = next(iter(blocklists.values())) for bl in list(blocklists.values())[1:]: for key, value in bl.items(): if key not in first: # no collision, merge first[key] = value else: # a handler with a lower priority has provided a policy # on a domain that already exists in the blocklist, # add it for debugging purposes first[key].setdefault('collisions', []).append(value) return first def update_blocklist(self): blocklists = {} merged = {} for handler in self.handlers: blocklists[handler.priority] = handler.get_blocklist() merged['data'] = self._merge_results(blocklists) merged['config'] = self._get_config() # check if there are wildcards in the dataset has_wildcards = False for item in merged['data']: if merged['data'][item].get('wildcard') == True: has_wildcards = True break merged['config']['has_wildcards'] = has_wildcards # write out results if not os.path.exists('/var/unbound/data'): os.makedirs('/var/unbound/data') with open("/var/unbound/data/dnsbl.json.new", 'w') as unbound_outf: if merged['data']: ujson.dump(merged, unbound_outf) # atomically replace the current dnsbl so unbound can pick up on it os.replace('/var/unbound/data/dnsbl.json.new', '/var/unbound/data/dnsbl.json') syslog.syslog(syslog.LOG_NOTICE, "blocklist parsing done in %0.2f seconds (%d records)" % ( time.time() - self.startup_time, len(merged['data']) ))