%PDF- %PDF-
Mini Shell

Mini Shell

Direktori : /backups/router/usr/local/opnsense/scripts/unbound/
Upload File :
Create Path :
Current File : //backups/router/usr/local/opnsense/scripts/unbound/logger.py

#!/usr/local/bin/python3

"""
    Copyright (c) 2022 Deciso B.V.
    All rights reserved.

    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions are met:

    1. Redistributions of source code must retain the above copyright notice,
     this list of conditions and the following disclaimer.

    2. Redistributions in binary form must reproduce the above copyright
     notice, this list of conditions and the following disclaimer in the
     documentation and/or other materials provided with the distribution.

    THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
    INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
    AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
    AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
    OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    POSSIBILITY OF SUCH DAMAGE.
"""

import sys
import selectors
import argparse
import syslog
import time
import datetime
import os
import pandas
import signal
import socket
import duckdb
sys.path.insert(0, "/usr/local/opnsense/site-python")
from duckdb_helper import DbConnection, StorageVersionException, restore_database, export_database

class DNSReader:
    """
        Reads '|' separated query records from a named pipe, buffers them and periodically
        flushes the buffer into the 'query' table of a DuckDB database. Client addresses seen
        in the records are reverse-resolved and stored in the 'client' table.
    """
    def __init__(self, source_pipe, target_db, flush_interval, backup_dir):
        """
        :param source_pipe: path of the named pipe to read query records from
        :param target_db: filename of the DuckDB database to write to
        :param flush_interval: minimum number of seconds between two flushes of the buffer
        :param backup_dir: directory used for the periodic export / restore of the database
        """
        self.source_pipe = source_pipe
        self.target_db = target_db
        self.backup_dir = backup_dir
        # timestamps (epoch seconds) of the last flush, last cleanup and last export/import
        self.timer = 0
        self.cleanup_timer = 0
        self.imp_exp_timer = time.time()
        self.flush_interval = flush_interval
        # records read from the pipe which have not been written to the database yet
        self.buffer = list()
        self.selector = selectors.DefaultSelector()
        # file object of the opened pipe, None as long as open() has not returned
        self.fd = None

        # client address -> timestamp of the last resolve attempt
        self.client_map = {}
        # client addresses seen on the pipe
        self.update_clients = set()
        self.update_hostname = False

    def _resolve_ip(self, ip, timeout=0.01):
        """
        Reverse-resolve an address to a hostname.
        :param ip: address to resolve, None is accepted and yields None
        :param timeout: default socket timeout (seconds) to set for the duration of the lookup
        :return: hostname or None when the address could not be resolved
        :raises InterruptedError: when a termination signal arrived during the lookup
        """
        # if a host is known locally, we should be able to resolve it sub 10ms
        if ip is None:
            return
        old = socket.getdefaulttimeout()
        socket.setdefaulttimeout(timeout)
        try:
            host = socket.gethostbyaddr(ip)[0]
        except InterruptedError:
            # raised by _sig() to request a shutdown, this must not be mistaken for a failed lookup
            raise
        except Exception:
            # best effort, an unresolvable client is stored without a hostname
            host = None
        finally:
            # always restore the previous default, regardless of how the lookup ended
            socket.setdefaulttimeout(old)
        return host

    def _setup_db(self):
        """
        Create the tables and time series views when they do not exist yet, empty the
        client table and drop the index an earlier version may have created.
        """
        with DbConnection(self.target_db, read_only=False) as db:
            db.connection.execute("""
                CREATE TABLE IF NOT EXISTS query (
                    uuid UUID,
                    time INTEGER,
                    client TEXT,
                    family TEXT,
                    type TEXT,
                    domain TEXT,
                    action INTEGER,
                    source INTEGER,
                    blocklist TEXT,
                    rcode INTEGER,
                    resolve_time_ms INTEGER,
                    dnssec_status INTEGER,
                    ttl INTEGER
                )
            """)

            db.connection.execute("""
                CREATE TABLE IF NOT EXISTS client (
                    ipaddr TEXT UNIQUE,
                    hostname TEXT
                );

                DELETE FROM client
            """)

            # one view per bucket size (in seconds): 10, 5 and 1 minute
            for size in [600, 300, 60]:
                db.connection.execute(
                    """
                        CREATE OR REPLACE VIEW v_time_series_{min}min AS (
                            SELECT
                                epoch(to_timestamp(epoch(now()) - epoch(now()) % {intv}) -
                                    (i.generate_series * INTERVAL {min} MINUTE)) as start_timestamp,
                                epoch(to_timestamp(epoch(now()) - epoch(now()) % {intv}) -
                                    ((i.generate_series - 1) * INTERVAL {min} MINUTE)) as end_timestamp
                            FROM
                                generate_series(0, {lim}) as i
                        )
                    """.format(intv=size, min=(size//60), lim=((60//(size//60))*24))
                )

            # when an earlier version created an index, make sure it doesn't exist.
            # We do need to read the file anyway and adding an index means a **lot** of maintenance in the logger.
            db.connection.execute("DROP INDEX IF EXISTS idx_query")

    def _sig(self, *args):
        """
        Signal handler, always raises InterruptedError.
        """
        # signal either open() or select() to gracefully close.
        # we intentionally raise an exception since syscalls such as select()
        # will default to a retry with a computed timeout if no exception is thrown
        # as per PEP 475. This will force select() or open() to return so we can free up any resources.
        raise InterruptedError()

    def close_logger(self):
        """
        Release the pipe and the selector and exit the process with status 0.
        """
        syslog.syslog(syslog.LOG_NOTICE, "Closing logger")
        # we might be killing the process before open() has had a chance to return,
        # so check if the file descriptor is there
        if self.fd is not None:
            try:
                self.selector.unregister(self.fd)
            except (KeyError, ValueError):
                # the pipe was opened, but the signal arrived before it was registered
                pass
            self.fd.close()
        self.selector.close()
        sys.exit(0)

    def _read(self, fd, mask):
        """
        Selector callback, reads a single record from the pipe and flushes the buffer to the
        database when more than flush_interval seconds have passed since the last flush.
        :param fd: file object reported by the selector (unused, self.fd is read instead)
        :param mask: event mask reported by the selector (unused)
        :return: False when the pipe returned end-of-file, True otherwise
        """
        r = self.fd.readline()
        if r == '':
            return False

        q = tuple(r.strip("\n").split('|'))
        self.buffer.append(q)

        # the third field of a record holds the client address
        self.update_clients.add(q[2])

        # Start a transaction every flush_interval seconds. With regular inserts
        # we would also need to limit the amount of queries we buffer before inserting them,
        # but appending a DataFrame of N size is always faster than splitting N into chunks and
        # individually appending them in separate DataFrames.
        now = time.time()
        if (now - self.timer) > self.flush_interval:
            # A note on the usage of DbConnection:
            # The pipe fifo can fill up while blocking for the lock, since during this time the
            # read() callback cannot run to empty it. The dnsbl module side catches this with
            # a BlockingIOError, forcing it to re-buffer the query, making the process
            # "eventually consistent". Realistically this condition should never occur.
            with DbConnection(self.target_db, read_only=False) as db:
                self.timer = now
                if (now - self.cleanup_timer) > 3600:
                    self.cleanup_timer = now
                    # truncate the database to the last 7 days once an hour
                    t = datetime.date.today() - datetime.timedelta(days=7)
                    epoch = int(datetime.datetime(year=t.year, month=t.month, day=t.day).timestamp())
                    db.connection.execute("DELETE FROM query WHERE to_timestamp(time) < to_timestamp(?)", [epoch])

                if len(self.buffer) > 0:
                    # construct a dataframe from the current buffer and empty it. This is orders of magnitude
                    # faster than transactional inserts, and doesn't block even under high load.
                    db.connection.append('query', pandas.DataFrame(self.buffer))
                    self.buffer.clear()
                for client in self.update_clients:
                    # attempt to resolve every client IP we've seen in between intervals (if necessary)
                    if (now - self.client_map.get(client, 0)) > 3600:
                        self.client_map[client] = now
                        host = self._resolve_ip(client)
                        try:
                            db.connection.execute("INSERT INTO client VALUES (?, ?)", [client, host])
                        except duckdb.ConstraintException:
                            # ipaddr is unique, the client is already known so refresh its hostname
                            db.connection.execute("UPDATE client SET hostname=? WHERE ipaddr=?", [host, client])

            # duckdb database files don't like records to be deleted over time, which causes unnecessary growth.
            # By performing an export/import on regular bases (roughly each 24 hours), we keep the file more manageable.
            if (now - self.imp_exp_timer) > 86400:
                self.imp_exp_timer = now
                if export_database(self.target_db, self.backup_dir, 'unbound', 'unbound'):
                    restore_database(self.backup_dir, self.target_db)
                    syslog.syslog(
                        syslog.LOG_NOTICE,
                        'Database auto restore from %s for cleanup reasons in %.2f seconds' % (
                            self.backup_dir,
                            time.time() - now
                        )
                    )
                else:
                    # the syslog module names this priority LOG_ERR
                    syslog.syslog(syslog.LOG_ERR, "unable to export database to %s" % self.backup_dir)


        return True

    def run_logger(self):
        """
        Install the signal handlers, prepare the database, wait for the pipe to become
        available and process records until the pipe is closed or a signal is received.
        Exits with status 1 when the pipe could not be opened after more than 10 attempts.
        """
        # set a signal handler
        signal.signal(signal.SIGINT, self._sig)
        signal.signal(signal.SIGTERM, self._sig)

        self._setup_db()

        r_count = 0
        pipe_ready = False
        # give dnsbl_module.py some time to create a pipe
        while not pipe_ready:
            try:
                # open() will block until a query has been pushed down the fifo
                self.fd = open(self.source_pipe, 'r')
                pipe_ready = True
            except InterruptedError:
                self.close_logger()
            except OSError:
                r_count += 1
                if r_count > 10:
                    syslog.syslog(syslog.LOG_ERR, "Unable to open pipe. This is likely because Unbound isn't running.")
                    sys.exit(1)
                time.sleep(1)

        self.selector.register(self.fd.fileno(), selectors.EVENT_READ, self._read)

        while True:
            # select() will block until a file object is ready. To handle a SIGTERM we raise
            # an exception in the signal handler, see _sig() for further details. On an exception
            # events will return an empty list (see https://docs.python.org/3/library/selectors.html#selectors.BaseSelector.select)
            events = self.selector.select()
            if not events:
                self.close_logger()
            for key, mask in events:
                callback = key.data
                if not callback(key.fileobj, mask):
                    # unbound closed pipe
                    self.close_logger()

def run(pipe, target_db, flush_interval, backup_dir):
    """
    Construct a DNSReader for the given pipe and database and run it.
    An InterruptedError escaping the reader leads to a regular close of the logger,
    any other exception is passed on to the caller.
    :param pipe: named pipe file location
    :param target_db: duckdb filename
    :param flush_interval: interval to flush to db
    :param backup_dir: backup directory
    """
    reader = DNSReader(pipe, target_db, flush_interval, backup_dir)
    try:
        reader.run_logger()
    except InterruptedError:
        # raised by the signal handler of the reader
        reader.close_logger()

if __name__ == '__main__':
    # script entry point: parse the arguments, make sure the database can be opened
    # (restoring or removing it on a storage version mismatch) and start the logger.
    syslog.openlog('unbound', facility=syslog.LOG_LOCAL4)
    parser = argparse.ArgumentParser()
    parser.add_argument('--pipe', help='named pipe file location', default='/var/unbound/data/dns_logger')
    parser.add_argument('--targetdb', help='duckdb filename', default='/var/unbound/data/unbound.duckdb')
    parser.add_argument('--backup_dir', help='backup directory', default='/var/cache/unbound.duckdb')
    # convert to a number, the interval is compared against a difference of timestamps and
    # a value passed on the command line would otherwise arrive as a string
    parser.add_argument('--flush_interval', help='interval to flush to db', default=10, type=float)

    inputargs = parser.parse_args()
    try:
        # opening the database is enough to detect a storage version mismatch
        with DbConnection(inputargs.targetdb, read_only=False) as db:
            pass
    except StorageVersionException:
        try:
            if restore_database(inputargs.backup_dir, inputargs.targetdb):
                syslog.syslog(
                    syslog.LOG_NOTICE,
                    'Database restored from %s due to version mismatch' % inputargs.backup_dir
                )
            else:
                syslog.syslog(syslog.LOG_ERR, 'Restore needed, but backup locked, exit...')
                sys.exit(-1)
        except FileNotFoundError:
            # no backup to recover, remove database and proceed normal startup
            if os.path.isfile(inputargs.targetdb):
                syslog.syslog(
                    syslog.LOG_NOTICE,
                    'Missing restore data, removing %s to proceed startup' % inputargs.targetdb
                )
                os.remove(inputargs.targetdb)


    syslog.syslog(syslog.LOG_NOTICE, 'Backgrounding unbound logging backend.')
    run(inputargs.pipe, inputargs.targetdb, inputargs.flush_interval, inputargs.backup_dir)

Zerion Mini Shell 1.0