#!/usr/local/bin/python3

"""
    Copyright (c) 2022 Deciso B.V.
    All rights reserved.

    Redistribution and use in source and binary forms, with or without modification,
    are permitted provided that the following conditions are met:

    1. Redistributions of source code must retain the above copyright notice,
     this list of conditions and the following disclaimer.

    2. Redistributions in binary form must reproduce the above copyright notice,
     this list of conditions and the following disclaimer in the documentation
     and/or other materials provided with the distribution.

    THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,
    INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
    AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
    AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
    OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
    POSSIBILITY OF SUCH DAMAGE.
"""
import argparse
import ujson
import sys
import re
import os
import pandas
import numpy as np
from time import time
from operator import itemgetter
sys.path.insert(0, "/usr/local/opnsense/site-python")
from duckdb_helper import DbConnection


def percent(val, total):
    """Return val as a percentage of total, formatted with two decimals.

    Returns the integer 0 when either operand is 0 (avoids division by zero
    and keeps empty counters rendered as a plain 0 in the JSON output).
    """
    if val == 0 or total == 0:
        return 0
    return '{:.2f}'.format(round(((val / total) * 100), 2))


def handle_rolling(args):
    """Print a rolling time-series aggregate of query data as JSON.

    Buckets queries into the pre-built v_time_series_<N>min view over the last
    --timeperiod hours. With --clients, emits the top 10 most active clients
    per bucket instead of the pass/block/drop counters.
    """
    # sanitize input: only whitelisted values may be interpolated into the SQL
    # below via .format(). NOTE: the previous regex-based check
    # (re.sub("^(?:(?!600|300|60).)*$", ...)) let superstrings such as 6000 or
    # 241 through, since the lookahead only rejects strings where *no* position
    # starts with an allowed value — an explicit whitelist is both correct and
    # clearer.
    interval = args.interval if args.interval in (60, 300, 600) else 600
    tp = args.timeperiod if args.timeperiod in (1, 12, 24) else 24
    data = pandas.DataFrame()
    result = {}
    if args.clients:
        # per-bucket client activity: group_concat packs clients/hostnames/counts
        # into parallel comma-separated strings, unpacked again in Python below
        query = """
            WITH grouped AS (
                SELECT v.start_timestamp s, v.end_timestamp e, c.client cl, COUNT(c.client) cnt_cl
                FROM v_time_series_{intv}min v
                LEFT JOIN query c ON c.time >= v.start_timestamp AND c.time <= v.end_timestamp
                WHERE to_timestamp(v.start_timestamp) > (to_timestamp(epoch(now())) - INTERVAL {tp} HOUR)
                GROUP BY v.end_timestamp, v.start_timestamp, c.client
                ORDER BY v.end_timestamp
            )
            SELECT s as start_timestamp, e as end_timestamp,
                GROUP_CONCAT(cl) as clients,
                GROUP_CONCAT(COALESCE(resolved.hostname, '')) as hostnames,
                GROUP_CONCAT(cnt_cl) as client_totals
            FROM grouped
            LEFT JOIN client resolved ON cl = resolved.ipaddr
            GROUP BY s, e
            ORDER BY e
        """.format(intv=interval//60, tp=tp)
    else:
        query = """
            SELECT v.start_timestamp, v.end_timestamp,
                COUNT(q.time) AS total,
                COUNT(case q.action when 0 then 1 else null end) AS passed,
                COUNT(case q.action when 1 then 1 else null end) AS blocked,
                COUNT(case q.action when 2 then 1 else null end) AS dropped,
                COUNT(case q.source when 0 then 1 else null end) AS resolved,
                COUNT(case q.source when 2 then 1 else null end) AS local,
                COUNT(case q.source when 3 then 1 else null end) AS cached
            FROM v_time_series_{intv}min v
            LEFT JOIN query q ON q.time >= v.start_timestamp AND q.time <= v.end_timestamp
            WHERE to_timestamp(v.start_timestamp) > (to_timestamp(epoch(now())) - INTERVAL {tp} HOUR)
            GROUP BY v.end_timestamp, v.start_timestamp
            ORDER BY v.end_timestamp
        """.format(intv=(interval//60), tp=tp)

    with DbConnection('/var/unbound/data/unbound.duckdb', read_only=True) as db:
        if db.connection is not None and db.table_exists('query'):
            data = db.connection.execute(query).fetchdf().astype('object')

    if not data.empty:
        if args.clients:
            # a group_concat without any client returns NaN in a Dataframe,
            # replace it with an empty string
            data = data.replace(np.nan, '', regex=True)
            data = list(data.itertuples(index=False, name=None))
            result = {}
            for row in data:
                # row: (start_timestamp, end_timestamp, clients, hostnames, client_totals)
                # renamed from `interval` to avoid shadowing the interval setting above
                bucket = {row[0]: {}}
                if row[2]:
                    tmp = []
                    hosts = row[3].split(',')
                    counts = row[4].split(',')
                    for idx, client in enumerate(row[2].split(',')):
                        tmp.append((client, int(counts[idx]), hosts[idx]))
                    # sort the list by most active client
                    tmp.sort(key=itemgetter(1), reverse=True)
                    # limit by 10
                    tmp = tmp[:10]
                    bucket[row[0]] |= {t[0]: {'count': t[1], 'hostname': t[2]} for t in tmp}
                result |= bucket
        else:
            result = data.set_index('start_timestamp').apply(lambda x: {
                'total': x.total,
                'passed': x.passed,
                'blocked': x.blocked,
                'dropped': x.dropped,
                'resolved': x.resolved,
                'local': x.local,
                'cached': x.cached
            }, axis=1).to_dict()

    print(ujson.dumps(result))


def handle_top(args):
    """Print total counters, the top queried and top blocked domains as JSON."""
    total = resolved = blocked = local = passed = blocklist_size = 0
    r_top = r_top_blocked = r_total = r_start_time = pandas.DataFrame()
    # two distinct dicts — `top = top_blocked = {}` would alias one object
    top = {}
    top_blocked = {}
    start_time = int(time())

    # size of the active DNS blocklist, written out-of-band by the dnsbl scripts
    bl_path = '/var/unbound/data/dnsbl.size'
    if os.path.isfile(bl_path) and os.path.getsize(bl_path) > 0:
        with open(bl_path, 'r') as f:
            blocklist_size = int(f.readline())

    with DbConnection('/var/unbound/data/unbound.duckdb', read_only=True) as db:
        if db.connection is not None and db.table_exists('query'):
            # all queries are stored in a DataFrame() and its resulting set
            # cast to native python types (`int` instead of `numpy.int64`)
            # in order to properly convert it to json format
            r_top = db.connection.execute("""
                SELECT domain, COUNT(domain) as cnt
                FROM query
                WHERE action == 0
                GROUP BY domain
                ORDER BY cnt DESC
                LIMIT ?
            """, [args.max]).fetchdf().astype('object')

            r_top_blocked = db.connection.execute("""
                SELECT domain, COUNT(domain) as cnt, blocklist
                FROM query
                WHERE action == 1
                GROUP BY domain, blocklist
                ORDER BY cnt DESC
                LIMIT ?
            """, [args.max]).fetchdf().astype('object')

            r_total = db.connection.execute("""
                SELECT COUNT(*) AS total,
                    COUNT(case q.action when 1 then 1 else null end) AS blocked,
                    COUNT(case q.source when 2 then 1 else null end) AS local,
                    COUNT(case q.action when 0 then 1 else null end) AS passed,
                    COUNT(case q.source when 0 then 1 else null end) as resolved
                FROM query q
            """).fetchdf().astype('object')

            r_start_time = db.connection.execute("""
                SELECT time
                FROM query
                ORDER BY time ASC
                LIMIT 1
            """).fetchdf().astype('object')

    if not r_total.empty:
        total = r_total.total.iloc[0]
        resolved = r_total.resolved.iloc[0]
        blocked = r_total.blocked.iloc[0]
        local = r_total.local.iloc[0]
        passed = r_total.passed.iloc[0]

    if not r_start_time.empty:
        start_time = r_start_time.time.iloc[0]

    if not r_top.empty:
        top = r_top.set_index('domain').apply(lambda x: {
            "total": x.cnt,
            "pcnt": percent(x.cnt, passed)
        }, axis=1).to_dict()

    if not r_top_blocked.empty:
        top_blocked = r_top_blocked.set_index('domain').apply(lambda x: {
            "total": x.cnt,
            "pcnt": percent(x.cnt, blocked),
            "blocklist": x.blocklist
        }, axis=1).to_dict()

    print(ujson.dumps({
        "total": total,
        "blocklist_size": blocklist_size,
        "passed": passed,
        "resolved": {"total": resolved, "pcnt": percent(resolved, total)},
        "blocked": {"total": blocked, "pcnt": percent(blocked, total)},
        "local": {"total": local, "pcnt": percent(local, total)},
        "start_time": start_time,
        "top": top,
        "top_blocked": top_blocked
    }))


def handle_details(args):
    """Print detailed per-query records as a JSON list.

    When --client, --start and --end are all given, results are filtered to
    that client within the (exclusive) epoch window; otherwise the most recent
    --limit queries are returned.
    """
    result = []
    details = pandas.DataFrame()
    with DbConnection('/var/unbound/data/unbound.duckdb', read_only=True) as db:
        if db.connection is not None and db.table_exists('query') and db.table_exists('client'):
            if args.client and args.start and args.end:
                details = db.connection.execute("""
                    SELECT *
                    FROM query q
                    LEFT JOIN client resolved on q.client = resolved.ipaddr
                    WHERE q.client = ? AND q.time > ? AND q.time < ?
                    ORDER BY time DESC
                    LIMIT ?
                """, [args.client, args.start, args.end, args.limit]).fetchdf().astype({'uuid': str})
            else:
                details = db.connection.execute("""
                    SELECT *
                    FROM query
                    LEFT JOIN client resolved on client = resolved.ipaddr
                    ORDER BY time DESC
                    LIMIT ?
                """, [args.limit]).fetchdf().astype({'uuid': str})

    if not details.empty:
        # use a resolved hostname if possible
        details['client'] = np.where(details['hostname'].isnull(), details['client'], details['hostname'])
        # assign the result instead of inplace=True on a column selection:
        # chained inplace is deprecated in pandas 2.x and a no-op under copy-on-write
        details['blocklist'] = details['blocklist'].replace({np.nan: None})
        details = details.drop(['hostname', 'ipaddr'], axis=1)
        # map the integer types to a sensible description
        details['action'] = details['action'].map({0: 'Pass', 1: 'Block', 2: 'Drop'})
        details['source'] = details['source'].map({
            0: 'Recursion',
            1: 'Local',
            2: 'Local-data',
            3: 'Cache'
        })
        details['rcode'] = details['rcode'].map({
            0: 'NOERROR',
            1: 'FORMERR',
            2: 'SERVFAIL',
            3: 'NXDOMAIN',
            4: 'NOTIMPL',
            5: 'REFUSED',
            6: 'YXDOMAIN',
            7: 'YXRRSET',
            8: 'NXRRSET',
            9: 'NOTAUTH',
            10: 'NOTZONE'
        })
        details['dnssec_status'] = details['dnssec_status'].map({
            0: 'Unchecked',
            1: 'Bogus',
            2: 'Indeterminate',
            3: 'Insecure',
            5: 'Secure'
        })
        result = details.to_dict('records')

    print(ujson.dumps(result))


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest='command', help='sub-command help')

    r_parser = subparsers.add_parser('rolling', help='get rolling aggregate of query data')
    r_parser.add_argument('--timeperiod', help='timeperiod in hours. Valid values are [24, 12, 1]',
                          type=int, default=24)
    r_parser.add_argument('--interval', help='interval in seconds. valid values are [600, 300, 60]',
                          type=int, default=300)
    r_parser.add_argument('--clients', help='get top 10 client activity instead', action='store_true')
    r_parser.set_defaults(func=handle_rolling)

    t_parser = subparsers.add_parser('totals', help='get top queried domains and total counters')
    t_parser.add_argument('--max', help='limit top queried domains by max items', type=int, default=10)
    t_parser.set_defaults(func=handle_top)

    d_parser = subparsers.add_parser('details', help='get detailed query information')
    d_parser.add_argument('--limit', help='limit results', type=int, default=500)
    d_parser.add_argument('--client', help='limit result to client')
    d_parser.add_argument('--start', type=int, help='start unix epoch')
    d_parser.add_argument('--end', type=int, help='end unix epoch')
    d_parser.set_defaults(func=handle_details)

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    inputargs = parser.parse_args()
    inputargs.func(inputargs)