"""Flask app that renders Zeek conn.log flows as a simple timeline page.

Provenance (from the patch header this file was extracted from):
commit 3db85fa00836c0ed807995fe21401aa0c35ffeaf, author "eldraco",
dated Mon, 24 Jun 2024 17:45:33 +0200, subject "Add flow visualizer",
creating ``flow_visualizer.py``.

Usage::

    python flow_visualizer.py conn.log
    cat conn.log | python flow_visualizer.py --stdin --min-duration 1.0
"""

import argparse
import json  # noqa: F401  (unused here; kept because the original file imports it)
import random
import sys
from datetime import datetime, timezone

import pandas as pd
from flask import Flask, render_template_string

app = Flask(__name__)

# Column names applied, in order, to every data row of the conn log.
CONN_COLUMNS = [
    "ts", "uid", "id_orig_h", "id_orig_p", "id_resp_h", "id_resp_p",
    "proto", "service", "duration", "orig_bytes", "resp_bytes",
    "conn_state", "local_orig", "local_resp", "missed_bytes",
    "history", "orig_pkts", "orig_ip_bytes", "resp_pkts", "resp_ip_bytes",
    "tunnel_parents",
]

# Placeholder used to pad short rows (same value the original code used).
UNSET_FIELD = "-"

# Lines read from stdin, cached after the first read: stdin can only be
# consumed once, but ``index`` re-reads the log on every request.
_stdin_lines = None


def _read_lines(file_path, use_stdin):
    """Return the raw log lines from stdin (cached) or from *file_path*."""
    global _stdin_lines
    if use_stdin:
        if _stdin_lines is None:
            # Deliberately not wrapped in ``with``: closing sys.stdin would
            # make any later read raise "I/O operation on closed file".
            _stdin_lines = sys.stdin.readlines()
        return _stdin_lines
    with open(file_path, 'r') as handle:
        return handle.readlines()


def _parse_conn_line(line):
    """Split one data line into exactly ``len(CONN_COLUMNS)`` fields.

    Returns ``None`` for blank lines. Short rows are padded with
    ``UNSET_FIELD`` and long rows are truncated, as the original code did.
    """
    text = line.rstrip('\r\n')
    if not text.strip():
        return None
    # Prefer tab splitting so field positions are preserved; fall back to
    # generic whitespace splitting for input that contains no tabs.
    fields = text.split('\t') if '\t' in text else text.split()
    fields.extend([UNSET_FIELD] * (len(CONN_COLUMNS) - len(fields)))
    return fields[:len(CONN_COLUMNS)]


# Read Zeek conn log file or stdin
def read_zeek_conn_log(file_path=None, use_stdin=False):
    """Load a Zeek conn log into a DataFrame.

    Args:
        file_path: Path of the log file; used when *use_stdin* is false.
        use_stdin: Read the log from standard input instead of a file. The
            stdin content is cached, so repeated calls return the same data.

    Returns:
        A DataFrame with the ``CONN_COLUMNS`` columns plus ``human_ts``
        (``ts`` formatted as ``YYYY-MM-DD HH:MM:SS`` in UTC). ``ts`` and
        ``duration`` are floats; every other column is left as text.
        Rows whose ``ts`` is not numeric are dropped, and a non-numeric
        ``duration`` (for example the padding value ``-``) becomes 0.0.
    """
    rows = []
    for line in _read_lines(file_path, use_stdin):
        if line.startswith("#"):
            continue  # header / metadata lines
        fields = _parse_conn_line(line)
        if fields is not None:
            rows.append(fields)

    df = pd.DataFrame(rows, columns=CONN_COLUMNS)
    # errors='coerce' turns non-numeric text into NaN instead of raising.
    df['ts'] = pd.to_numeric(df['ts'], errors='coerce')
    df = df.dropna(subset=['ts']).copy()
    df['duration'] = pd.to_numeric(df['duration'], errors='coerce').fillna(0.0)
    df['human_ts'] = df['ts'].apply(
        lambda ts: datetime.fromtimestamp(ts, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
    )
    return df


# Generate a random color
def generate_random_color():
    """Return a random color as a ``#rrggbb`` hex string."""
    return "#{:06x}".format(random.randint(0, 0xFFFFFF))


@app.route('/')
def index():
    """Render all flows that last at least ``args.min_duration`` seconds.

    Reads the module-level ``args`` namespace, which is only set when the
    file is run as a script (see the ``__main__`` section below).
    """
    if args.stdin:
        df = read_zeek_conn_log(use_stdin=True)
    else:
        df = read_zeek_conn_log(args.filename)

    # Filter by minimum duration; copy so adding a column below does not
    # write into a view of the unfiltered frame.
    df = df[df['duration'] >= args.min_duration].copy()

    df['relative_start'] = df['ts'] - df['ts'].min()
    # max() of an empty column is NaN; fall back to 0.0 so the template
    # receives plain numbers even when no flow passes the filter.
    has_flows = not df.empty
    max_duration = float(df['duration'].max()) if has_flows else 0.0
    max_relative_start = float(df['relative_start'].max()) if has_flows else 0.0

    # Assign colors to source IPs
    ip_colors = {ip: generate_random_color() for ip in df['id_orig_h'].unique()}

    flows = df.to_dict(orient='records')
    return render_template_string(
        TEMPLATE,
        flows=flows,
        max_duration=max_duration,
        max_relative_start=max_relative_start,
        ip_colors=ip_colors,
    )


# NOTE(review): the HTML tags of this template were stripped from the copy
# this file was recovered from; only the text and Jinja expressions survived
# (kept verbatim below). The markup and the bar layout are a reconstruction
# -- confirm against the upstream flow_visualizer.py.
TEMPLATE = '''
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Zeek Conn Flows</title>
    <style>
        body { font-family: sans-serif; }
        .flow { margin: 2px 0; white-space: nowrap; }
        .bar { display: inline-block; height: 10px; min-width: 2px; vertical-align: middle; }
        .label { display: inline-block; font-size: 12px; margin-left: 6px; }
    </style>
</head>
<body>
    <h1>Zeek Conn Flows</h1>
    <div class="flows">
        {% for flow in flows %}
        <div class="flow" style="margin-left: {{ (flow.relative_start / max_relative_start * 50) if max_relative_start else 0 }}%;">
            <div class="bar" style="width: {{ (flow.duration / max_duration * 50) if max_duration else 0 }}%; background-color: {{ ip_colors[flow.id_orig_h] }};"></div>
            <div class="label">{{ flow.human_ts }} - {{ flow.id_orig_h }}:{{ flow.id_orig_p }} -> {{ flow.id_resp_h }}:{{ flow.id_resp_p }}</div>
        </div>
        {% endfor %}
    </div>
</body>
</html>
'''

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Run Flask app to display Zeek conn flows.')
    parser.add_argument('--stdin', action='store_true', help='Read Zeek conn log from stdin')
    parser.add_argument('--min-duration', type=float, default=0.0, help='Minimum duration of flows to display')
    parser.add_argument('filename', type=str, nargs='?', help='The Zeek conn log file to read')
    # Assigned at module level on purpose: ``index`` reads ``args`` as a global.
    args = parser.parse_args()

    if not args.stdin and not args.filename:
        parser.error('Must provide a filename or use --stdin to read from stdin')

    # NOTE(review): debug=True enables the interactive Werkzeug debugger and
    # the reloader; keep this for local use only and never expose the port.
    app.run(debug=True)