From 80cfa7f8eb82a11c19c7c614835a5c0d19d5352c Mon Sep 17 00:00:00 2001
From: Yaguang Tang
Date: Tue, 8 Oct 2024 18:49:30 +0800
Subject: [PATCH] Add support for node down and softirq alert

Add two alerting rules to the legacy rule set:

* NodeDown (P1): fires when the node-exporter target of a node has
  reported up == 0 for 2 minutes.
* NodeSoftirqRcu (P3): fires when the per-second rate of RCU softirqs
  on a node stays above 10000 for 5 minutes.

NodeSoftirqRcu uses the same P-level severity scale as the surrounding
rules instead of 'warning'. P3 is assumed to be the closest match for
'warning'; confirm against the Alertmanager routing before merging.
---
 .../files/jsonnet/legacy.libsonnet            | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/roles/kube_prometheus_stack/files/jsonnet/legacy.libsonnet b/roles/kube_prometheus_stack/files/jsonnet/legacy.libsonnet
index b11d8a6f07..dd21676514 100644
--- a/roles/kube_prometheus_stack/files/jsonnet/legacy.libsonnet
+++ b/roles/kube_prometheus_stack/files/jsonnet/legacy.libsonnet
@@ -174,6 +174,14 @@
                 severity: 'P5',
               },
             },
+            {
+              alert: 'NodeDown',
+              expr: 'up{job="node-exporter"} == 0',
+              'for': '2m',
+              labels: {
+                severity: 'P1',
+              },
+            },
           ],
         },
         {
@@ -226,6 +234,23 @@
             alertRule('dropped', '0', '0.75'),
           ],
         },
+        {
+          name: 'softirq',
+          rules: [
+            {
+              alert: 'NodeSoftirqRcu',
+              expr: 'rate(node_softirqs_total{vector="rcu"}[1m]) > 10000',
+              'for': '5m',
+              labels: {
+                severity: 'P3',
+              },
+              annotations: {
+                summary: 'High softirq rcu on node {{ $labels.instance }}: {{ $value }}',
+                description: 'This can result in high software interrupt load on the node which can bring system performance down.',
+              },
+            },
+          ],
+        },
       ],
     },
   },