-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathalerts-dev.yaml
136 lines (129 loc) · 5.53 KB
/
alerts-dev.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
---
# PrometheusRule defining the alert rules for sokos-oppdrag.
# Every rule carries the label `sokos_oppdrag_type: sokos-oppdrag`, which is
# the label the AlertmanagerConfig route in this file matches on.
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
  name: sokos-oppdrag
  namespace: okonomi
  labels:
    team: okonomi
spec:
  groups:
    - name: sokos-oppdrag-alerts
      rules:
        # Application down: the deployment reports zero available replicas
        # for 5 minutes. Scoped to the okonomi namespace so that a same-named
        # deployment in another namespace cannot trigger it.
        - alert: sokos-oppdrag er nede
          expr: kube_deployment_status_replicas_available{namespace="okonomi", deployment="sokos-oppdrag"} == 0
          for: 5m
          annotations:
            action: "Sjekk hvorfor applikasjonen er nede"
            summary: "{{ $labels.deployment }} er nede i namespace {{ $labels.namespace }}"
          labels:
            namespace: okonomi
            severity: critical
            sokos_oppdrag_type: sokos-oppdrag
            alert_type: custom

        # Restart loop: more than 2 container restarts within a 5 minute
        # window, sustained for 5 minutes. `by (container)` keeps the
        # container label that the summary template reads.
        - alert: sokos-oppdrag har restartet flere ganger de siste 5 minuttene!
          expr: sum(increase(kube_pod_container_status_restarts_total{namespace="okonomi", container="sokos-oppdrag"}[5m])) by (container) > 2
          for: 5m
          annotations:
            action: "Sjekk hvorfor applikasjonen ikke klarer å restarte"
            summary: "{{ $labels.container }} restarter mange ganger"
          labels:
            namespace: okonomi
            severity: critical
            sokos_oppdrag_type: sokos-oppdrag
            alert_type: custom

        # Error-log growth: compares the 15 minute maximum of the Error
        # series with the same maximum taken 15 minutes earlier and fires
        # when the difference exceeds 5. `or vector(0)` substitutes 0 when a
        # side of the subtraction has no samples.
        - alert: Applikasjonen logger høy andel errors
          expr: >-
            sum(max_over_time(log_messages_errors{namespace="okonomi",app="sokos-oppdrag",level="Error"}[15m])
            or vector(0)) -
            sum(max_over_time(log_messages_errors{namespace="okonomi",app="sokos-oppdrag",level="Error"}[15m]
            offset 15m) or vector(0)) > 5
          for: 5m
          annotations:
            action: "<https://logs.adeo.no/app/discover#/?_g=(time:(from:now-1d,to:now))&_a=(filters:!((query:(match_phrase:(application:'sokos-oppdrag'))),(query:(match_phrase:(cluster:'dev-fss'))),(query:(match_phrase:(level:'Error')))))|Sjekk loggene>"
            summary: "`kubectl logs -l app=sokos-oppdrag -n okonomi` for logger"
          labels:
            namespace: okonomi
            # NOTE(review): the other rules use `critical` / `warning`;
            # confirm that `danger` is the intended severity value here.
            severity: danger
            sokos_oppdrag_type: sokos-oppdrag
            alert_type: custom

        # Warning-log growth: same comparison as the error rule above, but
        # for the Warning level.
        - alert: Applikasjonen logger høy andel warnings
          expr: >-
            sum(max_over_time(log_messages_errors{namespace="okonomi",app="sokos-oppdrag",level="Warning"}[15m])
            or vector(0)) -
            sum(max_over_time(log_messages_errors{namespace="okonomi",app="sokos-oppdrag",level="Warning"}[15m]
            offset 15m) or vector(0)) > 5
          for: 5m
          annotations:
            action: "<https://logs.adeo.no/app/discover#/?_g=(time:(from:now-1d,to:now))&_a=(filters:!((query:(match_phrase:(application:'sokos-oppdrag'))),(query:(match_phrase:(cluster:'dev-fss'))),(query:(match_phrase:(level:'Warning')))))|Sjekk loggene>"
            summary: "`kubectl logs -l app=sokos-oppdrag -n okonomi` for logger"
          labels:
            namespace: okonomi
            severity: warning
            sokos_oppdrag_type: sokos-oppdrag
            alert_type: custom

        # HTTP 5xx responses: more than 2 additional requests with a
        # three-character status starting with 5 within a 3 minute window,
        # sustained for 5 minutes.
        - alert: sokos-oppdrag har høy andel HTTP serverfeil (5xx responser)
          expr: floor(increase(ktor_http_server_requests_seconds_count{status=~"5..", app="sokos-oppdrag"}[3m])) > 2
          for: 5m
          annotations:
            action: "`kubectl logs {{ $labels.pod }} -n {{ $labels.namespace }}`"
            summary: >-
              Følgende request feilet: `Status {{ $labels.status }} - {{ $labels.method }} {{ $labels.route }}`.
              Sjekk loggene for å se hvorfor dette feiler.
          labels:
            namespace: okonomi
            severity: critical
            sokos_oppdrag_type: sokos-oppdrag
            alert_type: custom

        # HTTP 4xx responses, excluding 404: more than 2 additional requests
        # within a 3 minute window, sustained for 2 minutes.
        - alert: sokos-oppdrag har høy andel HTTP klientfeil (4xx responser)
          expr: floor(increase(ktor_http_server_requests_seconds_count{status=~"4..", status!="404", app="sokos-oppdrag"}[3m])) > 2
          for: 2m
          annotations:
            action: "`kubectl logs {{ $labels.pod }} -n {{ $labels.namespace }}`"
            summary: >-
              Følgende request feilet: `Status {{ $labels.status }} - {{ $labels.method }} {{ $labels.route }}`.
              Sjekk loggene for å se hvorfor dette feiler
          labels:
            namespace: okonomi
            severity: critical
            sokos_oppdrag_type: sokos-oppdrag
            alert_type: custom
---
# AlertmanagerConfig that delivers alerts labelled
# `sokos_oppdrag_type: sokos-oppdrag` to the Slack channel configured below.
apiVersion: monitoring.coreos.com/v1alpha1
kind: AlertmanagerConfig
metadata:
  name: sokos-oppdrag-slack
  namespace: okonomi
  labels:
    alertmanagerConfig: sokos-oppdrag-slack
spec:
  receivers:
    - name: sokos-oppdrag-receiver
      slackConfigs:
        # The webhook URL is read from key `apiUrl` of the secret
        # `slack-webhook` rather than being stored in this manifest.
        - apiURL:
            key: apiUrl
            name: slack-webhook
          channel: '#team-mob-alerts-dev'
          iconEmoji: ':alert:'
          username: 'Alert dev-fss (sokos-oppdrag)'
          sendResolved: true
          # Title: upper-cased status, the number of firing alerts while the
          # status is "firing", then the shared alert name.
          title: |-
            [{{ .Status | toUpper }}{{ if eq .Status "firing" }}:{{ .Alerts.Firing | len }}{{ end }}] {{ .CommonLabels.alertname }}
          # Text: for each alert, the summary (or message) annotation followed
          # by the action annotation when one is set. The template lines must
          # stay at one common indentation so the folded scalar joins them.
          text: >-
            {{ range .Alerts }}
            {{ if or .Annotations.summary .Annotations.message }}
            {{ or .Annotations.summary .Annotations.message }}
            {{ println " " }}
            {{- end }}
            {{- if .Annotations.action }}
            • *action*: {{ .Annotations.action }} {{ println " " }}
            {{- end }}
            {{ end }}
  route:
    groupBy:
      - alertname
    # Only alerts carrying this label/value pair are routed to the receiver.
    matchers:
      - name: sokos_oppdrag_type
        matchType: '='
        value: sokos-oppdrag
    groupInterval: 10s
    groupWait: 5s
    receiver: sokos-oppdrag-receiver
    # NOTE(review): no value was given for repeatInterval, which parses as
    # null; written explicitly here. Confirm whether a duration was intended.
    repeatInterval: null