From 40c4ae43657ed705233722ee7c1bdf0bc7e9339b Mon Sep 17 00:00:00 2001 From: Lucas Bickel <116588+hairmare@users.noreply.github.com> Date: Thu, 16 Nov 2023 10:45:28 +0100 Subject: [PATCH] feat(logging-apps): replace loki-stack with loki and promtail (#1147) --- charts/logging-apps/Chart.yaml | 90 +--------------- charts/logging-apps/README.md | 26 +++-- charts/logging-apps/ci/default-values.yaml | 8 ++ charts/logging-apps/examples/loki-stack.yaml | 10 -- charts/logging-apps/examples/loki.yaml | 105 +++++++++++++++++++ charts/logging-apps/examples/promtail.yaml | 31 ++++++ charts/logging-apps/templates/loki.yaml | 33 ++++++ charts/logging-apps/templates/promtail.yaml | 33 ++++++ charts/logging-apps/values.yaml | 48 +++++++-- 9 files changed, 271 insertions(+), 113 deletions(-) delete mode 100644 charts/logging-apps/examples/loki-stack.yaml create mode 100644 charts/logging-apps/examples/loki.yaml create mode 100644 charts/logging-apps/examples/promtail.yaml create mode 100644 charts/logging-apps/templates/loki.yaml create mode 100644 charts/logging-apps/templates/promtail.yaml diff --git a/charts/logging-apps/Chart.yaml b/charts/logging-apps/Chart.yaml index 4897f4fe0..123defc42 100644 --- a/charts/logging-apps/Chart.yaml +++ b/charts/logging-apps/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 name: logging-apps description: Argo CD app-of-apps config for logging applications type: application -version: 0.27.0 +version: 0.28.0 home: https://github.com/adfinis/helm-charts/tree/main/charts/logging-apps sources: - https://github.com/adfinis/helm-charts @@ -17,89 +17,7 @@ dependencies: annotations: artifacthub.io/changes: | - kind: changed - description: 'chore(fluentBit): update chart from 0.21.6 to 0.39.0' + description: 'feat: replace loki-stack with loki and promtail' links: - - name: GitHub release of helm-chart 0.39.0 - url: https://github.com/fluent/helm-charts/releases/tag/fluent-bit-0.39.0 - - name: GitHub release of Fluent Bit v2.1.10 - url: 
https://github.com/fluent/fluent-bit/releases/tag/v2.1.10 - - name: GitHub release of helm-chart 0.38.0 - url: https://github.com/fluent/helm-charts/releases/tag/fluent-bit-0.38.0 - - name: GitHub release of Fluent Bit v2.1.9 - url: https://github.com/fluent/fluent-bit/releases/tag/v2.1.9 - - name: GitHub release of helm-chart 0.37.1 - url: https://github.com/fluent/helm-charts/releases/tag/fluent-bit-0.37.1 - - name: GitHub release of Fluent Bit v2.1.8 - url: https://github.com/fluent/fluent-bit/releases/tag/v2.1.8 - - name: GitHub release of helm-chart 0.37.0 - url: https://github.com/fluent/helm-charts/releases/tag/fluent-bit-0.37.0 - - name: GitHub release of helm-chart 0.36.0 - url: https://github.com/fluent/helm-charts/releases/tag/fluent-bit-0.36.0 - - name: GitHub release of Fluent Bit v2.1.7 - url: https://github.com/fluent/fluent-bit/releases/tag/v2.1.7 - - name: GitHub release of helm-chart 0.35.1 - url: https://github.com/fluent/helm-charts/releases/tag/fluent-bit-0.35.1 - - name: GitHub release of helm-chart 0.35.0 - url: https://github.com/fluent/helm-charts/releases/tag/fluent-bit-0.35.0 - - name: GitHub release of helm-chart 0.34.2 - url: https://github.com/fluent/helm-charts/releases/tag/fluent-bit-0.34.2 - - name: GitHub release of Fluent Bit v2.1.6 - url: https://github.com/fluent/fluent-bit/releases/tag/v2.1.6 - - name: GitHub release of helm-chart 0.34.1 - url: https://github.com/fluent/helm-charts/releases/tag/fluent-bit-0.34.1 - - name: GitHub release of helm-chart 0.34.0 - url: https://github.com/fluent/helm-charts/releases/tag/fluent-bit-0.34.0 - - name: GitHub release of helm-chart 0.33.0 - url: https://github.com/fluent/helm-charts/releases/tag/fluent-bit-0.33.0 - - name: GitHub release of helm-chart 0.32.2 - url: https://github.com/fluent/helm-charts/releases/tag/fluent-bit-0.32.2 - - name: GitHub release of Fluent Bit v2.1.5 - url: https://github.com/fluent/fluent-bit/releases/tag/v2.1.5 - - name: GitHub release of helm-chart 0.32.1 - 
url: https://github.com/fluent/helm-charts/releases/tag/fluent-bit-0.32.1 - - name: GitHub release of helm-chart 0.32.0 - url: https://github.com/fluent/helm-charts/releases/tag/fluent-bit-0.32.0 - - name: GitHub release of helm-chart 0.31.0 - url: https://github.com/fluent/helm-charts/releases/tag/fluent-bit-0.31.0 - - name: GitHub release of helm-chart 0.30.4 - url: https://github.com/fluent/helm-charts/releases/tag/fluent-bit-0.30.4 - - name: GitHub release of Fluent Bit v2.1.4 - url: https://github.com/fluent/fluent-bit/releases/tag/v2.1.4 - - name: GitHub release of helm-chart 0.30.3 - url: https://github.com/fluent/helm-charts/releases/tag/fluent-bit-0.30.3 - - name: GitHub release of helm-chart 0.30.2 - url: https://github.com/fluent/helm-charts/releases/tag/fluent-bit-0.30.2 - - name: GitHub release of helm-chart 0.30.1 - url: https://github.com/fluent/helm-charts/releases/tag/fluent-bit-0.30.1 - - name: GitHub release of helm-chart 0.30.0 - url: https://github.com/fluent/helm-charts/releases/tag/fluent-bit-0.30.0 - - name: GitHub release of helm-chart 0.29.0 - url: https://github.com/fluent/helm-charts/releases/tag/fluent-bit-0.29.0 - - name: GitHub release of Fluent Bit v2.1.3 - url: https://github.com/fluent/fluent-bit/releases/tag/v2.1.3 - - name: GitHub release of helm-chart 0.28.0 - url: https://github.com/fluent/helm-charts/releases/tag/fluent-bit-0.28.0 - - name: GitHub release of Fluent Bit v2.1.2 - url: https://github.com/fluent/fluent-bit/releases/tag/v2.1.2 - - name: GitHub release of helm-chart 0.27.0 - url: https://github.com/fluent/helm-charts/releases/tag/fluent-bit-0.27.0 - - name: GitHub release of Fluent Bit v2.0.11 - url: https://github.com/fluent/fluent-bit/releases/tag/v2.0.11 - - name: GitHub release of helm-chart 0.26.0 - url: https://github.com/fluent/helm-charts/releases/tag/fluent-bit-0.26.0 - - name: GitHub release of helm-chart 0.25.0 - url: https://github.com/fluent/helm-charts/releases/tag/fluent-bit-0.25.0 - - name: GitHub 
release of Fluent Bit v2.0.10 - url: https://github.com/fluent/fluent-bit/releases/tag/v2.0.10 - - name: GitHub release of helm-chart 0.24.0 - url: https://github.com/fluent/helm-charts/releases/tag/fluent-bit-0.24.0 - - name: GitHub release of Fluent Bit v2.0.9 - url: https://github.com/fluent/fluent-bit/releases/tag/v2.0.9 - - name: GitHub release of helm-chart 0.23.0 - url: https://github.com/fluent/helm-charts/releases/tag/fluent-bit-0.23.0 - - name: GitHub release of Fluent Bit v2.0.8 - url: https://github.com/fluent/fluent-bit/releases/tag/v2.0.8 - - name: GitHub release of helm-chart 0.22.0 - url: https://github.com/fluent/helm-charts/releases/tag/fluent-bit-0.22.0 - - name: GitHub release of helm-chart 0.21.7 - url: https://github.com/fluent/helm-charts/releases/tag/fluent-bit-0.21.7 + - name: Issue + url: https://github.com/adfinis/helm-charts/issues/1058 diff --git a/charts/logging-apps/README.md b/charts/logging-apps/README.md index 8b79a1578..0d969f845 100644 --- a/charts/logging-apps/README.md +++ b/charts/logging-apps/README.md @@ -1,6 +1,6 @@ # logging-apps -![Version: 0.27.0](https://img.shields.io/badge/Version-0.27.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) +![Version: 0.28.0](https://img.shields.io/badge/Version-0.28.0-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) Argo CD app-of-apps config for logging applications @@ -26,7 +26,7 @@ This chart is maintained by [Adfinis](https://adfinis.com/?pk_campaign=github&pk | filebeat | object | - | [filebeat](https://github.com/elastic/beats/tree/master/filebeat) ([example](./examples/filebeat.yaml)) | | filebeat.chart | string | `"filebeat"` | Chart | | filebeat.destination.namespace | string | `"infra-logging"` | Namespace | -| filebeat.enabled | bool | `false` | Enable loki | +| filebeat.enabled | bool | `false` | Enable 
filebeat | | filebeat.repoURL | string | [repo](https://helm.elastic.co) | Repo URL | | filebeat.targetRevision | string | `"8.5.1"` | [filebeat Helm chart](https://github.com/elastic/helm-charts/tree/master/filebeat) version | | filebeat.values | object | [upstream values](https://github.com/elastic/helm-charts/tree/master/filebeat/values.yaml) | Helm values | @@ -44,13 +44,21 @@ This chart is maintained by [Adfinis](https://adfinis.com/?pk_campaign=github&pk | fluentd.repoURL | string | [repo](https://charts.bitnami.com/bitnami) | Repo URL | | fluentd.targetRevision | string | `"5.5.*"` | [fluentd Helm chart](https://github.com/bitnami/charts/tree/master/bitnami/fluentd) version | | fluentd.values | object | [upstream values](https://github.com/bitnami/charts/tree/master/bitnami/fluentd/values.yaml) | Helm values | -| lokiStack | object | - | [loki-stack](https://github.com/grafana/loki) ([example](./examples/loki-stack.yaml)) | -| lokiStack.chart | string | `"loki-stack"` | Chart | -| lokiStack.destination.namespace | string | `"infra-logging"` | Namespace | -| lokiStack.enabled | bool | `false` | Enable loki | -| lokiStack.repoURL | string | [repo](https://grafana.github.io/helm-charts) | Repo URL | -| lokiStack.targetRevision | string | `"2.9.10"` | [loki-stack Helm chart](https://github.com/grafana/helm-charts/tree/main/charts/loki-stack) version | -| lokiStack.values | object | [upstream values](https://github.com/grafana/helm-charts/blob/main/charts/loki-stack/values.yaml) | Helm values | +| loki | object | - | [Grafana Loki](https://grafana.com/oss/loki/) ([example](./examples/loki.yaml)) | +| loki.chart | string | `"loki"` | Chart | +| loki.destination.namespace | string | `"infra-logging"` | Namespace | +| loki.enabled | bool | `false` | Enable loki | +| loki.repoURL | string | [repo](https://grafana.github.io/helm-charts) | Repo URL | +| loki.targetRevision | string | `"5.36.3"` | [loki Helm 
chart](https://github.com/grafana/loki/tree/main/production/helm/loki) | +| loki.values | object | [upstream values](https://github.com/grafana/loki/blob/main/production/helm/loki/values.yaml) | Helm values | +| lokiStack | object | DEPRECATED | [loki-stack](https://github.com/grafana/loki) is DEPRECATED; use `loki` and `promtail` individually | +| promtail | object | - | [Grafana Loki promtail](https://grafana.com/docs/loki/latest/send-data/promtail/) ([example](./examples/promtail.yaml)) | +| promtail.chart | string | `"promtail"` | Chart | +| promtail.destination.namespace | string | `"infra-logging"` | Namespace | +| promtail.enabled | bool | `false` | Enable promtail | +| promtail.repoURL | string | [repo](https://grafana.github.io/helm-charts) | Repo URL | +| promtail.targetRevision | string | `"6.15.3"` | [promtail Helm chart](https://github.com/grafana/helm-charts/tree/main/charts/promtail) | +| promtail.values | object | [upstream values](https://github.com/grafana/helm-charts/blob/main/charts/promtail/values.yaml) | Helm values | ## About this chart diff --git a/charts/logging-apps/ci/default-values.yaml b/charts/logging-apps/ci/default-values.yaml index 686303d92..9b4fc6037 100644 --- a/charts/logging-apps/ci/default-values.yaml +++ b/charts/logging-apps/ci/default-values.yaml @@ -1,4 +1,12 @@ lokiStack: + enabled: false + values: {} + +loki: + enabled: true + values: {} + +promtail: enabled: true values: {} diff --git a/charts/logging-apps/examples/loki-stack.yaml b/charts/logging-apps/examples/loki-stack.yaml deleted file mode 100644 index bf24669c7..000000000 --- a/charts/logging-apps/examples/loki-stack.yaml +++ /dev/null @@ -1,10 +0,0 @@ -lokiStack: - enabled: true - project: infra-logging - values: - loki: - enabled: true - promtail: - enabled: true - grafana: - enabled: false diff --git a/charts/logging-apps/examples/loki.yaml b/charts/logging-apps/examples/loki.yaml new file mode 100644 index 000000000..8187bb8cc --- /dev/null +++ 
b/charts/logging-apps/examples/loki.yaml @@ -0,0 +1,105 @@ +# Deploy Loki using Azure blob as backing store + +loki: + enabled: true + project: infra-logging + values: + test: + # TODO: enable this together with selfMonitoring when you deploy loki! + enabled: false + monitoring: + selfMonitoring: + enabled: false + grafanaAgent: + installOperator: false + serviceMonitor: + enabled: true + labels: + k8s.adfinis.com/prometheus: kube-prometheus + lokiCanary: + enabled: false + loki: + # TODO: enable auth! + auth_enabled: false + commonConfig: + # The default is /var/loki and as we have no persistent volume + # mounted startup fails because the location is read-only. + # TODO: Review which potential problems this could cause with loss + # of data when a pod is restarted. + path_prefix: /tmp/loki + # We're using a custom schema config, because we want to directly start using + # TSDB to avoid having to migrate from boltdb-shipper once it's deprecated. + schemaConfig: + configs: + - from: 2023-06-23 + store: tsdb + object_store: azure + schema: v12 + index: + prefix: loki_index_ + period: 24h + storage_config: + # TODO: configure a proper Azure Blob store + azure: + account_name: + account_key: ${ARM_ACCESS_KEY} + container_name: + endpoint_suffix: blob.core.windows.net + tsdb_shipper: + shared_store: azure + table_manager: + retention_deletes_enabled: true + retention_period: 30d + singleBinary: + replicas: 3 + persistence: + enabled: false + extraArgs: + # Required so that ${ARM_ACCESS_KEY} above is expanded using the + # environment variable configured in the secret below. + - "-config.expand-env=true" + extraEnvFrom: + # Should contain a single key: ARM_ACCESS_KEY containing the access + # key for the storage account. 
+ - secretRef: + name: loki-objectstorage-credentials + serviceMonitor: + enabled: true + additionalLabels: + k8s.adfinis.com/prometheus: kube-prometheus + prometheusRule: + additionalLabels: + k8s.adfinis.com/prometheus: kube-prometheus + rules: + - alert: LokiProcessTooManyRestarts + expr: changes(process_start_time_seconds{job=~"loki"}[15m]) > 2 + for: 0m + labels: + severity: warning + annotations: + summary: Loki process too many restarts (instance {{ $labels.instance }}) + description: "A loki process had too many restarts (target {{ $labels.instance }})\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + - alert: LokiRequestErrors + expr: 100 * sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[1m])) by (namespace, job, route) / sum(rate(loki_request_duration_seconds_count[1m])) by (namespace, job, route) > 10 + for: 15m + labels: + severity: critical + annotations: + summary: Loki request errors (instance {{ $labels.instance }}) + description: "The {{ $labels.job }} and {{ $labels.route }} are experiencing errors\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + - alert: LokiRequestPanic + expr: sum(increase(loki_panic_total[10m])) by (namespace, job) > 0 + for: 5m + labels: + severity: critical + annotations: + summary: Loki request panic (instance {{ $labels.instance }}) + description: "The {{ $labels.job }} is experiencing {{ printf \"%.2f\" $value }}% increase of panics\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + - alert: LokiRequestLatency + expr: (histogram_quantile(0.99, sum(rate(loki_request_duration_seconds_bucket{route!~"(?i).*tail.*"}[5m])) by (le))) > 1 + for: 5m + labels: + severity: critical + annotations: + summary: Loki request latency (instance {{ $labels.instance }}) + description: "The {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf \"%.2f\" $value }}s 99th percentile latency\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" diff --git a/charts/logging-apps/examples/promtail.yaml 
b/charts/logging-apps/examples/promtail.yaml new file mode 100644 index 000000000..df1324333 --- /dev/null +++ b/charts/logging-apps/examples/promtail.yaml @@ -0,0 +1,31 @@ +# Deploy a promtail instance that is monitored by kube-prometheus-stack + +promtail: + enabled: true + project: infra-logging + values: + serviceMonitor: + enabled: true + labels: + k8s.adfinis.com/prometheus: kube-prometheus + prometheusRule: + enabled: true + additionalLabels: + k8s.adfinis.com/prometheus: kube-prometheus + rules: + - alert: PromtailRequestErrors + expr: 100 * sum(rate(promtail_request_duration_seconds_count{status_code=~"5..|failed"}[1m])) by (namespace, job, route, instance) / sum(rate(promtail_request_duration_seconds_count[1m])) by (namespace, job, route, instance) > 10 + for: 5m + labels: + severity: critical + annotations: + summary: Promtail request errors (instance {{ $labels.instance }}) + description: "The {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf \"%.2f\" $value }}% errors.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" + - alert: PromtailRequestLatency + expr: histogram_quantile(0.99, sum(rate(promtail_request_duration_seconds_bucket[5m])) by (le)) > 1 + for: 5m + labels: + severity: critical + annotations: + summary: Promtail request latency (instance {{ $labels.instance }}) + description: "The {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf \"%.2f\" $value }}s 99th percentile latency.\n VALUE = {{ $value }}\n LABELS = {{ $labels }}" diff --git a/charts/logging-apps/templates/loki.yaml b/charts/logging-apps/templates/loki.yaml new file mode 100644 index 000000000..5587aeb9c --- /dev/null +++ b/charts/logging-apps/templates/loki.yaml @@ -0,0 +1,33 @@ +{{ if .Values.loki.enabled }} +{{ template "argoconfig.application" (list . 
"logging-apps.loki") }} +{{ end }} + +{{- define "logging-apps.loki" -}}{{- $app := unset .Values.loki "enabled" -}}{{- $name := default $app.destination.namespace $app.name -}} +metadata: + name: {{ template "common.fullname" . }}-{{ $name }} +spec: + {{- if $app.project }} + project: {{ $app.project | quote }} + {{- end }} + source: + repoURL: {{ $app.repoURL | quote }} + chart: {{ $app.chart | quote }} + targetRevision: {{ $app.targetRevision | quote }} + helm: + releaseName: {{ $name | quote }} + values: |- + nameOverride: {{ $name | quote }} + {{- $app.values | toYaml | nindent 8 }} + {{- if $app.destination }} + destination: + {{ $app.destination | toYaml | nindent 4 }} + {{- end }} + {{- if $app.syncPolicy }} + syncPolicy: + {{ $app.syncPolicy | toYaml | nindent 4 }} + {{- end }} + {{- if $app.ignoreDifferences }} + ignoreDifferences: + {{ $app.ignoreDifferences | toYaml | nindent 4 }} + {{- end }} +{{- end -}} diff --git a/charts/logging-apps/templates/promtail.yaml b/charts/logging-apps/templates/promtail.yaml new file mode 100644 index 000000000..223a0793b --- /dev/null +++ b/charts/logging-apps/templates/promtail.yaml @@ -0,0 +1,33 @@ +{{ if .Values.promtail.enabled }} +{{ template "argoconfig.application" (list . "logging-apps.promtail") }} +{{ end }} + +{{- define "logging-apps.promtail" -}}{{- $app := unset .Values.promtail "enabled" -}}{{- $name := default $app.destination.namespace $app.name -}} +metadata: + name: {{ template "common.fullname" . 
}}-{{ $name }} +spec: + {{- if $app.project }} + project: {{ $app.project | quote }} + {{- end }} + source: + repoURL: {{ $app.repoURL | quote }} + chart: {{ $app.chart | quote }} + targetRevision: {{ $app.targetRevision | quote }} + helm: + releaseName: {{ $name | quote }} + values: |- + nameOverride: {{ $name | quote }} + {{- $app.values | toYaml | nindent 8 }} + {{- if $app.destination }} + destination: + {{ $app.destination | toYaml | nindent 4 }} + {{- end }} + {{- if $app.syncPolicy }} + syncPolicy: + {{ $app.syncPolicy | toYaml | nindent 4 }} + {{- end }} + {{- if $app.ignoreDifferences }} + ignoreDifferences: + {{ $app.ignoreDifferences | toYaml | nindent 4 }} + {{- end }} +{{- end -}} diff --git a/charts/logging-apps/values.yaml b/charts/logging-apps/values.yaml index c2b33f788..e227cd9ca 100644 --- a/charts/logging-apps/values.yaml +++ b/charts/logging-apps/values.yaml @@ -1,9 +1,21 @@ -# -- [loki-stack](https://github.com/grafana/loki) ([example](./examples/loki-stack.yaml)) -# @default -- - +# -- [loki-stack](https://github.com/grafana/loki) is DEPRECATED; use `loki` and `promtail` individually +# @default -- DEPRECATED lokiStack: - # -- Enable loki enabled: false name: loki-stack + destination: + namespace: "infra-logging" + repoURL: "https://grafana.github.io/helm-charts" + chart: "loki-stack" + targetRevision: "2.9.10" + values: {} + +# -- [Grafana Loki](https://grafana.com/oss/loki/) ([example](./examples/loki.yaml)) +# @default -- - +loki: + # -- Enable loki + enabled: false + name: loki destination: # -- Namespace namespace: "infra-logging" @@ -11,17 +23,37 @@ lokiStack: # @default -- [repo](https://grafana.github.io/helm-charts) repoURL: "https://grafana.github.io/helm-charts" # -- Chart - chart: "loki-stack" - # -- [loki-stack Helm chart](https://github.com/grafana/helm-charts/tree/main/charts/loki-stack) version - targetRevision: "2.9.10" + chart: "loki" + # -- [loki Helm chart](https://github.com/grafana/loki/tree/main/production/helm/loki) + 
targetRevision: "5.36.3" # -- Helm values - # @default -- [upstream values](https://github.com/grafana/helm-charts/blob/main/charts/loki-stack/values.yaml) + # @default -- [upstream values](https://github.com/grafana/loki/blob/main/production/helm/loki/values.yaml) + values: {} + +# -- [Grafana Loki promtail](https://grafana.com/docs/loki/latest/send-data/promtail/) ([example](./examples/promtail.yaml)) +# @default -- - +promtail: + # -- Enable promtail + enabled: false + name: promtail + destination: + # -- Namespace + namespace: "infra-logging" + # -- Repo URL + # @default -- [repo](https://grafana.github.io/helm-charts) + repoURL: "https://grafana.github.io/helm-charts" + # -- Chart + chart: "promtail" + # -- [promtail Helm chart](https://github.com/grafana/helm-charts/tree/main/charts/promtail) + targetRevision: "6.15.3" + # -- Helm values + # @default -- [upstream values](https://github.com/grafana/helm-charts/blob/main/charts/promtail/values.yaml) values: {} # -- [filebeat](https://github.com/elastic/beats/tree/master/filebeat) ([example](./examples/filebeat.yaml)) # @default -- - filebeat: - # -- Enable loki + # -- Enable filebeat enabled: false name: filebeat destination: