diff --git a/charts/feature-integrations/docs/integrations/cert-manager.md b/charts/feature-integrations/docs/integrations/cert-manager.md index 873fe8cfd..9cae456ba 100644 --- a/charts/feature-integrations/docs/integrations/cert-manager.md +++ b/charts/feature-integrations/docs/integrations/cert-manager.md @@ -11,6 +11,13 @@ | namespaces | list | `[]` | Namespaces to look for cert-manager instances. | | portName | string | `"http-metrics"` | Name of the port to scrape metrics from. | +### Scrape Settings + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| jobName | string | `"integrations/cert-manager"` | The value of the job label for scraped metrics. | +| scrapeInterval | string | `60s` | How frequently to scrape metrics from cert-manager. | + ### Metric Processing Settings | Key | Type | Default | Description | |-----|------|---------|-------------| @@ -24,9 +31,3 @@ | Key | Type | Default | Description | |-----|------|---------|-------------| | name | string | `""` | Name for this cert-manager instance. | - -### Scrape Settings - -| Key | Type | Default | Description | -|-----|------|---------|-------------| -| scrapeInterval | string | `60s` | How frequently to scrape metrics from Windows Exporter. | diff --git a/charts/feature-integrations/integrations/cert-manager-values.yaml b/charts/feature-integrations/integrations/cert-manager-values.yaml index 1b557b5fc..8618c631a 100644 --- a/charts/feature-integrations/integrations/cert-manager-values.yaml +++ b/charts/feature-integrations/integrations/cert-manager-values.yaml @@ -26,6 +26,10 @@ portName: http-metrics # @section -- Scrape Settings scrapeInterval: +# -- The value of the job label for scraped metrics. +# @section -- Scrape Settings +jobName: integrations/cert-manager + # -- Sets the max_cache_size for cadvisor prometheus.relabel component. # This should be at least 2x-5x your largest scrape target or samples appended rate. 
# ([docs](https://grafana.com/docs/alloy/latest/reference/components/prometheus.relabel/#arguments)) diff --git a/charts/feature-integrations/schema-mods/definitions/cert-manager-integration.schema.json b/charts/feature-integrations/schema-mods/definitions/cert-manager-integration.schema.json index 0bc8a5ce4..e7fb33663 100644 --- a/charts/feature-integrations/schema-mods/definitions/cert-manager-integration.schema.json +++ b/charts/feature-integrations/schema-mods/definitions/cert-manager-integration.schema.json @@ -4,6 +4,9 @@ "field_selectors": { "type": "array" }, + "jobName": { + "type": "string" + }, "labelSelectors": { "type": "object", "properties": { diff --git a/charts/feature-integrations/templates/_integration_cert-manager.tpl b/charts/feature-integrations/templates/_integration_cert-manager.tpl index 5c14a7bc2..6079e4ffb 100644 --- a/charts/feature-integrations/templates/_integration_cert-manager.tpl +++ b/charts/feature-integrations/templates/_integration_cert-manager.tpl @@ -47,12 +47,13 @@ cert_manager.kubernetes {{ include "helper.alloy_name" .name | quote }} { {{- if $fieldSelectors }} field_selectors = {{ $fieldSelectors | toJson }} {{- end }} - port_name = {{ .portName | quote }} + port_name = {{ .portName | quote }} } cert_manager.scrape {{ include "helper.alloy_name" .name | quote }} { targets = cert_manager.kubernetes.{{ include "helper.alloy_name" .name }}.output clustering = true + job_label = {{ .jobName | quote }} {{- if $metricAllowList }} keep_metrics = "up|{{ $metricAllowList | join "|" | join "|" }}" {{- end }} diff --git a/charts/feature-integrations/tests/cert-manager_test.yaml b/charts/feature-integrations/tests/cert-manager_test.yaml index 6ee9cdd86..a5736843d 100644 --- a/charts/feature-integrations/tests/cert-manager_test.yaml +++ b/charts/feature-integrations/tests/cert-manager_test.yaml @@ -29,12 +29,13 @@ tests: cert_manager.kubernetes "my_cert_manager" { label_selectors = ["app.kubernetes.io/name=cert-manager"] - port_name = "http-metrics" + port_name = "http-metrics" } cert_manager.scrape "my_cert_manager" { targets = cert_manager.kubernetes.my_cert_manager.output clustering = true + job_label = "integrations/cert-manager" scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value @@ -69,12 +70,13 @@ tests: cert_manager.kubernetes "my_cert_manager" { namespaces = ["kube-system"] label_selectors = ["app.kubernetes.io/name=cert-manager"] - port_name = "http-metrics" + port_name = "http-metrics" } cert_manager.scrape "my_cert_manager" { targets = cert_manager.kubernetes.my_cert_manager.output clustering = true + job_label = "integrations/cert-manager" scrape_interval = "60s" max_cache_size = 100000 forward_to = argument.metrics_destinations.value diff --git a/charts/feature-integrations/values.schema.json b/charts/feature-integrations/values.schema.json index 96c028b4b..a6bd6b91b 100644 --- a/charts/feature-integrations/values.schema.json +++ b/charts/feature-integrations/values.schema.json @@ -97,6 +97,9 @@ "field_selectors": { "type": "array" }, + "jobName": { + "type": "string" + }, "labelSelectors": { "type": "object", "properties": { diff --git a/charts/k8s-monitoring-test/README.md b/charts/k8s-monitoring-test/README.md index aee9a4472..12b23fb6e 100644 --- a/charts/k8s-monitoring-test/README.md +++ b/charts/k8s-monitoring-test/README.md @@ -70,8 +70,8 @@ In order to specify different destinations of the same type, you can use multipl | Key | Type | Default | Description | 
|-----|------|---------|-------------| | attempts | int | `10` | Number of times to retry the test on failure. | -| delay | int | `60` | Delay, in seconds, between test runs. | -| initialDelay | int | `10` | Initial delay, in seconds, before starting the first test run. | +| delay | int | `30` | Delay, in seconds, between test runs. | +| initialDelay | int | `0` | Initial delay, in seconds, before starting the first test run. | | tests | list | `[]` | The tests to run. Each should contain an "env" object and a "queries" list. | ### General settings diff --git a/charts/k8s-monitoring-test/values.yaml b/charts/k8s-monitoring-test/values.yaml index 91923fb4f..80c16c9a9 100644 --- a/charts/k8s-monitoring-test/values.yaml +++ b/charts/k8s-monitoring-test/values.yaml @@ -13,7 +13,7 @@ tests: [] # -- Initial delay, in seconds, before starting the first test run. # @section -- Test settings -initialDelay: 10 +initialDelay: 0 # -- Number of times to retry the test on failure. # @section -- Test settings @@ -21,7 +21,7 @@ attempts: 10 # -- Delay, in seconds, between test runs. # @section -- Test settings -delay: 60 +delay: 30 pod: # -- nodeSelector to apply to the test runner pods. diff --git a/charts/k8s-monitoring/Chart.lock b/charts/k8s-monitoring/Chart.lock index 03af10372..aec830a6b 100644 --- a/charts/k8s-monitoring/Chart.lock +++ b/charts/k8s-monitoring/Chart.lock @@ -42,4 +42,4 @@ dependencies: repository: https://grafana.github.io/helm-charts version: 0.9.1 digest: sha256:734d4c8f6076481eb580378daa65fe163c78e9e07a1a214cb4b2fed16441b4c9 -generated: "2024-10-21T17:06:40.407816+02:00" +generated: "2024-10-22T11:10:21.255416+02:00" diff --git a/charts/k8s-monitoring/charts/k8s-monitoring-feature-annotation-autodiscovery-1.0.0.tgz b/charts/k8s-monitoring/charts/k8s-monitoring-feature-annotation-autodiscovery-1.0.0.tgz index 2b9117578..c9587003d 100644 Binary files a/charts/k8s-monitoring/charts/k8s-monitoring-feature-annotation-autodiscovery-1.0.0.tgz and b/charts/k8s-monitoring/charts/k8s-monitoring-feature-annotation-autodiscovery-1.0.0.tgz differ diff --git a/charts/k8s-monitoring/charts/k8s-monitoring-feature-application-observability-1.0.0.tgz b/charts/k8s-monitoring/charts/k8s-monitoring-feature-application-observability-1.0.0.tgz index bf43e4ccf..fe277102e 100644 Binary files a/charts/k8s-monitoring/charts/k8s-monitoring-feature-application-observability-1.0.0.tgz and b/charts/k8s-monitoring/charts/k8s-monitoring-feature-application-observability-1.0.0.tgz differ diff --git a/charts/k8s-monitoring/charts/k8s-monitoring-feature-cluster-events-1.0.0.tgz b/charts/k8s-monitoring/charts/k8s-monitoring-feature-cluster-events-1.0.0.tgz index 7589cbff9..53ad32aa0 100644 Binary files a/charts/k8s-monitoring/charts/k8s-monitoring-feature-cluster-events-1.0.0.tgz and b/charts/k8s-monitoring/charts/k8s-monitoring-feature-cluster-events-1.0.0.tgz differ diff --git a/charts/k8s-monitoring/charts/k8s-monitoring-feature-cluster-metrics-1.0.0.tgz b/charts/k8s-monitoring/charts/k8s-monitoring-feature-cluster-metrics-1.0.0.tgz index 49b6fc49a..47c27e6bc 100644 Binary files a/charts/k8s-monitoring/charts/k8s-monitoring-feature-cluster-metrics-1.0.0.tgz and b/charts/k8s-monitoring/charts/k8s-monitoring-feature-cluster-metrics-1.0.0.tgz differ diff --git a/charts/k8s-monitoring/charts/k8s-monitoring-feature-frontend-observability-1.0.0.tgz b/charts/k8s-monitoring/charts/k8s-monitoring-feature-frontend-observability-1.0.0.tgz index b2c7fd8b4..1123aa376 100644 Binary files 
a/charts/k8s-monitoring/charts/k8s-monitoring-feature-frontend-observability-1.0.0.tgz and b/charts/k8s-monitoring/charts/k8s-monitoring-feature-frontend-observability-1.0.0.tgz differ diff --git a/charts/k8s-monitoring/charts/k8s-monitoring-feature-integrations-1.0.0.tgz b/charts/k8s-monitoring/charts/k8s-monitoring-feature-integrations-1.0.0.tgz index f93add1d0..c9bd667bd 100644 Binary files a/charts/k8s-monitoring/charts/k8s-monitoring-feature-integrations-1.0.0.tgz and b/charts/k8s-monitoring/charts/k8s-monitoring-feature-integrations-1.0.0.tgz differ diff --git a/charts/k8s-monitoring/charts/k8s-monitoring-feature-pod-logs-1.0.0.tgz b/charts/k8s-monitoring/charts/k8s-monitoring-feature-pod-logs-1.0.0.tgz index 6f379b9a6..343b928fc 100644 Binary files a/charts/k8s-monitoring/charts/k8s-monitoring-feature-pod-logs-1.0.0.tgz and b/charts/k8s-monitoring/charts/k8s-monitoring-feature-pod-logs-1.0.0.tgz differ diff --git a/charts/k8s-monitoring/charts/k8s-monitoring-feature-profiling-1.0.0.tgz b/charts/k8s-monitoring/charts/k8s-monitoring-feature-profiling-1.0.0.tgz index 143ae215f..4fe390406 100644 Binary files a/charts/k8s-monitoring/charts/k8s-monitoring-feature-profiling-1.0.0.tgz and b/charts/k8s-monitoring/charts/k8s-monitoring-feature-profiling-1.0.0.tgz differ diff --git a/charts/k8s-monitoring/charts/k8s-monitoring-feature-prometheus-operator-objects-1.0.0.tgz b/charts/k8s-monitoring/charts/k8s-monitoring-feature-prometheus-operator-objects-1.0.0.tgz index 3551f2785..aac2a66ea 100644 Binary files a/charts/k8s-monitoring/charts/k8s-monitoring-feature-prometheus-operator-objects-1.0.0.tgz and b/charts/k8s-monitoring/charts/k8s-monitoring-feature-prometheus-operator-objects-1.0.0.tgz differ diff --git a/charts/k8s-monitoring/docs/examples/features/cluster-metrics/control-plane-monitoring/alloy-metrics.alloy b/charts/k8s-monitoring/docs/examples/features/cluster-metrics/control-plane-monitoring/alloy-metrics.alloy index ea5af2ad9..16c94f912 100644 --- a/charts/k8s-monitoring/docs/examples/features/cluster-metrics/control-plane-monitoring/alloy-metrics.alloy +++ b/charts/k8s-monitoring/docs/examples/features/cluster-metrics/control-plane-monitoring/alloy-metrics.alloy @@ -349,3 +349,34 @@ cluster_metrics "feature" { prometheus.remote_write.prometheus.receiver, ] } + +declare "etcd_integration" { + argument "metrics_destinations" { + comment = "Must be a list of metric destinations where collected metrics should be forwarded to" + } + + import.git "etcd" { + repository = "https://github.com/grafana/alloy-modules.git" + revision = "main" + path = "modules/databases/kv/etcd/metrics.alloy" + pull_frequency = "15m" + } + + etcd.kubernetes "k8s_controlplane_etcd" { + label_selectors = ["app.kubernetes.io/component=etcd"] + port_name = "metrics" + } + + etcd.scrape "k8s_controlplane_etcd" { + targets = etcd.kubernetes.k8s_controlplane_etcd.output + clustering = true + scrape_interval = "60s" + max_cache_size = 100000 + forward_to = argument.metrics_destinations.value + } +} +etcd_integration "integration" { + metrics_destinations = [ + prometheus.remote_write.prometheus.receiver, + ] +} diff --git a/charts/k8s-monitoring/docs/examples/features/cluster-metrics/control-plane-monitoring/output.yaml b/charts/k8s-monitoring/docs/examples/features/cluster-metrics/control-plane-monitoring/output.yaml index a6d974ac9..56b670aa0 100644 --- a/charts/k8s-monitoring/docs/examples/features/cluster-metrics/control-plane-monitoring/output.yaml +++ 
b/charts/k8s-monitoring/docs/examples/features/cluster-metrics/control-plane-monitoring/output.yaml @@ -479,6 +479,37 @@ data: prometheus.remote_write.prometheus.receiver, ] } + + declare "etcd_integration" { + argument "metrics_destinations" { + comment = "Must be a list of metric destinations where collected metrics should be forwarded to" + } + + import.git "etcd" { + repository = "https://github.com/grafana/alloy-modules.git" + revision = "main" + path = "modules/databases/kv/etcd/metrics.alloy" + pull_frequency = "15m" + } + + etcd.kubernetes "k8s_controlplane_etcd" { + label_selectors = ["app.kubernetes.io/component=etcd"] + port_name = "metrics" + } + + etcd.scrape "k8s_controlplane_etcd" { + targets = etcd.kubernetes.k8s_controlplane_etcd.output + clustering = true + scrape_interval = "60s" + max_cache_size = 100000 + forward_to = argument.metrics_destinations.value + } + } + etcd_integration "integration" { + metrics_destinations = [ + prometheus.remote_write.prometheus.receiver, + ] + } --- # Source: k8s-monitoring/templates/alloy-config.yaml apiVersion: v1 diff --git a/charts/k8s-monitoring/docs/examples/features/integrations/alloy/alloy-metrics.alloy b/charts/k8s-monitoring/docs/examples/features/integrations/alloy/alloy-metrics.alloy index fcba4ba6e..8d1aa341a 100644 --- a/charts/k8s-monitoring/docs/examples/features/integrations/alloy/alloy-metrics.alloy +++ b/charts/k8s-monitoring/docs/examples/features/integrations/alloy/alloy-metrics.alloy @@ -38,6 +38,297 @@ prometheus.remote_write "prometheus" { } } +declare "alloy_integration" { + argument "metrics_destinations" { + comment = "Must be a list of metric destinations where collected metrics should be forwarded to" + } + + declare "alloy_integration_discovery" { + argument "namespaces" { + comment = "The namespaces to look for targets in (default: [] is all namespaces)" + optional = true + } + + argument "field_selectors" { + comment = "The field selectors to use to find matching targets (default: [])" + optional = true + } + + argument "label_selectors" { + comment = "The label selectors to use to find matching targets (default: [\"app.kubernetes.io/name=alloy\"])" + optional = true + } + + argument "port_name" { + comment = "The of the port to scrape metrics from (default: http-metrics)" + optional = true + } + + // Alloy service discovery for all of the pods + discovery.kubernetes "alloy_pods" { + role = "pod" + + selectors { + role = "pod" + field = join(coalesce(argument.field_selectors.value, []), ",") + label = join(coalesce(argument.label_selectors.value, ["app.kubernetes.io/name=alloy"]), ",") + } + + namespaces { + names = coalesce(argument.namespaces.value, []) + } + } + + // alloy relabelings (pre-scrape) + discovery.relabel "alloy_pods" { + targets = discovery.kubernetes.alloy_pods.targets + + // keep only the specified metrics port name, and pods that are Running and ready + rule { + source_labels = [ + "__meta_kubernetes_pod_container_port_name", + "__meta_kubernetes_pod_phase", + "__meta_kubernetes_pod_ready", + "__meta_kubernetes_pod_container_init", + ] + separator = "@" + regex = coalesce(argument.port_name.value, "metrics") + "@Running@true@false" + action = "keep" + } + + rule { + source_labels = ["__meta_kubernetes_namespace"] + target_label = "namespace" + } + + rule { + source_labels = ["__meta_kubernetes_pod_name"] + target_label = "pod" + } + + rule { + source_labels = ["__meta_kubernetes_pod_container_name"] + target_label = "container" + } + + rule { + source_labels = [ + 
"__meta_kubernetes_pod_controller_kind", + "__meta_kubernetes_pod_controller_name", + ] + separator = "/" + target_label = "workload" + } + // remove the hash from the ReplicaSet + rule { + source_labels = ["workload"] + regex = "(ReplicaSet/.+)-.+" + target_label = "workload" + } + + // set the app name if specified as metadata labels "app:" or "app.kubernetes.io/name:" or "k8s-app:" + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_pod_label_app_kubernetes_io_name", + "__meta_kubernetes_pod_label_k8s_app", + "__meta_kubernetes_pod_label_app", + ] + separator = ";" + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "app" + } + + // set the component if specified as metadata labels "component:" or "app.kubernetes.io/component:" or "k8s-component:" + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_pod_label_app_kubernetes_io_component", + "__meta_kubernetes_pod_label_k8s_component", + "__meta_kubernetes_pod_label_component", + ] + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "component" + } + + // set a source label + rule { + action = "replace" + replacement = "kubernetes" + target_label = "source" + } + } + + export "output" { + value = discovery.relabel.alloy_pods.output + } + } + + declare "alloy_integration_scrape" { + argument "targets" { + comment = "Must be a list() of targets" + } + + argument "forward_to" { + comment = "Must be a list(MetricsReceiver) where collected logs should be forwarded to" + } + + argument "job_label" { + comment = "The job label to add for all alloy metric (default: integrations/alloy)" + optional = true + } + + argument "keep_metrics" { + comment = "A regular expression of metrics to keep (default: see below)" + optional = true + } + + argument "drop_metrics" { + comment = "A regular expression of metrics to drop (default: see below)" + optional = true + } + + argument "scrape_interval" { + comment = "How often to scrape metrics from the targets (default: 60s)" + optional = true + } + + argument "scrape_timeout" { + comment = "How long before a scrape times out (default: 10s)" + optional = true + } + + argument "max_cache_size" { + comment = "The maximum number of elements to hold in the relabeling cache (default: 100000). This should be at least 2x-5x your largest scrape target or samples appended rate." 
+ optional = true + } + + argument "clustering" { + comment = "Whether or not clustering should be enabled (default: false)" + optional = true + } + + prometheus.scrape "alloy" { + job_name = coalesce(argument.job_label.value, "integrations/alloy") + forward_to = [prometheus.relabel.alloy.receiver] + targets = argument.targets.value + scrape_interval = coalesce(argument.scrape_interval.value, "60s") + scrape_timeout = coalesce(argument.scrape_timeout.value, "10s") + + clustering { + enabled = coalesce(argument.clustering.value, false) + } + } + + // alloy metric relabelings (post-scrape) + prometheus.relabel "alloy" { + forward_to = argument.forward_to.value + max_cache_size = coalesce(argument.max_cache_size.value, 100000) + + // drop metrics that match the drop_metrics regex + rule { + source_labels = ["__name__"] + regex = coalesce(argument.drop_metrics.value, "(^(go|process)_.+$)") + action = "drop" + } + + // keep only metrics that match the keep_metrics regex + rule { + source_labels = ["__name__"] + regex = coalesce(argument.keep_metrics.value, ".*") + action = "keep" + } + + // remove the component_id label from any metric that starts with log_bytes or log_lines, these are custom metrics that are generated + // as part of the log annotation modules in this repo + rule { + action = "replace" + source_labels = ["__name__"] + regex = "^log_(bytes|lines).+" + replacement = "" + target_label = "component_id" + } + + // set the namespace label to that of the exported_namespace + rule { + action = "replace" + source_labels = ["__name__", "exported_namespace"] + separator = "@" + regex = "^log_(bytes|lines).+@(.+)" + replacement = "$2" + target_label = "namespace" + } + + // set the pod label to that of the exported_pod + rule { + action = "replace" + source_labels = ["__name__", "exported_pod"] + separator = "@" + regex = "^log_(bytes|lines).+@(.+)" + replacement = "$2" + target_label = "pod" + } + + // set the container label to that of the exported_container + rule { + action = "replace" + source_labels = ["__name__", "exported_container"] + separator = "@" + regex = "^log_(bytes|lines).+@(.+)" + replacement = "$2" + target_label = "container" + } + + // set the job label to that of the exported_job + rule { + action = "replace" + source_labels = ["__name__", "exported_job"] + separator = "@" + regex = "^log_(bytes|lines).+@(.+)" + replacement = "$2" + target_label = "job" + } + + // set the instance label to that of the exported_instance + rule { + action = "replace" + source_labels = ["__name__", "exported_instance"] + separator = "@" + regex = "^log_(bytes|lines).+@(.+)" + replacement = "$2" + target_label = "instance" + } + + rule { + action = "labeldrop" + regex = "exported_(namespace|pod|container|job|instance)" + } + } + } + + + alloy_integration_discovery "alloy_metrics" { + port_name = "http-metrics" + label_selectors = ["app.kubernetes.io/name=alloy-metrics"] + } + + alloy_integration_scrape "alloy_metrics" { + targets = alloy_integration_discovery.alloy_metrics.output + clustering = true + keep_metrics = "up|alloy_build_info" + scrape_interval = "60s" + max_cache_size = 100000 + forward_to = argument.metrics_destinations.value + } +} +alloy_integration "integration" { + metrics_destinations = [ + prometheus.remote_write.prometheus.receiver, + ] +} + // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] diff --git a/charts/k8s-monitoring/docs/examples/features/integrations/alloy/output.yaml 
b/charts/k8s-monitoring/docs/examples/features/integrations/alloy/output.yaml index 9df977be7..b17f4d1b6 100644 --- a/charts/k8s-monitoring/docs/examples/features/integrations/alloy/output.yaml +++ b/charts/k8s-monitoring/docs/examples/features/integrations/alloy/output.yaml @@ -63,6 +63,297 @@ data: } } + declare "alloy_integration" { + argument "metrics_destinations" { + comment = "Must be a list of metric destinations where collected metrics should be forwarded to" + } + + declare "alloy_integration_discovery" { + argument "namespaces" { + comment = "The namespaces to look for targets in (default: [] is all namespaces)" + optional = true + } + + argument "field_selectors" { + comment = "The field selectors to use to find matching targets (default: [])" + optional = true + } + + argument "label_selectors" { + comment = "The label selectors to use to find matching targets (default: [\"app.kubernetes.io/name=alloy\"])" + optional = true + } + + argument "port_name" { + comment = "The of the port to scrape metrics from (default: http-metrics)" + optional = true + } + + // Alloy service discovery for all of the pods + discovery.kubernetes "alloy_pods" { + role = "pod" + + selectors { + role = "pod" + field = join(coalesce(argument.field_selectors.value, []), ",") + label = join(coalesce(argument.label_selectors.value, ["app.kubernetes.io/name=alloy"]), ",") + } + + namespaces { + names = coalesce(argument.namespaces.value, []) + } + } + + // alloy relabelings (pre-scrape) + discovery.relabel "alloy_pods" { + targets = discovery.kubernetes.alloy_pods.targets + + // keep only the specified metrics port name, and pods that are Running and ready + rule { + source_labels = [ + "__meta_kubernetes_pod_container_port_name", + "__meta_kubernetes_pod_phase", + "__meta_kubernetes_pod_ready", + "__meta_kubernetes_pod_container_init", + ] + separator = "@" + regex = coalesce(argument.port_name.value, "metrics") + "@Running@true@false" + action = "keep" + } + + rule { + source_labels = ["__meta_kubernetes_namespace"] + target_label = "namespace" + } + + rule { + source_labels = ["__meta_kubernetes_pod_name"] + target_label = "pod" + } + + rule { + source_labels = ["__meta_kubernetes_pod_container_name"] + target_label = "container" + } + + rule { + source_labels = [ + "__meta_kubernetes_pod_controller_kind", + "__meta_kubernetes_pod_controller_name", + ] + separator = "/" + target_label = "workload" + } + // remove the hash from the ReplicaSet + rule { + source_labels = ["workload"] + regex = "(ReplicaSet/.+)-.+" + target_label = "workload" + } + + // set the app name if specified as metadata labels "app:" or "app.kubernetes.io/name:" or "k8s-app:" + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_pod_label_app_kubernetes_io_name", + "__meta_kubernetes_pod_label_k8s_app", + "__meta_kubernetes_pod_label_app", + ] + separator = ";" + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "app" + } + + // set the component if specified as metadata labels "component:" or "app.kubernetes.io/component:" or "k8s-component:" + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_pod_label_app_kubernetes_io_component", + "__meta_kubernetes_pod_label_k8s_component", + "__meta_kubernetes_pod_label_component", + ] + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "component" + } + + // set a source label + rule { + action = "replace" + replacement = "kubernetes" + target_label = "source" + } + } + + export "output" { + value = 
discovery.relabel.alloy_pods.output + } + } + + declare "alloy_integration_scrape" { + argument "targets" { + comment = "Must be a list() of targets" + } + + argument "forward_to" { + comment = "Must be a list(MetricsReceiver) where collected logs should be forwarded to" + } + + argument "job_label" { + comment = "The job label to add for all alloy metric (default: integrations/alloy)" + optional = true + } + + argument "keep_metrics" { + comment = "A regular expression of metrics to keep (default: see below)" + optional = true + } + + argument "drop_metrics" { + comment = "A regular expression of metrics to drop (default: see below)" + optional = true + } + + argument "scrape_interval" { + comment = "How often to scrape metrics from the targets (default: 60s)" + optional = true + } + + argument "scrape_timeout" { + comment = "How long before a scrape times out (default: 10s)" + optional = true + } + + argument "max_cache_size" { + comment = "The maximum number of elements to hold in the relabeling cache (default: 100000). This should be at least 2x-5x your largest scrape target or samples appended rate." + optional = true + } + + argument "clustering" { + comment = "Whether or not clustering should be enabled (default: false)" + optional = true + } + + prometheus.scrape "alloy" { + job_name = coalesce(argument.job_label.value, "integrations/alloy") + forward_to = [prometheus.relabel.alloy.receiver] + targets = argument.targets.value + scrape_interval = coalesce(argument.scrape_interval.value, "60s") + scrape_timeout = coalesce(argument.scrape_timeout.value, "10s") + + clustering { + enabled = coalesce(argument.clustering.value, false) + } + } + + // alloy metric relabelings (post-scrape) + prometheus.relabel "alloy" { + forward_to = argument.forward_to.value + max_cache_size = coalesce(argument.max_cache_size.value, 100000) + + // drop metrics that match the drop_metrics regex + rule { + source_labels = ["__name__"] + regex = coalesce(argument.drop_metrics.value, "(^(go|process)_.+$)") + action = "drop" + } + + // keep only metrics that match the keep_metrics regex + rule { + source_labels = ["__name__"] + regex = coalesce(argument.keep_metrics.value, ".*") + action = "keep" + } + + // remove the component_id label from any metric that starts with log_bytes or log_lines, these are custom metrics that are generated + // as part of the log annotation modules in this repo + rule { + action = "replace" + source_labels = ["__name__"] + regex = "^log_(bytes|lines).+" + replacement = "" + target_label = "component_id" + } + + // set the namespace label to that of the exported_namespace + rule { + action = "replace" + source_labels = ["__name__", "exported_namespace"] + separator = "@" + regex = "^log_(bytes|lines).+@(.+)" + replacement = "$2" + target_label = "namespace" + } + + // set the pod label to that of the exported_pod + rule { + action = "replace" + source_labels = ["__name__", "exported_pod"] + separator = "@" + regex = "^log_(bytes|lines).+@(.+)" + replacement = "$2" + target_label = "pod" + } + + // set the container label to that of the exported_container + rule { + action = "replace" + source_labels = ["__name__", "exported_container"] + separator = "@" + regex = "^log_(bytes|lines).+@(.+)" + replacement = "$2" + target_label = "container" + } + + // set the job label to that of the exported_job + rule { + action = "replace" + source_labels = ["__name__", "exported_job"] + separator = "@" + regex = "^log_(bytes|lines).+@(.+)" + replacement = "$2" + target_label = "job" + } + + // 
set the instance label to that of the exported_instance + rule { + action = "replace" + source_labels = ["__name__", "exported_instance"] + separator = "@" + regex = "^log_(bytes|lines).+@(.+)" + replacement = "$2" + target_label = "instance" + } + + rule { + action = "labeldrop" + regex = "exported_(namespace|pod|container|job|instance)" + } + } + } + + + alloy_integration_discovery "alloy_metrics" { + port_name = "http-metrics" + label_selectors = ["app.kubernetes.io/name=alloy-metrics"] + } + + alloy_integration_scrape "alloy_metrics" { + targets = alloy_integration_discovery.alloy_metrics.output + clustering = true + keep_metrics = "up|alloy_build_info" + scrape_interval = "60s" + max_cache_size = 100000 + forward_to = argument.metrics_destinations.value + } + } + alloy_integration "integration" { + metrics_destinations = [ + prometheus.remote_write.prometheus.receiver, + ] + } + // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] diff --git a/charts/k8s-monitoring/docs/examples/features/integrations/cert-manager/alloy-metrics.alloy b/charts/k8s-monitoring/docs/examples/features/integrations/cert-manager/alloy-metrics.alloy index ac69f5ae7..38f81f861 100644 --- a/charts/k8s-monitoring/docs/examples/features/integrations/cert-manager/alloy-metrics.alloy +++ b/charts/k8s-monitoring/docs/examples/features/integrations/cert-manager/alloy-metrics.alloy @@ -38,6 +38,38 @@ prometheus.remote_write "prometheus" { } } +declare "cert_manager_integration" { + argument "metrics_destinations" { + comment = "Must be a list of metric destinations where collected metrics should be forwarded to" + } + + import.git "cert_manager" { + repository = "https://github.com/grafana/alloy-modules.git" + revision = "main" + path = "modules/kubernetes/cert-manager/metrics.alloy" + pull_frequency = "15m" + } + + cert_manager.kubernetes "cert_manager" { + label_selectors = ["app.kubernetes.io/name=cert-manager"] + port_name = "http-metrics" + } + + cert_manager.scrape "cert_manager" { + targets = cert_manager.kubernetes.cert_manager.output + clustering = true + job_label = "integrations/cert-manager" + scrape_interval = "60s" + max_cache_size = 100000 + forward_to = argument.metrics_destinations.value + } +} +cert_manager_integration "integration" { + metrics_destinations = [ + prometheus.remote_write.prometheus.receiver, + ] +} + // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] diff --git a/charts/k8s-monitoring/docs/examples/features/integrations/cert-manager/output.yaml b/charts/k8s-monitoring/docs/examples/features/integrations/cert-manager/output.yaml index e20261a19..7d67538da 100644 --- a/charts/k8s-monitoring/docs/examples/features/integrations/cert-manager/output.yaml +++ b/charts/k8s-monitoring/docs/examples/features/integrations/cert-manager/output.yaml @@ -63,6 +63,38 @@ data: } } + declare "cert_manager_integration" { + argument "metrics_destinations" { + comment = "Must be a list of metric destinations where collected metrics should be forwarded to" + } + + import.git "cert_manager" { + repository = "https://github.com/grafana/alloy-modules.git" + revision = "main" + path = "modules/kubernetes/cert-manager/metrics.alloy" + pull_frequency = "15m" + } + + cert_manager.kubernetes "cert_manager" { + label_selectors = ["app.kubernetes.io/name=cert-manager"] + port_name = "http-metrics" + } + + cert_manager.scrape "cert_manager" { + targets = cert_manager.kubernetes.cert_manager.output + 
clustering = true + job_label = "integrations/cert-manager" + scrape_interval = "60s" + max_cache_size = 100000 + forward_to = argument.metrics_destinations.value + } + } + cert_manager_integration "integration" { + metrics_destinations = [ + prometheus.remote_write.prometheus.receiver, + ] + } + // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] diff --git a/charts/k8s-monitoring/docs/examples/features/integrations/etcd/alloy-metrics.alloy b/charts/k8s-monitoring/docs/examples/features/integrations/etcd/alloy-metrics.alloy index 9633768c0..d7c192268 100644 --- a/charts/k8s-monitoring/docs/examples/features/integrations/etcd/alloy-metrics.alloy +++ b/charts/k8s-monitoring/docs/examples/features/integrations/etcd/alloy-metrics.alloy @@ -38,6 +38,37 @@ prometheus.remote_write "prometheus" { } } +declare "etcd_integration" { + argument "metrics_destinations" { + comment = "Must be a list of metric destinations where collected metrics should be forwarded to" + } + + import.git "etcd" { + repository = "https://github.com/grafana/alloy-modules.git" + revision = "main" + path = "modules/databases/kv/etcd/metrics.alloy" + pull_frequency = "15m" + } + + etcd.kubernetes "etcd" { + label_selectors = ["app.kubernetes.io/component=etcd"] + port_name = "metrics" + } + + etcd.scrape "etcd" { + targets = etcd.kubernetes.etcd.output + clustering = true + scrape_interval = "60s" + max_cache_size = 100000 + forward_to = argument.metrics_destinations.value + } +} +etcd_integration "integration" { + metrics_destinations = [ + prometheus.remote_write.prometheus.receiver, + ] +} + // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] diff --git a/charts/k8s-monitoring/docs/examples/features/integrations/etcd/output.yaml b/charts/k8s-monitoring/docs/examples/features/integrations/etcd/output.yaml index 2ecd845cf..5b16c1ff9 100644 --- a/charts/k8s-monitoring/docs/examples/features/integrations/etcd/output.yaml +++ b/charts/k8s-monitoring/docs/examples/features/integrations/etcd/output.yaml @@ -63,6 +63,37 @@ data: } } + declare "etcd_integration" { + argument "metrics_destinations" { + comment = "Must be a list of metric destinations where collected metrics should be forwarded to" + } + + import.git "etcd" { + repository = "https://github.com/grafana/alloy-modules.git" + revision = "main" + path = "modules/databases/kv/etcd/metrics.alloy" + pull_frequency = "15m" + } + + etcd.kubernetes "etcd" { + label_selectors = ["app.kubernetes.io/component=etcd"] + port_name = "metrics" + } + + etcd.scrape "etcd" { + targets = etcd.kubernetes.etcd.output + clustering = true + scrape_interval = "60s" + max_cache_size = 100000 + forward_to = argument.metrics_destinations.value + } + } + etcd_integration "integration" { + metrics_destinations = [ + prometheus.remote_write.prometheus.receiver, + ] + } + // Self Reporting prometheus.exporter.unix "kubernetes_monitoring_telemetry" { set_collectors = ["textfile"] diff --git a/charts/k8s-monitoring/templates/features/_feature_integrations.tpl b/charts/k8s-monitoring/templates/features/_feature_integrations.tpl index eb30d40e1..ab8a979fa 100644 --- a/charts/k8s-monitoring/templates/features/_feature_integrations.tpl +++ b/charts/k8s-monitoring/templates/features/_feature_integrations.tpl @@ -7,10 +7,10 @@ {{- define "features.integrations.collectors" }} {{- $metricIntegrations := include "feature.integrations.configured.metrics" (dict "Values" .Values.integrations) | 
fromYamlArray }} {{- $logIntegrations := include "feature.integrations.configured.logs" (dict "Values" .Values.integrations) | fromYamlArray }} -{{- if $logIntegrations }} +{{- if (not (empty $metricIntegrations)) }} - {{ .Values.integrations.collectors.metrics }} {{- end }} -{{- if $logIntegrations }} +{{- if (not (empty $logIntegrations)) }} - {{ .Values.integrations.collectors.logs }} {{- end }} {{- end }} @@ -25,18 +25,40 @@ {{ include "destinations.alloy.targets" (dict "destinations" $.Values.destinations "names" $destinations "type" "metrics" "ecosystem" "prometheus") | indent 4 | trim }} ] } -{{- end -}} -{{- end -}} +{{- end }} +{{- end }} + +{{- define "features.integrations.logs.include" }} +{{- $destinations := include "features.integrations.destinations.logs" . | fromYamlArray }} +{{- $integrations := include "feature.integrations.configured.logs" (dict "Values" .Values.integrations) | fromYamlArray }} +{{- range $integrationType := $integrations }} + {{- include (printf "integrations.%s.module.metrics" $integrationType) (dict "Values" $.Values.integrations "Files" $.Subcharts.integrations.Files) | indent 0 }} +{{ include "helper.alloy_name" $integrationType }}_integration "integration" { + logs_destinations = [ + {{ include "destinations.alloy.targets" (dict "destinations" $.Values.destinations "names" $destinations "type" "logs" "ecosystem" "loki") | indent 4 | trim }} + ] +} +{{- end }} +{{- end }} + +{{- define "features.integrations.include" }} +{{- if eq .collectorName .Values.integrations.collectors.metrics }} + {{ include "features.integrations.metrics.include" . | indent 0 }} +{{- end }} +{{- if eq .collectorName .Values.integrations.collectors.logs }} + {{ include "features.integrations.logs.include" . | indent 0 }} +{{- end }} +{{- end }} {{- define "features.integrations.destinations" }} {{- $metricDestinations := include "features.integrations.destinations.metrics" . | fromYamlArray }} {{- $logDestinations := include "features.integrations.destinations.logs" . | fromYamlArray }} {{- concat $metricDestinations $logDestinations | uniq | toYaml }} -{{- end -}} +{{- end }} {{- define "features.integrations.destinations.metrics" }} {{- include "destinations.get" (dict "destinations" $.Values.destinations "type" "metrics" "ecosystem" "prometheus" "filter" $.Values.integrations.destinations) -}} -{{- end -}} +{{- end }} {{- define "features.integrations.destinations.logs" }} [] @@ -51,14 +73,13 @@ {{- $metricDestinations := include "features.integrations.destinations.metrics" . | fromYamlArray }} {{- include "destinations.validate_destination_list" (dict "destinations" $metricDestinations "type" "metrics" "ecosystem" "prometheus" "feature" $featureName) }} {{- include "collectors.require_collector" (dict "Values" $.Values "name" "alloy-metrics" "feature" $featureName) }} -{{- end -}} +{{- end }} {{- $logIntegrations := include "feature.integrations.configured.logs" (dict "Values" .Values.integrations) | fromYamlArray }} {{- if $logIntegrations }} {{- $logDestinations := include "features.integrations.destinations.logs" . 
| fromYamlArray }} {{- include "destinations.validate_destination_list" (dict "destinations" $logDestinations "type" "log" "ecosystem" "loki" "feature" $featureName) }} {{- include "collectors.require_collector" (dict "Values" $.Values "name" "alloy-logs" "feature" $featureName) }} -{{- end -}} - -{{- end -}} -{{- end -}} +{{- end }} +{{- end }} +{{- end }} diff --git a/charts/k8s-monitoring/tests/integration/integration-cert-manager/prometheus.yaml b/charts/k8s-monitoring/tests/integration/integration-cert-manager/prometheus.yaml new file mode 100644 index 000000000..23e225d1b --- /dev/null +++ b/charts/k8s-monitoring/tests/integration/integration-cert-manager/prometheus.yaml @@ -0,0 +1,30 @@ +--- +server: + extraFlags: + - enable-feature=remote-write-receiver + + persistentVolume: + enabled: false + + service: + servicePort: 9090 + +serverFiles: + prometheus.yml: + scrape_configs: [] + +configmapReload: + prometheus: + enabled: false + +alertmanager: + enabled: false + +kube-state-metrics: + enabled: false + +prometheus-node-exporter: + enabled: false + +prometheus-pushgateway: + enabled: false diff --git a/charts/k8s-monitoring/tests/integration/integration-cert-manager/test-manifest.yaml b/charts/k8s-monitoring/tests/integration/integration-cert-manager/test-manifest.yaml new file mode 100644 index 000000000..807dd3e6c --- /dev/null +++ b/charts/k8s-monitoring/tests/integration/integration-cert-manager/test-manifest.yaml @@ -0,0 +1,31 @@ +--- +prerequisites: + - type: helm + name: prometheus + repo: https://prometheus-community.github.io/helm-charts + chart: prometheus + namespace: prometheus + valuesFile: charts/k8s-monitoring/tests/integration/integration-cert-manager/prometheus.yaml + + - type: helm + name: cert-manager + repo: https://charts.jetstack.io + chart: cert-manager + namespace: cert-manager + values: + installCRDs: true + + - type: helm + name: grafana + repo: https://grafana.github.io/helm-charts + chart: grafana + namespace: grafana + values: + datasources: + datasources.yaml: + apiVersion: 1 + datasources: + - name: Prometheus + type: prometheus + url: http://prometheus-server.prometheus.svc:9090 + isDefault: true diff --git a/charts/k8s-monitoring/tests/integration/integration-cert-manager/test-values.yaml b/charts/k8s-monitoring/tests/integration/integration-cert-manager/test-values.yaml new file mode 100644 index 000000000..3d1c1c472 --- /dev/null +++ b/charts/k8s-monitoring/tests/integration/integration-cert-manager/test-values.yaml @@ -0,0 +1,16 @@ +--- +tests: + - env: + PROMETHEUS_URL: http://prometheus-server.prometheus.svc:9090/api/v1/query + + queries: + # Cert Manager metrics + - query: certmanager_clock_time_seconds{cluster="cert-manager-integration-test"} + type: promql + + # DPM check + - query: avg(count_over_time(scrape_samples_scraped{cluster="cert-manager-integration-test"}[1m])) + type: promql + expect: + value: 1 + operator: == diff --git a/charts/k8s-monitoring/tests/integration/integration-cert-manager/values.yaml b/charts/k8s-monitoring/tests/integration/integration-cert-manager/values.yaml new file mode 100644 index 000000000..a5e669fb0 --- /dev/null +++ b/charts/k8s-monitoring/tests/integration/integration-cert-manager/values.yaml @@ -0,0 +1,18 @@ +--- +cluster: + name: cert-manager-integration-test + +destinations: + - name: localPrometheus + type: prometheus + url: http://prometheus-server.prometheus.svc:9090/api/v1/write + +integrations: + cert-manager: + instances: + - name: cert-manager + metricsTuning: + includeMetrics: 
[scrape_samples_scraped, certmanager_.*] + +alloy-metrics: + enabled: true
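Usage sketch (illustrative, not part of the patch above): with the new jobName value added in this diff, the job label can be overridden per cert-manager instance, alongside scrapeInterval. The key names and structure below are taken from cert-manager-values.yaml and the integration test values in this diff; the custom job label value is a hypothetical example, and it is assumed that instance-level settings merge with the chart defaults.

    integrations:
      cert-manager:
        instances:
          - name: cert-manager
            # Hypothetical override of the default "integrations/cert-manager";
            # becomes the job label on scraped metrics via job_label in cert_manager.scrape.
            jobName: integrations/cert-manager-custom
            # How frequently to scrape cert-manager metrics (default 60s).
            scrapeInterval: 60s

    # The metrics collector must be enabled for the integration to be scraped,
    # mirroring the integration test values above.
    alloy-metrics:
      enabled: true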