From dc180dc22b4ed035f59cfd48dd6d1e0335829ea6 Mon Sep 17 00:00:00 2001 From: Stephen Lang Date: Fri, 3 May 2024 17:55:43 +0100 Subject: [PATCH] fix(dashboards): Port windows dashboards to new grafonnet --- dashboards/windows.libsonnet | 1649 ++++++++++++++++++++++------------ 1 file changed, 1067 insertions(+), 582 deletions(-) diff --git a/dashboards/windows.libsonnet b/dashboards/windows.libsonnet index 7630e8143..e9bcacbb9 100644 --- a/dashboards/windows.libsonnet +++ b/dashboards/windows.libsonnet @@ -1,616 +1,1101 @@ -local grafana = import 'github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet'; -local dashboard = grafana.dashboard; -local prometheus = grafana.prometheus; -local template = grafana.template; -local graphPanel = grafana.graphPanel; -local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libsonnet'; +local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; + +local prometheus = g.query.prometheus; +local stat = g.panel.stat; +local table = g.panel.table; +local timeSeries = g.panel.timeSeries; +local var = g.dashboard.variable; { - grafanaDashboards+:: { - 'k8s-resources-windows-cluster.json': - local tableStyles = { - namespace: { - alias: 'Namespace', - link: '%(prefix)s/d/%(uid)s/k8s-resources-windows-namespace?var-datasource=$datasource&var-namespace=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-windows-namespace.json') }, - }, - }; - - dashboard.new( - '%(dashboardNamePrefix)sCompute Resources / Cluster(Windows)' % $._config.grafanaK8s, - uid=($._config.grafanaDashboardIDs['k8s-resources-windows-cluster.json']), - tags=($._config.grafanaK8s.dashboardTags), - ).addTemplate( - { - current: { - text: 'default', - value: 'default', - }, - hide: 0, - label: null, - name: 'datasource', - options: [], - query: 'prometheus', - refresh: 1, - regex: $._config.datasourceFilterRegex, - type: 'datasource', + local statPanel(title, unit, query) = + stat.new(title) + + stat.options.withColorMode('none') + + stat.standardOptions.withUnit(unit) + + stat.queryOptions.withInterval($._config.grafanaK8s.minimumTimeInterval) + + stat.queryOptions.withTargets([ + prometheus.new('${datasource}', query) + + prometheus.withInstant(true), + ]), + + local tsPanel = + timeSeries { + new(title): + timeSeries.new(title) + + timeSeries.options.legend.withShowLegend() + + timeSeries.options.legend.withAsTable() + + timeSeries.options.legend.withDisplayMode('table') + + timeSeries.options.legend.withPlacement('right') + + timeSeries.options.legend.withCalcs(['lastNotNull']) + + timeSeries.options.tooltip.withMode('single') + + timeSeries.fieldConfig.defaults.custom.withShowPoints('never') + + timeSeries.fieldConfig.defaults.custom.withFillOpacity(10) + + timeSeries.fieldConfig.defaults.custom.withSpanNulls(true) + + timeSeries.queryOptions.withInterval($._config.grafanaK8s.minimumTimeInterval), + }, + + local variables = { + datasource: + var.datasource.new('datasource', 'prometheus') + + var.datasource.withRegex($._config.datasourceFilterRegex) + + var.datasource.generalOptions.showOnDashboard.withLabelAndValue() + + var.datasource.generalOptions.withLabel('Data source') + + { + current: { + selected: true, + text: $._config.datasourceName, + value: $._config.datasourceName, }, - ).addTemplate( - template.new( - 'cluster', - '$datasource', - 'label_values(up{%(windowsExporterSelector)s}, %(clusterLabel)s)' % $._config, - label='cluster', - refresh='time', - hide=if $._config.showMultiCluster then '' else 'variable', - sort=1, - ) - ) - .addRow( - (g.row('Headlines') + - { - height: '100px', - showTitle: false, - }) - .addPanel( - g.panel('CPU Utilisation') + - g.statPanel('1 - avg(rate(windows_cpu_time_total{%(clusterLabel)s="$cluster", %(windowsExporterSelector)s, mode="idle"}[1m]))' % $._config) - ) - .addPanel( - g.panel('CPU Requests Commitment') + - g.statPanel('sum(kube_pod_windows_container_resource_cpu_cores_request{%(clusterLabel)s="$cluster"}) / sum(node:windows_node_num_cpu:sum{%(clusterLabel)s="$cluster"})' % $._config) - ) - .addPanel( - g.panel('CPU Limits Commitment') + - g.statPanel('sum(kube_pod_windows_container_resource_cpu_cores_limit{%(clusterLabel)s="$cluster"}) / sum(node:windows_node_num_cpu:sum{%(clusterLabel)s="$cluster"})' % $._config) - ) - .addPanel( - g.panel('Memory Utilisation') + - g.statPanel('1 - sum(:windows_node_memory_MemFreeCached_bytes:sum{%(clusterLabel)s="$cluster"}) / sum(:windows_node_memory_MemTotal_bytes:sum{%(clusterLabel)s="$cluster"})' % $._config) - ) - .addPanel( - g.panel('Memory Requests Commitment') + - g.statPanel('sum(kube_pod_windows_container_resource_memory_request{%(clusterLabel)s="$cluster"}) / sum(:windows_node_memory_MemTotal_bytes:sum{%(clusterLabel)s="$cluster"})' % $._config) - ) - .addPanel( - g.panel('Memory Limits Commitment') + - g.statPanel('sum(kube_pod_windows_container_resource_memory_limit{%(clusterLabel)s="$cluster"}) / sum(:windows_node_memory_MemTotal_bytes:sum{%(clusterLabel)s="$cluster"})' % $._config) - ) + }, + + cluster: + var.query.new('cluster') + + var.query.withDatasourceFromVariable(self.datasource) + + var.query.queryTypes.withLabelValues( + $._config.clusterLabel, + 'up{%(windowsExporterSelector)s}' % $._config, ) - .addRow( - g.row('CPU') - .addPanel( - g.panel('CPU Usage') + - g.queryPanel('sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, '{{namespace}}') + - g.stack - ) + + var.query.generalOptions.withLabel('cluster') + + var.query.refresh.onTime() + + ( + if $._config.showMultiCluster + then var.query.generalOptions.showOnDashboard.withLabelAndValue() + else var.query.generalOptions.showOnDashboard.withNothing() ) - .addRow( - g.row('CPU Quota') - .addPanel( - g.panel('CPU Quota') + - g.tablePanel([ - 'sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, - 'sum(kube_pod_windows_container_resource_cpu_cores_request{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, - 'sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(kube_pod_windows_container_resource_cpu_cores_request{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, - 'sum(kube_pod_windows_container_resource_cpu_cores_limit{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, - 'sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(kube_pod_windows_container_resource_cpu_cores_limit{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, - ], tableStyles { - 'Value #A': { alias: 'CPU Usage' }, - 'Value #B': { alias: 'CPU Requests' }, - 'Value #C': { alias: 'CPU Requests %', unit: 'percentunit' }, - 'Value #D': { alias: 'CPU Limits' }, - 'Value #E': { alias: 'CPU Limits %', unit: 'percentunit' }, - }) - ) + + var.query.withSort(type='alphabetical'), + + instance: + var.query.new('instance') + + var.query.withDatasourceFromVariable(self.datasource) + + var.query.queryTypes.withLabelValues( + 'instance', + 'windows_system_system_up_time{%(clusterLabel)s="$cluster"}' % $._config ) - .addRow( - g.row('Memory') - .addPanel( - g.panel('Memory Usage (Private Working Set)') + - // Not using container_memory_usage_bytes here because that includes page cache - g.queryPanel('sum(windows_container_private_working_set_usage{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, '{{namespace}}') + - g.stack + - { yaxes: g.yaxes('decbytes') }, - ) + + var.query.generalOptions.withLabel('instance') + + var.query.refresh.onTime() + + var.query.generalOptions.showOnDashboard.withLabelAndValue() + + var.query.selectionOptions.withMulti(true), + + namespace: + var.query.new('namespace') + + var.query.withDatasourceFromVariable(self.datasource) + + var.query.queryTypes.withLabelValues( + 'namespace', + 'windows_pod_container_available{%(clusterLabel)s="$cluster"}' % $._config, ) - .addRow( - g.row('Memory Requests') - .addPanel( - g.panel('Requests by Namespace') + - g.tablePanel([ - // Not using container_memory_usage_bytes here because that includes page cache - 'sum(windows_container_private_working_set_usage{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, - 'sum(kube_pod_windows_container_resource_memory_request{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, - 'sum(windows_container_private_working_set_usage{%(clusterLabel)s="$cluster"}) by (namespace) / sum(kube_pod_windows_container_resource_memory_request{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, - 'sum(kube_pod_windows_container_resource_memory_limit{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, - 'sum(windows_container_private_working_set_usage{%(clusterLabel)s="$cluster"}) by (namespace) / sum(kube_pod_windows_container_resource_memory_limit{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config, - ], tableStyles { - 'Value #A': { alias: 'Memory Usage', unit: 'decbytes' }, - 'Value #B': { alias: 'Memory Requests', unit: 'decbytes' }, - 'Value #C': { alias: 'Memory Requests %', unit: 'percentunit' }, - 'Value #D': { alias: 'Memory Limits', unit: 'decbytes' }, - 'Value #E': { alias: 'Memory Limits %', unit: 'percentunit' }, - }) - ) - ), + + var.query.generalOptions.withLabel('namespace') + + var.query.refresh.onTime() + + var.query.generalOptions.showOnDashboard.withLabelAndValue() + + var.query.withSort(type='alphabetical'), - 'k8s-resources-windows-namespace.json': - local tableStyles = { - pod: { - alias: 'Pod', - link: '%(prefix)s/d/%(uid)s/k8s-resources-windows-pod?var-datasource=$datasource&var-namespace=$namespace&var-pod=$__cell' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-resources-windows-pod.json') }, - }, - }; - - dashboard.new( - '%(dashboardNamePrefix)sCompute Resources / Namespace(Windows)' % $._config.grafanaK8s, - uid=($._config.grafanaDashboardIDs['k8s-resources-windows-namespace.json']), - tags=($._config.grafanaK8s.dashboardTags), - ).addTemplate( - { - current: { - selected: true, - text: $._config.datasourceName, - value: $._config.datasourceName, - }, - hide: 0, - label: null, - name: 'datasource', - options: [], - query: 'prometheus', - refresh: 1, - regex: $._config.datasourceFilterRegex, - type: 'datasource', - }, + pod: + var.query.new('pod') + + var.query.withDatasourceFromVariable(self.datasource) + + var.query.queryTypes.withLabelValues( + 'pod', + 'windows_pod_container_available{%(clusterLabel)s="$cluster",namespace="$namespace"}' % $._config, ) - .addTemplate( - template.new( - 'cluster', - '$datasource', - 'label_values(up{%(windowsExporterSelector)s}, %(clusterLabel)s)' % $._config, - label='cluster', - refresh='time', - hide=if $._config.showMultiCluster then '' else 'variable', - sort=1, + + var.query.generalOptions.withLabel('pod') + + var.query.refresh.onTime() + + var.query.generalOptions.showOnDashboard.withLabelAndValue() + + var.query.withSort(type='alphabetical'), + }, + + local links = { + namespace: { + title: 'Drill down to pods', + url: '%(prefix)s/d/%(uid)s/k8s-resources-windows-namespace?${datasource:queryparam}&var-cluster=$cluster&var-namespace=${__data.fields.Namespace}' % { + uid: $._config.grafanaDashboardIDs['k8s-resources-windows-namespace.json'], + prefix: $._config.grafanaK8s.linkPrefix, + }, + }, + + pod: { + title: 'Drill down to pods', + url: '%(prefix)s/d/%(uid)s/k8s-resources-windows-pod?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}' % { + uid: $._config.grafanaDashboardIDs['k8s-resources-windows-pod.json'], + prefix: $._config.grafanaK8s.linkPrefix, + }, + }, + }, + + grafanaDashboards+:: { + 'k8s-resources-windows-cluster.json': + local panels = [ + statPanel( + 'CPU Utilisation', + 'none', + '1 - avg(rate(windows_cpu_time_total{%(clusterLabel)s="$cluster", %(windowsExporterSelector)s, mode="idle"}[1m]))' % $._config ) - ) - .addTemplate( - template.new( - 'namespace', - '$datasource', - 'label_values(windows_pod_container_available{%(clusterLabel)s="$cluster"}, namespace)' % $._config, - label='Namespace', - refresh='time', - sort=1, + + stat.gridPos.withW(4) + + stat.gridPos.withH(3), + + statPanel( + 'CPU Requests Commitment', + 'percentunit', + 'sum(kube_pod_windows_container_resource_cpu_cores_request{%(clusterLabel)s="$cluster"}) / sum(node:windows_node_num_cpu:sum{%(clusterLabel)s="$cluster"})' % $._config ) - ) - .addRow( - g.row('CPU Usage') - .addPanel( - g.panel('CPU Usage') + - g.queryPanel('sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, '{{pod}}') + - g.stack, + + stat.gridPos.withW(4) + + stat.gridPos.withH(3), + + statPanel( + 'CPU Limits Commitment', + 'percentunit', + 'sum(kube_pod_windows_container_resource_cpu_cores_limit{%(clusterLabel)s="$cluster"}) / sum(node:windows_node_num_cpu:sum{%(clusterLabel)s="$cluster"})' % $._config ) - ) - .addRow( - g.row('CPU Quota') - .addPanel( - g.panel('CPU Quota') + - g.tablePanel([ - 'sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - 'sum(kube_pod_windows_container_resource_cpu_cores_request{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - 'sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(kube_pod_windows_container_resource_cpu_cores_request{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - 'sum(kube_pod_windows_container_resource_cpu_cores_limit{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - 'sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(kube_pod_windows_container_resource_cpu_cores_limit{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - ], tableStyles { - 'Value #A': { alias: 'CPU Usage' }, - 'Value #B': { alias: 'CPU Requests' }, - 'Value #C': { alias: 'CPU Requests %', unit: 'percentunit' }, - 'Value #D': { alias: 'CPU Limits' }, - 'Value #E': { alias: 'CPU Limits %', unit: 'percentunit' }, - }) + + stat.gridPos.withW(4) + + stat.gridPos.withH(3), + + statPanel( + 'Memory Utilisation', + 'percentunit', + '1 - sum(:windows_node_memory_MemFreeCached_bytes:sum{%(clusterLabel)s="$cluster"}) / sum(:windows_node_memory_MemTotal_bytes:sum{%(clusterLabel)s="$cluster"})' % $._config ) - ) - .addRow( - g.row('Memory Usage') - .addPanel( - g.panel('Memory Usage') + - g.queryPanel('sum(windows_container_private_working_set_usage{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, '{{pod}}') + - g.stack + - { yaxes: g.yaxes('decbytes') }, + + stat.gridPos.withW(4) + + stat.gridPos.withH(3), + + statPanel( + 'Memory Requests Commitment', + 'percentunit', + 'sum(kube_pod_windows_container_resource_memory_request{%(clusterLabel)s="$cluster"}) / sum(:windows_node_memory_MemTotal_bytes:sum{%(clusterLabel)s="$cluster"})' % $._config ) - ) - .addRow( - g.row('Memory Quota') - .addPanel( - g.panel('Memory Quota') + - g.tablePanel([ - 'sum(windows_container_private_working_set_usage{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - 'sum(kube_pod_windows_container_resource_memory_request{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - 'sum(windows_container_private_working_set_usage{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(kube_pod_windows_container_resource_memory_request{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - 'sum(kube_pod_windows_container_resource_memory_limit{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - 'sum(windows_container_private_working_set_usage{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(kube_pod_windows_container_resource_memory_limit{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config, - ], tableStyles { - 'Value #A': { alias: 'Memory Usage', unit: 'decbytes' }, - 'Value #B': { alias: 'Memory Requests', unit: 'decbytes' }, - 'Value #C': { alias: 'Memory Requests %', unit: 'percentunit' }, - 'Value #D': { alias: 'Memory Limits', unit: 'decbytes' }, - 'Value #E': { alias: 'Memory Limits %', unit: 'percentunit' }, - }) + + stat.gridPos.withW(4) + + stat.gridPos.withH(3), + + statPanel( + 'Memory Limits Commitment', + 'percentunit', + 'sum(kube_pod_windows_container_resource_memory_limit{%(clusterLabel)s="$cluster"}) / sum(:windows_node_memory_MemTotal_bytes:sum{%(clusterLabel)s="$cluster"})' % $._config ) - ), + + stat.gridPos.withW(4) + + stat.gridPos.withH(3), + + tsPanel.new('CPU Usage') + + tsPanel.queryOptions.withTargets([ + prometheus.new( + '${datasource}', + 'sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config + ) + + prometheus.withLegendFormat('__auto'), + ]), + + table.new('CPU Quota') + + table.queryOptions.withTargets([ + prometheus.new('${datasource}', 'sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + + prometheus.new('${datasource}', 'sum(kube_pod_windows_container_resource_cpu_cores_request{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + + prometheus.new('${datasource}', 'sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(kube_pod_windows_container_resource_cpu_cores_request{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + + prometheus.new('${datasource}', 'sum(kube_pod_windows_container_resource_cpu_cores_limit{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + + prometheus.new('${datasource}', 'sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster"}) by (namespace) / sum(kube_pod_windows_container_resource_cpu_cores_limit{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + ]) + + + table.queryOptions.withTransformations([ + table.queryOptions.transformation.withId('joinByField') + + table.queryOptions.transformation.withOptions({ + byField: 'namespace', + mode: 'outer', + }), + + table.queryOptions.transformation.withId('organize') + + table.queryOptions.transformation.withOptions({ + excludeByName: { + Time: true, + 'Time 1': true, + 'Time 2': true, + 'Time 3': true, + 'Time 4': true, + 'Time 5': true, + }, + indexByName: { + 'Time 1': 0, + 'Time 2': 1, + 'Time 3': 2, + 'Time 4': 3, + 'Time 5': 4, + namespace: 5, + 'Value #A': 6, + 'Value #B': 7, + 'Value #C': 8, + 'Value #D': 9, + 'Value #E': 10, + }, + renameByName: { + namespace: 'Namespace', + 'Value #A': 'CPU Usage', + 'Value #B': 'CPU Requests', + 'Value #C': 'CPU Requests %', + 'Value #D': 'CPU Limits', + 'Value #E': 'CPU Limits %', + }, + }), + ]) + + + table.standardOptions.withOverrides([ + { + matcher: { + id: 'byRegexp', + options: '/%/', + }, + properties: [ + { + id: 'unit', + value: 'percentunit', + }, + ], + }, + { + matcher: { + id: 'byName', + options: 'Namespace', + }, + properties: [ + { + id: 'links', + value: [links.namespace], + }, + ], + }, + ]), + + tsPanel.new('Memory Usage (Private Working Set)') + + tsPanel.standardOptions.withUnit('decbytes') + + tsPanel.queryOptions.withTargets([ + prometheus.new( + '${datasource}', + 'sum(windows_container_private_working_set_usage{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config + ) + + prometheus.withLegendFormat('__auto'), + ]), + + table.new('Memory Requests by Namespace') + + table.standardOptions.withUnit('bytes') + + table.queryOptions.withTargets([ + prometheus.new('${datasource}', 'sum(windows_container_private_working_set_usage{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + + prometheus.new('${datasource}', 'sum(kube_pod_windows_container_resource_memory_request{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + + prometheus.new('${datasource}', 'sum(windows_container_private_working_set_usage{%(clusterLabel)s="$cluster"}) by (namespace) / sum(kube_pod_windows_container_resource_memory_request{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + + prometheus.new('${datasource}', 'sum(kube_pod_windows_container_resource_memory_limit{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + + prometheus.new('${datasource}', 'sum(windows_container_private_working_set_usage{%(clusterLabel)s="$cluster"}) by (namespace) / sum(kube_pod_windows_container_resource_memory_limit{%(clusterLabel)s="$cluster"}) by (namespace)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + ]) + + + table.queryOptions.withTransformations([ + table.queryOptions.transformation.withId('joinByField') + + table.queryOptions.transformation.withOptions({ + byField: 'namespace', + mode: 'outer', + }), + + table.queryOptions.transformation.withId('organize') + + table.queryOptions.transformation.withOptions({ + excludeByName: { + Time: true, + 'Time 1': true, + 'Time 2': true, + 'Time 3': true, + 'Time 4': true, + 'Time 5': true, + }, + indexByName: { + 'Time 1': 0, + 'Time 2': 1, + 'Time 3': 2, + 'Time 4': 3, + 'Time 5': 4, + namespace: 5, + 'Value #A': 6, + 'Value #B': 7, + 'Value #C': 8, + 'Value #D': 9, + 'Value #E': 10, + }, + renameByName: { + namespace: 'Namespace', + 'Value #A': 'Memory Usage', + 'Value #B': 'Memory Requests', + 'Value #C': 'Memory Requests %', + 'Value #D': 'Memory Limits', + 'Value #E': 'Memory Limits %', + }, + }), + ]) + + + table.standardOptions.withOverrides([ + { + matcher: { + id: 'byRegexp', + options: '/%/', + }, + properties: [ + { + id: 'unit', + value: 'percentunit', + }, + ], + }, + { + matcher: { + id: 'byName', + options: 'Memory Usage', + }, + properties: [ + { + id: 'unit', + value: 'decbytes', + }, + ], + }, + { + matcher: { + id: 'byName', + options: 'Memory Requests', + }, + properties: [ + { + id: 'unit', + value: 'decbytes', + }, + ], + }, + { + matcher: { + id: 'byName', + options: 'Memory Limits', + }, + properties: [ + { + id: 'unit', + value: 'decbytes', + }, + ], + }, + { + matcher: { + id: 'byName', + options: 'Namespace', + }, + properties: [ + { + id: 'links', + value: [links.namespace], + }, + ], + }, + ]), + ]; + + g.dashboard.new('%(dashboardNamePrefix)sCompute Resources / Cluster(Windows)' % $._config.grafanaK8s) + + g.dashboard.withUid($._config.grafanaDashboardIDs['k8s-resources-windows-cluster.json']) + + g.dashboard.withTags($._config.grafanaK8s.dashboardTags) + + g.dashboard.withEditable(false) + + g.dashboard.time.withFrom('now-1h') + + g.dashboard.time.withTo('now') + + g.dashboard.withRefresh($._config.grafanaK8s.refresh) + + g.dashboard.withVariables([variables.datasource, variables.cluster]) + + g.dashboard.withPanels(g.util.grid.wrapPanels(panels, panelWidth=24, panelHeight=7)), + + 'k8s-resources-windows-namespace.json': + local panels = [ + tsPanel.new('CPU Usage') + + tsPanel.queryOptions.withTargets([ + prometheus.new( + '${datasource}', + 'sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config + ) + + prometheus.withLegendFormat('__auto'), + ]), + + table.new('CPU Quota') + + table.queryOptions.withTargets([ + prometheus.new('${datasource}', 'sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + + prometheus.new('${datasource}', 'sum(kube_pod_windows_container_resource_cpu_cores_request{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + + prometheus.new('${datasource}', 'sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(kube_pod_windows_container_resource_cpu_cores_request{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + + prometheus.new('${datasource}', 'sum(kube_pod_windows_container_resource_cpu_cores_limit{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + + prometheus.new('${datasource}', 'sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(kube_pod_windows_container_resource_cpu_cores_limit{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + ]) + + + table.queryOptions.withTransformations([ + table.queryOptions.transformation.withId('joinByField') + + table.queryOptions.transformation.withOptions({ + byField: 'pod', + mode: 'outer', + }), + + table.queryOptions.transformation.withId('organize') + + table.queryOptions.transformation.withOptions({ + excludeByName: { + Time: true, + 'Time 1': true, + 'Time 2': true, + 'Time 3': true, + 'Time 4': true, + 'Time 5': true, + }, + indexByName: { + 'Time 1': 0, + 'Time 2': 1, + 'Time 3': 2, + 'Time 4': 3, + 'Time 5': 4, + pod: 5, + 'Value #A': 6, + 'Value #B': 7, + 'Value #C': 8, + 'Value #D': 9, + 'Value #E': 10, + }, + renameByName: { + pod: 'Pod', + 'Value #A': 'CPU Usage', + 'Value #B': 'CPU Requests', + 'Value #C': 'CPU Requests %', + 'Value #D': 'CPU Limits', + 'Value #E': 'CPU Limits %', + }, + }), + ]) + + + table.standardOptions.withOverrides([ + { + matcher: { + id: 'byRegexp', + options: '/%/', + }, + properties: [ + { + id: 'unit', + value: 'percentunit', + }, + ], + }, + { + matcher: { + id: 'byName', + options: 'Pod', + }, + properties: [ + { + id: 'links', + value: [links.pod], + }, + ], + }, + ]), + + tsPanel.new('Memory Usage (Private Working Set)') + + tsPanel.standardOptions.withUnit('decbytes') + + tsPanel.queryOptions.withTargets([ + prometheus.new( + '${datasource}', + 'sum(windows_container_private_working_set_usage{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config + ) + + prometheus.withLegendFormat('__auto'), + ]), + + table.new('Memory Quota') + + table.standardOptions.withUnit('bytes') + + table.queryOptions.withTargets([ + prometheus.new('${datasource}', 'sum(windows_container_private_working_set_usage{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + + prometheus.new('${datasource}', 'sum(kube_pod_windows_container_resource_memory_request{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + + prometheus.new('${datasource}', 'sum(windows_container_private_working_set_usage{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(kube_pod_windows_container_resource_memory_request{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + + prometheus.new('${datasource}', 'sum(kube_pod_windows_container_resource_memory_limit{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + + prometheus.new('${datasource}', 'sum(windows_container_private_working_set_usage{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod) / sum(kube_pod_windows_container_resource_memory_limit{%(clusterLabel)s="$cluster", namespace="$namespace"}) by (pod)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + ]) + + + table.queryOptions.withTransformations([ + table.queryOptions.transformation.withId('joinByField') + + table.queryOptions.transformation.withOptions({ + byField: 'pod', + mode: 'outer', + }), + + table.queryOptions.transformation.withId('organize') + + table.queryOptions.transformation.withOptions({ + excludeByName: { + Time: true, + 'Time 1': true, + 'Time 2': true, + 'Time 3': true, + 'Time 4': true, + 'Time 5': true, + }, + indexByName: { + 'Time 1': 0, + 'Time 2': 1, + 'Time 3': 2, + 'Time 4': 3, + 'Time 5': 4, + pod: 5, + 'Value #A': 6, + 'Value #B': 7, + 'Value #C': 8, + 'Value #D': 9, + 'Value #E': 10, + }, + renameByName: { + pod: 'Pod', + 'Value #A': 'Memory Usage', + 'Value #B': 'Memory Requests', + 'Value #C': 'Memory Requests %', + 'Value #D': 'Memory Limits', + 'Value #E': 'Memory Limits %', + }, + }), + ]) + + + table.standardOptions.withOverrides([ + { + matcher: { + id: 'byRegexp', + options: '/%/', + }, + properties: [ + { + id: 'unit', + value: 'percentunit', + }, + ], + }, + { + matcher: { + id: 'byName', + options: 'Pod', + }, + properties: [ + { + id: 'links', + value: [links.pod], + }, + ], + }, + ]), + ]; + + g.dashboard.new('%(dashboardNamePrefix)sCompute Resources / Namespace(Windows)' % $._config.grafanaK8s) + + g.dashboard.withUid($._config.grafanaDashboardIDs['k8s-resources-windows-namespace.json']) + + g.dashboard.withTags($._config.grafanaK8s.dashboardTags) + + g.dashboard.withEditable(false) + + g.dashboard.time.withFrom('now-1h') + + g.dashboard.time.withTo('now') + + g.dashboard.withRefresh($._config.grafanaK8s.refresh) + + g.dashboard.withVariables([variables.datasource, variables.cluster, variables.namespace]) + + g.dashboard.withPanels(g.util.grid.wrapPanels(panels, panelWidth=24, panelHeight=7)), 'k8s-resources-windows-pod.json': - local tableStyles = { - container: { - alias: 'Container', - }, - }; - - dashboard.new( - '%(dashboardNamePrefix)sCompute Resources / Pod(Windows)' % $._config.grafanaK8s, - uid=($._config.grafanaDashboardIDs['k8s-resources-windows-pod.json']), - tags=($._config.grafanaK8s.dashboardTags), - ).addTemplate( - { - current: { - text: 'default', - value: 'default', + local panels = [ + tsPanel.new('CPU Usage') + + tsPanel.queryOptions.withTargets([ + prometheus.new( + '${datasource}', + 'sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config + ) + + prometheus.withLegendFormat('__auto'), + ]), + + table.new('CPU Quota') + + table.queryOptions.withTargets([ + prometheus.new('${datasource}', 'sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + + prometheus.new('${datasource}', 'sum(kube_pod_windows_container_resource_cpu_cores_request{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + + prometheus.new('${datasource}', 'sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container) / sum(kube_pod_windows_container_resource_cpu_cores_request{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + + prometheus.new('${datasource}', 'sum(kube_pod_windows_container_resource_cpu_cores_limit{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + + prometheus.new('${datasource}', 'sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container) / sum(kube_pod_windows_container_resource_cpu_cores_limit{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + ]) + + + table.queryOptions.withTransformations([ + table.queryOptions.transformation.withId('joinByField') + + table.queryOptions.transformation.withOptions({ + byField: 'container', + mode: 'outer', + }), + + table.queryOptions.transformation.withId('organize') + + table.queryOptions.transformation.withOptions({ + excludeByName: { + Time: true, + 'Time 1': true, + 'Time 2': true, + 'Time 3': true, + 'Time 4': true, + 'Time 5': true, + }, + indexByName: { + 'Time 1': 0, + 'Time 2': 1, + 'Time 3': 2, + 'Time 4': 3, + 'Time 5': 4, + container: 5, + 'Value #A': 6, + 'Value #B': 7, + 'Value #C': 8, + 'Value #D': 9, + 'Value #E': 10, + }, + renameByName: { + container: 'Container', + 'Value #A': 'CPU Usage', + 'Value #B': 'CPU Requests', + 'Value #C': 'CPU Requests %', + 'Value #D': 'CPU Limits', + 'Value #E': 'CPU Limits %', + }, + }), + ]) + + + table.standardOptions.withOverrides([ + { + matcher: { + id: 'byRegexp', + options: '/%/', + }, + properties: [ + { + id: 'unit', + value: 'percentunit', + }, + ], }, - hide: 0, - label: null, - name: 'datasource', - options: [], - query: 'prometheus', - refresh: 1, - regex: $._config.datasourceFilterRegex, - type: 'datasource', - }, - ) - .addTemplate( - template.new( - 'cluster', - '$datasource', - 'label_values(up{%(windowsExporterSelector)s}, %(clusterLabel)s)' % $._config, - label='cluster', - refresh='time', - hide=if $._config.showMultiCluster then '' else 'variable', - sort=1, - ) - ) - .addTemplate( - template.new( - 'namespace', - '$datasource', - 'label_values(windows_pod_container_available{%(clusterLabel)s="$cluster"}, namespace)' % $._config, - label='Namespace', - refresh='time', - sort=1, - ) - ) - .addTemplate( - template.new( - 'pod', - '$datasource', - 'label_values(windows_pod_container_available{%(clusterLabel)s="$cluster",namespace="$namespace"}, pod)' % $._config, - label='Pod', - refresh='time', - sort=1, - ) - ) - .addRow( - g.row('CPU Usage') - .addPanel( - g.panel('CPU Usage') + - g.queryPanel('sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config, '{{container}}') + - g.stack, - ) - ) - .addRow( - g.row('CPU Quota') - .addPanel( - g.panel('CPU Quota') + - g.tablePanel([ - 'sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config, - 'sum(kube_pod_windows_container_resource_cpu_cores_request{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config, - 'sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container) / sum(kube_pod_windows_container_resource_cpu_cores_request{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config, - 'sum(kube_pod_windows_container_resource_cpu_cores_limit{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config, - 'sum(namespace_pod_container:windows_container_cpu_usage_seconds_total:sum_rate{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container) / sum(kube_pod_windows_container_resource_cpu_cores_limit{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config, - ], tableStyles { - 'Value #A': { alias: 'CPU Usage' }, - 'Value #B': { alias: 'CPU Requests' }, - 'Value #C': { alias: 'CPU Requests %', unit: 'percentunit' }, - 'Value #D': { alias: 'CPU Limits' }, - 'Value #E': { alias: 'CPU Limits %', unit: 'percentunit' }, - }) - ) - ) - .addRow( - g.row('Memory Usage') - .addPanel( - g.panel('Memory Usage') + - g.queryPanel('sum(windows_container_private_working_set_usage{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config, '{{container}}') + - g.stack, - ) - ) - .addRow( - g.row('Memory Quota') - .addPanel( - g.panel('Memory Quota') + - g.tablePanel([ - 'sum(windows_container_private_working_set_usage{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config, - 'sum(kube_pod_windows_container_resource_memory_request{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config, - 'sum(windows_container_private_working_set_usage{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container) / sum(kube_pod_windows_container_resource_memory_request{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config, - 'sum(kube_pod_windows_container_resource_memory_limit{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config, - 'sum(windows_container_private_working_set_usage{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container) / sum(kube_pod_windows_container_resource_memory_limit{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config, - ], tableStyles { - 'Value #A': { alias: 'Memory Usage', unit: 'decbytes' }, - 'Value #B': { alias: 'Memory Requests', unit: 'decbytes' }, - 'Value #C': { alias: 'Memory Requests %', unit: 'percentunit' }, - 'Value #D': { alias: 'Memory Limits', unit: 'decbytes' }, - 'Value #E': { alias: 'Memory Limits %', unit: 'percentunit' }, - }) - ) - ) - .addRow( - g.row('Network I/O') - .addPanel( - graphPanel.new( - 'Network I/O', - datasource='$datasource', - format='bytes', - min=0, - legend_rightSide=true, - legend_alignAsTable=true, - legend_current=true, - legend_avg=true, + { + matcher: { + id: 'byName', + options: 'Namespace', + }, + properties: [ + { + id: 'links', + value: [links.namespace], + }, + ], + }, + ]), + + tsPanel.new('Memory Usage') + + tsPanel.standardOptions.withUnit('decbytes') + + tsPanel.queryOptions.withTargets([ + prometheus.new( + '${datasource}', + 'sum(windows_container_private_working_set_usage{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config ) - .addTarget(prometheus.target( - 'sort_desc(sum by (container) (rate(windows_container_network_received_bytes_total{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}[1m])))' % $._config, - legendFormat='Received : {{ container }}', - )) - .addTarget(prometheus.target( - 'sort_desc(sum by (container) (rate(windows_container_network_transmitted_bytes_total{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}[1m])))' % $._config, - legendFormat='Transmitted : {{ container }}', - )) - ) - ), + + prometheus.withLegendFormat('__auto'), + ]), - 'k8s-windows-cluster-rsrc-use.json': - local legendLink = '%(prefix)s/d/%(uid)s/k8s-windows-node-rsrc-use' % { prefix: $._config.grafanaK8s.linkPrefix, uid: std.md5('k8s-windows-node-rsrc-use.json') }; - - dashboard.new( - '%(dashboardNamePrefix)sUSE Method / Cluster(Windows)' % $._config.grafanaK8s, - uid=($._config.grafanaDashboardIDs['k8s-windows-cluster-rsrc-use.json']), - tags=($._config.grafanaK8s.dashboardTags), - ).addTemplate( - { - current: { - text: 'default', - value: 'default', + table.new('Memory Quota') + + table.standardOptions.withUnit('bytes') + + table.queryOptions.withTargets([ + prometheus.new('${datasource}', 'sum(windows_container_private_working_set_usage{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + + prometheus.new('${datasource}', 'sum(kube_pod_windows_container_resource_memory_request{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + + prometheus.new('${datasource}', 'sum(windows_container_private_working_set_usage{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container) / sum(kube_pod_windows_container_resource_memory_request{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + + prometheus.new('${datasource}', 'sum(kube_pod_windows_container_resource_memory_limit{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + + prometheus.new('${datasource}', 'sum(windows_container_private_working_set_usage{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container) / sum(kube_pod_windows_container_resource_memory_limit{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}) by (container)' % $._config) + + prometheus.withInstant(true) + + prometheus.withFormat('table'), + ]) + + + table.queryOptions.withTransformations([ + table.queryOptions.transformation.withId('joinByField') + + table.queryOptions.transformation.withOptions({ + byField: 'container', + mode: 'outer', + }), + + table.queryOptions.transformation.withId('organize') + + table.queryOptions.transformation.withOptions({ + excludeByName: { + Time: true, + 'Time 1': true, + 'Time 2': true, + 'Time 3': true, + 'Time 4': true, + 'Time 5': true, + }, + indexByName: { + 'Time 1': 0, + 'Time 2': 1, + 'Time 3': 2, + 'Time 4': 3, + 'Time 5': 4, + container: 5, + 'Value #A': 6, + 'Value #B': 7, + 'Value #C': 8, + 'Value #D': 9, + 'Value #E': 10, + }, + renameByName: { + container: 'Container', + 'Value #A': 'Memory Usage', + 'Value #B': 'Memory Requests', + 'Value #C': 'Memory Requests %', + 'Value #D': 'Memory Limits', + 'Value #E': 'Memory Limits %', + }, + }), + ]) + + + table.standardOptions.withOverrides([ + { + matcher: { + id: 'byRegexp', + options: '/%/', + }, + properties: [ + { + id: 'unit', + value: 'percentunit', + }, + ], }, - hide: 0, - label: null, - name: 'datasource', - options: [], - query: 'prometheus', - refresh: 1, - regex: $._config.datasourceFilterRegex, - type: 'datasource', - }, - ).addTemplate( - template.new( - 'cluster', - '$datasource', - 'label_values(up{%(windowsExporterSelector)s}, %(clusterLabel)s)' % $._config, - label='cluster', - refresh='time', - hide=if $._config.showMultiCluster then '' else 'variable', - sort=1, - ) - ) - .addRow( - g.row('CPU') - .addPanel( - g.panel('CPU Utilisation') + - g.queryPanel('node:windows_node_cpu_utilisation:avg1m{%(clusterLabel)s="$cluster"} * node:windows_node_num_cpu:sum{%(clusterLabel)s="$cluster"} / scalar(sum(node:windows_node_num_cpu:sum{%(clusterLabel)s="$cluster"}))' % $._config, '{{instance}}', legendLink) + - g.stack + - { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) }, - ) - ) - .addRow( - g.row('Memory') - .addPanel( - g.panel('Memory Utilisation') + - g.queryPanel('node:windows_node_memory_utilisation:ratio{%(clusterLabel)s="$cluster"}' % $._config, '{{instance}}', legendLink) + - g.stack + - { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) }, - ) - .addPanel( - g.panel('Memory Saturation (Swap I/O Pages)') + - g.queryPanel('node:windows_node_memory_swap_io_pages:irate{%(clusterLabel)s="$cluster"}' % $._config, '{{instance}}', legendLink) + - g.stack + - { yaxes: g.yaxes('short') }, - ) - ) - .addRow( - g.row('Disk') - .addPanel( - g.panel('Disk IO Utilisation') + - // Full utilisation would be all disks on each node spending an average of - // 1 sec per second doing I/O, normalize by node count for stacked charts - g.queryPanel('node:windows_node_disk_utilisation:avg_irate{%(clusterLabel)s="$cluster"} / scalar(node:windows_node:sum{%(clusterLabel)s="$cluster"})' % $._config, '{{instance}}', legendLink) + - g.stack + - { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) }, - ) - ) - .addRow( - g.row('Network') - .addPanel( - g.panel('Net Utilisation (Transmitted)') + - g.queryPanel('node:windows_node_net_utilisation:sum_irate{%(clusterLabel)s="$cluster"}' % $._config, '{{instance}}', legendLink) + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - .addPanel( - g.panel('Net Saturation (Dropped)') + - g.queryPanel('node:windows_node_net_saturation:sum_irate{%(clusterLabel)s="$cluster"}' % $._config, '{{instance}}', legendLink) + - g.stack + - { yaxes: g.yaxes('Bps') }, - ) - ) - .addRow( - g.row('Storage') - .addPanel( - g.panel('Disk Capacity') + - g.queryPanel( - ||| - sum by (instance)(node:windows_node_filesystem_usage:{%(clusterLabel)s="$cluster"}) - ||| % $._config, '{{instance}}', legendLink - ) + - g.stack + - { yaxes: g.yaxes({ format: 'percentunit', max: 1 }) }, - ), - ), + ]), + + tsPanel.new('Network I/O') + + tsPanel.standardOptions.withUnit('bytes') + + tsPanel.queryOptions.withTargets([ + prometheus.new( + '${datasource}', + 'sort_desc(sum by (container) (rate(windows_container_network_received_bytes_total{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}[1m])))' % $._config + ) + + prometheus.withLegendFormat('Received : {{ container }}'), + + prometheus.new( + '${datasource}', + 'sort_desc(sum by (container) (rate(windows_container_network_transmitted_bytes_total{%(clusterLabel)s="$cluster", namespace="$namespace", pod="$pod"}[1m])))' % $._config + ) + + prometheus.withLegendFormat('Transmitted : {{ container }}'), + ]), + ]; + + g.dashboard.new('%(dashboardNamePrefix)sCompute Resources / Pod(Windows)' % $._config.grafanaK8s) + + g.dashboard.withUid($._config.grafanaDashboardIDs['k8s-resources-windows-pod.json']) + + g.dashboard.withTags($._config.grafanaK8s.dashboardTags) + + g.dashboard.withEditable(false) + + g.dashboard.time.withFrom('now-1h') + + g.dashboard.time.withTo('now') + + g.dashboard.withRefresh($._config.grafanaK8s.refresh) + + g.dashboard.withVariables([variables.datasource, variables.cluster, variables.namespace, variables.pod]) + + g.dashboard.withPanels(g.util.grid.wrapPanels(panels, panelWidth=24, panelHeight=7)), + + 'k8s-windows-cluster-rsrc-use.json': + local panels = [ + tsPanel.new('CPU Utilisation') + + tsPanel.gridPos.withW(24) + + tsPanel.standardOptions.withUnit('percentunit') + + tsPanel.queryOptions.withTargets([ + prometheus.new( + '${datasource}', + 'node:windows_node_cpu_utilisation:avg1m{%(clusterLabel)s="$cluster"} * node:windows_node_num_cpu:sum{%(clusterLabel)s="$cluster"} / scalar(sum(node:windows_node_num_cpu:sum{%(clusterLabel)s="$cluster"}))' % $._config + ) + + prometheus.withLegendFormat('{{instance}}'), + ]), + + tsPanel.new('Memory Utilisation') + + tsPanel.standardOptions.withUnit('percentunit') + + tsPanel.queryOptions.withTargets([ + prometheus.new( + '${datasource}', + 'node:windows_node_memory_utilisation:ratio{%(clusterLabel)s="$cluster"}' % $._config + ) + + prometheus.withLegendFormat('{{instance}}'), + ]), + + tsPanel.new('Memory Saturation (Swap I/O Pages)') + + tsPanel.standardOptions.withUnit('short') + + tsPanel.queryOptions.withTargets([ + prometheus.new( + '${datasource}', + 'node:windows_node_memory_swap_io_pages:irate{%(clusterLabel)s="$cluster"}' % $._config + ) + + prometheus.withLegendFormat('{{instance}}'), + ]), + + tsPanel.new('Disk IO Utilisation') + + tsPanel.gridPos.withW(24) + + tsPanel.standardOptions.withUnit('percentunit') + + tsPanel.queryOptions.withTargets([ + prometheus.new( + '${datasource}', + 'node:windows_node_disk_utilisation:avg_irate{%(clusterLabel)s="$cluster"} / scalar(node:windows_node:sum{%(clusterLabel)s="$cluster"})' % $._config + ) + + prometheus.withLegendFormat('{{instance}}'), + ]), + + tsPanel.new('Net Utilisation (Transmitted)') + + tsPanel.standardOptions.withUnit('Bps') + + tsPanel.queryOptions.withTargets([ + prometheus.new( + '${datasource}', + 'node:windows_node_net_utilisation:sum_irate{%(clusterLabel)s="$cluster"}' % $._config + ) + + prometheus.withLegendFormat('{{instance}}'), + ]), + + tsPanel.new('Net Utilisation (Dropped)') + + tsPanel.standardOptions.withUnit('Bps') + + tsPanel.queryOptions.withTargets([ + prometheus.new( + '${datasource}', + 'node:windows_node_net_saturation:sum_irate{%(clusterLabel)s="$cluster"}' % $._config + ) + + prometheus.withLegendFormat('{{instance}}'), + ]), + + tsPanel.new('Disk Capacity') + + tsPanel.gridPos.withW(24) + + tsPanel.standardOptions.withUnit('percentunit') + + tsPanel.queryOptions.withTargets([ + prometheus.new( + '${datasource}', + 'sum by (instance)(node:windows_node_filesystem_usage:{%(clusterLabel)s="$cluster"})' % $._config + ) + + prometheus.withLegendFormat('{{instance}}'), + ]), + ]; + + g.dashboard.new('%(dashboardNamePrefix)sUSE Method / Cluster(Windows)' % $._config.grafanaK8s) + + g.dashboard.withUid($._config.grafanaDashboardIDs['k8s-windows-cluster-rsrc-use.json']) + + g.dashboard.withTags($._config.grafanaK8s.dashboardTags) + + g.dashboard.withEditable(false) + + g.dashboard.time.withFrom('now-1h') + + g.dashboard.time.withTo('now') + + g.dashboard.withRefresh($._config.grafanaK8s.refresh) + + g.dashboard.withVariables([variables.datasource, variables.cluster]) + + g.dashboard.withPanels(g.util.grid.wrapPanels(panels, panelWidth=12, panelHeight=7)), 'k8s-windows-node-rsrc-use.json': - dashboard.new( - '%(dashboardNamePrefix)sUSE Method / Node(Windows)' % $._config.grafanaK8s, - uid=($._config.grafanaDashboardIDs['k8s-windows-node-rsrc-use.json']), - tags=($._config.grafanaK8s.dashboardTags), - ).addTemplate( - { - current: { - text: 'default', - value: 'default', - }, - hide: 0, - label: null, - name: 'datasource', - options: [], - query: 'prometheus', - refresh: 1, - regex: $._config.datasourceFilterRegex, - type: 'datasource', - }, - ) - .addTemplate( - template.new( - 'cluster', - '$datasource', - 'label_values(up{%(windowsExporterSelector)s}, %(clusterLabel)s)' % $._config, - label='cluster', - refresh='time', - hide=if $._config.showMultiCluster then '' else 'variable', - sort=1, - ) - ) - .addTemplate( - template.new( - 'instance', - '$datasource', - 'label_values(windows_system_system_up_time{%(clusterLabel)s="$cluster"}, instance)' % $._config, - label='Instance', - refresh='time', - sort=1, - ) - ) - .addRow( - g.row('CPU') - .addPanel( - g.panel('CPU Utilisation') + - g.queryPanel('node:windows_node_cpu_utilisation:avg1m{%(clusterLabel)s="$cluster", instance="$instance"}' % $._config, 'Utilisation') + - { yaxes: g.yaxes('percentunit') }, - ) - .addPanel( - g.panel('CPU Usage Per Core') + - g.queryPanel('sum by (core) (irate(windows_cpu_time_total{%(clusterLabel)s="$cluster", %(windowsExporterSelector)s, mode!="idle", instance="$instance"}[%(grafanaIntervalVar)s]))' % $._config, '{{core}}') + - { yaxes: g.yaxes('percentunit') }, - ) - ) - .addRow( - g.row('Memory') - .addPanel( - g.panel('Memory Utilisation %') + - g.queryPanel('node:windows_node_memory_utilisation:{%(clusterLabel)s="$cluster", instance="$instance"}' % $._config, 'Memory') + - { yaxes: g.yaxes('percentunit') }, - ) - .addPanel( - graphPanel.new('Memory Usage', - datasource='$datasource', - format='bytes',) - .addTarget(prometheus.target( + local panels = [ + tsPanel.new('CPU Utilisation') + + tsPanel.standardOptions.withUnit('percentunit') + + tsPanel.queryOptions.withTargets([ + prometheus.new( + '${datasource}', + 'node:windows_node_cpu_utilisation:avg1m{%(clusterLabel)s="$cluster", instance="$instance"}' % $._config + ) + + prometheus.withLegendFormat('Utilisation'), + ]), + + tsPanel.new('CPU Usage Per Core') + + tsPanel.standardOptions.withUnit('percentunit') + + tsPanel.queryOptions.withTargets([ + prometheus.new( + '${datasource}', + 'sum by (core) (irate(windows_cpu_time_total{%(clusterLabel)s="$cluster", %(windowsExporterSelector)s, mode!="idle", instance="$instance"}[%(grafanaIntervalVar)s]))' % $._config + ) + + prometheus.withLegendFormat('{{core}}'), + ]), + + tsPanel.new('Memory Utilisation %') + + tsPanel.gridPos.withW(8) + + tsPanel.standardOptions.withUnit('percentunit') + + tsPanel.queryOptions.withTargets([ + prometheus.new( + '${datasource}', + 'node:windows_node_memory_utilisation:{%(clusterLabel)s="$cluster", instance="$instance"}' % $._config + ) + + prometheus.withLegendFormat('Memory'), + ]), + + tsPanel.new('Memory Usage') + + tsPanel.gridPos.withW(8) + + tsPanel.standardOptions.withUnit('bytes') + + tsPanel.queryOptions.withTargets([ + prometheus.new( + '${datasource}', ||| max( windows_os_visible_memory_bytes{%(clusterLabel)s="$cluster", %(windowsExporterSelector)s, instance="$instance"} - windows_memory_available_bytes{%(clusterLabel)s="$cluster", %(windowsExporterSelector)s, instance="$instance"} ) - ||| % $._config, legendFormat='memory used' - )) - .addTarget(prometheus.target('max(node:windows_node_memory_totalCached_bytes:sum{%(clusterLabel)s="$cluster", instance="$instance"})' % $._config, legendFormat='memory cached')) - .addTarget(prometheus.target('max(windows_memory_available_bytes{%(clusterLabel)s="$cluster", %(windowsExporterSelector)s, instance="$instance"})' % $._config, legendFormat='memory free')) - ) - .addPanel( - g.panel('Memory Saturation (Swap I/O) Pages') + - g.queryPanel('node:windows_node_memory_swap_io_pages:irate{%(clusterLabel)s="$cluster", instance="$instance"}' % $._config, 'Swap IO') + - { yaxes: g.yaxes('short') }, - ) - ) - .addRow( - g.row('Disk') - .addPanel( - g.panel('Disk IO Utilisation') + - g.queryPanel('node:windows_node_disk_utilisation:avg_irate{%(clusterLabel)s="$cluster", instance="$instance"}' % $._config, 'Utilisation') + - { yaxes: g.yaxes('percentunit') }, - ) - .addPanel( - graphPanel.new('Disk I/O', datasource='$datasource') - .addTarget(prometheus.target('max(rate(windows_logical_disk_read_bytes_total{%(clusterLabel)s="$cluster", %(windowsExporterSelector)s, instance="$instance"}[2m]))' % $._config, legendFormat='read')) - .addTarget(prometheus.target('max(rate(windows_logical_disk_write_bytes_total{%(clusterLabel)s="$cluster", %(windowsExporterSelector)s, instance="$instance"}[2m]))' % $._config, legendFormat='written')) - .addTarget(prometheus.target('max(rate(windows_logical_disk_read_seconds_total{%(clusterLabel)s="$cluster", %(windowsExporterSelector)s, instance="$instance"}[2m]) + rate(windows_logical_disk_write_seconds_total{%(clusterLabel)s="$cluster", %(windowsExporterSelector)s, instance="$instance"}[2m]))' % $._config, legendFormat='io time')) + + ||| % $._config + ) + + prometheus.withLegendFormat('memory used'), + + prometheus.new( + '${datasource}', + 'max(node:windows_node_memory_totalCached_bytes:sum{%(clusterLabel)s="$cluster", instance="$instance"})' % $._config + ) + + prometheus.withLegendFormat('memory cached'), + + prometheus.new( + '${datasource}', + 'max(windows_memory_available_bytes{%(clusterLabel)s="$cluster", %(windowsExporterSelector)s, instance="$instance"})' % $._config + ) + + prometheus.withLegendFormat('memory free'), + ]), + + tsPanel.new('Memory Saturation (Swap I/O) Pages') + + tsPanel.gridPos.withW(8) + + tsPanel.standardOptions.withUnit('short') + + tsPanel.queryOptions.withTargets([ + prometheus.new( + '${datasource}', + 'node:windows_node_memory_swap_io_pages:irate{%(clusterLabel)s="$cluster", instance="$instance"}' % $._config + ) + + prometheus.withLegendFormat('Swap IO'), + ]), + + tsPanel.new('Disk IO Utilisation') + + tsPanel.standardOptions.withUnit('percentunit') + + tsPanel.queryOptions.withTargets([ + prometheus.new( + '${datasource}', + 'node:windows_node_disk_utilisation:avg_irate{%(clusterLabel)s="$cluster", instance="$instance"}' % $._config + ) + + prometheus.withLegendFormat('Utilisation'), + ]), + + tsPanel.new('Disk IO') + + tsPanel.standardOptions.withUnit('bytes') + + tsPanel.queryOptions.withTargets([ + prometheus.new( + '${datasource}', + 'max(rate(windows_logical_disk_read_bytes_total{%(clusterLabel)s="$cluster", %(windowsExporterSelector)s, instance="$instance"}[2m]))' % $._config + ) + + prometheus.withLegendFormat('read'), + + prometheus.new( + '${datasource}', + 'max(rate(windows_logical_disk_write_bytes_total{%(clusterLabel)s="$cluster", %(windowsExporterSelector)s, instance="$instance"}[2m]))' % $._config + ) + + prometheus.withLegendFormat('written'), + + prometheus.new( + '${datasource}', + 'max(rate(windows_logical_disk_read_seconds_total{%(clusterLabel)s="$cluster", %(windowsExporterSelector)s, instance="$instance"}[2m]) + rate(windows_logical_disk_write_seconds_total{%(clusterLabel)s="$cluster", %(windowsExporterSelector)s, instance="$instance"}[2m]))' % $._config + ) + + prometheus.withLegendFormat('io time'), + ]) + + tsPanel.standardOptions.withOverrides([ { - seriesOverrides: [ + matcher: { + id: 'byRegexp', + options: '/io time/', + }, + properties: [ { - alias: 'read', - yaxis: 1, + id: 'unit', + value: 'ms', }, - { - alias: 'io time', - yaxis: 2, - }, - ], - yaxes: [ - self.yaxe(format='bytes'), - self.yaxe(format='ms'), ], - } - ) - ) - .addRow( - g.row('Net') - .addPanel( - g.panel('Net Utilisation (Transmitted)') + - g.queryPanel('node:windows_node_net_utilisation:sum_irate{%(clusterLabel)s="$cluster", instance="$instance"}' % $._config, 'Utilisation') + - { yaxes: g.yaxes('Bps') }, - ) - .addPanel( - g.panel('Net Saturation (Dropped)') + - g.queryPanel('node:windows_node_net_saturation:sum_irate{%(clusterLabel)s="$cluster", instance="$instance"}' % $._config, 'Saturation') + - { yaxes: g.yaxes('Bps') }, - ) - ) - .addRow( - g.row('Disk') - .addPanel( - g.panel('Disk Utilisation') + - g.queryPanel( - ||| - node:windows_node_filesystem_usage:{%(clusterLabel)s="$cluster", instance="$instance"} - ||| % $._config, - '{{volume}}', - ) + - { yaxes: g.yaxes('percentunit') }, - ), - ), + }, + ]), + + tsPanel.new('Disk Utilisation') + + tsPanel.gridPos.withW(24) + + tsPanel.standardOptions.withUnit('percentunit') + + tsPanel.queryOptions.withTargets([ + prometheus.new( + '${datasource}', + 'node:windows_node_filesystem_usage:{%(clusterLabel)s="$cluster", instance="$instance"}' % $._config + ) + + prometheus.withLegendFormat('{{volume}}'), + ]), + + tsPanel.new('Net Utilisation (Transmitted)') + + tsPanel.standardOptions.withUnit('Bps') + + tsPanel.queryOptions.withTargets([ + prometheus.new( + '${datasource}', + 'node:windows_node_net_utilisation:sum_irate{%(clusterLabel)s="$cluster", instance="$instance"}' % $._config + ) + + prometheus.withLegendFormat('Utilisation'), + ]), + + tsPanel.new('Net Saturation (Dropped)') + + tsPanel.standardOptions.withUnit('Bps') + + tsPanel.queryOptions.withTargets([ + prometheus.new( + '${datasource}', + 'node:windows_node_net_saturation:sum_irate{%(clusterLabel)s="$cluster", instance="$instance"}' % $._config + ) + + prometheus.withLegendFormat('Saturation'), + ]), + ]; + + g.dashboard.new('%(dashboardNamePrefix)sUSE Method / Node(Windows)' % $._config.grafanaK8s) + + g.dashboard.withUid($._config.grafanaDashboardIDs['k8s-windows-node-rsrc-use.json']) + + g.dashboard.withTags($._config.grafanaK8s.dashboardTags) + + g.dashboard.withEditable(false) + + g.dashboard.time.withFrom('now-1h') + + g.dashboard.time.withTo('now') + + g.dashboard.withRefresh($._config.grafanaK8s.refresh) + + g.dashboard.withVariables([variables.datasource, variables.cluster, variables.instance]) + + g.dashboard.withPanels(g.util.grid.wrapPanels(panels, panelWidth=12, panelHeight=7)), }, }