Skip to content

Commit

Permalink
Overdue commit of migration for node/pod resources dashboard
Browse files Browse the repository at this point in the history
  • Loading branch information
rgeyer committed Apr 24, 2024
1 parent 076450c commit e4fe0ed
Showing 1 changed file with 241 additions and 132 deletions.
373 changes: 241 additions & 132 deletions dashboards/resources/node.libsonnet
Original file line number Diff line number Diff line change
@@ -1,154 +1,263 @@
local grafana = import 'github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet';
local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libsonnet';
local template = grafana.template;
local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet';

{
grafanaDashboards+:: {
// Legacy (grafonnet-lib) `cluster` template variable.
// Its values come from a `label_values(...)` query built from
// `$._config.kubeStateMetricsSelector` and `$._config.clusterLabel`.
local clusterTemplate =
  local clusterQuery = 'label_values(up{%(kubeStateMetricsSelector)s}, %(clusterLabel)s)' % $._config;
  // '' when multi-cluster support is enabled, '2' otherwise
  // (presumably Grafana's "hide variable" setting -- confirm).
  local visibility = if $._config.showMultiCluster then '' else '2';
  template.new(
    name='cluster',
    datasource='$datasource',
    query=clusterQuery,
    current='',
    hide=visibility,
    refresh=2,
    includeAll=false,
    sort=1
  ),
local fieldOverride = g.panel.timeSeries.fieldOverride;
local prometheus = g.query.prometheus;
local table = g.panel.table;
local timeSeries = g.panel.timeSeries;
local var = g.dashboard.variable;

// Legacy (grafonnet-lib) `node` template variable.
// Multi-select; values are the `node` label of `kube_node_info`,
// restricted to the currently selected `$cluster`.
local nodeTemplate =
  local nodeQuery = 'label_values(kube_node_info{%(clusterLabel)s="$cluster"}, node)' % $._config;
  template.new(
    name='node',
    datasource='$datasource',
    query=nodeQuery,
    current='',
    hide='',
    refresh=2,
    includeAll=false,
    multi=true,
    sort=1
  ),
{
// `tsPanel` extends grafonnet's `timeSeries` panel, overriding `new(title)` so
// every panel created through it starts with the same legend, tooltip,
// drawing and query-interval settings.
local tsPanel =
  timeSeries {
    // title: panel title, passed straight through to `timeSeries.new`.
    new(title):
      local legend = timeSeries.options.legend;
      local tooltip = timeSeries.options.tooltip;
      local drawing = timeSeries.fieldConfig.defaults.custom;
      local interval = $._config.grafanaK8s.minimumTimeInterval;

      timeSeries.new(title)
      // Legend: shown, as a table, on the right, with the `lastNotNull` calc.
      + legend.withShowLegend()
      + legend.withAsTable()
      + legend.withDisplayMode('table')
      + legend.withPlacement('right')
      + legend.withCalcs(['lastNotNull'])
      + tooltip.withMode('single')
      // Series drawing defaults.
      + drawing.withShowPoints('never')
      + drawing.withFillOpacity(10)
      + drawing.withSpanNulls(true)
      + timeSeries.queryOptions.withInterval(interval),
  },

grafanaDashboards+:: {
'k8s-resources-node.json':
local tableStyles = {
// Dashboard template variables: `datasource`, `cluster` and `node`.
// `cluster` and `node` both take their datasource from the `datasource`
// variable defined in this same object (via `self.datasource`).
local variables = {
  // Short aliases for the grafonnet variable builders used below.
  local dsVar = var.datasource,
  local queryVar = var.query,
  local visible = queryVar.generalOptions.showOnDashboard,

  // The cluster selector is shown only when multi-cluster support is enabled.
  local clusterVisibility =
    if $._config.showMultiCluster
    then visible.withLabelAndValue()
    else visible.withNothing(),

  datasource:
    dsVar.new('datasource', 'prometheus')
    + dsVar.withRegex($._config.datasourceFilterRegex)
    + dsVar.generalOptions.showOnDashboard.withLabelAndValue()
    + dsVar.generalOptions.withLabel('Data source')
    + {
      // Preselect the configured datasource name.
      current: {
        selected: true,
        text: $._config.datasourceName,
        value: $._config.datasourceName,
      },
    },

  cluster:
    queryVar.new('cluster')
    + queryVar.withDatasourceFromVariable(self.datasource)
    + queryVar.queryTypes.withLabelValues(
      $._config.clusterLabel,
      'up{%(kubeStateMetricsSelector)s}' % $._config
    )
    + queryVar.generalOptions.withLabel('cluster')
    + queryVar.refresh.onTime()
    + clusterVisibility
    + queryVar.withSort(type='alphabetical'),

  node:
    queryVar.new('node')
    + queryVar.withDatasourceFromVariable(self.datasource)
    + queryVar.queryTypes.withLabelValues(
      'node',
      'kube_node_info{%(clusterLabel)s="$cluster"}' % $._config
    )
    + queryVar.generalOptions.withLabel('node')
    + queryVar.refresh.onTime()
    + visible.withLabelAndValue()
    + queryVar.selectionOptions.withMulti(true),
};

// Data links attached to table columns.
local links = {
  // Drill-down from a pod row to the `k8s-resources-pod` dashboard.
  // NOTE(review): the URL references `$namespace`, but this dashboard only
  // registers the datasource, cluster and node variables -- confirm that the
  // namespace parameter resolves as intended.
  pod: {
    local target = {
      prefix: $._config.grafanaK8s.linkPrefix,
      uid: $._config.grafanaDashboardIDs['k8s-resources-pod.json'],
    },

    alias: 'Pod',
    title: 'Drill down to pods',
    url: '%(prefix)s/d/%(uid)s/k8s-resources-pod?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}' % target,
  },
};

g.dashboard(
'%(dashboardNamePrefix)sCompute Resources / Node (Pods)' % $._config.grafanaK8s,
uid=($._config.grafanaDashboardIDs['k8s-resources-node.json']),
datasource_regex=$._config.datasourceFilterRegex,
datasource=$._config.datasourceName,
)
.addRow(
g.row('CPU Usage')
.addPanel(
g.panel('CPU Usage') +
g.queryPanel([
local panels = [
tsPanel.new('CPU Usage')
+ tsPanel.queryOptions.withTargets([
prometheus.new(
'${datasource}',
'sum(kube_node_status_capacity{%(clusterLabel)s="$cluster", node=~"$node", resource="cpu"})' % $._config,
)
+ prometheus.withLegendFormat('max capacity'),

prometheus.new(
'${datasource}',
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config,
], [
'max capacity',
'{{pod}}',
]) +
g.stack +
)
+ prometheus.withLegendFormat('{{pod}}'),
])
+ tsPanel.fieldConfig.defaults.custom.withStacking({ mode: 'normal' })
+ tsPanel.standardOptions.withOverrides([
fieldOverride.byName.new('max capacity')
+ fieldOverride.byName.withPropertiesFromOptions(
timeSeries.standardOptions.color.withMode('fixed')
+ timeSeries.standardOptions.color.withFixedColor('red')
)
+ fieldOverride.byName.withProperty('custom.stacking', { mode: 'none' })
// This effectively hides "max capacity" from the graph itself: the series appears only in the legend, where its value is shown.
// In the legacy graph panel the series could be re-enabled from the legend and was then drawn as a dashed line; that is no longer possible.
+ fieldOverride.byName.withProperty('custom.hideFrom', { tooltip: true, viz: true, legend: false })
+ fieldOverride.byName.withProperty('custom.lineStyle', { fill: 'dash', dash: [10, 10] }),
]),

table.new('CPU Quota')
+ table.queryOptions.withTargets([
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config)
+ prometheus.withInstant(true)
+ prometheus.withFormat('table'),
prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config)
+ prometheus.withInstant(true)
+ prometheus.withFormat('table'),
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config)
+ prometheus.withInstant(true)
+ prometheus.withFormat('table'),
prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config)
+ prometheus.withInstant(true)
+ prometheus.withFormat('table'),
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config)
+ prometheus.withInstant(true)
+ prometheus.withFormat('table'),
])
+ table.queryOptions.withTransformations([
table.queryOptions.transformation.withId('joinByField')
+ table.queryOptions.transformation.withOptions({
byField: 'pod',
mode: 'outer',
}),

table.queryOptions.transformation.withId('organize')
+ table.queryOptions.transformation.withOptions({
renameByName: {
pod: 'Pod',
'Value #A': 'CPU Usage',
'Value #B': 'CPU Requests',
'Value #C': 'CPU Requests %',
'Value #D': 'CPU Limits',
'Value #E': 'CPU Limits %',
},
excludeByName: {
Time: true,
'Time 1': true,
'Time 2': true,
'Time 3': true,
'Time 4': true,
'Time 5': true,
},
}),
])
+ table.standardOptions.withOverrides([
{
seriesOverrides: [
matcher: {
id: 'byRegexp',
options: '/%/',
},
properties: [
{
alias: 'max capacity',
color: '#F2495C',
fill: 0,
hideTooltip: true,
legend: true,
linewidth: 2,
stack: false,
hiddenSeries: true,
dashes: true,
id: 'unit',
value: 'percentunit',
},
],
},
)
)
.addRow(
g.row('CPU Quota')
.addPanel(
g.panel('CPU Quota') +
g.tablePanel([
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config,
'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config,
'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config,
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config,
], tableStyles {
'Value #A': { alias: 'CPU Usage' },
'Value #B': { alias: 'CPU Requests' },
'Value #C': { alias: 'CPU Requests %', unit: 'percentunit' },
'Value #D': { alias: 'CPU Limits' },
'Value #E': { alias: 'CPU Limits %', unit: 'percentunit' },
})
)
)
.addRow(
g.row('Memory Usage')
.addPanel(
g.panel('Memory Usage (w/o cache)') +
// Like above, without page cache
g.queryPanel([
'sum(kube_node_status_capacity{%(clusterLabel)s="$cluster", node=~"$node", resource="memory"})' % $._config,
'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node", container!=""}) by (pod)' % $._config,
], [
'max capacity',
'{{pod}}',
]) +
g.stack +
{ yaxes: g.yaxes('bytes') } +
{
seriesOverrides: [
matcher: {
id: 'byName',
options: 'Pod',
},
properties: [
{
alias: 'max capacity',
color: '#F2495C',
fill: 0,
hideTooltip: true,
legend: true,
linewidth: 2,
stack: false,
hiddenSeries: true,
dashes: true,
id: 'links',
value: [links.pod],
},
],
},
)
)
.addRow(
g.row('Memory Quota')
.addPanel(
g.panel('Memory Quota') +
g.tablePanel([
'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config,
'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config,
'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config,
'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config,
'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config,
'sum(node_namespace_pod_container:container_memory_rss{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config,
'sum(node_namespace_pod_container:container_memory_cache{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config,
'sum(node_namespace_pod_container:container_memory_swap{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config,
], tableStyles {
'Value #A': { alias: 'Memory Usage', unit: 'bytes' },
'Value #B': { alias: 'Memory Requests', unit: 'bytes' },
'Value #C': { alias: 'Memory Requests %', unit: 'percentunit' },
'Value #D': { alias: 'Memory Limits', unit: 'bytes' },
'Value #E': { alias: 'Memory Limits %', unit: 'percentunit' },
'Value #F': { alias: 'Memory Usage (RSS)', unit: 'bytes' },
'Value #G': { alias: 'Memory Usage (Cache)', unit: 'bytes' },
'Value #H': { alias: 'Memory Usage (Swap)', unit: 'bytes' },
})
)
) + {
templating+: {
list+: [clusterTemplate, nodeTemplate],
},
},
]),

// Memory usage panel: per-pod working-set memory (stacked), plus the summed
// memory capacity of the selected nodes as a separate "max capacity" series.
tsPanel.new('Memory Usage (w/o cache)')
+ tsPanel.standardOptions.withUnit('bytes')
+ tsPanel.queryOptions.withTargets([
// Total memory capacity of the selected node(s).
prometheus.new(
'${datasource}',
'sum(kube_node_status_capacity{%(clusterLabel)s="$cluster", node=~"$node", resource="memory"})' % $._config,
)
+ prometheus.withLegendFormat('max capacity'),

// Working-set memory per pod on the selected node(s).
prometheus.new(
'${datasource}',
'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node", container!=""}) by (pod)' % $._config,
)
+ prometheus.withLegendFormat('{{pod}}'),
])
// Pod series are stacked by default; the override below opts "max capacity" out.
+ tsPanel.fieldConfig.defaults.custom.withStacking({ mode: 'normal' })
+ tsPanel.standardOptions.withOverrides([
// Applies only to the series named 'max capacity': fixed red color, no stacking.
fieldOverride.byName.new('max capacity')
+ fieldOverride.byName.withPropertiesFromOptions(
timeSeries.standardOptions.color.withMode('fixed')
+ timeSeries.standardOptions.color.withFixedColor('red')
)
+ fieldOverride.byName.withProperty('custom.stacking', { mode: 'none' })
// This effectively hides "max capacity" from the graph itself: the series appears only in the legend, where its value is shown.
// In the legacy graph panel the series could be re-enabled from the legend and was then drawn as a dashed line; that is no longer possible.
+ fieldOverride.byName.withProperty('custom.hideFrom', { tooltip: true, viz: true, legend: false })
+ fieldOverride.byName.withProperty('custom.lineStyle', { fill: 'dash', dash: [10, 10] }),
]),
];

// Assemble the dashboard: title, uid, tags, refresh and default time range come
// from `$._config`; the three template variables are registered in the order
// datasource, cluster, node.
g.dashboard.new('%(dashboardNamePrefix)sCompute Resources / Node (Pods)' % $._config.grafanaK8s)
+ g.dashboard.withUid($._config.grafanaDashboardIDs['k8s-resources-node.json'])
+ g.dashboard.withTags($._config.grafanaK8s.dashboardTags)
+ g.dashboard.withEditable(false)
+ g.dashboard.time.withFrom('now-1h')
+ g.dashboard.time.withTo('now')
+ g.dashboard.withRefresh($._config.grafanaK8s.refresh)
+ g.dashboard.withVariables([variables.datasource, variables.cluster, variables.node])
// Lay the panels out with grafonnet's grid helper, passing panelWidth=24 and panelHeight=6.
+ g.dashboard.withPanels(g.util.grid.wrapPanels(panels, panelWidth=24, panelHeight=6)),


// local tableStyles = {
// pod: {
// alias: 'Pod',
// },
// };

// g.dashboard(
// '%(dashboardNamePrefix)sCompute Resources / Node (Pods)' % $._config.grafanaK8s,
// uid=($._config.grafanaDashboardIDs['k8s-resources-node.json']),
// datasource_regex=$._config.datasourceFilterRegex,
// datasource=$._config.datasourceName,
// )
// .addRow(
// g.row('Memory Quota')
// .addPanel(
// g.panel('Memory Quota') +
// g.tablePanel([
// 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config,
// 'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config,
// 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config,
// 'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config,
// 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config,
// 'sum(node_namespace_pod_container:container_memory_rss{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config,
// 'sum(node_namespace_pod_container:container_memory_cache{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config,
// 'sum(node_namespace_pod_container:container_memory_swap{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config,
// ], tableStyles {
// 'Value #A': { alias: 'Memory Usage', unit: 'bytes' },
// 'Value #B': { alias: 'Memory Requests', unit: 'bytes' },
// 'Value #C': { alias: 'Memory Requests %', unit: 'percentunit' },
// 'Value #D': { alias: 'Memory Limits', unit: 'bytes' },
// 'Value #E': { alias: 'Memory Limits %', unit: 'percentunit' },
// 'Value #F': { alias: 'Memory Usage (RSS)', unit: 'bytes' },
// 'Value #G': { alias: 'Memory Usage (Cache)', unit: 'bytes' },
// 'Value #H': { alias: 'Memory Usage (Swap)', unit: 'bytes' },
// })
// )
// ) + {
// templating+: {
// list+: [clusterTemplate, nodeTemplate],
// },
// },
},
}

0 comments on commit e4fe0ed

Please sign in to comment.