-
Notifications
You must be signed in to change notification settings - Fork 599
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Overdue commit of migration for node/pod resources dashboard
- Loading branch information
Showing
1 changed file
with
241 additions
and
132 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,154 +1,263 @@ | ||
local grafana = import 'github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet'; | ||
local g = import 'github.com/grafana/jsonnet-libs/grafana-builder/grafana.libsonnet'; | ||
local template = grafana.template; | ||
local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; | ||
|
||
{ | ||
grafanaDashboards+:: { | ||
// 'cluster' template variable built with template.new (template is grafana.template from grafonnet-lib). | ||
// Its values are the %(clusterLabel)s label values found on `up` series matching kubeStateMetricsSelector. | ||
local clusterTemplate = | ||
template.new( | ||
name='cluster', | ||
datasource='$datasource', | ||
query='label_values(up{%(kubeStateMetricsSelector)s}, %(clusterLabel)s)' % $._config, | ||
current='', | ||
// Passes '' when showMultiCluster is set, otherwise '2' (presumably "hide variable" — confirm against grafonnet-lib). | ||
hide=if $._config.showMultiCluster then '' else '2', | ||
refresh=2, | ||
includeAll=false, | ||
sort=1 | ||
), | ||
local fieldOverride = g.panel.timeSeries.fieldOverride; | ||
local prometheus = g.query.prometheus; | ||
local table = g.panel.table; | ||
local timeSeries = g.panel.timeSeries; | ||
local var = g.dashboard.variable; | ||
|
||
// 'node' template variable built with template.new: the `node` label values of kube_node_info | ||
// for the cluster currently selected in $cluster. multi=true allows selecting several nodes at once. | ||
local nodeTemplate = | ||
template.new( | ||
name='node', | ||
datasource='$datasource', | ||
query='label_values(kube_node_info{%(clusterLabel)s="$cluster"}, node)' % $._config, | ||
current='', | ||
hide='', | ||
refresh=2, | ||
includeAll=false, | ||
multi=true, | ||
sort=1 | ||
), | ||
{ | ||
// tsPanel extends g.panel.timeSeries and overrides new(title) so that every panel created | ||
// through tsPanel.new(...) starts from the same legend, tooltip and drawing defaults. | ||
local tsPanel = | ||
timeSeries { | ||
new(title): | ||
timeSeries.new(title) | ||
// Legend: shown, rendered as a table on the right, with the last non-null value as its only calc. | ||
+ timeSeries.options.legend.withShowLegend() | ||
+ timeSeries.options.legend.withAsTable() | ||
+ timeSeries.options.legend.withDisplayMode('table') | ||
+ timeSeries.options.legend.withPlacement('right') | ||
+ timeSeries.options.legend.withCalcs(['lastNotNull']) | ||
+ timeSeries.options.tooltip.withMode('single') | ||
// Drawing defaults: no points, fill opacity 10, span nulls enabled. | ||
+ timeSeries.fieldConfig.defaults.custom.withShowPoints('never') | ||
+ timeSeries.fieldConfig.defaults.custom.withFillOpacity(10) | ||
+ timeSeries.fieldConfig.defaults.custom.withSpanNulls(true) | ||
// Query interval is taken from the mixin config (grafanaK8s.minimumTimeInterval). | ||
+ timeSeries.queryOptions.withInterval($._config.grafanaK8s.minimumTimeInterval), | ||
}, | ||
|
||
grafanaDashboards+:: { | ||
'k8s-resources-node.json': | ||
local tableStyles = { | ||
// Dashboard template variables (datasource, cluster, node), built with g.dashboard.variable (aliased as var). | ||
local variables = { | ||
// Prometheus datasource picker, filtered by datasourceFilterRegex and labelled 'Data source'. | ||
// The trailing object sets the `current` selection to the configured datasourceName. | ||
datasource: | ||
var.datasource.new('datasource', 'prometheus') | ||
+ var.datasource.withRegex($._config.datasourceFilterRegex) | ||
+ var.datasource.generalOptions.showOnDashboard.withLabelAndValue() | ||
+ var.datasource.generalOptions.withLabel('Data source') | ||
+ { | ||
current: { | ||
selected: true, | ||
text: $._config.datasourceName, | ||
value: $._config.datasourceName, | ||
}, | ||
}, | ||
// Cluster picker: label values of $._config.clusterLabel on `up` series matching kubeStateMetricsSelector. | ||
// Shown on the dashboard only when showMultiCluster is set; otherwise showOnDashboard.withNothing() is applied. | ||
cluster: | ||
var.query.new('cluster') | ||
+ var.query.withDatasourceFromVariable(self.datasource) | ||
+ var.query.queryTypes.withLabelValues( | ||
$._config.clusterLabel, | ||
'up{%(kubeStateMetricsSelector)s}' % $._config | ||
) | ||
+ var.query.generalOptions.withLabel('cluster') | ||
+ var.query.refresh.onTime() | ||
+ ( | ||
if $._config.showMultiCluster | ||
then var.query.generalOptions.showOnDashboard.withLabelAndValue() | ||
else var.query.generalOptions.showOnDashboard.withNothing() | ||
) | ||
+ var.query.withSort(type='alphabetical'), | ||
// Node picker: `node` label values of kube_node_info for the selected $cluster; multiple nodes may be selected. | ||
node: | ||
var.query.new('node') | ||
+ var.query.withDatasourceFromVariable(self.datasource) | ||
+ var.query.queryTypes.withLabelValues( | ||
'node', | ||
'kube_node_info{%(clusterLabel)s="$cluster"}' % $._config | ||
) | ||
+ var.query.generalOptions.withLabel('node') | ||
+ var.query.refresh.onTime() | ||
+ var.query.generalOptions.showOnDashboard.withLabelAndValue() | ||
+ var.query.selectionOptions.withMulti(true), | ||
}; | ||
|
||
// Data links attached to table fields. | ||
local links = { | ||
// Drill-down link to the k8s-resources-pod dashboard. The URL is assembled from grafanaK8s.linkPrefix and that | ||
// dashboard's uid, and forwards the datasource, $cluster, $namespace and the row's Pod field as query parameters. | ||
// NOTE(review): $namespace is referenced here, but the variables passed to g.dashboard.withVariables in this | ||
// file are only datasource, cluster and node — confirm where $namespace is expected to come from. | ||
pod: { | ||
alias: 'Pod', | ||
title: 'Drill down to pods', | ||
url: '%(prefix)s/d/%(uid)s/k8s-resources-pod?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}' % { | ||
uid: $._config.grafanaDashboardIDs['k8s-resources-pod.json'], | ||
prefix: $._config.grafanaK8s.linkPrefix, | ||
}, | ||
}, | ||
}; | ||
|
||
g.dashboard( | ||
'%(dashboardNamePrefix)sCompute Resources / Node (Pods)' % $._config.grafanaK8s, | ||
uid=($._config.grafanaDashboardIDs['k8s-resources-node.json']), | ||
datasource_regex=$._config.datasourceFilterRegex, | ||
datasource=$._config.datasourceName, | ||
) | ||
.addRow( | ||
g.row('CPU Usage') | ||
.addPanel( | ||
g.panel('CPU Usage') + | ||
g.queryPanel([ | ||
local panels = [ | ||
tsPanel.new('CPU Usage') | ||
+ tsPanel.queryOptions.withTargets([ | ||
prometheus.new( | ||
'${datasource}', | ||
'sum(kube_node_status_capacity{%(clusterLabel)s="$cluster", node=~"$node", resource="cpu"})' % $._config, | ||
) | ||
+ prometheus.withLegendFormat('max capacity'), | ||
|
||
prometheus.new( | ||
'${datasource}', | ||
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, | ||
], [ | ||
'max capacity', | ||
'{{pod}}', | ||
]) + | ||
g.stack + | ||
) | ||
+ prometheus.withLegendFormat('{{pod}}'), | ||
]) | ||
+ tsPanel.fieldConfig.defaults.custom.withStacking({ mode: 'normal' }) | ||
+ tsPanel.standardOptions.withOverrides([ | ||
fieldOverride.byName.new('max capacity') | ||
+ fieldOverride.byName.withPropertiesFromOptions( | ||
timeSeries.standardOptions.color.withMode('fixed') | ||
+ timeSeries.standardOptions.color.withFixedColor('red') | ||
) | ||
+ fieldOverride.byName.withProperty('custom.stacking', { mode: 'none' }) | ||
// This effectively hides the "max capacity" series from the graph and the tooltip; it appears only in the legend, where its value is shown. In the "legacy" graph panel | ||
// the series could be re-selected from the legend and drawn on the graph as a dashed line. That is no longer possible with the time series panel. | ||
+ fieldOverride.byName.withProperty('custom.hideFrom', { tooltip: true, viz: true, legend: false }) | ||
+ fieldOverride.byName.withProperty('custom.lineStyle', { fill: 'dash', dash: [10, 10] }), | ||
]), | ||
|
||
table.new('CPU Quota') | ||
+ table.queryOptions.withTargets([ | ||
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) | ||
+ prometheus.withInstant(true) | ||
+ prometheus.withFormat('table'), | ||
prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) | ||
+ prometheus.withInstant(true) | ||
+ prometheus.withFormat('table'), | ||
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) | ||
+ prometheus.withInstant(true) | ||
+ prometheus.withFormat('table'), | ||
prometheus.new('${datasource}', 'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) | ||
+ prometheus.withInstant(true) | ||
+ prometheus.withFormat('table'), | ||
prometheus.new('${datasource}', 'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config) | ||
+ prometheus.withInstant(true) | ||
+ prometheus.withFormat('table'), | ||
]) | ||
+ table.queryOptions.withTransformations([ | ||
table.queryOptions.transformation.withId('joinByField') | ||
+ table.queryOptions.transformation.withOptions({ | ||
byField: 'pod', | ||
mode: 'outer', | ||
}), | ||
|
||
table.queryOptions.transformation.withId('organize') | ||
+ table.queryOptions.transformation.withOptions({ | ||
renameByName: { | ||
pod: 'Pod', | ||
'Value #A': 'CPU Usage', | ||
'Value #B': 'CPU Requests', | ||
'Value #C': 'CPU Requests %', | ||
'Value #D': 'CPU Limits', | ||
'Value #E': 'CPU Limits %', | ||
}, | ||
excludeByName: { | ||
Time: true, | ||
'Time 1': true, | ||
'Time 2': true, | ||
'Time 3': true, | ||
'Time 4': true, | ||
'Time 5': true, | ||
}, | ||
}), | ||
]) | ||
+ table.standardOptions.withOverrides([ | ||
{ | ||
seriesOverrides: [ | ||
matcher: { | ||
id: 'byRegexp', | ||
options: '/%/', | ||
}, | ||
properties: [ | ||
{ | ||
alias: 'max capacity', | ||
color: '#F2495C', | ||
fill: 0, | ||
hideTooltip: true, | ||
legend: true, | ||
linewidth: 2, | ||
stack: false, | ||
hiddenSeries: true, | ||
dashes: true, | ||
id: 'unit', | ||
value: 'percentunit', | ||
}, | ||
], | ||
}, | ||
) | ||
) | ||
.addRow( | ||
g.row('CPU Quota') | ||
.addPanel( | ||
g.panel('CPU Quota') + | ||
g.tablePanel([ | ||
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, | ||
'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, | ||
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, | ||
'sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, | ||
'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, | ||
], tableStyles { | ||
'Value #A': { alias: 'CPU Usage' }, | ||
'Value #B': { alias: 'CPU Requests' }, | ||
'Value #C': { alias: 'CPU Requests %', unit: 'percentunit' }, | ||
'Value #D': { alias: 'CPU Limits' }, | ||
'Value #E': { alias: 'CPU Limits %', unit: 'percentunit' }, | ||
}) | ||
) | ||
) | ||
.addRow( | ||
g.row('Memory Usage') | ||
.addPanel( | ||
g.panel('Memory Usage (w/o cache)') + | ||
// Like above, without page cache | ||
g.queryPanel([ | ||
'sum(kube_node_status_capacity{%(clusterLabel)s="$cluster", node=~"$node", resource="memory"})' % $._config, | ||
'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node", container!=""}) by (pod)' % $._config, | ||
], [ | ||
'max capacity', | ||
'{{pod}}', | ||
]) + | ||
g.stack + | ||
{ yaxes: g.yaxes('bytes') } + | ||
{ | ||
seriesOverrides: [ | ||
matcher: { | ||
id: 'byName', | ||
options: 'Pod', | ||
}, | ||
properties: [ | ||
{ | ||
alias: 'max capacity', | ||
color: '#F2495C', | ||
fill: 0, | ||
hideTooltip: true, | ||
legend: true, | ||
linewidth: 2, | ||
stack: false, | ||
hiddenSeries: true, | ||
dashes: true, | ||
id: 'links', | ||
value: [links.pod], | ||
}, | ||
], | ||
}, | ||
) | ||
) | ||
.addRow( | ||
g.row('Memory Quota') | ||
.addPanel( | ||
g.panel('Memory Quota') + | ||
g.tablePanel([ | ||
'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config, | ||
'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, | ||
'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, | ||
'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, | ||
'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, | ||
'sum(node_namespace_pod_container:container_memory_rss{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config, | ||
'sum(node_namespace_pod_container:container_memory_cache{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config, | ||
'sum(node_namespace_pod_container:container_memory_swap{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config, | ||
], tableStyles { | ||
'Value #A': { alias: 'Memory Usage', unit: 'bytes' }, | ||
'Value #B': { alias: 'Memory Requests', unit: 'bytes' }, | ||
'Value #C': { alias: 'Memory Requests %', unit: 'percentunit' }, | ||
'Value #D': { alias: 'Memory Limits', unit: 'bytes' }, | ||
'Value #E': { alias: 'Memory Limits %', unit: 'percentunit' }, | ||
'Value #F': { alias: 'Memory Usage (RSS)', unit: 'bytes' }, | ||
'Value #G': { alias: 'Memory Usage (Cache)', unit: 'bytes' }, | ||
'Value #H': { alias: 'Memory Usage (Swap)', unit: 'bytes' }, | ||
}) | ||
) | ||
) + { | ||
templating+: { | ||
list+: [clusterTemplate, nodeTemplate], | ||
}, | ||
}, | ||
]), | ||
|
||
tsPanel.new('Memory Usage (w/o cache)') | ||
+ tsPanel.standardOptions.withUnit('bytes') | ||
+ tsPanel.queryOptions.withTargets([ | ||
prometheus.new( | ||
'${datasource}', | ||
'sum(kube_node_status_capacity{%(clusterLabel)s="$cluster", node=~"$node", resource="memory"})' % $._config, | ||
) | ||
+ prometheus.withLegendFormat('max capacity'), | ||
|
||
prometheus.new( | ||
'${datasource}', | ||
'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node", container!=""}) by (pod)' % $._config, | ||
) | ||
+ prometheus.withLegendFormat('{{pod}}'), | ||
]) | ||
+ tsPanel.fieldConfig.defaults.custom.withStacking({ mode: 'normal' }) | ||
+ tsPanel.standardOptions.withOverrides([ | ||
fieldOverride.byName.new('max capacity') | ||
+ fieldOverride.byName.withPropertiesFromOptions( | ||
timeSeries.standardOptions.color.withMode('fixed') | ||
+ timeSeries.standardOptions.color.withFixedColor('red') | ||
) | ||
+ fieldOverride.byName.withProperty('custom.stacking', { mode: 'none' }) | ||
// This effectively hides the "max capacity" series from the graph and the tooltip; it appears only in the legend, where its value is shown. In the "legacy" graph panel | ||
// the series could be re-selected from the legend and drawn on the graph as a dashed line. That is no longer possible with the time series panel. | ||
+ fieldOverride.byName.withProperty('custom.hideFrom', { tooltip: true, viz: true, legend: false }) | ||
+ fieldOverride.byName.withProperty('custom.lineStyle', { fill: 'dash', dash: [10, 10] }), | ||
]), | ||
]; | ||
|
||
g.dashboard.new('%(dashboardNamePrefix)sCompute Resources / Node (Pods)' % $._config.grafanaK8s) | ||
+ g.dashboard.withUid($._config.grafanaDashboardIDs['k8s-resources-node.json']) | ||
+ g.dashboard.withTags($._config.grafanaK8s.dashboardTags) | ||
+ g.dashboard.withEditable(false) | ||
+ g.dashboard.time.withFrom('now-1h') | ||
+ g.dashboard.time.withTo('now') | ||
+ g.dashboard.withRefresh($._config.grafanaK8s.refresh) | ||
+ g.dashboard.withVariables([variables.datasource, variables.cluster, variables.node]) | ||
+ g.dashboard.withPanels(g.util.grid.wrapPanels(panels, panelWidth=24, panelHeight=6)), | ||
|
||
|
||
// local tableStyles = { | ||
// pod: { | ||
// alias: 'Pod', | ||
// }, | ||
// }; | ||
|
||
// g.dashboard( | ||
// '%(dashboardNamePrefix)sCompute Resources / Node (Pods)' % $._config.grafanaK8s, | ||
// uid=($._config.grafanaDashboardIDs['k8s-resources-node.json']), | ||
// datasource_regex=$._config.datasourceFilterRegex, | ||
// datasource=$._config.datasourceName, | ||
// ) | ||
// .addRow( | ||
// g.row('Memory Quota') | ||
// .addPanel( | ||
// g.panel('Memory Quota') + | ||
// g.tablePanel([ | ||
// 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config, | ||
// 'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, | ||
// 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, | ||
// 'sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, | ||
// 'sum(node_namespace_pod_container:container_memory_working_set_bytes{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{%(clusterLabel)s="$cluster", node=~"$node"}) by (pod)' % $._config, | ||
// 'sum(node_namespace_pod_container:container_memory_rss{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config, | ||
// 'sum(node_namespace_pod_container:container_memory_cache{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config, | ||
// 'sum(node_namespace_pod_container:container_memory_swap{%(clusterLabel)s="$cluster", node=~"$node",container!=""}) by (pod)' % $._config, | ||
// ], tableStyles { | ||
// 'Value #A': { alias: 'Memory Usage', unit: 'bytes' }, | ||
// 'Value #B': { alias: 'Memory Requests', unit: 'bytes' }, | ||
// 'Value #C': { alias: 'Memory Requests %', unit: 'percentunit' }, | ||
// 'Value #D': { alias: 'Memory Limits', unit: 'bytes' }, | ||
// 'Value #E': { alias: 'Memory Limits %', unit: 'percentunit' }, | ||
// 'Value #F': { alias: 'Memory Usage (RSS)', unit: 'bytes' }, | ||
// 'Value #G': { alias: 'Memory Usage (Cache)', unit: 'bytes' }, | ||
// 'Value #H': { alias: 'Memory Usage (Swap)', unit: 'bytes' }, | ||
// }) | ||
// ) | ||
// ) + { | ||
// templating+: { | ||
// list+: [clusterTemplate, nodeTemplate], | ||
// }, | ||
// }, | ||
}, | ||
} |