Skip to content

Commit

Permalink
Merge pull request #32 from eslam-gomaa/v0.0.6
Browse files Browse the repository at this point in the history
V0.0.6
  • Loading branch information
eslam-gomaa authored May 10, 2023
2 parents 7c92231 + 1182c84 commit 246b010
Show file tree
Hide file tree
Showing 6 changed files with 190 additions and 36 deletions.
16 changes: 15 additions & 1 deletion kptop_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,18 @@ def run():
from kubePtop.cli import Cli


run()
run()

# from kubePtop.node_metrics import PrometheusNodeMetrics
# from kubePtop.read_env import ReadEnv
# env = ReadEnv()
# env.read_env()
# import rich

# test = PrometheusNodeMetrics()
# rich.print(test.nodeManagedK8sInfo('.*'))
# print(test.topNode())
# test.topNodeTable(option="cloud")
# test.topNodeJson('ip-10-129-143-105.eu-west-1.compute.internal')


30 changes: 27 additions & 3 deletions kubePtop/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ def __init__(self):
self.dashboard = 'default'
self.list_dashboards = False
self.sort_by_mem_usage = False
self.list_option = ''
self.list_nodes_option = []
self.colorize_json = False


# Read CLI arguments
self.argparse()
Expand All @@ -50,13 +54,24 @@ def __init__(self):
if self.list_dashboards:
node_monitor.list_dashboards()
exit(0)

# kptop nodes <NODE-NAME> -o json
if self.list_option == 'json':
node_metrics.topNodeJson(node=self.node, color=self.colorize_json)
exit(0)
# Check if the node found.
node_monitor.display_dashboard(dashboard=self.dashboard, node_name=self.node)

# kptop nodes
if self.list_nodes:
node_metrics.topNodeTable()
# kptop nodes -o json
if self.list_option == 'json':
node_metrics.topNodeJson(node=".*", color=self.colorize_json)
exit(0)
node_metrics.topNodeTable(option=self.list_option)
exit(0)


# kptop pods <POD-NAME>
if self.pod:
if self.container is None:
Expand Down Expand Up @@ -90,7 +105,7 @@ def __init__(self):

def argparse(self):
parser = argparse.ArgumentParser(description='A Python tool for Kubernetes Nodes/Pods terminal monitoring through Prometheus metrics.')
parser.add_argument('top', type=str, nargs='*', metavar='{pods, pod, po} | {nodes, node} | {pvcs, pvc}', help='top pods/nodes/pvcs')
parser.add_argument('top', type=str, nargs='*', metavar='{pods, pod, po} | {nodes, node} | {persistentvolumeclaim, pvc}', help='top pods/nodes/persistentvolumeclaim')
parser.add_argument('-n', '--namespace', type=str, required=False, metavar='', help='Specify a Kubernetes namespace')
parser.add_argument('-A', '--all-namespaces', required=False, action='store_true', help='All Kubernetes namespaces')
parser.add_argument('-c', '--container', type=str, required=False, metavar='', help='Monitor a specific Pod\'s container')
Expand All @@ -99,13 +114,16 @@ def argparse(self):
parser.add_argument('-C', '--check-metrics', required=False, action='store_true', help='Checks the availability of the needed metrics')
parser.add_argument('-d', '--debug', required=False, action='store_true', help='Print debug output')
parser.add_argument('-s', '--sort-by-mem-usage', required=False, action='store_true', help='Sort top result by memory usage')
parser.add_argument('-o', '--option', type=str, required=False, choices=['cloud', 'json'], help='options for "kptop node||pod" (currently supported in "kptop node")')
parser.add_argument('-cj', '--colorize-json', required=False, action='store_true', help='Colorize Json output (with "-o json")')
# parser.add_argument('-q', '--query', type=str, required=False, help='options for "Run a custom query')

# parser.add_argument('-D', '--dashboard', type=str, required=False, metavar='', help='Specify a dashboard')
# parser.add_argument('-L', '--list-dashboards', required=False, action='store_true', help='List available dashboards')

pod_aliases = ['pod', 'pods', 'po']
node_aliases = ['node', 'nodes']
pvc_aliases = ['pvc', 'pvcs']
pvc_aliases = ['pvc', 'persistentvolumeclaim']

results = parser.parse_args()
self.parser = parser
Expand Down Expand Up @@ -160,6 +178,12 @@ def argparse(self):

if results.namespace:
self.namespace = results.namespace

if results.option:
self.list_option = results.option

if results.colorize_json:
    # Read the flag from its own argparse attribute: '-cj/--colorize-json' is a
    # store_true option, so this keeps self.colorize_json a boolean instead of
    # copying the '-o/--option' string (or None) into it.
    self.colorize_json = results.colorize_json

if results.all_namespaces:
self.all_namespaces = results.all_namespaces
Expand Down
168 changes: 142 additions & 26 deletions kubePtop/node_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
from kubePtop.helper import Helper
from tabulate import tabulate
from kubePtop.colors import Bcolors
import json
import rich
bcolors = Bcolors()
import traceback

Expand Down Expand Up @@ -192,7 +194,7 @@ def MemTotalBytes(self, node):
"result": ""
}
try:
result = self.run_query(f'node_memory_MemTotal_bytes{{{GlobalAttrs.node_exporter_node_label}="{node}"}}')
result = self.run_query(f'node_memory_MemTotal_bytes{{{GlobalAttrs.node_exporter_node_label}=~"{node}"}}')
if not result.get('status') == 'success':
output['fail_reason'] = "could not get metric value"
return output
Expand Down Expand Up @@ -1287,7 +1289,7 @@ def nodeDiskReadBytes(self, node):
# return output


def topNode(self):
def topNode(self, node=".*"):
"""
"""
output = {
Expand All @@ -1296,8 +1298,7 @@ def topNode(self):
"result": {}
}
try:

memory_total_query = f'node_memory_MemTotal_bytes' # f'machine_memory_bytes'
memory_total_query = f'node_memory_MemTotal_bytes{{{GlobalAttrs.node_exporter_node_label}=~"{node}"}}'
memory_total = self.run_query(memory_total_query)
if not memory_total.get('status') == 'success':
output['fail_reason'] = f"could not get metric's value: {memory_total_query}"
Expand All @@ -1306,7 +1307,28 @@ def topNode(self):
output['fail_reason'] = f"Query did not return any data: {memory_total_query}"
return output

memory_free_query = f'node_memory_MemFree_bytes'
nodes_dct = {}
for node_ in memory_total.get('data').get('result'):
nodes_dct[node_.get('metric').get(GlobalAttrs.node_exporter_node_label)] = {
"memory_total": int(node_.get('value')[1]),
"memory_free": -1,
"memory_used": -1,
"cpu_cores": -1,
# "cpu_used": -1, # not sure of the metrics to get the used cpu in milicores.
"cpu_used_percentage": -1,
"running_pods_num": -1,
"cluster": "",
"node_os": "",
"node_arch": "",
"region": "",
"az": "",
"instance_type": "",
"cluster_env": "Unknown",
"node_group_capacity_type": "",
"node_group_name": "",
}

memory_free_query = f'node_memory_MemFree_bytes{{{GlobalAttrs.node_exporter_node_label}=~"{node}"}}'
memory_free = self.run_query(memory_free_query)
if not memory_free.get('status') == 'success':
output['fail_reason'] = f"could not get metric's value: {memory_free_query}"
Expand All @@ -1315,7 +1337,7 @@ def topNode(self):
output['fail_reason'] = f"Query did not return any data: {memory_free_query}"
return output

cpu_cores_query = f'machine_cpu_cores'
cpu_cores_query = f'machine_cpu_cores{{kubernetes_io_hostname=~"{node}"}}'
cpu_cores = self.run_query(cpu_cores_query)
if not cpu_cores.get('status') == 'success':
output['fail_reason'] = f"could not get metric's value: {cpu_cores_query}"
Expand All @@ -1324,7 +1346,8 @@ def topNode(self):
output['fail_reason'] = f"Query did not return any data: {cpu_cores_query}"
return output

cpu_used_percentage_query = f'100 - (avg by ({GlobalAttrs.node_exporter_node_label}) (rate(node_cpu_seconds_total{{mode="idle"}}[10m])) * 100)'
#### Fix
cpu_used_percentage_query = f'100 - (avg by ({GlobalAttrs.node_exporter_node_label}) (rate(node_cpu_seconds_total{{mode="idle", {GlobalAttrs.node_exporter_node_label}=~"{node}"}}[10m])) * 100)'
cpu_used_percentage = self.run_query(cpu_used_percentage_query)
if not cpu_used_percentage.get('status') == 'success':
output['fail_reason'] = f"could not get metric's value: {cpu_used_percentage_query}"
Expand All @@ -1333,27 +1356,21 @@ def topNode(self):
output['fail_reason'] = f"Query did not return any data: {cpu_used_percentage_query}"
return output

running_pods_count_query = f'kubelet_running_pods'
running_pods_count_query = f'kubelet_running_pods{{instance=~"{node}"}}'
running_pods_count = self.run_query(running_pods_count_query)
if not running_pods_count.get('status') == 'success':
output['fail_reason'] = f"could not get metric's value: {running_pods_count_query}"
return output
if not running_pods_count.get('data').get('result'):
output['fail_reason'] = f"Query did not return any data: {running_pods_count_query}"
return output


nodes_dct = {}
for node in memory_total.get('data').get('result'):
nodes_dct[node.get('metric').get(GlobalAttrs.node_exporter_node_label)] = {
"memory_total": int(node.get('value')[1]),
"memory_free": -1,
"memory_used": -1,
"cpu_cores": -1,
# "cpu_used": -1, # not sure of the metrics to get the used cpu in milicores.
"cpu_used_percentage": -1,
"running_pods_num": -1,
}

##
node_managed_k8s_info = self.nodeManagedK8sInfo(node=node)
if not node_managed_k8s_info.get('success'):
output['fail_reason'] = node_managed_k8s_info.get('fail_reason')
return output


for node in memory_free.get('data').get('result'):
nodes_dct[node.get('metric').get(GlobalAttrs.node_exporter_node_label)]['memory_free'] = int(node.get('value')[1])
Expand All @@ -1373,6 +1390,29 @@ def topNode(self):
nodes_dct[node.get('metric').get('instance')]['running_pods_num'] = int(node.get('value')[1])
except KeyError:
pass # A KeyError Exception is expected as this metric returns the value for the master nodes while other metrics dont.


for node in node_managed_k8s_info.get('result'):
# General Labels (match different cloud providers)
try:
nodes_dct[node.get('metric').get('instance')]['node_arch'] = node['metric']['beta_kubernetes_io_arch']
nodes_dct[node.get('metric').get('instance')]['node_os'] = node['metric']['beta_kubernetes_io_os']
nodes_dct[node.get('metric').get('instance')]['cluster'] = node['metric']['cluster']
nodes_dct[node.get('metric').get('instance')]['region'] = node['metric']['topology_kubernetes_io_region']
nodes_dct[node.get('metric').get('instance')]['az'] = node['metric']['topology_kubernetes_io_zone']
nodes_dct[node.get('metric').get('instance')]['instance_type'] = node['metric']['node_kubernetes_io_instance_type']
except KeyError:
pass # If labels are not found, means that most probably this is a Local cluster

# AWS Labels
try:
nodes_dct[node.get('metric').get('instance')]['node_group_capacity_type'] = node['metric']['eks_amazonaws_com_capacityType']
nodes_dct[node.get('metric').get('instance')]['node_group_name'] = node['metric']['eks_amazonaws_com_nodegroup']
if nodes_dct[node.get('metric').get('instance')]['node_group_name']:
nodes_dct[node.get('metric').get('instance')]['cluster_env'] = 'EKS'
except KeyError:
pass # If labels are not found, means that it's not an EKS cluster.


output['result'] = nodes_dct
output['success'] = True
Expand All @@ -1384,9 +1424,20 @@ def topNode(self):
Logging.log.exception(traceback.format_stack())

return output

def topNodeJson(self, node=".*", color=False):
    """
    Print the result of topNode() for the selected node(s) as JSON.

    INPUT:
        - node: node selector forwarded unchanged to topNode() (default ".*")
        - color: when truthy the JSON is printed through rich.print_json,
          otherwise it is printed as plain text indented by 4 spaces
    Return:
        - None; on a failed topNode() call the fail reason is printed and
          the process exits with status 1
    """
    report = self.topNode(node=node)
    if report.get('success'):
        payload = report.get('result')
        if not color:
            print(json.dumps(payload, indent=4))
            return
        rich.print_json(data=payload)
        return

    # topNode() reported a failure: show why and stop with a non-zero status.
    print(f"ERROR -- Failed to get nodes \n{report.get('fail_reason')}")
    exit(1)


def topNodeTable(self):
def topNodeTable(self, option=""):
"""
"""
nodes_json = self.topNode()
Expand All @@ -1398,15 +1449,80 @@ def topNodeTable(self):


table = [['NODE', 'MEM TOTAL', 'MEM USAGE', 'MEM FREE', 'CPU CORES', 'CPU USAGE%', 'RUNNING PODS' ]]
for node, value in nodes_json.get('result').items():
row = [node, helper_.bytes_to_kb_mb_gb(value.get('memory_total')), helper_.bytes_to_kb_mb_gb(value.get('memory_used')), helper_.bytes_to_kb_mb_gb(value.get('memory_free')), value.get('cpu_cores'), str(round(value.get('cpu_used_percentage'))) + "%", value.get('running_pods_num')]
table.append(row)
if option == 'cloud':
table = [['NODE', 'MEM TOTAL', 'MEM USAGE', 'MEM FREE', 'CPU CORES', 'CPU USAGE%', 'RUNNING PODS', 'CLUSTER', 'INSTANCE TYPE', 'AZ', 'ENV', 'NG CAPACITY TYPE']]

if option == 'cloud':
for node, value in nodes_json.get('result').items():
row = [
node,
helper_.bytes_to_kb_mb_gb(value.get('memory_total')),
helper_.bytes_to_kb_mb_gb(value.get('memory_used')),
helper_.bytes_to_kb_mb_gb(value.get('memory_free')),
value.get('cpu_cores'),
str(round(value.get('cpu_used_percentage'))) + "%",
value.get('running_pods_num'),
value.get('cluster'),
value.get('instance_type'),
# value.get('region'),
value.get('az'),
value.get('cluster_env'),
value.get('node_group_capacity_type'),
# value.get('node_group_name'),
]
table.append(row)
else:
for node, value in nodes_json.get('result').items():
row = [
node,
helper_.bytes_to_kb_mb_gb(value.get('memory_total')),
helper_.bytes_to_kb_mb_gb(value.get('memory_used')),
helper_.bytes_to_kb_mb_gb(value.get('memory_free')),
value.get('cpu_cores'),
str(round(value.get('cpu_used_percentage'))) + "%",
value.get('running_pods_num'),
]
table.append(row)

out = tabulate(table, headers='firstrow', tablefmt='plain', showindex=False)
print(out)


def nodeManagedK8sInfo(self, node):
    """
    Query the 'kubelet_node_name' metric for the given node(s).

    INPUT:
        - node: K8s node name (str). It is placed in a '=~' matcher on the
          'kubernetes_io_hostname' label, so it is matched as a regex
          (e.g. '.*' selects every node).
    Return:
        - dct with the keys:
            "success": True only when the query succeeded and returned data
            "fail_reason": why the lookup failed (empty string on success)
            "result": the 'data' -> 'result' value of the query response
                      (stays an empty dct on failure)
    """
    output = {
        "success": False,
        "fail_reason": "",
        "result": {}
    }
    try:
        query = f'kubelet_node_name{{kubernetes_io_hostname=~"{node}"}}'
        result = self.run_query(query)
        if result.get('status') != 'success':
            output['fail_reason'] = f"could not get metric's value: \n{query}"
            return output

        series = result.get('data').get('result')
        if not series:
            output['fail_reason'] = f"Query did not return any data: \n{query}"
            return output

        output['result'] = series
        output['success'] = True

    except (KeyError, AttributeError) as e:
        # A real assignment: the previous "output['success']: False" was an
        # annotation statement and never stored anything.
        output['success'] = False
        output['fail_reason'] = e
        Logging.log.error(e)
        Logging.log.exception(traceback.format_stack())

    return output





2 changes: 1 addition & 1 deletion kubePtop/pod_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -556,7 +556,7 @@ def watch_threads(self):
Markdown("Bytes Write", justify='center'),
Text.from_ansi(disk_write_bytes_graph.graph + f"\n {disk_write_bytes_graph.colors_description_str}"),
)
layout["body2_b_a"].update(Panel(group_disk_io, title="[b]Network IO", padding=(1, 1)))
layout["body2_b_a"].update(Panel(group_disk_io, title="[b]Disk IO", padding=(1, 1)))



Expand Down
8 changes: 4 additions & 4 deletions kubePtop/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
rich
requests
tabulate
rich==13.3.1
requests==2.28.2
tabulate==0.9.0
argparse
asciichartpy
asciichartpy==1.5.25
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

setup(
name = 'kptop',
version = '0.0.5',
version = '0.0.6',
author = 'Eslam Gomaa',
# license = '<the license you chose>',
description = 'A CLI tool that provides Monitoring for Kubernetes resources on the terminal through Prometheus metircs',
Expand Down

0 comments on commit 246b010

Please sign in to comment.