generated from SparkJiao/pytorch-transformers-template
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcheck_gpu_status.py
30 lines (27 loc) · 1.06 KB
/
check_gpu_status.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import time
from argparse import ArgumentParser
import os
# Slurm partition that each GPU node belongs to, keyed by node number.
# Node 13 has no entry.
_NODE_PARTITIONS = {
    1: "NA100q",
    2: "PA100q",
    3: "PA100q",
    4: "PA100q",
    5: "PA40q",
    6: "PA40q",
    7: "PA40q",
    8: "RTXA6Kq",
    9: "RTXA6Kq",
    10: "RTXA6Kq",
    11: "RTXA6Kq",
    12: "PA100q",
    14: "HPCq",
    15: "NH100q",
}

# Maps "nvi<N>" to the shell command that runs `nvidia-smi` on node N through
# `srun`, selecting the node's partition (-p) and the zero-padded node name (-w).
command = {
    f"nvi{node_id}": f"srun -p {partition} -w node{node_id:02d} nvidia-smi"
    for node_id, partition in _NODE_PARTITIONS.items()
}
def main(argv=None):
    """Run the `nvidia-smi` command for one node, optionally on a fixed interval.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Command-line options:
        --node / -n: Node number; ``nvi<node>`` must be a key of ``command``.
        --interval / -i: Seconds to sleep between polls. When omitted, the
            command is run once and the program exits.

    Exits with a usage error (status 2) when ``--node`` is missing or unknown,
    or when ``--interval`` is negative.
    """
    parser = ArgumentParser(
        description="Show nvidia-smi output for a cluster node via srun.",
    )
    parser.add_argument(
        "--node", "-n", type=str, required=True,
        help="node number, e.g. 1 for node01",
    )
    parser.add_argument(
        "--interval", "-i", type=int,
        help="seconds between polls; omit to query the node once",
    )
    args = parser.parse_args(argv)

    # time.sleep() rejects negative values; fail before running anything.
    if args.interval is not None and args.interval < 0:
        parser.error("--interval must be a non-negative number of seconds")

    key = f"nvi{args.node}"
    if key not in command:
        known = ", ".join(name[len("nvi"):] for name in command)
        parser.error(f"unknown node {args.node!r}; choose one of: {known}")

    try:
        while True:
            os.system(command[key])
            if args.interval is None:
                # No polling interval requested: single-shot mode.
                break
            time.sleep(args.interval)
    except KeyboardInterrupt:
        # Ctrl-C is the expected way to stop polling; exit without a traceback.
        pass


if __name__ == '__main__':
    main()