diff --git a/tritonbench/components/ncu/nsys_analyzer.py b/tritonbench/components/ncu/nsys_analyzer.py
index 5ab0a092..ee580699 100644
--- a/tritonbench/components/ncu/nsys_analyzer.py
+++ b/tritonbench/components/ncu/nsys_analyzer.py
@@ -7,17 +7,17 @@
 # The nsys metrics to the reports. The value is the list of reports of nsys.
 nsys_metrics_to_reports = {
     # the sum of kernel execution time
-    "nsys_gpu_kernel_sum": ["cuda_gpu_kern_sum", "nvtx_sum"],
+    "nsys_gpu_kernel_sum": ["nvtx_kern_sum", "nvtx_sum"],
     # the overhead of kernel launch
-    "nsys_launch_overhead": ["cuda_gpu_kern_sum", "nvtx_sum"],
+    "nsys_launch_overhead": ["nvtx_kern_sum", "nvtx_sum"],
     # the names of kernels
-    "nsys_kernel_names": ["cuda_gpu_kern_sum"],
+    "nsys_kernel_names": ["nvtx_kern_sum"],
     # the durations of kernels
-    "nsys_kernel_durations": ["cuda_gpu_kern_sum"],
+    "nsys_kernel_durations": ["nvtx_kern_sum"],
     # the duration of nvtx range
     "nsys_nvtx_range_duration": ["nvtx_sum"],
     # the number of kernels
-    "nsys_num_of_kernels": ["cuda_gpu_kern_sum"],
+    "nsys_num_of_kernels": ["nvtx_kern_sum"],
 }
 # The public nsys metrics to tritonbench
 nsys_bench_metrics = list(nsys_metrics_to_reports.keys())
@@ -59,12 +59,12 @@ def read_nsys_report(
     kernel_names = []
     sum_kernel_duration = 0
     nvtx_range_duration = 0
-    if "cuda_gpu_kern_sum" in csv_contents:
+    if "nvtx_kern_sum" in csv_contents:
         # gpu kernel execution time summary
-        for row in csv_contents["cuda_gpu_kern_sum"]:
+        for row in csv_contents["nvtx_kern_sum"]:
             # use ms as the unit
             kernel_duration.append(float(row["Total Time (ns)"]) / 1_000_000)
-            kernel_names.append(row["Name"])
+            kernel_names.append(row["Kernel Name"])
         sum_kernel_duration = sum(kernel_duration)
     if "nvtx_sum" in csv_contents:
         # It is supposed to be only one row. The nvtx range is `:tritonbench_range`