From 9109f2faed24bbabadb43347d7e7ebb66d33b1fc Mon Sep 17 00:00:00 2001 From: Dennis Liew Date: Wed, 20 Mar 2024 09:45:20 -0400 Subject: [PATCH 1/3] Added preconditions to avoid write-write data-race --- .../gpu/kernel/cuda_impl/cuda_ops/matrix_transpose_impl.cu | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/matrix_transpose_impl.cu b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/matrix_transpose_impl.cu index 823003e4996..3f1e6c8dab4 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/matrix_transpose_impl.cu +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/matrix_transpose_impl.cu @@ -24,6 +24,9 @@ using Complex = mindspore::utils::Complex; template __global__ void MatrixTransposeKernel(const T *input, int elements, int row, int col, T *output) { + if (col <= 0 || row <= 0 || row != col) { + return; + } const int matrix_size = row * col; for (int pos = blockIdx.x * blockDim.x + threadIdx.x; pos < elements; pos += blockDim.x * gridDim.x) { const int b = pos / matrix_size; From 6587dfc640649f98981c05002d35b0805d9e70e9 Mon Sep 17 00:00:00 2001 From: Dennis Liew <48105496+zhenrongliew@users.noreply.github.com> Date: Mon, 25 Mar 2024 22:17:13 -0400 Subject: [PATCH 2/3] Constraint for square matrix was for old version of kernel --- .../gpu/kernel/cuda_impl/cuda_ops/matrix_transpose_impl.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/matrix_transpose_impl.cu b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/matrix_transpose_impl.cu index 3f1e6c8dab4..f342c4422ce 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/matrix_transpose_impl.cu +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/matrix_transpose_impl.cu @@ -24,7 +24,7 @@ using Complex = mindspore::utils::Complex; template __global__ void MatrixTransposeKernel(const T *input, int elements, int row, int col, T *output) { - if (col <= 0 || row <= 0 || row != col) { + if (col < 0 || row < 0 ) { return; } const int matrix_size = row * col; From 81c8512e3a0d61326bcb4276e387be3910db528e Mon Sep 17 00:00:00 2001 From: Dennis Liew <48105496+zhenrongliew@users.noreply.github.com> Date: Tue, 26 Mar 2024 10:30:39 -0400 Subject: [PATCH 3/3] Apply suggestions from code review: move the conditional statement into the host side API. Co-authored-by: AGroupofProbiotocs <33567919+AGroupofProbiotocs@users.noreply.github.com> --- .../kernel/cuda_impl/cuda_ops/matrix_transpose_impl.cu | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/matrix_transpose_impl.cu b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/matrix_transpose_impl.cu index f342c4422ce..4822795b8b3 100644 --- a/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/matrix_transpose_impl.cu +++ b/mindspore/ccsrc/plugin/device/gpu/kernel/cuda_impl/cuda_ops/matrix_transpose_impl.cu @@ -24,7 +24,16 @@ using Complex = mindspore::utils::Complex; template __global__ void MatrixTransposeKernel(const T *input, int elements, int row, int col, T *output) { +template +cudaError_t MatrixTranspose(const T *input, int elements, int row, int col, T *output, uint32_t device_id, + cudaStream_t cuda_stream) { if (col < 0 || row < 0 ) { + return cudaErrorInvalidValue; + } + MatrixTransposeKernel<<>>( + input, elements, row, col, output); + return GetCudaStatus(); +} return; } const int matrix_size = row * col;