Skip to content

Commit

Permalink
Add SYCL equivalent to CUDA events for profiling
Browse files Browse the repository at this point in the history
  • Loading branch information
aacostadiaz committed May 31, 2024
1 parent 681df7a commit c72e06e
Show file tree
Hide file tree
Showing 17 changed files with 217 additions and 645 deletions.
2 changes: 0 additions & 2 deletions benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,6 @@ endfunction()

if(SYCL_INTEL_TARGET)
add_subdirectory(pvc)
else(SYCL_NVIDIA_TARGET OR NOT CUTLASS_ENABLE_SYCL)
add_subdirectory(ampere)
endif()
if(SYCL_NVIDIA_TARGET OR NOT CUTLASS_ENABLE_SYCL)
add_subdirectory(ampere)
Expand Down
5 changes: 0 additions & 5 deletions benchmarks/ampere/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,3 @@ cutlass_benchmark_add_executable(
bench_ampere_gemm_bf16_bf16_fp32_tensor_op_fp32
bench_ampere_gemm_bf16_bf16_fp32_tensor_op_fp32.cu
)

cutlass_benchmark_add_executable(
bench_ampere_gemm_bf16_bf16_fp32_tensor_op_fp32
bench_ampere_gemm_bf16_bf16_fp32_tensor_op_fp32.cpp
)
10 changes: 10 additions & 0 deletions benchmarks/common/benchmark_runner.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,16 @@ struct BenchmarkRunner {
auto problem_shape_MNKL = cute::append<4>(problem_size, 1);
auto [M, N, K, L] = problem_shape_MNKL;

#if defined(CUTLASS_ENABLE_SYCL)
sycl::property_list prop = {
sycl::property::queue::in_order(),
sycl::property::queue::enable_profiling()
};

auto q = sycl::queue(syclcompat::get_default_context(), syclcompat::get_current_device(), prop);
syclcompat::set_default_queue(q);
#endif

stride_A = cutlass::make_cute_packed_stride(StrideA{}, cute::make_shape(M, K, L));
stride_B = cutlass::make_cute_packed_stride(StrideB{}, cute::make_shape(N, K, L));
stride_C = cutlass::make_cute_packed_stride(StrideC{}, cute::make_shape(M, N, L));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,13 @@ run(Gemm_Op gemm_op)

void test_gemm(int m, int n, int k)
{
sycl::property_list prop = {
sycl::property::queue::in_order(),
sycl::property::queue::enable_profiling()
};

auto q = sycl::queue(syclcompat::get_default_context(), syclcompat::get_current_device(), prop);
syclcompat::set_default_queue(q);

std::cout << "M = " << m << std::endl;
std::cout << "N = " << n << std::endl;
Expand Down
8 changes: 8 additions & 0 deletions examples/cute/tutorial/sgemm_1_sycl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,14 @@ int main(int argc, char** argv) {
char transB = 'T';
if (argc >= 6) sscanf(argv[5], "%c", &transB);

sycl::property_list prop = {
sycl::property::queue::in_order(),
sycl::property::queue::enable_profiling()
};

auto q = sycl::queue(syclcompat::get_default_context(), syclcompat::get_current_device(), prop);
syclcompat::set_default_queue(q);

using TA = float;
using TB = float;
using TC = float;
Expand Down
8 changes: 8 additions & 0 deletions examples/cute/tutorial/sgemm_2_sycl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,14 @@ int main(int argc, char** argv) {
char transB = 'T';
if (argc >= 6) sscanf(argv[5], "%c", &transB);

sycl::property_list prop = {
sycl::property::queue::in_order(),
sycl::property::queue::enable_profiling()
};

auto q = sycl::queue(syclcompat::get_default_context(), syclcompat::get_current_device(), prop);
syclcompat::set_default_queue(q);

using TA = float;
using TB = float;
using TC = float;
Expand Down
8 changes: 8 additions & 0 deletions examples/cute/tutorial/sgemm_sm70_sycl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,14 @@ int main(int argc, char** argv) {
char transB = 'T';
if (argc >= 6) sscanf(argv[5], "%c", &transB);

sycl::property_list prop = {
sycl::property::queue::in_order(),
sycl::property::queue::enable_profiling()
};

auto q = sycl::queue(syclcompat::get_default_context(), syclcompat::get_current_device(), prop);
syclcompat::set_default_queue(q);

using TA = float;
using TB = float;
using TC = float;
Expand Down
8 changes: 8 additions & 0 deletions examples/cute/tutorial/sgemm_sm80_sycl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,14 @@ int main(int argc, char** argv) {
char transB = 'T';
if (argc >= 6) sscanf(argv[5], "%c", &transB);

sycl::property_list prop = {
sycl::property::queue::in_order(),
sycl::property::queue::enable_profiling()
};

auto q = sycl::queue(syclcompat::get_default_context(), syclcompat::get_current_device(), prop);
syclcompat::set_default_queue(q);

using TA = float;
using TB = float;
using TC = float;
Expand Down
2 changes: 0 additions & 2 deletions examples/sycl/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,4 @@

if(SYCL_INTEL_TARGET)
add_subdirectory(pvc)
else()
add_subdirectory(ampere)
endif()
43 changes: 0 additions & 43 deletions examples/sycl/ampere/CMakeLists.txt

This file was deleted.

153 changes: 0 additions & 153 deletions examples/sycl/ampere/ampere_gemm_fp16_fp16_fp32_tensor_op_fp32.cu

This file was deleted.

Loading

0 comments on commit c72e06e

Please sign in to comment.