Skip to content

Commit

Permalink
Update build.sh and result printing.
Browse files (browse the repository at this point in the history)
  • Loading branch information
taozha2 committed Apr 30, 2024
1 parent 6f12cc0 commit 7d592d8
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 9 deletions.
28 changes: 20 additions & 8 deletions build.sh
Original file line number Diff line number Diff line change
@@ -1,16 +1,28 @@
sycl_compiler_path=/opt/cutlass_compiler/
target=./examples/cute/tutorial/pvc_sycl
sycl_compiler_path=/opt/cutlass/compiler/0327/
gpu_driver_path=/opt/cutlass/gpu_driver/hotfix_agama-ci-devel-803.25/extract/
cuda_path=/usr/local/cuda-12.3/
mkl_path=/opt/intel/oneapi/mkl/2024.1
rm -rf $target

# AOT compile
output=intel_gpu_pvc

# JIT compile
#output=spir64


export ZE_AFFINITY_MASK=0
export CPATH=$sycl_compiler_path:$sycl_compiler_path/include/:$sycl_compiler_path/include/sycl/:$mkl_path/include/
export LIBRARY_PATH=/opt/intel/oneapi/mkl/2024.1/lib/
export LD_LIBRARY_PATH=$mkl_path/lib/:${sycl_compiler_path}/lib/
export LIBRARY_PATH=$gpu_driver_path/usr/lib/x86_64-linux-gnu/:$mkl_path/lib/:$sycl_compiler_path/lib/
export LD_LIBRARY_PATH=$LIBRARY_PATH
export IGC_EnableVISANoSchedule=1
export IGC_ShaderDumpEnable=1
export IGC_DumpToCustomDir=./mm_dumps_prefetch_coop
export IGC_DumpToCustomDir=./mm_dumps
export IGC_VATemp=1
export ONEAPI_DEVICE_SELECTOR=level_zero:gpu

target=./examples/cute/tutorial/pvc_sycl
rm -rf $target

cmake .. -G Ninja -DCMAKE_CUDA_HOST_COMPILER=${sycl_compiler_path}/bin/clang++ -DCMAKE_CUDA_COMPILER=$cuda_path/bin/nvcc \
-DCUTLASS_ENABLE_SYCL=ON -DDPCPP_SYCL_TARGET=intel_gpu_pvc -DCMAKE_CXX_COMPILER=${sycl_compiler_path}/bin/clang++ \
-DCMAKE_CXX_FLAGS=" -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -DPREFETCH_DEFAULT" && ninja -v $target && ONEAPI_DEVICE_SELECTOR=level_zero:gpu $target
-DCUTLASS_ENABLE_SYCL=ON -DDPCPP_SYCL_TARGET=$output -DCMAKE_CXX_COMPILER=${sycl_compiler_path}/bin/clang++ \
-DCMAKE_CXX_FLAGS=" -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -DPREFETCH_DEFAULT" && ninja -v $target && $target
6 changes: 5 additions & 1 deletion examples/cute/tutorial/pvc_sycl/pvc_sycl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -269,16 +269,19 @@ void cute_gemm(size_t M, size_t K, size_t N) {
double average_event_time = 0.f;
auto best = 999.f;
for (uint32_t i = WARMUP_ITERATIONS; i < total_iterations; i++) {
#if 0
printf("GPU time is %f ms, Tflops is: %f, HBM (GBs) is %f\n",
event_times[i] / 1e3, 2.0 * M * N * K / 1e12 / event_times[i],
(M * K * sizeof(dtype_a) + K * N * sizeof(dtype_b) +
M * N * sizeof(dtype_c)) /
event_times[i] / 1e9);
#endif
average_event_time += event_times[i];
best = min(best, event_times[i]);
}
average_event_time /= testIterations;
printf("Best is %f Tflops, %f HBM (GBs)\n", 2.0 * M * N * K / 1e12 / best,
printf("MKN (%d, %d, %d), Best is %f ms, %f Tflops, %f HBM (GBs)\n", M, K, N,
best * 1e3, 2.0 * M * N * K / 1e12 / best,
(M * K * sizeof(dtype_a) + K * N * sizeof(dtype_b) +
M * N * sizeof(dtype_c)) /
best / 1e9);
Expand All @@ -305,6 +308,7 @@ void cute_gemm(size_t M, size_t K, size_t N) {
}

int main(int argc, char **argv) {
// M, K, N
cute_gemm<256, 256, 32, 64, 32, 32>(2048, 2048, 2048);
cute_gemm<256, 256, 32, 64, 32, 32>(4096, 4096, 4096);
cute_gemm<256, 256, 32, 64, 32, 32>(8192, 8192, 8192);
Expand Down

0 comments on commit 7d592d8

Please sign in to comment.