From 9619c34a5512905709242adb311086c09a70a170 Mon Sep 17 00:00:00 2001 From: blegouix Date: Sun, 12 Nov 2023 15:50:07 +0100 Subject: [PATCH] tuning --- benchmarks/splines.cpp | 6 +++--- benchmarks/splines_plot.py | 2 +- include/ddc/kernels/splines/matrix_sparse.hpp | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/benchmarks/splines.cpp b/benchmarks/splines.cpp index c7c123950..7b65a0d27 100644 --- a/benchmarks/splines.cpp +++ b/benchmarks/splines.cpp @@ -179,13 +179,13 @@ static void characteristics_advection(benchmark::State& state) #ifdef KOKKOS_ENABLE_CUDA std::string chip = "gpu"; -int cols_per_par_chunk_ref = 1024; -int par_chunks_per_seq_chunk_ref = 160; +int cols_per_par_chunk_ref = 65535; +int par_chunks_per_seq_chunk_ref = 1; unsigned int preconditionner_max_block_size_ref = 1u; #elif defined(KOKKOS_ENABLE_OPENMP) std::string chip = "cpu"; int cols_per_par_chunk_ref = 512; -int par_chunks_per_seq_chunk_ref = 160; +int par_chunks_per_seq_chunk_ref = Kokkos::OpenMP().concurrency(); unsigned int preconditionner_max_block_size_ref = 8u; #elif defined(KOKKOS_ENABLE_SERIAL) std::string chip = "cpu"; diff --git a/benchmarks/splines_plot.py b/benchmarks/splines_plot.py index 8f68f3157..fd871a98b 100644 --- a/benchmarks/splines_plot.py +++ b/benchmarks/splines_plot.py @@ -39,7 +39,7 @@ bandwidth = [group_data["bytes_per_second"][i] for i in range(len(ny))] plt.plot(ny, bandwidth, marker='o', markersize=5, label=f'nx={nx}') -x = np.linspace(min(ny), 10*min(ny)) +x = np.linspace(min(ny), 20*min(ny)) plt.plot(x, np.mean([data_groups[nx]["bytes_per_second"][0] for nx in nx_values])/min(ny)*x, linestyle='--', color='black', label='perfect scaling') # Plotting the data diff --git a/include/ddc/kernels/splines/matrix_sparse.hpp b/include/ddc/kernels/splines/matrix_sparse.hpp index 79975cce9..78cf34def 100644 --- a/include/ddc/kernels/splines/matrix_sparse.hpp +++ b/include/ddc/kernels/splines/matrix_sparse.hpp @@ -76,12 +76,12 @@ class Matrix_Sparse : public Matrix #endif #ifdef KOKKOS_ENABLE_CUDA if (std::is_same_v) { - m_cols_per_par_chunk = 1024; + m_cols_per_par_chunk = 65535; } #endif #ifdef KOKKOS_ENABLE_HIP if (std::is_same_v) { - m_cols_per_par_chunk = 1024; + m_cols_per_par_chunk = 65535; } #endif } @@ -101,12 +101,12 @@ class Matrix_Sparse : public Matrix #endif #ifdef KOKKOS_ENABLE_CUDA if (std::is_same_v) { - m_par_chunks_per_seq_chunk = Kokkos::DefaultHostExecutionSpace().concurrency(); + m_par_chunks_per_seq_chunk = 1; } #endif #ifdef KOKKOS_ENABLE_HIP if (std::is_same_v) { - m_par_chunks_per_seq_chunk = Kokkos::DefaultHostExecutionSpace().concurrency(); + m_par_chunks_per_seq_chunk = 1; } #endif }