Skip to content

Commit

Permalink
tuning
Browse files: browse the repository at this point in the history
  • Loading branch information
blegouix committed Nov 12, 2023
1 parent 33cd935 commit 9619c34
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 8 deletions.
6 changes: 3 additions & 3 deletions benchmarks/splines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,13 +179,13 @@ static void characteristics_advection(benchmark::State& state)

#ifdef KOKKOS_ENABLE_CUDA
std::string chip = "gpu";
int cols_per_par_chunk_ref = 1024;
int par_chunks_per_seq_chunk_ref = 160;
int cols_per_par_chunk_ref = 65535;
int par_chunks_per_seq_chunk_ref = 1;
unsigned int preconditionner_max_block_size_ref = 1u;
#elif defined(KOKKOS_ENABLE_OPENMP)
std::string chip = "cpu";
int cols_per_par_chunk_ref = 512;
int par_chunks_per_seq_chunk_ref = 160;
int par_chunks_per_seq_chunk_ref = Kokkos::OpenMP().concurrency();
unsigned int preconditionner_max_block_size_ref = 8u;
#elif defined(KOKKOS_ENABLE_SERIAL)
std::string chip = "cpu";
Expand Down
2 changes: 1 addition & 1 deletion benchmarks/splines_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
bandwidth = [group_data["bytes_per_second"][i] for i in range(len(ny))]
plt.plot(ny, bandwidth, marker='o', markersize=5, label=f'nx={nx}')

x = np.linspace(min(ny), 10*min(ny))
x = np.linspace(min(ny), 20*min(ny))
plt.plot(x, np.mean([data_groups[nx]["bytes_per_second"][0] for nx in nx_values])/min(ny)*x, linestyle='--', color='black', label='perfect scaling')

# Plotting the data
Expand Down
8 changes: 4 additions & 4 deletions include/ddc/kernels/splines/matrix_sparse.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,12 @@ class Matrix_Sparse : public Matrix
#endif
#ifdef KOKKOS_ENABLE_CUDA
if (std::is_same_v<ExecSpace, Kokkos::Cuda>) {
m_cols_per_par_chunk = 1024;
m_cols_per_par_chunk = 65535;
}
#endif
#ifdef KOKKOS_ENABLE_HIP
if (std::is_same_v<ExecSpace, Kokkos::HIP>) {
m_cols_per_par_chunk = 1024;
m_cols_per_par_chunk = 65535;
}
#endif
}
Expand All @@ -101,12 +101,12 @@ class Matrix_Sparse : public Matrix
#endif
#ifdef KOKKOS_ENABLE_CUDA
if (std::is_same_v<ExecSpace, Kokkos::Cuda>) {
m_par_chunks_per_seq_chunk = Kokkos::DefaultHostExecutionSpace().concurrency();
m_par_chunks_per_seq_chunk = 1;
}
#endif
#ifdef KOKKOS_ENABLE_HIP
if (std::is_same_v<ExecSpace, Kokkos::HIP>) {
m_par_chunks_per_seq_chunk = Kokkos::DefaultHostExecutionSpace().concurrency();
m_par_chunks_per_seq_chunk = 1;
}
#endif
}
Expand Down

0 comments on commit 9619c34

Please sign in to comment.