From a8554b529442957d21b05aa3ee3118164fc117fd Mon Sep 17 00:00:00 2001 From: Thomas Padioleau Date: Sat, 2 Dec 2023 09:43:23 +0100 Subject: [PATCH] Lighten Matrix_Sparse constructor --- include/ddc/kernels/splines/matrix_sparse.hpp | 160 +++++++++--------- 1 file changed, 84 insertions(+), 76 deletions(-) diff --git a/include/ddc/kernels/splines/matrix_sparse.hpp b/include/ddc/kernels/splines/matrix_sparse.hpp index 2f4429694..2aab656b5 100644 --- a/include/ddc/kernels/splines/matrix_sparse.hpp +++ b/include/ddc/kernels/splines/matrix_sparse.hpp @@ -16,6 +16,84 @@ namespace ddc::detail { +template +int default_cols_per_par_chunk() noexcept +{ +#ifdef KOKKOS_ENABLE_SERIAL + if (std::is_same_v) { + return 256; + } +#endif +#ifdef KOKKOS_ENABLE_OPENMP + if (std::is_same_v) { + return 256; + } +#endif +#ifdef KOKKOS_ENABLE_CUDA + if (std::is_same_v) { + return 65535; + } +#endif +#ifdef KOKKOS_ENABLE_HIP + if (std::is_same_v) { + return 65535; + } +#endif + return 1; +} + +template +int default_par_chunks_per_seq_chunk() noexcept +{ +#ifdef KOKKOS_ENABLE_SERIAL + if (std::is_same_v) { + return 1; + } +#endif +#ifdef KOKKOS_ENABLE_OPENMP + if (std::is_same_v) { + return ExecSpace().concurrency(); + } +#endif +#ifdef KOKKOS_ENABLE_CUDA + if (std::is_same_v) { + return 1; + } +#endif +#ifdef KOKKOS_ENABLE_HIP + if (std::is_same_v) { + return 1; + } +#endif + return 1; +} + +template +unsigned int default_preconditionner_max_block_size() noexcept +{ +#ifdef KOKKOS_ENABLE_SERIAL + if (std::is_same_v) { + return 32u; + } +#endif +#ifdef KOKKOS_ENABLE_OPENMP + if (std::is_same_v) { + return 32u; + } +#endif +#ifdef KOKKOS_ENABLE_CUDA + if (std::is_same_v) { + return 1u; + } +#endif +#ifdef KOKKOS_ENABLE_HIP + if (std::is_same_v) { + return 1u; + } +#endif + return 1u; +} + // Matrix class for Csr storage and iterative solve template class Matrix_Sparse : public Matrix @@ -33,7 +111,7 @@ class Matrix_Sparse : public Matrix int m_par_chunks_per_seq_chunk; // Maximum number of teams to be executed in parallel - int m_preconditionner_max_block_size; // Maximum size of Jacobi-block preconditionner + unsigned int m_preconditionner_max_block_size; // Maximum size of Jacobi-block preconditionner public: // Constructor @@ -46,6 +124,11 @@ class Matrix_Sparse : public Matrix , m_rows("rows", mat_size + 1) , m_cols("cols", mat_size * mat_size) , m_data("data", mat_size * mat_size) + , m_cols_per_par_chunk(cols_per_par_chunk.value_or(default_cols_per_par_chunk())) + , m_par_chunks_per_seq_chunk( + par_chunks_per_seq_chunk.value_or(default_par_chunks_per_seq_chunk())) + , m_preconditionner_max_block_size(preconditionner_max_block_size.value_or( + default_preconditionner_max_block_size())) { // Fill the csr indexes as a dense matrix and initialize with zeros (zeros will be removed once non-zeros elements will be set) for (int i = 0; i < get_size() * get_size(); i++) { @@ -56,81 +139,6 @@ class Matrix_Sparse : public Matrix m_data(i) = 0; } - if (cols_per_par_chunk.has_value()) { - m_cols_per_par_chunk = cols_per_par_chunk.value(); - } else { -#ifdef KOKKOS_ENABLE_SERIAL - if (std::is_same_v) { - m_cols_per_par_chunk = 256; - } -#endif -#ifdef KOKKOS_ENABLE_OPENMP - if (std::is_same_v) { - m_cols_per_par_chunk = 256; - } -#endif -#ifdef KOKKOS_ENABLE_CUDA - if (std::is_same_v) { - m_cols_per_par_chunk = 65535; - } -#endif -#ifdef KOKKOS_ENABLE_HIP - if (std::is_same_v) { - m_cols_per_par_chunk = 65535; - } -#endif - } - - if (par_chunks_per_seq_chunk.has_value()) { - m_par_chunks_per_seq_chunk = par_chunks_per_seq_chunk.value(); - } else { -#ifdef KOKKOS_ENABLE_SERIAL - if (std::is_same_v) { - m_par_chunks_per_seq_chunk = 1; - } -#endif -#ifdef KOKKOS_ENABLE_OPENMP - if (std::is_same_v) { - m_par_chunks_per_seq_chunk = Kokkos::DefaultHostExecutionSpace().concurrency(); - } -#endif -#ifdef KOKKOS_ENABLE_CUDA - if (std::is_same_v) { - m_par_chunks_per_seq_chunk = 1; - } -#endif -#ifdef KOKKOS_ENABLE_HIP - if (std::is_same_v) { - m_par_chunks_per_seq_chunk = 1; - } -#endif - } - - if (preconditionner_max_block_size.has_value()) { - m_preconditionner_max_block_size = preconditionner_max_block_size.value(); - } else { -#ifdef KOKKOS_ENABLE_SERIAL - if (std::is_same_v) { - m_preconditionner_max_block_size = 32u; - } -#endif -#ifdef KOKKOS_ENABLE_OPENMP - if (std::is_same_v) { - m_preconditionner_max_block_size = 32u; - } -#endif -#ifdef KOKKOS_ENABLE_CUDA - if (std::is_same_v) { - m_preconditionner_max_block_size = 1u; - } -#endif -#ifdef KOKKOS_ENABLE_HIP - if (std::is_same_v) { - m_preconditionner_max_block_size = 1u; - } -#endif - } - // Create the solver factory std::shared_ptr gko_exec; if (false) {