diff --git a/benchmarks/splines.cpp b/benchmarks/splines.cpp index 94a617a32..7727bd1b3 100644 --- a/benchmarks/splines.cpp +++ b/benchmarks/splines.cpp @@ -33,23 +33,23 @@ struct X static constexpr bool PERIODIC = true; }; -template +template struct BSplinesX : std::conditional_t< - NonUniform::value, + IsNonUniform, ddc::NonUniformBSplines, ddc::UniformBSplines> { }; -template +template using GrevillePoints = ddc::GrevilleInterpolationPoints< - BSplinesX, + BSplinesX, ddc::BoundCond::PERIODIC, ddc::BoundCond::PERIODIC>; -template -struct DDimX : GrevillePoints::interpolation_discrete_dimension_type +template +struct DDimX : GrevillePoints::interpolation_discrete_dimension_type { }; @@ -83,7 +83,7 @@ void monitorMemoryAsync(std::mutex& mutex, bool& monitorFlag, std::size_t& maxUs } } -template +template static void characteristics_advection_unitary(benchmark::State& state) { std::size_t const nx = state.range(3); @@ -112,44 +112,44 @@ static void characteristics_advection_unitary(benchmark::State& state) std::ref(monitorFlag), std::ref(maxUsedMem)); - if constexpr (!NonUniform::value) { + if constexpr (!IsNonUniform) { ddc::init_discrete_space>(ddc::Coordinate(0.), ddc::Coordinate(1.), nx); } else { std::vector> breaks(nx + 1); for (std::size_t i(0); i < nx + 1; ++i) { breaks[i] = ddc::Coordinate(static_cast(i) / nx); } - ddc::init_discrete_space>(breaks); + ddc::init_discrete_space>(breaks); } - ddc::init_discrete_space>( + ddc::init_discrete_space>( ddc::GrevilleInterpolationPoints< - BSplinesX, + BSplinesX, ddc::BoundCond::PERIODIC, ddc::BoundCond::PERIODIC>:: - template get_sampling>()); + template get_sampling>()); ddc::DiscreteDomain const y_domain = ddc::init_discrete_space(DDimY::init( ddc::Coordinate(-1.), ddc::Coordinate(1.), ddc::DiscreteVector(ny))); auto const x_domain = ddc::GrevilleInterpolationPoints< - BSplinesX, + BSplinesX, ddc::BoundCond::PERIODIC, - ddc::BoundCond::PERIODIC>::template get_domain>(); + ddc::BoundCond::PERIODIC>::template get_domain>(); ddc::Chunk density_alloc( - ddc::DiscreteDomain, DDimY>(x_domain, y_domain), + ddc::DiscreteDomain, DDimY>(x_domain, y_domain), ddc::KokkosAllocator()); ddc::ChunkSpan const density = density_alloc.span_view(); // Initialize the density on the main domain - ddc::DiscreteDomain, DDimY> const x_mesh - = ddc::DiscreteDomain, DDimY>(x_domain, y_domain); + ddc::DiscreteDomain, DDimY> const x_mesh + = ddc::DiscreteDomain, DDimY>(x_domain, y_domain); ddc::parallel_for_each( ExecSpace(), x_mesh, - KOKKOS_LAMBDA(ddc::DiscreteElement, DDimY> const ixy) { - double const x = ddc::coordinate(ddc::select>(ixy)); + KOKKOS_LAMBDA(ddc::DiscreteElement, DDimY> const ixy) { + double const x = ddc::coordinate(ddc::select>(ixy)); double const y = ddc::coordinate(ddc::select(ixy)); density(ixy) = 9.999 * Kokkos::exp(-(x * x + y * y) / 0.1 / 2); // initial_density(ixy) = 9.999 * ((x * x + y * y) < 0.25); @@ -157,22 +157,22 @@ static void characteristics_advection_unitary(benchmark::State& state) ddc::SplineBuilder< ExecSpace, typename ExecSpace::memory_space, - BSplinesX, - DDimX, + BSplinesX, + DDimX, ddc::BoundCond::PERIODIC, ddc::BoundCond::PERIODIC, Backend, - DDimX, + DDimX, DDimY> const spline_builder(x_mesh, cols_per_chunk, preconditioner_max_block_size); ddc::PeriodicExtrapolationRule const periodic_extrapolation; ddc::SplineEvaluator< ExecSpace, typename ExecSpace::memory_space, - BSplinesX, - DDimX, + BSplinesX, + DDimX, ddc::PeriodicExtrapolationRule, ddc::PeriodicExtrapolationRule, - DDimX, + DDimX, DDimY> const spline_evaluator(periodic_extrapolation, periodic_extrapolation); ddc::Chunk coef_alloc( spline_builder.batched_spline_domain(), @@ -188,9 +188,11 @@ static void characteristics_advection_unitary(benchmark::State& state) ddc::parallel_for_each( ExecSpace(), feet_coords.domain(), - KOKKOS_LAMBDA(ddc::DiscreteElement, DDimY> const e) { - feet_coords(e) = ddc::coordinate(ddc::select>(e)) - - ddc::Coordinate(0.0176429863); + KOKKOS_LAMBDA( + ddc::DiscreteElement, DDimY> const e) { + feet_coords(e) + = ddc::coordinate(ddc::select>(e)) + - ddc::Coordinate(0.0176429863); }); Kokkos::Profiling::popRegion(); Kokkos::Profiling::pushRegion("SplineBuilder"); @@ -213,13 +215,13 @@ static void characteristics_advection_unitary(benchmark::State& state) /// The reason is it acts on underlying global /// /// variables, which is always a bad idea. /// //////////////////////////////////////////////////// - ddc::detail::g_discrete_space_dual>.reset(); - if constexpr (!NonUniform::value) { - ddc::detail::g_discrete_space_dual>>.reset(); + ddc::detail::g_discrete_space_dual>.reset(); + if constexpr (!IsNonUniform) { + ddc::detail::g_discrete_space_dual>>.reset(); } else { - ddc::detail::g_discrete_space_dual>>.reset(); + ddc::detail::g_discrete_space_dual>>.reset(); } - ddc::detail::g_discrete_space_dual>.reset(); + ddc::detail::g_discrete_space_dual>.reset(); ddc::detail::g_discrete_space_dual.reset(); //////////////////////////////////////////////////// } @@ -232,42 +234,30 @@ static void characteristics_advection(benchmark::State& state) long const non_uniform = 1; // Preallocate 12 unitary benchs for each combination of cpu/gpu execution space, uniform/non-uniform and spline degree we may want to benchmark (those are determined at compile-time, that's why we need to build explicitely 12 variants of the bench even if we call only one of them) std::map, std::function> benchs; - benchs[std::array {host, uniform, 3L}] = characteristics_advection_unitary< - Kokkos::DefaultHostExecutionSpace, - std::false_type, - 3>; - benchs[std::array {host, uniform, 4L}] = characteristics_advection_unitary< - Kokkos::DefaultHostExecutionSpace, - std::false_type, - 4>; - benchs[std::array {host, uniform, 5L}] = characteristics_advection_unitary< - Kokkos::DefaultHostExecutionSpace, - std::false_type, - 5>; - benchs[std::array {host, non_uniform, 3L}] = characteristics_advection_unitary< - Kokkos::DefaultHostExecutionSpace, - std::true_type, - 3>; - benchs[std::array {host, non_uniform, 4L}] = characteristics_advection_unitary< - Kokkos::DefaultHostExecutionSpace, - std::true_type, - 4>; - benchs[std::array {host, non_uniform, 5L}] = characteristics_advection_unitary< - Kokkos::DefaultHostExecutionSpace, - std::true_type, - 5>; + benchs[std::array {host, uniform, 3L}] + = characteristics_advection_unitary; + benchs[std::array {host, uniform, 4L}] + = characteristics_advection_unitary; + benchs[std::array {host, uniform, 5L}] + = characteristics_advection_unitary; + benchs[std::array {host, non_uniform, 3L}] + = characteristics_advection_unitary; + benchs[std::array {host, non_uniform, 4L}] + = characteristics_advection_unitary; + benchs[std::array {host, non_uniform, 5L}] + = characteristics_advection_unitary; benchs[std::array {dev, uniform, 3L}] - = characteristics_advection_unitary; + = characteristics_advection_unitary; benchs[std::array {dev, uniform, 4L}] - = characteristics_advection_unitary; + = characteristics_advection_unitary; benchs[std::array {dev, uniform, 5L}] - = characteristics_advection_unitary; + = characteristics_advection_unitary; benchs[std::array {dev, non_uniform, 3L}] - = characteristics_advection_unitary; + = characteristics_advection_unitary; benchs[std::array {dev, non_uniform, 4L}] - = characteristics_advection_unitary; + = characteristics_advection_unitary; benchs[std::array {dev, non_uniform, 5L}] - = characteristics_advection_unitary; + = characteristics_advection_unitary; // Run the desired bench benchs.at(std::array {state.range(0), state.range(1), state.range(2)})(state);