Skip to content

Commit

Permalink
Simplify ddc::parallel_for_each and ddc::parallel_transform_reduce im…
Browse files Browse the repository at this point in the history
…plementations (#645)
  • Loading branch information
tpadioleau authored Sep 24, 2024
1 parent e285b32 commit 371ffb8
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 209 deletions.
51 changes: 51 additions & 0 deletions include/ddc/ddc_to_kokkos_execution_policy.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Copyright (C) The DDC development team, see COPYRIGHT.md file
//
// SPDX-License-Identifier: MIT

#pragma once

#include <cstddef>
#include <type_traits>

#include <Kokkos_Core.hpp>

#include "ddc/detail/kokkos.hpp"
#include "ddc/discrete_domain.hpp"
#include "ddc/discrete_element.hpp"

namespace ddc::detail {

template <class ExecSpace, class... DDims>
auto ddc_to_kokkos_execution_policy(
ExecSpace const& execution_space,
DiscreteDomain<DDims...> const& domain)
{
using work_tag = std::
conditional_t<need_annotated_operator<ExecSpace>(), use_annotated_operator, void>;
using index_type = Kokkos::IndexType<DiscreteElementType>;
if constexpr (sizeof...(DDims) == 0) {
return Kokkos::RangePolicy<ExecSpace, work_tag, index_type>(execution_space, 0, 1);
} else {
DiscreteElement<DDims...> const ddc_begin = domain.front();
DiscreteElement<DDims...> const ddc_end = domain.front() + domain.extents();
if constexpr (sizeof...(DDims) == 1) {
std::size_t const begin = ddc_begin.uid();
std::size_t const end = ddc_end.uid();
return Kokkos::
RangePolicy<ExecSpace, work_tag, index_type>(execution_space, begin, end);
} else {
using iteration_pattern = Kokkos::
Rank<sizeof...(DDims), Kokkos::Iterate::Right, Kokkos::Iterate::Right>;
Kokkos::Array<std::size_t, sizeof...(DDims)> const begin {
ddc::uid<DDims>(ddc_begin)...};
Kokkos::Array<std::size_t, sizeof...(DDims)> const end {ddc::uid<DDims>(ddc_end)...};
return Kokkos::MDRangePolicy<
ExecSpace,
iteration_pattern,
work_tag,
index_type>(execution_space, begin, end);
}
}
}

} // namespace ddc::detail
85 changes: 8 additions & 77 deletions include/ddc/parallel_for_each.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

#include <Kokkos_Core.hpp>

#include "ddc/ddc_to_kokkos_execution_policy.hpp"
#include "ddc/detail/kokkos.hpp"
#include "ddc/discrete_domain.hpp"
#include "ddc/discrete_element.hpp"
Expand All @@ -23,7 +24,7 @@ template <class F, class... DDims>
class ForEachKokkosLambdaAdapter
{
template <class T>
using index_type = std::size_t;
using index_type = DiscreteElementType;

F m_f;

Expand Down Expand Up @@ -57,87 +58,17 @@ class ForEachKokkosLambdaAdapter
}
};

template <class ExecSpace, class Functor>
template <class ExecSpace, class Functor, class... DDims>
void for_each_kokkos(
std::string const& label,
ExecSpace const& execution_space,
[[maybe_unused]] DiscreteDomain<> const& domain,
Functor const& f) noexcept
{
if constexpr (need_annotated_operator<ExecSpace>()) {
Kokkos::parallel_for(
label,
Kokkos::RangePolicy<ExecSpace, use_annotated_operator>(execution_space, 0, 1),
ForEachKokkosLambdaAdapter<Functor>(f));
} else {
Kokkos::parallel_for(
label,
Kokkos::RangePolicy<ExecSpace>(execution_space, 0, 1),
ForEachKokkosLambdaAdapter<Functor>(f));
}
}

template <class ExecSpace, class Functor, class DDim0>
void for_each_kokkos(
std::string const& label,
ExecSpace const& execution_space,
DiscreteDomain<DDim0> const& domain,
Functor const& f) noexcept
{
DiscreteElement<DDim0> const ddc_begin = domain.front();
DiscreteElement<DDim0> const ddc_end = domain.front() + domain.extents();
std::size_t const begin = ddc::uid<DDim0>(ddc_begin);
std::size_t const end = ddc::uid<DDim0>(ddc_end);
if constexpr (need_annotated_operator<ExecSpace>()) {
Kokkos::parallel_for(
label,
Kokkos::RangePolicy<ExecSpace, use_annotated_operator>(execution_space, begin, end),
ForEachKokkosLambdaAdapter<Functor, DDim0>(f));
} else {
Kokkos::parallel_for(
label,
Kokkos::RangePolicy<ExecSpace>(execution_space, begin, end),
ForEachKokkosLambdaAdapter<Functor, DDim0>(f));
}
}

template <class ExecSpace, class Functor, class DDim0, class DDim1, class... DDims>
void for_each_kokkos(
std::string const& label,
ExecSpace const& execution_space,
DiscreteDomain<DDim0, DDim1, DDims...> const& domain,
DiscreteDomain<DDims...> const& domain,
Functor const& f) noexcept
{
DiscreteElement<DDim0, DDim1, DDims...> const ddc_begin = domain.front();
DiscreteElement<DDim0, DDim1, DDims...> const ddc_end = domain.front() + domain.extents();
Kokkos::Array<std::size_t, 2 + sizeof...(DDims)> const
begin {ddc::uid<DDim0>(ddc_begin),
ddc::uid<DDim1>(ddc_begin),
ddc::uid<DDims>(ddc_begin)...};
Kokkos::Array<std::size_t, 2 + sizeof...(DDims)> const
end {ddc::uid<DDim0>(ddc_end), ddc::uid<DDim1>(ddc_end), ddc::uid<DDims>(ddc_end)...};
if constexpr (need_annotated_operator<ExecSpace>()) {
Kokkos::parallel_for(
label,
Kokkos::MDRangePolicy<
ExecSpace,
Kokkos::Rank<
2 + sizeof...(DDims),
Kokkos::Iterate::Right,
Kokkos::Iterate::Right>,
use_annotated_operator>(execution_space, begin, end),
ForEachKokkosLambdaAdapter<Functor, DDim0, DDim1, DDims...>(f));
} else {
Kokkos::parallel_for(
label,
Kokkos::MDRangePolicy<
ExecSpace,
Kokkos::Rank<
2 + sizeof...(DDims),
Kokkos::Iterate::Right,
Kokkos::Iterate::Right>>(execution_space, begin, end),
ForEachKokkosLambdaAdapter<Functor, DDim0, DDim1, DDims...>(f));
}
Kokkos::parallel_for(
label,
ddc_to_kokkos_execution_policy(execution_space, domain),
ForEachKokkosLambdaAdapter<Functor, DDims...>(f));
}

} // namespace detail
Expand Down
144 changes: 12 additions & 132 deletions include/ddc/parallel_transform_reduce.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

#include <Kokkos_Core.hpp>

#include "ddc/ddc_to_kokkos_execution_policy.hpp"
#include "ddc/detail/kokkos.hpp"
#include "ddc/discrete_domain.hpp"
#include "ddc/discrete_element.hpp"
Expand Down Expand Up @@ -91,7 +92,7 @@ template <class Reducer, class Functor, class... DDims>
class TransformReducerKokkosLambdaAdapter
{
template <class T>
using index_type = std::size_t;
using index_type = DiscreteElementType;

Reducer reducer;

Expand Down Expand Up @@ -146,145 +147,24 @@ class TransformReducerKokkosLambdaAdapter
* @param[in] transform a unary FunctionObject that will be applied to each element of the input
* range. The return type must be acceptable as input to reduce
*/
template <class ExecSpace, class T, class BinaryReductionOp, class UnaryTransformOp>
T transform_reduce_kokkos(
std::string const& label,
ExecSpace const& execution_space,
[[maybe_unused]] DiscreteDomain<> const& domain,
T neutral,
BinaryReductionOp const& reduce,
UnaryTransformOp const& transform) noexcept
{
T result = neutral;
if constexpr (need_annotated_operator<ExecSpace>()) {
Kokkos::parallel_reduce(
label,
Kokkos::RangePolicy<ExecSpace, use_annotated_operator>(execution_space, 0, 1),
TransformReducerKokkosLambdaAdapter<
BinaryReductionOp,
UnaryTransformOp>(reduce, transform),
ddc_to_kokkos_reducer_t<BinaryReductionOp>(result));
} else {
Kokkos::parallel_reduce(
label,
Kokkos::RangePolicy<ExecSpace>(execution_space, 0, 1),
TransformReducerKokkosLambdaAdapter<
BinaryReductionOp,
UnaryTransformOp>(reduce, transform),
ddc_to_kokkos_reducer_t<BinaryReductionOp>(result));
}
return result;
}

/** A parallel reduction over a 1D domain using the given Kokkos execution space
* @param[in] label name for easy identification of the parallel_transform_reduce algorithm
* @param[in] execution_space a Kokkos execution space where the loop will be executed on
* @param[in] domain the range over which to apply the algorithm
* @param[in] neutral the neutral element of the reduction operation
* @param[in] reduce a binary FunctionObject that will be applied in unspecified order to the
* results of transform, the results of other reduce and neutral.
* @param[in] transform a unary FunctionObject that will be applied to each element of the input
* range. The return type must be acceptable as input to reduce
*/
template <class ExecSpace, class DDim0, class T, class BinaryReductionOp, class UnaryTransformOp>
T transform_reduce_kokkos(
std::string const& label,
ExecSpace const& execution_space,
DiscreteDomain<DDim0> const& domain,
T neutral,
BinaryReductionOp const& reduce,
UnaryTransformOp const& transform) noexcept
{
T result = neutral;
if constexpr (need_annotated_operator<ExecSpace>()) {
Kokkos::parallel_reduce(
label,
Kokkos::RangePolicy<ExecSpace, use_annotated_operator>(
execution_space,
domain.front().uid(),
domain.back().uid() + 1),
TransformReducerKokkosLambdaAdapter<
BinaryReductionOp,
UnaryTransformOp,
DDim0>(reduce, transform),
ddc_to_kokkos_reducer_t<BinaryReductionOp>(result));
} else {
Kokkos::parallel_reduce(
label,
Kokkos::RangePolicy<
ExecSpace>(execution_space, domain.front().uid(), domain.back().uid() + 1),
TransformReducerKokkosLambdaAdapter<
BinaryReductionOp,
UnaryTransformOp,
DDim0>(reduce, transform),
ddc_to_kokkos_reducer_t<BinaryReductionOp>(result));
}
return result;
}

/** A parallel reduction over a nD domain using the given Kokkos execution space
* @param[in] label name for easy identification of the parallel_transform_reduce algorithm
* @param[in] execution_space a Kokkos execution space where the loop will be executed on
* @param[in] domain the range over which to apply the algorithm
* @param[in] neutral the neutral element of the reduction operation
* @param[in] reduce a binary FunctionObject that will be applied in unspecified order to the
* results of transform, the results of other reduce and neutral.
* @param[in] transform a unary FunctionObject that will be applied to each element of the input
* range. The return type must be acceptable as input to reduce
*/
template <
class ExecSpace,
class DDim0,
class DDim1,
class... DDims,
class T,
class BinaryReductionOp,
class UnaryTransformOp>
template <class ExecSpace, class... DDims, class T, class BinaryReductionOp, class UnaryTransformOp>
T transform_reduce_kokkos(
std::string const& label,
ExecSpace const& execution_space,
DiscreteDomain<DDim0, DDim1, DDims...> const& domain,
DiscreteDomain<DDims...> const& domain,
T neutral,
BinaryReductionOp const& reduce,
UnaryTransformOp const& transform) noexcept
{
T result = neutral;
Kokkos::Array<std::size_t, 2 + sizeof...(DDims)> const
begin {select<DDim0>(domain).front().uid(),
select<DDim1>(domain).front().uid(),
select<DDims>(domain).front().uid()...};
Kokkos::Array<std::size_t, 2 + sizeof...(DDims)> const
end {select<DDim0>(domain).back().uid() + 1,
select<DDim1>(domain).back().uid() + 1,
(select<DDims>(domain).back().uid() + 1)...};
if constexpr (need_annotated_operator<ExecSpace>()) {
Kokkos::parallel_reduce(
label,
Kokkos::MDRangePolicy<
ExecSpace,
Kokkos::Rank<2 + sizeof...(DDims)>,
use_annotated_operator>(execution_space, begin, end),
TransformReducerKokkosLambdaAdapter<
BinaryReductionOp,
UnaryTransformOp,
DDim0,
DDim1,
DDims...>(reduce, transform),
ddc_to_kokkos_reducer_t<BinaryReductionOp>(result));
} else {
Kokkos::parallel_reduce(
label,
Kokkos::MDRangePolicy<
ExecSpace,
Kokkos::Rank<2 + sizeof...(DDims)>>(execution_space, begin, end),
TransformReducerKokkosLambdaAdapter<
BinaryReductionOp,
UnaryTransformOp,
DDim0,
DDim1,
DDims...>(reduce, transform),
ddc_to_kokkos_reducer_t<BinaryReductionOp>(result));
}
Kokkos::parallel_reduce(
label,
ddc_to_kokkos_execution_policy(execution_space, domain),
TransformReducerKokkosLambdaAdapter<
BinaryReductionOp,
UnaryTransformOp,
DDims...>(reduce, transform),
ddc_to_kokkos_reducer_t<BinaryReductionOp>(result));
return result;
}

Expand Down

0 comments on commit 371ffb8

Please sign in to comment.