From f5e685b0017ec8ef67d05731ca56b162121f497c Mon Sep 17 00:00:00 2001 From: Jacob Domagala Date: Thu, 18 Jul 2024 15:31:58 +0200 Subject: [PATCH] #2240: Store Reducers by tuple(ProxyType, DataType, OperandType) --- src/vt/collective/reduce/allreduce/helpers.h | 8 ++++ .../reduce/allreduce/rabenseifner.h | 2 + .../reduce/allreduce/recursive_doubling.h | 2 + .../allreduce/recursive_doubling.impl.h | 9 ---- src/vt/objgroup/manager.h | 14 +++++-- src/vt/objgroup/manager.impl.h | 34 +++++++++++---- src/vt/objgroup/proxy/proxy_objgroup.impl.h | 4 +- src/vt/utils/hash/hash_tuple.h | 42 +++++++++++++++---- tests/unit/objgroup/test_objgroup.cc | 18 ++++---- tests/unit/objgroup/test_objgroup_common.h | 12 ++++-- 10 files changed, 105 insertions(+), 40 deletions(-) diff --git a/src/vt/collective/reduce/allreduce/helpers.h b/src/vt/collective/reduce/allreduce/helpers.h index ed848b4d93..1083e32823 100644 --- a/src/vt/collective/reduce/allreduce/helpers.h +++ b/src/vt/collective/reduce/allreduce/helpers.h @@ -43,10 +43,18 @@ #if !defined INCLUDED_VT_COLLECTIVE_REDUCE_ALLREDUCE_HELPERS_H #define INCLUDED_VT_COLLECTIVE_REDUCE_ALLREDUCE_HELPERS_H + #include "data_handler.h" #include "rabenseifner_msg.h" #include "vt/messaging/message/shared_message.h" + #include +#include + +namespace vt { +template +using remove_cvref = std::remove_cv_t>; +} namespace vt::collective::reduce::allreduce { diff --git a/src/vt/collective/reduce/allreduce/rabenseifner.h b/src/vt/collective/reduce/allreduce/rabenseifner.h index 58090cfef1..4491170216 100644 --- a/src/vt/collective/reduce/allreduce/rabenseifner.h +++ b/src/vt/collective/reduce/allreduce/rabenseifner.h @@ -93,8 +93,10 @@ template < typename DataT, template class Op, typename ObjT, auto finalHandler > struct Rabenseifner { + using Data = DataT; using DataType = DataHandler; using Scalar = typename DataType::Scalar; + using ReduceOp = Op; using DataHelperT = DataHelper; using StateT = State; diff --git 
a/src/vt/collective/reduce/allreduce/recursive_doubling.h b/src/vt/collective/reduce/allreduce/recursive_doubling.h index d88960c544..bbda3dec06 100644 --- a/src/vt/collective/reduce/allreduce/recursive_doubling.h +++ b/src/vt/collective/reduce/allreduce/recursive_doubling.h @@ -116,8 +116,10 @@ template < typename DataT, template class Op, typename ObjT, auto finalHandler> struct RecursiveDoubling { + using Data = DataT; using DataType = DataHandler; using Scalar = typename DataHandler::Scalar; + using ReduceOp = Op; /** * \brief Constructor for RecursiveDoubling class. * diff --git a/src/vt/collective/reduce/allreduce/recursive_doubling.impl.h b/src/vt/collective/reduce/allreduce/recursive_doubling.impl.h index 4e4e926272..a744bda957 100644 --- a/src/vt/collective/reduce/allreduce/recursive_doubling.impl.h +++ b/src/vt/collective/reduce/allreduce/recursive_doubling.impl.h @@ -354,15 +354,6 @@ void RecursiveDoubling::finalPart(size_t id) { parent_proxy_[this_node_].template invoke(state.val_); state.completed_ = true; - - state.adjust_message_ = nullptr; - state.messages_.clear(); - - states_.erase(id); - // std::fill(state.messages_.begin(), state.messages_.end(), nullptr); - - // state.steps_recv_.assign(num_steps_, false); - // state.steps_reduced_.assign(num_steps_, false); } } // namespace vt::collective::reduce::allreduce diff --git a/src/vt/objgroup/manager.h b/src/vt/objgroup/manager.h index 46e133991a..3f2d948e2f 100644 --- a/src/vt/objgroup/manager.h +++ b/src/vt/objgroup/manager.h @@ -58,11 +58,13 @@ #include "vt/messaging/pending_send.h" #include "vt/elm/elm_id.h" #include "vt/utils/fntraits/fntraits.h" +#include "vt/utils/hash/hash_tuple.h" #include #include #include #include +#include namespace vt { namespace objgroup { @@ -91,6 +93,11 @@ struct ObjGroupManager : runtime::component::Component { using HolderBaseType = holder::HolderBase; using HolderBasePtrType = std::unique_ptr; using PendingSendType = messaging::PendingSend; + using ReduceDataType = 
std::type_index; + using ReduceOperandType = std::type_index; + using ReducerMapType = std::unordered_map< + std::tuple, + ObjGroupProxyType>; public: /** @@ -507,9 +514,10 @@ ObjGroupManager::PendingSendType allreduce(ProxyType proxy, Args&&... data std::unordered_map> pending_; /// Map of object groups' labels std::unordered_map labels_; - - std::unordered_map reducersRD_; - std::unordered_map reducersR_; + /// Recursive Doubling reducers + ReducerMapType reducers_recursive_doubling_; + /// Rabenseifner reducers + ReducerMapType reducers_rabenseifner_; }; }} /* end namespace vt::objgroup */ diff --git a/src/vt/objgroup/manager.impl.h b/src/vt/objgroup/manager.impl.h index 503bbf161e..317121b7a0 100644 --- a/src/vt/objgroup/manager.impl.h +++ b/src/vt/objgroup/manager.impl.h @@ -61,6 +61,7 @@ #include "vt/collective/reduce/allreduce/rabenseifner.h" #include "vt/collective/reduce/allreduce/recursive_doubling.h" #include "vt/collective/reduce/allreduce/type.h" +#include "vt/collective/reduce/allreduce/helpers.h" #include #include @@ -279,21 +280,36 @@ ObjGroupManager::PendingSendType ObjGroupManager::allreduce( proxy::Proxy grp_proxy = {}; - auto& reducers = Reducer::type_ == ReducerType::Rabenseifner ? reducersR_ : reducersRD_; - if (reducers.find(proxy.getProxy()) != reducers.end()) { - auto* obj = reinterpret_cast( - objs_.at(reducers.at(proxy.getProxy()))->getPtr() + auto& reducers = Reducer::type_ == ReducerType::Rabenseifner ? 
+ reducers_rabenseifner_ : + reducers_recursive_doubling_; + auto const key = std::make_tuple( + proxy.getProxy(), std::type_index(typeid(typename Reducer::Data)), + std::type_index(typeid(typename Reducer::ReduceOp)) + ); + if (reducers.find(key) != reducers.end()) { + vt_debug_print( + verbose, allreduce, "Found reducer (type: {}) for proxy {}", + TypeToString(Reducer::type_), proxy ); + + auto* obj = + reinterpret_cast(objs_.at(reducers.at(key))->getPtr()); id = obj->generateNewId(); obj->initialize(id, std::forward(data)...); grp_proxy = obj->proxy_; } else { + vt_debug_print( + verbose, allreduce, "Creating reducer (type: {}) for proxy {}", + TypeToString(Reducer::type_), proxy + ); + grp_proxy = vt::theObjGroup()->makeCollective( - TypeToString(Reducer::type_), proxy, - num_nodes, std::forward(data)... + TypeToString(Reducer::type_), proxy, num_nodes, + std::forward(data)... ); grp_proxy[this_node].get()->proxy_ = grp_proxy; - reducers[proxy.getProxy()] = grp_proxy.getProxy(); + reducers[key] = grp_proxy.getProxy(); id = grp_proxy[this_node].get()->id_ - 1; } @@ -314,9 +330,10 @@ ObjGroupManager::allreduce(ProxyType proxy, Args&&... data) { } auto const payload_size = - collective::reduce::allreduce::DataHandler::size( + collective::reduce::allreduce::DataHandler>::size( std::forward(data)... ); + if (payload_size < 2048) { using Reducer = vt::collective::reduce::allreduce::RecursiveDoubling; @@ -327,6 +344,7 @@ ObjGroupManager::allreduce(ProxyType proxy, Args&&... 
data) { return allreduce(proxy, std::forward(data)...); } + // Silence nvcc warning return PendingSendType{nullptr}; } diff --git a/src/vt/objgroup/proxy/proxy_objgroup.impl.h b/src/vt/objgroup/proxy/proxy_objgroup.impl.h index a36d1bfa00..2252856822 100644 --- a/src/vt/objgroup/proxy/proxy_objgroup.impl.h +++ b/src/vt/objgroup/proxy/proxy_objgroup.impl.h @@ -56,6 +56,7 @@ #include "vt/messaging/param_msg.h" #include "vt/objgroup/proxy/proxy_bits.h" #include "vt/collective/reduce/get_reduce_stamp.h" +#include "vt/collective/reduce/allreduce/helpers.h" namespace vt { namespace objgroup { namespace proxy { @@ -215,8 +216,7 @@ Proxy::allreduce_h( ) const { auto proxy = Proxy(*this); - // using DataT = std::tuple...>; - return theObjGroup()->allreduce...>( + return theObjGroup()->allreduce...>( proxy, std::forward(args)...); } diff --git a/src/vt/utils/hash/hash_tuple.h b/src/vt/utils/hash/hash_tuple.h index 9e1c0095d2..ba1ffc5381 100644 --- a/src/vt/utils/hash/hash_tuple.h +++ b/src/vt/utils/hash/hash_tuple.h @@ -47,16 +47,44 @@ #include namespace std { +namespace { -template -struct hash> { - size_t operator()(std::tuple const& in) const { - auto const& v1 = std::hash()(std::get<0>(in)); - auto const& v2 = std::hash()(std::get<1>(in)); - return v1 ^ v2; +// Code from boost +// Reciprocal of the golden ratio helps spread entropy +// and handles duplicates. +// See Mike Seymour in magic-numbers-in-boosthash-combine: +// http://stackoverflow.com/questions/4948780 + +template +inline void hash_combine(std::size_t& seed, T const& v) { + seed ^= std::hash()(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); +} + +// Recursive template code derived from Matthieu M. 
+template ::value - 1> +struct HashValueImpl { + static void apply(size_t& seed, Tuple const& tuple) { + HashValueImpl::apply(seed, tuple); + hash_combine(seed, std::get(tuple)); } }; -} +template +struct HashValueImpl { + static void apply(size_t& seed, Tuple const& tuple) { + hash_combine(seed, std::get<0>(tuple)); + } +}; +} // namespace + +template +struct hash> { + size_t operator()(std::tuple const& tt) const { + size_t seed = 0; + HashValueImpl>::apply(seed, tt); + return seed; + } +}; +} // namespace std #endif /*INCLUDED_VT_UTILS_HASH_HASH_TUPLE_H*/ diff --git a/tests/unit/objgroup/test_objgroup.cc b/tests/unit/objgroup/test_objgroup.cc index 6c1c87d446..6f96002693 100644 --- a/tests/unit/objgroup/test_objgroup.cc +++ b/tests/unit/objgroup/test_objgroup.cc @@ -43,6 +43,8 @@ #include "test_objgroup_common.h" #include "test_helpers.h" +#include "vt/collective/reduce/allreduce/rabenseifner.h" +#include "vt/configs/types/types_type.h" #include "vt/objgroup/manager.h" #include @@ -266,7 +268,7 @@ TEST_F(TestObjGroup, test_proxy_allreduce) { auto const my_node = vt::theContext()->getNode(); TestObjGroup::total_verify_expected_ = 0; - auto proxy = vt::theObjGroup()->makeCollective("test_proxy_reduce"); + auto proxy = vt::theObjGroup()->makeCollective("test_proxy_allreduce"); vt::theCollective()->barrier(); @@ -289,7 +291,7 @@ TEST_F(TestObjGroup, test_proxy_allreduce) { EXPECT_EQ(MyObjA::total_verify_expected_, 3); runInEpochCollective([&] { using Reducer = vt::collective::reduce::allreduce::RecursiveDoubling< - std::vector, PlusOp, MyObjA, &MyObjA::verifyAllredVec + std::vector, PlusOp, MyObjA, &MyObjA::verifyAllredVec >; std::vector payload(256, my_node); theObjGroup()->allreduce(proxy, payload); @@ -299,18 +301,20 @@ TEST_F(TestObjGroup, test_proxy_allreduce) { runInEpochCollective([&] { using Reducer = vt::collective::reduce::allreduce::Rabenseifner< - std::vector, PlusOp, MyObjA, &MyObjA::verifyAllredVec + NodeType, PlusOp, MyObjA, &MyObjA::verifyAllred<1> 
>; - std::vector payload(256, my_node); - theObjGroup()->allreduce(proxy, payload); - theObjGroup()->allreduce(proxy, payload); + std::vector payload(2048, my_node); + theObjGroup()->allreduce(proxy, my_node); + + std::vector payload_large(2048 * 2, my_node); + theObjGroup()->allreduce(proxy, my_node); }); EXPECT_EQ(MyObjA::total_verify_expected_, 6); runInEpochCollective([&] { using Reducer = vt::collective::reduce::allreduce::Rabenseifner< - VectorPayload, PlusOp, MyObjA, &MyObjA::verifyAllredVecPayload>; + VectorPayload, PlusOp, MyObjA, &MyObjA::verifyAllredVecPayload>; std::vector payload(256, my_node); VectorPayload data{payload}; theObjGroup()->allreduce(proxy, data); diff --git a/tests/unit/objgroup/test_objgroup_common.h b/tests/unit/objgroup/test_objgroup_common.h index 2c394a84f5..3d1848b6fb 100644 --- a/tests/unit/objgroup/test_objgroup_common.h +++ b/tests/unit/objgroup/test_objgroup_common.h @@ -131,11 +131,12 @@ struct MyObjA { total_verify_expected_++; } - void verifyAllredVec(std::vector vec) { + template + void verifyAllredVec(std::vector vec) { auto final_size = vec.size(); - EXPECT_EQ(final_size, 256); + EXPECT_EQ(final_size, size); - auto n = vt::theContext()->getNumNodes(); + auto const n = theContext()->getNumNodes(); auto const total_sum = n * (n - 1)/2; for(auto val : vec){ EXPECT_EQ(val, total_sum); @@ -144,7 +145,10 @@ struct MyObjA { total_verify_expected_++; } - void verifyAllredVecPayload(VectorPayload vec) { verifyAllredVec(vec.vec_); } + template + void verifyAllredVecPayload(VectorPayload vec) { + verifyAllredVec(vec.vec_); + } #if MAGISTRATE_KOKKOS_ENABLED void verifyAllredView(Kokkos::View view) {