From 2f4c679b59c34ebe0184940b86f4c05600bb2d83 Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Wed, 13 Nov 2024 06:34:24 -0700 Subject: [PATCH 01/11] BLAS - scal: removing check on assignable memory spaces That check is stricter than required as we will values by reference to perform copies and won't try to reassign pointers. Signed-off-by: Luc Berger-Vergiat --- blas/src/KokkosBlas1_scal.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/blas/src/KokkosBlas1_scal.hpp b/blas/src/KokkosBlas1_scal.hpp index 561c505035..83aa722190 100644 --- a/blas/src/KokkosBlas1_scal.hpp +++ b/blas/src/KokkosBlas1_scal.hpp @@ -58,8 +58,6 @@ void scal(const execution_space& space, const RMV& R, const AV& a, const XMV& X) "X is not a Kokkos::View."); static_assert(Kokkos::SpaceAccessibility::accessible, "KokkosBlas::scal: XMV must be accessible from execution_space"); - static_assert(Kokkos::SpaceAccessibility::assignable, - "KokkosBlas::scal: XMV must be assignable to RMV"); static_assert(std::is_same::value, "KokkosBlas::scal: R is const. " "It must be nonconst, because it is an output argument " From 0c7915e6c7befacac3af775dcbde8aa4121ac235 Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Wed, 13 Nov 2024 14:47:13 -0700 Subject: [PATCH 02/11] BLAS - rot: check at runtime that X and Y have same extent Signed-off-by: Luc Berger-Vergiat --- blas/src/KokkosBlas1_rot.hpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/blas/src/KokkosBlas1_rot.hpp b/blas/src/KokkosBlas1_rot.hpp index 7bc3215604..00d8ca45a6 100644 --- a/blas/src/KokkosBlas1_rot.hpp +++ b/blas/src/KokkosBlas1_rot.hpp @@ -40,6 +40,14 @@ void rot(execution_space const& space, VectorView const& X, VectorView const& Y, static_assert(std::is_same::value, "rot: VectorView template parameter needs to store non-const values"); + // Check compatibility of dimensions at run time. + if (X.extent(0) != Y.extent(0)) { + std::ostringstream os; + os << "KokkosBlas::rot: Dimensions of X and Y do not match: " + << "X: " << X.extent(0) << ", Y: " << Y.extent(0); + KokkosKernels::Impl::throw_runtime_exception(os.str()); + } + using VectorView_Internal = Kokkos::View::array_layout, Kokkos::Device, From 0a547290a02a94f34df70af0049bbca10f03db6b Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Thu, 14 Nov 2024 08:09:36 -0700 Subject: [PATCH 03/11] BLAS - rot: improving static assertions Signed-off-by: Luc Berger-Vergiat --- blas/src/KokkosBlas1_rot.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/blas/src/KokkosBlas1_rot.hpp b/blas/src/KokkosBlas1_rot.hpp index 00d8ca45a6..0c36eab426 100644 --- a/blas/src/KokkosBlas1_rot.hpp +++ b/blas/src/KokkosBlas1_rot.hpp @@ -27,6 +27,8 @@ void rot(execution_space const& space, VectorView const& X, VectorView const& Y, static_assert(Kokkos::is_execution_space::value, "rot: execution_space template parameter is not a Kokkos " "execution space."); + static_assert(Kokkos::is_view_v, "KokkosBlas::rot: VectorView is not a Kokkos::View."); + static_assert(Kokkos::is_view_v, "KokkosBlas::rot: ScalarView is not a Kokkos::View."); static_assert(VectorView::rank == 1, "rot: VectorView template parameter needs to be a rank 1 view"); static_assert(ScalarView::rank == 0, "rot: ScalarView template parameter needs to be a rank 0 view"); static_assert(Kokkos::SpaceAccessibility::accessible, From 25993865a7c98a091536801e7748a7ff5f91b0c2 Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Mon, 25 Nov 2024 14:49:48 -0700 Subject: [PATCH 04/11] BLAS - rotg: check for non-complex types Signed-off-by: Luc Berger-Vergiat --- blas/src/KokkosBlas1_rotg.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/blas/src/KokkosBlas1_rotg.hpp b/blas/src/KokkosBlas1_rotg.hpp index 1927bc2df9..9ca3a211b0 100644 --- a/blas/src/KokkosBlas1_rotg.hpp +++ b/blas/src/KokkosBlas1_rotg.hpp @@ -44,6 +44,8 @@ void rotg(execution_space const& space, SViewType const& a, SViewType const& b, "rotg: execution_space cannot access data in SViewType"); static_assert(Kokkos::SpaceAccessibility::accessible, "rotg: execution_space cannot access data in MViewType"); + static_assert(!Kokkos::ArithTraits::is_complex, + "rotg: MViewType cannot hold complex values."); using SView_Internal = Kokkos::View< typename SViewType::value_type, typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, From 32b3dd8dae7846603110bde3b493d674b60c086f Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Mon, 25 Nov 2024 14:50:37 -0700 Subject: [PATCH 05/11] BLAS - ger: check that matrix stores values as non-const Signed-off-by: Luc Berger-Vergiat --- blas/src/KokkosBlas2_ger.hpp | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/blas/src/KokkosBlas2_ger.hpp b/blas/src/KokkosBlas2_ger.hpp index 88786649ba..6be7197d08 100644 --- a/blas/src/KokkosBlas2_ger.hpp +++ b/blas/src/KokkosBlas2_ger.hpp @@ -43,19 +43,22 @@ template ::accessible, - "AViewType memory space must be accessible from ExecutionSpace"); + "ger: AViewType memory space must be accessible from ExecutionSpace"); static_assert(Kokkos::SpaceAccessibility::accessible, - "XViewType memory space must be accessible from ExecutionSpace"); + "ger: XViewType memory space must be accessible from ExecutionSpace"); static_assert(Kokkos::SpaceAccessibility::accessible, - "YViewType memory space must be accessible from ExecutionSpace"); + "ger: YViewType memory space must be accessible from ExecutionSpace"); - static_assert(Kokkos::is_view::value, "AViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, "XViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, "YViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "ger: AViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "ger: XViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "ger: YViewType must be a Kokkos::View."); - static_assert(static_cast(AViewType::rank) == 2, "AViewType must have rank 2."); - static_assert(static_cast(XViewType::rank) == 1, "XViewType must have rank 1."); - static_assert(static_cast(YViewType::rank) == 1, "YViewType must have rank 1."); + static_assert(static_cast(AViewType::rank) == 2, "ger: AViewType must have rank 2."); + static_assert(static_cast(XViewType::rank) == 1, "ger: XViewType must have rank 1."); + static_assert(static_cast(YViewType::rank) == 1, "ger: YViewType must have rank 1."); + + static_assert(std::is_same_v, + "ger: AViewType must store non const values.") // Check compatibility of dimensions at run time. if ((A.extent(0) != x.extent(0)) || (A.extent(1) != y.extent(0))) { From 999fc66e6140c139c6a027583fb22e4ce35f6146 Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Mon, 25 Nov 2024 14:51:37 -0700 Subject: [PATCH 06/11] BLAS - trmm: check for valid execution space type. Signed-off-by: Luc Berger-Vergiat --- blas/src/KokkosBlas3_trmm.hpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/blas/src/KokkosBlas3_trmm.hpp b/blas/src/KokkosBlas3_trmm.hpp index 9da47b7160..2c69e036bc 100644 --- a/blas/src/KokkosBlas3_trmm.hpp +++ b/blas/src/KokkosBlas3_trmm.hpp @@ -66,10 +66,11 @@ namespace KokkosBlas { template void trmm(const execution_space& space, const char side[], const char uplo[], const char trans[], const char diag[], typename BViewType::const_value_type& alpha, const AViewType& A, const BViewType& B) { - static_assert(Kokkos::is_view::value, "AViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, "BViewType must be a Kokkos::View."); - static_assert(static_cast(AViewType::rank) == 2, "AViewType must have rank 2."); - static_assert(static_cast(BViewType::rank) == 2, "BViewType must have rank 2."); + static_assert(Kokkos::is_execution_space_v, "trmm: execution_space must be a Kokkos::execution_space.") + static_assert(Kokkos::is_view_v, "trmm: AViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view_v, "trmm: BViewType must be a Kokkos::View."); + static_assert(static_cast(AViewType::rank) == 2, "trmm: AViewType must have rank 2."); + static_assert(static_cast(BViewType::rank) == 2, "trmm: BViewType must have rank 2."); // Check validity of indicator argument bool valid_side = (side[0] == 'L') || (side[0] == 'l') || (side[0] == 'R') || (side[0] == 'r'); From 26899a21091031df1f14401e554ed1410b1498be Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Tue, 17 Dec 2024 09:37:05 -0700 Subject: [PATCH 07/11] BLAS: fix missing semi-colon at end of static_assert Signed-off-by: Luc Berger-Vergiat --- blas/src/KokkosBlas2_ger.hpp | 2 +- blas/src/KokkosBlas3_trmm.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/blas/src/KokkosBlas2_ger.hpp b/blas/src/KokkosBlas2_ger.hpp index 6be7197d08..0f4cbfc02a 100644 --- a/blas/src/KokkosBlas2_ger.hpp +++ b/blas/src/KokkosBlas2_ger.hpp @@ -58,7 +58,7 @@ void ger(const ExecutionSpace& space, const char trans[], const typename AViewTy static_assert(static_cast(YViewType::rank) == 1, "ger: YViewType must have rank 1."); static_assert(std::is_same_v, - "ger: AViewType must store non const values.") + "ger: AViewType must store non const values."); // Check compatibility of dimensions at run time. if ((A.extent(0) != x.extent(0)) || (A.extent(1) != y.extent(0))) { diff --git a/blas/src/KokkosBlas3_trmm.hpp b/blas/src/KokkosBlas3_trmm.hpp index 2c69e036bc..1e0dacbb16 100644 --- a/blas/src/KokkosBlas3_trmm.hpp +++ b/blas/src/KokkosBlas3_trmm.hpp @@ -66,7 +66,7 @@ namespace KokkosBlas { template void trmm(const execution_space& space, const char side[], const char uplo[], const char trans[], const char diag[], typename BViewType::const_value_type& alpha, const AViewType& A, const BViewType& B) { - static_assert(Kokkos::is_execution_space_v, "trmm: execution_space must be a Kokkos::execution_space.") + static_assert(Kokkos::is_execution_space_v, "trmm: execution_space must be a Kokkos::execution_space."); static_assert(Kokkos::is_view_v, "trmm: AViewType must be a Kokkos::View."); static_assert(Kokkos::is_view_v, "trmm: BViewType must be a Kokkos::View."); static_assert(static_cast(AViewType::rank) == 2, "trmm: AViewType must have rank 2."); From 39d48d5dbcdd3b88c15a5e44e84b3621383bdff5 Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Tue, 17 Dec 2024 09:39:19 -0700 Subject: [PATCH 08/11] Applying clang-format Signed-off-by: Luc Berger-Vergiat --- blas/src/KokkosBlas1_rotg.hpp | 2 +- blas/src/KokkosBlas2_ger.hpp | 6 +++--- blas/src/KokkosBlas3_trmm.hpp | 6 ++++-- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/blas/src/KokkosBlas1_rotg.hpp b/blas/src/KokkosBlas1_rotg.hpp index 9ca3a211b0..b309316002 100644 --- a/blas/src/KokkosBlas1_rotg.hpp +++ b/blas/src/KokkosBlas1_rotg.hpp @@ -45,7 +45,7 @@ void rotg(execution_space const& space, SViewType const& a, SViewType const& b, static_assert(Kokkos::SpaceAccessibility::accessible, "rotg: execution_space cannot access data in MViewType"); static_assert(!Kokkos::ArithTraits::is_complex, - "rotg: MViewType cannot hold complex values."); + "rotg: MViewType cannot hold complex values."); using SView_Internal = Kokkos::View< typename SViewType::value_type, typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, diff --git a/blas/src/KokkosBlas2_ger.hpp b/blas/src/KokkosBlas2_ger.hpp index 0f4cbfc02a..881b75bbb8 100644 --- a/blas/src/KokkosBlas2_ger.hpp +++ b/blas/src/KokkosBlas2_ger.hpp @@ -58,10 +58,10 @@ void ger(const ExecutionSpace& space, const char trans[], const typename AViewTy static_assert(static_cast(YViewType::rank) == 1, "ger: YViewType must have rank 1."); static_assert(std::is_same_v, - "ger: AViewType must store non const values."); + "ger: AViewType must store non const values."); - // Check compatibility of dimensions at run time. - if ((A.extent(0) != x.extent(0)) || (A.extent(1) != y.extent(0))) { + // Check compatibility of dimensions at run time. + if ((A.extent(0) != x.extent(0)) || (A.extent(1) != y.extent(0))) { std::ostringstream os; os << "KokkosBlas::ger: Dimensions of A, x, and y do not match: " << "A is " << A.extent(0) << " by " << A.extent(1) << ", x has size " << x.extent(0) << ", y has size " diff --git a/blas/src/KokkosBlas3_trmm.hpp b/blas/src/KokkosBlas3_trmm.hpp index 1e0dacbb16..bc36b08023 100644 --- a/blas/src/KokkosBlas3_trmm.hpp +++ b/blas/src/KokkosBlas3_trmm.hpp @@ -66,8 +66,10 @@ namespace KokkosBlas { template void trmm(const execution_space& space, const char side[], const char uplo[], const char trans[], const char diag[], typename BViewType::const_value_type& alpha, const AViewType& A, const BViewType& B) { - static_assert(Kokkos::is_execution_space_v, "trmm: execution_space must be a Kokkos::execution_space."); - static_assert(Kokkos::is_view_v, "trmm: AViewType must be a Kokkos::View."); + static_assert(Kokkos::is_execution_space_v, + "trmm: execution_space must be a Kokkos::execution_space."); static_assert(Kokkos::is_view_v, + "trmm: AViewType must be a " + "Kokkos::View."); static_assert(Kokkos::is_view_v, "trmm: BViewType must be a Kokkos::View."); static_assert(static_cast(AViewType::rank) == 2, "trmm: AViewType must have rank 2."); static_assert(static_cast(BViewType::rank) == 2, "trmm: BViewType must have rank 2."); From 0569e9dc1ca5d90f8dc678758752ae0bf88200e5 Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Tue, 17 Dec 2024 09:40:38 -0700 Subject: [PATCH 09/11] More clang-format Signed-off-by: Luc Berger-Vergiat --- blas/src/KokkosBlas2_ger.hpp | 4 ++-- blas/src/KokkosBlas3_trmm.hpp | 7 ++++--- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/blas/src/KokkosBlas2_ger.hpp b/blas/src/KokkosBlas2_ger.hpp index 881b75bbb8..62d4d0fec5 100644 --- a/blas/src/KokkosBlas2_ger.hpp +++ b/blas/src/KokkosBlas2_ger.hpp @@ -60,8 +60,8 @@ void ger(const ExecutionSpace& space, const char trans[], const typename AViewTy static_assert(std::is_same_v, "ger: AViewType must store non const values."); - // Check compatibility of dimensions at run time. - if ((A.extent(0) != x.extent(0)) || (A.extent(1) != y.extent(0))) { + // Check compatibility of dimensions at run time. + if ((A.extent(0) != x.extent(0)) || (A.extent(1) != y.extent(0))) { std::ostringstream os; os << "KokkosBlas::ger: Dimensions of A, x, and y do not match: " << "A is " << A.extent(0) << " by " << A.extent(1) << ", x has size " << x.extent(0) << ", y has size " diff --git a/blas/src/KokkosBlas3_trmm.hpp b/blas/src/KokkosBlas3_trmm.hpp index bc36b08023..62226cefe7 100644 --- a/blas/src/KokkosBlas3_trmm.hpp +++ b/blas/src/KokkosBlas3_trmm.hpp @@ -67,9 +67,10 @@ template void trmm(const execution_space& space, const char side[], const char uplo[], const char trans[], const char diag[], typename BViewType::const_value_type& alpha, const AViewType& A, const BViewType& B) { static_assert(Kokkos::is_execution_space_v, - "trmm: execution_space must be a Kokkos::execution_space."); static_assert(Kokkos::is_view_v, - "trmm: AViewType must be a " - "Kokkos::View."); + "trmm: execution_space must be a Kokkos::execution_space."); + static_assert(Kokkos::is_view_v, + "trmm: AViewType must be a " + "Kokkos::View."); static_assert(Kokkos::is_view_v, "trmm: BViewType must be a Kokkos::View."); static_assert(static_cast(AViewType::rank) == 2, "trmm: AViewType must have rank 2."); static_assert(static_cast(BViewType::rank) == 2, "trmm: BViewType must have rank 2."); From 6667b94f7312e00178a4b9770b6b0c9e32288d7b Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Wed, 18 Dec 2024 12:14:10 -0700 Subject: [PATCH 10/11] Blas - rot: fixing interface of rot The cosine coefficient is strictly real while the sine coefficient can be real or complex leading to a bug in the current API. This commit should fix that for the native and TPL implementation and the associated unit-test is also fixed accordingly. Signed-off-by: Luc Berger-Vergiat --- blas/impl/KokkosBlas1_rot_impl.hpp | 11 +++--- blas/impl/KokkosBlas1_rot_spec.hpp | 23 +++++++------ blas/src/KokkosBlas1_rot.hpp | 26 ++++++++++---- blas/tpls/KokkosBlas1_rot_tpl_spec_avail.hpp | 7 +++- blas/tpls/KokkosBlas1_rot_tpl_spec_decl.hpp | 36 ++++++++++++++------ blas/unit_test/Test_Blas1_rot.hpp | 6 ++-- 6 files changed, 74 insertions(+), 35 deletions(-) diff --git a/blas/impl/KokkosBlas1_rot_impl.hpp b/blas/impl/KokkosBlas1_rot_impl.hpp index e139e916be..612ba60bf5 100644 --- a/blas/impl/KokkosBlas1_rot_impl.hpp +++ b/blas/impl/KokkosBlas1_rot_impl.hpp @@ -23,14 +23,15 @@ namespace KokkosBlas { namespace Impl { -template +template struct rot_functor { using scalar_type = typename VectorView::non_const_value_type; VectorView X, Y; - ScalarView c, s; + MagnitudeView c; + ScalarView s; - rot_functor(VectorView const& X_, VectorView const& Y_, ScalarView const& c_, ScalarView const& s_) + rot_functor(VectorView const& X_, VectorView const& Y_, MagnitudeView const& c_, ScalarView const& s_) : X(X_), Y(Y_), c(c_), s(s_) {} KOKKOS_INLINE_FUNCTION @@ -41,8 +42,8 @@ struct rot_functor { } }; -template -void Rot_Invoke(ExecutionSpace const& space, VectorView const& X, VectorView const& Y, ScalarView const& c, + template +void Rot_Invoke(ExecutionSpace const& space, VectorView const& X, VectorView const& Y, MagnitudeView const& c, ScalarView const& s) { Kokkos::RangePolicy rot_policy(space, 0, X.extent(0)); rot_functor rot_func(X, Y, c, s); diff --git a/blas/impl/KokkosBlas1_rot_spec.hpp b/blas/impl/KokkosBlas1_rot_spec.hpp index 493cd648cf..61f14fb302 100644 --- a/blas/impl/KokkosBlas1_rot_spec.hpp +++ b/blas/impl/KokkosBlas1_rot_spec.hpp @@ -29,7 +29,7 @@ namespace KokkosBlas { namespace Impl { // Specialization struct which defines whether a specialization exists -template +template struct rot_eti_spec_avail { enum : bool { value = false }; }; @@ -49,7 +49,8 @@ struct rot_eti_spec_avail { EXECSPACE, \ Kokkos::View, Kokkos::MemoryTraits>, \ Kokkos::View::mag_type, LAYOUT, Kokkos::Device, \ - Kokkos::MemoryTraits>> { \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>> {\ enum : bool { value = true }; \ }; @@ -61,19 +62,19 @@ namespace KokkosBlas { namespace Impl { // Unification layer -template ::value, - bool eti_spec_avail = rot_eti_spec_avail::value> +template ::value, + bool eti_spec_avail = rot_eti_spec_avail::value> struct Rot { - static void rot(ExecutionSpace const& space, VectorView const& X, VectorView const& Y, ScalarView const& c, + static void rot(ExecutionSpace const& space, VectorView const& X, VectorView const& Y, MagnitudeView const& c, ScalarView const& s); }; #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY //! Full specialization of Rot. -template -struct Rot { - static void rot(ExecutionSpace const& space, VectorView const& X, VectorView const& Y, ScalarView const& c, +template +struct Rot { + static void rot(ExecutionSpace const& space, VectorView const& X, VectorView const& Y, MagnitudeView const& c, ScalarView const& s) { Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? "KokkosBlas::rot[ETI]" : "KokkosBlas::rot[noETI]"); @@ -86,7 +87,7 @@ struct Rot(space, X, Y, c, s); + Rot_Invoke(space, X, Y, c, s); Kokkos::Profiling::popRegion(); } }; @@ -108,6 +109,7 @@ struct Rot, Kokkos::MemoryTraits>, \ Kokkos::View::mag_type, LAYOUT, Kokkos::Device, \ Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ false, true>; // @@ -121,6 +123,7 @@ struct Rot, Kokkos::MemoryTraits>, \ Kokkos::View::mag_type, LAYOUT, Kokkos::Device, \ Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ false, true>; #include diff --git a/blas/src/KokkosBlas1_rot.hpp b/blas/src/KokkosBlas1_rot.hpp index 0c36eab426..01b8a62f06 100644 --- a/blas/src/KokkosBlas1_rot.hpp +++ b/blas/src/KokkosBlas1_rot.hpp @@ -21,22 +21,28 @@ namespace KokkosBlas { -template -void rot(execution_space const& space, VectorView const& X, VectorView const& Y, ScalarView const& c, +template +void rot(execution_space const& space, VectorView const& X, VectorView const& Y, MagnitudeView const& c, ScalarView const& s) { static_assert(Kokkos::is_execution_space::value, "rot: execution_space template parameter is not a Kokkos " "execution space."); static_assert(Kokkos::is_view_v, "KokkosBlas::rot: VectorView is not a Kokkos::View."); + static_assert(Kokkos::is_view_v, "KokkosBlas::rot: MagnitudeView is not a Kokkos::View."); static_assert(Kokkos::is_view_v, "KokkosBlas::rot: ScalarView is not a Kokkos::View."); static_assert(VectorView::rank == 1, "rot: VectorView template parameter needs to be a rank 1 view"); + static_assert(MagnitudeView::rank == 0, "rot: MagnitudeView template parameter needs to be a rank 0 view"); static_assert(ScalarView::rank == 0, "rot: ScalarView template parameter needs to be a rank 0 view"); static_assert(Kokkos::SpaceAccessibility::accessible, "rot: VectorView template parameter memory space needs to be accessible " "from " "execution_space template parameter"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "rot: MagnitudeView template parameter memory space needs to be accessible " + "from " + "execution_space template parameter"); static_assert(Kokkos::SpaceAccessibility::accessible, - "rot: VectorView template parameter memory space needs to be accessible " + "rot: ScalarView template parameter memory space needs to be accessible " "from " "execution_space template parameter"); static_assert(std::is_same::value, @@ -55,21 +61,27 @@ void rot(execution_space const& space, VectorView const& X, VectorView const& Y, Kokkos::Device, Kokkos::MemoryTraits>; + using MagnitudeView_Internal = Kokkos::View::array_layout, + Kokkos::Device, + Kokkos::MemoryTraits>; + using ScalarView_Internal = Kokkos::View::array_layout, Kokkos::Device, Kokkos::MemoryTraits>; VectorView_Internal X_(X), Y_(Y); - ScalarView_Internal c_(c), s_(s); + MagnitudeView_Internal c_(c); + ScalarView_Internal s_(s); Kokkos::Profiling::pushRegion("KokkosBlas::rot"); - Impl::Rot::rot(space, X_, Y_, c_, s_); + Impl::Rot::rot(space, X_, Y_, c_, s_); Kokkos::Profiling::popRegion(); } -template -void rot(VectorView const& X, VectorView const& Y, ScalarView const& c, ScalarView const& s) { +template +void rot(VectorView const& X, VectorView const& Y, MagnitudeView const& c, ScalarView const& s) { const typename VectorView::execution_space space = typename VectorView::execution_space(); rot(space, X, Y, c, s); } diff --git a/blas/tpls/KokkosBlas1_rot_tpl_spec_avail.hpp b/blas/tpls/KokkosBlas1_rot_tpl_spec_avail.hpp index fee65fce14..6f4784b389 100644 --- a/blas/tpls/KokkosBlas1_rot_tpl_spec_avail.hpp +++ b/blas/tpls/KokkosBlas1_rot_tpl_spec_avail.hpp @@ -20,7 +20,7 @@ namespace KokkosBlas { namespace Impl { // Specialization struct which defines whether a specialization exists -template +template struct rot_tpl_spec_avail { enum : bool { value = false }; }; @@ -37,6 +37,9 @@ namespace Impl { struct rot_tpl_spec_avail, \ Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type, LAYOUT, \ + Kokkos::Device, \ + Kokkos::MemoryTraits>, \ Kokkos::View, \ Kokkos::MemoryTraits>> { \ enum : bool { value = true }; \ @@ -64,6 +67,8 @@ KOKKOSBLAS1_ROT_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, struct rot_tpl_spec_avail< \ EXECSPACE, \ Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ Kokkos::View, Kokkos::MemoryTraits>> { \ enum : bool { value = true }; \ }; diff --git a/blas/tpls/KokkosBlas1_rot_tpl_spec_decl.hpp b/blas/tpls/KokkosBlas1_rot_tpl_spec_decl.hpp index dfe747bf88..d34cbd885f 100644 --- a/blas/tpls/KokkosBlas1_rot_tpl_spec_decl.hpp +++ b/blas/tpls/KokkosBlas1_rot_tpl_spec_decl.hpp @@ -157,12 +157,15 @@ namespace Impl { EXECSPACE, \ Kokkos::View, Kokkos::MemoryTraits>, \ Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ true, ETI_SPEC_AVAIL> { \ using VectorView = \ Kokkos::View, Kokkos::MemoryTraits>; \ + using MagnitudeView = \ + Kokkos::View, Kokkos::MemoryTraits>; \ using ScalarView = \ Kokkos::View, Kokkos::MemoryTraits>; \ - static void rot(EXECSPACE const& space, VectorView const& X, VectorView const& Y, ScalarView const& c, \ + static void rot(EXECSPACE const& space, VectorView const& X, VectorView const& Y, MagnitudeView const& c, \ ScalarView const& s) { \ Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_CUBLAS,double]"); \ rot_print_specialization(); \ @@ -182,13 +185,16 @@ namespace Impl { struct Rot< \ EXECSPACE, \ Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ Kokkos::View, Kokkos::MemoryTraits>, true, \ ETI_SPEC_AVAIL> { \ using VectorView = \ Kokkos::View, Kokkos::MemoryTraits>; \ + using MagnitudeView = \ + Kokkos::View, Kokkos::MemoryTraits>; \ using ScalarView = \ Kokkos::View, Kokkos::MemoryTraits>; \ - static void rot(EXECSPACE const& space, VectorView const& X, VectorView const& Y, ScalarView const& c, \ + static void rot(EXECSPACE const& space, VectorView const& X, VectorView const& Y, MagnitudeView const& c, \ ScalarView const& s) { \ Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_CUBLAS,float]"); \ rot_print_specialization(); \ @@ -210,12 +216,17 @@ namespace Impl { Kokkos::View*, LAYOUT, Kokkos::Device, \ Kokkos::MemoryTraits>, \ Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ true, ETI_SPEC_AVAIL> { \ using VectorView = Kokkos::View*, LAYOUT, Kokkos::Device, \ Kokkos::MemoryTraits>; \ - using ScalarView = \ + using MagnitudeView = \ Kokkos::View, Kokkos::MemoryTraits>; \ - static void rot(EXECSPACE const& space, VectorView const& X, VectorView const& Y, ScalarView const& c, \ + using ScalarView = \ + Kokkos::View, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + static void rot(EXECSPACE const& space, VectorView const& X, VectorView const& Y, MagnitudeView const& c, \ ScalarView const& s) { \ Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_CUBLAS,complex]"); \ rot_print_specialization(); \ @@ -225,7 +236,8 @@ namespace Impl { KOKKOSBLAS_IMPL_CUBLAS_SAFE_CALL(cublasGetPointerMode(singleton.handle, &pointer_mode)); \ KOKKOSBLAS_IMPL_CUBLAS_SAFE_CALL(cublasSetPointerMode(singleton.handle, CUBLAS_POINTER_MODE_DEVICE)); \ cublasZdrot(singleton.handle, X.extent_int(0), reinterpret_cast(X.data()), 1, \ - reinterpret_cast(Y.data()), 1, c.data(), s.data()); \ + reinterpret_cast(Y.data()), 1, c.data(), \ + reinterpret_cast(s.data())); \ KOKKOSBLAS_IMPL_CUBLAS_SAFE_CALL(cublasSetPointerMode(singleton.handle, pointer_mode)); \ Kokkos::Profiling::popRegion(); \ } \ @@ -237,13 +249,17 @@ namespace Impl { EXECSPACE, \ Kokkos::View*, LAYOUT, Kokkos::Device, \ Kokkos::MemoryTraits>, \ - Kokkos::View, Kokkos::MemoryTraits>, true, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, true, \ ETI_SPEC_AVAIL> { \ - using VectorView = Kokkos::View, LAYOUT, Kokkos::Device, \ + using VectorView = Kokkos::View*, LAYOUT, Kokkos::Device, \ Kokkos::MemoryTraits>; \ - using ScalarView = \ + using MagnitudeView = \ Kokkos::View, Kokkos::MemoryTraits>; \ - static void rot(EXECSPACE const& space, VectorView const& X, VectorView const& Y, ScalarView const& c, \ + using ScalarView = Kokkos::View, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + static void rot(EXECSPACE const& space, VectorView const& X, VectorView const& Y, MagnitudeView const& c, \ ScalarView const& s) { \ Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_CUBLAS,complex]"); \ rot_print_specialization(); \ @@ -253,7 +269,7 @@ namespace Impl { KOKKOSBLAS_IMPL_CUBLAS_SAFE_CALL(cublasGetPointerMode(singleton.handle, &pointer_mode)); \ KOKKOSBLAS_IMPL_CUBLAS_SAFE_CALL(cublasSetPointerMode(singleton.handle, CUBLAS_POINTER_MODE_DEVICE)); \ cublasCsrot(singleton.handle, X.extent_int(0), reinterpret_cast(X.data()), 1, \ - reinterpret_cast(Y.data()), 1, c.data(), s.data()); \ + reinterpret_cast(Y.data()), 1, c.data(), reinterpret_cast(s.data())); \ KOKKOSBLAS_IMPL_CUBLAS_SAFE_CALL(cublasSetPointerMode(singleton.handle, pointer_mode)); \ Kokkos::Profiling::popRegion(); \ } \ diff --git a/blas/unit_test/Test_Blas1_rot.hpp b/blas/unit_test/Test_Blas1_rot.hpp index db9367cb42..629e6db003 100644 --- a/blas/unit_test/Test_Blas1_rot.hpp +++ b/blas/unit_test/Test_Blas1_rot.hpp @@ -19,12 +19,14 @@ template int test_rot() { using mag_type = typename Kokkos::ArithTraits::mag_type; using vector_type = Kokkos::View; - using scalar_type = Kokkos::View; + using magnitude_type = Kokkos::View; + using scalar_type = Kokkos::View; using vector_ref_type = Kokkos::View; vector_type X("X", 4), Y("Y", 4); vector_ref_type Xref("Xref", 4), Yref("Yref", 4); - scalar_type c("c"), s("s"); + magnitude_type c("c"); + scalar_type s("s"); // Initialize inputs typename vector_type::HostMirror X_h = Kokkos::create_mirror_view(X); From f469bac8953a01fe894beea79d92582d377f0080 Mon Sep 17 00:00:00 2001 From: Luc Berger-Vergiat Date: Mon, 6 Jan 2025 09:33:09 -0700 Subject: [PATCH 11/11] BLAS - ROT: fixing types for Host TPL calls to ROT function The types for the arguments c and s are actually different and need to be appropriately propagated through the TPL layers of the library. Signed-off-by: Luc Berger-Vergiat --- blas/impl/KokkosBlas1_rot_impl.hpp | 2 +- blas/impl/KokkosBlas1_rot_spec.hpp | 18 +- blas/src/KokkosBlas1_rot.hpp | 3 +- blas/tpls/KokkosBlas1_rot_tpl_spec_avail.hpp | 24 +-- blas/tpls/KokkosBlas1_rot_tpl_spec_decl.hpp | 213 +++++++++++-------- blas/tpls/KokkosBlas_Host_tpl.cpp | 9 +- blas/tpls/KokkosBlas_Host_tpl.hpp | 2 +- 7 files changed, 150 insertions(+), 121 deletions(-) diff --git a/blas/impl/KokkosBlas1_rot_impl.hpp b/blas/impl/KokkosBlas1_rot_impl.hpp index 612ba60bf5..bcc7518979 100644 --- a/blas/impl/KokkosBlas1_rot_impl.hpp +++ b/blas/impl/KokkosBlas1_rot_impl.hpp @@ -42,7 +42,7 @@ struct rot_functor { } }; - template +template void Rot_Invoke(ExecutionSpace const& space, VectorView const& X, VectorView const& Y, MagnitudeView const& c, ScalarView const& s) { Kokkos::RangePolicy rot_policy(space, 0, X.extent(0)); diff --git a/blas/impl/KokkosBlas1_rot_spec.hpp b/blas/impl/KokkosBlas1_rot_spec.hpp index 61f14fb302..e68c1dbdd4 100644 --- a/blas/impl/KokkosBlas1_rot_spec.hpp +++ b/blas/impl/KokkosBlas1_rot_spec.hpp @@ -43,15 +43,15 @@ struct rot_eti_spec_avail { // We may spread out definitions (see _INST macro below) across one or // more .cpp files. // -#define KOKKOSBLAS1_ROT_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXECSPACE, MEMSPACE) \ - template <> \ - struct rot_eti_spec_avail< \ - EXECSPACE, \ - Kokkos::View, Kokkos::MemoryTraits>, \ - Kokkos::View::mag_type, LAYOUT, Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, Kokkos::MemoryTraits>> {\ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_ROT_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXECSPACE, MEMSPACE) \ + template <> \ + struct rot_eti_spec_avail< \ + EXECSPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ }; // Include the actual specialization declarations diff --git a/blas/src/KokkosBlas1_rot.hpp b/blas/src/KokkosBlas1_rot.hpp index 01b8a62f06..cd4c687148 100644 --- a/blas/src/KokkosBlas1_rot.hpp +++ b/blas/src/KokkosBlas1_rot.hpp @@ -76,7 +76,8 @@ void rot(execution_space const& space, VectorView const& X, VectorView const& Y, ScalarView_Internal s_(s); Kokkos::Profiling::pushRegion("KokkosBlas::rot"); - Impl::Rot::rot(space, X_, Y_, c_, s_); + Impl::Rot::rot(space, X_, Y_, c_, + s_); Kokkos::Profiling::popRegion(); } diff --git a/blas/tpls/KokkosBlas1_rot_tpl_spec_avail.hpp b/blas/tpls/KokkosBlas1_rot_tpl_spec_avail.hpp index 6f4784b389..3417efc3a5 100644 --- a/blas/tpls/KokkosBlas1_rot_tpl_spec_avail.hpp +++ b/blas/tpls/KokkosBlas1_rot_tpl_spec_avail.hpp @@ -32,17 +32,17 @@ namespace Impl { // Generic Host side BLAS (could be MKL or whatever) #ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS -#define KOKKOSBLAS1_ROT_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUT, EXECSPACE) \ - template <> \ - struct rot_tpl_spec_avail, \ - Kokkos::MemoryTraits>, \ - Kokkos::View::mag_type, LAYOUT, \ - Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_ROT_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUT, EXECSPACE) \ + template <> \ + struct rot_tpl_spec_avail< \ + EXECSPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type, LAYOUT, \ + Kokkos::Device, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ }; #ifdef KOKKOS_ENABLE_SERIAL @@ -68,7 +68,7 @@ KOKKOSBLAS1_ROT_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, EXECSPACE, \ Kokkos::View, Kokkos::MemoryTraits>, \ Kokkos::View::mag_type, LAYOUT, Kokkos::Device, \ - Kokkos::MemoryTraits>, \ + Kokkos::MemoryTraits>, \ Kokkos::View, Kokkos::MemoryTraits>> { \ enum : bool { value = true }; \ }; diff --git a/blas/tpls/KokkosBlas1_rot_tpl_spec_decl.hpp b/blas/tpls/KokkosBlas1_rot_tpl_spec_decl.hpp index d34cbd885f..541702d59d 100644 --- a/blas/tpls/KokkosBlas1_rot_tpl_spec_decl.hpp +++ b/blas/tpls/KokkosBlas1_rot_tpl_spec_decl.hpp @@ -39,76 +39,104 @@ inline void rot_print_specialization() { namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS1_DROT_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, ETI_SPEC_AVAIL) \ - template <> \ - struct Rot, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using VectorView = Kokkos::View, \ +#define KOKKOSBLAS1_DROT_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Rot, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using VectorView = Kokkos::View, \ Kokkos::MemoryTraits>; \ - using ScalarView = Kokkos::View, \ + using MagnitudeView = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using ScalarView = Kokkos::View, \ Kokkos::MemoryTraits>; \ - static void rot(EXECSPACE const& /*space*/, VectorView const& X, VectorView const& Y, ScalarView const& c, \ - ScalarView const& s) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_BLAS,double]"); \ - HostBlas::rot(X.extent_int(0), X.data(), 1, Y.data(), 1, c.data(), s.data()); \ - Kokkos::Profiling::popRegion(); \ - } \ + static void rot(EXECSPACE const& /*space*/, VectorView const& X, VectorView const& Y, MagnitudeView const& c, \ + ScalarView const& s) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_BLAS,double]"); \ + HostBlas::rot(X.extent_int(0), X.data(), 1, Y.data(), 1, c.data(), s.data()); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_SROT_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, ETI_SPEC_AVAIL) \ - template <> \ - struct Rot, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using VectorView = Kokkos::View, \ +#define KOKKOSBLAS1_SROT_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Rot, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using VectorView = Kokkos::View, \ Kokkos::MemoryTraits>; \ - using ScalarView = Kokkos::View, \ + using MagnitudeView = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using ScalarView = Kokkos::View, \ Kokkos::MemoryTraits>; \ - static void rot(EXECSPACE const& /*space*/, VectorView const& X, VectorView const& Y, ScalarView const& c, \ - ScalarView const& s) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_BLAS,float]"); \ - HostBlas::rot(X.extent_int(0), X.data(), 1, Y.data(), 1, c.data(), s.data()); \ - Kokkos::Profiling::popRegion(); \ - } \ + static void rot(EXECSPACE const& /*space*/, VectorView const& X, VectorView const& Y, MagnitudeView const& c, \ + ScalarView const& s) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_BLAS,float]"); \ + HostBlas::rot(X.extent_int(0), X.data(), 1, Y.data(), 1, c.data(), s.data()); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_ZROT_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, ETI_SPEC_AVAIL) \ - template \ - struct Rot, EXECSPACE, MEMSPACE, true, ETI_SPEC_AVAIL> { \ - using VectorView = Kokkos::View*, LAYOUT, Kokkos::Device, \ - Kokkos::MemoryTraits>; \ - using ScalarView = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - static void rot(EXECSPACE const& /*space*/, VectorView const& X, VectorView const& Y, ScalarView const& c, \ - ScalarView const& s) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_BLAS,complex]"); \ - HostBlas>::rot(X.extent_int(0), reinterpret_cast*>(X.data()), 1, \ - reinterpret_cast*>(Y.data()), 1, c.data(), s.data()); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_ZROT_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Rot*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using VectorView = Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + using MagnitudeView = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using ScalarView = Kokkos::View, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + static void rot(EXECSPACE const& /*space*/, VectorView const& X, VectorView const& Y, MagnitudeView const& c, \ + ScalarView const& s) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_BLAS,complex]"); \ + HostBlas>::rot(X.extent_int(0), reinterpret_cast*>(X.data()), 1, \ + reinterpret_cast*>(Y.data()), 1, c.data(), \ + reinterpret_cast*>(s.data())); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_CROT_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, ETI_SPEC_AVAIL) \ - template \ - struct Rot, EXECSPACE, MEMSPACE, true, ETI_SPEC_AVAIL> { \ - using VectorView = Kokkos::View*, LAYOUT, Kokkos::Device, \ +#define KOKKOSBLAS1_CROT_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Rot*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using VectorView = Kokkos::View*, LAYOUT, Kokkos::Device, \ Kokkos::MemoryTraits>; \ - using ScalarView = Kokkos::View, \ + using MagnitudeView = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using ScalarView = Kokkos::View, LAYOUT, Kokkos::Device, \ Kokkos::MemoryTraits>; \ - static void rot(EXECSPACE const& /*space*/, VectorView const& X, VectorView const& Y, ScalarView const& c, \ - ScalarView const& s) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_BLAS,complex]"); \ - HostBlas>::rot(X.extent_int(0), reinterpret_cast*>(X.data()), 1, \ - reinterpret_cast*>(Y.data()), 1, c.data(), s.data()); \ - Kokkos::Profiling::popRegion(); \ - } \ + static void rot(EXECSPACE const& /*space*/, VectorView const& X, VectorView const& Y, MagnitudeView const& c, \ + ScalarView const& s) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_BLAS,complex]"); \ + HostBlas>::rot(X.extent_int(0), reinterpret_cast*>(X.data()), 1, \ + reinterpret_cast*>(Y.data()), 1, c.data(), \ + reinterpret_cast*>(s.data())); \ + Kokkos::Profiling::popRegion(); \ + } \ }; #ifdef KOKKOS_ENABLE_SERIAL @@ -217,15 +245,14 @@ namespace Impl { Kokkos::MemoryTraits>, \ Kokkos::View, Kokkos::MemoryTraits>, \ Kokkos::View, LAYOUT, Kokkos::Device, \ - Kokkos::MemoryTraits>, \ + Kokkos::MemoryTraits>, \ true, ETI_SPEC_AVAIL> { \ using VectorView = Kokkos::View*, LAYOUT, Kokkos::Device, \ Kokkos::MemoryTraits>; \ using MagnitudeView = \ Kokkos::View, Kokkos::MemoryTraits>; \ - using ScalarView = \ - Kokkos::View, LAYOUT, Kokkos::Device, \ - Kokkos::MemoryTraits>; \ + using ScalarView = Kokkos::View, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>; \ static void rot(EXECSPACE const& space, VectorView const& X, VectorView const& Y, MagnitudeView const& c, \ ScalarView const& s) { \ Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_CUBLAS,complex]"); \ @@ -237,42 +264,42 @@ namespace Impl { KOKKOSBLAS_IMPL_CUBLAS_SAFE_CALL(cublasSetPointerMode(singleton.handle, CUBLAS_POINTER_MODE_DEVICE)); \ cublasZdrot(singleton.handle, X.extent_int(0), reinterpret_cast(X.data()), 1, \ reinterpret_cast(Y.data()), 1, c.data(), \ - reinterpret_cast(s.data())); \ + reinterpret_cast(s.data())); \ KOKKOSBLAS_IMPL_CUBLAS_SAFE_CALL(cublasSetPointerMode(singleton.handle, pointer_mode)); \ Kokkos::Profiling::popRegion(); \ } \ }; -#define KOKKOSBLAS1_CROT_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXECSPACE, MEMSPACE, ETI_SPEC_AVAIL) \ - template <> \ - struct Rot< \ - EXECSPACE, \ - Kokkos::View*, LAYOUT, Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, Kokkos::MemoryTraits>, \ - Kokkos::View, LAYOUT, Kokkos::Device, \ - Kokkos::MemoryTraits>, true, \ - ETI_SPEC_AVAIL> { \ - using VectorView = Kokkos::View*, LAYOUT, Kokkos::Device, \ - Kokkos::MemoryTraits>; \ - using MagnitudeView = \ - Kokkos::View, Kokkos::MemoryTraits>; \ - using ScalarView = Kokkos::View, LAYOUT, Kokkos::Device, \ - Kokkos::MemoryTraits>; \ - static void rot(EXECSPACE const& space, VectorView const& X, VectorView const& Y, MagnitudeView const& c, \ - ScalarView const& s) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_CUBLAS,complex]"); \ - rot_print_specialization(); \ - KokkosBlas::Impl::CudaBlasSingleton& singleton = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOSBLAS_IMPL_CUBLAS_SAFE_CALL(cublasSetStream(singleton.handle, space.cuda_stream())); \ - cublasPointerMode_t pointer_mode; \ - KOKKOSBLAS_IMPL_CUBLAS_SAFE_CALL(cublasGetPointerMode(singleton.handle, &pointer_mode)); \ - KOKKOSBLAS_IMPL_CUBLAS_SAFE_CALL(cublasSetPointerMode(singleton.handle, CUBLAS_POINTER_MODE_DEVICE)); \ - cublasCsrot(singleton.handle, X.extent_int(0), reinterpret_cast(X.data()), 1, \ - reinterpret_cast(Y.data()), 1, c.data(), reinterpret_cast(s.data())); \ - KOKKOSBLAS_IMPL_CUBLAS_SAFE_CALL(cublasSetPointerMode(singleton.handle, pointer_mode)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_CROT_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXECSPACE, MEMSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Rot< \ + EXECSPACE, \ + Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using VectorView = Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + using MagnitudeView = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + using ScalarView = Kokkos::View, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + static void rot(EXECSPACE const& space, VectorView const& X, VectorView const& Y, MagnitudeView const& c, \ + ScalarView const& s) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_CUBLAS,complex]"); \ + rot_print_specialization(); \ + KokkosBlas::Impl::CudaBlasSingleton& singleton = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOSBLAS_IMPL_CUBLAS_SAFE_CALL(cublasSetStream(singleton.handle, space.cuda_stream())); \ + cublasPointerMode_t pointer_mode; \ + KOKKOSBLAS_IMPL_CUBLAS_SAFE_CALL(cublasGetPointerMode(singleton.handle, &pointer_mode)); \ + KOKKOSBLAS_IMPL_CUBLAS_SAFE_CALL(cublasSetPointerMode(singleton.handle, CUBLAS_POINTER_MODE_DEVICE)); \ + cublasCsrot(singleton.handle, X.extent_int(0), reinterpret_cast(X.data()), 1, \ + reinterpret_cast(Y.data()), 1, c.data(), reinterpret_cast(s.data())); \ + KOKKOSBLAS_IMPL_CUBLAS_SAFE_CALL(cublasSetPointerMode(singleton.handle, pointer_mode)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; KOKKOSBLAS1_DROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, true) diff --git a/blas/tpls/KokkosBlas_Host_tpl.cpp b/blas/tpls/KokkosBlas_Host_tpl.cpp index c163dc726d..5df3d661c0 100644 --- a/blas/tpls/KokkosBlas_Host_tpl.cpp +++ b/blas/tpls/KokkosBlas_Host_tpl.cpp @@ -125,9 +125,9 @@ void F77_BLAS_MANGLE(srot, SROT)(KK_INT const* N, float* X, KK_INT const* incx, void F77_BLAS_MANGLE(drot, DROT)(KK_INT const* N, double* X, KK_INT const* incx, double* Y, KK_INT const* incy, double* c, double* s); void F77_BLAS_MANGLE(crot, CROT)(KK_INT const* N, std::complex* X, KK_INT const* incx, std::complex* Y, - KK_INT const* incy, float* c, float* s); + KK_INT const* incy, float* c, std::complex* s); void F77_BLAS_MANGLE(zrot, ZROT)(KK_INT const* N, std::complex* X, KK_INT const* incx, std::complex* Y, - KK_INT const* incy, double* c, double* s); + KK_INT const* incy, double* c, std::complex* s); /// /// rotg @@ -683,7 +683,7 @@ void HostBlas >::axpy(KK_INT n, const std::complex al } template <> void HostBlas >::rot(KK_INT const N, std::complex* X, KK_INT const incx, - std::complex* Y, KK_INT const incy, float* c, float* s) { + std::complex* Y, KK_INT const incy, float* c, std::complex* s) { F77_FUNC_CROT(&N, X, &incx, Y, &incy, c, s); } template <> @@ -824,7 +824,8 @@ void HostBlas >::axpy(KK_INT n, const std::complex } template <> void HostBlas >::rot(KK_INT const N, std::complex* X, KK_INT const incx, - std::complex* Y, KK_INT const incy, double* c, double* s) { + std::complex* Y, KK_INT const incy, double* c, + std::complex* s) { F77_FUNC_ZROT(&N, X, &incx, Y, &incy, c, s); } template <> diff --git a/blas/tpls/KokkosBlas_Host_tpl.hpp b/blas/tpls/KokkosBlas_Host_tpl.hpp index 576fde8471..920b978cc1 100644 --- a/blas/tpls/KokkosBlas_Host_tpl.hpp +++ b/blas/tpls/KokkosBlas_Host_tpl.hpp @@ -57,7 +57,7 @@ struct HostBlas { static void axpy(KK_INT n, const T alpha, const T *x, KK_INT x_inc, /* */ T *y, KK_INT y_inc); - static void rot(KK_INT const N, T *X, KK_INT const incx, T *Y, KK_INT const incy, mag_type *c, mag_type *s); + static void rot(KK_INT const N, T *X, KK_INT const incx, T *Y, KK_INT const incy, mag_type *c, T *s); static void rotg(T *a, T *b, mag_type *c, T *s);