diff --git a/blas/impl/KokkosBlas1_rot_impl.hpp b/blas/impl/KokkosBlas1_rot_impl.hpp index e139e916be..bcc7518979 100644 --- a/blas/impl/KokkosBlas1_rot_impl.hpp +++ b/blas/impl/KokkosBlas1_rot_impl.hpp @@ -23,14 +23,15 @@ namespace KokkosBlas { namespace Impl { -template +template struct rot_functor { using scalar_type = typename VectorView::non_const_value_type; VectorView X, Y; - ScalarView c, s; + MagnitudeView c; + ScalarView s; - rot_functor(VectorView const& X_, VectorView const& Y_, ScalarView const& c_, ScalarView const& s_) + rot_functor(VectorView const& X_, VectorView const& Y_, MagnitudeView const& c_, ScalarView const& s_) : X(X_), Y(Y_), c(c_), s(s_) {} KOKKOS_INLINE_FUNCTION @@ -41,8 +42,8 @@ struct rot_functor { } }; -template -void Rot_Invoke(ExecutionSpace const& space, VectorView const& X, VectorView const& Y, ScalarView const& c, +template +void Rot_Invoke(ExecutionSpace const& space, VectorView const& X, VectorView const& Y, MagnitudeView const& c, ScalarView const& s) { Kokkos::RangePolicy rot_policy(space, 0, X.extent(0)); rot_functor rot_func(X, Y, c, s); diff --git a/blas/impl/KokkosBlas1_rot_spec.hpp b/blas/impl/KokkosBlas1_rot_spec.hpp index 493cd648cf..e68c1dbdd4 100644 --- a/blas/impl/KokkosBlas1_rot_spec.hpp +++ b/blas/impl/KokkosBlas1_rot_spec.hpp @@ -29,7 +29,7 @@ namespace KokkosBlas { namespace Impl { // Specialization struct which defines whether a specialization exists -template +template struct rot_eti_spec_avail { enum : bool { value = false }; }; @@ -43,14 +43,15 @@ struct rot_eti_spec_avail { // We may spread out definitions (see _INST macro below) across one or // more .cpp files. // -#define KOKKOSBLAS1_ROT_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXECSPACE, MEMSPACE) \ - template <> \ - struct rot_eti_spec_avail< \ - EXECSPACE, \ - Kokkos::View, Kokkos::MemoryTraits>, \ - Kokkos::View::mag_type, LAYOUT, Kokkos::Device, \ - Kokkos::MemoryTraits>> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_ROT_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXECSPACE, MEMSPACE) \ + template <> \ + struct rot_eti_spec_avail< \ + EXECSPACE, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ }; // Include the actual specialization declarations @@ -61,19 +62,19 @@ namespace KokkosBlas { namespace Impl { // Unification layer -template ::value, - bool eti_spec_avail = rot_eti_spec_avail::value> +template ::value, + bool eti_spec_avail = rot_eti_spec_avail::value> struct Rot { - static void rot(ExecutionSpace const& space, VectorView const& X, VectorView const& Y, ScalarView const& c, + static void rot(ExecutionSpace const& space, VectorView const& X, VectorView const& Y, MagnitudeView const& c, ScalarView const& s); }; #if !defined(KOKKOSKERNELS_ETI_ONLY) || KOKKOSKERNELS_IMPL_COMPILE_LIBRARY //! Full specialization of Rot. -template -struct Rot { - static void rot(ExecutionSpace const& space, VectorView const& X, VectorView const& Y, ScalarView const& c, +template +struct Rot { + static void rot(ExecutionSpace const& space, VectorView const& X, VectorView const& Y, MagnitudeView const& c, ScalarView const& s) { Kokkos::Profiling::pushRegion(KOKKOSKERNELS_IMPL_COMPILE_LIBRARY ? "KokkosBlas::rot[ETI]" : "KokkosBlas::rot[noETI]"); @@ -86,7 +87,7 @@ struct Rot(space, X, Y, c, s); + Rot_Invoke(space, X, Y, c, s); Kokkos::Profiling::popRegion(); } }; @@ -108,6 +109,7 @@ struct Rot, Kokkos::MemoryTraits>, \ Kokkos::View::mag_type, LAYOUT, Kokkos::Device, \ Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ false, true>; // @@ -121,6 +123,7 @@ struct Rot, Kokkos::MemoryTraits>, \ Kokkos::View::mag_type, LAYOUT, Kokkos::Device, \ Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ false, true>; #include diff --git a/blas/src/KokkosBlas1_rot.hpp b/blas/src/KokkosBlas1_rot.hpp index 7bc3215604..cd4c687148 100644 --- a/blas/src/KokkosBlas1_rot.hpp +++ b/blas/src/KokkosBlas1_rot.hpp @@ -21,45 +21,68 @@ namespace KokkosBlas { -template -void rot(execution_space const& space, VectorView const& X, VectorView const& Y, ScalarView const& c, +template +void rot(execution_space const& space, VectorView const& X, VectorView const& Y, MagnitudeView const& c, ScalarView const& s) { static_assert(Kokkos::is_execution_space::value, "rot: execution_space template parameter is not a Kokkos " "execution space."); + static_assert(Kokkos::is_view_v, "KokkosBlas::rot: VectorView is not a Kokkos::View."); + static_assert(Kokkos::is_view_v, "KokkosBlas::rot: MagnitudeView is not a Kokkos::View."); + static_assert(Kokkos::is_view_v, "KokkosBlas::rot: ScalarView is not a Kokkos::View."); static_assert(VectorView::rank == 1, "rot: VectorView template parameter needs to be a rank 1 view"); + static_assert(MagnitudeView::rank == 0, "rot: MagnitudeView template parameter needs to be a rank 0 view"); static_assert(ScalarView::rank == 0, "rot: ScalarView template parameter needs to be a rank 0 view"); static_assert(Kokkos::SpaceAccessibility::accessible, "rot: VectorView template parameter memory space needs to be accessible " "from " "execution_space template parameter"); + static_assert(Kokkos::SpaceAccessibility::accessible, + "rot: MagnitudeView template parameter memory space needs to be accessible " + "from " + "execution_space template parameter"); static_assert(Kokkos::SpaceAccessibility::accessible, - "rot: VectorView template parameter memory space needs to be accessible " + "rot: ScalarView template parameter memory space needs to be accessible " "from " "execution_space template parameter"); static_assert(std::is_same::value, "rot: VectorView template parameter needs to store non-const values"); + // Check compatibility of dimensions at run time. + if (X.extent(0) != Y.extent(0)) { + std::ostringstream os; + os << "KokkosBlas::rot: Dimensions of X and Y do not match: " + << "X: " << X.extent(0) << ", Y: " << Y.extent(0); + KokkosKernels::Impl::throw_runtime_exception(os.str()); + } + using VectorView_Internal = Kokkos::View::array_layout, Kokkos::Device, Kokkos::MemoryTraits>; + using MagnitudeView_Internal = Kokkos::View::array_layout, + Kokkos::Device, + Kokkos::MemoryTraits>; + using ScalarView_Internal = Kokkos::View::array_layout, Kokkos::Device, Kokkos::MemoryTraits>; VectorView_Internal X_(X), Y_(Y); - ScalarView_Internal c_(c), s_(s); + MagnitudeView_Internal c_(c); + ScalarView_Internal s_(s); Kokkos::Profiling::pushRegion("KokkosBlas::rot"); - Impl::Rot::rot(space, X_, Y_, c_, s_); + Impl::Rot::rot(space, X_, Y_, c_, + s_); Kokkos::Profiling::popRegion(); } -template -void rot(VectorView const& X, VectorView const& Y, ScalarView const& c, ScalarView const& s) { +template +void rot(VectorView const& X, VectorView const& Y, MagnitudeView const& c, ScalarView const& s) { const typename VectorView::execution_space space = typename VectorView::execution_space(); rot(space, X, Y, c, s); } diff --git a/blas/src/KokkosBlas1_rotg.hpp b/blas/src/KokkosBlas1_rotg.hpp index 1927bc2df9..b309316002 100644 --- a/blas/src/KokkosBlas1_rotg.hpp +++ b/blas/src/KokkosBlas1_rotg.hpp @@ -44,6 +44,8 @@ void rotg(execution_space const& space, SViewType const& a, SViewType const& b, "rotg: execution_space cannot access data in SViewType"); static_assert(Kokkos::SpaceAccessibility::accessible, "rotg: execution_space cannot access data in MViewType"); + static_assert(!Kokkos::ArithTraits::is_complex, + "rotg: MViewType cannot hold complex values."); using SView_Internal = Kokkos::View< typename SViewType::value_type, typename KokkosKernels::Impl::GetUnifiedLayout::array_layout, diff --git a/blas/src/KokkosBlas1_scal.hpp b/blas/src/KokkosBlas1_scal.hpp index 561c505035..83aa722190 100644 --- a/blas/src/KokkosBlas1_scal.hpp +++ b/blas/src/KokkosBlas1_scal.hpp @@ -58,8 +58,6 @@ void scal(const execution_space& space, const RMV& R, const AV& a, const XMV& X) "X is not a Kokkos::View."); static_assert(Kokkos::SpaceAccessibility::accessible, "KokkosBlas::scal: XMV must be accessible from execution_space"); - static_assert(Kokkos::SpaceAccessibility::assignable, - "KokkosBlas::scal: XMV must be assignable to RMV"); static_assert(std::is_same::value, "KokkosBlas::scal: R is const. " "It must be nonconst, because it is an output argument " diff --git a/blas/src/KokkosBlas2_ger.hpp b/blas/src/KokkosBlas2_ger.hpp index 88786649ba..62d4d0fec5 100644 --- a/blas/src/KokkosBlas2_ger.hpp +++ b/blas/src/KokkosBlas2_ger.hpp @@ -43,19 +43,22 @@ template ::accessible, - "AViewType memory space must be accessible from ExecutionSpace"); + "ger: AViewType memory space must be accessible from ExecutionSpace"); static_assert(Kokkos::SpaceAccessibility::accessible, - "XViewType memory space must be accessible from ExecutionSpace"); + "ger: XViewType memory space must be accessible from ExecutionSpace"); static_assert(Kokkos::SpaceAccessibility::accessible, - "YViewType memory space must be accessible from ExecutionSpace"); + "ger: YViewType memory space must be accessible from ExecutionSpace"); - static_assert(Kokkos::is_view::value, "AViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, "XViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, "YViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "ger: AViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "ger: XViewType must be a Kokkos::View."); + static_assert(Kokkos::is_view::value, "ger: YViewType must be a Kokkos::View."); - static_assert(static_cast(AViewType::rank) == 2, "AViewType must have rank 2."); - static_assert(static_cast(XViewType::rank) == 1, "XViewType must have rank 1."); - static_assert(static_cast(YViewType::rank) == 1, "YViewType must have rank 1."); + static_assert(static_cast(AViewType::rank) == 2, "ger: AViewType must have rank 2."); + static_assert(static_cast(XViewType::rank) == 1, "ger: XViewType must have rank 1."); + static_assert(static_cast(YViewType::rank) == 1, "ger: YViewType must have rank 1."); + + static_assert(std::is_same_v, + "ger: AViewType must store non const values."); // Check compatibility of dimensions at run time. if ((A.extent(0) != x.extent(0)) || (A.extent(1) != y.extent(0))) { diff --git a/blas/src/KokkosBlas3_trmm.hpp b/blas/src/KokkosBlas3_trmm.hpp index 9da47b7160..62226cefe7 100644 --- a/blas/src/KokkosBlas3_trmm.hpp +++ b/blas/src/KokkosBlas3_trmm.hpp @@ -66,10 +66,14 @@ namespace KokkosBlas { template void trmm(const execution_space& space, const char side[], const char uplo[], const char trans[], const char diag[], typename BViewType::const_value_type& alpha, const AViewType& A, const BViewType& B) { - static_assert(Kokkos::is_view::value, "AViewType must be a Kokkos::View."); - static_assert(Kokkos::is_view::value, "BViewType must be a Kokkos::View."); - static_assert(static_cast(AViewType::rank) == 2, "AViewType must have rank 2."); - static_assert(static_cast(BViewType::rank) == 2, "BViewType must have rank 2."); + static_assert(Kokkos::is_execution_space_v, + "trmm: execution_space must be a Kokkos::execution_space."); + static_assert(Kokkos::is_view_v, + "trmm: AViewType must be a " + "Kokkos::View."); + static_assert(Kokkos::is_view_v, "trmm: BViewType must be a Kokkos::View."); + static_assert(static_cast(AViewType::rank) == 2, "trmm: AViewType must have rank 2."); + static_assert(static_cast(BViewType::rank) == 2, "trmm: BViewType must have rank 2."); // Check validity of indicator argument bool valid_side = (side[0] == 'L') || (side[0] == 'l') || (side[0] == 'R') || (side[0] == 'r'); diff --git a/blas/tpls/KokkosBlas1_rot_tpl_spec_avail.hpp b/blas/tpls/KokkosBlas1_rot_tpl_spec_avail.hpp index fee65fce14..3417efc3a5 100644 --- a/blas/tpls/KokkosBlas1_rot_tpl_spec_avail.hpp +++ b/blas/tpls/KokkosBlas1_rot_tpl_spec_avail.hpp @@ -20,7 +20,7 @@ namespace KokkosBlas { namespace Impl { // Specialization struct which defines whether a specialization exists -template +template struct rot_tpl_spec_avail { enum : bool { value = false }; }; @@ -32,14 +32,17 @@ namespace Impl { // Generic Host side BLAS (could be MKL or whatever) #ifdef KOKKOSKERNELS_ENABLE_TPL_BLAS -#define KOKKOSBLAS1_ROT_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUT, EXECSPACE) \ - template <> \ - struct rot_tpl_spec_avail, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>> { \ - enum : bool { value = true }; \ +#define KOKKOSBLAS1_ROT_TPL_SPEC_AVAIL_BLAS(SCALAR, LAYOUT, EXECSPACE) \ + template <> \ + struct rot_tpl_spec_avail< \ + EXECSPACE, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type, LAYOUT, \ + Kokkos::Device, Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>> { \ + enum : bool { value = true }; \ }; #ifdef KOKKOS_ENABLE_SERIAL @@ -64,6 +67,8 @@ KOKKOSBLAS1_ROT_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, Kokkos::LayoutLeft, struct rot_tpl_spec_avail< \ EXECSPACE, \ Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View::mag_type, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ Kokkos::View, Kokkos::MemoryTraits>> { \ enum : bool { value = true }; \ }; diff --git a/blas/tpls/KokkosBlas1_rot_tpl_spec_decl.hpp b/blas/tpls/KokkosBlas1_rot_tpl_spec_decl.hpp index dfe747bf88..541702d59d 100644 --- a/blas/tpls/KokkosBlas1_rot_tpl_spec_decl.hpp +++ b/blas/tpls/KokkosBlas1_rot_tpl_spec_decl.hpp @@ -39,76 +39,104 @@ inline void rot_print_specialization() { namespace KokkosBlas { namespace Impl { -#define KOKKOSBLAS1_DROT_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, ETI_SPEC_AVAIL) \ - template <> \ - struct Rot, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using VectorView = Kokkos::View, \ +#define KOKKOSBLAS1_DROT_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Rot, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using VectorView = Kokkos::View, \ Kokkos::MemoryTraits>; \ - using ScalarView = Kokkos::View, \ + using MagnitudeView = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using ScalarView = Kokkos::View, \ Kokkos::MemoryTraits>; \ - static void rot(EXECSPACE const& /*space*/, VectorView const& X, VectorView const& Y, ScalarView const& c, \ - ScalarView const& s) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_BLAS,double]"); \ - HostBlas::rot(X.extent_int(0), X.data(), 1, Y.data(), 1, c.data(), s.data()); \ - Kokkos::Profiling::popRegion(); \ - } \ + static void rot(EXECSPACE const& /*space*/, VectorView const& X, VectorView const& Y, MagnitudeView const& c, \ + ScalarView const& s) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_BLAS,double]"); \ + HostBlas::rot(X.extent_int(0), X.data(), 1, Y.data(), 1, c.data(), s.data()); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_SROT_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, ETI_SPEC_AVAIL) \ - template <> \ - struct Rot, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, \ - Kokkos::MemoryTraits>, \ - true, ETI_SPEC_AVAIL> { \ - using VectorView = Kokkos::View, \ +#define KOKKOSBLAS1_SROT_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Rot, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using VectorView = Kokkos::View, \ Kokkos::MemoryTraits>; \ - using ScalarView = Kokkos::View, \ + using MagnitudeView = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using ScalarView = Kokkos::View, \ Kokkos::MemoryTraits>; \ - static void rot(EXECSPACE const& /*space*/, VectorView const& X, VectorView const& Y, ScalarView const& c, \ - ScalarView const& s) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_BLAS,float]"); \ - HostBlas::rot(X.extent_int(0), X.data(), 1, Y.data(), 1, c.data(), s.data()); \ - Kokkos::Profiling::popRegion(); \ - } \ + static void rot(EXECSPACE const& /*space*/, VectorView const& X, VectorView const& Y, MagnitudeView const& c, \ + ScalarView const& s) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_BLAS,float]"); \ + HostBlas::rot(X.extent_int(0), X.data(), 1, Y.data(), 1, c.data(), s.data()); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_ZROT_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, ETI_SPEC_AVAIL) \ - template \ - struct Rot, EXECSPACE, MEMSPACE, true, ETI_SPEC_AVAIL> { \ - using VectorView = Kokkos::View*, LAYOUT, Kokkos::Device, \ - Kokkos::MemoryTraits>; \ - using ScalarView = Kokkos::View, \ - Kokkos::MemoryTraits>; \ - static void rot(EXECSPACE const& /*space*/, VectorView const& X, VectorView const& Y, ScalarView const& c, \ - ScalarView const& s) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_BLAS,complex]"); \ - HostBlas>::rot(X.extent_int(0), reinterpret_cast*>(X.data()), 1, \ - reinterpret_cast*>(Y.data()), 1, c.data(), s.data()); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_ZROT_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Rot*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using VectorView = Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + using MagnitudeView = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using ScalarView = Kokkos::View, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + static void rot(EXECSPACE const& /*space*/, VectorView const& X, VectorView const& Y, MagnitudeView const& c, \ + ScalarView const& s) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_BLAS,complex]"); \ + HostBlas>::rot(X.extent_int(0), reinterpret_cast*>(X.data()), 1, \ + reinterpret_cast*>(Y.data()), 1, c.data(), \ + reinterpret_cast*>(s.data())); \ + Kokkos::Profiling::popRegion(); \ + } \ }; -#define KOKKOSBLAS1_CROT_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, ETI_SPEC_AVAIL) \ - template \ - struct Rot, EXECSPACE, MEMSPACE, true, ETI_SPEC_AVAIL> { \ - using VectorView = Kokkos::View*, LAYOUT, Kokkos::Device, \ +#define KOKKOSBLAS1_CROT_TPL_SPEC_DECL_BLAS(LAYOUT, EXECSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Rot*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using VectorView = Kokkos::View*, LAYOUT, Kokkos::Device, \ Kokkos::MemoryTraits>; \ - using ScalarView = Kokkos::View, \ + using MagnitudeView = Kokkos::View, \ + Kokkos::MemoryTraits>; \ + using ScalarView = Kokkos::View, LAYOUT, Kokkos::Device, \ Kokkos::MemoryTraits>; \ - static void rot(EXECSPACE const& /*space*/, VectorView const& X, VectorView const& Y, ScalarView const& c, \ - ScalarView const& s) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_BLAS,complex]"); \ - HostBlas>::rot(X.extent_int(0), reinterpret_cast*>(X.data()), 1, \ - reinterpret_cast*>(Y.data()), 1, c.data(), s.data()); \ - Kokkos::Profiling::popRegion(); \ - } \ + static void rot(EXECSPACE const& /*space*/, VectorView const& X, VectorView const& Y, MagnitudeView const& c, \ + ScalarView const& s) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_BLAS,complex]"); \ + HostBlas>::rot(X.extent_int(0), reinterpret_cast*>(X.data()), 1, \ + reinterpret_cast*>(Y.data()), 1, c.data(), \ + reinterpret_cast*>(s.data())); \ + Kokkos::Profiling::popRegion(); \ + } \ }; #ifdef KOKKOS_ENABLE_SERIAL @@ -157,12 +185,15 @@ namespace Impl { EXECSPACE, \ Kokkos::View, Kokkos::MemoryTraits>, \ Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ true, ETI_SPEC_AVAIL> { \ using VectorView = \ Kokkos::View, Kokkos::MemoryTraits>; \ + using MagnitudeView = \ + Kokkos::View, Kokkos::MemoryTraits>; \ using ScalarView = \ Kokkos::View, Kokkos::MemoryTraits>; \ - static void rot(EXECSPACE const& space, VectorView const& X, VectorView const& Y, ScalarView const& c, \ + static void rot(EXECSPACE const& space, VectorView const& X, VectorView const& Y, MagnitudeView const& c, \ ScalarView const& s) { \ Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_CUBLAS,double]"); \ rot_print_specialization(); \ @@ -182,13 +213,16 @@ namespace Impl { struct Rot< \ EXECSPACE, \ Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ Kokkos::View, Kokkos::MemoryTraits>, true, \ ETI_SPEC_AVAIL> { \ using VectorView = \ Kokkos::View, Kokkos::MemoryTraits>; \ + using MagnitudeView = \ + Kokkos::View, Kokkos::MemoryTraits>; \ using ScalarView = \ Kokkos::View, Kokkos::MemoryTraits>; \ - static void rot(EXECSPACE const& space, VectorView const& X, VectorView const& Y, ScalarView const& c, \ + static void rot(EXECSPACE const& space, VectorView const& X, VectorView const& Y, MagnitudeView const& c, \ ScalarView const& s) { \ Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_CUBLAS,float]"); \ rot_print_specialization(); \ @@ -210,12 +244,16 @@ namespace Impl { Kokkos::View*, LAYOUT, Kokkos::Device, \ Kokkos::MemoryTraits>, \ Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ true, ETI_SPEC_AVAIL> { \ using VectorView = Kokkos::View*, LAYOUT, Kokkos::Device, \ Kokkos::MemoryTraits>; \ - using ScalarView = \ + using MagnitudeView = \ Kokkos::View, Kokkos::MemoryTraits>; \ - static void rot(EXECSPACE const& space, VectorView const& X, VectorView const& Y, ScalarView const& c, \ + using ScalarView = Kokkos::View, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + static void rot(EXECSPACE const& space, VectorView const& X, VectorView const& Y, MagnitudeView const& c, \ ScalarView const& s) { \ Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_CUBLAS,complex]"); \ rot_print_specialization(); \ @@ -225,38 +263,43 @@ namespace Impl { KOKKOSBLAS_IMPL_CUBLAS_SAFE_CALL(cublasGetPointerMode(singleton.handle, &pointer_mode)); \ KOKKOSBLAS_IMPL_CUBLAS_SAFE_CALL(cublasSetPointerMode(singleton.handle, CUBLAS_POINTER_MODE_DEVICE)); \ cublasZdrot(singleton.handle, X.extent_int(0), reinterpret_cast(X.data()), 1, \ - reinterpret_cast(Y.data()), 1, c.data(), s.data()); \ + reinterpret_cast(Y.data()), 1, c.data(), \ + reinterpret_cast(s.data())); \ KOKKOSBLAS_IMPL_CUBLAS_SAFE_CALL(cublasSetPointerMode(singleton.handle, pointer_mode)); \ Kokkos::Profiling::popRegion(); \ } \ }; -#define KOKKOSBLAS1_CROT_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXECSPACE, MEMSPACE, ETI_SPEC_AVAIL) \ - template <> \ - struct Rot< \ - EXECSPACE, \ - Kokkos::View*, LAYOUT, Kokkos::Device, \ - Kokkos::MemoryTraits>, \ - Kokkos::View, Kokkos::MemoryTraits>, true, \ - ETI_SPEC_AVAIL> { \ - using VectorView = Kokkos::View, LAYOUT, Kokkos::Device, \ - Kokkos::MemoryTraits>; \ - using ScalarView = \ - Kokkos::View, Kokkos::MemoryTraits>; \ - static void rot(EXECSPACE const& space, VectorView const& X, VectorView const& Y, ScalarView const& c, \ - ScalarView const& s) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_CUBLAS,complex]"); \ - rot_print_specialization(); \ - KokkosBlas::Impl::CudaBlasSingleton& singleton = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ - KOKKOSBLAS_IMPL_CUBLAS_SAFE_CALL(cublasSetStream(singleton.handle, space.cuda_stream())); \ - cublasPointerMode_t pointer_mode; \ - KOKKOSBLAS_IMPL_CUBLAS_SAFE_CALL(cublasGetPointerMode(singleton.handle, &pointer_mode)); \ - KOKKOSBLAS_IMPL_CUBLAS_SAFE_CALL(cublasSetPointerMode(singleton.handle, CUBLAS_POINTER_MODE_DEVICE)); \ - cublasCsrot(singleton.handle, X.extent_int(0), reinterpret_cast(X.data()), 1, \ - reinterpret_cast(Y.data()), 1, c.data(), s.data()); \ - KOKKOSBLAS_IMPL_CUBLAS_SAFE_CALL(cublasSetPointerMode(singleton.handle, pointer_mode)); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS1_CROT_TPL_SPEC_DECL_CUBLAS(LAYOUT, EXECSPACE, MEMSPACE, ETI_SPEC_AVAIL) \ + template <> \ + struct Rot< \ + EXECSPACE, \ + Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + Kokkos::View, Kokkos::MemoryTraits>, \ + Kokkos::View, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>, \ + true, ETI_SPEC_AVAIL> { \ + using VectorView = Kokkos::View*, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + using MagnitudeView = \ + Kokkos::View, Kokkos::MemoryTraits>; \ + using ScalarView = Kokkos::View, LAYOUT, Kokkos::Device, \ + Kokkos::MemoryTraits>; \ + static void rot(EXECSPACE const& space, VectorView const& X, VectorView const& Y, MagnitudeView const& c, \ + ScalarView const& s) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::rot[TPL_CUBLAS,complex]"); \ + rot_print_specialization(); \ + KokkosBlas::Impl::CudaBlasSingleton& singleton = KokkosBlas::Impl::CudaBlasSingleton::singleton(); \ + KOKKOSBLAS_IMPL_CUBLAS_SAFE_CALL(cublasSetStream(singleton.handle, space.cuda_stream())); \ + cublasPointerMode_t pointer_mode; \ + KOKKOSBLAS_IMPL_CUBLAS_SAFE_CALL(cublasGetPointerMode(singleton.handle, &pointer_mode)); \ + KOKKOSBLAS_IMPL_CUBLAS_SAFE_CALL(cublasSetPointerMode(singleton.handle, CUBLAS_POINTER_MODE_DEVICE)); \ + cublasCsrot(singleton.handle, X.extent_int(0), reinterpret_cast(X.data()), 1, \ + reinterpret_cast(Y.data()), 1, c.data(), reinterpret_cast(s.data())); \ + KOKKOSBLAS_IMPL_CUBLAS_SAFE_CALL(cublasSetPointerMode(singleton.handle, pointer_mode)); \ + Kokkos::Profiling::popRegion(); \ + } \ }; KOKKOSBLAS1_DROT_TPL_SPEC_DECL_CUBLAS(Kokkos::LayoutLeft, Kokkos::Cuda, Kokkos::CudaSpace, true) diff --git a/blas/tpls/KokkosBlas_Host_tpl.cpp b/blas/tpls/KokkosBlas_Host_tpl.cpp index c163dc726d..5df3d661c0 100644 --- a/blas/tpls/KokkosBlas_Host_tpl.cpp +++ b/blas/tpls/KokkosBlas_Host_tpl.cpp @@ -125,9 +125,9 @@ void F77_BLAS_MANGLE(srot, SROT)(KK_INT const* N, float* X, KK_INT const* incx, void F77_BLAS_MANGLE(drot, DROT)(KK_INT const* N, double* X, KK_INT const* incx, double* Y, KK_INT const* incy, double* c, double* s); void F77_BLAS_MANGLE(crot, CROT)(KK_INT const* N, std::complex* X, KK_INT const* incx, std::complex* Y, - KK_INT const* incy, float* c, float* s); + KK_INT const* incy, float* c, std::complex* s); void F77_BLAS_MANGLE(zrot, ZROT)(KK_INT const* N, std::complex* X, KK_INT const* incx, std::complex* Y, - KK_INT const* incy, double* c, double* s); + KK_INT const* incy, double* c, std::complex* s); /// /// rotg @@ -683,7 +683,7 @@ void HostBlas >::axpy(KK_INT n, const std::complex al } template <> void HostBlas >::rot(KK_INT const N, std::complex* X, KK_INT const incx, - std::complex* Y, KK_INT const incy, float* c, float* s) { + std::complex* Y, KK_INT const incy, float* c, std::complex* s) { F77_FUNC_CROT(&N, X, &incx, Y, &incy, c, s); } template <> @@ -824,7 +824,8 @@ void HostBlas >::axpy(KK_INT n, const std::complex } template <> void HostBlas >::rot(KK_INT const N, std::complex* X, KK_INT const incx, - std::complex* Y, KK_INT const incy, double* c, double* s) { + std::complex* Y, KK_INT const incy, double* c, + std::complex* s) { F77_FUNC_ZROT(&N, X, &incx, Y, &incy, c, s); } template <> diff --git a/blas/tpls/KokkosBlas_Host_tpl.hpp b/blas/tpls/KokkosBlas_Host_tpl.hpp index 576fde8471..920b978cc1 100644 --- a/blas/tpls/KokkosBlas_Host_tpl.hpp +++ b/blas/tpls/KokkosBlas_Host_tpl.hpp @@ -57,7 +57,7 @@ struct HostBlas { static void axpy(KK_INT n, const T alpha, const T *x, KK_INT x_inc, /* */ T *y, KK_INT y_inc); - static void rot(KK_INT const N, T *X, KK_INT const incx, T *Y, KK_INT const incy, mag_type *c, mag_type *s); + static void rot(KK_INT const N, T *X, KK_INT const incx, T *Y, KK_INT const incy, mag_type *c, T *s); static void rotg(T *a, T *b, mag_type *c, T *s); diff --git a/blas/unit_test/Test_Blas1_rot.hpp b/blas/unit_test/Test_Blas1_rot.hpp index db9367cb42..629e6db003 100644 --- a/blas/unit_test/Test_Blas1_rot.hpp +++ b/blas/unit_test/Test_Blas1_rot.hpp @@ -19,12 +19,14 @@ template int test_rot() { using mag_type = typename Kokkos::ArithTraits::mag_type; using vector_type = Kokkos::View; - using scalar_type = Kokkos::View; + using magnitude_type = Kokkos::View; + using scalar_type = Kokkos::View; using vector_ref_type = Kokkos::View; vector_type X("X", 4), Y("Y", 4); vector_ref_type Xref("Xref", 4), Yref("Yref", 4); - scalar_type c("c"), s("s"); + magnitude_type c("c"); + scalar_type s("s"); // Initialize inputs typename vector_type::HostMirror X_h = Kokkos::create_mirror_view(X);