diff --git a/blas/impl/KokkosBlas2_gemv_spec.hpp b/blas/impl/KokkosBlas2_gemv_spec.hpp index 42e2465494..08842a61c0 100644 --- a/blas/impl/KokkosBlas2_gemv_spec.hpp +++ b/blas/impl/KokkosBlas2_gemv_spec.hpp @@ -27,7 +27,7 @@ namespace KokkosBlas { namespace Impl { // Specialization struct which defines whether a specialization exists -template +template struct gemv_eti_spec_avail { enum : bool { value = false }; }; @@ -44,6 +44,7 @@ struct gemv_eti_spec_avail { #define KOKKOSBLAS2_GEMV_ETI_SPEC_AVAIL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ template <> \ struct gemv_eti_spec_avail< \ + EXEC_SPACE, \ Kokkos::View, \ Kokkos::MemoryTraits >, \ @@ -67,14 +68,14 @@ namespace Impl { // // Implementation of KokkosBlas::gemv. -template ::value, - bool eti_spec_avail = - gemv_eti_spec_avail::value> +template < + class ExecutionSpace, class AViewType, class XViewType, class YViewType, + bool tpl_spec_avail = gemv_tpl_spec_avail::value, + bool eti_spec_avail = gemv_eti_spec_avail::value> struct GEMV { - static void gemv(const typename AViewType::execution_space& space, - const char trans[], + static void gemv(const ExecutionSpace& space, const char trans[], typename AViewType::const_value_type& alpha, const AViewType& A, const XViewType& x, typename YViewType::const_value_type& beta, @@ -130,6 +131,7 @@ struct GEMV { #define KOKKOSBLAS2_GEMV_ETI_SPEC_DECL(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ extern template struct GEMV< \ + EXEC_SPACE, \ Kokkos::View, \ Kokkos::MemoryTraits >, \ @@ -142,6 +144,7 @@ struct GEMV { #define KOKKOSBLAS2_GEMV_ETI_SPEC_INST(SCALAR, LAYOUT, EXEC_SPACE, MEM_SPACE) \ template struct GEMV< \ + EXEC_SPACE, \ Kokkos::View, \ Kokkos::MemoryTraits >, \ diff --git a/blas/src/KokkosBlas2_gemv.hpp b/blas/src/KokkosBlas2_gemv.hpp index dbfeb06537..40ac9db249 100644 --- a/blas/src/KokkosBlas2_gemv.hpp +++ b/blas/src/KokkosBlas2_gemv.hpp @@ -49,14 +49,14 @@ namespace KokkosBlas { /// \param x [in] Input vector, as a 1-D Kokkos::View /// \param beta [in] Input coefficient of y /// \param y [in/out] Output vector, as a nonconst 1-D Kokkos::View -template -void gemv(const execution_space& space, const char trans[], +void gemv(const ExecutionSpace& space, const char trans[], typename AViewType::const_value_type& alpha, const AViewType& A, const XViewType& x, typename YViewType::const_value_type& beta, const YViewType& y) { - static_assert(Kokkos::is_execution_space_v, - "KokkosBlas::gemv: execution_space must be a valid Kokkos " + static_assert(Kokkos::is_execution_space_v, + "KokkosBlas::gemv: ExecutionSpace must be a valid Kokkos " "execution space."); static_assert(Kokkos::is_view::value, "KokkosBlas::gemv: AViewType must be a Kokkos::View."); @@ -71,17 +71,17 @@ void gemv(const execution_space& space, const char trans[], static_assert(static_cast(YViewType::rank) == 1, "KokkosBlas::gemv: YViewType must have rank 1."); static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::gemv: AViewType must be accessible from execution_space"); + "KokkosBlas::gemv: AViewType must be accessible from ExecutionSpace"); static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::gemv: XViewType must be accessible from execution_space"); + "KokkosBlas::gemv: XViewType must be accessible from ExecutionSpace"); static_assert( - Kokkos::SpaceAccessibility::accessible, - "KokkosBlas::gemv: YViewType must be accessible from execution_space"); + "KokkosBlas::gemv: YViewType must be accessible from ExecutionSpace"); // Check compatibility of dimensions at run time. if (trans[0] == 'N' || trans[0] == 'n') { @@ -171,11 +171,13 @@ void gemv(const execution_space& space, const char trans[], if (useFallback) { const bool eti_spec_avail = - KokkosBlas::Impl::gemv_eti_spec_avail::value; - typedef Impl::GEMV fallback_impl_type; + KokkosBlas::Impl::gemv_eti_spec_avail::value; + typedef Impl::GEMV + fallback_impl_type; fallback_impl_type::gemv(space, trans, alpha, A, x, beta, y); } else { - typedef Impl::GEMV impl_type; + typedef Impl::GEMV impl_type; impl_type::gemv(space, trans, alpha, A, x, beta, y); } } diff --git a/blas/tpls/KokkosBlas2_gemv_tpl_spec_avail.hpp b/blas/tpls/KokkosBlas2_gemv_tpl_spec_avail.hpp index 1f5dde5b04..1496eee020 100644 --- a/blas/tpls/KokkosBlas2_gemv_tpl_spec_avail.hpp +++ b/blas/tpls/KokkosBlas2_gemv_tpl_spec_avail.hpp @@ -20,7 +20,7 @@ namespace KokkosBlas { namespace Impl { // Specialization struct which defines whether a specialization exists -template +template struct gemv_tpl_spec_avail { enum : bool { value = false }; }; @@ -32,6 +32,7 @@ struct gemv_tpl_spec_avail { LAYOUTY, MEMSPACE) \ template \ struct gemv_tpl_spec_avail< \ + ExecSpace, \ Kokkos::View, \ Kokkos::MemoryTraits >, \ @@ -78,6 +79,7 @@ KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_BLAS(Kokkos::complex, LAYOUTY, MEMSPACE) \ template \ struct gemv_tpl_spec_avail< \ + ExecSpace, \ Kokkos::View, \ Kokkos::MemoryTraits >, \ @@ -126,8 +128,9 @@ KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_CUBLAS(Kokkos::complex, #ifdef KOKKOSKERNELS_ENABLE_TPL_ROCBLAS #define KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_ROCBLAS(SCALAR, LAYOUT) \ - template <> \ + template \ struct gemv_tpl_spec_avail< \ + ExecSpace, \ Kokkos::View, \ @@ -164,8 +167,9 @@ KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_ROCBLAS(Kokkos::complex, #ifdef KOKKOS_ENABLE_SYCL #define KOKKOSBLAS2_GEMV_TPL_SPEC_AVAIL_ONEMKL(SCALAR, LAYOUT) \ - template <> \ + template \ struct gemv_tpl_spec_avail< \ + ExecSpace, \ Kokkos::View, \ diff --git a/blas/tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp b/blas/tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp index 7aa854b962..894ce884ee 100644 --- a/blas/tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp +++ b/blas/tpls/KokkosBlas2_gemv_tpl_spec_decl.hpp @@ -43,51 +43,52 @@ namespace Impl { transa = 'C'; \ } -#define KOKKOSBLAS2_DGEMV_BLAS(LAYOUTA, LAYOUTX, LAYOUTY, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - template \ - struct GEMV< \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - Kokkos::View, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef double SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - YViewType; \ - \ - static void gemv(const typename AViewType::execution_space& /* space */, \ - const char trans[], \ - typename AViewType::const_value_type& alpha, \ - const AViewType& A, const XViewType& X, \ - typename YViewType::const_value_type& beta, \ - const YViewType& Y) { \ - Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_BLAS,double]"); \ - KOKKOSBLAS2_GEMV_DETERMINE_ARGS(LAYOUTA); \ - HostBlas::gemv(transa, M, N, alpha, A.data(), LDA, X.data(), \ - one, beta, Y.data(), one); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_DGEMV_BLAS(LAYOUTA, LAYOUTX, LAYOUTY, MEM_SPACE, \ + ETI_SPEC_AVAIL) \ + template \ + struct GEMV< \ + ExecSpace, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + Kokkos::View, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef double SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + YViewType; \ + \ + static void gemv(const ExecSpace& /* space */, const char trans[], \ + typename AViewType::const_value_type& alpha, \ + const AViewType& A, const XViewType& X, \ + typename YViewType::const_value_type& beta, \ + const YViewType& Y) { \ + Kokkos::Profiling::pushRegion("KokkosBlas::gemv[TPL_BLAS,double]"); \ + KOKKOSBLAS2_GEMV_DETERMINE_ARGS(LAYOUTA); \ + HostBlas::gemv(transa, M, N, alpha, A.data(), LDA, X.data(), \ + one, beta, Y.data(), one); \ + Kokkos::Profiling::popRegion(); \ + } \ }; #define KOKKOSBLAS2_SGEMV_BLAS(LAYOUTA, LAYOUTX, LAYOUTY, MEM_SPACE, \ ETI_SPEC_AVAIL) \ template \ struct GEMV< \ + ExecSpace, \ Kokkos::View, \ Kokkos::MemoryTraits >, \ @@ -111,8 +112,7 @@ namespace Impl { Kokkos::MemoryTraits > \ YViewType; \ \ - static void gemv(const typename AViewType::execution_space& /* space */, \ - const char trans[], \ + static void gemv(const ExecSpace& /* space */, const char trans[], \ typename AViewType::const_value_type& alpha, \ const AViewType& A, const XViewType& X, \ typename YViewType::const_value_type& beta, \ @@ -128,7 +128,8 @@ namespace Impl { #define KOKKOSBLAS2_ZGEMV_BLAS(LAYOUTA, LAYOUTX, LAYOUTY, MEM_SPACE, \ ETI_SPEC_AVAIL) \ template \ - struct GEMV**, LAYOUTA, \ + struct GEMV**, LAYOUTA, \ Kokkos::Device, \ Kokkos::MemoryTraits >, \ Kokkos::View*, LAYOUTX, \ @@ -152,8 +153,7 @@ namespace Impl { Kokkos::MemoryTraits > \ YViewType; \ \ - static void gemv(const typename AViewType::execution_space& /* space */, \ - const char trans[], \ + static void gemv(const ExecSpace& /* space */, const char trans[], \ typename AViewType::const_value_type& alpha, \ const AViewType& A, const XViewType& X, \ typename YViewType::const_value_type& beta, \ @@ -171,50 +171,50 @@ namespace Impl { } \ }; -#define KOKKOSBLAS2_CGEMV_BLAS(LAYOUTA, LAYOUTX, LAYOUTY, MEM_SPACE, \ - ETI_SPEC_AVAIL) \ - template \ - struct GEMV**, LAYOUTA, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View*, LAYOUTX, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - Kokkos::View*, LAYOUTY, \ - Kokkos::Device, \ - Kokkos::MemoryTraits >, \ - true, ETI_SPEC_AVAIL> { \ - typedef Kokkos::complex SCALAR; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - AViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - XViewType; \ - typedef Kokkos::View, \ - Kokkos::MemoryTraits > \ - YViewType; \ - \ - static void gemv(const typename AViewType::execution_space& /* space */, \ - const char trans[], \ - typename AViewType::const_value_type& alpha, \ - const AViewType& A, const XViewType& X, \ - typename YViewType::const_value_type& beta, \ - const YViewType& Y) { \ - Kokkos::Profiling::pushRegion( \ - "KokkosBlas::gemv[TPL_BLAS,complex]"); \ - KOKKOSBLAS2_GEMV_DETERMINE_ARGS(LAYOUTA); \ - const std::complex alpha_val = alpha, beta_val = beta; \ - HostBlas >::gemv( \ - transa, M, N, alpha_val, \ - reinterpret_cast*>(A.data()), LDA, \ - reinterpret_cast*>(X.data()), one, \ - beta_val, reinterpret_cast*>(Y.data()), one); \ - Kokkos::Profiling::popRegion(); \ - } \ +#define KOKKOSBLAS2_CGEMV_BLAS(LAYOUTA, LAYOUTX, LAYOUTY, MEM_SPACE, \ + ETI_SPEC_AVAIL) \ + template \ + struct GEMV**, LAYOUTA, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTX, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + Kokkos::View*, LAYOUTY, \ + Kokkos::Device, \ + Kokkos::MemoryTraits >, \ + true, ETI_SPEC_AVAIL> { \ + typedef Kokkos::complex SCALAR; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + AViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + XViewType; \ + typedef Kokkos::View, \ + Kokkos::MemoryTraits > \ + YViewType; \ + \ + static void gemv(const ExecSpace& /* space */, const char trans[], \ + typename AViewType::const_value_type& alpha, \ + const AViewType& A, const XViewType& X, \ + typename YViewType::const_value_type& beta, \ + const YViewType& Y) { \ + Kokkos::Profiling::pushRegion( \ + "KokkosBlas::gemv[TPL_BLAS,complex]"); \ + KOKKOSBLAS2_GEMV_DETERMINE_ARGS(LAYOUTA); \ + const std::complex alpha_val = alpha, beta_val = beta; \ + HostBlas >::gemv( \ + transa, M, N, alpha_val, \ + reinterpret_cast*>(A.data()), LDA, \ + reinterpret_cast*>(X.data()), one, \ + beta_val, reinterpret_cast*>(Y.data()), one); \ + Kokkos::Profiling::popRegion(); \ + } \ }; KOKKOSBLAS2_DGEMV_BLAS(Kokkos::LayoutLeft, Kokkos::LayoutLeft, @@ -288,6 +288,7 @@ namespace Impl { ETI_SPEC_AVAIL) \ template \ struct GEMV< \ + ExecSpace, \ Kokkos::View, \ Kokkos::MemoryTraits >, \ @@ -311,8 +312,7 @@ namespace Impl { Kokkos::MemoryTraits > \ YViewType; \ \ - static void gemv(const typename AViewType::execution_space& space, \ - const char trans[], \ + static void gemv(const ExecSpace& space, const char trans[], \ typename AViewType::const_value_type& alpha, \ const AViewType& A, const XViewType& X, \ typename YViewType::const_value_type& beta, \ @@ -335,6 +335,7 @@ namespace Impl { ETI_SPEC_AVAIL) \ template \ struct GEMV< \ + ExecSpace, \ Kokkos::View, \ Kokkos::MemoryTraits >, \ @@ -358,8 +359,7 @@ namespace Impl { Kokkos::MemoryTraits > \ YViewType; \ \ - static void gemv(const typename AViewType::execution_space& space, \ - const char trans[], \ + static void gemv(const ExecSpace& space, const char trans[], \ typename AViewType::const_value_type& alpha, \ const AViewType& A, const XViewType& X, \ typename YViewType::const_value_type& beta, \ @@ -381,7 +381,8 @@ namespace Impl { #define KOKKOSBLAS2_ZGEMV_CUBLAS(LAYOUTA, LAYOUTX, LAYOUTY, MEM_SPACE, \ ETI_SPEC_AVAIL) \ template \ - struct GEMV**, LAYOUTA, \ + struct GEMV**, LAYOUTA, \ Kokkos::Device, \ Kokkos::MemoryTraits >, \ Kokkos::View*, LAYOUTX, \ @@ -405,8 +406,7 @@ namespace Impl { Kokkos::MemoryTraits > \ YViewType; \ \ - static void gemv(const typename AViewType::execution_space& space, \ - const char trans[], \ + static void gemv(const ExecSpace& space, const char trans[], \ typename AViewType::const_value_type& alpha, \ const AViewType& A, const XViewType& X, \ typename YViewType::const_value_type& beta, \ @@ -433,7 +433,8 @@ namespace Impl { #define KOKKOSBLAS2_CGEMV_CUBLAS(LAYOUTA, LAYOUTX, LAYOUTY, MEM_SPACE, \ ETI_SPEC_AVAIL) \ template \ - struct GEMV**, LAYOUTA, \ + struct GEMV**, LAYOUTA, \ Kokkos::Device, \ Kokkos::MemoryTraits >, \ Kokkos::View*, LAYOUTX, \ @@ -457,8 +458,7 @@ namespace Impl { Kokkos::MemoryTraits > \ YViewType; \ \ - static void gemv(const typename AViewType::execution_space& space, \ - const char trans[], \ + static void gemv(const ExecSpace& space, const char trans[], \ typename AViewType::const_value_type& alpha, \ const AViewType& A, const XViewType& X, \ typename YViewType::const_value_type& beta, \ @@ -549,8 +549,9 @@ namespace Impl { } #define KOKKOSBLAS2_DGEMV_ROCBLAS(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ - template <> \ + template \ struct GEMV< \ + ExecSpace, \ Kokkos::View, \ Kokkos::MemoryTraits >, \ @@ -575,8 +576,7 @@ namespace Impl { Kokkos::MemoryTraits > \ YViewType; \ \ - static void gemv(const typename AViewType::execution_space& space, \ - const char trans[], \ + static void gemv(const ExecSpace& space, const char trans[], \ typename AViewType::const_value_type& alpha, \ const AViewType& A, const XViewType& X, \ typename YViewType::const_value_type& beta, \ @@ -596,8 +596,9 @@ namespace Impl { }; #define KOKKOSBLAS2_SGEMV_ROCBLAS(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ - template <> \ + template \ struct GEMV< \ + ExecSpace, \ Kokkos::View, \ Kokkos::MemoryTraits >, \ @@ -622,8 +623,7 @@ namespace Impl { Kokkos::MemoryTraits > \ YViewType; \ \ - static void gemv(const typename AViewType::execution_space& space, \ - const char trans[], \ + static void gemv(const ExecSpace& space, const char trans[], \ typename AViewType::const_value_type& alpha, \ const AViewType& A, const XViewType& X, \ typename YViewType::const_value_type& beta, \ @@ -643,8 +643,9 @@ namespace Impl { }; #define KOKKOSBLAS2_ZGEMV_ROCBLAS(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ - template <> \ + template \ struct GEMV< \ + ExecSpace, \ Kokkos::View**, LAYOUT, \ Kokkos::Device, \ Kokkos::MemoryTraits >, \ @@ -669,8 +670,7 @@ namespace Impl { Kokkos::MemoryTraits > \ YViewType; \ \ - static void gemv(const typename AViewType::execution_space& space, \ - const char trans[], \ + static void gemv(const ExecSpace& space, const char trans[], \ typename AViewType::const_value_type& alpha, \ const AViewType& A, const XViewType& X, \ typename YViewType::const_value_type& beta, \ @@ -695,8 +695,9 @@ namespace Impl { }; #define KOKKOSBLAS2_CGEMV_ROCBLAS(LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ - template <> \ + template \ struct GEMV< \ + ExecSpace, \ Kokkos::View**, LAYOUT, \ Kokkos::Device, \ Kokkos::MemoryTraits >, \ @@ -721,8 +722,7 @@ namespace Impl { Kokkos::MemoryTraits > \ YViewType; \ \ - static void gemv(const typename AViewType::execution_space& space, \ - const char trans[], \ + static void gemv(const ExecSpace& space, const char trans[], \ typename AViewType::const_value_type& alpha, \ const AViewType& A, const XViewType& X, \ typename YViewType::const_value_type& beta, \ @@ -818,8 +818,9 @@ struct kokkos_to_std_type_map { }; #define KOKKOSBLAS2_GEMV_ONEMKL(SCALAR, LAYOUT, MEM_SPACE, ETI_SPEC_AVAIL) \ - template <> \ + template \ struct GEMV< \ + ExecSpace, \ Kokkos::View, \ Kokkos::MemoryTraits >, \ @@ -830,16 +831,15 @@ struct kokkos_to_std_type_map { Kokkos::Device, \ Kokkos::MemoryTraits >, \ true, ETI_SPEC_AVAIL> { \ - using execution_space = Kokkos::Experimental::SYCL; \ - using device_type = Kokkos::Device; \ - using mem_traits = Kokkos::MemoryTraits; \ + using device_type = Kokkos::Device; \ + using mem_traits = Kokkos::MemoryTraits; \ using AViewType = \ Kokkos::View; \ using XViewType = \ Kokkos::View; \ using YViewType = Kokkos::View; \ \ - static void gemv(const execution_space& exec, const char kk_trans[], \ + static void gemv(const ExecSpace& exec, const char kk_trans[], \ typename AViewType::const_value_type& alpha, \ const AViewType& A, const XViewType& X, \ typename YViewType::const_value_type& beta, \ diff --git a/blas/unit_test/Test_Blas2_gemv.hpp b/blas/unit_test/Test_Blas2_gemv.hpp index 518e7b8055..b3f3566f83 100644 --- a/blas/unit_test/Test_Blas2_gemv.hpp +++ b/blas/unit_test/Test_Blas2_gemv.hpp @@ -21,8 +21,10 @@ #include namespace Test { -template -void impl_test_gemv(const char* mode, int M, int N) { +template +void impl_test_gemv_streams(ExecutionSpace& space, const char* mode, int M, + int N) { typedef typename ViewTypeA::value_type ScalarA; typedef typename ViewTypeX::value_type ScalarX; typedef typename ViewTypeY::value_type ScalarY; @@ -47,8 +49,7 @@ void impl_test_gemv(const char* mode, int M, int N) { view_stride_adapter y("Y", ldy); view_stride_adapter org_y("Org_Y", ldy); - Kokkos::Random_XorShift64_Pool rand_pool( - 13718); + Kokkos::Random_XorShift64_Pool rand_pool(13718); constexpr double max_valX = 1; constexpr double max_valY = 1; @@ -56,17 +57,17 @@ void impl_test_gemv(const char* mode, int M, int N) { { ScalarX randStart, randEnd; Test::getRandomBounds(max_valX, randStart, randEnd); - Kokkos::fill_random(x.d_view, rand_pool, randStart, randEnd); + Kokkos::fill_random(space, x.d_view, rand_pool, randStart, randEnd); } { ScalarY randStart, randEnd; Test::getRandomBounds(max_valY, randStart, randEnd); - Kokkos::fill_random(y.d_view, rand_pool, randStart, randEnd); + Kokkos::fill_random(space, y.d_view, rand_pool, randStart, randEnd); } { ScalarA randStart, randEnd; Test::getRandomBounds(max_valA, randStart, randEnd); - Kokkos::fill_random(A.d_view, rand_pool, randStart, randEnd); + Kokkos::fill_random(space, A.d_view, rand_pool, randStart, randEnd); } const typename KAT_Y::mag_type max_error = @@ -82,7 +83,7 @@ void impl_test_gemv(const char* mode, int M, int N) { Kokkos::deep_copy(expected, org_y.h_view); vanillaGEMV(mode[0], alpha, A.h_view, x.h_view, beta, expected); - KokkosBlas::gemv(mode, alpha, A.d_view, x.d_view, beta, y.d_view); + KokkosBlas::gemv(space, mode, alpha, A.d_view, x.d_view, beta, y.d_view); Kokkos::deep_copy(y.h_base, y.d_base); int numErrors = 0; for (int i = 0; i < ldy; i++) { @@ -97,10 +98,12 @@ void impl_test_gemv(const char* mode, int M, int N) { << ", alpha = " << alpha << ", beta = " << beta << ", mode " << mode << ": gemv incorrect"; - Kokkos::deep_copy(y.d_base, org_y.h_base); - KokkosBlas::gemv(mode, alpha, A.d_view, x.d_view_const, beta, y.d_view); + Kokkos::deep_copy(space, y.d_base, org_y.h_base); + KokkosBlas::gemv(space, mode, alpha, A.d_view, x.d_view_const, beta, + y.d_view); Kokkos::deep_copy(y.h_base, y.d_base); numErrors = 0; + Kokkos::fence(); // Wait for vanillaGEMV for (int i = 0; i < ldy; i++) { if (KAT_Y::abs(expected(i) - y.h_view(i)) > tol) numErrors++; } @@ -108,8 +111,9 @@ void impl_test_gemv(const char* mode, int M, int N) { << ", alpha = " << alpha << ", beta = " << beta << ", mode " << mode << ": gemv incorrect"; - Kokkos::deep_copy(y.d_base, org_y.h_base); - KokkosBlas::gemv(mode, alpha, A.d_view_const, x.d_view_const, beta, y.d_view); + Kokkos::deep_copy(space, y.d_base, org_y.h_base); + KokkosBlas::gemv(space, mode, alpha, A.d_view_const, x.d_view_const, beta, + y.d_view); Kokkos::deep_copy(y.h_base, y.d_base); numErrors = 0; for (int i = 0; i < ldy; i++) { @@ -123,9 +127,11 @@ void impl_test_gemv(const char* mode, int M, int N) { beta = KAT_Y::zero(); // beta changed, so update the correct answer vanillaGEMV(mode[0], alpha, A.h_view, x.h_view, beta, expected); - Kokkos::deep_copy(y.d_view, KAT_Y::nan()); - KokkosBlas::gemv(mode, alpha, A.d_view, x.d_view, beta, y.d_view); + Kokkos::deep_copy(space, y.d_view, KAT_Y::nan()); + KokkosBlas::gemv(space, mode, alpha, A.d_view, x.d_view, beta, y.d_view); Kokkos::deep_copy(y.h_base, y.d_base); + + Kokkos::fence(); // Wait for vanillaGEMV numErrors = 0; for (int i = 0; i < ldy; i++) { if (KAT_Y::isNan(y.h_view(i)) || @@ -141,6 +147,13 @@ void impl_test_gemv(const char* mode, int M, int N) { EXPECT_EQ(numErrors, 0) << "beta = 0, input contains NaN, A is " << M << 'x' << N << ", mode " << mode << ": gemv incorrect"; } +template +void impl_test_gemv(const char* mode, int M, int N) { + using execution_space = typename Device::execution_space; + execution_space space; + impl_test_gemv_streams(space, mode, M, N); +} } // namespace Test template @@ -310,3 +323,48 @@ TEST_F(TestCategory, gemv_double_int) { // Kokkos::Profiling::popRegion(); } #endif + +template +int test_gemv_streams(const char* mode) { + using execution_space = typename Device::execution_space; + execution_space space; +#if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) + using view_type_a_ll = Kokkos::View; + using view_type_b_ll = Kokkos::View; + using view_type_c_ll = Kokkos::View; + Test::impl_test_gemv_streams(space, mode, 0, 1024); + Test::impl_test_gemv_streams(space, mode, 13, 1024); + Test::impl_test_gemv_streams(space, mode, 50, 40); +#endif + +#if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) + using view_type_a_lr = Kokkos::View; + using view_type_b_lr = Kokkos::View; + using view_type_c_lr = Kokkos::View; + Test::impl_test_gemv_streams(space, mode, 0, 1024); + Test::impl_test_gemv_streams(space, mode, 13, 1024); + Test::impl_test_gemv_streams(space, mode, 50, 40); +#endif + (void)space; + return 1; +} + +#define KOKKOSKERNELS_EXECUTE_TEST(SCALAR, ORDINAL, OFFSET, DEVICE) \ + TEST_F(TestCategory, \ + blas##_##gemv_streams##_##SCALAR##_##ORDINAL##_##OFFSET##_##DEVICE) { \ + test_gemv_streams("N"); \ + test_gemv_streams("T"); \ + } + +#define NO_TEST_COMPLEX + +#include + +#undef KOKKOSKERNELS_EXECUTE_TEST +#undef NO_TEST_COMPLEX \ No newline at end of file diff --git a/docs/developer/apidocs/blas2.rst b/docs/developer/apidocs/blas2.rst index 9d96567929..434e9caf03 100644 --- a/docs/developer/apidocs/blas2.rst +++ b/docs/developer/apidocs/blas2.rst @@ -3,8 +3,8 @@ BLAS2 -- KokkosKernels blas2 interfaces gemv ---- +.. doxygenfunction:: KokkosBlas::gemv(const ExecutionSpace &space, const char trans[], typename AViewType::const_value_type &alpha, const AViewType &A, const XViewType &x, typename YViewType::const_value_type &beta, const YViewType &y) .. doxygenfunction:: KokkosBlas::gemv(const char trans[], typename AViewType::const_value_type &alpha, const AViewType &A, const XViewType &x, typename YViewType::const_value_type &beta, const YViewType &y) -.. doxygenfunction:: KokkosBlas::gemv(const execution_space &space, const char trans[], typename AViewType::const_value_type &alpha, const AViewType &A, const XViewType &x, typename YViewType::const_value_type &beta, const YViewType &y) ger ----