diff --git a/blas/tpls/KokkosBlas2_ger_tpl_spec_decl_blas.hpp b/blas/tpls/KokkosBlas2_ger_tpl_spec_decl_blas.hpp index 3ba437a5a7..bc1a10f61e 100644 --- a/blas/tpls/KokkosBlas2_ger_tpl_spec_decl_blas.hpp +++ b/blas/tpls/KokkosBlas2_ger_tpl_spec_decl_blas.hpp @@ -149,9 +149,7 @@ namespace Impl { Kokkos::MemoryTraits> \ AViewType; \ \ - static void ger(const EXEC_SPACE& /* space */ \ - , \ - const char trans[], \ + static void ger(const EXEC_SPACE& space, const char trans[], \ typename AViewType::const_value_type& alpha, \ const XViewType& X, const YViewType& Y, \ const AViewType& A) { \ @@ -183,8 +181,9 @@ namespace Impl { reinterpret_cast*>(X.data()), one, \ reinterpret_cast*>(A.data()), LDA); \ } else { \ - throw std::runtime_error( \ - "Error: blasZgerc() requires LayoutLeft views."); \ + /* blasgerc() + ~A_ll => call kokkos-kernels' implementation */ \ + GER::ger(space, trans, alpha, X, Y, A); \ } \ } \ Kokkos::Profiling::popRegion(); \ @@ -218,9 +217,7 @@ namespace Impl { Kokkos::MemoryTraits> \ AViewType; \ \ - static void ger(const EXEC_SPACE& /* space */ \ - , \ - const char trans[], \ + static void ger(const EXEC_SPACE& space, const char trans[], \ typename AViewType::const_value_type& alpha, \ const XViewType& X, const YViewType& Y, \ const AViewType& A) { \ @@ -252,8 +249,9 @@ namespace Impl { reinterpret_cast*>(X.data()), one, \ reinterpret_cast*>(A.data()), LDA); \ } else { \ - throw std::runtime_error( \ - "Error: blasCgerc() requires LayoutLeft views."); \ + /* blasgerc() + ~A_ll => call kokkos-kernels' implementation */ \ + GER::ger(space, trans, alpha, X, Y, A); \ } \ } \ Kokkos::Profiling::popRegion(); \ diff --git a/blas/tpls/KokkosBlas2_ger_tpl_spec_decl_cublas.hpp b/blas/tpls/KokkosBlas2_ger_tpl_spec_decl_cublas.hpp index d05b09784e..3f80144f62 100644 --- a/blas/tpls/KokkosBlas2_ger_tpl_spec_decl_cublas.hpp +++ b/blas/tpls/KokkosBlas2_ger_tpl_spec_decl_cublas.hpp @@ -196,8 +196,9 @@ namespace Impl { reinterpret_cast(X.data()), one, \ reinterpret_cast(A.data()), LDA)); \ } else { \ - throw std::runtime_error( \ - "Error: cublasZgerc() requires LayoutLeft views."); \ + /* cublasZgerc() + ~A_ll => call kokkos-kernels' implementation */ \ + GER::ger(space, trans, alpha, X, Y, A); \ } \ } \ KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ @@ -266,8 +267,9 @@ namespace Impl { reinterpret_cast(X.data()), one, \ reinterpret_cast(A.data()), LDA)); \ } else { \ - throw std::runtime_error( \ - "Error: cublasCgerc() requires LayoutLeft views."); \ + /* cublasCgerc() + ~A_ll => call kokkos-kernels' implementation */ \ + GER::ger(space, trans, alpha, X, Y, A); \ } \ } \ KOKKOS_CUBLAS_SAFE_CALL_IMPL(cublasSetStream(s.handle, NULL)); \ diff --git a/blas/tpls/KokkosBlas2_ger_tpl_spec_decl_rocblas.hpp b/blas/tpls/KokkosBlas2_ger_tpl_spec_decl_rocblas.hpp index c55d091516..c21b61befa 100644 --- a/blas/tpls/KokkosBlas2_ger_tpl_spec_decl_rocblas.hpp +++ b/blas/tpls/KokkosBlas2_ger_tpl_spec_decl_rocblas.hpp @@ -199,8 +199,9 @@ namespace Impl { reinterpret_cast(X.data()), one, \ reinterpret_cast(A.data()), LDA)); \ } else { \ - throw std::runtime_error( \ - "Error: rocblasZgerc() requires LayoutLeft views."); \ + /* rocblas_zgerc() + ~A_ll => call k-kernels' implementation */ \ + GER::ger(space, trans, alpha, X, Y, A); \ } \ } \ KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ @@ -273,8 +274,9 @@ namespace Impl { reinterpret_cast(X.data()), one, \ reinterpret_cast(A.data()), LDA)); \ } else { \ - throw std::runtime_error( \ - "Error: rocblasCgec() requires LayoutLeft views."); \ + /* rocblas_cgerc() + ~A_ll => call k-kernels' implementation */ \ + GER::ger(space, trans, alpha, X, Y, A); \ } \ } \ KOKKOS_ROCBLAS_SAFE_CALL_IMPL(rocblas_set_stream(s.handle, NULL)); \ diff --git a/blas/unit_test/Test_Blas1_axpby_unification.hpp b/blas/unit_test/Test_Blas1_axpby_unification.hpp index 9709d580b3..6ce7bad0b1 100644 --- a/blas/unit_test/Test_Blas1_axpby_unification.hpp +++ b/blas/unit_test/Test_Blas1_axpby_unification.hpp @@ -27,8 +27,8 @@ // // Choices (01)-(03) are selected in the routines TEST_F() at the very // bottom of the file, when calling: -// - either test_axpby_unificationr<...>(), -// - or test_axpby_mv_unificationr<...>(). +// - either test_axpby_unification<...>(), +// - or test_axpby_mv_unification<...>(). // // Choices (04)-(05) are selected in routines: // - test_axpby_unification<...>(), when calling @@ -98,7 +98,7 @@ void impl_test_axpby_unification_compare( Test::getRandomBounds(max_val, randStart, randEnd); Kokkos::fill_random(x.d_view, rand_pool, randStart, randEnd); } - Kokkos::deep_copy(x.h_view, x.d_view); + Kokkos::deep_copy(x.h_base, x.d_base); { ScalarTypeY randStart, randEnd; @@ -110,7 +110,7 @@ void impl_test_axpby_unification_compare( } } tY org_y("Org_Y", N); - Kokkos::deep_copy(org_y.h_view, y.d_view); + Kokkos::deep_copy(org_y.h_base, y.d_base); tScalarA valueA(Kokkos::ArithTraits::zero()); tScalarB valueB(Kokkos::ArithTraits::zero()); @@ -131,7 +131,7 @@ void impl_test_axpby_unification_compare( } KokkosBlas::axpby(a, x.d_view, b, y.d_view); } else { - Kokkos::deep_copy(b.h_view, b.d_view); + Kokkos::deep_copy(b.h_base, b.d_base); valueB = b.h_view(0); KokkosBlas::axpby(a, x.d_view, b.d_view, y.d_view); } @@ -158,12 +158,12 @@ void impl_test_axpby_unification_compare( } KokkosBlas::axpby(a, x.d_view, b, y.d_view); } else { - Kokkos::deep_copy(b.h_view, b.d_view); + Kokkos::deep_copy(b.h_base, b.d_base); valueB = b.h_view(0); KokkosBlas::axpby(a, x.d_view, b.d_view, y.d_view); } } else { - Kokkos::deep_copy(a.h_view, a.d_view); + Kokkos::deep_copy(a.h_base, a.d_base); valueA = a.h_view(0); if constexpr (std::is_same_v) { valueB = b; @@ -179,13 +179,13 @@ void impl_test_axpby_unification_compare( } KokkosBlas::axpby(a.d_view, x.d_view, b, y.d_view); } else { - Kokkos::deep_copy(b.h_view, b.d_view); + Kokkos::deep_copy(b.h_base, b.d_base); valueB = b.h_view(0); KokkosBlas::axpby(a.d_view, x.d_view, b.d_view, y.d_view); } } - Kokkos::deep_copy(y.h_view, y.d_view); + Kokkos::deep_copy(y.h_base, y.d_base); if (testWithNanY == false) { for (int i(0); i < N; ++i) { @@ -248,7 +248,7 @@ void impl_test_axpby_mv_unification_compare( Test::getRandomBounds(max_val, randStart, randEnd); Kokkos::fill_random(x.d_view, rand_pool, randStart, randEnd); } - Kokkos::deep_copy(x.h_view, x.d_view); + Kokkos::deep_copy(x.h_base, x.d_base); { ScalarTypeY randStart, randEnd; @@ -260,20 +260,20 @@ void impl_test_axpby_mv_unification_compare( } } tY org_y("Org_Y", N, K); - Kokkos::deep_copy(org_y.h_view, y.d_view); + Kokkos::deep_copy(org_y.h_base, y.d_base); // Cannot use "if constexpr (isRank1()) {" because rank-1 variables // are passed to current routine with view_stride_adapter<...> bool constexpr aIsRank1 = !std::is_same_v && !isRank0(); if constexpr (aIsRank1) { - Kokkos::deep_copy(a.h_view, a.d_view); + Kokkos::deep_copy(a.h_base, a.d_base); } // Cannot use "if constexpr (isRank1()) {" because rank-1 variables // are passed to current routine with view_stride_adapter<...> bool constexpr bIsRank1 = !std::is_same_v && !isRank0(); if constexpr (bIsRank1) { - Kokkos::deep_copy(b.h_view, b.d_view); + Kokkos::deep_copy(b.h_base, b.d_base); } tScalarA valueA(Kokkos::ArithTraits::zero()); @@ -344,7 +344,7 @@ void impl_test_axpby_mv_unification_compare( } } - Kokkos::deep_copy(y.h_view, y.d_view); + Kokkos::deep_copy(y.h_base, y.d_base); if (testWithNanY == false) { for (int i(0); i < N; ++i) { @@ -503,7 +503,9 @@ void impl_test_axpby_unification(int const N) { // ************************************************************ // Case 01/16: Ascalar + Bscalar // ************************************************************ - // std::cout << "Starting case 01/16" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 01/16" << std::endl; +#endif for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA(valuesA[i]); for (size_t j(0); j < valuesB.size(); ++j) { @@ -533,7 +535,9 @@ void impl_test_axpby_unification(int const N) { // ************************************************************ // Case 02/16: Ascalar + Br0 // ************************************************************ - // std::cout << "Starting case 02/16" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 02/16" << std::endl; +#endif if constexpr (std::is_same_v) { // Avoid the test, due to compilation errors // ViewTypeBr0 b; @@ -570,7 +574,9 @@ void impl_test_axpby_unification(int const N) { // ************************************************************ // Case 03/16: Ascalar + Br1s_1 // ************************************************************ - // std::cout << "Starting case 03/16" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 03/16" << std::endl; +#endif for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA(valuesA[i]); for (size_t j(0); j < valuesB.size(); ++j) { @@ -601,7 +607,9 @@ void impl_test_axpby_unification(int const N) { // ************************************************************ // Case 04/16: Ascalar + Br1d // ************************************************************ - // std::cout << "Starting case 04/16" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 04/16" << std::endl; +#endif for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA(valuesA[i]); for (size_t j(0); j < valuesB.size(); ++j) { @@ -631,7 +639,9 @@ void impl_test_axpby_unification(int const N) { // ************************************************************ // Case 05/16: Ar0 + Bscalar // ************************************************************ - // std::cout << "Starting case 05/16" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 05/16" << std::endl; +#endif if constexpr (std::is_same_v) { // Avoid the test, due to compilation errors } else { @@ -665,7 +675,9 @@ void impl_test_axpby_unification(int const N) { // ************************************************************ // Case 06/16: Ar0 + Br0 // ************************************************************ - // std::cout << "Starting case 06/16" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 06/16" << std::endl; +#endif if constexpr ((std::is_same_v) || (std::is_same_v)) { // Avoid the test, due to compilation errors @@ -700,7 +712,9 @@ void impl_test_axpby_unification(int const N) { // ************************************************************ // Case 07/16: Ar0 + Br1s_1 // ************************************************************ - // std::cout << "Starting case 07/16" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 07/16" << std::endl; +#endif if constexpr (std::is_same_v) { // Avoid the test, due to compilation errors } else { @@ -736,7 +750,9 @@ void impl_test_axpby_unification(int const N) { // ************************************************************ // Case 08/16: Ar0 + Br1d // ************************************************************ - // std::cout << "Starting case 08/16" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 08/16" << std::endl; +#endif if constexpr (std::is_same_v) { // Avoid the test, due to compilation errors } else { @@ -771,7 +787,9 @@ void impl_test_axpby_unification(int const N) { // ************************************************************ // Case 09/16: Ar1s_1 + Bscalar // ************************************************************ - // std::cout << "Starting case 09/16" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 09/16" << std::endl; +#endif for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA(valuesA[i]); for (size_t j(0); j < valuesB.size(); ++j) { @@ -803,7 +821,9 @@ void impl_test_axpby_unification(int const N) { // ************************************************************ // Case 10/16: Ar1s_1 + Br0 // ************************************************************ - // std::cout << "Starting case 10/16" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 10/16" << std::endl; +#endif if constexpr (std::is_same_v) { // Avoid the test, due to compilation errors } else { @@ -839,7 +859,9 @@ void impl_test_axpby_unification(int const N) { // ************************************************************ // Case 11/16: Ar1s_1 + Br1s_1 // ************************************************************ - // std::cout << "Starting case 11/16" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 11/16" << std::endl; +#endif for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA(valuesA[i]); for (size_t j(0); j < valuesB.size(); ++j) { @@ -872,7 +894,9 @@ void impl_test_axpby_unification(int const N) { // ************************************************************ // Case 12/16: Ar1s_1 + Br1d // ************************************************************ - // std::cout << "Starting case 12/16" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 12/16" << std::endl; +#endif for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA(valuesA[i]); for (size_t j(0); j < valuesB.size(); ++j) { @@ -904,7 +928,9 @@ void impl_test_axpby_unification(int const N) { // ************************************************************ // Case 13/16: Ar1d + Bscalar // ************************************************************ - // std::cout << "Starting case 13/16" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 13/16" << std::endl; +#endif for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA(valuesA[i]); for (size_t j(0); j < valuesB.size(); ++j) { @@ -936,7 +962,9 @@ void impl_test_axpby_unification(int const N) { // ************************************************************ // Case 14/16: Ar1d + Br0 // ************************************************************ - // std::cout << "Starting case 14/16" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 14/16" << std::endl; +#endif if constexpr (std::is_same_v) { // Avoid the test, due to compilation errors } else { @@ -972,7 +1000,9 @@ void impl_test_axpby_unification(int const N) { // ************************************************************ // Case 15/16: Ar1d + Br1s_1 // ************************************************************ - // std::cout << "Starting case 15/16" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 15/16" << std::endl; +#endif for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA(valuesA[i]); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1005,7 +1035,9 @@ void impl_test_axpby_unification(int const N) { // ************************************************************ // Case 16/16: Ar1d + Br1d // ************************************************************ - // std::cout << "Starting case 16/16" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 16/16" << std::endl; +#endif for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA(valuesA[i]); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1081,7 +1113,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 01/36: Ascalar + Bscalar // ************************************************************ - // std::cout << "Starting case 01/36" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 01/36" << std::endl; +#endif for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA(valuesA[i]); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1111,7 +1145,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 02/36: Ascalar + Br0 // ************************************************************ - // std::cout << "Starting case 02/36" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 02/36" << std::endl; +#endif if constexpr (std::is_same_v) { // Avoid the test, due to compilation errors } else { @@ -1145,7 +1181,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 03/36: Ascalar + Br1s_1 // ************************************************************ - // std::cout << "Starting case 03/36" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 03/36" << std::endl; +#endif for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA(valuesA[i]); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1176,7 +1214,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 04/36: Ascalar + Br1s_k // ************************************************************ - // std::cout << "Starting case 04/36" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 04/36" << std::endl; +#endif for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA(valuesA[i]); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1192,7 +1232,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { for (int k(0); k < K; ++k) { b.h_view[k] = valueB + k; } - Kokkos::deep_copy(b.d_view, b.h_view); + Kokkos::deep_copy(b.d_base, b.h_base); } else { for (int k(0); k < K; ++k) { b.h_base[k] = valueB + k; @@ -1210,7 +1250,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 05/36: Ascalar + Br1d,1 // ************************************************************ - // std::cout << "Starting case 05/36" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 05/36" << std::endl; +#endif for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA(valuesA[i]); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1240,7 +1282,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 06/36: Ascalar + Br1d,k // ************************************************************ - // std::cout << "Starting case 06/36" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 06/36" << std::endl; +#endif for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA(valuesA[i]); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1256,7 +1300,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { for (int k(0); k < K; ++k) { b.h_view[k] = valueB + k; } - Kokkos::deep_copy(b.d_view, b.h_view); + Kokkos::deep_copy(b.d_base, b.h_base); } else { for (int k(0); k < K; ++k) { b.h_base[k] = valueB + k; @@ -1274,7 +1318,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 07/36: Ar0 + Bscalar // ************************************************************w - // std::cout << "Starting case 07/36" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 07/36" << std::endl; +#endif if constexpr (std::is_same_v) { // Avoid the test, due to compilation errors } else { @@ -1308,7 +1354,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 08/36: Ar0 + Br0 // ************************************************************ - // std::cout << "Starting case 08/36" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 08/36" << std::endl; +#endif if constexpr ((std::is_same_v) || (std::is_same_v)) { // Avoid the test, due to compilation errors @@ -1343,7 +1391,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 09/36: Ar0 + Br1s_1 // ************************************************************ - // std::cout << "Starting case 09/36" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 09/36" << std::endl; +#endif if constexpr (std::is_same_v) { // Avoid the test, due to compilation errors } else { @@ -1379,7 +1429,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 10/36: Ar0 + Br1s_k // ************************************************************ - // std::cout << "Starting case 10/36" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 10/36" << std::endl; +#endif if constexpr (std::is_same_v) { // Avoid the test, due to compilation errors } else { @@ -1398,7 +1450,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { for (int k(0); k < K; ++k) { b.h_view[k] = valueB + k; } - Kokkos::deep_copy(b.d_view, b.h_view); + Kokkos::deep_copy(b.d_base, b.h_base); } else { for (int k(0); k < K; ++k) { b.h_base[k] = valueB + k; @@ -1418,7 +1470,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 11/36: Ar0 + Br1d,1 // ************************************************************ - // std::cout << "Starting case 11/36" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 11/36" << std::endl; +#endif if constexpr (std::is_same_v) { // Avoid the test, due to compilation errors } else { @@ -1453,7 +1507,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 12/36: Ar0 + Br1d,k // ************************************************************ - // std::cout << "Starting case 12/36" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 12/36" << std::endl; +#endif if constexpr (std::is_same_v) { // Avoid the test, due to compilation errors } else { @@ -1472,7 +1528,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { for (int k(0); k < K; ++k) { b.h_view[k] = valueB + k; } - Kokkos::deep_copy(b.d_view, b.h_view); + Kokkos::deep_copy(b.d_base, b.h_base); } else { for (int k(0); k < K; ++k) { b.h_base[k] = valueB + k; @@ -1491,7 +1547,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 13/36: Ar1s_1 + Bscalar // ************************************************************w - // std::cout << "Starting case 13/36" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 13/36" << std::endl; +#endif for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA(valuesA[i]); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1523,7 +1581,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 14/36: Ar1s_1 + Br0 // ************************************************************ - // std::cout << "Starting case 14/36" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 14/36" << std::endl; +#endif if constexpr (std::is_same_v) { // Avoid the test, due to compilation errors } else { @@ -1559,7 +1619,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 15/36: Ar1s_1 + Br1s_1 // ************************************************************ - // std::cout << "Starting case 15/36" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 15/36" << std::endl; +#endif for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA(valuesA[i]); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1592,7 +1654,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 16/36: Ar1s_1 + Br1s_k // ************************************************************ - // std::cout << "Starting case 16/36" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 16/36" << std::endl; +#endif for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA(valuesA[i]); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1608,7 +1672,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { for (int k(0); k < K; ++k) { b.h_view[k] = valueB + k; } - Kokkos::deep_copy(b.d_view, b.h_view); + Kokkos::deep_copy(b.d_base, b.h_base); } else { for (int k(0); k < K; ++k) { b.h_base[k] = valueB + k; @@ -1627,7 +1691,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 17/36: Ar1s_1 + Br1d,1 // ************************************************************ - // std::cout << "Starting case 17/36" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 17/36" << std::endl; +#endif for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA(valuesA[i]); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1659,7 +1725,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 18/36: Ar1s_1 + Br1d,k // ************************************************************ - // std::cout << "Starting case 18/36" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 18/36" << std::endl; +#endif for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA(valuesA[i]); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1675,7 +1743,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { for (int k(0); k < K; ++k) { b.h_view[k] = valueB + k; } - Kokkos::deep_copy(b.d_view, b.h_view); + Kokkos::deep_copy(b.d_base, b.h_base); } else { for (int k(0); k < K; ++k) { b.h_base[k] = valueB + k; @@ -1694,7 +1762,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 19/36: Ar1s_k + Bscalar // ************************************************************ - // std::cout << "Starting case 19/36" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 19/36" << std::endl; +#endif for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA(valuesA[i]); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1709,7 +1779,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { for (int k(0); k < K; ++k) { a.h_view[k] = valueA + k; } - Kokkos::deep_copy(a.d_view, a.h_view); + Kokkos::deep_copy(a.d_base, a.h_base); } else { for (int k(0); k < K; ++k) { a.h_base[k] = valueA + k; @@ -1736,7 +1806,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 20/36: Ar1s_k + Br0 // ************************************************************ - // std::cout << "Starting case 20/36" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 20/36" << std::endl; +#endif if constexpr (std::is_same_v) { // Avoid the test, due to compilation errors } else { @@ -1754,7 +1826,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { for (int k(0); k < K; ++k) { a.h_view[k] = valueA + k; } - Kokkos::deep_copy(a.d_view, a.h_view); + Kokkos::deep_copy(a.d_base, a.h_base); } else { for (int k(0); k < K; ++k) { a.h_base[k] = valueA + k; @@ -1782,7 +1854,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 21/36: Ar1s_k + Br1s_1 // ************************************************************ - // std::cout << "Starting case 21/36" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 21/36" << std::endl; +#endif for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA(valuesA[i]); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1797,7 +1871,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { for (int k(0); k < K; ++k) { a.h_view[k] = valueA + k; } - Kokkos::deep_copy(a.d_view, a.h_view); + Kokkos::deep_copy(a.d_base, a.h_base); } else { for (int k(0); k < K; ++k) { a.h_base[k] = valueA + k; @@ -1825,7 +1899,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 22/36: Ar1s_k + Br1s_k // ************************************************************ - // std::cout << "Starting case 22/36" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 22/36" << std::endl; +#endif for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA(valuesA[i]); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1840,7 +1916,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { for (int k(0); k < K; ++k) { a.h_view[k] = valueA + k; } - Kokkos::deep_copy(a.d_view, a.h_view); + Kokkos::deep_copy(a.d_base, a.h_base); } else { for (int k(0); k < K; ++k) { a.h_base[k] = valueA + k; @@ -1852,7 +1928,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { for (int k(0); k < K; ++k) { b.h_view[k] = valueB + k; } - Kokkos::deep_copy(b.d_view, b.h_view); + Kokkos::deep_copy(b.d_base, b.h_base); } else { for (int k(0); k < K; ++k) { b.h_base[k] = valueB + k; @@ -1871,7 +1947,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 23/36: Ar1s_k + Br1d,1 // ************************************************************ - // std::cout << "Starting case 23/36" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 23/36" << std::endl; +#endif for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA(valuesA[i]); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1886,7 +1964,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { for (int k(0); k < K; ++k) { a.h_view[k] = valueA + k; } - Kokkos::deep_copy(a.d_view, a.h_view); + Kokkos::deep_copy(a.d_base, a.h_base); } else { for (int k(0); k < K; ++k) { a.h_base[k] = valueA + k; @@ -1913,7 +1991,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 24/36: Ar1s_k + Br1d,k // ************************************************************ - // std::cout << "Starting case 24/36" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 24/36" << std::endl; +#endif for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA(valuesA[i]); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1928,7 +2008,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { for (int k(0); k < K; ++k) { a.h_view[k] = valueA + k; } - Kokkos::deep_copy(a.d_view, a.h_view); + Kokkos::deep_copy(a.d_base, a.h_base); } else { for (int k(0); k < K; ++k) { a.h_base[k] = valueA + k; @@ -1940,7 +2020,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { for (int k(0); k < K; ++k) { b.h_view[k] = valueB + k; } - Kokkos::deep_copy(b.d_view, b.h_view); + Kokkos::deep_copy(b.d_base, b.h_base); } else { for (int k(0); k < K; ++k) { b.h_base[k] = valueB + k; @@ -1960,7 +2040,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 25/36: Ar1d,1 + Bscalar // ************************************************************w - // std::cout << "Starting case 25/36" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 25/36" << std::endl; +#endif for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA(valuesA[i]); for (size_t j(0); j < valuesB.size(); ++j) { @@ -1992,7 +2074,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 26/36: Ar1d,1 + Br0 // ************************************************************ - // std::cout << "Starting case 26/36" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 26/36" << std::endl; +#endif if constexpr (std::is_same_v) { // Avoid the test, due to compilation errors } else { @@ -2028,7 +2112,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 27/36: Ar1d,1 + Br1s_1 // ************************************************************ - // std::cout << "Starting case 27/36" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 27/36" << std::endl; +#endif for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA(valuesA[i]); for (size_t j(0); j < valuesB.size(); ++j) { @@ -2061,7 +2147,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 28/36: Ar1d,1 + Br1s_k // ************************************************************ - // std::cout << "Starting case 28/36" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 28/36" << std::endl; +#endif for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA(valuesA[i]); for (size_t j(0); j < valuesB.size(); ++j) { @@ -2077,7 +2165,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { for (int k(0); k < K; ++k) { b.h_view[k] = valueB + k; } - Kokkos::deep_copy(b.d_view, b.h_view); + Kokkos::deep_copy(b.d_base, b.h_base); } else { for (int k(0); k < K; ++k) { b.h_base[k] = valueB + k; @@ -2096,7 +2184,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 29/36: Ar1d,1 + Br1d,1 // ************************************************************ - // std::cout << "Starting case 29/36" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 29/36" << std::endl; +#endif for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA(valuesA[i]); for (size_t j(0); j < valuesB.size(); ++j) { @@ -2128,7 +2218,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 30/36: Ar1d,1 + Br1d,k // ************************************************************ - // std::cout << "Starting case 30/36" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 30/36" << std::endl; +#endif for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA(valuesA[i]); for (size_t j(0); j < valuesB.size(); ++j) { @@ -2144,7 +2236,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { for (int k(0); k < K; ++k) { b.h_view[k] = valueB + k; } - Kokkos::deep_copy(b.d_view, b.h_view); + Kokkos::deep_copy(b.d_base, b.h_base); } else { for (int k(0); k < K; ++k) { b.h_base[k] = valueB + k; @@ -2163,7 +2255,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 31/36: Ar1d,k + Bscalar // ************************************************************w - // std::cout << "Starting case 31/36" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 31/36" << std::endl; +#endif for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA(valuesA[i]); for (size_t j(0); j < valuesB.size(); ++j) { @@ -2178,7 +2272,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { for (int k(0); k < K; ++k) { a.h_view[k] = valueA + k; } - Kokkos::deep_copy(a.d_view, a.h_view); + Kokkos::deep_copy(a.d_base, a.h_base); } else { for (int k(0); k < K; ++k) { a.h_base[k] = valueA + k; @@ -2205,7 +2299,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 32/36: Ar1d,k + Br0 // ************************************************************ - // std::cout << "Starting case 32/36" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 32/36" << std::endl; +#endif if constexpr (std::is_same_v) { // Avoid the test, due to compilation errors } else { @@ -2223,7 +2319,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { for (int k(0); k < K; ++k) { a.h_view[k] = valueA + k; } - Kokkos::deep_copy(a.d_view, a.h_view); + Kokkos::deep_copy(a.d_base, a.h_base); } else { for (int k(0); k < K; ++k) { a.h_base[k] = valueA + k; @@ -2251,7 +2347,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 33/36: Ar1d,k + Br1s_1 // ************************************************************ - // std::cout << "Starting case 33/36" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 33/36" << std::endl; +#endif for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA(valuesA[i]); for (size_t j(0); j < valuesB.size(); ++j) { @@ -2266,7 +2364,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { for (int k(0); k < K; ++k) { a.h_view[k] = valueA + k; } - Kokkos::deep_copy(a.d_view, a.h_view); + Kokkos::deep_copy(a.d_base, a.h_base); } else { for (int k(0); k < K; ++k) { a.h_base[k] = valueA + k; @@ -2294,7 +2392,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 34/36: Ar1d,k + Br1s_k // ************************************************************ - // std::cout << "Starting case 34/36" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 34/36" << std::endl; +#endif for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA(valuesA[i]); for (size_t j(0); j < valuesB.size(); ++j) { @@ -2309,7 +2409,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { for (int k(0); k < K; ++k) { a.h_view[k] = valueA + k; } - Kokkos::deep_copy(a.d_view, a.h_view); + Kokkos::deep_copy(a.d_base, a.h_base); } else { for (int k(0); k < K; ++k) { a.h_base[k] = valueA + k; @@ -2321,7 +2421,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { for (int k(0); k < K; ++k) { b.h_view[k] = valueB + k; } - Kokkos::deep_copy(b.d_view, b.h_view); + Kokkos::deep_copy(b.d_base, b.h_base); } else { for (int k(0); k < K; ++k) { b.h_base[k] = valueB + k; @@ -2341,7 +2441,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 35/36: Ar1d,k + Br1d,1 // ************************************************************ - // std::cout << "Starting case 35/36" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 35/36" << std::endl; +#endif for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA(valuesA[i]); for (size_t j(0); j < valuesB.size(); ++j) { @@ -2356,7 +2458,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { for (int k(0); k < K; ++k) { a.h_view[k] = valueA + k; } - Kokkos::deep_copy(a.d_view, a.h_view); + Kokkos::deep_copy(a.d_base, a.h_base); } else { for (int k(0); k < K; ++k) { a.h_base[k] = valueA + k; @@ -2383,7 +2485,9 @@ void impl_test_axpby_mv_unification(int const N, int const K) { // ************************************************************ // Case 36/36: Ar1d,k + Br1d,k // ************************************************************ - // std::cout << "Starting case 36/36" << std::endl; +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Starting case 36/36" << std::endl; +#endif for (size_t i(0); i < valuesA.size(); ++i) { tScalarA const valueA(valuesA[i]); for (size_t j(0); j < valuesB.size(); ++j) { @@ -2398,7 +2502,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { for (int k(0); k < K; ++k) { a.h_view[k] = valueA + k; } - Kokkos::deep_copy(a.d_view, a.h_view); + Kokkos::deep_copy(a.d_base, a.h_base); } else { for (int k(0); k < K; ++k) { a.h_base[k] = valueA + k; @@ -2410,7 +2514,7 @@ void impl_test_axpby_mv_unification(int const N, int const K) { for (int k(0); k < K; ++k) { b.h_view[k] = valueB + k; } - Kokkos::deep_copy(b.d_view, b.h_view); + Kokkos::deep_copy(b.d_base, b.h_base); } else { for (int k(0); k < K; ++k) { b.h_base[k] = valueB + k; @@ -2439,6 +2543,9 @@ int test_axpby_unification() { #if defined(KOKKOSKERNELS_INST_LAYOUTLEFT) || \ (!defined(KOKKOSKERNELS_ETI_ONLY) && \ !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Calling impl_test_axpby_unif(), L-LLL" << std::endl; +#endif Test::impl_test_axpby_unification< tScalarA, Kokkos::LayoutLeft, tScalarX, Kokkos::LayoutLeft, tScalarB, Kokkos::LayoutLeft, tScalarY, Kokkos::LayoutLeft, Device>(14); @@ -2447,6 +2554,9 @@ int test_axpby_unification() { #if defined(KOKKOSKERNELS_INST_LAYOUTRIGHT) || \ (!defined(KOKKOSKERNELS_ETI_ONLY) && \ !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Calling impl_test_axpby_unif(), L-RRR" << std::endl; +#endif Test::impl_test_axpby_unification< tScalarA, Kokkos::LayoutRight, tScalarX, Kokkos::LayoutRight, tScalarB, Kokkos::LayoutRight, tScalarY, Kokkos::LayoutRight, Device>(14); @@ -2454,6 +2564,9 @@ int test_axpby_unification() { #if (!defined(KOKKOSKERNELS_ETI_ONLY) && \ !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS)) +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Calling impl_test_axpby_unif(), L-SSS" << std::endl; +#endif Test::impl_test_axpby_unification< tScalarA, Kokkos::LayoutStride, tScalarX, Kokkos::LayoutStride, tScalarB, Kokkos::LayoutStride, tScalarY, Kokkos::LayoutStride, Device>(14); @@ -2461,18 +2574,30 @@ int test_axpby_unification() { #if !defined(KOKKOSKERNELS_ETI_ONLY) && \ !defined(KOKKOSKERNELS_IMPL_CHECK_ETI_CALLS) +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Calling impl_test_axpby_unif(), L-SLL" << std::endl; +#endif Test::impl_test_axpby_unification< tScalarA, Kokkos::LayoutStride, tScalarX, Kokkos::LayoutStride, tScalarB, Kokkos::LayoutLeft, tScalarY, Kokkos::LayoutLeft, Device>(14); +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Calling impl_test_axpby_unif(), L-LSS" << std::endl; +#endif Test::impl_test_axpby_unification< tScalarA, Kokkos::LayoutLeft, tScalarX, Kokkos::LayoutLeft, tScalarB, Kokkos::LayoutStride, tScalarY, Kokkos::LayoutStride, Device>(14); +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Calling impl_test_axpby_unif(), L-SRS" << std::endl; +#endif Test::impl_test_axpby_unification< tScalarA, Kokkos::LayoutLeft, tScalarX, Kokkos::LayoutStride, tScalarB, Kokkos::LayoutRight, tScalarY, Kokkos::LayoutStride, Device>(14); +#ifdef HAVE_KOKKOSKERNELS_DEBUG + std::cout << "Calling impl_test_axpby_unif(), L-LSR" << std::endl; +#endif Test::impl_test_axpby_unification< tScalarA, Kokkos::LayoutStride, tScalarX, Kokkos::LayoutLeft, tScalarB, Kokkos::LayoutStride, tScalarY, Kokkos::LayoutRight, Device>(14); diff --git a/blas/unit_test/Test_Blas2_ger.hpp b/blas/unit_test/Test_Blas2_ger.hpp index 7d30a4b65d..df3d2cb5d1 100644 --- a/blas/unit_test/Test_Blas2_ger.hpp +++ b/blas/unit_test/Test_Blas2_ger.hpp @@ -79,10 +79,11 @@ class GerTester { using _KAT_A = Kokkos::ArithTraits; using _AuxType = typename _KAT_A::mag_type; - void populateVariables(ScalarA& alpha, _HostViewTypeX& h_x, - _HostViewTypeY& h_y, _HostViewTypeA& h_A, - _ViewTypeExpected& h_expected, _ViewTypeX& x, - _ViewTypeY& y, _ViewTypeA& A, + void populateVariables(ScalarA& alpha, + view_stride_adapter<_ViewTypeX, false>& x, + view_stride_adapter<_ViewTypeY, false>& y, + view_stride_adapter<_ViewTypeA, false>& A, + _ViewTypeExpected& h_expected, bool& expectedResultIsKnown); template @@ -149,11 +150,10 @@ class GerTester { T shrinkAngleToZeroTwoPiRange(const T input); template - void callKkGerAndCompareAgainstExpected(const ScalarA& alpha, TX& x, TY& y, - _ViewTypeA& A, - const _HostViewTypeA& h_A, - const _ViewTypeExpected& h_expected, - const std::string& situation); + void callKkGerAndCompareAgainstExpected( + const ScalarA& alpha, TX& x, TY& y, + view_stride_adapter<_ViewTypeA, false>& A, + const _ViewTypeExpected& h_expected, const std::string& situation); const bool _A_is_complex; const bool _A_is_lr; @@ -286,8 +286,7 @@ void GerTesterpopulateVariables(alpha, x.h_view, y.h_view, A.h_view, - h_expected.d_view, x.d_view, y.d_view, A.d_view, + this->populateVariables(alpha, x, y, A, h_expected.d_view, expectedResultIsKnown); // ******************************************************************** @@ -333,8 +332,7 @@ void GerTestercallKkGerAndCompareAgainstExpected( - alpha, x.d_view, y.d_view, A.d_view, A.h_view, h_expected.d_view, - "non const {x,y}"); + alpha, x.d_view, y.d_view, A, h_expected.d_view, "non const {x,y}"); } // ******************************************************************** @@ -343,8 +341,7 @@ void GerTestercallKkGerAndCompareAgainstExpected(alpha, x.d_view_const, y.d_view, - A.d_view, A.h_view, + this->callKkGerAndCompareAgainstExpected(alpha, x.d_view_const, y.d_view, A, h_expected.d_view, "const x"); } @@ -354,8 +351,7 @@ void GerTestercallKkGerAndCompareAgainstExpected(alpha, x.d_view, y.d_view_const, - A.d_view, A.h_view, + this->callKkGerAndCompareAgainstExpected(alpha, x.d_view, y.d_view_const, A, h_expected.d_view, "const y"); } @@ -366,7 +362,7 @@ void GerTestercallKkGerAndCompareAgainstExpected(alpha, x.d_view_const, - y.d_view_const, A.d_view, A.h_view, + y.d_view_const, A, h_expected.d_view, "const {x,y}"); } @@ -388,52 +384,53 @@ void GerTester -void GerTester::populateVariables(ScalarA& alpha, _HostViewTypeX& h_x, - _HostViewTypeY& h_y, - _HostViewTypeA& h_A, - _ViewTypeExpected& h_expected, - _ViewTypeX& x, _ViewTypeY& y, - _ViewTypeA& A, - bool& expectedResultIsKnown) { +void GerTester< + ScalarX, tLayoutX, ScalarY, tLayoutY, ScalarA, tLayoutA, + Device>::populateVariables(ScalarA& alpha, + view_stride_adapter<_ViewTypeX, false>& x, + view_stride_adapter<_ViewTypeY, false>& y, + view_stride_adapter<_ViewTypeA, false>& A, + _ViewTypeExpected& h_expected, + bool& expectedResultIsKnown) { expectedResultIsKnown = false; if (_useAnalyticalResults) { - this->populateAnalyticalValues(alpha, h_x, h_y, h_A, h_expected); - Kokkos::deep_copy(x, h_x); - Kokkos::deep_copy(y, h_y); - Kokkos::deep_copy(A, h_A); + this->populateAnalyticalValues(alpha, x.h_view, y.h_view, A.h_view, + h_expected); + Kokkos::deep_copy(x.d_base, x.h_base); + Kokkos::deep_copy(y.d_base, y.h_base); + Kokkos::deep_copy(A.d_base, A.h_base); expectedResultIsKnown = true; } else if ((_M == 1) && (_N == 1)) { alpha = 3; - h_x[0] = 2; + x.h_view[0] = 2; - h_y[0] = 3; + y.h_view[0] = 3; - h_A(0, 0) = 7; + A.h_view(0, 0) = 7; - Kokkos::deep_copy(x, h_x); - Kokkos::deep_copy(y, h_y); - Kokkos::deep_copy(A, h_A); + Kokkos::deep_copy(x.d_base, x.h_base); + Kokkos::deep_copy(y.d_base, y.h_base); + Kokkos::deep_copy(A.d_base, A.h_base); h_expected(0, 0) = 25; expectedResultIsKnown = true; } else if ((_M == 1) && (_N == 2)) { alpha = 3; - h_x[0] = 2; + x.h_view[0] = 2; - h_y[0] = 3; - h_y[1] = 4; + y.h_view[0] = 3; + y.h_view[1] = 4; - h_A(0, 0) = 7; - h_A(0, 1) = -6; + A.h_view(0, 0) = 7; + A.h_view(0, 1) = -6; - Kokkos::deep_copy(x, h_x); - Kokkos::deep_copy(y, h_y); - Kokkos::deep_copy(A, h_A); + Kokkos::deep_copy(x.d_base, x.h_base); + Kokkos::deep_copy(y.d_base, y.h_base); + Kokkos::deep_copy(A.d_base, A.h_base); h_expected(0, 0) = 25; h_expected(0, 1) = 18; @@ -441,20 +438,20 @@ void GerTester void GerTester:: - callKkGerAndCompareAgainstExpected(const ScalarA& alpha, TX& x, TY& y, - _ViewTypeA& A, const _HostViewTypeA& h_A, - const _ViewTypeExpected& h_expected, - const std::string& situation) { + callKkGerAndCompareAgainstExpected( + const ScalarA& alpha, TX& x, TY& y, + view_stride_adapter<_ViewTypeA, false>& A, + const _ViewTypeExpected& h_expected, const std::string& situation) { #ifdef HAVE_KOKKOSKERNELS_DEBUG #if KOKKOS_VERSION < 40199 KOKKOS_IMPL_DO_NOT_USE_PRINTF( @@ -1383,7 +1380,7 @@ void GerTestercompareKkGerAgainstExpected(alpha, h_A, h_expected); + this->compareKkGerAgainstExpected(alpha, A.h_view, h_expected); } } diff --git a/blas/unit_test/Test_Blas2_syr.hpp b/blas/unit_test/Test_Blas2_syr.hpp index 6c2651c47e..1253a8e329 100644 --- a/blas/unit_test/Test_Blas2_syr.hpp +++ b/blas/unit_test/Test_Blas2_syr.hpp @@ -76,9 +76,10 @@ class SyrTester { using _KAT_A = Kokkos::ArithTraits; using _AuxType = typename _KAT_A::mag_type; - void populateVariables(ScalarA& alpha, _HostViewTypeX& h_x, - _HostViewTypeA& h_A, _ViewTypeExpected& h_expected, - _ViewTypeX& x, _ViewTypeA& A, + void populateVariables(ScalarA& alpha, + view_stride_adapter<_ViewTypeX, false>& x, + view_stride_adapter<_ViewTypeA, false>& A, + _ViewTypeExpected& h_expected, bool& expectedResultIsKnown); template @@ -145,11 +146,9 @@ class SyrTester { T shrinkAngleToZeroTwoPiRange(const T input); template - void callKkSyrAndCompareAgainstExpected(const ScalarA& alpha, TX& x, - _ViewTypeA& A, - const _HostViewTypeA& h_A, - const _ViewTypeExpected& h_expected, - const std::string& situation); + void callKkSyrAndCompareAgainstExpected( + const ScalarA& alpha, TX& x, view_stride_adapter<_ViewTypeA, false>& A, + const _ViewTypeExpected& h_expected, const std::string& situation); template void callKkGerAndCompareKkSyrAgainstIt( @@ -283,8 +282,8 @@ void SyrTester::test( // ******************************************************************** // Step 2 of 7: populate alpha, h_x, h_A, h_expected, x, A // ******************************************************************** - this->populateVariables(alpha, x.h_view, A.h_view, h_expected.d_view, - x.d_view, A.d_view, expectedResultIsKnown); + this->populateVariables(alpha, x, A, h_expected.d_view, + expectedResultIsKnown); // ******************************************************************** // Step 3 of 7: populate h_vanilla @@ -328,8 +327,8 @@ void SyrTester::test( Kokkos::deep_copy(org_A.h_view, A.h_view); if (test_x) { - this->callKkSyrAndCompareAgainstExpected( - alpha, x.d_view, A.d_view, A.h_view, h_expected.d_view, "non const x"); + this->callKkSyrAndCompareAgainstExpected(alpha, x.d_view, A, + h_expected.d_view, "non const x"); if ((_useAnalyticalResults == false) && // Just to save run time (_kkGerShouldThrowException == false)) { @@ -344,9 +343,8 @@ void SyrTester::test( if (test_cx) { Kokkos::deep_copy(A.d_base, org_A.d_base); - this->callKkSyrAndCompareAgainstExpected(alpha, x.d_view_const, A.d_view, - A.h_view, h_expected.d_view, - "const x"); + this->callKkSyrAndCompareAgainstExpected(alpha, x.d_view_const, A, + h_expected.d_view, "const x"); } // ******************************************************************** @@ -372,42 +370,42 @@ void SyrTester::test( template void SyrTester::populateVariables( - ScalarA& alpha, _HostViewTypeX& h_x, _HostViewTypeA& h_A, - _ViewTypeExpected& h_expected, _ViewTypeX& x, _ViewTypeA& A, + ScalarA& alpha, view_stride_adapter<_ViewTypeX, false>& x, + view_stride_adapter<_ViewTypeA, false>& A, _ViewTypeExpected& h_expected, bool& expectedResultIsKnown) { expectedResultIsKnown = false; if (_useAnalyticalResults) { - this->populateAnalyticalValues(alpha, h_x, h_A, h_expected); - Kokkos::deep_copy(x, h_x); - Kokkos::deep_copy(A, h_A); + this->populateAnalyticalValues(alpha, x.h_view, A.h_view, h_expected); + Kokkos::deep_copy(x.d_base, x.h_base); + Kokkos::deep_copy(A.d_base, A.h_base); expectedResultIsKnown = true; } else if (_N == 1) { alpha = 3; - h_x[0] = 2; + x.h_view[0] = 2; - h_A(0, 0) = 7; + A.h_view(0, 0) = 7; - Kokkos::deep_copy(x, h_x); - Kokkos::deep_copy(A, h_A); + Kokkos::deep_copy(x.d_base, x.h_base); + Kokkos::deep_copy(A.d_base, A.h_base); h_expected(0, 0) = 19; expectedResultIsKnown = true; } else if (_N == 2) { alpha = 3; - h_x[0] = -2; - h_x[1] = 9; + x.h_view[0] = -2; + x.h_view[1] = 9; - h_A(0, 0) = 17; - h_A(0, 1) = -43; - h_A(1, 0) = -43; - h_A(1, 1) = 101; + A.h_view(0, 0) = 17; + A.h_view(0, 1) = -43; + A.h_view(1, 0) = -43; + A.h_view(1, 1) = 101; - Kokkos::deep_copy(x, h_x); - Kokkos::deep_copy(A, h_A); + Kokkos::deep_copy(x.d_base, x.h_base); + Kokkos::deep_copy(A.d_base, A.h_base); if (_useUpOption) { h_expected(0, 0) = 29; @@ -430,17 +428,17 @@ void SyrTester::populateVariables( { ScalarX randStart, randEnd; Test::getRandomBounds(1.0, randStart, randEnd); - Kokkos::fill_random(x, rand_pool, randStart, randEnd); + Kokkos::fill_random(x.d_view, rand_pool, randStart, randEnd); } { ScalarA randStart, randEnd; Test::getRandomBounds(1.0, randStart, randEnd); - Kokkos::fill_random(A, rand_pool, randStart, randEnd); + Kokkos::fill_random(A.d_view, rand_pool, randStart, randEnd); } - Kokkos::deep_copy(h_x, x); - Kokkos::deep_copy(h_A, A); + Kokkos::deep_copy(x.h_base, x.d_base); + Kokkos::deep_copy(A.h_base, A.d_base); if (_useHermitianOption && _A_is_complex) { // **************************************************************** @@ -448,12 +446,12 @@ void SyrTester::populateVariables( // **************************************************************** for (int i(0); i < _N; ++i) { for (int j(i + 1); j < _N; ++j) { - h_A(i, j) = _KAT_A::conj(h_A(j, i)); + A.h_view(i, j) = _KAT_A::conj(A.h_view(j, i)); } } for (int i(0); i < _N; ++i) { - h_A(i, i) = 0.5 * (h_A(i, i) + _KAT_A::conj(h_A(i, i))); + A.h_view(i, i) = 0.5 * (A.h_view(i, i) + _KAT_A::conj(A.h_view(i, i))); } } else { // **************************************************************** @@ -461,18 +459,18 @@ void SyrTester::populateVariables( // **************************************************************** for (int i(0); i < _N; ++i) { for (int j(i + 1); j < _N; ++j) { - h_A(i, j) = h_A(j, i); + A.h_view(i, j) = A.h_view(j, i); } } } - Kokkos::deep_copy(A, h_A); + Kokkos::deep_copy(A.d_base, A.h_base); } #ifdef HAVE_KOKKOSKERNELS_DEBUG if (_N <= 2) { for (int i(0); i < _M; ++i) { for (int j(0); j < _N; ++j) { - std::cout << "h_origA(" << i << "," << j << ")=" << h_A(i, j) + std::cout << "h_origA(" << i << "," << j << ")=" << A.h_view(i, j) << std::endl; } } @@ -1437,10 +1435,9 @@ template template void SyrTester:: - callKkSyrAndCompareAgainstExpected(const ScalarA& alpha, TX& x, - _ViewTypeA& A, const _HostViewTypeA& h_A, - const _ViewTypeExpected& h_expected, - const std::string& situation) { + callKkSyrAndCompareAgainstExpected( + const ScalarA& alpha, TX& x, view_stride_adapter<_ViewTypeA, false>& A, + const _ViewTypeExpected& h_expected, const std::string& situation) { #ifdef HAVE_KOKKOSKERNELS_DEBUG std::cout << "In Test_Blas2_syr, '" << situation << "', alpha = " << alpha << std::endl; @@ -1461,7 +1458,7 @@ void SyrTester:: bool gotStdException(false); bool gotUnknownException(false); try { - KokkosBlas::syr(mode.c_str(), uplo.c_str(), alpha, x, A); + KokkosBlas::syr(mode.c_str(), uplo.c_str(), alpha, x, A.d_view); } catch (const std::exception& e) { #ifdef HAVE_KOKKOSKERNELS_DEBUG std::cout << "In Test_Blas2_syr, '" << situation @@ -1486,8 +1483,8 @@ void SyrTester:: << "have thrown a std::exception"; if ((gotStdException == false) && (gotUnknownException == false)) { - Kokkos::deep_copy(h_A, A); - this->compareKkSyrAgainstReference(alpha, h_A, h_expected); + Kokkos::deep_copy(A.h_base, A.d_base); + this->compareKkSyrAgainstReference(alpha, A.h_view, h_expected); } } diff --git a/blas/unit_test/Test_Blas2_syr2.hpp b/blas/unit_test/Test_Blas2_syr2.hpp index a3b53129fe..c49eba765b 100644 --- a/blas/unit_test/Test_Blas2_syr2.hpp +++ b/blas/unit_test/Test_Blas2_syr2.hpp @@ -83,10 +83,11 @@ class Syr2Tester { using _KAT_A = Kokkos::ArithTraits; using _AuxType = typename _KAT_A::mag_type; - void populateVariables(ScalarA& alpha, _HostViewTypeX& h_x, - _HostViewTypeY& h_y, _HostViewTypeA& h_A, - _ViewTypeExpected& h_expected, _ViewTypeX& x, - _ViewTypeY& y, _ViewTypeA& A, + void populateVariables(ScalarA& alpha, + view_stride_adapter<_ViewTypeX, false>& x, + view_stride_adapter<_ViewTypeY, false>& y, + view_stride_adapter<_ViewTypeA, false>& A, + _ViewTypeExpected& h_expected, bool& expectedResultIsKnown); template @@ -153,11 +154,10 @@ class Syr2Tester { T shrinkAngleToZeroTwoPiRange(const T input); template - void callKkSyr2AndCompareAgainstExpected(const ScalarA& alpha, TX& x, TY& y, - _ViewTypeA& A, - const _HostViewTypeA& h_A, - const _ViewTypeExpected& h_expected, - const std::string& situation); + void callKkSyr2AndCompareAgainstExpected( + const ScalarA& alpha, TX& x, TY& y, + view_stride_adapter<_ViewTypeA, false>& A, + const _ViewTypeExpected& h_expected, const std::string& situation); template void callKkGerAndCompareKkSyr2AgainstIt( @@ -296,8 +296,7 @@ void Syr2TesterpopulateVariables(alpha, x.h_view, y.h_view, A.h_view, - h_expected.d_view, x.d_view, y.d_view, A.d_view, + this->populateVariables(alpha, x, y, A, h_expected.d_view, expectedResultIsKnown); // ******************************************************************** @@ -336,8 +335,7 @@ void Syr2TestercallKkSyr2AndCompareAgainstExpected(alpha, x.d_view, y.d_view, - A.d_view, A.h_view, + this->callKkSyr2AndCompareAgainstExpected(alpha, x.d_view, y.d_view, A, h_expected.d_view, "non const x"); if ((_useAnalyticalResults == false) && // Just to save run time @@ -354,8 +352,7 @@ void Syr2TestercallKkSyr2AndCompareAgainstExpected( - alpha, x.d_view_const, y.d_view_const, A.d_view, A.h_view, - h_expected.d_view, "const x"); + alpha, x.d_view_const, y.d_view_const, A, h_expected.d_view, "const x"); } // ******************************************************************** @@ -384,55 +381,56 @@ void Syr2Tester -void Syr2Tester::populateVariables(ScalarA& alpha, _HostViewTypeX& h_x, - _HostViewTypeY& h_y, - _HostViewTypeA& h_A, - _ViewTypeExpected& h_expected, - _ViewTypeX& x, _ViewTypeY& y, - _ViewTypeA& A, - bool& expectedResultIsKnown) { +void Syr2Tester< + ScalarX, tLayoutX, ScalarY, tLayoutY, ScalarA, tLayoutA, + Device>::populateVariables(ScalarA& alpha, + view_stride_adapter<_ViewTypeX, false>& x, + view_stride_adapter<_ViewTypeY, false>& y, + view_stride_adapter<_ViewTypeA, false>& A, + _ViewTypeExpected& h_expected, + bool& expectedResultIsKnown) { expectedResultIsKnown = false; if (_useAnalyticalResults) { - this->populateAnalyticalValues(alpha, h_x, h_y, h_A, h_expected); - Kokkos::deep_copy(x, h_x); - Kokkos::deep_copy(y, h_y); - Kokkos::deep_copy(A, h_A); + this->populateAnalyticalValues(alpha, x.h_view, y.h_view, A.h_view, + h_expected); + Kokkos::deep_copy(x.d_base, x.h_base); + Kokkos::deep_copy(y.d_base, y.h_base); + Kokkos::deep_copy(A.d_base, A.h_base); expectedResultIsKnown = true; } else if (_N == 1) { alpha = 3; - h_x[0] = 2; + x.h_view[0] = 2; - h_y[0] = 4; + y.h_view[0] = 4; - h_A(0, 0) = 7; + A.h_view(0, 0) = 7; - Kokkos::deep_copy(x, h_x); - Kokkos::deep_copy(y, h_y); - Kokkos::deep_copy(A, h_A); + Kokkos::deep_copy(x.d_base, x.h_base); + Kokkos::deep_copy(y.d_base, y.h_base); + Kokkos::deep_copy(A.d_base, A.h_base); h_expected(0, 0) = 55; expectedResultIsKnown = true; } else if (_N == 2) { alpha = 3; - h_x[0] = -2; - h_x[1] = 9; + x.h_view[0] = -2; + x.h_view[1] = 9; - h_y[0] = 5; - h_y[1] = -4; + y.h_view[0] = 5; + y.h_view[1] = -4; - h_A(0, 0) = 17; - h_A(0, 1) = -43; - h_A(1, 0) = -43; - h_A(1, 1) = 101; + A.h_view(0, 0) = 17; + A.h_view(0, 1) = -43; + A.h_view(1, 0) = -43; + A.h_view(1, 1) = 101; - Kokkos::deep_copy(x, h_x); - Kokkos::deep_copy(y, h_y); - Kokkos::deep_copy(A, h_A); + Kokkos::deep_copy(x.d_base, x.h_base); + Kokkos::deep_copy(y.d_base, y.h_base); + Kokkos::deep_copy(A.d_base, A.h_base); if (_useUpOption) { h_expected(0, 0) = -43; @@ -455,24 +453,24 @@ void Syr2TestercompareKkSyr2AgainstReference(alpha, h_A, h_expected); + Kokkos::deep_copy(A.h_base, A.d_base); + this->compareKkSyr2AgainstReference(alpha, A.h_view, h_expected); } }