diff --git a/blas/src/KokkosBlas1_rot.hpp b/blas/src/KokkosBlas1_rot.hpp index cd4c687148..4b75d07c8a 100644 --- a/blas/src/KokkosBlas1_rot.hpp +++ b/blas/src/KokkosBlas1_rot.hpp @@ -17,10 +17,28 @@ #ifndef KOKKOSBLAS1_ROT_HPP_ #define KOKKOSBLAS1_ROT_HPP_ +#include #include +#include +#include namespace KokkosBlas { +// clang-format off +/// \brief Apply a plane rotation. +/// +/// \tparam execution_space Space on which to execute. Must be able to access VectorView, MagnitudeView and ScalarView. +/// \tparam VectorView A 1-D view of nonconst scalars +/// \tparam MagnitudeView A 0-D view of nonconst, real-valued scalar +/// \tparam ScalarView A 0-D view of scalar +/// +/// \param space [in] the execution space +/// \param X [in/out] First vector to be rotated +/// \param Y [in/out] Second vector to be rotated +/// \param c [out] cosine value associated with the +/// rotation +/// \param s [out] sine value associated with the rotation +// clang-format on template void rot(execution_space const& space, VectorView const& X, VectorView const& Y, MagnitudeView const& c, ScalarView const& s) { diff --git a/blas/src/KokkosBlas1_rotg.hpp b/blas/src/KokkosBlas1_rotg.hpp index b309316002..e3c58c016e 100644 --- a/blas/src/KokkosBlas1_rotg.hpp +++ b/blas/src/KokkosBlas1_rotg.hpp @@ -19,18 +19,21 @@ #include #include +#include namespace KokkosBlas { /// \brief Compute the coefficients to apply a Givens rotation. /// -/// \tparam Scalar data type of inputs and outputs +/// \tparam execution_space Space on which to execute. Must be able to access SViewType and MViewType. +/// \tparam SViewType 0-D View type containing a nonconst real or complex scalar +/// \tparam MViewType 0-D View type containing a nonconst real/magnitude-typed scalar /// /// \param space [in] the execution space /// \param a [in/out] on input one of the values to rotate, on output the -/// rotated value +/// rotated value r /// \param b [in/out] on input one of the values to rotate, on -/// output the rotated value +/// output the rotation parameter z /// \param c [out] cosine value associated with the /// rotation /// \param s [out] sine value associated with the rotation diff --git a/blas/src/KokkosBlas1_rotm.hpp b/blas/src/KokkosBlas1_rotm.hpp index 6f5442e931..a26294300f 100644 --- a/blas/src/KokkosBlas1_rotm.hpp +++ b/blas/src/KokkosBlas1_rotm.hpp @@ -19,6 +19,7 @@ #include #include +#include namespace KokkosBlas { @@ -30,10 +31,10 @@ namespace KokkosBlas { /// \tparam ParamView a rank1 view of static extent [5] type that /// holds const data /// -/// \param space [in] execution space used for parallel loops in this kernel -/// \param X [in/out] vector to be rotated with param coefficients -/// \param Y [in/out] vector to be rotated with param coefficients -/// \param param [in] output of rotmg contains rotation coefficients +/// \param space [in] execution space used for parallel loops in this kernel +/// \param X [in/out] First vector to be rotated with param coefficients +/// \param Y [in/out] Second vector to be rotated with param coefficients +/// \param param [in] rotation parameters produced by rotmg /// template void rotm(execution_space const& space, VectorView const& X, VectorView const& Y, ParamView const& param) { diff --git a/blas/src/KokkosBlas1_rotmg.hpp b/blas/src/KokkosBlas1_rotmg.hpp index a6c629f987..647eb3248e 100644 --- a/blas/src/KokkosBlas1_rotmg.hpp +++ b/blas/src/KokkosBlas1_rotmg.hpp @@ -19,9 +19,11 @@ #include #include +#include namespace KokkosBlas { +// clang-format off /// \brief Compute the coefficients to apply a modified Givens rotation. /// /// \tparam execution_space the execution space where the kernel will be @@ -32,12 +34,13 @@ namespace KokkosBlas { /// static extent 5 that holds non const data /// /// \param space [in] execution space used for parallel loops -/// \param d1 [in/out] -/// \param d2 [in/out] -/// \param x1 [in/out] -/// \param y1 [in] -/// \param param [out] -/// +/// \param d1 [in/out] On input, square of initial x scaling factor. On output, square of x scaling factor to be applied after rotm. +/// \param d2 [in/out] On input, square of initial y scaling factor. On output, square of y scaling factor +/// to be applied after rotm. +/// \param x1 [in/out] On input, element from first vector to rotate. On output, the rotated element before scaling. +/// \param y1 [in] Element from second vector to rotate. It is not modified by this routine. +/// \param param [out] 5-element parameter array defining the rotation, to be used by rotm. +// clang-format on template void rotmg(execution_space const& space, DXView const& d1, DXView const& d2, DXView const& x1, YView const& y1, PView const& param) { diff --git a/example/wiki/blas/CMakeLists.txt b/example/wiki/blas/CMakeLists.txt index 5061c54b7d..a5cdc08d7d 100644 --- a/example/wiki/blas/CMakeLists.txt +++ b/example/wiki/blas/CMakeLists.txt @@ -23,6 +23,16 @@ KOKKOSKERNELS_ADD_EXECUTABLE_AND_TEST( SOURCES KokkosBlas1_wiki_nrm2.cpp ) +KOKKOSKERNELS_ADD_EXECUTABLE_AND_TEST( + wiki_blas1_rotg_rot + SOURCES KokkosBlas1_wiki_rotg_rot.cpp + ) + +KOKKOSKERNELS_ADD_EXECUTABLE_AND_TEST( + wiki_blas1_rotmg_rotm + SOURCES KokkosBlas1_wiki_rotmg_rotm.cpp + ) + KOKKOSKERNELS_ADD_EXECUTABLE_AND_TEST( wiki_blas2_ger SOURCES KokkosBlas2_wiki_ger.cpp diff --git a/example/wiki/blas/KokkosBlas1_wiki_rotg_rot.cpp b/example/wiki/blas/KokkosBlas1_wiki_rotg_rot.cpp new file mode 100644 index 0000000000..b8347bb63c --- /dev/null +++ b/example/wiki/blas/KokkosBlas1_wiki_rotg_rot.cpp @@ -0,0 +1,59 @@ +#include +#include +#include +#include "KokkosBlas1_rotg.hpp" +#include "KokkosBlas1_rot.hpp" +#include "KokkosKernels_PrintUtils.hpp" + +using execution_space = Kokkos::DefaultExecutionSpace; +using Scalar = double; +using Vector = Kokkos::View; +using ScalarView = Kokkos::View; + +int main(int argc, char* argv[]) { + Kokkos::initialize(); + { + const int N = 10; + Vector x("x", N); + Vector y("y", N); + + // Populate x,y with uniform random values between 0 and 10 + Kokkos::Random_XorShift64_Pool rand_pool(13718); + Kokkos::fill_random(x, rand_pool, Scalar(10)); + Kokkos::fill_random(y, rand_pool, Scalar(10)); + + std::cout << "x,y before applying Givens rotation:\n"; + KokkosKernels::Impl::kk_print_1Dview(std::cout, x); + KokkosKernels::Impl::kk_print_1Dview(std::cout, y); + + ScalarView c("c"); + ScalarView s("s"); + + // Calculate Givens rotation coefficients to eliminate y(0) + KokkosBlas::rotg(execution_space(), Kokkos::subview(x, 0), + Kokkos::subview(y, 0), c, s); + + std::cout << "\nrotg output (rotation parameters) to eliminate y(0):\n"; + std::cout << "c = "; + KokkosKernels::Impl::kk_print_1Dview(std::cout, c); + std::cout << "s = "; + KokkosKernels::Impl::kk_print_1Dview(std::cout, s); + std::cout << "r = x(0) = "; + KokkosKernels::Impl::kk_print_1Dview(std::cout, Kokkos::subview(x, 0)); + std::cout << "z = "; + KokkosKernels::Impl::kk_print_1Dview(std::cout, Kokkos::subview(y, 0)); + + // Zero out y(0), which now contains the output parameter z. + // This completes the replacement of [x(0), y(0)] with [r, 0]. + Kokkos::deep_copy(Kokkos::subview(y, 0), Scalar(0)); + + // Apply the rotation to the remaining entries of x and y + KokkosBlas::rot(execution_space(), Kokkos::subview(x, Kokkos::make_pair(1, N)), + Kokkos::subview(y, Kokkos::make_pair(1, N)), c, s); + + std::cout << "\nx,y after applying Givens rotation:\n"; + KokkosKernels::Impl::kk_print_1Dview(std::cout, x); + KokkosKernels::Impl::kk_print_1Dview(std::cout, y); + } + Kokkos::finalize(); +} diff --git a/example/wiki/blas/KokkosBlas1_wiki_rotmg_rotm.cpp b/example/wiki/blas/KokkosBlas1_wiki_rotmg_rotm.cpp new file mode 100644 index 0000000000..2760a09ab3 --- /dev/null +++ b/example/wiki/blas/KokkosBlas1_wiki_rotmg_rotm.cpp @@ -0,0 +1,72 @@ +#include +#include +#include +#include "KokkosBlas1_rotmg.hpp" +#include "KokkosBlas1_rotm.hpp" +#include "KokkosKernels_PrintUtils.hpp" + +using execution_space = Kokkos::DefaultExecutionSpace; +using Scalar = double; +using Vector = Kokkos::View; +using ParamView = Kokkos::View; +using ScalarView = Kokkos::View; + +int main(int argc, char* argv[]) { + Kokkos::initialize(); + { + const int N = 10; + Vector x("x", N); + Vector y("y", N); + ScalarView d1("d1"); + ScalarView d2("d2"); + ParamView param("param"); + + // Populate x,y with uniform random values between 0 and 10 + Kokkos::Random_XorShift64_Pool rand_pool(13718); + Kokkos::fill_random(x, rand_pool, Scalar(10)); + Kokkos::fill_random(y, rand_pool, Scalar(10)); + + // Populate input vector scaling factors with 1 + Kokkos::deep_copy(d1, Scalar(1)); + Kokkos::deep_copy(d2, Scalar(1)); + + std::cout << "x,y before applying modified Givens rotation:\n"; + KokkosKernels::Impl::kk_print_1Dview(std::cout, x); + KokkosKernels::Impl::kk_print_1Dview(std::cout, y); + + // Calculate Givens rotation coefficients to eliminate y(0) + KokkosBlas::rotmg( + execution_space(), d1, d2, Kokkos::subview(x, 0), Kokkos::subview(y, 0), param); + + auto paramHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), param); + + std::cout << "\nrotmg output (rotation parameters) to eliminate y(0):\n"; + std::cout << "d1 = "; + KokkosKernels::Impl::kk_print_1Dview(std::cout, d1); + std::cout << "d2 = "; + KokkosKernels::Impl::kk_print_1Dview(std::cout, d2); + std::cout << "flag = " << paramHost(0) << '\n'; + std::cout << "h components = "; + for (int i = 0; i < 4; i++) std::cout << paramHost(1 + i) << " "; + std::cout << '\n'; + + // Zero out y(0), which was left unmodified by rotmg. + Kokkos::deep_copy(Kokkos::subview(y, 0), Scalar(0)); + + // Apply the rotation to the remaining entries of x and y + KokkosBlas::rotm(execution_space(), Kokkos::subview(x, Kokkos::make_pair(1, N)), + Kokkos::subview(y, Kokkos::make_pair(1, N)), param); + + // Apply scaling factors: sqrt(d1) and sqrt(d2) to x and y respectively + Kokkos::parallel_for( + Kokkos::RangePolicy(0, N), KOKKOS_LAMBDA(int i) { + x(i) *= Kokkos::sqrt(d1()); + y(i) *= Kokkos::sqrt(d2()); + }); + + std::cout << "\nx,y after applying modified Givens rotation and scaling by [sqrt(d1), sqrt(d2)]):\n"; + KokkosKernels::Impl::kk_print_1Dview(std::cout, x); + KokkosKernels::Impl::kk_print_1Dview(std::cout, y); + } + Kokkos::finalize(); +}