From 63742bd6bc53ff41debc4fc936e65c1e1a8f943e Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Mon, 6 Jan 2025 12:11:43 -0700 Subject: [PATCH 1/5] rotg/rot/rotmg/rotm docs and examples - Add wiki examples for rotg/rot and rotmg/rotm - Improve inline/doxygen comments for routines Signed-off-by: Brian Kelley --- blas/src/KokkosBlas1_rot.hpp | 17 +++++ blas/src/KokkosBlas1_rotg.hpp | 9 ++- blas/src/KokkosBlas1_rotm.hpp | 9 ++- blas/src/KokkosBlas1_rotmg.hpp | 11 +-- example/wiki/blas/CMakeLists.txt | 10 +++ .../wiki/blas/KokkosBlas1_wiki_rotg_rot.cpp | 57 ++++++++++++++ .../wiki/blas/KokkosBlas1_wiki_rotmg_rotm.cpp | 74 +++++++++++++++++++ 7 files changed, 175 insertions(+), 12 deletions(-) create mode 100644 example/wiki/blas/KokkosBlas1_wiki_rotg_rot.cpp create mode 100644 example/wiki/blas/KokkosBlas1_wiki_rotmg_rotm.cpp diff --git a/blas/src/KokkosBlas1_rot.hpp b/blas/src/KokkosBlas1_rot.hpp index cd4c687148..9ae2d053f2 100644 --- a/blas/src/KokkosBlas1_rot.hpp +++ b/blas/src/KokkosBlas1_rot.hpp @@ -17,10 +17,27 @@ #ifndef KOKKOSBLAS1_ROT_HPP_ #define KOKKOSBLAS1_ROT_HPP_ +#include #include +#include namespace KokkosBlas { +// clang-format off +/// \brief Apply a plane rotation. +/// +/// \tparam execution_space Space on which to execute. Must be able to access VectorView, MagnitudeView and ScalarView. +/// \tparam VectorView A 1-D view of nonconst scalars +/// \tparam MagnitudeView A 0-D view of nonconst, real-valued scalar +/// \tparam ScalarView A 0-D view of scalar +/// +/// \param space [in] the execution space +/// \param X [in/out] First vector to be rotated +/// \param Y [in/out] Second vector to be rotated +/// \param c [out] cosine value associated with the +/// rotation +/// \param s [out] sine value associated with the rotation +// clang-format on template void rot(execution_space const& space, VectorView const& X, VectorView const& Y, MagnitudeView const& c, ScalarView const& s) { diff --git a/blas/src/KokkosBlas1_rotg.hpp b/blas/src/KokkosBlas1_rotg.hpp index b309316002..e3c58c016e 100644 --- a/blas/src/KokkosBlas1_rotg.hpp +++ b/blas/src/KokkosBlas1_rotg.hpp @@ -19,18 +19,21 @@ #include #include +#include namespace KokkosBlas { /// \brief Compute the coefficients to apply a Givens rotation. /// -/// \tparam Scalar data type of inputs and outputs +/// \tparam execution_space Space on which to execute. Must be able to access SViewType and MViewType. +/// \tparam SViewType 0-D View type containing a nonconst real or complex scalar +/// \tparam MViewType 0-D View type containing a nonconst real/magnitude-typed scalar /// /// \param space [in] the execution space /// \param a [in/out] on input one of the values to rotate, on output the -/// rotated value +/// rotated value r /// \param b [in/out] on input one of the values to rotate, on -/// output the rotated value +/// output the rotation parameter z /// \param c [out] cosine value associated with the /// rotation /// \param s [out] sine value associated with the rotation diff --git a/blas/src/KokkosBlas1_rotm.hpp b/blas/src/KokkosBlas1_rotm.hpp index 6f5442e931..a26294300f 100644 --- a/blas/src/KokkosBlas1_rotm.hpp +++ b/blas/src/KokkosBlas1_rotm.hpp @@ -19,6 +19,7 @@ #include #include +#include namespace KokkosBlas { @@ -30,10 +31,10 @@ namespace KokkosBlas { /// \tparam ParamView a rank1 view of static extent [5] type that /// holds const data /// -/// \param space [in] execution space used for parallel loops in this kernel -/// \param X [in/out] vector to be rotated with param coefficients -/// \param Y [in/out] vector to be rotated with param coefficients -/// \param param [in] output of rotmg contains rotation coefficients +/// \param space [in] execution space used for parallel loops in this kernel +/// \param X [in/out] First vector to be rotated with param coefficients +/// \param Y [in/out] Second vector to be rotated with param coefficients +/// \param param [in] rotation parameters produced by rotmg /// template void rotm(execution_space const& space, VectorView const& X, VectorView const& Y, ParamView const& param) { diff --git a/blas/src/KokkosBlas1_rotmg.hpp b/blas/src/KokkosBlas1_rotmg.hpp index a6c629f987..b1707bea57 100644 --- a/blas/src/KokkosBlas1_rotmg.hpp +++ b/blas/src/KokkosBlas1_rotmg.hpp @@ -19,6 +19,7 @@ #include #include +#include namespace KokkosBlas { @@ -32,11 +33,11 @@ namespace KokkosBlas { /// static extent 5 that holds non const data /// /// \param space [in] execution space used for parallel loops -/// \param d1 [in/out] -/// \param d2 [in/out] -/// \param x1 [in/out] -/// \param y1 [in] -/// \param param [out] +/// \param d1 [in/out] On input, square of initial x scaling factor. On output, square of x scaling factor to be applied +/// after rotm. \param d2 [in/out] On input, square of initial y scaling factor. On output, square of y scaling factor +/// to be applied after rotm. \param x1 [in/out] On input, element from first vector to rotate. On output, the rotated +/// element before scaling. \param y1 [in] Element from second vector to rotate. It is not modified by this routine. +/// \param param [out] 5-element parameter array defining the rotation, to be used by rotm. /// template void rotmg(execution_space const& space, DXView const& d1, DXView const& d2, DXView const& x1, YView const& y1, diff --git a/example/wiki/blas/CMakeLists.txt b/example/wiki/blas/CMakeLists.txt index 5061c54b7d..a5cdc08d7d 100644 --- a/example/wiki/blas/CMakeLists.txt +++ b/example/wiki/blas/CMakeLists.txt @@ -23,6 +23,16 @@ KOKKOSKERNELS_ADD_EXECUTABLE_AND_TEST( SOURCES KokkosBlas1_wiki_nrm2.cpp ) +KOKKOSKERNELS_ADD_EXECUTABLE_AND_TEST( + wiki_blas1_rotg_rot + SOURCES KokkosBlas1_wiki_rotg_rot.cpp + ) + +KOKKOSKERNELS_ADD_EXECUTABLE_AND_TEST( + wiki_blas1_rotmg_rotm + SOURCES KokkosBlas1_wiki_rotmg_rotm.cpp + ) + KOKKOSKERNELS_ADD_EXECUTABLE_AND_TEST( wiki_blas2_ger SOURCES KokkosBlas2_wiki_ger.cpp diff --git a/example/wiki/blas/KokkosBlas1_wiki_rotg_rot.cpp b/example/wiki/blas/KokkosBlas1_wiki_rotg_rot.cpp new file mode 100644 index 0000000000..d973ef0e3e --- /dev/null +++ b/example/wiki/blas/KokkosBlas1_wiki_rotg_rot.cpp @@ -0,0 +1,57 @@ +#include +#include +#include +#include "KokkosBlas1_rotg.hpp" +#include "KokkosBlas1_rot.hpp" +#include "KokkosKernels_PrintUtils.hpp" + +using execution_space = Kokkos::DefaultExecutionSpace; +using Scalar = double; +using Vector = Kokkos::View; +using ScalarView = Kokkos::View; + +int main(int argc, char* argv[]) { + Kokkos::initialize(); + { + const int N = 10; + Vector x("x", N); + Vector y("y", N); + + // Populate x,y with uniform random values between 0 and 10 + Kokkos::Random_XorShift64_Pool rand_pool(13718); + Kokkos::fill_random(x, rand_pool, Scalar(10)); + Kokkos::fill_random(y, rand_pool, Scalar(10)); + + std::cout << "x,y before applying Givens rotation:\n"; + KokkosKernels::Impl::kk_print_1Dview(std::cout, x); + KokkosKernels::Impl::kk_print_1Dview(std::cout, y); + + ScalarView c("c"); + ScalarView s("s"); + + // Calculate Givens rotation coefficients to eliminate y(0) + KokkosBlas::rotg(execution_space(), Kokkos::subview(x, 0), Kokkos::subview(y, 0), c, s); + + std::cout << "\nrotg output (rotation parameters) to eliminate y(0):\n"; + std::cout << "c = "; + KokkosKernels::Impl::kk_print_1Dview(std::cout, c); + std::cout << "s = "; + KokkosKernels::Impl::kk_print_1Dview(std::cout, s); + std::cout << "r = x(0) = "; + KokkosKernels::Impl::kk_print_1Dview(std::cout, Kokkos::subview(x, 0)); + std::cout << "z = "; + KokkosKernels::Impl::kk_print_1Dview(std::cout, Kokkos::subview(y, 0)); + + // Zero out y(0), which now contains the output parameter z. + // This completes the replacement of [x(0), y(0)] with [r, 0]. + Kokkos::deep_copy(Kokkos::subview(y, 0), Scalar(0)); + + // Apply the rotation to the remaining entries of x and y + KokkosBlas::rot(execution_space(), Kokkos::subview(x, Kokkos::make_pair(1, N)), Kokkos::subview(y, Kokkos::make_pair(1, N)), c, s); + + std::cout << "\nx,y after applying Givens rotation:\n"; + KokkosKernels::Impl::kk_print_1Dview(std::cout, x); + KokkosKernels::Impl::kk_print_1Dview(std::cout, y); + } + Kokkos::finalize(); +} diff --git a/example/wiki/blas/KokkosBlas1_wiki_rotmg_rotm.cpp b/example/wiki/blas/KokkosBlas1_wiki_rotmg_rotm.cpp new file mode 100644 index 0000000000..32a24a796b --- /dev/null +++ b/example/wiki/blas/KokkosBlas1_wiki_rotmg_rotm.cpp @@ -0,0 +1,74 @@ +#include +#include +#include +#include "KokkosBlas1_rotmg.hpp" +#include "KokkosBlas1_rotm.hpp" +#include "KokkosBlas1_scal.hpp" +#include "KokkosKernels_PrintUtils.hpp" + +using execution_space = Kokkos::DefaultExecutionSpace; +using Scalar = double; +using Vector = Kokkos::View; +using ParamView = Kokkos::View; +using ScalarView = Kokkos::View; + +int main(int argc, char* argv[]) { + Kokkos::initialize(); + { + const int N = 10; + Vector x("x", N); + Vector y("y", N); + ScalarView d1("d1"); + ScalarView d2("d2"); + ParamView param("param"); + + // Populate x,y with uniform random values between 0 and 10 + Kokkos::Random_XorShift64_Pool rand_pool(13718); + Kokkos::fill_random(x, rand_pool, Scalar(10)); + Kokkos::fill_random(y, rand_pool, Scalar(10)); + + // Populate input vector scaling factors with 1 + Kokkos::deep_copy(d1, Scalar(1)); + Kokkos::deep_copy(d2, Scalar(1)); + + std::cout << "x,y before applying modified Givens rotation:\n"; + KokkosKernels::Impl::kk_print_1Dview(std::cout, x); + KokkosKernels::Impl::kk_print_1Dview(std::cout, y); + + // Calculate Givens rotation coefficients to eliminate y(0) + KokkosBlas::rotmg(execution_space(), d1, d2, Kokkos::subview(x, 0), Kokkos::subview(y, 0), param); + + auto paramHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), param); + + std::cout << "\nrotmg output (rotation parameters) to eliminate y(0):\n"; + std::cout << "d1 = "; + KokkosKernels::Impl::kk_print_1Dview(std::cout, d1); + std::cout << "d2 = "; + KokkosKernels::Impl::kk_print_1Dview(std::cout, d2); + std::cout << "flag = " << paramHost(0) << '\n'; + std::cout << "h components = "; + for(int i = 0; i < 4; i++) + std::cout << paramHost(1+i) << " "; + std::cout << '\n'; + + // Zero out y(0), which was left unmodified by rotmg. + Kokkos::deep_copy(Kokkos::subview(y, 0), Scalar(0)); + + // Apply the rotation to the remaining entries of x and y + KokkosBlas::rotm(execution_space(), Kokkos::subview(x, Kokkos::make_pair(1, N)), Kokkos::subview(y, Kokkos::make_pair(1, N)), param); + + // Apply scaling factors: sqrt(d1) and sqrt(d2) to x and y respectively + Kokkos::parallel_for(Kokkos::RangePolicy(0, N), + KOKKOS_LAMBDA(int i) + { + x(i) *= Kokkos::sqrt(d1()); + y(i) *= Kokkos::sqrt(d2()); + }); + + std::cout << "\nx,y after applying modified Givens rotation\n"; + std::cout << "(including scaling by [sqrt(d1), sqrt(d2)]):\n"; + KokkosKernels::Impl::kk_print_1Dview(std::cout, x); + KokkosKernels::Impl::kk_print_1Dview(std::cout, y); + } + Kokkos::finalize(); +} From cf339ac4b25ba9c6b2f5859fb244fdff43fdca84 Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Mon, 6 Jan 2025 12:17:15 -0700 Subject: [PATCH 2/5] Small example change, formatting Signed-off-by: Brian Kelley --- .../wiki/blas/KokkosBlas1_wiki_rotg_rot.cpp | 12 +++++---- .../wiki/blas/KokkosBlas1_wiki_rotmg_rotm.cpp | 25 +++++++++---------- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/example/wiki/blas/KokkosBlas1_wiki_rotg_rot.cpp b/example/wiki/blas/KokkosBlas1_wiki_rotg_rot.cpp index d973ef0e3e..b8347bb63c 100644 --- a/example/wiki/blas/KokkosBlas1_wiki_rotg_rot.cpp +++ b/example/wiki/blas/KokkosBlas1_wiki_rotg_rot.cpp @@ -6,9 +6,9 @@ #include "KokkosKernels_PrintUtils.hpp" using execution_space = Kokkos::DefaultExecutionSpace; -using Scalar = double; -using Vector = Kokkos::View; -using ScalarView = Kokkos::View; +using Scalar = double; +using Vector = Kokkos::View; +using ScalarView = Kokkos::View; int main(int argc, char* argv[]) { Kokkos::initialize(); @@ -30,7 +30,8 @@ int main(int argc, char* argv[]) { ScalarView s("s"); // Calculate Givens rotation coefficients to eliminate y(0) - KokkosBlas::rotg(execution_space(), Kokkos::subview(x, 0), Kokkos::subview(y, 0), c, s); + KokkosBlas::rotg(execution_space(), Kokkos::subview(x, 0), + Kokkos::subview(y, 0), c, s); std::cout << "\nrotg output (rotation parameters) to eliminate y(0):\n"; std::cout << "c = "; @@ -47,7 +48,8 @@ int main(int argc, char* argv[]) { Kokkos::deep_copy(Kokkos::subview(y, 0), Scalar(0)); // Apply the rotation to the remaining entries of x and y - KokkosBlas::rot(execution_space(), Kokkos::subview(x, Kokkos::make_pair(1, N)), Kokkos::subview(y, Kokkos::make_pair(1, N)), c, s); + KokkosBlas::rot(execution_space(), Kokkos::subview(x, Kokkos::make_pair(1, N)), + Kokkos::subview(y, Kokkos::make_pair(1, N)), c, s); std::cout << "\nx,y after applying Givens rotation:\n"; KokkosKernels::Impl::kk_print_1Dview(std::cout, x); diff --git a/example/wiki/blas/KokkosBlas1_wiki_rotmg_rotm.cpp b/example/wiki/blas/KokkosBlas1_wiki_rotmg_rotm.cpp index 32a24a796b..6b35fe273c 100644 --- a/example/wiki/blas/KokkosBlas1_wiki_rotmg_rotm.cpp +++ b/example/wiki/blas/KokkosBlas1_wiki_rotmg_rotm.cpp @@ -7,10 +7,10 @@ #include "KokkosKernels_PrintUtils.hpp" using execution_space = Kokkos::DefaultExecutionSpace; -using Scalar = double; -using Vector = Kokkos::View; -using ParamView = Kokkos::View; -using ScalarView = Kokkos::View; +using Scalar = double; +using Vector = Kokkos::View; +using ParamView = Kokkos::View; +using ScalarView = Kokkos::View; int main(int argc, char* argv[]) { Kokkos::initialize(); @@ -36,7 +36,8 @@ int main(int argc, char* argv[]) { KokkosKernels::Impl::kk_print_1Dview(std::cout, y); // Calculate Givens rotation coefficients to eliminate y(0) - KokkosBlas::rotmg(execution_space(), d1, d2, Kokkos::subview(x, 0), Kokkos::subview(y, 0), param); + KokkosBlas::rotmg( + execution_space(), d1, d2, Kokkos::subview(x, 0), Kokkos::subview(y, 0), param); auto paramHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), param); @@ -47,26 +48,24 @@ int main(int argc, char* argv[]) { KokkosKernels::Impl::kk_print_1Dview(std::cout, d2); std::cout << "flag = " << paramHost(0) << '\n'; std::cout << "h components = "; - for(int i = 0; i < 4; i++) - std::cout << paramHost(1+i) << " "; + for (int i = 0; i < 4; i++) std::cout << paramHost(1 + i) << " "; std::cout << '\n'; // Zero out y(0), which was left unmodified by rotmg. Kokkos::deep_copy(Kokkos::subview(y, 0), Scalar(0)); // Apply the rotation to the remaining entries of x and y - KokkosBlas::rotm(execution_space(), Kokkos::subview(x, Kokkos::make_pair(1, N)), Kokkos::subview(y, Kokkos::make_pair(1, N)), param); + KokkosBlas::rotm(execution_space(), Kokkos::subview(x, Kokkos::make_pair(1, N)), + Kokkos::subview(y, Kokkos::make_pair(1, N)), param); // Apply scaling factors: sqrt(d1) and sqrt(d2) to x and y respectively - Kokkos::parallel_for(Kokkos::RangePolicy(0, N), - KOKKOS_LAMBDA(int i) - { + Kokkos::parallel_for( + Kokkos::RangePolicy(0, N), KOKKOS_LAMBDA(int i) { x(i) *= Kokkos::sqrt(d1()); y(i) *= Kokkos::sqrt(d2()); }); - std::cout << "\nx,y after applying modified Givens rotation\n"; - std::cout << "(including scaling by [sqrt(d1), sqrt(d2)]):\n"; + std::cout << "\nx,y after applying modified Givens rotation and scaling by [sqrt(d1), sqrt(d2)]):\n"; KokkosKernels::Impl::kk_print_1Dview(std::cout, x); KokkosKernels::Impl::kk_print_1Dview(std::cout, y); } From 1c5e0be2c325fc07bda8382aaf3447d83f6d26fc Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Mon, 6 Jan 2025 12:45:49 -0700 Subject: [PATCH 3/5] Remove unused include Signed-off-by: Brian Kelley --- example/wiki/blas/KokkosBlas1_wiki_rotmg_rotm.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/example/wiki/blas/KokkosBlas1_wiki_rotmg_rotm.cpp b/example/wiki/blas/KokkosBlas1_wiki_rotmg_rotm.cpp index 6b35fe273c..2760a09ab3 100644 --- a/example/wiki/blas/KokkosBlas1_wiki_rotmg_rotm.cpp +++ b/example/wiki/blas/KokkosBlas1_wiki_rotmg_rotm.cpp @@ -3,7 +3,6 @@ #include #include "KokkosBlas1_rotmg.hpp" #include "KokkosBlas1_rotm.hpp" -#include "KokkosBlas1_scal.hpp" #include "KokkosKernels_PrintUtils.hpp" using execution_space = Kokkos::DefaultExecutionSpace; From df4917e4aa48e04c29d304747a24659edb3e59de Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Mon, 6 Jan 2025 12:48:08 -0700 Subject: [PATCH 4/5] Suppress clang-format on doxygen section Signed-off-by: Brian Kelley --- blas/src/KokkosBlas1_rotmg.hpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/blas/src/KokkosBlas1_rotmg.hpp b/blas/src/KokkosBlas1_rotmg.hpp index b1707bea57..647eb3248e 100644 --- a/blas/src/KokkosBlas1_rotmg.hpp +++ b/blas/src/KokkosBlas1_rotmg.hpp @@ -23,6 +23,7 @@ namespace KokkosBlas { +// clang-format off /// \brief Compute the coefficients to apply a modified Givens rotation. /// /// \tparam execution_space the execution space where the kernel will be @@ -33,12 +34,13 @@ namespace KokkosBlas { /// static extent 5 that holds non const data /// /// \param space [in] execution space used for parallel loops -/// \param d1 [in/out] On input, square of initial x scaling factor. On output, square of x scaling factor to be applied -/// after rotm. \param d2 [in/out] On input, square of initial y scaling factor. On output, square of y scaling factor -/// to be applied after rotm. \param x1 [in/out] On input, element from first vector to rotate. On output, the rotated -/// element before scaling. \param y1 [in] Element from second vector to rotate. It is not modified by this routine. +/// \param d1 [in/out] On input, square of initial x scaling factor. On output, square of x scaling factor to be applied after rotm. +/// \param d2 [in/out] On input, square of initial y scaling factor. On output, square of y scaling factor +/// to be applied after rotm. +/// \param x1 [in/out] On input, element from first vector to rotate. On output, the rotated element before scaling. +/// \param y1 [in] Element from second vector to rotate. It is not modified by this routine. /// \param param [out] 5-element parameter array defining the rotation, to be used by rotm. -/// +// clang-format on template void rotmg(execution_space const& space, DXView const& d1, DXView const& d2, DXView const& x1, YView const& y1, PView const& param) { From baa59a7d99381ac44bd9d122294419bbf1be329b Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Mon, 6 Jan 2025 14:51:10 -0700 Subject: [PATCH 5/5] rot: add missing include Signed-off-by: Brian Kelley --- blas/src/KokkosBlas1_rot.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/blas/src/KokkosBlas1_rot.hpp b/blas/src/KokkosBlas1_rot.hpp index 9ae2d053f2..4b75d07c8a 100644 --- a/blas/src/KokkosBlas1_rot.hpp +++ b/blas/src/KokkosBlas1_rot.hpp @@ -20,6 +20,7 @@ #include #include #include +#include namespace KokkosBlas {