Skip to content

Commit

Permalink
Set Kokkos Kernels to 4.5.01
Browse files Browse the repository at this point in the history
  • Loading branch information
tpadioleau committed Jan 5, 2025
1 parent 9dec723 commit 13dec09
Show file tree
Hide file tree
Showing 12 changed files with 391 additions and 20 deletions.
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
url = https://github.com/kokkos/kokkos.git
[submodule "vendor/kokkos-kernels"]
path = vendor/kokkos-kernels
url = https://github.com/yasahi-hpc/kokkos-kernels.git
url = https://github.com/kokkos/kokkos-kernels.git
[submodule "vendor/doxygen-awesome-css"]
path = vendor/doxygen-awesome-css
url = https://github.com/jothepro/doxygen-awesome-css.git
Expand Down
8 changes: 2 additions & 6 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -208,19 +208,15 @@ if("${DDC_BUILD_KERNELS_SPLINES}")
)
if("${DDC_KokkosKernels_DEPENDENCY_POLICY}" STREQUAL "AUTO")
if(NOT TARGET Kokkos::kokkoskernels)
# fork https://github.com/yasahi-hpc/kokkos-kernels
# on branch develop-spline-kernels-v2
find_package(KokkosKernels QUIET)
find_package(KokkosKernels 4.5...<5 QUIET)
if(NOT KokkosKernels_FOUND)
ddc_configure_kokkos_kernels()
endif()
endif()
elseif("${DDC_KokkosKernels_DEPENDENCY_POLICY}" STREQUAL "EMBEDDED")
ddc_configure_kokkos_kernels()
elseif("${DDC_KokkosKernels_DEPENDENCY_POLICY}" STREQUAL "INSTALLED")
# fork https://github.com/yasahi-hpc/kokkos-kernels
# on branch develop-spline-kernels-v2
find_package(KokkosKernels REQUIRED)
find_package(KokkosKernels 4.5...<5 REQUIRED)
endif()

add_library(ddc_splines INTERFACE)
Expand Down
3 changes: 1 addition & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,7 @@ To use DDC components, one needs the following dependencies:
* PDI 1.6...<2
* (optional, spline interpolation) DDC::splines
* Ginkgo 1.8.0
* Kokkos Kernels fork <https://github.com/yasahi-hpc/kokkos-kernels> on branch develop-spline-kernels-v2

* Kokkos Kernels 4.5...<5

## Getting the code and basic configuration

Expand Down
4 changes: 1 addition & 3 deletions cmake/DDCConfig.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,7 @@ if(@DDC_BUILD_KERNELS_SPLINES@)
list(PREPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_LIST_DIR})
ddc_find_dependency(LAPACKE)
list(POP_FRONT CMAKE_MODULE_PATH)
# fork https://github.com/yasahi-hpc/kokkos-kernels
# on branch develop-spline-kernels-v2
ddc_find_dependency(KokkosKernels)
ddc_find_dependency(KokkosKernels 4.5...<5)
endif()

if(@DDC_BUILD_PDI_WRAPPER@)
Expand Down
2 changes: 1 addition & 1 deletion docs/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ set(DOXYGEN_EXAMPLE_PATH "${DDC_SOURCE_DIR}/examples")
set(DOXYGEN_EXPAND_ONLY_PREDEF YES)
set(DOXYGEN_EXTRACT_ALL YES CACHE STRING "")
set(DOXYGEN_EXCLUDE_SYMBOLS "detail")
set(DOXYGEN_EXCLUDE_PATTERNS "*/experimental/*;*/detail/*")
set(DOXYGEN_EXCLUDE_PATTERNS "*/detail/*;*/experimental/*;*/kokkos-kernels-ext/*")
set(DOXYGEN_EXTRACT_LOCAL_CLASSES YES)
set(DOXYGEN_FULL_PATH_NAMES NO)
set(DOXYGEN_HTML_OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/html")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// Copyright (C) The DDC development team, see COPYRIGHT.md file
//
// SPDX-License-Identifier: MIT

// clang-format off
// NOLINTBEGIN(*)

#ifndef KOKKOSBATCHED_GBTRS_HPP_
#define KOKKOSBATCHED_GBTRS_HPP_

#include <KokkosBatched_Util.hpp>

/// \author Yuuichi Asahi ([email protected])

namespace KokkosBatched {

/// \brief Serial Batched Gbtrs:
///
/// Solve A_l x_l = b_l for all l = 0, ..., N
/// with a general band matrix A using the LU factorization computed
/// by gbtrf.
/// Each call to invoke() handles a single system (A is a rank-2 view, b a
/// rank-1 view); iterating over the batch index l is presumably left to the
/// caller.
///
/// \tparam ArgTrans: Selects which system is solved. The implementation
///                   header provides specializations for Trans::NoTranspose
///                   and Trans::Transpose.
/// \tparam ArgAlgo: Algorithm tag. The implementation header provides
///                  specializations for Algo::Level3::Unblocked only.
/// \tparam AViewType: Input type for the matrix, needs to be a 2D view
/// \tparam BViewType: Input type for the right-hand side and the solution,
///                    needs to be a 1D view
/// \tparam PivViewType: Integer type for pivot indices, needs to be a 1D view
///
/// \param A [in]: A is a ldab by n banded matrix.
///                Details of the LU factorization of the band matrix A, as computed by
///                gbtrf. U is stored as an upper triangular band matrix with KL+KU
///                superdiagonals in rows 1 to KL+KU+1, and the multipliers used during
///                the factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
///                (Row numbers above are 1-based; the implementation indexes
///                the view 0-based, reading the multipliers starting at row
///                KL+KU+1.)
/// \param b [inout]: right-hand side and the solution
/// \param piv [in]: The pivot indices; row i of the matrix was interchanged
///                  with row piv(i). The implementation compares piv(j)
///                  directly with the 0-based index j, so the indices are
///                  0-based.
/// \param kl [in]: kl specifies the number of subdiagonals within the band
///                 of A. kl >= 0
/// \param ku [in]: ku specifies the number of superdiagonals within the band
///                 of A. ku >= 0
///
/// \return 0 on success; the specializations return a non-zero value when
///         their input check rejects the arguments.
///
/// No nested parallel_for is used inside of the function.
///

template <typename ArgTrans, typename ArgAlgo>
struct SerialGbtrs {
template <typename AViewType, typename BViewType, typename PivViewType>
KOKKOS_INLINE_FUNCTION static int invoke(const AViewType &A,
const BViewType &b,
const PivViewType &piv, const int kl,
const int ku);
};
} // namespace KokkosBatched

#include "KokkosBatched_Gbtrs_Serial_Impl.hpp"

#endif // KOKKOSBATCHED_GBTRS_HPP_

// NOLINTEND(*)
// clang-format on
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
// Copyright (C) The DDC development team, see COPYRIGHT.md file
//
// SPDX-License-Identifier: MIT

// clang-format off
// NOLINTBEGIN(*)

#ifndef KOKKOSBATCHED_GBTRS_SERIAL_IMPL_HPP_
#define KOKKOSBATCHED_GBTRS_SERIAL_IMPL_HPP_

#include <Kokkos_Swap.hpp>
#include <KokkosBatched_Util.hpp>
#include <KokkosBlas2_gemv.hpp>
#include <KokkosBatched_Tbsv.hpp>

namespace KokkosBatched {

/// \brief Validate the arguments passed to the serial gbtrs solvers.
///
/// The view requirements (both arguments are Kokkos::View, A has rank 2,
/// b has rank 1) are enforced at compile time. The run-time checks are
/// compiled in only when KOKKOSKERNELS_DEBUG_LEVEL > 0; otherwise the
/// function always returns 0.
///
/// \param A [in]: band-storage matrix; A.extent(0) must not be smaller than
///                2 * kl + ku + 1
/// \param b [in]: right-hand side; b.extent(0) must not be smaller than
///                max(1, A.extent(1))
/// \param kl [in]: number of subdiagonals, must not be negative
/// \param ku [in]: number of superdiagonals, must not be negative
///
/// \return 0 if every check passes, 1 (after printing a diagnostic)
///         otherwise
template <typename AViewType, typename BViewType>
KOKKOS_INLINE_FUNCTION static int checkGbtrsInput(
    [[maybe_unused]] const AViewType &A, [[maybe_unused]] const BViewType &b,
    [[maybe_unused]] const int kl, [[maybe_unused]] const int ku) {
  static_assert(Kokkos::is_view_v<AViewType>,
                "KokkosBatched::gbtrs: AViewType is not a Kokkos::View.");
  static_assert(Kokkos::is_view_v<BViewType>,
                "KokkosBatched::gbtrs: BViewType is not a Kokkos::View.");
  static_assert(AViewType::rank == 2,
                "KokkosBatched::gbtrs: AViewType must have rank 2.");
  static_assert(BViewType::rank == 1,
                "KokkosBatched::gbtrs: BViewType must have rank 1.");
#if (KOKKOSKERNELS_DEBUG_LEVEL > 0)
  if (kl < 0) {
    Kokkos::printf(
        "KokkosBatched::gbtrs: input parameter kl must not be less than 0: kl "
        "= %d\n",
        kl);
    return 1;
  }

  if (ku < 0) {
    Kokkos::printf(
        "KokkosBatched::gbtrs: input parameter ku must not be less than 0: ku "
        "= %d\n",
        ku);
    return 1;
  }

  const int lda = static_cast<int>(A.extent(0));
  const int n   = static_cast<int>(A.extent(1));
  // The band storage needs 2 * kl + ku + 1 rows; fewer rows is the error.
  if (lda < (2 * kl + ku + 1)) {
    Kokkos::printf(
        "KokkosBatched::gbtrs: leading dimension of A must not be smaller "
        "than 2 * kl + ku + 1: "
        "lda = %d, kl = %d, ku = %d\n",
        lda, kl, ku);
    return 1;
  }

  const int ldb = static_cast<int>(b.extent(0));
  // b must hold at least one entry per column of A; fewer is the error.
  if (ldb < Kokkos::max(1, n)) {
    Kokkos::printf(
        "KokkosBatched::gbtrs: leading dimension of b must not be smaller "
        "than max(1, n): "
        "ldb = %d, n = %d\n",
        ldb, n);
    return 1;
  }

#endif
  return 0;
}

//// Non-transpose ////
/// \brief Specialization of SerialGbtrs for Trans::NoTranspose with the
///        unblocked algorithm.
///
/// First applies the row interchanges and the stored multipliers to b
/// (forward sweep over the columns), then calls SerialTbsv with
/// Uplo::Upper / Trans::NoTranspose / Diag::NonUnit and bandwidth kl + ku
/// to finish the solve in place.
template <>
struct SerialGbtrs<Trans::NoTranspose, Algo::Level3::Unblocked> {
  /// \param A [in]: factorized band matrix (rank-2 view)
  /// \param b [inout]: right-hand side on entry, overwritten in place
  /// \param piv [in]: pivot indices, compared against 0-based row indices
  /// \param kl [in]: number of subdiagonals
  /// \param ku [in]: number of superdiagonals
  ///
  /// \return 0 on success (including the empty case A.extent(1) == 0), or
  ///         the non-zero value reported by checkGbtrsInput
  template <typename AViewType, typename BViewType, typename PivViewType>
  KOKKOS_INLINE_FUNCTION static int invoke(const AViewType &A,
                                           const BViewType &b,
                                           const PivViewType &piv, const int kl,
                                           const int ku) {
    // Quick return if possible
    const int n = A.extent(1);
    if (n == 0) return 0;

    const auto info = checkGbtrsInput(A, b, kl, ku);
    if (info) return info;

    // With kl == 0 there are no multipliers to apply, so the sweep is skipped.
    const bool lnoti = kl > 0;
    // First row of A holding the multipliers of a column (0-based).
    const int kd = ku + kl + 1;
    if (lnoti) {
      for (int j = 0; j < n - 1; ++j) {
        // Number of multipliers stored for column j.
        const int lm = Kokkos::min(kl, n - j - 1);
        const auto l = piv(j);
        // If pivot index is not j, swap rows l and j in b
        if (l != j) {
          Kokkos::kokkos_swap(b(l), b(j));
        }

        // Perform a rank-1 update of the remaining part of the current column
        // (ger)
        for (int i = 0; i < lm; ++i) {
          b(j + 1 + i) = b(j + 1 + i) - A(kd + i, j) * b(j);
        }
      }
    }

    // Solve U*X = b for each right hand side, overwriting B with X.
    // The Tbsv algorithm tag is used here, matching the routine being invoked
    // and the transpose specialization. The status returned by SerialTbsv is
    // intentionally not propagated.
    [[maybe_unused]] auto info_tbsv =
        KokkosBatched::SerialTbsv<Uplo::Upper, Trans::NoTranspose,
                                  Diag::NonUnit,
                                  Algo::Tbsv::Unblocked>::invoke(A, b, kl + ku);

    return 0;
  }
};

//// Transpose ////
/// \brief Specialization of SerialGbtrs for Trans::Transpose with the
///        unblocked algorithm.
///
/// Calls SerialTbsv with Uplo::Upper / Trans::Transpose / Diag::NonUnit and
/// bandwidth kl + ku first, then sweeps the columns from last to first,
/// applying the stored multipliers and the row interchanges to b in place.
template <>
struct SerialGbtrs<Trans::Transpose, Algo::Level3::Unblocked> {
  /// \param A [in]: factorized band matrix (rank-2 view)
  /// \param b [inout]: right-hand side on entry, overwritten in place
  /// \param piv [in]: pivot indices, compared against 0-based row indices
  /// \param kl [in]: number of subdiagonals
  /// \param ku [in]: number of superdiagonals
  ///
  /// \return 0 on success (including the empty case A.extent(1) == 0), or
  ///         the non-zero value reported by checkGbtrsInput
  template <typename AViewType, typename BViewType, typename PivViewType>
  KOKKOS_INLINE_FUNCTION static int invoke(const AViewType &A,
                                           const BViewType &b,
                                           const PivViewType &piv, const int kl,
                                           const int ku) {
    // Nothing to solve for an empty system.
    const int n = A.extent(1);
    if (n == 0) {
      return 0;
    }

    const auto status = checkGbtrsInput(A, b, kl, ku);
    if (status != 0) {
      return status;
    }

    // Triangular band solve with the upper factor, transposed; b is
    // overwritten. Its return value is not propagated.
    [[maybe_unused]] auto tbsv_status =
        KokkosBatched::SerialTbsv<Uplo::Upper, Trans::Transpose, Diag::NonUnit,
                                  Algo::Tbsv::Unblocked>::invoke(A, b, kl + ku);

    // Without subdiagonals there are no multipliers left to apply.
    if (!(kl > 0)) {
      return 0;
    }

    // First row of A holding the multipliers of a column (0-based).
    const int mult_row = ku + kl + 1;

    for (int col = n - 2; col >= 0; --col) {
      // Number of multipliers stored for this column.
      const int count = Kokkos::min(kl, n - col - 1);

      // Operands of the one-row gemv below: the entries of b under `col`,
      // the multipliers of column `col`, and the slice of b starting at `col`
      // that receives the result.
      auto b_below     = Kokkos::subview(b, Kokkos::pair(col + 1, col + 1 + count));
      auto multipliers = Kokkos::subview(A, Kokkos::pair(mult_row, mult_row + count), col);
      auto b_target    = Kokkos::subview(b, Kokkos::pair(col, col + count));

      // NOTE(review): presumably b(col) -= dot(b_below, multipliers), i.e. the
      // transposed gemv of the reference algorithm with a single output row —
      // confirm against SerialGemvInternal.
      [[maybe_unused]] auto gemv_status =
          KokkosBlas::Impl::SerialGemvInternal<Algo::Gemv::Unblocked>::invoke(
              1, b_below.extent(0), -1.0, b_below.data(), b_below.stride_0(),
              b_below.stride_0(), multipliers.data(), multipliers.stride_0(),
              1.0, b_target.data(), b_target.stride_0());

      // Undo the row interchange recorded for this column, if any.
      const auto pivot_row = piv(col);
      if (pivot_row != col) {
        Kokkos::kokkos_swap(b(pivot_row), b(col));
      }
    }

    return 0;
  }
};
} // namespace KokkosBatched

#endif // KOKKOSBATCHED_GBTRS_SERIAL_IMPL_HPP_

// NOLINTEND(*)
// clang-format on
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
// Copyright (C) The DDC development team, see COPYRIGHT.md file
//
// SPDX-License-Identifier: MIT

// clang-format off
// NOLINTBEGIN(*)

#ifndef KOKKOSBATCHED_GETRS_HPP_
#define KOKKOSBATCHED_GETRS_HPP_

#include <KokkosBatched_Util.hpp>

/// \author Yuuichi Asahi ([email protected])

namespace KokkosBatched {

/// \brief Serial Batched Getrs:
/// Solve a system of linear equations
/// A * x = b or A**T * x = b
/// with a general N-by-N matrix A using LU factorization computed
/// by Getrf.
/// \tparam ArgTrans: Selects which of the two systems above is solved
/// \tparam ArgAlgo: Algorithm tag selecting the implementation
/// \tparam AViewType: Input type for the matrix, needs to be a 2D view
/// \tparam PivViewType: Input type for the pivot indices, needs to be a 1D view
/// \tparam BViewType: Input type for the right-hand side and the solution,
/// needs to be a 1D view
///
/// \param A [inout]: A is a m by n general matrix, a rank 2 view
/// \param piv [out]: On exit, the pivot indices, a rank 1 view
/// \param b [inout]: right-hand side and the solution, a rank 1 view
///
/// NOTE(review): the [inout]/[out] tags on A and piv look inherited from the
/// Getrf documentation. Since this routine consumes the factorization
/// computed by Getrf, both are presumably read-only inputs here (A holding
/// the LU factors, piv the pivot indices) — confirm against the
/// implementation.
///
/// No nested parallel_for is used inside of the function.
///

template <typename ArgTrans, typename ArgAlgo>
struct SerialGetrs {
template <typename AViewType, typename PivViewType, typename BViewType>
KOKKOS_INLINE_FUNCTION static int invoke(const AViewType &A,
const PivViewType &piv,
const BViewType &b);
};
} // namespace KokkosBatched

#include "KokkosBatched_Getrs_Serial_Impl.hpp"

#endif // KOKKOSBATCHED_GETRS_HPP_

// NOLINTEND(*)
// clang-format on
Loading

0 comments on commit 13dec09

Please sign in to comment.