Skip to content

Commit

Permalink
Address CI build errors
Browse files Browse the repository at this point in the history
  • Loading branch information
e10harvey committed Oct 24, 2023
1 parent 0ea31c3 commit f648609
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 55 deletions.
79 changes: 38 additions & 41 deletions blas/impl/KokkosBlas2_gemv_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,10 +199,9 @@ struct SingleLevelTransposeGEMV {
};

// Single-level parallel version of GEMV.
template <class AViewType, class XViewType, class YViewType,
class IndexType = typename AViewType::size_type>
void singleLevelGemv(const typename AViewType::execution_space& space,
const char trans[],
template <class ExecutionSpace, class AViewType, class XViewType,
class YViewType, class IndexType = typename AViewType::size_type>
void singleLevelGemv(const ExecutionSpace& space, const char trans[],
typename AViewType::const_value_type& alpha,
const AViewType& A, const XViewType& x,
typename YViewType::const_value_type& beta,
Expand All @@ -222,9 +221,8 @@ void singleLevelGemv(const typename AViewType::execution_space& space,
static_assert(std::is_integral<IndexType>::value,
"IndexType must be an integer");

using y_value_type = typename YViewType::non_const_value_type;
using execution_space = typename AViewType::execution_space;
using policy_type = Kokkos::RangePolicy<execution_space, IndexType>;
using y_value_type = typename YViewType::non_const_value_type;
using policy_type = Kokkos::RangePolicy<ExecutionSpace, IndexType>;

using AlphaCoeffType = typename AViewType::non_const_value_type;
using BetaCoeffType = typename YViewType::non_const_value_type;
Expand Down Expand Up @@ -442,8 +440,8 @@ struct TwoLevelGEMV_LayoutRightTag {};
// ---------------------------------------------------------------------------------------------
// Functor for a two-level parallel_reduce version of GEMV (non-transpose),
// designed for performance on GPU. Kernel depends on the layout of A.
template <class AViewType, class XViewType, class YViewType,
class IndexType = typename AViewType::size_type>
template <class ExecutionSpace, class AViewType, class XViewType,
class YViewType, class IndexType = typename AViewType::size_type>
struct TwoLevelGEMV {
using y_value_type = typename YViewType::non_const_value_type;
using AlphaCoeffType = typename AViewType::non_const_value_type;
Expand All @@ -453,9 +451,8 @@ struct TwoLevelGEMV {
std::is_same<y_value_type, Kokkos::Experimental::bhalf_t>::value,
float, y_value_type>::type;

using execution_space = typename AViewType::execution_space;
using policy_type = Kokkos::TeamPolicy<execution_space>;
using member_type = typename policy_type::member_type;
using policy_type = Kokkos::TeamPolicy<ExecutionSpace>;
using member_type = typename policy_type::member_type;

TwoLevelGEMV(const AlphaCoeffType& alpha, const AViewType& A,
const XViewType& x, const BetaCoeffType& beta,
Expand Down Expand Up @@ -564,7 +561,8 @@ struct TwoLevelGEMV {
// transpose GEMV. The functor uses parallel-for over the columns of the input
// matrix A and each team uses parallel-reduce over the row of its column.
// The output vector y is the reduction result.
template <class AViewType, class XViewType, class YViewType, const bool conj,
template <class ExecutionSpace, class AViewType, class XViewType,
class YViewType, const bool conj,
class IndexType = typename AViewType::size_type>
struct TwoLevelTransposeGEMV {
using y_value_type = typename YViewType::non_const_value_type;
Expand All @@ -575,9 +573,8 @@ struct TwoLevelTransposeGEMV {
std::is_same<y_value_type, Kokkos::Experimental::bhalf_t>::value,
float, y_value_type>::type;

using execution_space = typename AViewType::execution_space;
using policy_type = Kokkos::TeamPolicy<execution_space>;
using member_type = typename policy_type::member_type;
using policy_type = Kokkos::TeamPolicy<ExecutionSpace>;
using member_type = typename policy_type::member_type;

TwoLevelTransposeGEMV(const AlphaCoeffType& alpha, const AViewType& A,
const XViewType& x, const BetaCoeffType& beta,
Expand Down Expand Up @@ -637,10 +634,9 @@ struct TwoLevelTransposeGEMV {
};

// Two-level parallel version of GEMV.
template <class AViewType, class XViewType, class YViewType,
class IndexType = typename AViewType::size_type>
void twoLevelGemv(const typename AViewType::execution_space& space,
const char trans[],
template <class ExecutionSpace, class AViewType, class XViewType,
class YViewType, class IndexType = typename AViewType::size_type>
void twoLevelGemv(const ExecutionSpace& space, const char trans[],
typename AViewType::const_value_type& alpha,
const AViewType& A, const XViewType& x,
typename YViewType::const_value_type& beta,
Expand All @@ -661,9 +657,8 @@ void twoLevelGemv(const typename AViewType::execution_space& space,
"IndexType must be an integer");

using y_value_type = typename YViewType::non_const_value_type;
using execution_space = typename AViewType::execution_space;
using team_policy_type = Kokkos::TeamPolicy<execution_space>;
using range_policy_type = Kokkos::RangePolicy<execution_space, IndexType>;
using team_policy_type = Kokkos::TeamPolicy<ExecutionSpace>;
using range_policy_type = Kokkos::RangePolicy<ExecutionSpace, IndexType>;

using Kokkos::ArithTraits;
using KAT = ArithTraits<typename AViewType::non_const_value_type>;
Expand Down Expand Up @@ -704,19 +699,19 @@ void twoLevelGemv(const typename AViewType::execution_space& space,
using layout_tag =
typename std::conditional<isLayoutLeft, TwoLevelGEMV_LayoutLeftTag,
TwoLevelGEMV_LayoutRightTag>::type;
using tagged_policy = Kokkos::TeamPolicy<execution_space, layout_tag>;
using functor_type =
TwoLevelGEMV<AViewType, XViewType, YViewType, IndexType>;
using tagged_policy = Kokkos::TeamPolicy<ExecutionSpace, layout_tag>;
using functor_type = TwoLevelGEMV<ExecutionSpace, AViewType, XViewType,
YViewType, IndexType>;
functor_type functor(alpha, A, x, beta, y);
tagged_policy team;
if (isLayoutLeft) {
if constexpr (isLayoutLeft) {
using AccumScalar = typename std::conditional<
std::is_same<y_value_type, Kokkos::Experimental::half_t>::value ||
std::is_same<y_value_type, Kokkos::Experimental::bhalf_t>::value,
float, y_value_type>::type;
size_t sharedPerTeam = 32 * sizeof(AccumScalar);
IndexType numTeams = (A.extent(0) + 31) / 32;
tagged_policy temp(1, 1);
tagged_policy temp(space, 1, 1);
temp.set_scratch_size(0, Kokkos::PerTeam(sharedPerTeam));
int teamSize =
temp.team_size_recommended(functor, Kokkos::ParallelForTag());
Expand All @@ -727,7 +722,7 @@ void twoLevelGemv(const typename AViewType::execution_space& space,
// FIXME SYCL: team_size_recommended() returns too big of a team size.
// Kernel hangs with 1024 threads on XEHP.
#ifdef KOKKOS_ENABLE_SYCL
if (std::is_same<execution_space, Kokkos::Experimental::SYCL>::value) {
if (std::is_same<ExecutionSpace, Kokkos::Experimental::SYCL>::value) {
if (teamSize > 256) teamSize = 256;
}
#endif
Expand All @@ -749,16 +744,18 @@ void twoLevelGemv(const typename AViewType::execution_space& space,
} else if (tr == 'T') {
// transpose, and not conj transpose
team_policy_type team(space, A.extent(1), Kokkos::AUTO);
using functor_type = TwoLevelTransposeGEMV<AViewType, XViewType,
YViewType, false, IndexType>;
using functor_type =
TwoLevelTransposeGEMV<ExecutionSpace, AViewType, XViewType, YViewType,
false, IndexType>;
functor_type functor(alpha, A, x, beta, y);
Kokkos::parallel_for("KokkosBlas::gemv[twoLevelTranspose]", team,
functor);
} else if (tr == 'C' || tr == 'H') {
// conjugate transpose
team_policy_type team(space, A.extent(1), Kokkos::AUTO);
using functor_type = TwoLevelTransposeGEMV<AViewType, XViewType,
YViewType, true, IndexType>;
using functor_type =
TwoLevelTransposeGEMV<ExecutionSpace, AViewType, XViewType, YViewType,
true, IndexType>;
functor_type functor(alpha, A, x, beta, y);
Kokkos::parallel_for("KokkosBlas::gemv[twoLevelTranspose]", team,
functor);
Expand All @@ -769,23 +766,23 @@ void twoLevelGemv(const typename AViewType::execution_space& space,
// generalGemv: use 1 level (Range) or 2 level (Team) implementation,
// depending on whether execution space is CPU or GPU. enable_if makes sure
// unused kernels are not instantiated.
template <class AViewType, class XViewType, class YViewType, class IndexType,
template <class ExecutionSpace, class AViewType, class XViewType,
class YViewType, class IndexType,
typename std::enable_if<!KokkosKernels::Impl::kk_is_gpu_exec_space<
typename AViewType::execution_space>()>::type* = nullptr>
void generalGemvImpl(const typename AViewType::execution_space& space,
const char trans[],
ExecutionSpace>()>::type* = nullptr>
void generalGemvImpl(const ExecutionSpace& space, const char trans[],
typename AViewType::const_value_type& alpha,
const AViewType& A, const XViewType& x,
typename YViewType::const_value_type& beta,
const YViewType& y) {
singleLevelGemv(space, trans, alpha, A, x, beta, y);
}

template <class AViewType, class XViewType, class YViewType, class IndexType,
template <class ExecutionSpace, class AViewType, class XViewType,
class YViewType, class IndexType,
typename std::enable_if<KokkosKernels::Impl::kk_is_gpu_exec_space<
typename AViewType::execution_space>()>::type* = nullptr>
void generalGemvImpl(const typename AViewType::execution_space& space,
const char trans[],
ExecutionSpace>()>::type* = nullptr>
void generalGemvImpl(const ExecutionSpace& space, const char trans[],
typename AViewType::const_value_type& alpha,
const AViewType& A, const XViewType& x,
typename YViewType::const_value_type& beta,
Expand Down
6 changes: 3 additions & 3 deletions blas/impl/KokkosBlas2_gemv_spec.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,10 +104,10 @@ struct GEMV {
// Prefer int as the index type, but use a larger type if needed.
if (numRows < static_cast<size_type>(INT_MAX) &&
numCols < static_cast<size_type>(INT_MAX)) {
generalGemvImpl<AViewType, XViewType, YViewType, int>(space, trans, alpha,
A, x, beta, y);
generalGemvImpl<ExecutionSpace, AViewType, XViewType, YViewType, int>(
space, trans, alpha, A, x, beta, y);
} else {
generalGemvImpl<AViewType, XViewType, YViewType, int64_t>(
generalGemvImpl<ExecutionSpace, AViewType, XViewType, YViewType, int64_t>(
space, trans, alpha, A, x, beta, y);
}
Kokkos::Profiling::popRegion();
Expand Down
12 changes: 8 additions & 4 deletions sparse/impl/KokkosSparse_sptrsv_cuSPARSE_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,11 +159,11 @@ void sptrsvcuSPARSE_symbolic(ExecutionSpace &space, KernelHandle *sptrsv_handle,
std::is_same<memory_space, Kokkos::CudaUVMSpace>::value ||
std::is_same<memory_space, Kokkos::CudaHostPinnedSpace>::value;

if (!is_cuda_space) {
if constexpr (!is_cuda_space) {
throw std::runtime_error(
"KokkosKernels sptrsvcuSPARSE_symbolic: MEMORY IS NOT ALLOCATED IN GPU "
"DEVICE for CUSPARSE\n");
} else if (std::is_same<idx_type, int>::value) {
} else if constexpr (std::is_same<idx_type, int>::value) {
bool is_lower = sptrsv_handle->is_lower_tri();
sptrsv_handle->create_cuSPARSE_Handle(trans, is_lower);

Expand Down Expand Up @@ -277,6 +277,7 @@ void sptrsvcuSPARSE_symbolic(ExecutionSpace &space, KernelHandle *sptrsv_handle,
}
#endif
#else
(void)space;
(void)sptrsv_handle;
(void)nrows;
(void)row_map;
Expand Down Expand Up @@ -369,8 +370,10 @@ void sptrsvcuSPARSE_solve(ExecutionSpace &space, KernelHandle *sptrsv_handle,
typename KernelHandle::SPTRSVcuSparseHandleType *h =
sptrsv_handle->get_cuSparseHandle();

KOKKOS_CUSPARSE_SAFE_CALL(
cusparseSetStream(h->handle, space.cuda_stream()));
if constexpr (std::is_same_v<ExecutionSpace, Kokkos::Cuda>) {
KOKKOS_CUSPARSE_SAFE_CALL(
cusparseSetStream(h->handle, space.cuda_stream()));
}

int nnz = entries.extent_int(0);

Expand Down Expand Up @@ -440,6 +443,7 @@ void sptrsvcuSPARSE_solve(ExecutionSpace &space, KernelHandle *sptrsv_handle,
}
#endif
#else
(void)space;
(void)sptrsv_handle;
(void)nrows;
(void)row_map;
Expand Down
16 changes: 9 additions & 7 deletions sparse/impl/KokkosSparse_sptrsv_solve_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2913,7 +2913,8 @@ void lower_tri_solve(ExecutionSpace &space, TriSolveHandle &thandle,

#if defined(KOKKOSKERNELS_ENABLE_SUPERNODAL_SPTRSV)
using namespace KokkosSparse::Experimental;
using memory_space = typename TriSolveHandle::memory_space;
using memory_space = typename ExecutionSpace::memory_space;
using device_t = Kokkos::Device<ExecutionSpace, memory_space>;
using integer_view_t = typename TriSolveHandle::integer_view_t;
using integer_view_host_t = typename TriSolveHandle::integer_view_host_t;
using scalar_t = typename ValuesType::non_const_value_type;
Expand Down Expand Up @@ -3075,7 +3076,7 @@ void lower_tri_solve(ExecutionSpace &space, TriSolveHandle &thandle,
// NOTE: we currently support only default_layout = LayoutLeft
using team_policy_type = Kokkos::TeamPolicy<ExecutionSpace>;
using supernode_view_type =
Kokkos::View<scalar_t **, default_layout, memory_space,
Kokkos::View<scalar_t **, default_layout, device_t,
Kokkos::MemoryUnmanaged>;
if (diag_kernel_type_host(lvl) == 3) {
// using device-level kernels (functor is called to scatter the
Expand Down Expand Up @@ -3148,7 +3149,7 @@ void lower_tri_solve(ExecutionSpace &space, TriSolveHandle &thandle,
char unit_diag = (unit_diagonal ? 'U' : 'N');
// NOTE: we currently support only default_layout =
// LayoutLeft
Kokkos::View<scalar_t **, default_layout, memory_space,
Kokkos::View<scalar_t **, default_layout, device_t,
Kokkos::MemoryUnmanaged>
Xjj(Xj.data(), nscol, 1);
KokkosBlas::trsm(space, "L", "L", "N", &unit_diag, one, Ljj,
Expand Down Expand Up @@ -3311,6 +3312,7 @@ void upper_tri_solve(ExecutionSpace &space, TriSolveHandle &thandle,
cudaProfilerStop();
#endif
using memory_space = typename ExecutionSpace::memory_space;
using device_t = Kokkos::Device<ExecutionSpace, memory_space>;
typedef typename TriSolveHandle::size_type size_type;
typedef typename TriSolveHandle::nnz_lno_view_t NGBLType;

Expand Down Expand Up @@ -3527,7 +3529,7 @@ tstf); } // end elseif

// create a view for the s-th supernodal block column
// NOTE: we currently support only default_layout = LayoutLeft
Kokkos::View<scalar_t **, default_layout, memory_space,
Kokkos::View<scalar_t **, default_layout, device_t,
Kokkos::MemoryUnmanaged>
viewU(&dataU[i1], nsrow, nscol);

Expand Down Expand Up @@ -3562,7 +3564,7 @@ tstf); } // end elseif
} else {
// NOTE: we currently support only default_layout =
// LayoutLeft
Kokkos::View<scalar_t **, default_layout, memory_space,
Kokkos::View<scalar_t **, default_layout, device_t,
Kokkos::MemoryUnmanaged>
Xjj(Xj.data(), nscol, 1);
KokkosBlas::trsm(space, "L", "U", "N", "N", one, Ujj, Xjj);
Expand Down Expand Up @@ -3658,7 +3660,7 @@ tstf); } // end elseif

// create a view for the s-th supernodal block column
// NOTE: we currently support only default_layout = LayoutLeft
Kokkos::View<scalar_t **, default_layout, memory_space,
Kokkos::View<scalar_t **, default_layout, device_t,
Kokkos::MemoryUnmanaged>
viewU(&dataU[i1], nsrow, nscol);

Expand Down Expand Up @@ -3695,7 +3697,7 @@ tstf); } // end elseif
KokkosBlas::gemv(space, "T", one, Ujj, Xj, zero, Y);
} else {
// NOTE: we currently support only default_layout = LayoutLeft
Kokkos::View<scalar_t **, default_layout, memory_space,
Kokkos::View<scalar_t **, default_layout, device_t,
Kokkos::MemoryUnmanaged>
Xjj(Xj.data(), nscol, 1);
KokkosBlas::trsm(space, "L", "L", "T", "N", one, Ujj, Xjj);
Expand Down

0 comments on commit f648609

Please sign in to comment.