diff --git a/perf_test/batched/sparse/cusolver/KokkosBatched_Test_cusolverSp.cpp b/perf_test/batched/sparse/cusolver/KokkosBatched_Test_cusolverSp.cpp index 8b2b48c0f4..b3e1fd2aba 100644 --- a/perf_test/batched/sparse/cusolver/KokkosBatched_Test_cusolverSp.cpp +++ b/perf_test/batched/sparse/cusolver/KokkosBatched_Test_cusolverSp.cpp @@ -91,9 +91,9 @@ struct Functor_Test_SparseCuSolveQR { const size_t m = _r.extent(0) - 1; cusparseMatDescr_t descrA = 0; - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&descrA)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ZERO)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&descrA)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ZERO)); double tol = 1e-18; int reorder = 0; @@ -146,9 +146,9 @@ struct Functor_Test_Block_SparseCuSolveQR { const size_t block_m = N * m; cusparseMatDescr_t descrA = 0; - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&descrA)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ZERO)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&descrA)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ZERO)); double tol = 1e-18; int reorder = 0; @@ -229,9 +229,9 @@ struct Functor_Test_SparseCuSolveChol { const size_t m = _r.extent(0) - 1; cusparseMatDescr_t descrA = 0; - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&descrA)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ZERO)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&descrA)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ZERO)); double tol = 1e-18; int reorder = 0; @@ -284,9 +284,9 @@ struct Functor_Test_Block_SparseCuSolveChol { const size_t block_m = N * m; cusparseMatDescr_t descrA = 0; - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&descrA)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ZERO)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&descrA)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ZERO)); double tol = 1e-18; int reorder = 0; diff --git a/perf_test/sparse/KokkosSparse_spadd.cpp b/perf_test/sparse/KokkosSparse_spadd.cpp index 063c151812..0d9614e6a3 100644 --- a/perf_test/sparse/KokkosSparse_spadd.cpp +++ b/perf_test/sparse/KokkosSparse_spadd.cpp @@ -277,20 +277,20 @@ void run_experiment(int argc, char** argv, CommonInputParams) { const double alphabeta = 1.0; if (params.use_cusparse) { - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreate(&cusparseHandle)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetPointerMode(cusparseHandle, CUSPARSE_POINTER_MODE_HOST)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&A_cusparse)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&B_cusparse)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&C_cusparse)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatType(A_cusparse, CUSPARSE_MATRIX_TYPE_GENERAL)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatType(B_cusparse, CUSPARSE_MATRIX_TYPE_GENERAL)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatType(C_cusparse, CUSPARSE_MATRIX_TYPE_GENERAL)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatDiagType(A_cusparse, CUSPARSE_DIAG_TYPE_NON_UNIT)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatDiagType(B_cusparse, CUSPARSE_DIAG_TYPE_NON_UNIT)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatDiagType(C_cusparse, CUSPARSE_DIAG_TYPE_NON_UNIT)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(A_cusparse, CUSPARSE_INDEX_BASE_ZERO)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(B_cusparse, CUSPARSE_INDEX_BASE_ZERO)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(C_cusparse, CUSPARSE_INDEX_BASE_ZERO)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreate(&cusparseHandle)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetPointerMode(cusparseHandle, CUSPARSE_POINTER_MODE_HOST)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&A_cusparse)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&B_cusparse)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&C_cusparse)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatType(A_cusparse, CUSPARSE_MATRIX_TYPE_GENERAL)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatType(B_cusparse, CUSPARSE_MATRIX_TYPE_GENERAL)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatType(C_cusparse, CUSPARSE_MATRIX_TYPE_GENERAL)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatDiagType(A_cusparse, CUSPARSE_DIAG_TYPE_NON_UNIT)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatDiagType(B_cusparse, CUSPARSE_DIAG_TYPE_NON_UNIT)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatDiagType(C_cusparse, CUSPARSE_DIAG_TYPE_NON_UNIT)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(A_cusparse, CUSPARSE_INDEX_BASE_ZERO)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(B_cusparse, CUSPARSE_INDEX_BASE_ZERO)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(C_cusparse, CUSPARSE_INDEX_BASE_ZERO)); } #endif #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL @@ -320,16 +320,16 @@ void run_experiment(int argc, char** argv, CommonInputParams) { if constexpr (std::is_same_v && std::is_same_v) { // Symbolic phase: compute buffer size, then compute nnz size_t bufferSize; - KOKKOS_CUSPARSE_SAFE_CALL(cusparseDcsrgeam2_bufferSizeExt( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDcsrgeam2_bufferSizeExt( cusparseHandle, A.numRows(), A.numCols(), &alphabeta, A_cusparse, A.nnz(), A.values.data(), A.graph.row_map.data(), A.graph.entries.data(), &alphabeta, B_cusparse, B.nnz(), B.values.data(), B.graph.row_map.data(), B.graph.entries.data(), C_cusparse, NULL, row_mapC.data(), NULL, &bufferSize)); // Allocate work buffer KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void**)&cusparseBuffer, bufferSize)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseXcsrgeam2Nnz(cusparseHandle, m, n, A_cusparse, A.nnz(), - A.graph.row_map.data(), A.graph.entries.data(), B_cusparse, - B.nnz(), B.graph.row_map.data(), B.graph.entries.data(), - C_cusparse, row_mapC.data(), &c_nnz, cusparseBuffer)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( + cusparseXcsrgeam2Nnz(cusparseHandle, m, n, A_cusparse, A.nnz(), A.graph.row_map.data(), + A.graph.entries.data(), B_cusparse, B.nnz(), B.graph.row_map.data(), + B.graph.entries.data(), C_cusparse, row_mapC.data(), &c_nnz, cusparseBuffer)); } else { throw std::runtime_error( "Must enable int as both ordinal and offset type in KokkosKernels " @@ -351,7 +351,7 @@ void run_experiment(int argc, char** argv, CommonInputParams) { if (params.use_cusparse) { #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE if constexpr (std::is_same_v && std::is_same_v) { - KOKKOS_CUSPARSE_SAFE_CALL(cusparseDcsrgeam2( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDcsrgeam2( cusparseHandle, m, n, &alphabeta, A_cusparse, A.nnz(), A.values.data(), A.graph.row_map.data(), A.graph.entries.data(), &alphabeta, B_cusparse, B.nnz(), B.values.data(), B.graph.row_map.data(), B.graph.entries.data(), C_cusparse, valuesC.data(), row_mapC.data(), entriesC.data(), cusparseBuffer)); @@ -379,7 +379,7 @@ void run_experiment(int argc, char** argv, CommonInputParams) { } #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE - if (params.use_cusparse) KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroy(cusparseHandle)); + if (params.use_cusparse) KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroy(cusparseHandle)); #endif #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL diff --git a/perf_test/sparse/KokkosSparse_spmv_struct_tuning.cpp b/perf_test/sparse/KokkosSparse_spmv_struct_tuning.cpp index 3a857fcb23..57a445bbff 100644 --- a/perf_test/sparse/KokkosSparse_spmv_struct_tuning.cpp +++ b/perf_test/sparse/KokkosSparse_spmv_struct_tuning.cpp @@ -509,14 +509,16 @@ int main(int argc, char** argv) { /* create matrix */ cusparseSpMatDescr_t A_cusparse; - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateCsr( &A_cusparse, A.numRows(), A.numCols(), A.nnz(), (void*)A.graph.row_map.data(), (void*)A.graph.entries.data(), (void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType, CUSPARSE_INDEX_BASE_ZERO, myCudaDataType)); /* create lhs and rhs */ cusparseDnVecDescr_t vecX, vecY; - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateDnVec(&vecX, x1.extent_int(0), (void*)x1.data(), myCudaDataType)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateDnVec(&vecY, y1.extent_int(0), (void*)y1.data(), myCudaDataType)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( + cusparseCreateDnVec(&vecX, x1.extent_int(0), (void*)x1.data(), myCudaDataType)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( + cusparseCreateDnVec(&vecY, y1.extent_int(0), (void*)y1.data(), myCudaDataType)); const double alpha = 1.0, beta = 1.0; size_t bufferSize = 0; @@ -528,9 +530,9 @@ int main(int argc, char** argv) { #else cusparseSpMVAlg_t alg = CUSPARSE_MV_ALG_DEFAULT; #endif - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpMV_bufferSize(controls.getCusparseHandle(), CUSPARSE_OPERATION_NON_TRANSPOSE, - &alpha, A_cusparse, vecX, &beta, vecY, myCudaDataType, alg, - &bufferSize)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( + cusparseSpMV_bufferSize(controls.getCusparseHandle(), CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, A_cusparse, + vecX, &beta, vecY, myCudaDataType, alg, &bufferSize)); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc(&dBuffer, bufferSize)); /* perform SpMV */ @@ -540,8 +542,9 @@ int main(int argc, char** argv) { double ave_time = 0.0; for (int i = 0; i < loop; i++) { Kokkos::Timer timer; - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpMV(controls.getCusparseHandle(), CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, - A_cusparse, vecX, &beta, vecY, myCudaDataType, alg, dBuffer)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpMV(controls.getCusparseHandle(), + CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, A_cusparse, vecX, + &beta, vecY, myCudaDataType, alg, dBuffer)); Kokkos::fence(); double time = timer.seconds(); ave_time += time; @@ -565,9 +568,9 @@ int main(int argc, char** argv) { Kokkos::Profiling::popRegion(); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(dBuffer)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(vecX)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(vecY)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(A_cusparse)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(vecX)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(vecY)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(A_cusparse)); #else // The data needs to be reformatted for cusparse before launching the // kernel. Step one, extract raw data diff --git a/sparse/impl/KokkosSparse_sptrsv_cuSPARSE_impl.hpp b/sparse/impl/KokkosSparse_sptrsv_cuSPARSE_impl.hpp index 24bf3382db..59d3945d32 100644 --- a/sparse/impl/KokkosSparse_sptrsv_cuSPARSE_impl.hpp +++ b/sparse/impl/KokkosSparse_sptrsv_cuSPARSE_impl.hpp @@ -55,7 +55,7 @@ void sptrsvcuSPARSE_symbolic(ExecutionSpace &space, KernelHandle *sptrsv_handle, typename KernelHandle::SPTRSVcuSparseHandleType *h = sptrsv_handle->get_cuSparseHandle(); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetStream(h->handle, space.cuda_stream())); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetStream(h->handle, space.cuda_stream())); int64_t nnz = static_cast(entries.extent(0)); size_t pBufferSize; @@ -85,52 +85,52 @@ void sptrsvcuSPARSE_symbolic(ExecutionSpace &space, KernelHandle *sptrsv_handle, cudaDataType cudaValueType = cuda_data_type_from(); // Create sparse matrix in CSR format - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateCsr( &(h->matDescr), static_cast(nrows), static_cast(nrows), nnz, rm, (void *)entries.data(), (void *)values.data(), cudaCsrRowMapType, cudaCsrColIndType, CUSPARSE_INDEX_BASE_ZERO, cudaValueType)); // Create dummy dense vector B (RHS) nnz_scalar_view_t b_dummy(Kokkos::view_alloc(space, "b_dummy"), nrows); - KOKKOS_CUSPARSE_SAFE_CALL( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( cusparseCreateDnVec(&(h->vecBDescr_dummy), static_cast(nrows), b_dummy.data(), cudaValueType)); // Create dummy dense vector X (LHS) nnz_scalar_view_t x_dummy(Kokkos::view_alloc(space, "x_dummy"), nrows); - KOKKOS_CUSPARSE_SAFE_CALL( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( cusparseCreateDnVec(&(h->vecXDescr_dummy), static_cast(nrows), x_dummy.data(), cudaValueType)); // Specify Lower|Upper fill mode if (is_lower) { cusparseFillMode_t fillmode = CUSPARSE_FILL_MODE_LOWER; - KOKKOS_CUSPARSE_SAFE_CALL( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( cusparseSpMatSetAttribute(h->matDescr, CUSPARSE_SPMAT_FILL_MODE, &fillmode, sizeof(fillmode))); } else { cusparseFillMode_t fillmode = CUSPARSE_FILL_MODE_UPPER; - KOKKOS_CUSPARSE_SAFE_CALL( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( cusparseSpMatSetAttribute(h->matDescr, CUSPARSE_SPMAT_FILL_MODE, &fillmode, sizeof(fillmode))); } // Specify Unit|Non-Unit diagonal type. cusparseDiagType_t diagtype = CUSPARSE_DIAG_TYPE_NON_UNIT; - KOKKOS_CUSPARSE_SAFE_CALL( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( cusparseSpMatSetAttribute(h->matDescr, CUSPARSE_SPMAT_DIAG_TYPE, &diagtype, sizeof(diagtype))); // Allocate an external buffer for analysis - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpSV_bufferSize(h->handle, h->transpose, &alpha, h->matDescr, h->vecBDescr_dummy, - h->vecXDescr_dummy, cudaValueType, CUSPARSE_SPSV_ALG_DEFAULT, - h->spsvDescr, &pBufferSize)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( + cusparseSpSV_bufferSize(h->handle, h->transpose, &alpha, h->matDescr, h->vecBDescr_dummy, h->vecXDescr_dummy, + cudaValueType, CUSPARSE_SPSV_ALG_DEFAULT, h->spsvDescr, &pBufferSize)); // pBuffer returned by cudaMalloc is automatically aligned to 128 bytes. KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&(h->pBuffer), pBufferSize)); // Run analysis - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpSV_analysis(h->handle, h->transpose, &alpha, h->matDescr, h->vecBDescr_dummy, - h->vecXDescr_dummy, cudaValueType, CUSPARSE_SPSV_ALG_DEFAULT, - h->spsvDescr, h->pBuffer)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpSV_analysis(h->handle, h->transpose, &alpha, h->matDescr, + h->vecBDescr_dummy, h->vecXDescr_dummy, cudaValueType, + CUSPARSE_SPSV_ALG_DEFAULT, h->spsvDescr, h->pBuffer)); // Destroy dummy dense vector descriptors - KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(h->vecBDescr_dummy)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(h->vecXDescr_dummy)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(h->vecBDescr_dummy)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(h->vecXDescr_dummy)); } #else // CUDA_VERSION < 11030 typedef typename KernelHandle::nnz_lno_t idx_type; @@ -152,7 +152,7 @@ void sptrsvcuSPARSE_symbolic(ExecutionSpace &space, KernelHandle *sptrsv_handle, typename KernelHandle::SPTRSVcuSparseHandleType *h = sptrsv_handle->get_cuSparseHandle(); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetStream(h->handle, space.cuda_stream())); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetStream(h->handle, space.cuda_stream())); cusparseStatus_t status; status = cusparseCreateCsrsv2Info(&(h->info)); @@ -283,27 +283,28 @@ void sptrsvcuSPARSE_solve(ExecutionSpace &space, KernelHandle *sptrsv_handle, ty } else { typename KernelHandle::SPTRSVcuSparseHandleType *h = sptrsv_handle->get_cuSparseHandle(); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetStream(h->handle, space.cuda_stream())); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetStream(h->handle, space.cuda_stream())); const scalar_type alpha = scalar_type(1.0); cudaDataType cudaValueType = cuda_data_type_from(); // Create dense vector B (RHS) - KOKKOS_CUSPARSE_SAFE_CALL( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( cusparseCreateDnVec(&(h->vecBDescr), static_cast(nrows), (void *)rhs.data(), cudaValueType)); // Create dense vector X (LHS) - KOKKOS_CUSPARSE_SAFE_CALL( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( cusparseCreateDnVec(&(h->vecXDescr), static_cast(nrows), (void *)lhs.data(), cudaValueType)); // Solve - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpSV_solve(h->handle, h->transpose, &alpha, h->matDescr, h->vecBDescr, - h->vecXDescr, cudaValueType, CUSPARSE_SPSV_ALG_DEFAULT, h->spsvDescr)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpSV_solve(h->handle, h->transpose, &alpha, h->matDescr, h->vecBDescr, + h->vecXDescr, cudaValueType, CUSPARSE_SPSV_ALG_DEFAULT, + h->spsvDescr)); // Destroy dense vector descriptors - KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(h->vecBDescr)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(h->vecXDescr)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(h->vecBDescr)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(h->vecXDescr)); } #else // CUDA_VERSION < 11030 typedef typename KernelHandle::nnz_lno_t idx_type; @@ -316,7 +317,7 @@ void sptrsvcuSPARSE_solve(ExecutionSpace &space, KernelHandle *sptrsv_handle, ty typename KernelHandle::SPTRSVcuSparseHandleType *h = sptrsv_handle->get_cuSparseHandle(); if constexpr (std::is_same_v) { - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetStream(h->handle, space.cuda_stream())); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetStream(h->handle, space.cuda_stream())); } int nnz = entries.extent_int(0); @@ -439,30 +440,30 @@ void sptrsvcuSPARSE_solve_streams(const std::vector &execspace_v h_v[i] = sptrsv_handle->get_cuSparseHandle(); // Bind cuspare handle to a stream - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetStream(h_v[i]->handle, execspace_v[i].cuda_stream())); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetStream(h_v[i]->handle, execspace_v[i].cuda_stream())); int64_t nrows = static_cast(sptrsv_handle->get_nrows()); // Create dense vector B (RHS) - KOKKOS_CUSPARSE_SAFE_CALL( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( cusparseCreateDnVec(&(h_v[i]->vecBDescr), nrows, (void *)rhs_v[i].data(), cudaValueType)); // Create dense vector X (LHS) - KOKKOS_CUSPARSE_SAFE_CALL( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( cusparseCreateDnVec(&(h_v[i]->vecXDescr), nrows, (void *)lhs_v[i].data(), cudaValueType)); } // Solve for (int i = 0; i < nstreams; i++) { - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpSV_solve(h_v[i]->handle, h_v[i]->transpose, &alpha, h_v[i]->matDescr, - h_v[i]->vecBDescr, h_v[i]->vecXDescr, cudaValueType, - CUSPARSE_SPSV_ALG_DEFAULT, h_v[i]->spsvDescr)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( + cusparseSpSV_solve(h_v[i]->handle, h_v[i]->transpose, &alpha, h_v[i]->matDescr, h_v[i]->vecBDescr, + h_v[i]->vecXDescr, cudaValueType, CUSPARSE_SPSV_ALG_DEFAULT, h_v[i]->spsvDescr)); } // Destroy dense vector descriptors for (int i = 0; i < nstreams; i++) { - KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(h_v[i]->vecBDescr)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(h_v[i]->vecXDescr)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(h_v[i]->vecBDescr)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(h_v[i]->vecXDescr)); } } #else // CUDA_VERSION < 11030 @@ -493,7 +494,7 @@ void sptrsvcuSPARSE_solve_streams(const std::vector &execspace_v h_v[i] = sptrsv_handle_v[i]->get_cuSparseHandle(); // Bind cuspare handle to a stream - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetStream(h_v[i]->handle, execspace_v[i].cuda_stream())); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetStream(h_v[i]->handle, execspace_v[i].cuda_stream())); if (h_v[i]->pBuffer == nullptr) { std::cout << " pBuffer invalid on stream " << i << std::endl; @@ -510,25 +511,25 @@ void sptrsvcuSPARSE_solve_streams(const std::vector &execspace_v int nnz = entries_v[i].extent_int(0); int nrows = static_cast(sptrsv_handle_v[i]->get_nrows()); if (std::is_same::value) { - KOKKOS_CUSPARSE_SAFE_CALL(cusparseDcsrsv2_solve( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDcsrsv2_solve( h_v[i]->handle, h_v[i]->transpose, nrows, nnz, reinterpret_cast(&alpha), h_v[i]->descr, reinterpret_cast(vals_v[i]), reinterpret_cast(rm_v[i]), reinterpret_cast(ent_v[i]), h_v[i]->info, reinterpret_cast(bv_v[i]), reinterpret_cast(xv_v[i]), h_v[i]->policy, h_v[i]->pBuffer)); } else if (std::is_same::value) { - KOKKOS_CUSPARSE_SAFE_CALL(cusparseScsrsv2_solve( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseScsrsv2_solve( h_v[i]->handle, h_v[i]->transpose, nrows, nnz, reinterpret_cast(&alpha), h_v[i]->descr, reinterpret_cast(vals_v[i]), reinterpret_cast(rm_v[i]), reinterpret_cast(ent_v[i]), h_v[i]->info, reinterpret_cast(bv_v[i]), reinterpret_cast(xv_v[i]), h_v[i]->policy, h_v[i]->pBuffer)); } else if (std::is_same >::value) { - KOKKOS_CUSPARSE_SAFE_CALL(cusparseZcsrsv2_solve( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseZcsrsv2_solve( h_v[i]->handle, h_v[i]->transpose, nrows, nnz, reinterpret_cast(&alpha), h_v[i]->descr, reinterpret_cast(vals_v[i]), reinterpret_cast(rm_v[i]), reinterpret_cast(ent_v[i]), h_v[i]->info, reinterpret_cast(bv_v[i]), reinterpret_cast(xv_v[i]), h_v[i]->policy, h_v[i]->pBuffer)); } else if (std::is_same >::value) { - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCcsrsv2_solve( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCcsrsv2_solve( h_v[i]->handle, h_v[i]->transpose, nrows, nnz, reinterpret_cast(&alpha), h_v[i]->descr, reinterpret_cast(vals_v[i]), reinterpret_cast(rm_v[i]), reinterpret_cast(ent_v[i]), h_v[i]->info, reinterpret_cast(bv_v[i]), diff --git a/sparse/src/KokkosSparse_Utils_cusparse.hpp b/sparse/src/KokkosSparse_Utils_cusparse.hpp index 07681cb409..510dc99dc9 100644 --- a/sparse/src/KokkosSparse_Utils_cusparse.hpp +++ b/sparse/src/KokkosSparse_Utils_cusparse.hpp @@ -64,9 +64,23 @@ inline void cusparse_internal_safe_call(cusparseStatus_t cusparseStatus, const c } } -// The macro below defines is the public interface for the safe cusparse calls. -// The functions themselves are protected by impl namespace. -#define KOKKOS_CUSPARSE_SAFE_CALL(call) KokkosSparse::Impl::cusparse_internal_safe_call(call, #call, __FILE__, __LINE__) +#define KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(call) \ + KokkosSparse::Impl::cusparse_internal_safe_call(call, #call, __FILE__, __LINE__) + +// Deprecated public interface for the cuSparse safe calls +#if defined(KOKKOS_COMPILER_MSVC) +#define KOKKOSSPARSE_CUSPARSE_SAFE_CALL(call) \ + (__pragma(message("warning: KOKKOS_CUSPARSE_SAFE_CALL is deprecated and will be removed in a future version")) \ + KOKKOSPARSE_IMPL_CUSPARSE_SAFE_CALL(call)) +#elif defined(KOKKOS_COMPILER_GNU) || defined(KOKKOS_COMPILER_CLANG) +#define KOKKOSSPARSE_CUSPARSE_SAFE_CALL(call) \ + (__extension__({ \ + _Pragma("\"KOKKOS_CUSPARSE_SAFE_CALL is deprecated and will be removed in a future version\""); \ + KOKKOSPARSE_IMPL_CUSPARSE_SAFE_CALL(call); \ + })) +#else +#define KOKKOSSPARSE_CUSPARSE_SAFE_CALL(call) KOKKOSPARSE_IMPL_CUSPARSE_SAFE_CALL(call) // no good way to deprecate? +#endif template cudaDataType cuda_data_type_from() { @@ -152,10 +166,10 @@ inline cusparseIndexType_t cusparse_index_type_t_from() { // destructed. struct TemporarySetCusparseStream { TemporarySetCusparseStream(cusparseHandle_t handle_, const Kokkos::Cuda& exec_) : handle(handle_) { - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetStream(handle, exec_.cuda_stream())); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetStream(handle, exec_.cuda_stream())); } - ~TemporarySetCusparseStream() { KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetStream(handle, NULL)); } + ~TemporarySetCusparseStream() { KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetStream(handle, NULL)); } cusparseHandle_t handle; }; diff --git a/sparse/src/KokkosSparse_spgemm_handle.hpp b/sparse/src/KokkosSparse_spgemm_handle.hpp index 1857e0bbc7..9967d287a2 100644 --- a/sparse/src/KokkosSparse_spgemm_handle.hpp +++ b/sparse/src/KokkosSparse_spgemm_handle.hpp @@ -181,14 +181,14 @@ class SPGEMMHandle { buffer3 = buffer4 = buffer5 = nullptr; cusparseHandle = kkControls.getCusparseHandle(); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_createDescr(&spgemmDescr)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMM_createDescr(&spgemmDescr)); } ~cuSparseSpgemmHandleType() { - KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(descr_A)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(descr_B)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(descr_C)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_destroyDescr(spgemmDescr)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(descr_A)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(descr_B)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(descr_C)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMM_destroyDescr(spgemmDescr)); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(buffer3)); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(buffer4)); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(buffer5)); @@ -205,11 +205,11 @@ class SPGEMMHandle { // Get singleton cusparse handle from default controls cusparseHandle = kkControls.getCusparseHandle(); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&generalDescr)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatType(generalDescr, CUSPARSE_MATRIX_TYPE_GENERAL)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(generalDescr, CUSPARSE_INDEX_BASE_ZERO)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&generalDescr)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatType(generalDescr, CUSPARSE_MATRIX_TYPE_GENERAL)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(generalDescr, CUSPARSE_INDEX_BASE_ZERO)); } - ~cuSparseSpgemmHandleType() { KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyMatDescr(generalDescr)); } + ~cuSparseSpgemmHandleType() { KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroyMatDescr(generalDescr)); } }; #endif #endif diff --git a/sparse/src/KokkosSparse_spmv_handle.hpp b/sparse/src/KokkosSparse_spmv_handle.hpp index 4da6e47551..a6e99538d3 100644 --- a/sparse/src/KokkosSparse_spmv_handle.hpp +++ b/sparse/src/KokkosSparse_spmv_handle.hpp @@ -124,7 +124,7 @@ struct CuSparse10_SpMV_Data : public TPL_SpMV_Data { exec.fence(); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(buffer)); #endif - KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(mat)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(mat)); } cusparseSpMatDescr_t mat; @@ -136,7 +136,7 @@ struct CuSparse10_SpMV_Data : public TPL_SpMV_Data { // Data used by cuSPARSE <10.3 for CRS, and >=9 for BSR struct CuSparse9_SpMV_Data : public TPL_SpMV_Data { CuSparse9_SpMV_Data(const Kokkos::Cuda& exec_) : TPL_SpMV_Data(exec_) {} - ~CuSparse9_SpMV_Data() { KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyMatDescr(mat)); } + ~CuSparse9_SpMV_Data() { KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroyMatDescr(mat)); } cusparseMatDescr_t mat; }; diff --git a/sparse/src/KokkosSparse_sptrsv_handle.hpp b/sparse/src/KokkosSparse_sptrsv_handle.hpp index acac8b8fe0..080f668fa3 100644 --- a/sparse/src/KokkosSparse_sptrsv_handle.hpp +++ b/sparse/src/KokkosSparse_sptrsv_handle.hpp @@ -126,9 +126,9 @@ class SPTRSVHandle { void *pBuffer{nullptr}; cuSparseHandleType(bool transpose_, bool /*is_lower*/) { - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreate(&handle)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreate(&handle)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_HOST)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_HOST)); if (transpose_) { transpose = CUSPARSE_OPERATION_TRANSPOSE; @@ -136,7 +136,7 @@ class SPTRSVHandle { transpose = CUSPARSE_OPERATION_NON_TRANSPOSE; } - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpSV_createDescr(&spsvDescr)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpSV_createDescr(&spsvDescr)); } ~cuSparseHandleType() { @@ -144,9 +144,9 @@ class SPTRSVHandle { KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(pBuffer)); pBuffer = nullptr; } - KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(matDescr)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpSV_destroyDescr(spsvDescr)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroy(handle)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(matDescr)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpSV_destroyDescr(spsvDescr)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroy(handle)); } }; #else // CUDA_VERSION < 11030 diff --git a/sparse/tpls/KokkosKernels_tpl_handles_def.hpp b/sparse/tpls/KokkosKernels_tpl_handles_def.hpp index d52959a591..d70ff5379e 100644 --- a/sparse/tpls/KokkosKernels_tpl_handles_def.hpp +++ b/sparse/tpls/KokkosKernels_tpl_handles_def.hpp @@ -25,14 +25,14 @@ namespace KokkosKernels { namespace Impl { -CusparseSingleton::CusparseSingleton() { KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreate(&cusparseHandle)); } +CusparseSingleton::CusparseSingleton() { KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreate(&cusparseHandle)); } CusparseSingleton& CusparseSingleton::singleton() { std::unique_ptr& instance = get_instance(); if (!instance) { instance = std::make_unique(); Kokkos::push_finalize_hook([&]() { - KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroy(instance->cusparseHandle)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroy(instance->cusparseHandle)); instance.reset(); }); } diff --git a/sparse/tpls/KokkosSparse_spadd_numeric_tpl_spec_decl.hpp b/sparse/tpls/KokkosSparse_spadd_numeric_tpl_spec_decl.hpp index eff19977cf..93362d176d 100644 --- a/sparse/tpls/KokkosSparse_spadd_numeric_tpl_spec_decl.hpp +++ b/sparse/tpls/KokkosSparse_spadd_numeric_tpl_spec_decl.hpp @@ -84,21 +84,21 @@ namespace Impl { auto &cuspHandle = KokkosKernels::Impl::CusparseSingleton::singleton().cusparseHandle; \ cusparsePointerMode_t oldPtrMode; \ \ - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetStream(cuspHandle, exec.cuda_stream())); \ - KOKKOS_CUSPARSE_SAFE_CALL(cusparseGetPointerMode(cuspHandle, &oldPtrMode)); \ - KOKKOS_CUSPARSE_SAFE_CALL( \ + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetStream(cuspHandle, exec.cuda_stream())); \ + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseGetPointerMode(cuspHandle, &oldPtrMode)); \ + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( \ cusparseSetPointerMode(cuspHandle, CUSPARSE_POINTER_MODE_HOST)); /* alpha, beta on host*/ \ OFFSET_TYPE nnzA = colidxA.extent(0); \ OFFSET_TYPE nnzB = colidxB.extent(0); \ - KOKKOS_CUSPARSE_SAFE_CALL(cusparse##TOKEN##csrgeam2( \ + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparse##TOKEN##csrgeam2( \ cuspHandle, m, n, reinterpret_cast(&alpha), cuspData.descrA, nnzA, \ reinterpret_cast(valuesA.data()), rowmapA.data(), colidxA.data(), \ reinterpret_cast(&beta), cuspData.descrB, nnzB, \ reinterpret_cast(valuesB.data()), rowmapB.data(), colidxB.data(), cuspData.descrC, \ reinterpret_cast(valuesC.data()), const_cast(rowmapC.data()), \ colidxC.data(), cuspData.workspace)); \ - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetPointerMode(cuspHandle, oldPtrMode)); \ - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetStream(cuspHandle, NULL)); \ + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetPointerMode(cuspHandle, oldPtrMode)); \ + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetStream(cuspHandle, NULL)); \ \ Kokkos::Profiling::popRegion(); \ } \ diff --git a/sparse/tpls/KokkosSparse_spadd_symbolic_tpl_spec_decl.hpp b/sparse/tpls/KokkosSparse_spadd_symbolic_tpl_spec_decl.hpp index 69286e5e98..878d1dd8e7 100644 --- a/sparse/tpls/KokkosSparse_spadd_symbolic_tpl_spec_decl.hpp +++ b/sparse/tpls/KokkosSparse_spadd_symbolic_tpl_spec_decl.hpp @@ -73,26 +73,26 @@ namespace Impl { OFFSET_TYPE nnzB = colidxB.extent(0); \ OFFSET_TYPE nnzC = 0; \ \ - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetStream(cuspHandle, exec.cuda_stream())); \ + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetStream(cuspHandle, exec.cuda_stream())); \ \ /* https://docs.nvidia.com/cuda/cusparse/index.html#cusparsecreatematdescr \ It sets the fields MatrixType and IndexBase to the default values \ CUSPARSE_MATRIX_TYPE_GENERAL and CUSPARSE_INDEX_BASE_ZERO, \ respectively, while leaving other fields uninitialized. */ \ \ - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&cuspData.descrA)); \ - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&cuspData.descrB)); \ - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&cuspData.descrC)); \ - KOKKOS_CUSPARSE_SAFE_CALL(cusparse##TOKEN##csrgeam2_bufferSizeExt( \ + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&cuspData.descrA)); \ + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&cuspData.descrB)); \ + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&cuspData.descrC)); \ + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparse##TOKEN##csrgeam2_bufferSizeExt( \ cuspHandle, m, n, &one, cuspData.descrA, nnzA, NULL, rowmapA.data(), colidxA.data(), &one, cuspData.descrB, \ nnzB, NULL, rowmapB.data(), colidxB.data(), cuspData.descrC, NULL, rowmapC.data(), NULL, &nbytes)); \ cuspData.nbytes = nbytes; \ cuspData.workspace = Kokkos::kokkos_malloc(nbytes); \ - KOKKOS_CUSPARSE_SAFE_CALL(cusparseXcsrgeam2Nnz( \ + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseXcsrgeam2Nnz( \ cuspHandle, m, n, cuspData.descrA, nnzA, rowmapA.data(), colidxA.data(), cuspData.descrB, nnzB, \ rowmapB.data(), colidxB.data(), cuspData.descrC, rowmapC.data(), &nnzC, cuspData.workspace)); \ addHandle->set_c_nnz(nnzC); \ - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetStream(cuspHandle, NULL)); \ + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetStream(cuspHandle, NULL)); \ \ Kokkos::Profiling::popRegion(); \ } \ diff --git a/sparse/tpls/KokkosSparse_spgemm_noreuse_tpl_spec_decl.hpp b/sparse/tpls/KokkosSparse_spgemm_noreuse_tpl_spec_decl.hpp index 6da49b683b..d54a91000a 100644 --- a/sparse/tpls/KokkosSparse_spgemm_noreuse_tpl_spec_decl.hpp +++ b/sparse/tpls/KokkosSparse_spgemm_noreuse_tpl_spec_decl.hpp @@ -55,53 +55,56 @@ Matrix spgemm_noreuse_cusparse(const MatrixConst &A, const MatrixConst &B) { typename Matrix::row_map_type::non_const_type row_mapC(Kokkos::view_alloc(Kokkos::WithoutInitializing, "C rowmap"), m + 1); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_createDescr(&spgemmDescr)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMM_createDescr(&spgemmDescr)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateCsr( &descr_A, m, n, A.graph.entries.extent(0), (void *)A.graph.row_map.data(), (void *)A.graph.entries.data(), (void *)A.values.data(), CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, cudaScalarType)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateCsr( &descr_B, n, k, B.graph.entries.extent(0), (void *)B.graph.row_map.data(), (void *)B.graph.entries.data(), (void *)B.values.data(), CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, cudaScalarType)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&descr_C, m, k, 0, (void *)row_mapC.data(), nullptr, nullptr, - CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, - cudaScalarType)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&descr_C, m, k, 0, (void *)row_mapC.data(), nullptr, nullptr, + CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, cudaScalarType)); //---------------------------------------------------------------------- // query workEstimation buffer size, allocate, then call again with buffer. - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_workEstimation(cusparseHandle, op, op, &alpha, descr_A, descr_B, &beta, - descr_C, cudaScalarType, alg, spgemmDescr, &bufferSize1, - nullptr)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMM_workEstimation(cusparseHandle, op, op, &alpha, descr_A, descr_B, + &beta, descr_C, cudaScalarType, alg, spgemmDescr, + &bufferSize1, nullptr)); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&buffer1, bufferSize1)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_workEstimation(cusparseHandle, op, op, &alpha, descr_A, descr_B, &beta, - descr_C, cudaScalarType, alg, spgemmDescr, &bufferSize1, - buffer1)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMM_workEstimation(cusparseHandle, op, op, &alpha, descr_A, descr_B, + &beta, descr_C, cudaScalarType, alg, spgemmDescr, + &bufferSize1, buffer1)); //---------------------------------------------------------------------- // query compute buffer size, allocate, then call again with buffer. - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_compute(cusparseHandle, op, op, &alpha, descr_A, descr_B, &beta, descr_C, - cudaScalarType, alg, spgemmDescr, &bufferSize2, nullptr)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMM_compute(cusparseHandle, op, op, &alpha, descr_A, descr_B, &beta, + descr_C, cudaScalarType, alg, spgemmDescr, &bufferSize2, + nullptr)); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&buffer2, bufferSize2)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_compute(cusparseHandle, op, op, &alpha, descr_A, descr_B, &beta, descr_C, - cudaScalarType, alg, spgemmDescr, &bufferSize2, buffer2)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMM_compute(cusparseHandle, op, op, &alpha, descr_A, descr_B, &beta, + descr_C, cudaScalarType, alg, spgemmDescr, &bufferSize2, + buffer2)); int64_t unused1, unused2, c_nnz; - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpMatGetSize(descr_C, &unused1, &unused2, &c_nnz)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpMatGetSize(descr_C, &unused1, &unused2, &c_nnz)); typename Matrix::index_type entriesC(Kokkos::view_alloc(Kokkos::WithoutInitializing, "C entries"), c_nnz); typename Matrix::values_type valuesC(Kokkos::view_alloc(Kokkos::WithoutInitializing, "C values"), c_nnz); - KOKKOS_CUSPARSE_SAFE_CALL( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( cusparseCsrSetPointers(descr_C, (void *)row_mapC.data(), (void *)entriesC.data(), (void *)valuesC.data())); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_compute(cusparseHandle, op, op, &alpha, descr_A, descr_B, &beta, descr_C, - cudaScalarType, alg, spgemmDescr, &bufferSize2, buffer2)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_copy(cusparseHandle, op, op, &alpha, descr_A, descr_B, &beta, descr_C, - cudaScalarType, alg, spgemmDescr)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(descr_A)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(descr_B)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(descr_C)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_destroyDescr(spgemmDescr)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMM_compute(cusparseHandle, op, op, &alpha, descr_A, descr_B, &beta, + descr_C, cudaScalarType, alg, spgemmDescr, &bufferSize2, + buffer2)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMM_copy(cusparseHandle, op, op, &alpha, descr_A, descr_B, &beta, + descr_C, cudaScalarType, alg, spgemmDescr)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(descr_A)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(descr_B)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(descr_C)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMM_destroyDescr(spgemmDescr)); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(buffer1)); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(buffer2)); return Matrix("C", m, k, c_nnz, valuesC, row_mapC, entriesC); diff --git a/sparse/tpls/KokkosSparse_spgemm_numeric_tpl_spec_decl.hpp b/sparse/tpls/KokkosSparse_spgemm_numeric_tpl_spec_decl.hpp index c26118fec9..917704870a 100644 --- a/sparse/tpls/KokkosSparse_spgemm_numeric_tpl_spec_decl.hpp +++ b/sparse/tpls/KokkosSparse_spgemm_numeric_tpl_spec_decl.hpp @@ -67,13 +67,13 @@ void spgemm_numeric_cusparse(KernelHandle *handle, lno_t /*m*/, lno_t /*n*/, lno return; } - KOKKOS_CUSPARSE_SAFE_CALL( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( cusparseCsrSetPointers(h->descr_A, (void *)row_mapA.data(), (void *)entriesA.data(), (void *)valuesA.data())); - KOKKOS_CUSPARSE_SAFE_CALL( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( cusparseCsrSetPointers(h->descr_B, (void *)row_mapB.data(), (void *)entriesB.data(), (void *)valuesB.data())); - KOKKOS_CUSPARSE_SAFE_CALL( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( cusparseCsrSetPointers(h->descr_C, (void *)row_mapC.data(), (void *)entriesC.data(), (void *)valuesC.data())); if (!handle->are_entries_computed()) { @@ -81,13 +81,14 @@ void spgemm_numeric_cusparse(KernelHandle *handle, lno_t /*m*/, lno_t /*n*/, lno // If symbolic was previously called with computeRowptrs=true, then // buffer5 will have already been allocated to the correct size. Otherwise // size and allocate it here. - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_copy(h->cusparseHandle, h->opA, h->opB, h->descr_A, h->descr_B, - h->descr_C, h->alg, h->spgemmDescr, &h->bufferSize5, nullptr)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_copy(h->cusparseHandle, h->opA, h->opB, h->descr_A, + h->descr_B, h->descr_C, h->alg, h->spgemmDescr, + &h->bufferSize5, nullptr)); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&h->buffer5, h->bufferSize5)); } - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_copy(h->cusparseHandle, h->opA, h->opB, h->descr_A, h->descr_B, - h->descr_C, h->alg, h->spgemmDescr, &h->bufferSize5, - h->buffer5)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_copy(h->cusparseHandle, h->opA, h->opB, h->descr_A, + h->descr_B, h->descr_C, h->alg, h->spgemmDescr, + &h->bufferSize5, h->buffer5)); handle->set_computed_rowptrs(); handle->set_computed_entries(); } @@ -100,12 +101,12 @@ void spgemm_numeric_cusparse(KernelHandle *handle, lno_t /*m*/, lno_t /*n*/, lno // handle, we save/restore the pointer mode to not interference with // others' use cusparsePointerMode_t oldPtrMode; - KOKKOS_CUSPARSE_SAFE_CALL(cusparseGetPointerMode(h->cusparseHandle, &oldPtrMode)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetPointerMode(h->cusparseHandle, CUSPARSE_POINTER_MODE_HOST)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_compute(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, - h->descr_B, &beta, h->descr_C, h->scalarType, h->alg, - h->spgemmDescr)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetPointerMode(h->cusparseHandle, oldPtrMode)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseGetPointerMode(h->cusparseHandle, &oldPtrMode)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetPointerMode(h->cusparseHandle, CUSPARSE_POINTER_MODE_HOST)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_compute(h->cusparseHandle, h->opA, h->opB, &alpha, + h->descr_A, h->descr_B, &beta, h->descr_C, + h->scalarType, h->alg, h->spgemmDescr)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetPointerMode(h->cusparseHandle, oldPtrMode)); handle->set_call_numeric(); } @@ -120,20 +121,20 @@ void spgemm_numeric_cusparse(KernelHandle *handle, lno_t /*m*/, lno_t /*n*/, lno const ConstRowMapType &row_mapC, const EntriesType &entriesC, const ValuesType &valuesC) { using scalar_type = typename KernelHandle::nnz_scalar_t; auto h = handle->get_cusparse_spgemm_handle(); - KOKKOS_CUSPARSE_SAFE_CALL( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( cusparseCsrSetPointers(h->descr_A, (void *)row_mapA.data(), (void *)entriesA.data(), (void *)valuesA.data())); - KOKKOS_CUSPARSE_SAFE_CALL( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( cusparseCsrSetPointers(h->descr_B, (void *)row_mapB.data(), (void *)entriesB.data(), (void *)valuesB.data())); - KOKKOS_CUSPARSE_SAFE_CALL( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( cusparseCsrSetPointers(h->descr_C, (void *)row_mapC.data(), (void *)entriesC.data(), (void *)valuesC.data())); const auto alpha = Kokkos::ArithTraits::one(); const auto beta = Kokkos::ArithTraits::zero(); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_compute(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, h->descr_B, - &beta, h->descr_C, h->scalarType, CUSPARSE_SPGEMM_DEFAULT, - h->spgemmDescr, &h->bufferSize4, h->buffer4)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_copy(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, h->descr_B, - &beta, h->descr_C, h->scalarType, CUSPARSE_SPGEMM_DEFAULT, - h->spgemmDescr)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( + cusparseSpGEMM_compute(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, h->descr_B, &beta, h->descr_C, + h->scalarType, CUSPARSE_SPGEMM_DEFAULT, h->spgemmDescr, &h->bufferSize4, h->buffer4)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMM_copy(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, + h->descr_B, &beta, h->descr_C, h->scalarType, + CUSPARSE_SPGEMM_DEFAULT, h->spgemmDescr)); handle->set_computed_entries(); handle->set_call_numeric(); } @@ -180,7 +181,7 @@ void spgemm_numeric_cusparse(KernelHandle *handle, lno_t m, lno_t n, lno_t k, co // Only call numeric if C actually has entries if (handle->get_c_nnz()) { - KOKKOS_CUSPARSE_SAFE_CALL(cusparseXcsrgemm( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseXcsrgemm( h->cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, m, k, n, h->generalDescr, nnzA, valuesA.data(), row_mapA.data(), entriesA.data(), h->generalDescr, nnzB, valuesB.data(), row_mapB.data(), entriesB.data(), h->generalDescr, valuesC.data(), row_mapC.data(), entriesC.data())); diff --git a/sparse/tpls/KokkosSparse_spgemm_symbolic_tpl_spec_decl.hpp b/sparse/tpls/KokkosSparse_spgemm_symbolic_tpl_spec_decl.hpp index 6385fff835..c789a1cf4d 100644 --- a/sparse/tpls/KokkosSparse_spgemm_symbolic_tpl_spec_decl.hpp +++ b/sparse/tpls/KokkosSparse_spgemm_symbolic_tpl_spec_decl.hpp @@ -72,61 +72,62 @@ void spgemm_symbolic_cusparse(KernelHandle *handle, lno_t m, lno_t n, lno_t k, c // which however is not available in this function. So we fake it with the // entries instead. Fortunately, it seems cupsarse does not access that in // the symbolic phase. - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&h->descr_A, m, n, entriesA.extent(0), (void *)row_mapA.data(), - (void *)entriesA.data(), (void *)entriesA.data() /*fake*/, - CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, - h->scalarType)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( + cusparseCreateCsr(&h->descr_A, m, n, entriesA.extent(0), (void *)row_mapA.data(), (void *)entriesA.data(), + (void *)entriesA.data() /*fake*/, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, h->scalarType)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&h->descr_B, n, k, entriesB.extent(0), (void *)row_mapB.data(), - (void *)entriesB.data(), (void *)entriesB.data() /*fake*/, - CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, - h->scalarType)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( + cusparseCreateCsr(&h->descr_B, n, k, entriesB.extent(0), (void *)row_mapB.data(), (void *)entriesB.data(), + (void *)entriesB.data() /*fake*/, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, h->scalarType)); #if CUDA_VERSION >= 12020 // at some point cusparseCreateCsr started to need a non-null row-pointer // array, even if the operation that consumed the handle doesn't need to // read it. This was observed on a system with CUDA 12.2, but it may have // started earlier. - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&h->descr_C, m, k, 0, (void *)row_mapC.data(), nullptr, nullptr, - CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, - h->scalarType)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&h->descr_C, m, k, 0, (void *)row_mapC.data(), nullptr, + nullptr, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, h->scalarType)); #else - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&h->descr_C, m, k, 0, nullptr, nullptr, nullptr, CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, h->scalarType)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&h->descr_C, m, k, 0, nullptr, nullptr, nullptr, + CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, h->scalarType)); #endif //---------------------------------------------------------------------- // ask bufferSize1 bytes for external memory - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_workEstimation(h->cusparseHandle, h->opA, h->opB, h->descr_A, - h->descr_B, h->descr_C, h->alg, h->spgemmDescr, - &bufferSize1, nullptr)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_workEstimation(h->cusparseHandle, h->opA, h->opB, + h->descr_A, h->descr_B, h->descr_C, h->alg, + h->spgemmDescr, &bufferSize1, nullptr)); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&buffer1, bufferSize1)); // inspect matrices A and B to understand the memory requirement for the // next step - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_workEstimation(h->cusparseHandle, h->opA, h->opB, h->descr_A, - h->descr_B, h->descr_C, h->alg, h->spgemmDescr, - &bufferSize1, buffer1)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_workEstimation(h->cusparseHandle, h->opA, h->opB, + h->descr_A, h->descr_B, h->descr_C, h->alg, + h->spgemmDescr, &bufferSize1, buffer1)); //---------------------------------------------------------------------- // Compute nnz of C - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_nnz(h->cusparseHandle, h->opA, h->opB, h->descr_A, h->descr_B, - h->descr_C, h->alg, h->spgemmDescr, &bufferSize2, nullptr, - &h->bufferSize3, nullptr, &h->bufferSize4, nullptr)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_nnz( + h->cusparseHandle, h->opA, h->opB, h->descr_A, h->descr_B, h->descr_C, h->alg, h->spgemmDescr, &bufferSize2, + nullptr, &h->bufferSize3, nullptr, &h->bufferSize4, nullptr)); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&buffer2, bufferSize2)); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&h->buffer3, h->bufferSize3)); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&h->buffer4, h->bufferSize4)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_nnz(h->cusparseHandle, h->opA, h->opB, h->descr_A, h->descr_B, - h->descr_C, h->alg, h->spgemmDescr, &bufferSize2, buffer2, - &h->bufferSize3, h->buffer3, &h->bufferSize4, h->buffer4)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_nnz( + h->cusparseHandle, h->opA, h->opB, h->descr_A, h->descr_B, h->descr_C, h->alg, h->spgemmDescr, &bufferSize2, + buffer2, &h->bufferSize3, h->buffer3, &h->bufferSize4, h->buffer4)); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(buffer2)); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(buffer1)); int64_t C_nrow, C_ncol, C_nnz; - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpMatGetSize(h->descr_C, &C_nrow, &C_ncol, &C_nnz)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpMatGetSize(h->descr_C, &C_nrow, &C_ncol, &C_nnz)); if (C_nnz > std::numeric_limits::max()) { throw std::runtime_error("nnz of C overflowed over 32-bit int\n"); } @@ -145,15 +146,16 @@ void spgemm_symbolic_cusparse(KernelHandle *handle, lno_t m, lno_t n, lno_t k, c // we must use dummy versions and then discard them. KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&dummyEntries, C_nnz * sizeof(Ordinal))); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&dummyValues, C_nnz * sizeof(Scalar))); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCsrSetPointers(h->descr_C, row_mapC.data(), dummyEntries, dummyValues)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( + cusparseCsrSetPointers(h->descr_C, row_mapC.data(), dummyEntries, dummyValues)); //-------------------------------------------------------------------------- cusparseSpGEMMreuse_copy(h->cusparseHandle, h->opA, h->opB, h->descr_A, h->descr_B, h->descr_C, h->alg, h->spgemmDescr, &h->bufferSize5, nullptr); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&h->buffer5, h->bufferSize5)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_copy(h->cusparseHandle, h->opA, h->opB, h->descr_A, h->descr_B, - h->descr_C, h->alg, h->spgemmDescr, &h->bufferSize5, - h->buffer5)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_copy(h->cusparseHandle, h->opA, h->opB, h->descr_A, + h->descr_B, h->descr_C, h->alg, h->spgemmDescr, + &h->bufferSize5, h->buffer5)); if (!handle->get_c_nnz()) { // cuSPARSE does not populate C rowptrs if C has no entries Kokkos::deep_copy(typename KernelHandle::HandleExecSpace(), row_mapC, Offset(0)); @@ -194,40 +196,40 @@ void spgemm_symbolic_cusparse(KernelHandle *handle, lno_t m, lno_t n, lno_t k, c KOKKOS_IMPL_CUDA_SAFE_CALL( cudaMalloc(&dummyValues_AB, sizeof(scalar_type) * std::max(entriesA.extent(0), entriesB.extent(0)))); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&h->descr_A, m, n, entriesA.extent(0), (void *)row_mapA.data(), - (void *)entriesA.data(), dummyValues_AB, CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, h->scalarType)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateCsr( + &h->descr_A, m, n, entriesA.extent(0), (void *)row_mapA.data(), (void *)entriesA.data(), dummyValues_AB, + CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, h->scalarType)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&h->descr_B, n, k, entriesB.extent(0), (void *)row_mapB.data(), - (void *)entriesB.data(), dummyValues_AB, CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, h->scalarType)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateCsr( + &h->descr_B, n, k, entriesB.extent(0), (void *)row_mapB.data(), (void *)entriesB.data(), dummyValues_AB, + CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, h->scalarType)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&h->descr_C, m, k, 0, row_mapC.data(), nullptr, nullptr, - CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, - h->scalarType)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&h->descr_C, m, k, 0, row_mapC.data(), nullptr, nullptr, + CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, h->scalarType)); //---------------------------------------------------------------------- // query workEstimation buffer size, allocate, then call again with buffer. - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_workEstimation(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, - h->descr_B, &beta, h->descr_C, h->scalarType, h->alg, - h->spgemmDescr, &h->bufferSize3, nullptr)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( + cusparseSpGEMM_workEstimation(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, h->descr_B, &beta, + h->descr_C, h->scalarType, h->alg, h->spgemmDescr, &h->bufferSize3, nullptr)); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&h->buffer3, h->bufferSize3)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_workEstimation(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, - h->descr_B, &beta, h->descr_C, h->scalarType, h->alg, - h->spgemmDescr, &h->bufferSize3, h->buffer3)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( + cusparseSpGEMM_workEstimation(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, h->descr_B, &beta, + h->descr_C, h->scalarType, h->alg, h->spgemmDescr, &h->bufferSize3, h->buffer3)); //---------------------------------------------------------------------- // query compute buffer size, allocate, then call again with buffer. - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_compute(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, h->descr_B, - &beta, h->descr_C, h->scalarType, CUSPARSE_SPGEMM_DEFAULT, - h->spgemmDescr, &h->bufferSize4, nullptr)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( + cusparseSpGEMM_compute(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, h->descr_B, &beta, h->descr_C, + h->scalarType, CUSPARSE_SPGEMM_DEFAULT, h->spgemmDescr, &h->bufferSize4, nullptr)); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&h->buffer4, h->bufferSize4)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_compute(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, h->descr_B, - &beta, h->descr_C, h->scalarType, CUSPARSE_SPGEMM_DEFAULT, - h->spgemmDescr, &h->bufferSize4, h->buffer4)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( + cusparseSpGEMM_compute(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, h->descr_B, &beta, h->descr_C, + h->scalarType, CUSPARSE_SPGEMM_DEFAULT, h->spgemmDescr, &h->bufferSize4, h->buffer4)); int64_t C_nrow, C_ncol, C_nnz; - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpMatGetSize(h->descr_C, &C_nrow, &C_ncol, &C_nnz)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpMatGetSize(h->descr_C, &C_nrow, &C_ncol, &C_nnz)); if (C_nnz > std::numeric_limits::max()) { throw std::runtime_error("nnz of C overflowed over 32-bit int\n"); } @@ -245,20 +247,20 @@ void spgemm_symbolic_cusparse(KernelHandle *handle, lno_t m, lno_t n, lno_t k, c // handle, so we can reuse those. KOKKOS_IMPL_CUDA_SAFE_CALL( cudaMalloc(&dummyValues_AB, sizeof(scalar_type) * std::max(entriesA.extent(0), entriesB.extent(0)))); - KOKKOS_CUSPARSE_SAFE_CALL( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( cusparseCsrSetPointers(h->descr_A, (void *)row_mapA.data(), (void *)entriesA.data(), dummyValues_AB)); - KOKKOS_CUSPARSE_SAFE_CALL( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( cusparseCsrSetPointers(h->descr_B, (void *)row_mapB.data(), (void *)entriesB.data(), dummyValues_AB)); } void *dummyEntries_C, *dummyValues_C; KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc(&dummyEntries_C, sizeof(ordinal_type) * C_nnz)); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc(&dummyValues_C, sizeof(scalar_type) * C_nnz)); - KOKKOS_CUSPARSE_SAFE_CALL( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( cusparseCsrSetPointers(h->descr_C, (void *)row_mapC.data(), dummyEntries_C, dummyValues_C)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_copy(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, h->descr_B, - &beta, h->descr_C, h->scalarType, CUSPARSE_SPGEMM_DEFAULT, - h->spgemmDescr)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMM_copy(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, + h->descr_B, &beta, h->descr_C, h->scalarType, + CUSPARSE_SPGEMM_DEFAULT, h->spgemmDescr)); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(dummyValues_C)); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(dummyEntries_C)); @@ -292,7 +294,7 @@ void spgemm_symbolic_cusparse(KernelHandle *handle, lno_t m, lno_t n, lno_t k, c Kokkos::deep_copy(typename KernelHandle::HandleExecSpace(), row_mapC, size_type(0)); nnzC = 0; } else { - KOKKOS_CUSPARSE_SAFE_CALL( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( cusparseXcsrgemmNnz(h->cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, m, k, n, h->generalDescr, nnzA, row_mapA.data(), entriesA.data(), h->generalDescr, nnzB, row_mapB.data(), entriesB.data(), h->generalDescr, row_mapC.data(), nnzTotalDevHostPtr)); diff --git a/sparse/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp b/sparse/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp index defb13044f..5cd70d0f8e 100644 --- a/sparse/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp +++ b/sparse/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp @@ -309,9 +309,9 @@ void spmv_bsr_cusparse(const Kokkos::Cuda& exec, Handle* handle, const char mode /* create and set the subhandle and matrix descriptor */ subhandle = new KokkosSparse::Impl::CuSparse9_SpMV_Data(exec); handle->tpl_rank1 = subhandle; - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&subhandle->mat)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatType(subhandle->mat, CUSPARSE_MATRIX_TYPE_GENERAL)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(subhandle->mat, CUSPARSE_INDEX_BASE_ZERO)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&subhandle->mat)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatType(subhandle->mat, CUSPARSE_MATRIX_TYPE_GENERAL)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(subhandle->mat, CUSPARSE_INDEX_BASE_ZERO)); } cusparseDirection_t dirA = CUSPARSE_DIRECTION_ROW; @@ -321,25 +321,25 @@ void spmv_bsr_cusparse(const Kokkos::Cuda& exec, Handle* handle, const char mode "With cuSPARSE non-generic API, offset and entry types must both be int. " "Something wrong with TPL avail logic."); if constexpr (std::is_same_v) { - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSbsrmv( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSbsrmv( cusparseHandle, dirA, myCusparseOperation, A.numRows(), A.numCols(), A.nnz(), reinterpret_cast(&alpha), subhandle->mat, reinterpret_cast(A.values.data()), A.graph.row_map.data(), A.graph.entries.data(), A.blockDim(), reinterpret_cast(x.data()), reinterpret_cast(&beta), reinterpret_cast(y.data()))); } else if constexpr (std::is_same_v) { - KOKKOS_CUSPARSE_SAFE_CALL(cusparseDbsrmv( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDbsrmv( cusparseHandle, dirA, myCusparseOperation, A.numRows(), A.numCols(), A.nnz(), reinterpret_cast(&alpha), subhandle->mat, reinterpret_cast(A.values.data()), A.graph.row_map.data(), A.graph.entries.data(), A.blockDim(), reinterpret_cast(x.data()), reinterpret_cast(&beta), reinterpret_cast(y.data()))); } else if constexpr (std::is_same_v>) { - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCbsrmv( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCbsrmv( cusparseHandle, dirA, myCusparseOperation, A.numRows(), A.numCols(), A.nnz(), reinterpret_cast(&alpha), subhandle->mat, reinterpret_cast(A.values.data()), A.graph.row_map.data(), A.graph.entries.data(), A.blockDim(), reinterpret_cast(x.data()), reinterpret_cast(&beta), reinterpret_cast(y.data()))); } else if constexpr (std::is_same_v>) { - KOKKOS_CUSPARSE_SAFE_CALL( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( cusparseZbsrmv(cusparseHandle, dirA, myCusparseOperation, A.numRows(), A.numCols(), A.nnz(), reinterpret_cast(&alpha), subhandle->mat, reinterpret_cast(A.values.data()), A.graph.row_map.data(), @@ -413,9 +413,9 @@ void spmv_mv_bsr_cusparse(const Kokkos::Cuda& exec, Handle* handle, const char m /* create and set the subhandle and matrix descriptor */ subhandle = new KokkosSparse::Impl::CuSparse9_SpMV_Data(exec); handle->tpl_rank2 = subhandle; - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&subhandle->mat)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatType(subhandle->mat, CUSPARSE_MATRIX_TYPE_GENERAL)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(subhandle->mat, CUSPARSE_INDEX_BASE_ZERO)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&subhandle->mat)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatType(subhandle->mat, CUSPARSE_MATRIX_TYPE_GENERAL)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(subhandle->mat, CUSPARSE_INDEX_BASE_ZERO)); } cusparseDirection_t dirA = CUSPARSE_DIRECTION_ROW; @@ -424,28 +424,28 @@ void spmv_mv_bsr_cusparse(const Kokkos::Cuda& exec, Handle* handle, const char m "With cuSPARSE non-generic API, offset and entry types must both be int. " "Something wrong with TPL avail logic."); if constexpr (std::is_same_v) { - KOKKOS_CUSPARSE_SAFE_CALL( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( cusparseSbsrmm(cusparseHandle, dirA, myCusparseOperation, CUSPARSE_OPERATION_NON_TRANSPOSE, A.numRows(), colx, A.numCols(), A.nnz(), reinterpret_cast(&alpha), subhandle->mat, reinterpret_cast(A.values.data()), A.graph.row_map.data(), A.graph.entries.data(), A.blockDim(), reinterpret_cast(x.data()), ldx, reinterpret_cast(&beta), reinterpret_cast(y.data()), ldy)); } else if constexpr (std::is_same_v) { - KOKKOS_CUSPARSE_SAFE_CALL( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( cusparseDbsrmm(cusparseHandle, dirA, myCusparseOperation, CUSPARSE_OPERATION_NON_TRANSPOSE, A.numRows(), colx, A.numCols(), A.nnz(), reinterpret_cast(&alpha), subhandle->mat, reinterpret_cast(A.values.data()), A.graph.row_map.data(), A.graph.entries.data(), A.blockDim(), reinterpret_cast(x.data()), ldx, reinterpret_cast(&beta), reinterpret_cast(y.data()), ldy)); } else if constexpr (std::is_same_v>) { - KOKKOS_CUSPARSE_SAFE_CALL( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( cusparseCbsrmm(cusparseHandle, dirA, myCusparseOperation, CUSPARSE_OPERATION_NON_TRANSPOSE, A.numRows(), colx, A.numCols(), A.nnz(), reinterpret_cast(&alpha), subhandle->mat, reinterpret_cast(A.values.data()), A.graph.row_map.data(), A.graph.entries.data(), A.blockDim(), reinterpret_cast(x.data()), ldx, reinterpret_cast(&beta), reinterpret_cast(y.data()), ldy)); } else if constexpr (std::is_same_v>) { - KOKKOS_CUSPARSE_SAFE_CALL(cusparseZbsrmm( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseZbsrmm( cusparseHandle, dirA, myCusparseOperation, CUSPARSE_OPERATION_NON_TRANSPOSE, A.numRows(), colx, A.numCols(), A.nnz(), reinterpret_cast(&alpha), subhandle->mat, reinterpret_cast(A.values.data()), A.graph.row_map.data(), A.graph.entries.data(), diff --git a/sparse/tpls/KokkosSparse_spmv_mv_tpl_spec_decl.hpp b/sparse/tpls/KokkosSparse_spmv_mv_tpl_spec_decl.hpp index bcd367b9bb..b5f9ddf7c0 100644 --- a/sparse/tpls/KokkosSparse_spmv_mv_tpl_spec_decl.hpp +++ b/sparse/tpls/KokkosSparse_spmv_mv_tpl_spec_decl.hpp @@ -87,8 +87,9 @@ cusparseDnMatDescr_t make_cusparse_dn_mat_descr_t(ViewType &view) { const cusparseOrder_t order = CUSPARSE_ORDER_COL; cusparseDnMatDescr_t descr; - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateDnMat(&descr, static_cast(rows), static_cast(cols), - static_cast(ld), values, valueType, order)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateDnMat(&descr, static_cast(rows), + static_cast(cols), static_cast(ld), values, + valueType, order)); return descr; } @@ -175,22 +176,23 @@ void spmv_mv_cusparse(const Kokkos::Cuda &exec, Handle *handle, const char mode[ subhandle = new KokkosSparse::Impl::CuSparse10_SpMV_Data(exec); handle->tpl_rank2 = subhandle; /* create matrix */ - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&subhandle->mat, A.numRows(), A.numCols(), A.nnz(), - (void *)A.graph.row_map.data(), (void *)A.graph.entries.data(), - (void *)A.values.data(), myCusparseOffsetType, myCusparseEntryType, - CUSPARSE_INDEX_BASE_ZERO, aCusparseType)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( + cusparseCreateCsr(&subhandle->mat, A.numRows(), A.numCols(), A.nnz(), (void *)A.graph.row_map.data(), + (void *)A.graph.entries.data(), (void *)A.values.data(), myCusparseOffsetType, + myCusparseEntryType, CUSPARSE_INDEX_BASE_ZERO, aCusparseType)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpMM_bufferSize(cusparseHandle, opA, opB, &alpha, subhandle->mat, vecX, &beta, - vecY, computeType, algo, &subhandle->bufferSize)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpMM_bufferSize(cusparseHandle, opA, opB, &alpha, subhandle->mat, vecX, + &beta, vecY, computeType, algo, + &subhandle->bufferSize)); KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc(&subhandle->buffer, subhandle->bufferSize)); } - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpMM(cusparseHandle, opA, opB, &alpha, subhandle->mat, vecX, &beta, vecY, - computeType, algo, subhandle->buffer)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpMM(cusparseHandle, opA, opB, &alpha, subhandle->mat, vecX, &beta, vecY, + computeType, algo, subhandle->buffer)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyDnMat(vecX)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyDnMat(vecY)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroyDnMat(vecX)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroyDnMat(vecY)); } #define KOKKOSSPARSE_SPMV_MV_CUSPARSE(SCALAR, ORDINAL, OFFSET, XL, YL, SPACE) \ diff --git a/sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp b/sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp index 8cf1f49e51..94abd043fa 100644 --- a/sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp +++ b/sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp @@ -83,8 +83,8 @@ void spmv_cusparse(const Kokkos::Cuda& exec, Handle* handle, const char mode[], /* create lhs and rhs */ cusparseDnVecDescr_t vecX, vecY; - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateDnVec(&vecX, x.extent_int(0), (void*)x.data(), myCudaDataType)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateDnVec(&vecY, y.extent_int(0), (void*)y.data(), myCudaDataType)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateDnVec(&vecX, x.extent_int(0), (void*)x.data(), myCudaDataType)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateDnVec(&vecY, y.extent_int(0), (void*)y.data(), myCudaDataType)); // Prior to CUDA 11.2.1, ALG2 was more performant than default for imbalanced // matrices. After 11.2.1, the default is performant for imbalanced matrices, @@ -115,14 +115,15 @@ void spmv_cusparse(const Kokkos::Cuda& exec, Handle* handle, const char mode[], handle->tpl_rank1 = subhandle; /* create matrix */ - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&subhandle->mat, A.numRows(), A.numCols(), A.nnz(), - (void*)A.graph.row_map.data(), (void*)A.graph.entries.data(), - (void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType, - CUSPARSE_INDEX_BASE_ZERO, myCudaDataType)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( + cusparseCreateCsr(&subhandle->mat, A.numRows(), A.numCols(), A.nnz(), (void*)A.graph.row_map.data(), + (void*)A.graph.entries.data(), (void*)A.values.data(), myCusparseOffsetType, + myCusparseEntryType, CUSPARSE_INDEX_BASE_ZERO, myCudaDataType)); /* size and allocate buffer */ - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpMV_bufferSize(cusparseHandle, myCusparseOperation, &alpha, subhandle->mat, vecX, - &beta, vecY, myCudaDataType, algo, &subhandle->bufferSize)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpMV_bufferSize(cusparseHandle, myCusparseOperation, &alpha, + subhandle->mat, vecX, &beta, vecY, myCudaDataType, + algo, &subhandle->bufferSize)); // Async memory management introduced in CUDA 11.2 #if (CUDA_VERSION >= 11020) KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMallocAsync(&subhandle->buffer, subhandle->bufferSize, exec.cuda_stream())); @@ -132,11 +133,11 @@ void spmv_cusparse(const Kokkos::Cuda& exec, Handle* handle, const char mode[], } /* perform SpMV */ - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpMV(cusparseHandle, myCusparseOperation, &alpha, subhandle->mat, vecX, &beta, vecY, - myCudaDataType, algo, subhandle->buffer)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpMV(cusparseHandle, myCusparseOperation, &alpha, subhandle->mat, vecX, + &beta, vecY, myCudaDataType, algo, subhandle->buffer)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(vecX)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(vecY)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(vecX)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(vecY)); #elif (9000 <= CUDA_VERSION) @@ -151,9 +152,9 @@ void spmv_cusparse(const Kokkos::Cuda& exec, Handle* handle, const char mode[], subhandle = new KokkosSparse::Impl::CuSparse9_SpMV_Data(exec); handle->tpl_rank1 = subhandle; cusparseMatDescr_t descrA = 0; - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&subhandle->mat)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatType(subhandle->mat, CUSPARSE_MATRIX_TYPE_GENERAL)); - KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(subhandle->mat, CUSPARSE_INDEX_BASE_ZERO)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&subhandle->mat)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatType(subhandle->mat, CUSPARSE_MATRIX_TYPE_GENERAL)); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(subhandle->mat, CUSPARSE_INDEX_BASE_ZERO)); } /* perform the actual SpMV operation */ @@ -161,26 +162,26 @@ void spmv_cusparse(const Kokkos::Cuda& exec, Handle* handle, const char mode[], "With cuSPARSE pre-10.0, offset type must be int. Something wrong with " "TPL avail logic."); if constexpr (std::is_same_v) { - KOKKOS_CUSPARSE_SAFE_CALL(cusparseScsrmv( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseScsrmv( cusparseHandle, myCusparseOperation, A.numRows(), A.numCols(), A.nnz(), reinterpret_cast(&alpha), subhandle->mat, reinterpret_cast(A.values.data()), A.graph.row_map.data(), A.graph.entries.data(), reinterpret_cast(x.data()), reinterpret_cast(&beta), reinterpret_cast(y.data()))); } else if constexpr (std::is_same_v) { - KOKKOS_CUSPARSE_SAFE_CALL(cusparseDcsrmv( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDcsrmv( cusparseHandle, myCusparseOperation, A.numRows(), A.numCols(), A.nnz(), reinterpret_cast(&alpha), subhandle->mat, reinterpret_cast(A.values.data()), A.graph.row_map.data(), A.graph.entries.data(), reinterpret_cast(x.data()), reinterpret_cast(&beta), reinterpret_cast(y.data()))); } else if constexpr (std::is_same_v>) { - KOKKOS_CUSPARSE_SAFE_CALL(cusparseCcsrmv( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCcsrmv( cusparseHandle, myCusparseOperation, A.numRows(), A.numCols(), A.nnz(), reinterpret_cast(&alpha), subhandle->mat, reinterpret_cast(A.values.data()), A.graph.row_map.data(), A.graph.entries.data(), reinterpret_cast(x.data()), reinterpret_cast(&beta), reinterpret_cast(y.data()))); } else if constexpr (std::is_same_v>) { - KOKKOS_CUSPARSE_SAFE_CALL( + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL( cusparseZcsrmv(cusparseHandle, myCusparseOperation, A.numRows(), A.numCols(), A.nnz(), reinterpret_cast(&alpha), subhandle->mat, reinterpret_cast(A.values.data()), A.graph.row_map.data(), diff --git a/sparse/unit_test/Test_Sparse_Utils_cusparse.hpp b/sparse/unit_test/Test_Sparse_Utils_cusparse.hpp index ae802b24c4..227bc3df60 100644 --- a/sparse/unit_test/Test_Sparse_Utils_cusparse.hpp +++ b/sparse/unit_test/Test_Sparse_Utils_cusparse.hpp @@ -28,11 +28,11 @@ void test_cusparse_safe_call() { bool caught_exception = false; cusparseStatus_t myStatus = CUSPARSE_STATUS_SUCCESS; - KOKKOS_CUSPARSE_SAFE_CALL(myStatus); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(myStatus); try { myStatus = CUSPARSE_STATUS_INVALID_VALUE; - KOKKOS_CUSPARSE_SAFE_CALL(myStatus); + KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(myStatus); } catch (std::runtime_error& e) { caught_exception = true; }