diff --git a/perf_test/batched/sparse/cusolver/KokkosBatched_Test_cusolverSp.cpp b/perf_test/batched/sparse/cusolver/KokkosBatched_Test_cusolverSp.cpp
index 8b2b48c0f4..b3e1fd2aba 100644
--- a/perf_test/batched/sparse/cusolver/KokkosBatched_Test_cusolverSp.cpp
+++ b/perf_test/batched/sparse/cusolver/KokkosBatched_Test_cusolverSp.cpp
@@ -91,9 +91,9 @@ struct Functor_Test_SparseCuSolveQR {
     const size_t m   = _r.extent(0) - 1;
 
     cusparseMatDescr_t descrA = 0;
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&descrA));
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL));
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ZERO));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&descrA));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ZERO));
 
     double tol  = 1e-18;
     int reorder = 0;
@@ -146,9 +146,9 @@ struct Functor_Test_Block_SparseCuSolveQR {
     const size_t block_m   = N * m;
 
     cusparseMatDescr_t descrA = 0;
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&descrA));
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL));
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ZERO));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&descrA));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ZERO));
 
     double tol  = 1e-18;
     int reorder = 0;
@@ -229,9 +229,9 @@ struct Functor_Test_SparseCuSolveChol {
     const size_t m   = _r.extent(0) - 1;
 
     cusparseMatDescr_t descrA = 0;
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&descrA));
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL));
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ZERO));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&descrA));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ZERO));
 
     double tol  = 1e-18;
     int reorder = 0;
@@ -284,9 +284,9 @@ struct Functor_Test_Block_SparseCuSolveChol {
     const size_t block_m   = N * m;
 
     cusparseMatDescr_t descrA = 0;
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&descrA));
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL));
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ZERO));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&descrA));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatType(descrA, CUSPARSE_MATRIX_TYPE_GENERAL));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(descrA, CUSPARSE_INDEX_BASE_ZERO));
 
     double tol  = 1e-18;
     int reorder = 0;
diff --git a/perf_test/sparse/KokkosSparse_spadd.cpp b/perf_test/sparse/KokkosSparse_spadd.cpp
index 063c151812..0d9614e6a3 100644
--- a/perf_test/sparse/KokkosSparse_spadd.cpp
+++ b/perf_test/sparse/KokkosSparse_spadd.cpp
@@ -277,20 +277,20 @@ void run_experiment(int argc, char** argv, CommonInputParams) {
   const double alphabeta = 1.0;
 
   if (params.use_cusparse) {
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreate(&cusparseHandle));
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetPointerMode(cusparseHandle, CUSPARSE_POINTER_MODE_HOST));
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&A_cusparse));
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&B_cusparse));
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&C_cusparse));
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatType(A_cusparse, CUSPARSE_MATRIX_TYPE_GENERAL));
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatType(B_cusparse, CUSPARSE_MATRIX_TYPE_GENERAL));
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatType(C_cusparse, CUSPARSE_MATRIX_TYPE_GENERAL));
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatDiagType(A_cusparse, CUSPARSE_DIAG_TYPE_NON_UNIT));
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatDiagType(B_cusparse, CUSPARSE_DIAG_TYPE_NON_UNIT));
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatDiagType(C_cusparse, CUSPARSE_DIAG_TYPE_NON_UNIT));
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(A_cusparse, CUSPARSE_INDEX_BASE_ZERO));
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(B_cusparse, CUSPARSE_INDEX_BASE_ZERO));
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(C_cusparse, CUSPARSE_INDEX_BASE_ZERO));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreate(&cusparseHandle));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetPointerMode(cusparseHandle, CUSPARSE_POINTER_MODE_HOST));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&A_cusparse));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&B_cusparse));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&C_cusparse));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatType(A_cusparse, CUSPARSE_MATRIX_TYPE_GENERAL));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatType(B_cusparse, CUSPARSE_MATRIX_TYPE_GENERAL));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatType(C_cusparse, CUSPARSE_MATRIX_TYPE_GENERAL));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatDiagType(A_cusparse, CUSPARSE_DIAG_TYPE_NON_UNIT));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatDiagType(B_cusparse, CUSPARSE_DIAG_TYPE_NON_UNIT));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatDiagType(C_cusparse, CUSPARSE_DIAG_TYPE_NON_UNIT));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(A_cusparse, CUSPARSE_INDEX_BASE_ZERO));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(B_cusparse, CUSPARSE_INDEX_BASE_ZERO));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(C_cusparse, CUSPARSE_INDEX_BASE_ZERO));
   }
 #endif
 #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL
@@ -320,16 +320,16 @@ void run_experiment(int argc, char** argv, CommonInputParams) {
       if constexpr (std::is_same_v<lno_t, int> && std::is_same_v<size_type, int>) {
         // Symbolic phase: compute buffer size, then compute nnz
         size_t bufferSize;
-        KOKKOS_CUSPARSE_SAFE_CALL(cusparseDcsrgeam2_bufferSizeExt(
+        KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDcsrgeam2_bufferSizeExt(
             cusparseHandle, A.numRows(), A.numCols(), &alphabeta, A_cusparse, A.nnz(), A.values.data(),
             A.graph.row_map.data(), A.graph.entries.data(), &alphabeta, B_cusparse, B.nnz(), B.values.data(),
             B.graph.row_map.data(), B.graph.entries.data(), C_cusparse, NULL, row_mapC.data(), NULL, &bufferSize));
         // Allocate work buffer
         KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void**)&cusparseBuffer, bufferSize));
-        KOKKOS_CUSPARSE_SAFE_CALL(cusparseXcsrgeam2Nnz(cusparseHandle, m, n, A_cusparse, A.nnz(),
-                                                       A.graph.row_map.data(), A.graph.entries.data(), B_cusparse,
-                                                       B.nnz(), B.graph.row_map.data(), B.graph.entries.data(),
-                                                       C_cusparse, row_mapC.data(), &c_nnz, cusparseBuffer));
+        KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
+            cusparseXcsrgeam2Nnz(cusparseHandle, m, n, A_cusparse, A.nnz(), A.graph.row_map.data(),
+                                 A.graph.entries.data(), B_cusparse, B.nnz(), B.graph.row_map.data(),
+                                 B.graph.entries.data(), C_cusparse, row_mapC.data(), &c_nnz, cusparseBuffer));
       } else {
         throw std::runtime_error(
             "Must enable int as both ordinal and offset type in KokkosKernels "
@@ -351,7 +351,7 @@ void run_experiment(int argc, char** argv, CommonInputParams) {
       if (params.use_cusparse) {
 #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE
         if constexpr (std::is_same_v<lno_t, int> && std::is_same_v<size_type, int>) {
-          KOKKOS_CUSPARSE_SAFE_CALL(cusparseDcsrgeam2(
+          KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDcsrgeam2(
               cusparseHandle, m, n, &alphabeta, A_cusparse, A.nnz(), A.values.data(), A.graph.row_map.data(),
               A.graph.entries.data(), &alphabeta, B_cusparse, B.nnz(), B.values.data(), B.graph.row_map.data(),
               B.graph.entries.data(), C_cusparse, valuesC.data(), row_mapC.data(), entriesC.data(), cusparseBuffer));
@@ -379,7 +379,7 @@ void run_experiment(int argc, char** argv, CommonInputParams) {
   }
 
 #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE
-  if (params.use_cusparse) KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroy(cusparseHandle));
+  if (params.use_cusparse) KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroy(cusparseHandle));
 #endif
 
 #ifdef KOKKOSKERNELS_ENABLE_TPL_MKL
diff --git a/perf_test/sparse/KokkosSparse_spmv_struct_tuning.cpp b/perf_test/sparse/KokkosSparse_spmv_struct_tuning.cpp
index 3a857fcb23..57a445bbff 100644
--- a/perf_test/sparse/KokkosSparse_spmv_struct_tuning.cpp
+++ b/perf_test/sparse/KokkosSparse_spmv_struct_tuning.cpp
@@ -509,14 +509,16 @@ int main(int argc, char** argv) {
 
       /* create matrix */
       cusparseSpMatDescr_t A_cusparse;
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr(
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateCsr(
           &A_cusparse, A.numRows(), A.numCols(), A.nnz(), (void*)A.graph.row_map.data(), (void*)A.graph.entries.data(),
           (void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType, CUSPARSE_INDEX_BASE_ZERO, myCudaDataType));
 
       /* create lhs and rhs */
       cusparseDnVecDescr_t vecX, vecY;
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateDnVec(&vecX, x1.extent_int(0), (void*)x1.data(), myCudaDataType));
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateDnVec(&vecY, y1.extent_int(0), (void*)y1.data(), myCudaDataType));
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
+          cusparseCreateDnVec(&vecX, x1.extent_int(0), (void*)x1.data(), myCudaDataType));
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
+          cusparseCreateDnVec(&vecY, y1.extent_int(0), (void*)y1.data(), myCudaDataType));
 
       const double alpha = 1.0, beta = 1.0;
       size_t bufferSize = 0;
@@ -528,9 +530,9 @@ int main(int argc, char** argv) {
 #else
       cusparseSpMVAlg_t alg = CUSPARSE_MV_ALG_DEFAULT;
 #endif
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpMV_bufferSize(controls.getCusparseHandle(), CUSPARSE_OPERATION_NON_TRANSPOSE,
-                                                        &alpha, A_cusparse, vecX, &beta, vecY, myCudaDataType, alg,
-                                                        &bufferSize));
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
+          cusparseSpMV_bufferSize(controls.getCusparseHandle(), CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, A_cusparse,
+                                  vecX, &beta, vecY, myCudaDataType, alg, &bufferSize));
       KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc(&dBuffer, bufferSize));
 
       /* perform SpMV */
@@ -540,8 +542,9 @@ int main(int argc, char** argv) {
       double ave_time = 0.0;
       for (int i = 0; i < loop; i++) {
         Kokkos::Timer timer;
-        KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpMV(controls.getCusparseHandle(), CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha,
-                                               A_cusparse, vecX, &beta, vecY, myCudaDataType, alg, dBuffer));
+        KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpMV(controls.getCusparseHandle(),
+                                                          CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, A_cusparse, vecX,
+                                                          &beta, vecY, myCudaDataType, alg, dBuffer));
         Kokkos::fence();
         double time = timer.seconds();
         ave_time += time;
@@ -565,9 +568,9 @@ int main(int argc, char** argv) {
       Kokkos::Profiling::popRegion();
 
       KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(dBuffer));
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(vecX));
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(vecY));
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(A_cusparse));
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(vecX));
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(vecY));
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(A_cusparse));
 #else
       // The data needs to be reformatted for cusparse before launching the
       // kernel. Step one, extract raw data
diff --git a/sparse/impl/KokkosSparse_sptrsv_cuSPARSE_impl.hpp b/sparse/impl/KokkosSparse_sptrsv_cuSPARSE_impl.hpp
index 24bf3382db..59d3945d32 100644
--- a/sparse/impl/KokkosSparse_sptrsv_cuSPARSE_impl.hpp
+++ b/sparse/impl/KokkosSparse_sptrsv_cuSPARSE_impl.hpp
@@ -55,7 +55,7 @@ void sptrsvcuSPARSE_symbolic(ExecutionSpace &space, KernelHandle *sptrsv_handle,
 
     typename KernelHandle::SPTRSVcuSparseHandleType *h = sptrsv_handle->get_cuSparseHandle();
 
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetStream(h->handle, space.cuda_stream()));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetStream(h->handle, space.cuda_stream()));
 
     int64_t nnz = static_cast<int64_t>(entries.extent(0));
     size_t pBufferSize;
@@ -85,52 +85,52 @@ void sptrsvcuSPARSE_symbolic(ExecutionSpace &space, KernelHandle *sptrsv_handle,
     cudaDataType cudaValueType            = cuda_data_type_from<scalar_type>();
 
     // Create sparse matrix in CSR format
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr(
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateCsr(
         &(h->matDescr), static_cast<int64_t>(nrows), static_cast<int64_t>(nrows), nnz, rm, (void *)entries.data(),
         (void *)values.data(), cudaCsrRowMapType, cudaCsrColIndType, CUSPARSE_INDEX_BASE_ZERO, cudaValueType));
 
     // Create dummy dense vector B (RHS)
     nnz_scalar_view_t b_dummy(Kokkos::view_alloc(space, "b_dummy"), nrows);
-    KOKKOS_CUSPARSE_SAFE_CALL(
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
         cusparseCreateDnVec(&(h->vecBDescr_dummy), static_cast<int64_t>(nrows), b_dummy.data(), cudaValueType));
 
     // Create dummy dense vector X (LHS)
     nnz_scalar_view_t x_dummy(Kokkos::view_alloc(space, "x_dummy"), nrows);
-    KOKKOS_CUSPARSE_SAFE_CALL(
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
         cusparseCreateDnVec(&(h->vecXDescr_dummy), static_cast<int64_t>(nrows), x_dummy.data(), cudaValueType));
 
     // Specify Lower|Upper fill mode
     if (is_lower) {
       cusparseFillMode_t fillmode = CUSPARSE_FILL_MODE_LOWER;
-      KOKKOS_CUSPARSE_SAFE_CALL(
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
           cusparseSpMatSetAttribute(h->matDescr, CUSPARSE_SPMAT_FILL_MODE, &fillmode, sizeof(fillmode)));
     } else {
       cusparseFillMode_t fillmode = CUSPARSE_FILL_MODE_UPPER;
-      KOKKOS_CUSPARSE_SAFE_CALL(
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
           cusparseSpMatSetAttribute(h->matDescr, CUSPARSE_SPMAT_FILL_MODE, &fillmode, sizeof(fillmode)));
     }
 
     // Specify Unit|Non-Unit diagonal type.
     cusparseDiagType_t diagtype = CUSPARSE_DIAG_TYPE_NON_UNIT;
-    KOKKOS_CUSPARSE_SAFE_CALL(
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
         cusparseSpMatSetAttribute(h->matDescr, CUSPARSE_SPMAT_DIAG_TYPE, &diagtype, sizeof(diagtype)));
 
     // Allocate an external buffer for analysis
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpSV_bufferSize(h->handle, h->transpose, &alpha, h->matDescr, h->vecBDescr_dummy,
-                                                      h->vecXDescr_dummy, cudaValueType, CUSPARSE_SPSV_ALG_DEFAULT,
-                                                      h->spsvDescr, &pBufferSize));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
+        cusparseSpSV_bufferSize(h->handle, h->transpose, &alpha, h->matDescr, h->vecBDescr_dummy, h->vecXDescr_dummy,
+                                cudaValueType, CUSPARSE_SPSV_ALG_DEFAULT, h->spsvDescr, &pBufferSize));
 
     // pBuffer returned by cudaMalloc is automatically aligned to 128 bytes.
     KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&(h->pBuffer), pBufferSize));
 
     // Run analysis
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpSV_analysis(h->handle, h->transpose, &alpha, h->matDescr, h->vecBDescr_dummy,
-                                                    h->vecXDescr_dummy, cudaValueType, CUSPARSE_SPSV_ALG_DEFAULT,
-                                                    h->spsvDescr, h->pBuffer));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpSV_analysis(h->handle, h->transpose, &alpha, h->matDescr,
+                                                               h->vecBDescr_dummy, h->vecXDescr_dummy, cudaValueType,
+                                                               CUSPARSE_SPSV_ALG_DEFAULT, h->spsvDescr, h->pBuffer));
 
     // Destroy dummy dense vector descriptors
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(h->vecBDescr_dummy));
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(h->vecXDescr_dummy));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(h->vecBDescr_dummy));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(h->vecXDescr_dummy));
   }
 #else  // CUDA_VERSION < 11030
   typedef typename KernelHandle::nnz_lno_t idx_type;
@@ -152,7 +152,7 @@ void sptrsvcuSPARSE_symbolic(ExecutionSpace &space, KernelHandle *sptrsv_handle,
 
     typename KernelHandle::SPTRSVcuSparseHandleType *h = sptrsv_handle->get_cuSparseHandle();
 
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetStream(h->handle, space.cuda_stream()));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetStream(h->handle, space.cuda_stream()));
 
     cusparseStatus_t status;
     status = cusparseCreateCsrsv2Info(&(h->info));
@@ -283,27 +283,28 @@ void sptrsvcuSPARSE_solve(ExecutionSpace &space, KernelHandle *sptrsv_handle, ty
   } else {
     typename KernelHandle::SPTRSVcuSparseHandleType *h = sptrsv_handle->get_cuSparseHandle();
 
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetStream(h->handle, space.cuda_stream()));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetStream(h->handle, space.cuda_stream()));
 
     const scalar_type alpha = scalar_type(1.0);
 
     cudaDataType cudaValueType = cuda_data_type_from<scalar_type>();
 
     // Create dense vector B (RHS)
-    KOKKOS_CUSPARSE_SAFE_CALL(
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
         cusparseCreateDnVec(&(h->vecBDescr), static_cast<int64_t>(nrows), (void *)rhs.data(), cudaValueType));
 
     // Create dense vector X (LHS)
-    KOKKOS_CUSPARSE_SAFE_CALL(
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
         cusparseCreateDnVec(&(h->vecXDescr), static_cast<int64_t>(nrows), (void *)lhs.data(), cudaValueType));
 
     // Solve
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpSV_solve(h->handle, h->transpose, &alpha, h->matDescr, h->vecBDescr,
-                                                 h->vecXDescr, cudaValueType, CUSPARSE_SPSV_ALG_DEFAULT, h->spsvDescr));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpSV_solve(h->handle, h->transpose, &alpha, h->matDescr, h->vecBDescr,
+                                                            h->vecXDescr, cudaValueType, CUSPARSE_SPSV_ALG_DEFAULT,
+                                                            h->spsvDescr));
 
     // Destroy dense vector descriptors
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(h->vecBDescr));
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(h->vecXDescr));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(h->vecBDescr));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(h->vecXDescr));
   }
 #else  // CUDA_VERSION < 11030
   typedef typename KernelHandle::nnz_lno_t idx_type;
@@ -316,7 +317,7 @@ void sptrsvcuSPARSE_solve(ExecutionSpace &space, KernelHandle *sptrsv_handle, ty
     typename KernelHandle::SPTRSVcuSparseHandleType *h = sptrsv_handle->get_cuSparseHandle();
 
     if constexpr (std::is_same_v<ExecutionSpace, Kokkos::Cuda>) {
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetStream(h->handle, space.cuda_stream()));
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetStream(h->handle, space.cuda_stream()));
     }
 
     int nnz = entries.extent_int(0);
@@ -439,30 +440,30 @@ void sptrsvcuSPARSE_solve_streams(const std::vector<ExecutionSpace> &execspace_v
       h_v[i]                          = sptrsv_handle->get_cuSparseHandle();
 
       // Bind cuspare handle to a stream
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetStream(h_v[i]->handle, execspace_v[i].cuda_stream()));
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetStream(h_v[i]->handle, execspace_v[i].cuda_stream()));
 
       int64_t nrows = static_cast<int64_t>(sptrsv_handle->get_nrows());
 
       // Create dense vector B (RHS)
-      KOKKOS_CUSPARSE_SAFE_CALL(
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
           cusparseCreateDnVec(&(h_v[i]->vecBDescr), nrows, (void *)rhs_v[i].data(), cudaValueType));
 
       // Create dense vector X (LHS)
-      KOKKOS_CUSPARSE_SAFE_CALL(
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
           cusparseCreateDnVec(&(h_v[i]->vecXDescr), nrows, (void *)lhs_v[i].data(), cudaValueType));
     }
 
     // Solve
     for (int i = 0; i < nstreams; i++) {
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpSV_solve(h_v[i]->handle, h_v[i]->transpose, &alpha, h_v[i]->matDescr,
-                                                   h_v[i]->vecBDescr, h_v[i]->vecXDescr, cudaValueType,
-                                                   CUSPARSE_SPSV_ALG_DEFAULT, h_v[i]->spsvDescr));
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
+          cusparseSpSV_solve(h_v[i]->handle, h_v[i]->transpose, &alpha, h_v[i]->matDescr, h_v[i]->vecBDescr,
+                             h_v[i]->vecXDescr, cudaValueType, CUSPARSE_SPSV_ALG_DEFAULT, h_v[i]->spsvDescr));
     }
 
     // Destroy dense vector descriptors
     for (int i = 0; i < nstreams; i++) {
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(h_v[i]->vecBDescr));
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(h_v[i]->vecXDescr));
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(h_v[i]->vecBDescr));
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(h_v[i]->vecXDescr));
     }
   }
 #else  // CUDA_VERSION < 11030
@@ -493,7 +494,7 @@ void sptrsvcuSPARSE_solve_streams(const std::vector<ExecutionSpace> &execspace_v
       h_v[i]             = sptrsv_handle_v[i]->get_cuSparseHandle();
 
       // Bind cuspare handle to a stream
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetStream(h_v[i]->handle, execspace_v[i].cuda_stream()));
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetStream(h_v[i]->handle, execspace_v[i].cuda_stream()));
 
       if (h_v[i]->pBuffer == nullptr) {
         std::cout << "  pBuffer invalid on stream " << i << std::endl;
@@ -510,25 +511,25 @@ void sptrsvcuSPARSE_solve_streams(const std::vector<ExecutionSpace> &execspace_v
       int nnz   = entries_v[i].extent_int(0);
       int nrows = static_cast<int>(sptrsv_handle_v[i]->get_nrows());
       if (std::is_same<scalar_type, double>::value) {
-        KOKKOS_CUSPARSE_SAFE_CALL(cusparseDcsrsv2_solve(
+        KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDcsrsv2_solve(
             h_v[i]->handle, h_v[i]->transpose, nrows, nnz, reinterpret_cast<const double *>(&alpha), h_v[i]->descr,
             reinterpret_cast<const double *>(vals_v[i]), reinterpret_cast<const int *>(rm_v[i]),
             reinterpret_cast<const int *>(ent_v[i]), h_v[i]->info, reinterpret_cast<const double *>(bv_v[i]),
             reinterpret_cast<double *>(xv_v[i]), h_v[i]->policy, h_v[i]->pBuffer));
       } else if (std::is_same<scalar_type, float>::value) {
-        KOKKOS_CUSPARSE_SAFE_CALL(cusparseScsrsv2_solve(
+        KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseScsrsv2_solve(
             h_v[i]->handle, h_v[i]->transpose, nrows, nnz, reinterpret_cast<const float *>(&alpha), h_v[i]->descr,
             reinterpret_cast<const float *>(vals_v[i]), reinterpret_cast<const int *>(rm_v[i]),
             reinterpret_cast<const int *>(ent_v[i]), h_v[i]->info, reinterpret_cast<const float *>(bv_v[i]),
             reinterpret_cast<float *>(xv_v[i]), h_v[i]->policy, h_v[i]->pBuffer));
       } else if (std::is_same<scalar_type, Kokkos::complex<double> >::value) {
-        KOKKOS_CUSPARSE_SAFE_CALL(cusparseZcsrsv2_solve(
+        KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseZcsrsv2_solve(
             h_v[i]->handle, h_v[i]->transpose, nrows, nnz, reinterpret_cast<const cuDoubleComplex *>(&alpha),
             h_v[i]->descr, reinterpret_cast<const cuDoubleComplex *>(vals_v[i]), reinterpret_cast<const int *>(rm_v[i]),
             reinterpret_cast<const int *>(ent_v[i]), h_v[i]->info, reinterpret_cast<const cuDoubleComplex *>(bv_v[i]),
             reinterpret_cast<cuDoubleComplex *>(xv_v[i]), h_v[i]->policy, h_v[i]->pBuffer));
       } else if (std::is_same<scalar_type, Kokkos::complex<float> >::value) {
-        KOKKOS_CUSPARSE_SAFE_CALL(cusparseCcsrsv2_solve(
+        KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCcsrsv2_solve(
             h_v[i]->handle, h_v[i]->transpose, nrows, nnz, reinterpret_cast<const cuComplex *>(&alpha), h_v[i]->descr,
             reinterpret_cast<const cuComplex *>(vals_v[i]), reinterpret_cast<const int *>(rm_v[i]),
             reinterpret_cast<const int *>(ent_v[i]), h_v[i]->info, reinterpret_cast<const cuComplex *>(bv_v[i]),
diff --git a/sparse/src/KokkosSparse_Utils_cusparse.hpp b/sparse/src/KokkosSparse_Utils_cusparse.hpp
index 07681cb409..510dc99dc9 100644
--- a/sparse/src/KokkosSparse_Utils_cusparse.hpp
+++ b/sparse/src/KokkosSparse_Utils_cusparse.hpp
@@ -64,9 +64,23 @@ inline void cusparse_internal_safe_call(cusparseStatus_t cusparseStatus, const c
   }
 }
 
-// The macro below defines is the public interface for the safe cusparse calls.
-// The functions themselves are protected by impl namespace.
-#define KOKKOS_CUSPARSE_SAFE_CALL(call) KokkosSparse::Impl::cusparse_internal_safe_call(call, #call, __FILE__, __LINE__)
+#define KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(call) \
+  KokkosSparse::Impl::cusparse_internal_safe_call(call, #call, __FILE__, __LINE__)
+
+// Deprecated public interface for the cuSparse safe calls
+#if defined(KOKKOS_COMPILER_MSVC)
+#define KOKKOSSPARSE_CUSPARSE_SAFE_CALL(call)                                                                    \
+  (__pragma(message("warning: KOKKOS_CUSPARSE_SAFE_CALL is deprecated and will be removed in a future version")) \
+       KOKKOSPARSE_IMPL_CUSPARSE_SAFE_CALL(call))
+#elif defined(KOKKOS_COMPILER_GNU) || defined(KOKKOS_COMPILER_CLANG)
+#define KOKKOSSPARSE_CUSPARSE_SAFE_CALL(call)                                                       \
+  (__extension__({                                                                                  \
+    _Pragma("\"KOKKOS_CUSPARSE_SAFE_CALL is deprecated and will be removed in a future version\""); \
+    KOKKOSPARSE_IMPL_CUSPARSE_SAFE_CALL(call);                                                      \
+  }))
+#else
+#define KOKKOSSPARSE_CUSPARSE_SAFE_CALL(call) KOKKOSPARSE_IMPL_CUSPARSE_SAFE_CALL(call)  // no good way to deprecate?
+#endif
 
 template <typename T>
 cudaDataType cuda_data_type_from() {
@@ -152,10 +166,10 @@ inline cusparseIndexType_t cusparse_index_type_t_from<unsigned short>() {
 // destructed.
 struct TemporarySetCusparseStream {
   TemporarySetCusparseStream(cusparseHandle_t handle_, const Kokkos::Cuda& exec_) : handle(handle_) {
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetStream(handle, exec_.cuda_stream()));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetStream(handle, exec_.cuda_stream()));
   }
 
-  ~TemporarySetCusparseStream() { KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetStream(handle, NULL)); }
+  ~TemporarySetCusparseStream() { KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetStream(handle, NULL)); }
 
   cusparseHandle_t handle;
 };
diff --git a/sparse/src/KokkosSparse_spgemm_handle.hpp b/sparse/src/KokkosSparse_spgemm_handle.hpp
index 1857e0bbc7..9967d287a2 100644
--- a/sparse/src/KokkosSparse_spgemm_handle.hpp
+++ b/sparse/src/KokkosSparse_spgemm_handle.hpp
@@ -181,14 +181,14 @@ class SPGEMMHandle {
       buffer3 = buffer4 = buffer5 = nullptr;
 
       cusparseHandle = kkControls.getCusparseHandle();
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_createDescr(&spgemmDescr));
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMM_createDescr(&spgemmDescr));
     }
 
     ~cuSparseSpgemmHandleType() {
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(descr_A));
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(descr_B));
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(descr_C));
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_destroyDescr(spgemmDescr));
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(descr_A));
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(descr_B));
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(descr_C));
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMM_destroyDescr(spgemmDescr));
       KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(buffer3));
       KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(buffer4));
       KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(buffer5));
@@ -205,11 +205,11 @@ class SPGEMMHandle {
       // Get singleton cusparse handle from default controls
       cusparseHandle = kkControls.getCusparseHandle();
 
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&generalDescr));
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatType(generalDescr, CUSPARSE_MATRIX_TYPE_GENERAL));
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(generalDescr, CUSPARSE_INDEX_BASE_ZERO));
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&generalDescr));
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatType(generalDescr, CUSPARSE_MATRIX_TYPE_GENERAL));
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(generalDescr, CUSPARSE_INDEX_BASE_ZERO));
     }
-    ~cuSparseSpgemmHandleType() { KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyMatDescr(generalDescr)); }
+    ~cuSparseSpgemmHandleType() { KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroyMatDescr(generalDescr)); }
   };
 #endif
 #endif
diff --git a/sparse/src/KokkosSparse_spmv_handle.hpp b/sparse/src/KokkosSparse_spmv_handle.hpp
index 4da6e47551..a6e99538d3 100644
--- a/sparse/src/KokkosSparse_spmv_handle.hpp
+++ b/sparse/src/KokkosSparse_spmv_handle.hpp
@@ -124,7 +124,7 @@ struct CuSparse10_SpMV_Data : public TPL_SpMV_Data<Kokkos::Cuda> {
     exec.fence();
     KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(buffer));
 #endif
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(mat));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(mat));
   }
 
   cusparseSpMatDescr_t mat;
@@ -136,7 +136,7 @@ struct CuSparse10_SpMV_Data : public TPL_SpMV_Data<Kokkos::Cuda> {
 // Data used by cuSPARSE <10.3 for CRS, and >=9 for BSR
 struct CuSparse9_SpMV_Data : public TPL_SpMV_Data<Kokkos::Cuda> {
   CuSparse9_SpMV_Data(const Kokkos::Cuda& exec_) : TPL_SpMV_Data(exec_) {}
-  ~CuSparse9_SpMV_Data() { KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyMatDescr(mat)); }
+  ~CuSparse9_SpMV_Data() { KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroyMatDescr(mat)); }
 
   cusparseMatDescr_t mat;
 };
diff --git a/sparse/src/KokkosSparse_sptrsv_handle.hpp b/sparse/src/KokkosSparse_sptrsv_handle.hpp
index acac8b8fe0..080f668fa3 100644
--- a/sparse/src/KokkosSparse_sptrsv_handle.hpp
+++ b/sparse/src/KokkosSparse_sptrsv_handle.hpp
@@ -126,9 +126,9 @@ class SPTRSVHandle {
     void *pBuffer{nullptr};
 
     cuSparseHandleType(bool transpose_, bool /*is_lower*/) {
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreate(&handle));
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreate(&handle));
 
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_HOST));
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetPointerMode(handle, CUSPARSE_POINTER_MODE_HOST));
 
       if (transpose_) {
         transpose = CUSPARSE_OPERATION_TRANSPOSE;
@@ -136,7 +136,7 @@ class SPTRSVHandle {
         transpose = CUSPARSE_OPERATION_NON_TRANSPOSE;
       }
 
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpSV_createDescr(&spsvDescr));
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpSV_createDescr(&spsvDescr));
     }
 
     ~cuSparseHandleType() {
@@ -144,9 +144,9 @@ class SPTRSVHandle {
         KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(pBuffer));
         pBuffer = nullptr;
       }
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(matDescr));
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpSV_destroyDescr(spsvDescr));
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroy(handle));
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(matDescr));
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpSV_destroyDescr(spsvDescr));
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroy(handle));
     }
   };
 #else  // CUDA_VERSION < 11030
diff --git a/sparse/tpls/KokkosKernels_tpl_handles_def.hpp b/sparse/tpls/KokkosKernels_tpl_handles_def.hpp
index d52959a591..d70ff5379e 100644
--- a/sparse/tpls/KokkosKernels_tpl_handles_def.hpp
+++ b/sparse/tpls/KokkosKernels_tpl_handles_def.hpp
@@ -25,14 +25,14 @@
 namespace KokkosKernels {
 namespace Impl {
 
-CusparseSingleton::CusparseSingleton() { KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreate(&cusparseHandle)); }
+CusparseSingleton::CusparseSingleton() { KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreate(&cusparseHandle)); }
 
 CusparseSingleton& CusparseSingleton::singleton() {
   std::unique_ptr<CusparseSingleton>& instance = get_instance();
   if (!instance) {
     instance = std::make_unique<CusparseSingleton>();
     Kokkos::push_finalize_hook([&]() {
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroy(instance->cusparseHandle));
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroy(instance->cusparseHandle));
       instance.reset();
     });
   }
diff --git a/sparse/tpls/KokkosSparse_spadd_numeric_tpl_spec_decl.hpp b/sparse/tpls/KokkosSparse_spadd_numeric_tpl_spec_decl.hpp
index eff19977cf..93362d176d 100644
--- a/sparse/tpls/KokkosSparse_spadd_numeric_tpl_spec_decl.hpp
+++ b/sparse/tpls/KokkosSparse_spadd_numeric_tpl_spec_decl.hpp
@@ -84,21 +84,21 @@ namespace Impl {
       auto &cuspHandle = KokkosKernels::Impl::CusparseSingleton::singleton().cusparseHandle;                           \
       cusparsePointerMode_t oldPtrMode;                                                                                \
                                                                                                                        \
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetStream(cuspHandle, exec.cuda_stream()));                                    \
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseGetPointerMode(cuspHandle, &oldPtrMode));                                      \
-      KOKKOS_CUSPARSE_SAFE_CALL(                                                                                       \
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetStream(cuspHandle, exec.cuda_stream()));                         \
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseGetPointerMode(cuspHandle, &oldPtrMode));                           \
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(                                                                            \
           cusparseSetPointerMode(cuspHandle, CUSPARSE_POINTER_MODE_HOST)); /* alpha, beta on host*/                    \
       OFFSET_TYPE nnzA = colidxA.extent(0);                                                                            \
       OFFSET_TYPE nnzB = colidxB.extent(0);                                                                            \
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparse##TOKEN##csrgeam2(                                                             \
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparse##TOKEN##csrgeam2(                                                  \
           cuspHandle, m, n, reinterpret_cast<const TPL_SCALAR_TYPE *>(&alpha), cuspData.descrA, nnzA,                  \
           reinterpret_cast<const TPL_SCALAR_TYPE *>(valuesA.data()), rowmapA.data(), colidxA.data(),                   \
           reinterpret_cast<const TPL_SCALAR_TYPE *>(&beta), cuspData.descrB, nnzB,                                     \
           reinterpret_cast<const TPL_SCALAR_TYPE *>(valuesB.data()), rowmapB.data(), colidxB.data(), cuspData.descrC,  \
           reinterpret_cast<TPL_SCALAR_TYPE *>(valuesC.data()), const_cast<OFFSET_TYPE *>(rowmapC.data()),              \
           colidxC.data(), cuspData.workspace));                                                                        \
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetPointerMode(cuspHandle, oldPtrMode));                                       \
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetStream(cuspHandle, NULL));                                                  \
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetPointerMode(cuspHandle, oldPtrMode));                            \
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetStream(cuspHandle, NULL));                                       \
                                                                                                                        \
       Kokkos::Profiling::popRegion();                                                                                  \
     }                                                                                                                  \
diff --git a/sparse/tpls/KokkosSparse_spadd_symbolic_tpl_spec_decl.hpp b/sparse/tpls/KokkosSparse_spadd_symbolic_tpl_spec_decl.hpp
index 69286e5e98..878d1dd8e7 100644
--- a/sparse/tpls/KokkosSparse_spadd_symbolic_tpl_spec_decl.hpp
+++ b/sparse/tpls/KokkosSparse_spadd_symbolic_tpl_spec_decl.hpp
@@ -73,26 +73,26 @@ namespace Impl {
       OFFSET_TYPE nnzB = colidxB.extent(0);                                                                            \
       OFFSET_TYPE nnzC = 0;                                                                                            \
                                                                                                                        \
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetStream(cuspHandle, exec.cuda_stream()));                                    \
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetStream(cuspHandle, exec.cuda_stream()));                         \
                                                                                                                        \
       /* https://docs.nvidia.com/cuda/cusparse/index.html#cusparsecreatematdescr                                       \
        It sets the fields MatrixType and IndexBase to the default values                                               \
        CUSPARSE_MATRIX_TYPE_GENERAL and CUSPARSE_INDEX_BASE_ZERO,                                                      \
        respectively, while leaving other fields uninitialized. */                                                      \
                                                                                                                        \
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&cuspData.descrA));                                             \
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&cuspData.descrB));                                             \
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&cuspData.descrC));                                             \
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparse##TOKEN##csrgeam2_bufferSizeExt(                                               \
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&cuspData.descrA));                                  \
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&cuspData.descrB));                                  \
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&cuspData.descrC));                                  \
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparse##TOKEN##csrgeam2_bufferSizeExt(                                    \
           cuspHandle, m, n, &one, cuspData.descrA, nnzA, NULL, rowmapA.data(), colidxA.data(), &one, cuspData.descrB,  \
           nnzB, NULL, rowmapB.data(), colidxB.data(), cuspData.descrC, NULL, rowmapC.data(), NULL, &nbytes));          \
       cuspData.nbytes    = nbytes;                                                                                     \
       cuspData.workspace = Kokkos::kokkos_malloc<MEM_SPACE_TYPE>(nbytes);                                              \
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseXcsrgeam2Nnz(                                                                  \
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseXcsrgeam2Nnz(                                                       \
           cuspHandle, m, n, cuspData.descrA, nnzA, rowmapA.data(), colidxA.data(), cuspData.descrB, nnzB,              \
           rowmapB.data(), colidxB.data(), cuspData.descrC, rowmapC.data(), &nnzC, cuspData.workspace));                \
       addHandle->set_c_nnz(nnzC);                                                                                      \
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetStream(cuspHandle, NULL));                                                  \
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetStream(cuspHandle, NULL));                                       \
                                                                                                                        \
       Kokkos::Profiling::popRegion();                                                                                  \
     }                                                                                                                  \
diff --git a/sparse/tpls/KokkosSparse_spgemm_noreuse_tpl_spec_decl.hpp b/sparse/tpls/KokkosSparse_spgemm_noreuse_tpl_spec_decl.hpp
index 6da49b683b..d54a91000a 100644
--- a/sparse/tpls/KokkosSparse_spgemm_noreuse_tpl_spec_decl.hpp
+++ b/sparse/tpls/KokkosSparse_spgemm_noreuse_tpl_spec_decl.hpp
@@ -55,53 +55,56 @@ Matrix spgemm_noreuse_cusparse(const MatrixConst &A, const MatrixConst &B) {
   typename Matrix::row_map_type::non_const_type row_mapC(Kokkos::view_alloc(Kokkos::WithoutInitializing, "C rowmap"),
                                                          m + 1);
 
-  KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_createDescr(&spgemmDescr));
-  KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr(
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMM_createDescr(&spgemmDescr));
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateCsr(
       &descr_A, m, n, A.graph.entries.extent(0), (void *)A.graph.row_map.data(), (void *)A.graph.entries.data(),
       (void *)A.values.data(), CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, cudaScalarType));
 
-  KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr(
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateCsr(
       &descr_B, n, k, B.graph.entries.extent(0), (void *)B.graph.row_map.data(), (void *)B.graph.entries.data(),
       (void *)B.values.data(), CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, cudaScalarType));
 
-  KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&descr_C, m, k, 0, (void *)row_mapC.data(), nullptr, nullptr,
-                                              CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO,
-                                              cudaScalarType));
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&descr_C, m, k, 0, (void *)row_mapC.data(), nullptr, nullptr,
+                                                         CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
+                                                         CUSPARSE_INDEX_BASE_ZERO, cudaScalarType));
 
   //----------------------------------------------------------------------
   // query workEstimation buffer size, allocate, then call again with buffer.
-  KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_workEstimation(cusparseHandle, op, op, &alpha, descr_A, descr_B, &beta,
-                                                          descr_C, cudaScalarType, alg, spgemmDescr, &bufferSize1,
-                                                          nullptr));
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMM_workEstimation(cusparseHandle, op, op, &alpha, descr_A, descr_B,
+                                                                     &beta, descr_C, cudaScalarType, alg, spgemmDescr,
+                                                                     &bufferSize1, nullptr));
   KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&buffer1, bufferSize1));
-  KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_workEstimation(cusparseHandle, op, op, &alpha, descr_A, descr_B, &beta,
-                                                          descr_C, cudaScalarType, alg, spgemmDescr, &bufferSize1,
-                                                          buffer1));
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMM_workEstimation(cusparseHandle, op, op, &alpha, descr_A, descr_B,
+                                                                     &beta, descr_C, cudaScalarType, alg, spgemmDescr,
+                                                                     &bufferSize1, buffer1));
 
   //----------------------------------------------------------------------
   // query compute buffer size, allocate, then call again with buffer.
 
-  KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_compute(cusparseHandle, op, op, &alpha, descr_A, descr_B, &beta, descr_C,
-                                                   cudaScalarType, alg, spgemmDescr, &bufferSize2, nullptr));
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMM_compute(cusparseHandle, op, op, &alpha, descr_A, descr_B, &beta,
+                                                              descr_C, cudaScalarType, alg, spgemmDescr, &bufferSize2,
+                                                              nullptr));
   KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&buffer2, bufferSize2));
-  KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_compute(cusparseHandle, op, op, &alpha, descr_A, descr_B, &beta, descr_C,
-                                                   cudaScalarType, alg, spgemmDescr, &bufferSize2, buffer2));
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMM_compute(cusparseHandle, op, op, &alpha, descr_A, descr_B, &beta,
+                                                              descr_C, cudaScalarType, alg, spgemmDescr, &bufferSize2,
+                                                              buffer2));
   int64_t unused1, unused2, c_nnz;
-  KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpMatGetSize(descr_C, &unused1, &unused2, &c_nnz));
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpMatGetSize(descr_C, &unused1, &unused2, &c_nnz));
 
   typename Matrix::index_type entriesC(Kokkos::view_alloc(Kokkos::WithoutInitializing, "C entries"), c_nnz);
   typename Matrix::values_type valuesC(Kokkos::view_alloc(Kokkos::WithoutInitializing, "C values"), c_nnz);
 
-  KOKKOS_CUSPARSE_SAFE_CALL(
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
       cusparseCsrSetPointers(descr_C, (void *)row_mapC.data(), (void *)entriesC.data(), (void *)valuesC.data()));
-  KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_compute(cusparseHandle, op, op, &alpha, descr_A, descr_B, &beta, descr_C,
-                                                   cudaScalarType, alg, spgemmDescr, &bufferSize2, buffer2));
-  KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_copy(cusparseHandle, op, op, &alpha, descr_A, descr_B, &beta, descr_C,
-                                                cudaScalarType, alg, spgemmDescr));
-  KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(descr_A));
-  KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(descr_B));
-  KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(descr_C));
-  KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_destroyDescr(spgemmDescr));
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMM_compute(cusparseHandle, op, op, &alpha, descr_A, descr_B, &beta,
+                                                              descr_C, cudaScalarType, alg, spgemmDescr, &bufferSize2,
+                                                              buffer2));
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMM_copy(cusparseHandle, op, op, &alpha, descr_A, descr_B, &beta,
+                                                           descr_C, cudaScalarType, alg, spgemmDescr));
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(descr_A));
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(descr_B));
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroySpMat(descr_C));
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMM_destroyDescr(spgemmDescr));
   KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(buffer1));
   KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(buffer2));
   return Matrix("C", m, k, c_nnz, valuesC, row_mapC, entriesC);
diff --git a/sparse/tpls/KokkosSparse_spgemm_numeric_tpl_spec_decl.hpp b/sparse/tpls/KokkosSparse_spgemm_numeric_tpl_spec_decl.hpp
index c26118fec9..917704870a 100644
--- a/sparse/tpls/KokkosSparse_spgemm_numeric_tpl_spec_decl.hpp
+++ b/sparse/tpls/KokkosSparse_spgemm_numeric_tpl_spec_decl.hpp
@@ -67,13 +67,13 @@ void spgemm_numeric_cusparse(KernelHandle *handle, lno_t /*m*/, lno_t /*n*/, lno
     return;
   }
 
-  KOKKOS_CUSPARSE_SAFE_CALL(
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
       cusparseCsrSetPointers(h->descr_A, (void *)row_mapA.data(), (void *)entriesA.data(), (void *)valuesA.data()));
 
-  KOKKOS_CUSPARSE_SAFE_CALL(
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
       cusparseCsrSetPointers(h->descr_B, (void *)row_mapB.data(), (void *)entriesB.data(), (void *)valuesB.data()));
 
-  KOKKOS_CUSPARSE_SAFE_CALL(
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
       cusparseCsrSetPointers(h->descr_C, (void *)row_mapC.data(), (void *)entriesC.data(), (void *)valuesC.data()));
 
   if (!handle->are_entries_computed()) {
@@ -81,13 +81,14 @@ void spgemm_numeric_cusparse(KernelHandle *handle, lno_t /*m*/, lno_t /*n*/, lno
       // If symbolic was previously called with computeRowptrs=true, then
       // buffer5 will have already been allocated to the correct size. Otherwise
       // size and allocate it here.
-      KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_copy(h->cusparseHandle, h->opA, h->opB, h->descr_A, h->descr_B,
-                                                         h->descr_C, h->alg, h->spgemmDescr, &h->bufferSize5, nullptr));
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_copy(h->cusparseHandle, h->opA, h->opB, h->descr_A,
+                                                                    h->descr_B, h->descr_C, h->alg, h->spgemmDescr,
+                                                                    &h->bufferSize5, nullptr));
       KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&h->buffer5, h->bufferSize5));
     }
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_copy(h->cusparseHandle, h->opA, h->opB, h->descr_A, h->descr_B,
-                                                       h->descr_C, h->alg, h->spgemmDescr, &h->bufferSize5,
-                                                       h->buffer5));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_copy(h->cusparseHandle, h->opA, h->opB, h->descr_A,
+                                                                  h->descr_B, h->descr_C, h->alg, h->spgemmDescr,
+                                                                  &h->bufferSize5, h->buffer5));
     handle->set_computed_rowptrs();
     handle->set_computed_entries();
   }
@@ -100,12 +101,12 @@ void spgemm_numeric_cusparse(KernelHandle *handle, lno_t /*m*/, lno_t /*n*/, lno
   // handle, we save/restore the pointer mode to not interference with
   // others' use
   cusparsePointerMode_t oldPtrMode;
-  KOKKOS_CUSPARSE_SAFE_CALL(cusparseGetPointerMode(h->cusparseHandle, &oldPtrMode));
-  KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetPointerMode(h->cusparseHandle, CUSPARSE_POINTER_MODE_HOST));
-  KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_compute(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A,
-                                                        h->descr_B, &beta, h->descr_C, h->scalarType, h->alg,
-                                                        h->spgemmDescr));
-  KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetPointerMode(h->cusparseHandle, oldPtrMode));
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseGetPointerMode(h->cusparseHandle, &oldPtrMode));
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetPointerMode(h->cusparseHandle, CUSPARSE_POINTER_MODE_HOST));
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_compute(h->cusparseHandle, h->opA, h->opB, &alpha,
+                                                                   h->descr_A, h->descr_B, &beta, h->descr_C,
+                                                                   h->scalarType, h->alg, h->spgemmDescr));
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetPointerMode(h->cusparseHandle, oldPtrMode));
   handle->set_call_numeric();
 }
 
@@ -120,20 +121,20 @@ void spgemm_numeric_cusparse(KernelHandle *handle, lno_t /*m*/, lno_t /*n*/, lno
                              const ConstRowMapType &row_mapC, const EntriesType &entriesC, const ValuesType &valuesC) {
   using scalar_type = typename KernelHandle::nnz_scalar_t;
   auto h            = handle->get_cusparse_spgemm_handle();
-  KOKKOS_CUSPARSE_SAFE_CALL(
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
       cusparseCsrSetPointers(h->descr_A, (void *)row_mapA.data(), (void *)entriesA.data(), (void *)valuesA.data()));
-  KOKKOS_CUSPARSE_SAFE_CALL(
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
       cusparseCsrSetPointers(h->descr_B, (void *)row_mapB.data(), (void *)entriesB.data(), (void *)valuesB.data()));
-  KOKKOS_CUSPARSE_SAFE_CALL(
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
       cusparseCsrSetPointers(h->descr_C, (void *)row_mapC.data(), (void *)entriesC.data(), (void *)valuesC.data()));
   const auto alpha = Kokkos::ArithTraits<scalar_type>::one();
   const auto beta  = Kokkos::ArithTraits<scalar_type>::zero();
-  KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_compute(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, h->descr_B,
-                                                   &beta, h->descr_C, h->scalarType, CUSPARSE_SPGEMM_DEFAULT,
-                                                   h->spgemmDescr, &h->bufferSize4, h->buffer4));
-  KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_copy(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, h->descr_B,
-                                                &beta, h->descr_C, h->scalarType, CUSPARSE_SPGEMM_DEFAULT,
-                                                h->spgemmDescr));
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
+      cusparseSpGEMM_compute(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, h->descr_B, &beta, h->descr_C,
+                             h->scalarType, CUSPARSE_SPGEMM_DEFAULT, h->spgemmDescr, &h->bufferSize4, h->buffer4));
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMM_copy(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A,
+                                                           h->descr_B, &beta, h->descr_C, h->scalarType,
+                                                           CUSPARSE_SPGEMM_DEFAULT, h->spgemmDescr));
   handle->set_computed_entries();
   handle->set_call_numeric();
 }
@@ -180,7 +181,7 @@ void spgemm_numeric_cusparse(KernelHandle *handle, lno_t m, lno_t n, lno_t k, co
 
   // Only call numeric if C actually has entries
   if (handle->get_c_nnz()) {
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseXcsrgemm(
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseXcsrgemm(
         h->cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, m, k, n, h->generalDescr,
         nnzA, valuesA.data(), row_mapA.data(), entriesA.data(), h->generalDescr, nnzB, valuesB.data(), row_mapB.data(),
         entriesB.data(), h->generalDescr, valuesC.data(), row_mapC.data(), entriesC.data()));
diff --git a/sparse/tpls/KokkosSparse_spgemm_symbolic_tpl_spec_decl.hpp b/sparse/tpls/KokkosSparse_spgemm_symbolic_tpl_spec_decl.hpp
index 6385fff835..c789a1cf4d 100644
--- a/sparse/tpls/KokkosSparse_spgemm_symbolic_tpl_spec_decl.hpp
+++ b/sparse/tpls/KokkosSparse_spgemm_symbolic_tpl_spec_decl.hpp
@@ -72,61 +72,62 @@ void spgemm_symbolic_cusparse(KernelHandle *handle, lno_t m, lno_t n, lno_t k, c
     // which however is not available in this function. So we fake it with the
     // entries instead. Fortunately, it seems cupsarse does not access that in
     // the symbolic phase.
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&h->descr_A, m, n, entriesA.extent(0), (void *)row_mapA.data(),
-                                                (void *)entriesA.data(), (void *)entriesA.data() /*fake*/,
-                                                CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO,
-                                                h->scalarType));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
+        cusparseCreateCsr(&h->descr_A, m, n, entriesA.extent(0), (void *)row_mapA.data(), (void *)entriesA.data(),
+                          (void *)entriesA.data() /*fake*/, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
+                          CUSPARSE_INDEX_BASE_ZERO, h->scalarType));
 
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&h->descr_B, n, k, entriesB.extent(0), (void *)row_mapB.data(),
-                                                (void *)entriesB.data(), (void *)entriesB.data() /*fake*/,
-                                                CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO,
-                                                h->scalarType));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
+        cusparseCreateCsr(&h->descr_B, n, k, entriesB.extent(0), (void *)row_mapB.data(), (void *)entriesB.data(),
+                          (void *)entriesB.data() /*fake*/, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
+                          CUSPARSE_INDEX_BASE_ZERO, h->scalarType));
 
 #if CUDA_VERSION >= 12020
     // at some point cusparseCreateCsr started to need a non-null row-pointer
     // array, even if the operation that consumed the handle doesn't need to
     // read it. This was observed on a system with CUDA 12.2, but it may have
     // started earlier.
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&h->descr_C, m, k, 0, (void *)row_mapC.data(), nullptr, nullptr,
-                                                CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO,
-                                                h->scalarType));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&h->descr_C, m, k, 0, (void *)row_mapC.data(), nullptr,
+                                                           nullptr, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
+                                                           CUSPARSE_INDEX_BASE_ZERO, h->scalarType));
 #else
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&h->descr_C, m, k, 0, nullptr, nullptr, nullptr, CUSPARSE_INDEX_32I,
-                                                CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, h->scalarType));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&h->descr_C, m, k, 0, nullptr, nullptr, nullptr,
+                                                           CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
+                                                           CUSPARSE_INDEX_BASE_ZERO, h->scalarType));
 #endif
 
     //----------------------------------------------------------------------
     // ask bufferSize1 bytes for external memory
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_workEstimation(h->cusparseHandle, h->opA, h->opB, h->descr_A,
-                                                                 h->descr_B, h->descr_C, h->alg, h->spgemmDescr,
-                                                                 &bufferSize1, nullptr));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_workEstimation(h->cusparseHandle, h->opA, h->opB,
+                                                                            h->descr_A, h->descr_B, h->descr_C, h->alg,
+                                                                            h->spgemmDescr, &bufferSize1, nullptr));
 
     KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&buffer1, bufferSize1));
     // inspect matrices A and B to understand the memory requirement for the
     // next step
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_workEstimation(h->cusparseHandle, h->opA, h->opB, h->descr_A,
-                                                                 h->descr_B, h->descr_C, h->alg, h->spgemmDescr,
-                                                                 &bufferSize1, buffer1));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_workEstimation(h->cusparseHandle, h->opA, h->opB,
+                                                                            h->descr_A, h->descr_B, h->descr_C, h->alg,
+                                                                            h->spgemmDescr, &bufferSize1, buffer1));
 
     //----------------------------------------------------------------------
     // Compute nnz of C
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_nnz(h->cusparseHandle, h->opA, h->opB, h->descr_A, h->descr_B,
-                                                      h->descr_C, h->alg, h->spgemmDescr, &bufferSize2, nullptr,
-                                                      &h->bufferSize3, nullptr, &h->bufferSize4, nullptr));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_nnz(
+        h->cusparseHandle, h->opA, h->opB, h->descr_A, h->descr_B, h->descr_C, h->alg, h->spgemmDescr, &bufferSize2,
+        nullptr, &h->bufferSize3, nullptr, &h->bufferSize4, nullptr));
 
     KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&buffer2, bufferSize2));
     KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&h->buffer3, h->bufferSize3));
     KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&h->buffer4, h->bufferSize4));
 
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_nnz(h->cusparseHandle, h->opA, h->opB, h->descr_A, h->descr_B,
-                                                      h->descr_C, h->alg, h->spgemmDescr, &bufferSize2, buffer2,
-                                                      &h->bufferSize3, h->buffer3, &h->bufferSize4, h->buffer4));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_nnz(
+        h->cusparseHandle, h->opA, h->opB, h->descr_A, h->descr_B, h->descr_C, h->alg, h->spgemmDescr, &bufferSize2,
+        buffer2, &h->bufferSize3, h->buffer3, &h->bufferSize4, h->buffer4));
 
     KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(buffer2));
     KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(buffer1));
 
     int64_t C_nrow, C_ncol, C_nnz;
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpMatGetSize(h->descr_C, &C_nrow, &C_ncol, &C_nnz));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpMatGetSize(h->descr_C, &C_nrow, &C_ncol, &C_nnz));
     if (C_nnz > std::numeric_limits<int>::max()) {
       throw std::runtime_error("nnz of C overflowed over 32-bit int\n");
     }
@@ -145,15 +146,16 @@ void spgemm_symbolic_cusparse(KernelHandle *handle, lno_t m, lno_t n, lno_t k, c
     // we must use dummy versions and then discard them.
     KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&dummyEntries, C_nnz * sizeof(Ordinal)));
     KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&dummyValues, C_nnz * sizeof(Scalar)));
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseCsrSetPointers(h->descr_C, row_mapC.data(), dummyEntries, dummyValues));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
+        cusparseCsrSetPointers(h->descr_C, row_mapC.data(), dummyEntries, dummyValues));
     //--------------------------------------------------------------------------
 
     cusparseSpGEMMreuse_copy(h->cusparseHandle, h->opA, h->opB, h->descr_A, h->descr_B, h->descr_C, h->alg,
                              h->spgemmDescr, &h->bufferSize5, nullptr);
     KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&h->buffer5, h->bufferSize5));
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_copy(h->cusparseHandle, h->opA, h->opB, h->descr_A, h->descr_B,
-                                                       h->descr_C, h->alg, h->spgemmDescr, &h->bufferSize5,
-                                                       h->buffer5));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMMreuse_copy(h->cusparseHandle, h->opA, h->opB, h->descr_A,
+                                                                  h->descr_B, h->descr_C, h->alg, h->spgemmDescr,
+                                                                  &h->bufferSize5, h->buffer5));
     if (!handle->get_c_nnz()) {
       // cuSPARSE does not populate C rowptrs if C has no entries
       Kokkos::deep_copy(typename KernelHandle::HandleExecSpace(), row_mapC, Offset(0));
@@ -194,40 +196,40 @@ void spgemm_symbolic_cusparse(KernelHandle *handle, lno_t m, lno_t n, lno_t k, c
     KOKKOS_IMPL_CUDA_SAFE_CALL(
         cudaMalloc(&dummyValues_AB, sizeof(scalar_type) * std::max(entriesA.extent(0), entriesB.extent(0))));
 
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&h->descr_A, m, n, entriesA.extent(0), (void *)row_mapA.data(),
-                                                (void *)entriesA.data(), dummyValues_AB, CUSPARSE_INDEX_32I,
-                                                CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, h->scalarType));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateCsr(
+        &h->descr_A, m, n, entriesA.extent(0), (void *)row_mapA.data(), (void *)entriesA.data(), dummyValues_AB,
+        CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, h->scalarType));
 
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&h->descr_B, n, k, entriesB.extent(0), (void *)row_mapB.data(),
-                                                (void *)entriesB.data(), dummyValues_AB, CUSPARSE_INDEX_32I,
-                                                CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, h->scalarType));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateCsr(
+        &h->descr_B, n, k, entriesB.extent(0), (void *)row_mapB.data(), (void *)entriesB.data(), dummyValues_AB,
+        CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, h->scalarType));
 
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&h->descr_C, m, k, 0, row_mapC.data(), nullptr, nullptr,
-                                                CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO,
-                                                h->scalarType));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&h->descr_C, m, k, 0, row_mapC.data(), nullptr, nullptr,
+                                                           CUSPARSE_INDEX_32I, CUSPARSE_INDEX_32I,
+                                                           CUSPARSE_INDEX_BASE_ZERO, h->scalarType));
 
     //----------------------------------------------------------------------
     // query workEstimation buffer size, allocate, then call again with buffer.
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_workEstimation(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A,
-                                                            h->descr_B, &beta, h->descr_C, h->scalarType, h->alg,
-                                                            h->spgemmDescr, &h->bufferSize3, nullptr));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
+        cusparseSpGEMM_workEstimation(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, h->descr_B, &beta,
+                                      h->descr_C, h->scalarType, h->alg, h->spgemmDescr, &h->bufferSize3, nullptr));
     KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&h->buffer3, h->bufferSize3));
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_workEstimation(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A,
-                                                            h->descr_B, &beta, h->descr_C, h->scalarType, h->alg,
-                                                            h->spgemmDescr, &h->bufferSize3, h->buffer3));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
+        cusparseSpGEMM_workEstimation(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, h->descr_B, &beta,
+                                      h->descr_C, h->scalarType, h->alg, h->spgemmDescr, &h->bufferSize3, h->buffer3));
 
     //----------------------------------------------------------------------
     // query compute buffer size, allocate, then call again with buffer.
 
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_compute(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, h->descr_B,
-                                                     &beta, h->descr_C, h->scalarType, CUSPARSE_SPGEMM_DEFAULT,
-                                                     h->spgemmDescr, &h->bufferSize4, nullptr));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
+        cusparseSpGEMM_compute(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, h->descr_B, &beta, h->descr_C,
+                               h->scalarType, CUSPARSE_SPGEMM_DEFAULT, h->spgemmDescr, &h->bufferSize4, nullptr));
     KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc((void **)&h->buffer4, h->bufferSize4));
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_compute(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, h->descr_B,
-                                                     &beta, h->descr_C, h->scalarType, CUSPARSE_SPGEMM_DEFAULT,
-                                                     h->spgemmDescr, &h->bufferSize4, h->buffer4));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
+        cusparseSpGEMM_compute(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, h->descr_B, &beta, h->descr_C,
+                               h->scalarType, CUSPARSE_SPGEMM_DEFAULT, h->spgemmDescr, &h->bufferSize4, h->buffer4));
     int64_t C_nrow, C_ncol, C_nnz;
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpMatGetSize(h->descr_C, &C_nrow, &C_ncol, &C_nnz));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpMatGetSize(h->descr_C, &C_nrow, &C_ncol, &C_nnz));
     if (C_nnz > std::numeric_limits<int>::max()) {
       throw std::runtime_error("nnz of C overflowed over 32-bit int\n");
     }
@@ -245,20 +247,20 @@ void spgemm_symbolic_cusparse(KernelHandle *handle, lno_t m, lno_t n, lno_t k, c
       // handle, so we can reuse those.
       KOKKOS_IMPL_CUDA_SAFE_CALL(
           cudaMalloc(&dummyValues_AB, sizeof(scalar_type) * std::max(entriesA.extent(0), entriesB.extent(0))));
-      KOKKOS_CUSPARSE_SAFE_CALL(
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
           cusparseCsrSetPointers(h->descr_A, (void *)row_mapA.data(), (void *)entriesA.data(), dummyValues_AB));
-      KOKKOS_CUSPARSE_SAFE_CALL(
+      KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
           cusparseCsrSetPointers(h->descr_B, (void *)row_mapB.data(), (void *)entriesB.data(), dummyValues_AB));
     }
     void *dummyEntries_C, *dummyValues_C;
     KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc(&dummyEntries_C, sizeof(ordinal_type) * C_nnz));
     KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc(&dummyValues_C, sizeof(scalar_type) * C_nnz));
-    KOKKOS_CUSPARSE_SAFE_CALL(
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
         cusparseCsrSetPointers(h->descr_C, (void *)row_mapC.data(), dummyEntries_C, dummyValues_C));
 
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpGEMM_copy(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A, h->descr_B,
-                                                  &beta, h->descr_C, h->scalarType, CUSPARSE_SPGEMM_DEFAULT,
-                                                  h->spgemmDescr));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpGEMM_copy(h->cusparseHandle, h->opA, h->opB, &alpha, h->descr_A,
+                                                             h->descr_B, &beta, h->descr_C, h->scalarType,
+                                                             CUSPARSE_SPGEMM_DEFAULT, h->spgemmDescr));
 
     KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(dummyValues_C));
     KOKKOS_IMPL_CUDA_SAFE_CALL(cudaFree(dummyEntries_C));
@@ -292,7 +294,7 @@ void spgemm_symbolic_cusparse(KernelHandle *handle, lno_t m, lno_t n, lno_t k, c
     Kokkos::deep_copy(typename KernelHandle::HandleExecSpace(), row_mapC, size_type(0));
     nnzC = 0;
   } else {
-    KOKKOS_CUSPARSE_SAFE_CALL(
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
         cusparseXcsrgemmNnz(h->cusparseHandle, CUSPARSE_OPERATION_NON_TRANSPOSE, CUSPARSE_OPERATION_NON_TRANSPOSE, m, k,
                             n, h->generalDescr, nnzA, row_mapA.data(), entriesA.data(), h->generalDescr, nnzB,
                             row_mapB.data(), entriesB.data(), h->generalDescr, row_mapC.data(), nnzTotalDevHostPtr));
diff --git a/sparse/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp b/sparse/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp
index defb13044f..5cd70d0f8e 100644
--- a/sparse/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp
+++ b/sparse/tpls/KokkosSparse_spmv_bsrmatrix_tpl_spec_decl.hpp
@@ -309,9 +309,9 @@ void spmv_bsr_cusparse(const Kokkos::Cuda& exec, Handle* handle, const char mode
     /* create and set the subhandle and matrix descriptor */
     subhandle         = new KokkosSparse::Impl::CuSparse9_SpMV_Data(exec);
     handle->tpl_rank1 = subhandle;
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&subhandle->mat));
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatType(subhandle->mat, CUSPARSE_MATRIX_TYPE_GENERAL));
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(subhandle->mat, CUSPARSE_INDEX_BASE_ZERO));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&subhandle->mat));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatType(subhandle->mat, CUSPARSE_MATRIX_TYPE_GENERAL));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(subhandle->mat, CUSPARSE_INDEX_BASE_ZERO));
   }
 
   cusparseDirection_t dirA = CUSPARSE_DIRECTION_ROW;
@@ -321,25 +321,25 @@ void spmv_bsr_cusparse(const Kokkos::Cuda& exec, Handle* handle, const char mode
                 "With cuSPARSE non-generic API, offset and entry types must both be int. "
                 "Something wrong with TPL avail logic.");
   if constexpr (std::is_same_v<value_type, float>) {
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSbsrmv(
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSbsrmv(
         cusparseHandle, dirA, myCusparseOperation, A.numRows(), A.numCols(), A.nnz(),
         reinterpret_cast<float const*>(&alpha), subhandle->mat, reinterpret_cast<float const*>(A.values.data()),
         A.graph.row_map.data(), A.graph.entries.data(), A.blockDim(), reinterpret_cast<float const*>(x.data()),
         reinterpret_cast<float const*>(&beta), reinterpret_cast<float*>(y.data())));
   } else if constexpr (std::is_same_v<value_type, double>) {
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseDbsrmv(
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDbsrmv(
         cusparseHandle, dirA, myCusparseOperation, A.numRows(), A.numCols(), A.nnz(),
         reinterpret_cast<double const*>(&alpha), subhandle->mat, reinterpret_cast<double const*>(A.values.data()),
         A.graph.row_map.data(), A.graph.entries.data(), A.blockDim(), reinterpret_cast<double const*>(x.data()),
         reinterpret_cast<double const*>(&beta), reinterpret_cast<double*>(y.data())));
   } else if constexpr (std::is_same_v<value_type, Kokkos::complex<float>>) {
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseCbsrmv(
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCbsrmv(
         cusparseHandle, dirA, myCusparseOperation, A.numRows(), A.numCols(), A.nnz(),
         reinterpret_cast<cuComplex const*>(&alpha), subhandle->mat, reinterpret_cast<cuComplex const*>(A.values.data()),
         A.graph.row_map.data(), A.graph.entries.data(), A.blockDim(), reinterpret_cast<cuComplex const*>(x.data()),
         reinterpret_cast<cuComplex const*>(&beta), reinterpret_cast<cuComplex*>(y.data())));
   } else if constexpr (std::is_same_v<value_type, Kokkos::complex<double>>) {
-    KOKKOS_CUSPARSE_SAFE_CALL(
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
         cusparseZbsrmv(cusparseHandle, dirA, myCusparseOperation, A.numRows(), A.numCols(), A.nnz(),
                        reinterpret_cast<cuDoubleComplex const*>(&alpha), subhandle->mat,
                        reinterpret_cast<cuDoubleComplex const*>(A.values.data()), A.graph.row_map.data(),
@@ -413,9 +413,9 @@ void spmv_mv_bsr_cusparse(const Kokkos::Cuda& exec, Handle* handle, const char m
     /* create and set the subhandle and matrix descriptor */
     subhandle         = new KokkosSparse::Impl::CuSparse9_SpMV_Data(exec);
     handle->tpl_rank2 = subhandle;
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&subhandle->mat));
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatType(subhandle->mat, CUSPARSE_MATRIX_TYPE_GENERAL));
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(subhandle->mat, CUSPARSE_INDEX_BASE_ZERO));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&subhandle->mat));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatType(subhandle->mat, CUSPARSE_MATRIX_TYPE_GENERAL));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(subhandle->mat, CUSPARSE_INDEX_BASE_ZERO));
   }
   cusparseDirection_t dirA = CUSPARSE_DIRECTION_ROW;
 
@@ -424,28 +424,28 @@ void spmv_mv_bsr_cusparse(const Kokkos::Cuda& exec, Handle* handle, const char m
                 "With cuSPARSE non-generic API, offset and entry types must both be int. "
                 "Something wrong with TPL avail logic.");
   if constexpr (std::is_same_v<value_type, float>) {
-    KOKKOS_CUSPARSE_SAFE_CALL(
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
         cusparseSbsrmm(cusparseHandle, dirA, myCusparseOperation, CUSPARSE_OPERATION_NON_TRANSPOSE, A.numRows(), colx,
                        A.numCols(), A.nnz(), reinterpret_cast<float const*>(&alpha), subhandle->mat,
                        reinterpret_cast<float const*>(A.values.data()), A.graph.row_map.data(), A.graph.entries.data(),
                        A.blockDim(), reinterpret_cast<float const*>(x.data()), ldx,
                        reinterpret_cast<float const*>(&beta), reinterpret_cast<float*>(y.data()), ldy));
   } else if constexpr (std::is_same_v<value_type, double>) {
-    KOKKOS_CUSPARSE_SAFE_CALL(
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
         cusparseDbsrmm(cusparseHandle, dirA, myCusparseOperation, CUSPARSE_OPERATION_NON_TRANSPOSE, A.numRows(), colx,
                        A.numCols(), A.nnz(), reinterpret_cast<double const*>(&alpha), subhandle->mat,
                        reinterpret_cast<double const*>(A.values.data()), A.graph.row_map.data(), A.graph.entries.data(),
                        A.blockDim(), reinterpret_cast<double const*>(x.data()), ldx,
                        reinterpret_cast<double const*>(&beta), reinterpret_cast<double*>(y.data()), ldy));
   } else if constexpr (std::is_same_v<value_type, Kokkos::complex<float>>) {
-    KOKKOS_CUSPARSE_SAFE_CALL(
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
         cusparseCbsrmm(cusparseHandle, dirA, myCusparseOperation, CUSPARSE_OPERATION_NON_TRANSPOSE, A.numRows(), colx,
                        A.numCols(), A.nnz(), reinterpret_cast<cuComplex const*>(&alpha), subhandle->mat,
                        reinterpret_cast<cuComplex const*>(A.values.data()), A.graph.row_map.data(),
                        A.graph.entries.data(), A.blockDim(), reinterpret_cast<cuComplex const*>(x.data()), ldx,
                        reinterpret_cast<cuComplex const*>(&beta), reinterpret_cast<cuComplex*>(y.data()), ldy));
   } else if constexpr (std::is_same_v<value_type, Kokkos::complex<double>>) {
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseZbsrmm(
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseZbsrmm(
         cusparseHandle, dirA, myCusparseOperation, CUSPARSE_OPERATION_NON_TRANSPOSE, A.numRows(), colx, A.numCols(),
         A.nnz(), reinterpret_cast<cuDoubleComplex const*>(&alpha), subhandle->mat,
         reinterpret_cast<cuDoubleComplex const*>(A.values.data()), A.graph.row_map.data(), A.graph.entries.data(),
diff --git a/sparse/tpls/KokkosSparse_spmv_mv_tpl_spec_decl.hpp b/sparse/tpls/KokkosSparse_spmv_mv_tpl_spec_decl.hpp
index bcd367b9bb..b5f9ddf7c0 100644
--- a/sparse/tpls/KokkosSparse_spmv_mv_tpl_spec_decl.hpp
+++ b/sparse/tpls/KokkosSparse_spmv_mv_tpl_spec_decl.hpp
@@ -87,8 +87,9 @@ cusparseDnMatDescr_t make_cusparse_dn_mat_descr_t(ViewType &view) {
   const cusparseOrder_t order = CUSPARSE_ORDER_COL;
 
   cusparseDnMatDescr_t descr;
-  KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateDnMat(&descr, static_cast<int64_t>(rows), static_cast<int64_t>(cols),
-                                                static_cast<int64_t>(ld), values, valueType, order));
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateDnMat(&descr, static_cast<int64_t>(rows),
+                                                           static_cast<int64_t>(cols), static_cast<int64_t>(ld), values,
+                                                           valueType, order));
 
   return descr;
 }
@@ -175,22 +176,23 @@ void spmv_mv_cusparse(const Kokkos::Cuda &exec, Handle *handle, const char mode[
     subhandle         = new KokkosSparse::Impl::CuSparse10_SpMV_Data(exec);
     handle->tpl_rank2 = subhandle;
     /* create matrix */
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&subhandle->mat, A.numRows(), A.numCols(), A.nnz(),
-                                                (void *)A.graph.row_map.data(), (void *)A.graph.entries.data(),
-                                                (void *)A.values.data(), myCusparseOffsetType, myCusparseEntryType,
-                                                CUSPARSE_INDEX_BASE_ZERO, aCusparseType));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
+        cusparseCreateCsr(&subhandle->mat, A.numRows(), A.numCols(), A.nnz(), (void *)A.graph.row_map.data(),
+                          (void *)A.graph.entries.data(), (void *)A.values.data(), myCusparseOffsetType,
+                          myCusparseEntryType, CUSPARSE_INDEX_BASE_ZERO, aCusparseType));
 
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpMM_bufferSize(cusparseHandle, opA, opB, &alpha, subhandle->mat, vecX, &beta,
-                                                      vecY, computeType, algo, &subhandle->bufferSize));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpMM_bufferSize(cusparseHandle, opA, opB, &alpha, subhandle->mat, vecX,
+                                                                 &beta, vecY, computeType, algo,
+                                                                 &subhandle->bufferSize));
 
     KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc(&subhandle->buffer, subhandle->bufferSize));
   }
 
-  KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpMM(cusparseHandle, opA, opB, &alpha, subhandle->mat, vecX, &beta, vecY,
-                                         computeType, algo, subhandle->buffer));
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpMM(cusparseHandle, opA, opB, &alpha, subhandle->mat, vecX, &beta, vecY,
+                                                    computeType, algo, subhandle->buffer));
 
-  KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyDnMat(vecX));
-  KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyDnMat(vecY));
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroyDnMat(vecX));
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroyDnMat(vecY));
 }
 
 #define KOKKOSSPARSE_SPMV_MV_CUSPARSE(SCALAR, ORDINAL, OFFSET, XL, YL, SPACE)                                       \
diff --git a/sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp b/sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp
index 8cf1f49e51..94abd043fa 100644
--- a/sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp
+++ b/sparse/tpls/KokkosSparse_spmv_tpl_spec_decl.hpp
@@ -83,8 +83,8 @@ void spmv_cusparse(const Kokkos::Cuda& exec, Handle* handle, const char mode[],
 
   /* create lhs and rhs */
   cusparseDnVecDescr_t vecX, vecY;
-  KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateDnVec(&vecX, x.extent_int(0), (void*)x.data(), myCudaDataType));
-  KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateDnVec(&vecY, y.extent_int(0), (void*)y.data(), myCudaDataType));
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateDnVec(&vecX, x.extent_int(0), (void*)x.data(), myCudaDataType));
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateDnVec(&vecY, y.extent_int(0), (void*)y.data(), myCudaDataType));
 
   // Prior to CUDA 11.2.1, ALG2 was more performant than default for imbalanced
   // matrices. After 11.2.1, the default is performant for imbalanced matrices,
@@ -115,14 +115,15 @@ void spmv_cusparse(const Kokkos::Cuda& exec, Handle* handle, const char mode[],
     handle->tpl_rank1 = subhandle;
 
     /* create matrix */
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateCsr(&subhandle->mat, A.numRows(), A.numCols(), A.nnz(),
-                                                (void*)A.graph.row_map.data(), (void*)A.graph.entries.data(),
-                                                (void*)A.values.data(), myCusparseOffsetType, myCusparseEntryType,
-                                                CUSPARSE_INDEX_BASE_ZERO, myCudaDataType));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
+        cusparseCreateCsr(&subhandle->mat, A.numRows(), A.numCols(), A.nnz(), (void*)A.graph.row_map.data(),
+                          (void*)A.graph.entries.data(), (void*)A.values.data(), myCusparseOffsetType,
+                          myCusparseEntryType, CUSPARSE_INDEX_BASE_ZERO, myCudaDataType));
 
     /* size and allocate buffer */
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpMV_bufferSize(cusparseHandle, myCusparseOperation, &alpha, subhandle->mat, vecX,
-                                                      &beta, vecY, myCudaDataType, algo, &subhandle->bufferSize));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpMV_bufferSize(cusparseHandle, myCusparseOperation, &alpha,
+                                                                 subhandle->mat, vecX, &beta, vecY, myCudaDataType,
+                                                                 algo, &subhandle->bufferSize));
     //  Async memory management introduced in CUDA 11.2
 #if (CUDA_VERSION >= 11020)
     KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMallocAsync(&subhandle->buffer, subhandle->bufferSize, exec.cuda_stream()));
@@ -132,11 +133,11 @@ void spmv_cusparse(const Kokkos::Cuda& exec, Handle* handle, const char mode[],
   }
 
   /* perform SpMV */
-  KOKKOS_CUSPARSE_SAFE_CALL(cusparseSpMV(cusparseHandle, myCusparseOperation, &alpha, subhandle->mat, vecX, &beta, vecY,
-                                         myCudaDataType, algo, subhandle->buffer));
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSpMV(cusparseHandle, myCusparseOperation, &alpha, subhandle->mat, vecX,
+                                                    &beta, vecY, myCudaDataType, algo, subhandle->buffer));
 
-  KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(vecX));
-  KOKKOS_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(vecY));
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(vecX));
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDestroyDnVec(vecY));
 
 #elif (9000 <= CUDA_VERSION)
 
@@ -151,9 +152,9 @@ void spmv_cusparse(const Kokkos::Cuda& exec, Handle* handle, const char mode[],
     subhandle                 = new KokkosSparse::Impl::CuSparse9_SpMV_Data(exec);
     handle->tpl_rank1         = subhandle;
     cusparseMatDescr_t descrA = 0;
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&subhandle->mat));
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatType(subhandle->mat, CUSPARSE_MATRIX_TYPE_GENERAL));
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(subhandle->mat, CUSPARSE_INDEX_BASE_ZERO));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCreateMatDescr(&subhandle->mat));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatType(subhandle->mat, CUSPARSE_MATRIX_TYPE_GENERAL));
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseSetMatIndexBase(subhandle->mat, CUSPARSE_INDEX_BASE_ZERO));
   }
 
   /* perform the actual SpMV operation */
@@ -161,26 +162,26 @@ void spmv_cusparse(const Kokkos::Cuda& exec, Handle* handle, const char mode[],
                 "With cuSPARSE pre-10.0, offset type must be int. Something wrong with "
                 "TPL avail logic.");
   if constexpr (std::is_same_v<value_type, float>) {
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseScsrmv(
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseScsrmv(
         cusparseHandle, myCusparseOperation, A.numRows(), A.numCols(), A.nnz(), reinterpret_cast<float const*>(&alpha),
         subhandle->mat, reinterpret_cast<float const*>(A.values.data()), A.graph.row_map.data(), A.graph.entries.data(),
         reinterpret_cast<float const*>(x.data()), reinterpret_cast<float const*>(&beta),
         reinterpret_cast<float*>(y.data())));
 
   } else if constexpr (std::is_same_v<value_type, double>) {
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseDcsrmv(
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseDcsrmv(
         cusparseHandle, myCusparseOperation, A.numRows(), A.numCols(), A.nnz(), reinterpret_cast<double const*>(&alpha),
         subhandle->mat, reinterpret_cast<double const*>(A.values.data()), A.graph.row_map.data(),
         A.graph.entries.data(), reinterpret_cast<double const*>(x.data()), reinterpret_cast<double const*>(&beta),
         reinterpret_cast<double*>(y.data())));
   } else if constexpr (std::is_same_v<value_type, Kokkos::complex<float>>) {
-    KOKKOS_CUSPARSE_SAFE_CALL(cusparseCcsrmv(
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(cusparseCcsrmv(
         cusparseHandle, myCusparseOperation, A.numRows(), A.numCols(), A.nnz(),
         reinterpret_cast<cuComplex const*>(&alpha), subhandle->mat, reinterpret_cast<cuComplex const*>(A.values.data()),
         A.graph.row_map.data(), A.graph.entries.data(), reinterpret_cast<cuComplex const*>(x.data()),
         reinterpret_cast<cuComplex const*>(&beta), reinterpret_cast<cuComplex*>(y.data())));
   } else if constexpr (std::is_same_v<value_type, Kokkos::complex<double>>) {
-    KOKKOS_CUSPARSE_SAFE_CALL(
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(
         cusparseZcsrmv(cusparseHandle, myCusparseOperation, A.numRows(), A.numCols(), A.nnz(),
                        reinterpret_cast<cuDoubleComplex const*>(&alpha), subhandle->mat,
                        reinterpret_cast<cuDoubleComplex const*>(A.values.data()), A.graph.row_map.data(),
diff --git a/sparse/unit_test/Test_Sparse_Utils_cusparse.hpp b/sparse/unit_test/Test_Sparse_Utils_cusparse.hpp
index ae802b24c4..227bc3df60 100644
--- a/sparse/unit_test/Test_Sparse_Utils_cusparse.hpp
+++ b/sparse/unit_test/Test_Sparse_Utils_cusparse.hpp
@@ -28,11 +28,11 @@ void test_cusparse_safe_call() {
   bool caught_exception = false;
 
   cusparseStatus_t myStatus = CUSPARSE_STATUS_SUCCESS;
-  KOKKOS_CUSPARSE_SAFE_CALL(myStatus);
+  KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(myStatus);
 
   try {
     myStatus = CUSPARSE_STATUS_INVALID_VALUE;
-    KOKKOS_CUSPARSE_SAFE_CALL(myStatus);
+    KOKKOSSPARSE_IMPL_CUSPARSE_SAFE_CALL(myStatus);
   } catch (std::runtime_error& e) {
     caught_exception = true;
   }