Skip to content

Commit

Permalink
Drop CDPv1
Browse files Browse the repository at this point in the history
  • Loading branch information
bernhardmgruber committed Jan 10, 2025
1 parent fae0a1b commit 398605f
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 72 deletions.
9 changes: 3 additions & 6 deletions cub/cub/detail/detect_cuda_runtime.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -85,12 +85,9 @@
# endif // CUB_RUNTIME_FUNCTION predefined

# ifdef CUB_RDC_ENABLED
// Detect available version of CDP:
# if __CUDACC_VER_MAJOR__ < 12 || defined(CUDA_FORCE_CDP1_IF_SUPPORTED)
# define CUB_DETAIL_CDPv1
# else
# define CUB_DETAIL_CDPv2
# endif
# ifdef CUDA_FORCE_CDP1_IF_SUPPORTED
# error "CUDA Dynamic Parallelism 1 is no longer supported. Please undefine CUDA_FORCE_CDP1_IF_SUPPORTED."
# endif // CUDA_FORCE_CDP1_IF_SUPPORTED
# endif

#endif // Do not document
22 changes: 1 addition & 21 deletions cub/cub/detail/device_synchronize.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -45,27 +45,7 @@ _CCCL_EXEC_CHECK_DISABLE
CUB_RUNTIME_FUNCTION inline cudaError_t device_synchronize()
{
cudaError_t result = cudaErrorNotSupported;

// Device-side sync is only available under CDPv1:
#if defined(CUB_DETAIL_CDPv1)

# if ((__CUDACC_VER_MAJOR__ > 11) || ((__CUDACC_VER_MAJOR__ == 11) && (__CUDACC_VER_MINOR__ >= 6)))
// CUDA >= 11.6
# define CUB_TMP_DEVICE_SYNC_IMPL result = __cudaDeviceSynchronizeDeprecationAvoidance();
# else // CUDA < 11.6:
# define CUB_TMP_DEVICE_SYNC_IMPL result = cudaDeviceSynchronize();
# endif

#else // CDPv2 or no CDP:

# define CUB_TMP_DEVICE_SYNC_IMPL /* unavailable */

#endif // CDP version

NV_IF_TARGET(NV_IS_HOST, (result = cudaDeviceSynchronize();), (CUB_TMP_DEVICE_SYNC_IMPL));

#undef CUB_TMP_DEVICE_SYNC_IMPL

NV_IF_TARGET(NV_IS_HOST, (result = cudaDeviceSynchronize();), ());
return result;
}

Expand Down
10 changes: 0 additions & 10 deletions cub/cub/util_device.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -505,18 +505,8 @@ CUB_RUNTIME_FUNCTION inline cudaError_t DebugSyncStream(cudaStream_t stream)
"device-side sync requires <sm_90, RDC, and CDPv1"); \
return cudaSuccess

# ifdef CUB_DETAIL_CDPv1

// Can sync everywhere but SM_90+
NV_IF_TARGET(NV_PROVIDES_SM_90, (CUB_TMP_DEVICE_SYNC_UNAVAILABLE;), (CUB_TMP_SYNC_AVAILABLE;));

# else // CDPv2 or no CDP:

// Can only sync on host
NV_IF_TARGET(NV_IS_HOST, (CUB_TMP_SYNC_AVAILABLE;), (CUB_TMP_DEVICE_SYNC_UNAVAILABLE;));

# endif // CDP version

# undef CUB_TMP_DEVICE_SYNC_UNAVAILABLE
# undef CUB_TMP_SYNC_AVAILABLE

Expand Down
45 changes: 10 additions & 35 deletions thrust/thrust/system/cuda/detail/cdp_dispatch.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,45 +53,20 @@
* \endcode
*/

#if defined(CUB_DETAIL_CDPv1)

// Special case for NVCC -- need to inform the device path about the kernels
// that are launched from the host path.
# if defined(__CUDACC__) && defined(__CUDA_ARCH__)

// seq_impl only used on platforms that do not support device synchronization.
# define THRUST_CDP_DISPATCH(par_impl, seq_impl) \
if (false) \
{ /* Without this, the device pass won't compile any kernels. */ \
NV_IF_TARGET(NV_ANY_TARGET, par_impl); \
} \
NV_IF_TARGET(NV_PROVIDES_SM_90, seq_impl, par_impl)

# else // NVCC device pass

// seq_impl only used on platforms that do not support device synchronization.
# define THRUST_CDP_DISPATCH(par_impl, seq_impl) NV_IF_TARGET(NV_PROVIDES_SM_90, seq_impl, par_impl)

# endif // NVCC device pass

#else // CDPv1 unavailable. Always fallback to serial on device:

// Special case for NVCC -- need to inform the device path about the kernels
// that are launched from the host path.
# if defined(__CUDACC__) && defined(__CUDA_ARCH__)
#if defined(__CUDACC__) && defined(__CUDA_ARCH__)

// Device-side launch not supported, fallback to sequential in device code.
# define THRUST_CDP_DISPATCH(par_impl, seq_impl) \
if (false) \
{ /* Without this, the device pass won't compile any kernels. */ \
NV_IF_TARGET(NV_ANY_TARGET, par_impl); \
} \
NV_IF_TARGET(NV_IS_HOST, par_impl, seq_impl)

# else // !(NVCC device pass):
# define THRUST_CDP_DISPATCH(par_impl, seq_impl) \
if (false) \
{ /* Without this, the device pass won't compile any kernels. */ \
NV_IF_TARGET(NV_ANY_TARGET, par_impl); \
} \
NV_IF_TARGET(NV_IS_HOST, par_impl, seq_impl)

# define THRUST_CDP_DISPATCH(par_impl, seq_impl) NV_IF_TARGET(NV_IS_HOST, par_impl, seq_impl)
#else // !(NVCC device pass):

# endif // NVCC device pass
# define THRUST_CDP_DISPATCH(par_impl, seq_impl) NV_IF_TARGET(NV_IS_HOST, par_impl, seq_impl)

#endif // CDP version
#endif // NVCC device pass

0 comments on commit 398605f

Please sign in to comment.