Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Drop CDPv1 #3344

Merged
merged 1 commit into from
Jan 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 3 additions & 6 deletions cub/cub/detail/detect_cuda_runtime.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -85,12 +85,9 @@
# endif // CUB_RUNTIME_FUNCTION predefined

# ifdef CUB_RDC_ENABLED
// Detect available version of CDP:
# if __CUDACC_VER_MAJOR__ < 12 || defined(CUDA_FORCE_CDP1_IF_SUPPORTED)
# define CUB_DETAIL_CDPv1
# else
# define CUB_DETAIL_CDPv2
# endif
# ifdef CUDA_FORCE_CDP1_IF_SUPPORTED
# error "CUDA Dynamic Parallelism 1 is no longer supported. Please undefine CUDA_FORCE_CDP1_IF_SUPPORTED."
# endif // CUDA_FORCE_CDP1_IF_SUPPORTED
# endif

#endif // Do not document
22 changes: 1 addition & 21 deletions cub/cub/detail/device_synchronize.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -45,27 +45,7 @@ _CCCL_EXEC_CHECK_DISABLE
CUB_RUNTIME_FUNCTION inline cudaError_t device_synchronize()
{
cudaError_t result = cudaErrorNotSupported;

// Device-side sync is only available under CDPv1:
#if defined(CUB_DETAIL_CDPv1)

# if ((__CUDACC_VER_MAJOR__ > 11) || ((__CUDACC_VER_MAJOR__ == 11) && (__CUDACC_VER_MINOR__ >= 6)))
// CUDA >= 11.6
# define CUB_TMP_DEVICE_SYNC_IMPL result = __cudaDeviceSynchronizeDeprecationAvoidance();
# else // CUDA < 11.6:
# define CUB_TMP_DEVICE_SYNC_IMPL result = cudaDeviceSynchronize();
# endif

#else // CDPv2 or no CDP:

# define CUB_TMP_DEVICE_SYNC_IMPL /* unavailable */

#endif // CDP version

NV_IF_TARGET(NV_IS_HOST, (result = cudaDeviceSynchronize();), (CUB_TMP_DEVICE_SYNC_IMPL));

#undef CUB_TMP_DEVICE_SYNC_IMPL

NV_IF_TARGET(NV_IS_HOST, (result = cudaDeviceSynchronize();), ());
return result;
}

Expand Down
10 changes: 0 additions & 10 deletions cub/cub/util_device.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -505,18 +505,8 @@ CUB_RUNTIME_FUNCTION inline cudaError_t DebugSyncStream(cudaStream_t stream)
"device-side sync requires <sm_90, RDC, and CDPv1"); \
return cudaSuccess

# ifdef CUB_DETAIL_CDPv1

// Can sync everywhere but SM_90+
NV_IF_TARGET(NV_PROVIDES_SM_90, (CUB_TMP_DEVICE_SYNC_UNAVAILABLE;), (CUB_TMP_SYNC_AVAILABLE;));

# else // CDPv2 or no CDP:

// Can only sync on host
NV_IF_TARGET(NV_IS_HOST, (CUB_TMP_SYNC_AVAILABLE;), (CUB_TMP_DEVICE_SYNC_UNAVAILABLE;));

# endif // CDP version

# undef CUB_TMP_DEVICE_SYNC_UNAVAILABLE
# undef CUB_TMP_SYNC_AVAILABLE

Expand Down
45 changes: 10 additions & 35 deletions thrust/thrust/system/cuda/detail/cdp_dispatch.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,45 +53,20 @@
* \endcode
*/

#if defined(CUB_DETAIL_CDPv1)

// Special case for NVCC -- need to inform the device path about the kernels
// that are launched from the host path.
# if defined(__CUDACC__) && defined(__CUDA_ARCH__)

// seq_impl only used on platforms that do not support device synchronization.
# define THRUST_CDP_DISPATCH(par_impl, seq_impl) \
if (false) \
{ /* Without this, the device pass won't compile any kernels. */ \
NV_IF_TARGET(NV_ANY_TARGET, par_impl); \
} \
NV_IF_TARGET(NV_PROVIDES_SM_90, seq_impl, par_impl)

# else // NVCC device pass

// seq_impl only used on platforms that do not support device synchronization.
# define THRUST_CDP_DISPATCH(par_impl, seq_impl) NV_IF_TARGET(NV_PROVIDES_SM_90, seq_impl, par_impl)

# endif // NVCC device pass

#else // CDPv1 unavailable. Always fallback to serial on device:

// Special case for NVCC -- need to inform the device path about the kernels
// that are launched from the host path.
# if defined(__CUDACC__) && defined(__CUDA_ARCH__)
#if defined(__CUDACC__) && defined(__CUDA_ARCH__)

// Device-side launch not supported, fallback to sequential in device code.
# define THRUST_CDP_DISPATCH(par_impl, seq_impl) \
if (false) \
{ /* Without this, the device pass won't compile any kernels. */ \
NV_IF_TARGET(NV_ANY_TARGET, par_impl); \
} \
NV_IF_TARGET(NV_IS_HOST, par_impl, seq_impl)

# else // !(NVCC device pass):
# define THRUST_CDP_DISPATCH(par_impl, seq_impl) \
if (false) \
{ /* Without this, the device pass won't compile any kernels. */ \
NV_IF_TARGET(NV_ANY_TARGET, par_impl); \
} \
NV_IF_TARGET(NV_IS_HOST, par_impl, seq_impl)

# define THRUST_CDP_DISPATCH(par_impl, seq_impl) NV_IF_TARGET(NV_IS_HOST, par_impl, seq_impl)
#else // !(NVCC device pass):

# endif // NVCC device pass
# define THRUST_CDP_DISPATCH(par_impl, seq_impl) NV_IF_TARGET(NV_IS_HOST, par_impl, seq_impl)

#endif // CDP version
#endif // NVCC device pass
Loading