Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deprecate CUB iterators existing in Thrust #3304

Merged
merged 8 commits into from
Jan 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion cub/benchmarks/bench/run_length_encode/encode.cu
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@

#include <cub/device/device_run_length_encode.cuh>

#include <thrust/iterator/constant_iterator.h>

#include <look_back_helper.cuh>
#include <nvbench_helper.cuh>

Expand Down Expand Up @@ -74,7 +76,7 @@ static void rle(nvbench::state& state, nvbench::type_list<T, OffsetT>)
using offset_t = OffsetT;
using keys_input_it_t = const T*;
using unique_output_it_t = T*;
using vals_input_it_t = cub::ConstantInputIterator<offset_t, OffsetT>;
using vals_input_it_t = thrust::constant_iterator<offset_t, OffsetT>;
using aggregate_output_it_t = offset_t*;
using num_runs_output_iterator_t = offset_t*;
using equality_op_t = ::cuda::std::equal_to<>;
Expand Down
1 change: 0 additions & 1 deletion cub/cub/agent/agent_reduce_by_key.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@
#include <cub/block/block_scan.cuh>
#include <cub/block/block_store.cuh>
#include <cub/iterator/cache_modified_input_iterator.cuh>
#include <cub/iterator/constant_input_iterator.cuh>

#include <cuda/std/type_traits>

Expand Down
1 change: 0 additions & 1 deletion cub/cub/agent/agent_rle.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@
#include <cub/block/block_store.cuh>
#include <cub/grid/grid_queue.cuh>
#include <cub/iterator/cache_modified_input_iterator.cuh>
#include <cub/iterator/constant_input_iterator.cuh>

#include <cuda/ptx>
#include <cuda/std/type_traits>
Expand Down
1 change: 0 additions & 1 deletion cub/cub/agent/agent_segment_fixup.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@
#include <cub/block/block_scan.cuh>
#include <cub/block/block_store.cuh>
#include <cub/iterator/cache_modified_input_iterator.cuh>
#include <cub/iterator/constant_input_iterator.cuh>

#include <cuda/std/type_traits>

Expand Down
6 changes: 6 additions & 0 deletions cub/cub/agent/agent_spmv_orig.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,9 @@ struct CCCL_DEPRECATED_BECAUSE("Use the cuSPARSE library instead") AgentSpmv
__syncthreads();

// Search for the thread's starting coordinate within the merge tile
_CCCL_SUPPRESS_DEPRECATED_PUSH
CountingInputIterator<OffsetT> tile_nonzero_indices(tile_start_coord.y);
_CCCL_SUPPRESS_DEPRECATED_POP
CoordinateT thread_start_coord;

MergePathSearch(
Expand Down Expand Up @@ -567,7 +569,9 @@ struct CCCL_DEPRECATED_BECAUSE("Use the cuSPARSE library instead") AgentSpmv
__syncthreads();

// Search for the thread's starting coordinate within the merge tile
_CCCL_SUPPRESS_DEPRECATED_PUSH
CountingInputIterator<OffsetT> tile_nonzero_indices(tile_start_coord.y);
_CCCL_SUPPRESS_DEPRECATED_POP
CoordinateT thread_start_coord;

MergePathSearch(
Expand Down Expand Up @@ -701,7 +705,9 @@ struct CCCL_DEPRECATED_BECAUSE("Use the cuSPARSE library instead") AgentSpmv
// Search our starting coordinates
OffsetT diagonal = (tile_idx + threadIdx.x) * TILE_ITEMS;
CoordinateT tile_coord;
_CCCL_SUPPRESS_DEPRECATED_PUSH
CountingInputIterator<OffsetT> nonzero_indices(0);
_CCCL_SUPPRESS_DEPRECATED_POP

// Search the merge path
MergePathSearch(
Expand Down
5 changes: 5 additions & 0 deletions cub/cub/device/device_run_length_encode.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
#include <cub/device/dispatch/dispatch_reduce_by_key.cuh>
#include <cub/device/dispatch/dispatch_rle.cuh>
#include <cub/device/dispatch/tuning/tuning_run_length_encode.cuh>
#include <cub/iterator/constant_input_iterator.cuh>

#include <iterator>

Expand Down Expand Up @@ -199,14 +200,17 @@ struct DeviceRunLengthEncode
using length_t = cub::detail::non_void_value_t<LengthsOutputIteratorT, offset_t>;

// Generator type for providing 1s values for run-length reduction
_CCCL_SUPPRESS_DEPRECATED_PUSH
using lengths_input_iterator_t = ConstantInputIterator<length_t, offset_t>;
_CCCL_SUPPRESS_DEPRECATED_POP

using accum_t = ::cuda::std::__accumulator_t<reduction_op, length_t, length_t>;

using key_t = cub::detail::non_void_value_t<UniqueOutputIteratorT, cub::detail::value_t<InputIteratorT>>;

using policy_t = detail::rle::encode::policy_hub<accum_t, key_t>;

_CCCL_SUPPRESS_DEPRECATED_PUSH
return DispatchReduceByKey<
InputIteratorT,
UniqueOutputIteratorT,
Expand All @@ -228,6 +232,7 @@ struct DeviceRunLengthEncode
reduction_op(),
num_items,
stream);
_CCCL_SUPPRESS_DEPRECATED_POP
}

//! @rst
Expand Down
2 changes: 2 additions & 0 deletions cub/cub/device/dispatch/dispatch_spmv_orig.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,9 @@ CUB_DETAIL_KERNEL_ATTRIBUTES void DeviceSpmvSearchKernel(
{
OffsetT diagonal = (tile_idx * TILE_ITEMS);
CoordinateT tile_coordinate;
_CCCL_SUPPRESS_DEPRECATED_PUSH
CountingInputIterator<OffsetT> nonzero_indices(0);
_CCCL_SUPPRESS_DEPRECATED_POP

// Search the merge path
MergePathSearch(
Expand Down
19 changes: 14 additions & 5 deletions cub/cub/device/dispatch/dispatch_streaming_reduce.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,20 @@
# pragma system_header
#endif // no system header

_CCCL_SUPPRESS_DEPRECATED_PUSH
#include <cuda/std/functional>
_CCCL_SUPPRESS_DEPRECATED_POP

#include <cub/device/dispatch/dispatch_reduce.cuh>
#include <cub/iterator/arg_index_input_iterator.cuh>
#include <cub/iterator/constant_input_iterator.cuh>

#include <thrust/iterator/iterator_adaptor.h>
#include <thrust/iterator/tabulate_output_iterator.h>

#include <cuda/std/functional>
#include <cuda/std/type_traits>

#ifndef _CCCL_DOXYGEN_INVOKED // Do not document

// suppress deprecation warnings for ConstantInputIterator
_CCCL_SUPPRESS_DEPRECATED_PUSH
CUB_NAMESPACE_BEGIN

namespace detail::reduce
Expand Down Expand Up @@ -190,6 +189,12 @@ template <typename InputIteratorT,
detail::reduce::policy_hub<KeyValuePair<PerPartitionOffsetT, InitT>, PerPartitionOffsetT, ReductionOpT>>
struct dispatch_streaming_arg_reduce_t
{
# if _CCCL_COMPILER(NVHPC)
// NVHPC fails to suppress a deprecation when the alias is inside the function below, so we put it here and span a
// deprecation suppression region across the entire file as well
using constant_offset_it_t = ConstantInputIterator<GlobalOffsetT>;
# endif // _CCCL_COMPILER(NVHPC)

// Internal dispatch routine for computing a device-wide argument extremum, like `ArgMin` and `ArgMax`
//
// @param[in] d_temp_storage
Expand Down Expand Up @@ -229,7 +234,11 @@ struct dispatch_streaming_arg_reduce_t
cudaStream_t stream)
{
// Constant iterator to provide the offset of the current partition for the user-provided input iterator
# if !_CCCL_COMPILER(NVHPC)
_CCCL_SUPPRESS_DEPRECATED_PUSH
using constant_offset_it_t = ConstantInputIterator<GlobalOffsetT>;
_CCCL_SUPPRESS_DEPRECATED_POP
# endif

// Wrapped input iterator to produce index-value tuples, i.e., <PerPartitionOffsetT, InputT>-tuples
// We make sure to offset the user-provided input iterator by the current partition's offset
Expand Down Expand Up @@ -373,7 +382,7 @@ struct dispatch_streaming_arg_reduce_t
};

} // namespace detail::reduce

_CCCL_SUPPRESS_DEPRECATED_POP
CUB_NAMESPACE_END

#endif // !_CCCL_DOXYGEN_INVOKED
10 changes: 9 additions & 1 deletion cub/cub/iterator/constant_input_iterator.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,13 @@ CUB_NAMESPACE_BEGIN
* The difference type of this iterator (Default: @p ptrdiff_t)
*/
template <typename ValueType, typename OffsetT = ptrdiff_t>
class ConstantInputIterator
class
#ifndef __CUDA_ARCH__
// Avoid generating a deprecation warning from length_encode.compute_xx.cpp1.ii, which is compiled by cicc for which
// we cannot suppress the warning
CCCL_DEPRECATED_BECAUSE("Use thrust::constant_iterator instead")
#endif
ConstantInputIterator
{
public:
// Required iterator traits
Expand Down Expand Up @@ -216,11 +222,13 @@ public:
}

/// ostream operator
_CCCL_SUPPRESS_DEPRECATED_PUSH
friend std::ostream& operator<<(std::ostream& os, const self_type& itr)
{
os << "[" << itr.val << "," << itr.offset << "]";
return os;
}
_CCCL_SUPPRESS_DEPRECATED_POP
};

CUB_NAMESPACE_END
4 changes: 3 additions & 1 deletion cub/cub/iterator/counting_input_iterator.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ CUB_NAMESPACE_BEGIN
* The difference type of this iterator (Default: @p ptrdiff_t)
*/
template <typename ValueType, typename OffsetT = ptrdiff_t>
class CountingInputIterator
class CCCL_DEPRECATED_BECAUSE("Use thrust::counting_iterator instead") CountingInputIterator
{
public:
// Required iterator traits
Expand Down Expand Up @@ -218,11 +218,13 @@ public:

/// ostream operator
#if !_CCCL_COMPILER(NVRTC)
_CCCL_SUPPRESS_DEPRECATED_PUSH
friend std::ostream& operator<<(std::ostream& os, const self_type& itr)
{
os << "[" << itr.val << "]";
return os;
}
_CCCL_SUPPRESS_DEPRECATED_POP
#endif // !_CCCL_COMPILER(NVRTC)
};

Expand Down
4 changes: 3 additions & 1 deletion cub/cub/iterator/discard_output_iterator.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ CUB_NAMESPACE_BEGIN
* @brief A discard iterator
*/
template <typename OffsetT = ptrdiff_t>
class DiscardOutputIterator
class CCCL_DEPRECATED_BECAUSE("Use thrust::discard_iterator instead") DiscardOutputIterator
{
public:
// Required iterator traits
Expand Down Expand Up @@ -191,11 +191,13 @@ public:
}

/// ostream operator
_CCCL_SUPPRESS_DEPRECATED_PUSH
friend std::ostream& operator<<(std::ostream& os, const self_type& itr)
{
os << "[" << itr.offset << "]";
return os;
}
_CCCL_SUPPRESS_DEPRECATED_POP
};

CUB_NAMESPACE_END
4 changes: 3 additions & 1 deletion cub/cub/iterator/transform_input_iterator.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ CUB_NAMESPACE_BEGIN
* The difference type of this iterator (Default: @p ptrdiff_t)
*/
template <typename ValueType, typename ConversionOp, typename InputIteratorT, typename OffsetT = ptrdiff_t>
class TransformInputIterator
class CCCL_DEPRECATED_BECAUSE("Use thrust::transform_iterator instead") TransformInputIterator
{
public:
// Required iterator traits
Expand Down Expand Up @@ -233,10 +233,12 @@ public:
}

/// ostream operator
_CCCL_SUPPRESS_DEPRECATED_PUSH
friend std::ostream& operator<<(std::ostream& os, const self_type& /* itr */)
{
return os;
}
_CCCL_SUPPRESS_DEPRECATED_POP
};

CUB_NAMESPACE_END
9 changes: 5 additions & 4 deletions cub/test/catch2_test_block_run_length_decode.cu
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,11 @@
#include <cub/block/block_run_length_decode.cuh>
#include <cub/block/block_store.cuh>
#include <cub/device/device_scan.cuh>
#include <cub/iterator/counting_input_iterator.cuh>
#include <cub/iterator/transform_input_iterator.cuh>
#include <cub/util_allocator.cuh>

#include <thrust/iterator/counting_iterator.h>

#include <cuda/std/type_traits>

#include <c2h/catch2_test_helper.h>
Expand Down Expand Up @@ -332,11 +333,11 @@ void TestAlgorithmSpecialisation()

using RunItemT = float;
using RunLengthT = uint32_t;
using ItemItT = cub::CountingInputIterator<RunItemT>;
using RunLengthsItT = cub::TransformInputIterator<RunLengthT, ModOp, cub::CountingInputIterator<RunLengthT>>;
using ItemItT = thrust::counting_iterator<RunItemT>;
using RunLengthsItT = thrust::transform_iterator<ModOp, thrust::counting_iterator<RunLengthT>>;

ItemItT d_unique_items(1000U);
RunLengthsItT d_run_lengths(cub::CountingInputIterator<RunLengthT>(0), ModOp{});
RunLengthsItT d_run_lengths(thrust::counting_iterator<RunLengthT>(0), ModOp{});

constexpr uint32_t num_runs = 10000;
constexpr uint32_t num_blocks = (num_runs + (RUNS_PER_BLOCK - 1U)) / RUNS_PER_BLOCK;
Expand Down
1 change: 0 additions & 1 deletion cub/test/catch2_test_block_store.cu
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@

#include <cub/block/block_store.cuh>
#include <cub/iterator/cache_modified_output_iterator.cuh>
#include <cub/iterator/discard_output_iterator.cuh>
#include <cub/util_allocator.cuh>
#include <cub/util_arch.cuh>

Expand Down
4 changes: 2 additions & 2 deletions cub/test/catch2_test_device_for.cu
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,11 @@
// above header needs to be included first

#include <cub/device/device_for.cuh>
#include <cub/iterator/counting_input_iterator.cuh>

#include <thrust/count.h>
#include <thrust/detail/raw_pointer_cast.h>
#include <thrust/equal.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/sequence.h>

#include "catch2_test_launch_helper.h"
Expand Down Expand Up @@ -260,7 +260,7 @@ C2H_TEST("Device for each works with counting iterator", "[for][device]")
max_items,
}));

const auto it = cub::CountingInputIterator<int>{0};
const auto it = thrust::counting_iterator<int>{0};
c2h::device_vector<int> counts(num_items);
device_for_each(it, it + num_items, incrementer_t{thrust::raw_pointer_cast(counts.data())});

Expand Down
4 changes: 2 additions & 2 deletions cub/test/catch2_test_device_for_copy.cu
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@
// above header needs to be included first

#include <cub/device/device_for.cuh>
#include <cub/iterator/counting_input_iterator.cuh>

#include <thrust/count.h>
#include <thrust/detail/raw_pointer_cast.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/sequence.h>

#include "catch2_test_launch_helper.h"
Expand Down Expand Up @@ -200,7 +200,7 @@ C2H_TEST("Device for each works with counting iterator", "[for][device]")
max_items,
}));

const auto it = cub::CountingInputIterator<int>{0};
const auto it = thrust::counting_iterator<int>{0};
c2h::device_vector<int> counts(num_items);
device_for_each_copy(it, it + num_items, incrementer_t{thrust::raw_pointer_cast(counts.data())});

Expand Down
7 changes: 4 additions & 3 deletions cub/test/catch2_test_device_histogram.cu
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@
******************************************************************************/

#include <cub/device/device_histogram.cuh>
#include <cub/iterator/counting_input_iterator.cuh>

#include <thrust/iterator/counting_iterator.h>

#include <cuda/std/__algorithm_>
#include <cuda/std/array>
Expand Down Expand Up @@ -495,7 +496,7 @@ C2H_TEST("DeviceHistogram::HistogramEven sample iterator", "[histogram_even][dev
const auto lower_level = caller_vector<int>{0, -10, cs::numeric_limits<int>::lowest()};
const auto upper_level = caller_vector<int>{total_values, 10, cs::numeric_limits<int>::max()};

auto sample_iterator = cub::CountingInputIterator<sample_t>(0);
auto sample_iterator = thrust::counting_iterator<sample_t>(0);

// Channel #0: 0, 4, 8, 12
// Channel #1: 1, 5, 9, 13
Expand Down Expand Up @@ -584,7 +585,7 @@ C2H_TEST_LIST("DeviceHistogram::HistogramEven bin computation does not overflow"
constexpr sample_t lower_level = 0;
constexpr sample_t upper_level = cs::numeric_limits<sample_t>::max();
constexpr auto num_samples = 1000;
auto d_samples = cub::CountingInputIterator<sample_t>{0UL};
auto d_samples = thrust::counting_iterator<sample_t>{0UL};
auto d_histo_out = c2h::device_vector<counter_t>(1024);
const auto num_bins = GENERATE(1, 2);

Expand Down
4 changes: 4 additions & 0 deletions cub/test/catch2_test_device_reduce.cu
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@
#include <c2h/custom_type.h>
#include <c2h/extended_types.h>

// need to suppress deprecation warnings for ConstantInputIterator in the cudafe1.stub.c file, so there is no matching
// _CCCL_SUPPRESS_DEPRECATED_POP at the end of this file
_CCCL_SUPPRESS_DEPRECATED_PUSH

DECLARE_LAUNCH_WRAPPER(cub::DeviceReduce::Reduce, device_reduce);
DECLARE_LAUNCH_WRAPPER(cub::DeviceReduce::Sum, device_sum);
DECLARE_LAUNCH_WRAPPER(cub::DeviceReduce::Min, device_min);
Expand Down
Loading
Loading