Skip to content

Commit

Permalink
Merge branch 'branch-25.02' into decimal32-decimal64
Browse files Browse the repository at this point in the history
  • Loading branch information
bdice authored Jan 28, 2025
2 parents f6b627b + 328605f commit 23a8204
Show file tree
Hide file tree
Showing 56 changed files with 586 additions and 327 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ jobs:
package-name: libcudf
package-type: cpp
wheel-build-pylibcudf:
needs: [wheel-publish-libcudf]
needs: [wheel-build-libcudf]
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
Expand All @@ -111,7 +111,7 @@ jobs:
package-name: pylibcudf
package-type: python
wheel-build-cudf:
needs: wheel-publish-pylibcudf
needs: wheel-build-pylibcudf
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
Expand All @@ -132,7 +132,7 @@ jobs:
package-name: cudf
package-type: python
wheel-build-dask-cudf:
needs: wheel-publish-cudf
needs: wheel-build-cudf
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
Expand All @@ -155,7 +155,7 @@ jobs:
package-name: dask_cudf
package-type: python
wheel-build-cudf-polars:
needs: wheel-publish-pylibcudf
needs: wheel-build-pylibcudf
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
Expand Down
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ ci:
autoupdate_branch: ""
autoupdate_commit_msg: "[pre-commit.ci] pre-commit autoupdate"
autoupdate_schedule: quarterly
skip: ["verify-alpha-spec"]
skip: ["verify-alpha-spec", "nbqa-isort"]
submodules: false

repos:
Expand Down Expand Up @@ -173,7 +173,7 @@ repos:
)
- id: verify-alpha-spec
- repo: https://github.com/rapidsai/dependency-file-generator
rev: v1.16.0
rev: v1.17.0
hooks:
- id: rapids-dependency-file-generator
args: ["--clean"]
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ dependencies:
- pandas
- pandas>=2.0,<2.2.4dev0
- pandoc
- polars>=1.11,<1.18
- polars>=1.20,<1.22
- pre-commit
- ptxcompiler
- pyarrow>=14.0.0,<20.0.0a0
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-125_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ dependencies:
- pandas
- pandas>=2.0,<2.2.4dev0
- pandoc
- polars>=1.11,<1.18
- polars>=1.20,<1.22
- pre-commit
- pyarrow>=14.0.0,<20.0.0a0
- pydata-sphinx-theme>=0.15.4
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/cudf-polars/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ requirements:
run:
- python
- pylibcudf ={{ version }}
- polars >=1.11,<1.18
- polars >=1.20,<1.22
- {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}

test:
Expand Down
5 changes: 5 additions & 0 deletions cpp/benchmarks/io/parquet/parquet_reader_input.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,10 @@ void BM_parquet_read_long_strings(nvbench::state& state)
cycle_dtypes(d_type, num_cols), table_size_bytes{data_size}, profile); // THIS
auto const view = tbl->view();

// set smaller threshold to reduce file size and execution time
auto const threshold = 1;
setenv("LIBCUDF_LARGE_STRINGS_THRESHOLD", std::to_string(threshold).c_str(), 1);

cudf::io::parquet_writer_options write_opts =
cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view)
.compression(compression);
Expand All @@ -129,6 +133,7 @@ void BM_parquet_read_long_strings(nvbench::state& state)
}();

parquet_read_common(num_rows_written, num_cols, source_sink, state);
unsetenv("LIBCUDF_LARGE_STRINGS_THRESHOLD");
}

template <data_type DataType>
Expand Down
10 changes: 7 additions & 3 deletions cpp/include/cudf/detail/sizes_to_offsets_iterator.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
* Copyright (c) 2020-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -255,12 +255,14 @@ static sizes_to_offsets_iterator<ScanIterator, LastType> make_sizes_to_offsets_i
* @param begin Input iterator for scan
* @param end End of the input iterator
* @param result Output iterator for scan result
* @param initial_offset Initial offset to add to scan
* @return The last element of the scan
*/
template <typename SizesIterator, typename OffsetsIterator>
auto sizes_to_offsets(SizesIterator begin,
SizesIterator end,
OffsetsIterator result,
int64_t initial_offset,
rmm::cuda_stream_view stream)
{
using SizeType = typename thrust::iterator_traits<SizesIterator>::value_type;
Expand All @@ -273,7 +275,8 @@ auto sizes_to_offsets(SizesIterator begin,
make_sizes_to_offsets_iterator(result, result + std::distance(begin, end), last_element.data());
// This function uses the type of the initialization parameter as the accumulator type
// when computing the individual scan output elements.
thrust::exclusive_scan(rmm::exec_policy(stream), begin, end, output_itr, LastType{0});
thrust::exclusive_scan(
rmm::exec_policy_nosync(stream), begin, end, output_itr, static_cast<LastType>(initial_offset));
return last_element.value(stream);
}

Expand Down Expand Up @@ -319,7 +322,8 @@ std::pair<std::unique_ptr<column>, size_type> make_offsets_child_column(
});
auto input_itr = cudf::detail::make_counting_transform_iterator(0, map_fn);
// Use the sizes-to-offsets iterator to compute the total number of elements
auto const total_elements = sizes_to_offsets(input_itr, input_itr + count + 1, d_offsets, stream);
auto const total_elements =
sizes_to_offsets(input_itr, input_itr + count + 1, d_offsets, 0, stream);
CUDF_EXPECTS(
total_elements <= static_cast<decltype(total_elements)>(std::numeric_limits<size_type>::max()),
"Size of output exceeds the column size limit",
Expand Down
18 changes: 10 additions & 8 deletions cpp/include/cudf/detail/utilities/integer_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ CUDF_HOST_DEVICE constexpr S round_up_safe(S number_to_round, S modulus)
* `modulus` is positive and does not check for overflow.
*/
template <typename S>
constexpr S round_down_safe(S number_to_round, S modulus) noexcept
CUDF_HOST_DEVICE constexpr S round_down_safe(S number_to_round, S modulus) noexcept
{
auto remainder = number_to_round % modulus;
auto rounded_down = number_to_round - remainder;
Expand Down Expand Up @@ -113,24 +113,26 @@ CUDF_HOST_DEVICE constexpr S round_up_unsafe(S number_to_round, S modulus) noexc
* the result will be incorrect
*/
template <typename S, typename T>
constexpr S div_rounding_up_unsafe(S const& dividend, T const& divisor) noexcept
CUDF_HOST_DEVICE constexpr S div_rounding_up_unsafe(S const& dividend, T const& divisor) noexcept
{
return (dividend + divisor - 1) / divisor;
}

namespace detail {
template <typename I>
constexpr I div_rounding_up_safe(std::integral_constant<bool, false>,
I dividend,
I divisor) noexcept
CUDF_HOST_DEVICE constexpr I div_rounding_up_safe(cuda::std::integral_constant<bool, false>,
I dividend,
I divisor) noexcept
{
// TODO: This could probably be implemented faster
return (dividend > divisor) ? 1 + div_rounding_up_unsafe(dividend - divisor, divisor)
: (dividend > 0);
}

template <typename I>
constexpr I div_rounding_up_safe(std::integral_constant<bool, true>, I dividend, I divisor) noexcept
CUDF_HOST_DEVICE constexpr I div_rounding_up_safe(cuda::std::integral_constant<bool, true>,
I dividend,
I divisor) noexcept
{
auto quotient = dividend / divisor;
auto remainder = dividend % divisor;
Expand All @@ -156,9 +158,9 @@ constexpr I div_rounding_up_safe(std::integral_constant<bool, true>, I dividend,
* the non-integral division `dividend/divisor`
*/
template <typename I>
constexpr I div_rounding_up_safe(I dividend, I divisor) noexcept
CUDF_HOST_DEVICE constexpr I div_rounding_up_safe(I dividend, I divisor) noexcept
{
using i_is_a_signed_type = std::integral_constant<bool, std::is_signed_v<I>>;
using i_is_a_signed_type = cuda::std::integral_constant<bool, cuda::std::is_signed_v<I>>;
return detail::div_rounding_up_safe(i_is_a_signed_type{}, dividend, divisor);
}

Expand Down
6 changes: 3 additions & 3 deletions cpp/include/cudf/fixed_point/temporary.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
* Copyright (c) 2021-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -54,7 +54,7 @@ auto to_string(T value) -> std::string
}

template <typename T>
constexpr auto abs(T value)
CUDF_HOST_DEVICE constexpr auto abs(T value)
{
return value >= 0 ? value : -value;
}
Expand All @@ -72,7 +72,7 @@ CUDF_HOST_DEVICE inline auto max(T lhs, T rhs)
}

template <typename BaseType>
constexpr auto exp10(int32_t exponent)
CUDF_HOST_DEVICE constexpr auto exp10(int32_t exponent)
{
BaseType value = 1;
while (exponent > 0)
Expand Down
16 changes: 9 additions & 7 deletions cpp/include/cudf/io/text/detail/multistate.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
* Copyright (c) 2021-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -18,6 +18,8 @@

#include <cudf/utilities/export.hpp>

#include <cuda/functional>

#include <cstdint>

namespace CUDF_EXPORT cudf {
Expand Down Expand Up @@ -45,7 +47,7 @@ struct multistate {
*
* @note: The behavior of this function is undefined if size() => max_segment_count
*/
constexpr void enqueue(uint8_t head, uint8_t tail)
CUDF_HOST_DEVICE constexpr void enqueue(uint8_t head, uint8_t tail)
{
_heads |= (head & 0xFu) << (_size * 4);
_tails |= (tail & 0xFu) << (_size * 4);
Expand All @@ -55,17 +57,17 @@ struct multistate {
/**
* @brief gets the number of segments this multistate represents
*/
[[nodiscard]] constexpr uint8_t size() const { return _size; }
[[nodiscard]] CUDF_HOST_DEVICE constexpr uint8_t size() const { return _size; }

/**
* @brief gets the highest (____, tail] value this multistate represents
*/
[[nodiscard]] constexpr uint8_t max_tail() const
[[nodiscard]] CUDF_HOST_DEVICE constexpr uint8_t max_tail() const
{
uint8_t maximum = 0;

for (uint8_t i = 0; i < _size; i++) {
maximum = std::max(maximum, get_tail(i));
maximum = cuda::std::max(maximum, get_tail(i));
}

return maximum;
Expand All @@ -74,15 +76,15 @@ struct multistate {
/**
* @brief gets the Nth (head, ____] value state this multistate represents
*/
[[nodiscard]] constexpr uint8_t get_head(uint8_t idx) const
[[nodiscard]] CUDF_HOST_DEVICE constexpr uint8_t get_head(uint8_t idx) const
{
return (_heads >> (idx * 4)) & 0xFu;
}

/**
* @brief gets the Nth (____, tail] value state this multistate represents
*/
[[nodiscard]] constexpr uint8_t get_tail(uint8_t idx) const
[[nodiscard]] CUDF_HOST_DEVICE constexpr uint8_t get_tail(uint8_t idx) const
{
return (_tails >> (idx * 4)) & 0xFu;
}
Expand Down
8 changes: 5 additions & 3 deletions cpp/include/cudf/strings/detail/convert/fixed_point.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
* Copyright (c) 2021-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -17,6 +17,7 @@

#include <cudf/fixed_point/temporary.hpp>

#include <cuda/std/limits>
#include <cuda/std/optional>
#include <cuda/std/type_traits>
#include <thrust/pair.h>
Expand Down Expand Up @@ -46,7 +47,7 @@ __device__ inline thrust::pair<UnsignedDecimalType, int32_t> parse_integer(
// highest value where another decimal digit cannot be appended without an overflow;
// this preserves the most digits when scaling the final result for this type
constexpr UnsignedDecimalType decimal_max =
(std::numeric_limits<UnsignedDecimalType>::max() - 9L) / 10L;
(cuda::std::numeric_limits<UnsignedDecimalType>::max() - 9L) / 10L;

__uint128_t value = 0; // for checking overflow
int32_t exp_offset = 0;
Expand Down Expand Up @@ -90,7 +91,8 @@ __device__ inline thrust::pair<UnsignedDecimalType, int32_t> parse_integer(
template <bool check_only = false>
__device__ cuda::std::optional<int32_t> parse_exponent(char const* iter, char const* iter_end)
{
constexpr uint32_t exponent_max = static_cast<uint32_t>(std::numeric_limits<int32_t>::max());
constexpr uint32_t exponent_max =
static_cast<uint32_t>(cuda::std::numeric_limits<int32_t>::max());

// get optional exponent sign
int32_t const exp_sign = [&iter] {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
* Copyright (c) 2023-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -17,6 +17,8 @@

#include <cudf/strings/detail/convert/int_to_string.cuh>

#include <cuda/std/functional>

namespace cudf::strings::detail {

/**
Expand All @@ -33,7 +35,7 @@ __device__ inline int32_t fixed_point_string_size(__int128_t const& value, int32
auto const abs_value = numeric::detail::abs(value);
auto const exp_ten = numeric::detail::exp10<__int128_t>(-scale);
auto const fraction = count_digits(abs_value % exp_ten);
auto const num_zeros = std::max(0, (-scale - fraction));
auto const num_zeros = cuda::std::max(0, (-scale - fraction));
return static_cast<int32_t>(value < 0) + // sign if negative
count_digits(abs_value / exp_ten) + // integer
1 + // decimal point
Expand Down Expand Up @@ -66,7 +68,7 @@ __device__ inline void fixed_point_to_string(__int128_t const& value, int32_t sc
if (value < 0) *out_ptr++ = '-'; // add sign
auto const abs_value = numeric::detail::abs(value);
auto const exp_ten = numeric::detail::exp10<__int128_t>(-scale);
auto const num_zeros = std::max(0, (-scale - count_digits(abs_value % exp_ten)));
auto const num_zeros = cuda::std::max(0, (-scale - count_digits(abs_value % exp_ten)));

out_ptr += integer_to_string(abs_value / exp_ten, out_ptr); // add the integer part
*out_ptr++ = '.'; // add decimal point
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cudf/strings/detail/convert/int_to_string.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2022, NVIDIA CORPORATION.
* Copyright (c) 2021-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -67,7 +67,7 @@ __device__ inline size_type integer_to_string(IntegerType value, char* d_buffer)
* @return size_type number of digits in input value
*/
template <typename IntegerType>
constexpr size_type count_digits(IntegerType value)
__device__ constexpr size_type count_digits(IntegerType value)
{
if (value == 0) return 1;
bool const is_negative = cuda::std::is_signed<IntegerType>() ? (value < 0) : false;
Expand Down
Loading

0 comments on commit 23a8204

Please sign in to comment.