From 231015910bae375077e07c01d2bf70697182ccad Mon Sep 17 00:00:00 2001 From: Shruti Shivakumar Date: Thu, 9 Jan 2025 16:53:25 -0500 Subject: [PATCH 1/7] Support multithreaded reading of compressed buffers in JSON reader (#17670) Addresses #17638 This PR introduces multithreaded host-side decompression of compressed input buffers passed to the JSON reader, and uses a stream pool to transfer the uncompressed buffers to device. Authors: - Shruti Shivakumar (https://github.com/shrshi) Approvers: - Paul Mattione (https://github.com/pmattione-nvidia) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/17670 --- cpp/src/io/json/read_json.cu | 71 +++++++++++++++++++++++++++++++----- 1 file changed, 61 insertions(+), 10 deletions(-) diff --git a/cpp/src/io/json/read_json.cu b/cpp/src/io/json/read_json.cu index 82d8152ca1c..113342e9cbf 100644 --- a/cpp/src/io/json/read_json.cu +++ b/cpp/src/io/json/read_json.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,6 +30,7 @@ #include #include +#include #include #include @@ -37,12 +38,25 @@ #include #include +#include +#include + #include namespace cudf::io::json::detail { namespace { +namespace pools { + +BS::thread_pool& tpool() +{ + static BS::thread_pool _tpool(std::thread::hardware_concurrency()); + return _tpool; +} + +} // namespace pools + class compressed_host_buffer_source final : public datasource { public: explicit compressed_host_buffer_source(std::unique_ptr<datasource> const& src, compression_type comptype) : _comptype{comptype}, _dbuf_ptr{src->host_read(0, src->size())} { auto ch_buffer = host_span<uint8_t const>(reinterpret_cast<uint8_t const*>(_dbuf_ptr->data()), _dbuf_ptr->size()); - if (comptype == compression_type::GZIP || comptype == compression_type::ZIP || - comptype == compression_type::SNAPPY) { + if (_comptype == compression_type::GZIP || _comptype == compression_type::ZIP || + _comptype == compression_type::SNAPPY) { _decompressed_ch_buffer_size = cudf::io::detail::get_uncompressed_size(_comptype, ch_buffer); } else { _decompressed_buffer = cudf::io::detail::decompress(_comptype, ch_buffer); @@ -96,7 +110,22 @@ class compressed_host_buffer_source final : public datasource { return std::make_unique<non_owning_buffer>(_decompressed_buffer.data() + offset, count); } - [[nodiscard]] bool supports_device_read() const override { return false; } + std::future<size_t> device_read_async(size_t offset, + size_t size, + uint8_t* dst, + rmm::cuda_stream_view stream) override + { + auto& thread_pool = pools::tpool(); + return thread_pool.submit_task([this, offset, size, dst, stream] { + auto hbuf = host_read(offset, size); + CUDF_CUDA_TRY( + cudaMemcpyAsync(dst, hbuf->data(), hbuf->size(), cudaMemcpyHostToDevice, stream.value())); + stream.synchronize(); + return hbuf->size(); + }); + } + + [[nodiscard]] bool supports_device_read() const override { return true; } [[nodiscard]] size_t size() const override { return _decompressed_ch_buffer_size; } @@ -431,6 +460,8 @@ device_span<char> ingest_raw_input(device_span<char> buffer, // line of file i+1 don't end up on the same JSON line, if file i does not already end with a line // delimiter.
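The pattern introduced above, one host-side task per source that stages bytes on the host and pushes them to the device over its own CUDA stream, can be sketched in isolation. The following is a minimal illustration only, not the PR's code: it assumes the CUDA runtime, substitutes std::async for BS::thread_pool and plain cudaStream_t handles for cudf::detail::fork_streams, and copy_async/copy_all are hypothetical names.

#include <cuda_runtime.h>
#include <cstddef>
#include <cstdint>
#include <future>
#include <numeric>
#include <vector>

// One task per input buffer: copy to device on a dedicated stream and report bytes copied.
// Error checking is omitted for brevity; production code would also prefer pinned host
// memory so the cudaMemcpyAsync is truly asynchronous.
std::future<std::size_t> copy_async(std::vector<std::uint8_t> const& src,
                                    std::uint8_t* dst,
                                    cudaStream_t stream)
{
  return std::async(std::launch::async, [&src, dst, stream] {
    cudaMemcpyAsync(dst, src.data(), src.size(), cudaMemcpyHostToDevice, stream);
    cudaStreamSynchronize(stream);  // safe to block: this task owns the stream
    return src.size();
  });
}

// Launch one copy per buffer, round-robin over the stream pool, then reduce the
// futures to the total byte count.
std::size_t copy_all(std::vector<std::vector<std::uint8_t>> const& bufs,
                     std::vector<std::uint8_t*> const& dsts,
                     std::vector<cudaStream_t> const& streams)
{
  std::vector<std::future<std::size_t>> tasks;
  for (std::size_t i = 0; i < bufs.size(); ++i)
    tasks.emplace_back(copy_async(bufs[i], dsts[i], streams[i % streams.size()]));
  return std::accumulate(
    tasks.begin(), tasks.end(), std::size_t{0},
    [](std::size_t sum, std::future<std::size_t>& t) { return sum + t.get(); });
}

Blocking on cudaStreamSynchronize inside each task is acceptable because the task owns its stream for the duration of the copy; the caller blocks only once, in the reduction over task.get(), which is the same accumulate-and-verify step ingest_raw_input performs below.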
auto constexpr num_delimiter_chars = 1; + std::vector<std::future<size_t>> thread_tasks; + auto stream_pool = cudf::detail::fork_streams(stream, pools::tpool().get_thread_count()); auto delimiter_map = cudf::detail::make_empty_host_vector<std::size_t>(sources.size(), stream); std::vector<std::size_t> prefsum_source_sizes(sources.size()); @@ -447,13 +478,17 @@ device_span<char> ingest_raw_input(device_span<char> buffer, auto const total_bytes_to_read = std::min(range_size, prefsum_source_sizes.back() - range_offset); range_offset -= start_source ? prefsum_source_sizes[start_source - 1] : 0; - for (std::size_t i = start_source; i < sources.size() && bytes_read < total_bytes_to_read; i++) { + for (std::size_t i = start_source, cur_stream = 0; + i < sources.size() && bytes_read < total_bytes_to_read; + i++) { if (sources[i]->is_empty()) continue; auto data_size = std::min(sources[i]->size() - range_offset, total_bytes_to_read - bytes_read); auto destination = reinterpret_cast<uint8_t*>(buffer.data()) + bytes_read + (num_delimiter_chars * delimiter_map.size()); - if (sources[i]->is_device_read_preferred(data_size)) { - bytes_read += sources[i]->device_read(range_offset, data_size, destination, stream); + if (sources[i]->supports_device_read()) { + thread_tasks.emplace_back(sources[i]->device_read_async( + range_offset, data_size, destination, stream_pool[cur_stream++ % stream_pool.size()])); + bytes_read += data_size; } else { h_buffers.emplace_back(sources[i]->host_read(range_offset, data_size)); auto const& h_buffer = h_buffers.back(); @@ -481,6 +516,15 @@ device_span<char> ingest_raw_input(device_span<char> buffer, buffer.data()); } stream.synchronize(); + + if (thread_tasks.size()) { + auto const bytes_read = std::accumulate( + thread_tasks.begin(), thread_tasks.end(), std::size_t{0}, [](std::size_t sum, auto& task) { + return sum + task.get(); + }); + CUDF_EXPECTS(bytes_read == total_bytes_to_read, "Incorrect number of bytes read by the multithreaded reader"); + } + return buffer.first(bytes_read + (delimiter_map.size() * num_delimiter_chars)); } @@ -505,10 +549,17 @@ table_with_metadata read_json(host_span<std::unique_ptr<datasource>> sources, return read_json_impl(sources, reader_opts, stream, mr); std::vector<std::unique_ptr<datasource>> compressed_sources; - for (size_t i = 0; i < sources.size(); i++) { - compressed_sources.emplace_back( - std::make_unique<compressed_host_buffer_source>(sources[i], reader_opts.get_compression())); + std::vector<std::future<std::unique_ptr<compressed_host_buffer_source>>> thread_tasks; + auto& thread_pool = pools::tpool(); + for (auto& src : sources) { + thread_tasks.emplace_back(thread_pool.submit_task([&reader_opts, &src] { + return std::make_unique<compressed_host_buffer_source>(src, reader_opts.get_compression()); + })); } + std::transform(thread_tasks.begin(), + thread_tasks.end(), + std::back_inserter(compressed_sources), + [](auto& task) { return task.get(); }); // in read_json_impl, we need the compressed source size to actually be the // uncompressed source size for correct batching return read_json_impl(compressed_sources, reader_opts, stream, mr); From a8a41975b0c1cfaedb7d4461ee027f6f9ff75b0e Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 9 Jan 2025 14:16:04 -0800 Subject: [PATCH 2/7] Remove cudf._lib.types.pyx (#17665) Contributes to https://github.com/rapidsai/cudf/issues/17317 1. Moves some Python routines/objects to `cudf/utils/dtypes.py` 2.
Moves specific column only routines directly to `cudf/_lib/column.pyx` Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Lawrence Mitchell (https://github.com/wence-) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/17665 --- python/cudf/cudf/_lib/CMakeLists.txt | 4 +- python/cudf/cudf/_lib/column.pxd | 4 +- python/cudf/cudf/_lib/column.pyx | 100 ++++++++-- python/cudf/cudf/_lib/scalar.pyx | 49 ++--- python/cudf/cudf/_lib/types.pxd | 11 -- python/cudf/cudf/_lib/types.pyx | 172 ------------------ python/cudf/cudf/core/_base_index.py | 9 +- .../cudf/cudf/core/_internals/aggregation.py | 4 +- python/cudf/cudf/core/_internals/binaryop.py | 4 +- python/cudf/cudf/core/_internals/unary.py | 4 +- python/cudf/cudf/core/column/categorical.py | 10 +- python/cudf/cudf/core/column/column.py | 27 +-- python/cudf/cudf/core/column/lists.py | 4 +- python/cudf/cudf/core/column/string.py | 12 +- python/cudf/cudf/core/copy_types.py | 6 +- python/cudf/cudf/core/dtypes.py | 5 +- python/cudf/cudf/core/groupby/groupby.py | 21 +-- python/cudf/cudf/core/index.py | 6 +- python/cudf/cudf/core/indexed_frame.py | 3 +- python/cudf/cudf/core/join/join.py | 6 +- python/cudf/cudf/core/multiindex.py | 11 +- python/cudf/cudf/core/reshape.py | 9 +- python/cudf/cudf/io/csv.py | 8 +- python/cudf/cudf/io/json.py | 8 +- python/cudf/cudf/io/orc.py | 4 +- python/cudf/cudf/utils/dtypes.py | 66 ++++++- 26 files changed, 251 insertions(+), 316 deletions(-) delete mode 100644 python/cudf/cudf/_lib/types.pxd delete mode 100644 python/cudf/cudf/_lib/types.pyx diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index ff6fba1c3e8..ec44a6aa8c5 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2022-2024, NVIDIA CORPORATION. +# Copyright (c) 2022-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -12,7 +12,7 @@ # the License. # ============================================================================= -set(cython_sources column.pyx scalar.pyx strings_udf.pyx types.pyx) +set(cython_sources column.pyx scalar.pyx strings_udf.pyx) set(linked_libraries cudf::cudf) rapids_cython_create_modules( diff --git a/python/cudf/cudf/_lib/column.pxd b/python/cudf/cudf/_lib/column.pxd index 8b1d16f0d85..026c12895e8 100644 --- a/python/cudf/cudf/_lib/column.pxd +++ b/python/cudf/cudf/_lib/column.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. from typing import Literal @@ -13,6 +13,8 @@ from pylibcudf.libcudf.column.column_view cimport ( from pylibcudf.libcudf.types cimport size_type from rmm.librmm.device_buffer cimport device_buffer +cdef dtype_from_lists_column_view(column_view cv) +cdef dtype_from_column_view(column_view cv) cdef class Column: cdef public: diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx index f7dcd89ea48..c59bbc0f40c 100644 --- a/python/cudf/cudf/_lib/column.pyx +++ b/python/cudf/cudf/_lib/column.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION.
from typing import Literal @@ -19,24 +19,21 @@ from cudf.core.buffer import ( as_buffer, cuda_array_interface_wrapper, ) -from cudf.utils.dtypes import _get_base_dtype +from cudf.utils.dtypes import ( + _get_base_dtype, + dtype_to_pylibcudf_type, + PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES, +) from cpython.buffer cimport PyObject_CheckBuffer -from libc.stdint cimport uintptr_t -from libcpp.memory cimport make_unique, unique_ptr +from libc.stdint cimport uintptr_t, int32_t +from libcpp.memory cimport make_shared, make_unique, shared_ptr, unique_ptr from libcpp.utility cimport move from libcpp.vector cimport vector from rmm.pylibrmm.device_buffer cimport DeviceBuffer -from cudf._lib.types cimport ( - dtype_from_column_view, - dtype_to_pylibcudf_type, -) - -from cudf._lib.types import dtype_from_pylibcudf_column - -from pylibcudf cimport DataType as plc_DataType +from pylibcudf cimport DataType as plc_DataType, Column as plc_Column cimport pylibcudf.libcudf.copying as cpp_copying cimport pylibcudf.libcudf.types as libcudf_types cimport pylibcudf.libcudf.unary as libcudf_unary @@ -45,6 +42,7 @@ from pylibcudf.libcudf.column.column_factories cimport ( make_numeric_column ) from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view from pylibcudf.libcudf.null_mask cimport null_count as cpp_null_count from pylibcudf.libcudf.scalar.scalar cimport scalar @@ -64,6 +62,80 @@ cdef get_element(column_view col_view, size_type index): ) +def dtype_from_pylibcudf_column(plc_Column col not None): + type_ = col.type() + tid = type_.id() + + if tid == pylibcudf.TypeId.LIST: + child = col.list_view().child() + return cudf.ListDtype(dtype_from_pylibcudf_column(child)) + elif tid == pylibcudf.TypeId.STRUCT: + fields = { + str(i): dtype_from_pylibcudf_column(col.child(i)) + for i in range(col.num_children()) + } + return cudf.StructDtype(fields) + elif tid == pylibcudf.TypeId.DECIMAL64: + return cudf.Decimal64Dtype( + precision=cudf.Decimal64Dtype.MAX_PRECISION, + scale=-type_.scale() + ) + elif tid == pylibcudf.TypeId.DECIMAL32: + return cudf.Decimal32Dtype( + precision=cudf.Decimal32Dtype.MAX_PRECISION, + scale=-type_.scale() + ) + elif tid == pylibcudf.TypeId.DECIMAL128: + return cudf.Decimal128Dtype( + precision=cudf.Decimal128Dtype.MAX_PRECISION, + scale=-type_.scale() + ) + else: + return PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[tid] + + +cdef dtype_from_lists_column_view(column_view cv): + # lists_column_view have no default constructor, so we heap + # allocate it to get around Cython's limitation of requiring + # default constructors for stack allocated objects + cdef shared_ptr[lists_column_view] lv = make_shared[lists_column_view](cv) + cdef column_view child = lv.get()[0].child() + + if child.type().id() == libcudf_types.type_id.LIST: + return cudf.ListDtype(dtype_from_lists_column_view(child)) + else: + return cudf.ListDtype(dtype_from_column_view(child)) + + +cdef dtype_from_column_view(column_view cv): + cdef libcudf_types.type_id tid = cv.type().id() + if tid == libcudf_types.type_id.LIST: + return dtype_from_lists_column_view(cv) + elif tid == libcudf_types.type_id.STRUCT: + fields = { + str(i): dtype_from_column_view(cv.child(i)) + for i in range(cv.num_children()) + } + return cudf.StructDtype(fields) + elif tid == libcudf_types.type_id.DECIMAL64: + return cudf.Decimal64Dtype( + precision=cudf.Decimal64Dtype.MAX_PRECISION, + scale=-cv.type().scale() + ) + elif tid == libcudf_types.type_id.DECIMAL32: + return cudf.Decimal32Dtype( + 
precision=cudf.Decimal32Dtype.MAX_PRECISION, + scale=-cv.type().scale() + ) + elif tid == libcudf_types.type_id.DECIMAL128: + return cudf.Decimal128Dtype( + precision=cudf.Decimal128Dtype.MAX_PRECISION, + scale=-cv.type().scale() + ) + else: + return PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[(tid)] + + cdef class Column: """ A Column stores columnar data in device memory. @@ -361,7 +433,7 @@ cdef class Column: col = self data_dtype = col.dtype - cdef plc_DataType dtype = dtype_to_pylibcudf_type(data_dtype) + cdef plc_DataType dtype = dtype_to_pylibcudf_type(data_dtype) cdef libcudf_types.size_type offset = self.offset cdef vector[mutable_column_view] children cdef void* data @@ -424,7 +496,7 @@ cdef class Column: col = self data_dtype = col.dtype - cdef plc_DataType dtype = dtype_to_pylibcudf_type(data_dtype) + cdef plc_DataType dtype = dtype_to_pylibcudf_type(data_dtype) cdef libcudf_types.size_type offset = self.offset cdef vector[column_view] children cdef void* data diff --git a/python/cudf/cudf/_lib/scalar.pyx b/python/cudf/cudf/_lib/scalar.pyx index fd6d0257940..65607c91302 100644 --- a/python/cudf/cudf/_lib/scalar.pyx +++ b/python/cudf/cudf/_lib/scalar.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. import copy @@ -14,17 +14,16 @@ import pylibcudf as plc import cudf from cudf.core.dtypes import ListDtype, StructDtype -from cudf._lib.types import PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES -from cudf._lib.types cimport dtype_from_column_view, underlying_type_t_type_id from cudf.core.missing import NA, NaT +from cudf.utils.dtypes import PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES # We currently need this cimport because some of the implementations here # access the c_obj of the scalar, and because we need to be able to call # pylibcudf.Scalar.from_libcudf. Both of those are temporarily acceptable until # DeviceScalar is phased out entirely from cuDF Cython (at which point # cudf.Scalar will be directly backed by pylibcudf.Scalar). 
-from pylibcudf cimport Scalar as plc_Scalar, type_id as plc_TypeID -from pylibcudf.libcudf.scalar.scalar cimport list_scalar, scalar, struct_scalar +from pylibcudf cimport Scalar as plc_Scalar +from pylibcudf.libcudf.scalar.scalar cimport scalar def _replace_nested(obj, check, replacement): @@ -223,40 +222,22 @@ cdef class DeviceScalar: return s cdef void _set_dtype(self, dtype=None): - cdef plc_TypeID cdtype_id = self.c_value.type().id() + cdtype_id = self.c_value.type().id() if dtype is not None: self._dtype = dtype elif cdtype_id in { - plc_TypeID.DECIMAL32, - plc_TypeID.DECIMAL64, - plc_TypeID.DECIMAL128, + plc.TypeID.DECIMAL32, + plc.TypeID.DECIMAL64, + plc.TypeID.DECIMAL128, }: raise TypeError( "Must pass a dtype when constructing from a fixed-point scalar" ) - elif cdtype_id == plc_TypeID.STRUCT: - struct_table_view = (self.get_raw_ptr())[0].view() - self._dtype = StructDtype({ - str(i): dtype_from_column_view(struct_table_view.column(i)) - for i in range(struct_table_view.num_columns()) - }) - elif cdtype_id == plc_TypeID.LIST: - if ( - self.get_raw_ptr() - )[0].view().type().id() == plc_TypeID.LIST: - self._dtype = dtype_from_column_view( - (self.get_raw_ptr())[0].view() - ) - else: - self._dtype = ListDtype( - PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[ - ( - (self.get_raw_ptr())[0] - .view().type().id() - ) - ] - ) + elif cdtype_id == plc.TypeID.STRUCT: + self._dtype = StructDtype.from_arrow( + plc.interop.to_arrow(self.c_value).type + ) + elif cdtype_id == plc.TypeID.LIST: + self._dtype = ListDtype.from_arrow(plc.interop.to_arrow(self.c_value).type) else: - self._dtype = PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[ - (cdtype_id) - ] + self._dtype = PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[cdtype_id] diff --git a/python/cudf/cudf/_lib/types.pxd b/python/cudf/cudf/_lib/types.pxd deleted file mode 100644 index 18b1d26e4db..00000000000 --- a/python/cudf/cudf/_lib/types.pxd +++ /dev/null @@ -1,11 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. - -from libc.stdint cimport int32_t - -from pylibcudf.libcudf.column.column_view cimport column_view - -ctypedef int32_t underlying_type_t_type_id - -cdef dtype_from_column_view(column_view cv) - -cpdef dtype_to_pylibcudf_type(dtype) diff --git a/python/cudf/cudf/_lib/types.pyx b/python/cudf/cudf/_lib/types.pyx deleted file mode 100644 index 777bd070b32..00000000000 --- a/python/cudf/cudf/_lib/types.pyx +++ /dev/null @@ -1,172 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
- -import numpy as np -import pandas as pd - -from libcpp.memory cimport make_shared, shared_ptr - -cimport pylibcudf.libcudf.types as libcudf_types -from pylibcudf.libcudf.column.column_view cimport column_view -from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view - -import pylibcudf as plc - -import cudf - - -SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES = { - np.dtype("int8"): plc.types.TypeId.INT8, - np.dtype("int16"): plc.types.TypeId.INT16, - np.dtype("int32"): plc.types.TypeId.INT32, - np.dtype("int64"): plc.types.TypeId.INT64, - np.dtype("uint8"): plc.types.TypeId.UINT8, - np.dtype("uint16"): plc.types.TypeId.UINT16, - np.dtype("uint32"): plc.types.TypeId.UINT32, - np.dtype("uint64"): plc.types.TypeId.UINT64, - np.dtype("float32"): plc.types.TypeId.FLOAT32, - np.dtype("float64"): plc.types.TypeId.FLOAT64, - np.dtype("datetime64[s]"): plc.types.TypeId.TIMESTAMP_SECONDS, - np.dtype("datetime64[ms]"): plc.types.TypeId.TIMESTAMP_MILLISECONDS, - np.dtype("datetime64[us]"): plc.types.TypeId.TIMESTAMP_MICROSECONDS, - np.dtype("datetime64[ns]"): plc.types.TypeId.TIMESTAMP_NANOSECONDS, - np.dtype("object"): plc.types.TypeId.STRING, - np.dtype("bool"): plc.types.TypeId.BOOL8, - np.dtype("timedelta64[s]"): plc.types.TypeId.DURATION_SECONDS, - np.dtype("timedelta64[ms]"): plc.types.TypeId.DURATION_MILLISECONDS, - np.dtype("timedelta64[us]"): plc.types.TypeId.DURATION_MICROSECONDS, - np.dtype("timedelta64[ns]"): plc.types.TypeId.DURATION_NANOSECONDS, -} -PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES = { - plc_type: np_type - for np_type, plc_type in SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES.items() -} -# There's no equivalent to EMPTY in cudf. We translate EMPTY -# columns from libcudf to ``int8`` columns of all nulls in Python. -# ``int8`` is chosen because it uses the least amount of memory. 
-PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[plc.types.TypeId.EMPTY] = np.dtype("int8") -PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[plc.types.TypeId.STRUCT] = np.dtype("object") -PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[plc.types.TypeId.LIST] = np.dtype("object") - - -size_type_dtype = PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[plc.types.SIZE_TYPE_ID] - - -cdef dtype_from_lists_column_view(column_view cv): - # lists_column_view have no default constructor, so we heap - # allocate it to get around Cython's limitation of requiring - # default constructors for stack allocated objects - cdef shared_ptr[lists_column_view] lv = make_shared[lists_column_view](cv) - cdef column_view child = lv.get()[0].child() - - if child.type().id() == libcudf_types.type_id.LIST: - return cudf.ListDtype(dtype_from_lists_column_view(child)) - elif child.type().id() == libcudf_types.type_id.EMPTY: - return cudf.ListDtype("int8") - else: - return cudf.ListDtype( - dtype_from_column_view(child) - ) - -cdef dtype_from_structs_column_view(column_view cv): - fields = { - str(i): dtype_from_column_view(cv.child(i)) - for i in range(cv.num_children()) - } - return cudf.StructDtype(fields) - -cdef dtype_from_column_view(column_view cv): - cdef libcudf_types.type_id tid = cv.type().id() - if tid == libcudf_types.type_id.LIST: - return dtype_from_lists_column_view(cv) - elif tid == libcudf_types.type_id.STRUCT: - return dtype_from_structs_column_view(cv) - elif tid == libcudf_types.type_id.DECIMAL64: - return cudf.Decimal64Dtype( - precision=cudf.Decimal64Dtype.MAX_PRECISION, - scale=-cv.type().scale() - ) - elif tid == libcudf_types.type_id.DECIMAL32: - return cudf.Decimal32Dtype( - precision=cudf.Decimal32Dtype.MAX_PRECISION, - scale=-cv.type().scale() - ) - elif tid == libcudf_types.type_id.DECIMAL128: - return cudf.Decimal128Dtype( - precision=cudf.Decimal128Dtype.MAX_PRECISION, - scale=-cv.type().scale() - ) - else: - return PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[ - (tid) - ] - - -cpdef dtype_to_pylibcudf_type(dtype): - if isinstance(dtype, cudf.ListDtype): - return plc.DataType(plc.TypeId.LIST) - elif isinstance(dtype, cudf.StructDtype): - return plc.DataType(plc.TypeId.STRUCT) - elif isinstance(dtype, cudf.Decimal128Dtype): - tid = plc.TypeId.DECIMAL128 - return plc.DataType(tid, -dtype.scale) - elif isinstance(dtype, cudf.Decimal64Dtype): - tid = plc.TypeId.DECIMAL64 - return plc.DataType(tid, -dtype.scale) - elif isinstance(dtype, cudf.Decimal32Dtype): - tid = plc.TypeId.DECIMAL32 - return plc.DataType(tid, -dtype.scale) - # libcudf types don't support timezones so convert to the base type - elif isinstance(dtype, pd.DatetimeTZDtype): - dtype = np.dtype(f" ColumnBase: if self.null_count == len(self): # self.categories is empty; just return codes return self.codes - gather_map = self.codes.astype(libcudf.types.size_type_dtype).fillna(0) + gather_map = self.codes.astype(SIZE_TYPE_DTYPE).fillna(0) out = self.categories.take(gather_map) out = out.set_mask(self.mask) return out @@ -1192,10 +1192,10 @@ def _concat( codes = [o.codes for o in objs] newsize = sum(map(len, codes)) - if newsize > np.iinfo(libcudf.types.size_type_dtype).max: + if newsize > np.iinfo(SIZE_TYPE_DTYPE).max: raise MemoryError( f"Result of concat cannot have " - f"size > {libcudf.types.size_type_dtype}_MAX" + f"size > {SIZE_TYPE_DTYPE}_MAX" ) elif newsize == 0: codes_col = column.column_empty(0, head.codes.dtype) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index e23ca810065..30da8727366 100644 --- 
a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -25,7 +25,6 @@ import cudf from cudf import _lib as libcudf from cudf._lib.column import Column -from cudf._lib.types import dtype_to_pylibcudf_type, size_type_dtype from cudf.api.types import ( _is_non_decimal_numeric_dtype, _is_pandas_nullable_extension_dtype, @@ -60,9 +59,11 @@ from cudf.core.mixins import BinaryOperand, Reducible from cudf.errors import MixedTypeError from cudf.utils.dtypes import ( + SIZE_TYPE_DTYPE, _maybe_convert_to_default_type, cudf_dtype_from_pa_type, cudf_dtype_to_pa_type, + dtype_to_pylibcudf_type, find_common_type, get_time_unit, is_column_like, @@ -874,7 +875,7 @@ def indices_of( value = as_column(value, dtype=self.dtype, length=1) mask = value.contains(self) return apply_boolean_mask( # type: ignore[return-value] - [as_column(range(0, len(self)), dtype=size_type_dtype)], mask + [as_column(range(0, len(self)), dtype=SIZE_TYPE_DTYPE)], mask )[0] def _find_first_and_last(self, value: ScalarLike) -> tuple[int, int]: @@ -954,7 +955,7 @@ def take( # TODO: For performance, the check and conversion of gather map should # be done by the caller. This check will be removed in future release. if indices.dtype.kind not in {"u", "i"}: - indices = indices.astype(libcudf.types.size_type_dtype) + indices = indices.astype(SIZE_TYPE_DTYPE) GatherMap(indices, len(self), nullify=not check_bounds or nullify) gathered = copying.gather([self], indices, nullify=nullify) # type: ignore[arg-type] return gathered[0]._with_type_metadata(self.dtype) # type: ignore[return-value] @@ -1743,9 +1744,7 @@ def column_empty( elif isinstance(dtype, ListDtype): data = None children = ( - as_column( - 0, length=row_count + 1, dtype=libcudf.types.size_type_dtype - ), + as_column(0, length=row_count + 1, dtype=SIZE_TYPE_DTYPE), column_empty(row_count, dtype=dtype.element_type), ) elif isinstance(dtype, CategoricalDtype): @@ -1754,21 +1753,16 @@ def column_empty( cudf.core.column.NumericalColumn( data=as_buffer( rmm.DeviceBuffer( - size=row_count - * cudf.dtype(libcudf.types.size_type_dtype).itemsize + size=row_count * cudf.dtype(SIZE_TYPE_DTYPE).itemsize ) ), size=None, - dtype=libcudf.types.size_type_dtype, + dtype=SIZE_TYPE_DTYPE, ), ) elif dtype.kind in "OU" and not isinstance(dtype, DecimalDtype): data = as_buffer(rmm.DeviceBuffer(size=0)) - children = ( - as_column( - 0, length=row_count + 1, dtype=libcudf.types.size_type_dtype - ), - ) + children = (as_column(0, length=row_count + 1, dtype=SIZE_TYPE_DTYPE),) else: data = as_buffer(rmm.DeviceBuffer(size=row_count * dtype.itemsize)) @@ -2552,10 +2546,9 @@ def concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase: ) newsize = sum(map(len, objs)) - if newsize > np.iinfo(libcudf.types.size_type_dtype).max: + if newsize > np.iinfo(SIZE_TYPE_DTYPE).max: raise MemoryError( - f"Result of concat cannot have " - f"size > {libcudf.types.size_type_dtype}_MAX" + f"Result of concat cannot have " f"size > {SIZE_TYPE_DTYPE}_MAX" ) elif newsize == 0: return column_empty(0, head.dtype) diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index 6fc2b5d4ca2..04b4003c510 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -14,7 +14,6 @@ import cudf import cudf.core.column.column as column -from cudf._lib.types import size_type_dtype from cudf.api.types import _is_non_decimal_numeric_dtype, is_scalar from cudf.core.buffer import acquire_spill_lock from cudf.core.column.column import 
ColumnBase, as_column @@ -22,6 +21,7 @@ from cudf.core.column.numerical import NumericalColumn from cudf.core.dtypes import ListDtype from cudf.core.missing import NA +from cudf.utils.dtypes import SIZE_TYPE_DTYPE if TYPE_CHECKING: from collections.abc import Sequence @@ -258,7 +258,7 @@ def from_sequences( offset_col = cast( NumericalColumn, - column.as_column(offset_vals, dtype=size_type_dtype), + column.as_column(offset_vals, dtype=SIZE_TYPE_DTYPE), ) # Build ListColumn diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 20eded9a27f..2bee85cb387 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -19,16 +19,18 @@ import cudf.api.types import cudf.core.column.column as column import cudf.core.column.datetime as datetime -from cudf import _lib as libcudf from cudf._lib.column import Column -from cudf._lib.types import dtype_to_pylibcudf_type, size_type_dtype from cudf.api.types import is_integer, is_scalar, is_string_dtype from cudf.core._internals import binaryop from cudf.core.buffer import acquire_spill_lock from cudf.core.column.column import ColumnBase from cudf.core.column.methods import ColumnMethods from cudf.utils.docutils import copy_docstring -from cudf.utils.dtypes import can_convert_to_column +from cudf.utils.dtypes import ( + SIZE_TYPE_DTYPE, + can_convert_to_column, + dtype_to_pylibcudf_type, +) if TYPE_CHECKING: from collections.abc import Callable, Sequence @@ -5611,7 +5613,7 @@ def __init__( if len(children) == 0 and size != 0: # all nulls-column: offsets = column.as_column( - 0, length=size + 1, dtype=size_type_dtype + 0, length=size + 1, dtype=SIZE_TYPE_DTYPE ) children = (offsets,) @@ -5888,7 +5890,7 @@ def as_decimal_column( ) -> cudf.core.column.DecimalBaseColumn: plc_column = plc.strings.convert.convert_fixed_point.to_fixed_point( self.to_pylibcudf(mode="read"), - libcudf.types.dtype_to_pylibcudf_type(dtype), + dtype_to_pylibcudf_type(dtype), ) result = Column.from_pylibcudf(plc_column) result.dtype.precision = dtype.precision # type: ignore[union-attr] diff --git a/python/cudf/cudf/core/copy_types.py b/python/cudf/cudf/core/copy_types.py index 4b6ad59c8e1..aaaf6c7ee4f 100644 --- a/python/cudf/cudf/core/copy_types.py +++ b/python/cudf/cudf/core/copy_types.py @@ -1,11 +1,11 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Copyright (c) 2023-2025, NVIDIA CORPORATION. from dataclasses import dataclass from typing import TYPE_CHECKING, Any, cast from typing_extensions import Self import cudf -from cudf._lib.types import size_type_dtype +from cudf.utils.dtypes import SIZE_TYPE_DTYPE if TYPE_CHECKING: from cudf.core.column import NumericalColumn @@ -63,7 +63,7 @@ def __init__(self, column: Any, nrows: int, *, nullify: bool): # Alternately we can have an Optional[Column] and handle None # specially in _gather. self.column = cast( - "NumericalColumn", self.column.astype(size_type_dtype) + "NumericalColumn", self.column.astype(SIZE_TYPE_DTYPE) ) else: if self.column.dtype.kind not in {"i", "u"}: diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py index 8ed233ba737..ce7fb968069 100644 --- a/python/cudf/cudf/core/dtypes.py +++ b/python/cudf/cudf/core/dtypes.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. 
from __future__ import annotations import decimal @@ -57,7 +57,8 @@ def dtype(arbitrary): if np_dtype.kind in set("OU"): return np.dtype("object") elif ( - np_dtype not in cudf._lib.types.SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES + np_dtype + not in cudf.utils.dtypes.SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES ): raise TypeError(f"Unsupported type {np_dtype}") return np_dtype diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 17302311a7e..7bc4b08fc49 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -21,7 +21,6 @@ import cudf import cudf.core._internals from cudf import _lib as libcudf -from cudf._lib.types import size_type_dtype from cudf.api.extensions import no_default from cudf.api.types import ( is_list_like, @@ -46,7 +45,7 @@ from cudf.core.mixins import Reducible, Scannable from cudf.core.multiindex import MultiIndex from cudf.core.udf.groupby_utils import _can_be_jitted, jit_groupby_apply -from cudf.utils.dtypes import cudf_dtype_to_pa_type +from cudf.utils.dtypes import SIZE_TYPE_DTYPE, cudf_dtype_to_pa_type from cudf.utils.performance_tracking import _performance_tracking from cudf.utils.utils import GetAttrGetItemMixin @@ -588,7 +587,7 @@ def indices(self) -> dict[ScalarLike, cp.ndarray]: offsets, group_keys, (indices,) = self._groups( [ cudf.core.column.as_column( - range(len(self.obj)), dtype=size_type_dtype + range(len(self.obj)), dtype=SIZE_TYPE_DTYPE ) ] ) @@ -1185,7 +1184,7 @@ def _head_tail(self, n, *, take_head: bool, preserve_order: bool): # aggregation scheme in libcudf. This is probably "fast # enough" for most reasonable input sizes. _, offsets, _, group_values = self._grouped() - group_offsets = np.asarray(offsets, dtype=size_type_dtype) + group_offsets = np.asarray(offsets, dtype=SIZE_TYPE_DTYPE) size_per_group = np.diff(group_offsets) # "Out of bounds" n for the group size either means no entries # (negative) or all the entries (positive) @@ -1199,7 +1198,7 @@ def _head_tail(self, n, *, take_head: bool, preserve_order: bool): group_offsets = group_offsets[:-1] else: group_offsets = group_offsets[1:] - size_per_group - to_take = np.arange(size_per_group.sum(), dtype=size_type_dtype) + to_take = np.arange(size_per_group.sum(), dtype=SIZE_TYPE_DTYPE) fixup = np.empty_like(size_per_group) fixup[0] = 0 np.cumsum(size_per_group[:-1], out=fixup[1:]) @@ -1500,11 +1499,11 @@ def sample( # into a numpy array directly, rather than a list. # TODO: this uses the sort-based groupby, could one use hash-based? _, offsets, _, group_values = self._grouped() - group_offsets = np.asarray(offsets, dtype=size_type_dtype) + group_offsets = np.asarray(offsets, dtype=SIZE_TYPE_DTYPE) size_per_group = np.diff(group_offsets) if n is not None: samples_per_group = np.broadcast_to( - size_type_dtype.type(n), size_per_group.shape + SIZE_TYPE_DTYPE.type(n), size_per_group.shape ) if not replace and (minsize := size_per_group.min()) < n: raise ValueError( @@ -1517,7 +1516,7 @@ def sample( # which is round-to-nearest, ties to sgn(x) * inf). 
samples_per_group = np.round( size_per_group * frac, decimals=0 - ).astype(size_type_dtype) + ).astype(SIZE_TYPE_DTYPE) if replace: # We would prefer to use cupy here, but their rng.integers # interface doesn't take array-based low and high @@ -1525,7 +1524,7 @@ def sample( low = 0 high = np.repeat(size_per_group, samples_per_group) rng = np.random.default_rng(seed=random_state) - indices = rng.integers(low, high, dtype=size_type_dtype) + indices = rng.integers(low, high, dtype=SIZE_TYPE_DTYPE) indices += np.repeat(group_offsets[:-1], samples_per_group) else: # Approach: do a segmented argsort of the index array and take @@ -1533,7 +1532,7 @@ def sample( # We will shuffle the group indices and then pick them out # from the grouped dataframe index. nrows = len(group_values) - indices = cp.arange(nrows, dtype=size_type_dtype) + indices = cp.arange(nrows, dtype=SIZE_TYPE_DTYPE) if len(size_per_group) < 500: # Empirically shuffling with cupy is faster at this scale rs = cp.random.get_random_state() @@ -1557,7 +1556,7 @@ def sample( indices = ColumnBase.from_pylibcudf(plc_table.columns()[0]) indices = cp.asarray(indices.data_array_view(mode="read")) # Which indices are we going to want? - want = np.arange(samples_per_group.sum(), dtype=size_type_dtype) + want = np.arange(samples_per_group.sum(), dtype=SIZE_TYPE_DTYPE) scan = np.empty_like(samples_per_group) scan[0] = 0 np.cumsum(samples_per_group[:-1], out=scan[1:]) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index b535e8aabd2..0d1bf552982 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -19,7 +19,6 @@ import cudf from cudf import _lib as libcudf -from cudf._lib.types import size_type_dtype from cudf.api.extensions import no_default from cudf.api.types import ( _is_non_decimal_numeric_dtype, @@ -53,6 +52,7 @@ from cudf.core.single_column_frame import SingleColumnFrame from cudf.utils.docutils import copy_docstring from cudf.utils.dtypes import ( + SIZE_TYPE_DTYPE, _maybe_convert_to_default_type, find_common_type, is_mixed_with_object_dtype, @@ -1002,7 +1002,7 @@ def _indices_of(self, value) -> cudf.core.column.NumericalColumn: i = [self._range.index(value)] except ValueError: i = [] - return as_column(i, dtype=size_type_dtype) + return as_column(i, dtype=SIZE_TYPE_DTYPE) def isin(self, values, level=None): if level is not None and level > 0: @@ -1348,7 +1348,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): result = as_column( -1, length=len(needle), - dtype=libcudf.types.size_type_dtype, + dtype=SIZE_TYPE_DTYPE, ) if not len(self): diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index eded681baf0..4c6f8a9c152 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -60,6 +60,7 @@ from cudf.utils import docutils, ioutils from cudf.utils._numba import _CUDFNumbaConfig from cudf.utils.docutils import copy_docstring +from cudf.utils.dtypes import SIZE_TYPE_DTYPE from cudf.utils.performance_tracking import _performance_tracking from cudf.utils.utils import _warn_no_dask_cudf @@ -3034,7 +3035,7 @@ def _slice(self, arg: slice, keep_index: bool = True) -> Self: NumericalColumn, as_column( range(start, stop, stride), - dtype=libcudf.types.size_type_dtype, + dtype=SIZE_TYPE_DTYPE, ), ), len(self), diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py index 6e965ceca66..ce7edc8fdbe 100644 --- a/python/cudf/cudf/core/join/join.py +++ 
b/python/cudf/cudf/core/join/join.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. from __future__ import annotations from typing import Any @@ -7,7 +7,6 @@ import cudf from cudf import _lib as libcudf -from cudf._lib.types import size_type_dtype from cudf.core._internals import sorting from cudf.core.buffer import acquire_spill_lock from cudf.core.copy_types import GatherMap @@ -17,6 +16,7 @@ _IndexIndexer, _match_join_keys, ) +from cudf.utils.dtypes import SIZE_TYPE_DTYPE class Merge: @@ -243,7 +243,7 @@ def _gather_maps(self, left_cols, right_cols): # tables, we gather from iota on both right and left, and then # sort the gather maps with those two columns as key. key_order = [ - cudf.core.column.as_column(range(n), dtype=size_type_dtype).take( + cudf.core.column.as_column(range(n), dtype=SIZE_TYPE_DTYPE).take( map_, nullify=null, check_bounds=False ) for map_, n, null in zip(maps, lengths, nullify) diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index e7efd01ca85..64ec099cb39 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -17,7 +17,6 @@ import cudf import cudf._lib as libcudf -from cudf._lib.types import size_type_dtype from cudf.api.extensions import no_default from cudf.api.types import is_integer, is_list_like, is_object_dtype, is_scalar from cudf.core import column @@ -34,7 +33,7 @@ ensure_index, ) from cudf.core.join._join_helpers import _match_join_keys -from cudf.utils.dtypes import is_column_like +from cudf.utils.dtypes import SIZE_TYPE_DTYPE, is_column_like from cudf.utils.performance_tracking import _performance_tracking from cudf.utils.utils import NotIterable, _external_only_api, _is_same_name @@ -199,7 +198,7 @@ def __init__( ) if lo == -1: # Now we can gather and insert null automatically - code[code == -1] = np.iinfo(size_type_dtype).min + code[code == -1] = np.iinfo(SIZE_TYPE_DTYPE).min result_col = level._column.take(code, nullify=True) source_data[i] = result_col._with_type_metadata(level.dtype) @@ -1578,11 +1577,11 @@ def droplevel(self, level=-1) -> Self | cudf.Index: def to_pandas( self, *, nullable: bool = False, arrow_type: bool = False ) -> pd.MultiIndex: - # cudf uses np.iinfo(size_type_dtype).min as missing code + # cudf uses np.iinfo(SIZE_TYPE_DTYPE).min as missing code # pandas uses -1 as missing code pd_codes = ( code.find_and_replace( - column.as_column(np.iinfo(size_type_dtype).min, length=1), + column.as_column(np.iinfo(SIZE_TYPE_DTYPE).min, length=1), column.as_column(-1, length=1), ) for code in self._codes @@ -1903,7 +1902,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): result = column.as_column( -1, length=len(target), - dtype=libcudf.types.size_type_dtype, + dtype=SIZE_TYPE_DTYPE, ) if not len(self): return _return_get_indexer_result(result.values) diff --git a/python/cudf/cudf/core/reshape.py b/python/cudf/cudf/core/reshape.py index 0abd42d4d4e..eedd777aafe 100644 --- a/python/cudf/cudf/core/reshape.py +++ b/python/cudf/cudf/core/reshape.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2024, NVIDIA CORPORATION. +# Copyright (c) 2018-2025, NVIDIA CORPORATION. 
from __future__ import annotations import itertools @@ -12,13 +12,12 @@ import cudf from cudf._lib.column import Column -from cudf._lib.types import size_type_dtype from cudf.api.extensions import no_default from cudf.api.types import is_scalar from cudf.core._compat import PANDAS_LT_300 from cudf.core.column import ColumnBase, as_column, column_empty from cudf.core.column_accessor import ColumnAccessor -from cudf.utils.dtypes import min_unsigned_type +from cudf.utils.dtypes import SIZE_TYPE_DTYPE, min_unsigned_type if TYPE_CHECKING: from cudf._typing import Dtype @@ -1333,10 +1332,10 @@ def _one_hot_encode_column( else: column = column._get_decategorized_column() # type: ignore[attr-defined] - if column.size * categories.size >= np.iinfo(size_type_dtype).max: + if column.size * categories.size >= np.iinfo(SIZE_TYPE_DTYPE).max: raise ValueError( "Size limitation exceeded: column.size * category.size < " - f"np.iinfo({size_type_dtype}).max. Consider reducing " + f"np.iinfo({SIZE_TYPE_DTYPE}).max. Consider reducing " "size of category" ) result_labels = ( diff --git a/python/cudf/cudf/io/csv.py b/python/cudf/cudf/io/csv.py index 6d617cbf38e..7e8468c8e8a 100644 --- a/python/cudf/cudf/io/csv.py +++ b/python/cudf/cudf/io/csv.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2024, NVIDIA CORPORATION. +# Copyright (c) 2018-2025, NVIDIA CORPORATION. from __future__ import annotations import errno @@ -16,11 +16,13 @@ import cudf from cudf._lib.column import Column -from cudf._lib.types import dtype_to_pylibcudf_type from cudf.api.types import is_hashable, is_scalar from cudf.core.buffer import acquire_spill_lock from cudf.utils import ioutils -from cudf.utils.dtypes import _maybe_convert_to_default_type +from cudf.utils.dtypes import ( + _maybe_convert_to_default_type, + dtype_to_pylibcudf_type, +) from cudf.utils.performance_tracking import _performance_tracking _CSV_HEX_TYPE_MAP = { diff --git a/python/cudf/cudf/io/json.py b/python/cudf/cudf/io/json.py index ff326e09315..16c7d189dfd 100644 --- a/python/cudf/cudf/io/json.py +++ b/python/cudf/cudf/io/json.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# Copyright (c) 2019-2025, NVIDIA CORPORATION. from __future__ import annotations import os @@ -14,10 +14,12 @@ import cudf from cudf._lib.column import Column -from cudf._lib.types import dtype_to_pylibcudf_type from cudf.core.buffer import acquire_spill_lock from cudf.utils import ioutils -from cudf.utils.dtypes import _maybe_convert_to_default_type +from cudf.utils.dtypes import ( + _maybe_convert_to_default_type, + dtype_to_pylibcudf_type, +) if TYPE_CHECKING: from cudf.core.column import ColumnBase diff --git a/python/cudf/cudf/io/orc.py b/python/cudf/cudf/io/orc.py index f3124552fd1..0ac2950a22b 100644 --- a/python/cudf/cudf/io/orc.py +++ b/python/cudf/cudf/io/orc.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# Copyright (c) 2019-2025, NVIDIA CORPORATION. 
from __future__ import annotations import itertools @@ -11,11 +11,11 @@ import cudf from cudf._lib.column import Column -from cudf._lib.types import dtype_to_pylibcudf_type from cudf.api.types import is_list_like from cudf.core.buffer import acquire_spill_lock from cudf.core.index import _index_from_data from cudf.utils import ioutils +from cudf.utils.dtypes import dtype_to_pylibcudf_type try: import ujson as json # type: ignore[import-untyped] diff --git a/python/cudf/cudf/utils/dtypes.py b/python/cudf/cudf/utils/dtypes.py index 31a8f4de3b3..9e932acb5fa 100644 --- a/python/cudf/cudf/utils/dtypes.py +++ b/python/cudf/cudf/utils/dtypes.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. from __future__ import annotations import datetime @@ -11,6 +11,8 @@ import pyarrow as pa from pandas.core.dtypes.common import infer_dtype_from_object +import pylibcudf as plc + import cudf if TYPE_CHECKING: @@ -151,7 +153,7 @@ def cudf_dtype_from_pydata_dtype(dtype): return cudf.core.dtypes.Decimal64Dtype elif cudf.api.types.is_decimal128_dtype(dtype): return cudf.core.dtypes.Decimal128Dtype - elif dtype in cudf._lib.types.SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES: + elif dtype in SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES: return dtype.type return infer_dtype_from_object(dtype) @@ -604,6 +606,66 @@ def _get_base_dtype(dtype: pd.DatetimeTZDtype) -> np.dtype: return dtype.base +def dtype_to_pylibcudf_type(dtype) -> plc.DataType: + if isinstance(dtype, cudf.ListDtype): + return plc.DataType(plc.TypeId.LIST) + elif isinstance(dtype, cudf.StructDtype): + return plc.DataType(plc.TypeId.STRUCT) + elif isinstance(dtype, cudf.Decimal128Dtype): + tid = plc.TypeId.DECIMAL128 + return plc.DataType(tid, -dtype.scale) + elif isinstance(dtype, cudf.Decimal64Dtype): + tid = plc.TypeId.DECIMAL64 + return plc.DataType(tid, -dtype.scale) + elif isinstance(dtype, cudf.Decimal32Dtype): + tid = plc.TypeId.DECIMAL32 + return plc.DataType(tid, -dtype.scale) + # libcudf types don't support timezones so convert to the base type + elif isinstance(dtype, pd.DatetimeTZDtype): + dtype = _get_base_dtype(dtype) + else: + dtype = np.dtype(dtype) + return plc.DataType(SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES[dtype]) + + +SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES = { + np.dtype("int8"): plc.types.TypeId.INT8, + np.dtype("int16"): plc.types.TypeId.INT16, + np.dtype("int32"): plc.types.TypeId.INT32, + np.dtype("int64"): plc.types.TypeId.INT64, + np.dtype("uint8"): plc.types.TypeId.UINT8, + np.dtype("uint16"): plc.types.TypeId.UINT16, + np.dtype("uint32"): plc.types.TypeId.UINT32, + np.dtype("uint64"): plc.types.TypeId.UINT64, + np.dtype("float32"): plc.types.TypeId.FLOAT32, + np.dtype("float64"): plc.types.TypeId.FLOAT64, + np.dtype("datetime64[s]"): plc.types.TypeId.TIMESTAMP_SECONDS, + np.dtype("datetime64[ms]"): plc.types.TypeId.TIMESTAMP_MILLISECONDS, + np.dtype("datetime64[us]"): plc.types.TypeId.TIMESTAMP_MICROSECONDS, + np.dtype("datetime64[ns]"): plc.types.TypeId.TIMESTAMP_NANOSECONDS, + np.dtype("object"): plc.types.TypeId.STRING, + np.dtype("bool"): plc.types.TypeId.BOOL8, + np.dtype("timedelta64[s]"): plc.types.TypeId.DURATION_SECONDS, + np.dtype("timedelta64[ms]"): plc.types.TypeId.DURATION_MILLISECONDS, + np.dtype("timedelta64[us]"): plc.types.TypeId.DURATION_MICROSECONDS, + np.dtype("timedelta64[ns]"): plc.types.TypeId.DURATION_NANOSECONDS, +} +PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES = { + plc_type: np_type + for np_type, plc_type in SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES.items() +} +# There's no 
equivalent to EMPTY in cudf. We translate EMPTY +# columns from libcudf to ``int8`` columns of all nulls in Python. +# ``int8`` is chosen because it uses the least amount of memory. +PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[plc.types.TypeId.EMPTY] = np.dtype("int8") +PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[plc.types.TypeId.STRUCT] = np.dtype( + "object" +) +PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[plc.types.TypeId.LIST] = np.dtype("object") + + +SIZE_TYPE_DTYPE = PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[plc.types.SIZE_TYPE_ID] + # Type dispatch loops similar to what are found in `np.add.types` # In NumPy, whether or not an op can be performed between two # operands is determined by checking to see if NumPy has a c/c++ From 559cda24e4258da1aa35b7de60f46e8a86b1effa Mon Sep 17 00:00:00 2001 From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com> Date: Thu, 9 Jan 2025 19:18:27 -0800 Subject: [PATCH 3/7] Use 64-bit offsets only if the current strings column output chunk size exceeds threshold (#17693) This PR improves on #17207 and only uses 64-bit offsets if the current output chunk of a strings column exceeds the large-strings threshold instead of using cumulative strings column sizes per `pass` or `row group` level. Authors: - Muhammad Haseeb (https://github.com/mhaseeb123) Approvers: - Karthikeyan (https://github.com/karthikeyann) - David Wendt (https://github.com/davidwendt) - Yunsong Wang (https://github.com/PointKernel) URL: https://github.com/rapidsai/cudf/pull/17693 --- cpp/src/io/parquet/reader_impl.cpp | 48 +++++++-------------- cpp/src/io/parquet/reader_impl_chunking.hpp | 5 +-- 2 files changed, 17 insertions(+), 36 deletions(-) diff --git a/cpp/src/io/parquet/reader_impl.cpp b/cpp/src/io/parquet/reader_impl.cpp index c48ff896e33..f9fcca6bb4f 100644 --- a/cpp/src/io/parquet/reader_impl.cpp +++ b/cpp/src/io/parquet/reader_impl.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -97,38 +97,24 @@ void reader::impl::decode_page_data(read_mode mode, size_t skip_rows, size_t num _stream); } - // Compute column string sizes (using page string offsets) for this subpass + // Compute column string sizes (using page string offsets) for this output table chunk col_string_sizes = calculate_page_string_offsets(); - // ensure cumulative column string sizes have been initialized - if (pass.cumulative_col_string_sizes.empty()) { - pass.cumulative_col_string_sizes.resize(_input_columns.size(), 0); - } - - // Add to the cumulative column string sizes of this pass - std::transform(pass.cumulative_col_string_sizes.begin(), - pass.cumulative_col_string_sizes.end(), - col_string_sizes.begin(), - pass.cumulative_col_string_sizes.begin(), - std::plus<>{}); - // Check for overflow in cumulative column string sizes of this pass so that the page string // offsets of overflowing (large) string columns are treated as 64-bit. 
auto const threshold = static_cast<size_t>(strings::detail::get_offset64_threshold()); - auto const has_large_strings = std::any_of(pass.cumulative_col_string_sizes.cbegin(), - pass.cumulative_col_string_sizes.cend(), + auto const has_large_strings = std::any_of(col_string_sizes.cbegin(), + col_string_sizes.cend(), [=](std::size_t sz) { return sz > threshold; }); if (has_large_strings and not strings::detail::is_large_strings_enabled()) { CUDF_FAIL("String column exceeds the column size limit", std::overflow_error); } - // Mark any chunks for which the cumulative column string size has exceeded the - // large strings threshold - if (has_large_strings) { - for (auto& chunk : pass.chunks) { - auto const idx = chunk.src_col_index; - if (pass.cumulative_col_string_sizes[idx] > threshold) { chunk.is_large_string_col = true; } - } + // Mark/unmark column-chunk descriptors depending on the string sizes of corresponding output + // column chunks and the large strings threshold. + for (auto& chunk : pass.chunks) { + auto const idx = chunk.src_col_index; + chunk.is_large_string_col = (col_string_sizes[idx] > threshold); } } @@ -210,11 +196,9 @@ void reader::impl::decode_page_data(read_mode mode, size_t skip_rows, size_t num // only do string buffer for leaf if (idx == max_depth - 1 and out_buf.string_size() == 0 and col_string_sizes[pass.chunks[c].src_col_index] > 0) { - out_buf.create_string_data( - col_string_sizes[pass.chunks[c].src_col_index], - pass.cumulative_col_string_sizes[pass.chunks[c].src_col_index] > - static_cast<size_t>(strings::detail::get_offset64_threshold()), - _stream); + out_buf.create_string_data(col_string_sizes[pass.chunks[c].src_col_index], + pass.chunks[c].is_large_string_col, + _stream); } if (has_strings) { str_data[idx] = out_buf.string_data(); } out_buf.user_data |= @@ -416,11 +400,11 @@ void reader::impl::decode_page_data(read_mode mode, size_t skip_rows, size_t num final_offsets.emplace_back(offset); out_buf.user_data |= PARQUET_COLUMN_BUFFER_FLAG_LIST_TERMINATED; } else if (out_buf.type.id() == type_id::STRING) { - // need to cap off the string offsets column - auto const sz = static_cast<size_t>(col_string_sizes[idx]); - if (sz <= strings::detail::get_offset64_threshold()) { + // only if it is not a large strings column + if (col_string_sizes[idx] <= + static_cast<size_t>(strings::detail::get_offset64_threshold())) { out_buffers.emplace_back(static_cast<size_type*>(out_buf.data()) + out_buf.size); - final_offsets.emplace_back(sz); + final_offsets.emplace_back(static_cast<size_type>(col_string_sizes[idx])); } } } diff --git a/cpp/src/io/parquet/reader_impl_chunking.hpp b/cpp/src/io/parquet/reader_impl_chunking.hpp index ca46f198bb8..4a773fbced1 100644 --- a/cpp/src/io/parquet/reader_impl_chunking.hpp +++ b/cpp/src/io/parquet/reader_impl_chunking.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -130,9 +130,6 @@ struct pass_intermediate_data { rmm::device_buffer decomp_dict_data{0, cudf::get_default_stream()}; rmm::device_uvector<string_index_pair> str_dict_index{0, cudf::get_default_stream()}; - // cumulative strings column sizes. - std::vector<size_t> cumulative_col_string_sizes{}; - int level_type_size{0}; // skip_rows / num_rows for this pass.
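The effect of moving the 64-bit decision from cumulative per-pass sizes to per-chunk sizes is easy to demonstrate with a standalone sketch. This is illustrative only: offset64_threshold and needs_large_offsets are hypothetical stand-ins for strings::detail::get_offset64_threshold() and the per-chunk check above, and the 2 GiB value is an assumption about that threshold's default.

#include <cstddef>
#include <iostream>
#include <vector>

// Illustrative stand-in for the large-strings threshold: roughly the largest
// char-data size addressable with 32-bit offsets.
constexpr std::size_t offset64_threshold = std::size_t{1} << 31;

// Per-chunk decision, as in the patch: only this output chunk's size matters.
bool needs_large_offsets(std::size_t chunk_char_bytes)
{
  return chunk_char_bytes > offset64_threshold;
}

int main()
{
  // Three ~1.5 GiB output chunks of one strings column. The cumulative size
  // (~4.5 GiB) crosses the threshold, but no single chunk does, so every chunk
  // keeps 32-bit offsets under the per-chunk test.
  std::vector<std::size_t> const chunk_sizes(3, std::size_t{3} << 29);
  std::size_t cumulative = 0;
  for (auto const sz : chunk_sizes) {
    cumulative += sz;
    std::cout << "chunk=" << sz << " cumulative=" << cumulative
              << " needs_64bit_offsets=" << needs_large_offsets(sz) << '\n';
  }
}

Under the old cumulative check, every chunk from the second one onward would have been forced to 64-bit offsets even though each chunk individually fits comfortably within the 32-bit limit; that is the saving this patch targets.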
From fb2413e1505297e737095d97e0732eec52519802 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Fri, 10 Jan 2025 10:06:35 -0800 Subject: [PATCH 4/7] Make tests build without relaxed constexpr (#17691) Contributes to https://github.com/rapidsai/cudf/issues/7795 This PR updates tests to build without depending on the relaxed constexpr build option. Authors: - Yunsong Wang (https://github.com/PointKernel) Approvers: - Shruti Shivakumar (https://github.com/shrshi) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/17691 --- .../cudf/detail/utilities/integer_utils.hpp | 12 +++-- cpp/include/cudf/utilities/span.hpp | 40 +++++++++------ cpp/src/io/utilities/parsing_utils.cuh | 49 ++++++++++--------- cpp/src/io/utilities/trie.cuh | 8 ++- .../transform/segmented_row_bit_count_test.cu | 4 +- cpp/tests/utilities/column_utilities.cu | 18 ++++--- 6 files changed, 75 insertions(+), 56 deletions(-) diff --git a/cpp/include/cudf/detail/utilities/integer_utils.hpp b/cpp/include/cudf/detail/utilities/integer_utils.hpp index 2e3d71815c0..44a86f1c84f 100644 --- a/cpp/include/cudf/detail/utilities/integer_utils.hpp +++ b/cpp/include/cudf/detail/utilities/integer_utils.hpp @@ -1,7 +1,7 @@ /* * Copyright 2019 BlazingDB, Inc. * Copyright 2019 Eyal Rozenberg - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,6 +23,8 @@ */ #include +#include +#include #include #include @@ -44,13 +46,17 @@ namespace util { * `modulus` is positive. The safety is in regard to rollover. */ template -constexpr S round_up_safe(S number_to_round, S modulus) +CUDF_HOST_DEVICE constexpr S round_up_safe(S number_to_round, S modulus) { auto remainder = number_to_round % modulus; if (remainder == 0) { return number_to_round; } auto rounded_up = number_to_round - remainder + modulus; if (rounded_up < number_to_round) { - throw std::invalid_argument("Attempt to round up beyond the type's maximum value"); +#ifndef __CUDA_ARCH__ + CUDF_FAIL("Attempt to round up beyond the type's maximum value", cudf::data_type_error); +#else + CUDF_UNREACHABLE("Attempt to round up beyond the type's maximum value"); +#endif } return rounded_up; } diff --git a/cpp/include/cudf/utilities/span.hpp b/cpp/include/cudf/utilities/span.hpp index e7b76946248..b5044a58934 100644 --- a/cpp/include/cudf/utilities/span.hpp +++ b/cpp/include/cudf/utilities/span.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -197,11 +197,16 @@ struct host_span : public cudf::detail::span_basedata() + offset, count, _is_device_accessible}; } @@ -434,8 +439,8 @@ struct device_span : public cudf::detail::span_basedata() + offset, count}; } @@ -475,28 +480,28 @@ class base_2dspan { * * @return A pointer to the first element of the span */ - [[nodiscard]] constexpr auto data() const noexcept { return _flat.data(); } + [[nodiscard]] CUDF_HOST_DEVICE constexpr auto data() const noexcept { return _flat.data(); } /** * @brief Returns the size in the span as pair. 
* * @return pair representing rows and columns size of the span */ - [[nodiscard]] constexpr auto size() const noexcept { return _size; } + [[nodiscard]] CUDF_HOST_DEVICE constexpr auto size() const noexcept { return _size; } /** * @brief Returns the number of elements in the span. * * @return Number of elements in the span */ - [[nodiscard]] constexpr auto count() const noexcept { return _flat.size(); } + [[nodiscard]] CUDF_HOST_DEVICE constexpr auto count() const noexcept { return _flat.size(); } /** * @brief Checks if the span is empty. * * @return True if the span is empty, false otherwise */ - [[nodiscard]] constexpr bool is_empty() const noexcept { return count() == 0; } + [[nodiscard]] CUDF_HOST_DEVICE constexpr bool is_empty() const noexcept { return count() == 0; } /** * @brief Returns a reference to the row-th element of the sequence. @@ -507,7 +512,7 @@ class base_2dspan { * @param row the index of the element to access * @return A reference to the row-th element of the sequence, i.e., `data()[row]` */ - constexpr RowType operator[](size_t row) const + CUDF_HOST_DEVICE constexpr RowType operator[](size_t row) const { return _flat.subspan(row * _size.second, _size.second); } @@ -517,7 +522,10 @@ class base_2dspan { * * @return A flattened span of the 2D span */ - [[nodiscard]] constexpr RowType flat_view() const { return _flat; } + [[nodiscard]] CUDF_HOST_DEVICE constexpr RowType flat_view() const + { + return _flat; + } /** * @brief Construct a 2D span from another 2D span of convertible type diff --git a/cpp/src/io/utilities/parsing_utils.cuh b/cpp/src/io/utilities/parsing_utils.cuh index 75e45a68842..9833dab282e 100644 --- a/cpp/src/io/utilities/parsing_utils.cuh +++ b/cpp/src/io/utilities/parsing_utils.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -171,7 +171,10 @@ constexpr uint8_t decode_digit(char c, bool* valid_flag) } // Converts character to lowercase. -constexpr char to_lower(char const c) { return c >= 'A' && c <= 'Z' ? c + ('a' - 'A') : c; } +CUDF_HOST_DEVICE constexpr char to_lower(char const c) +{ + return c >= 'A' && c <= 'Z' ? c + ('a' - 'A') : c; +} /** * @brief Checks if string is infinity, case insensitive with/without sign @@ -515,13 +518,13 @@ struct ConvertFunctor { template and !std::is_same_v and !cudf::is_fixed_point())> - __host__ __device__ __forceinline__ bool operator()(char const* begin, - char const* end, - void* out_buffer, - size_t row, - data_type const output_type, - parse_options_view const& opts, - bool as_hex = false) + __device__ __forceinline__ bool operator()(char const* begin, + char const* end, + void* out_buffer, + size_t row, + data_type const output_type, + parse_options_view const& opts, + bool as_hex = false) { auto const value = [as_hex, &opts, begin, end]() -> cuda::std::optional { // Check for user-specified true/false values @@ -564,13 +567,13 @@ struct ConvertFunctor { * @brief Dispatch for boolean type types. 
*/ template )> - __host__ __device__ __forceinline__ bool operator()(char const* begin, - char const* end, - void* out_buffer, - size_t row, - data_type const output_type, - parse_options_view const& opts, - bool as_hex) + __device__ __forceinline__ bool operator()(char const* begin, + char const* end, + void* out_buffer, + size_t row, + data_type const output_type, + parse_options_view const& opts, + bool as_hex) { auto const value = [&opts, begin, end]() -> cuda::std::optional { // Check for user-specified true/false values @@ -593,13 +596,13 @@ struct ConvertFunctor { * is not valid. In such case, the validity mask is set to zero too. */ template )> - __host__ __device__ __forceinline__ bool operator()(char const* begin, - char const* end, - void* out_buffer, - size_t row, - data_type const output_type, - parse_options_view const& opts, - bool as_hex) + __device__ __forceinline__ bool operator()(char const* begin, + char const* end, + void* out_buffer, + size_t row, + data_type const output_type, + parse_options_view const& opts, + bool as_hex) { auto const value = [&opts, begin, end]() -> cuda::std::optional { // Check for user-specified true/false values diff --git a/cpp/src/io/utilities/trie.cuh b/cpp/src/io/utilities/trie.cuh index c0efc5b6f20..dbdc4a34277 100644 --- a/cpp/src/io/utilities/trie.cuh +++ b/cpp/src/io/utilities/trie.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2024, NVIDIA CORPORATION. + * Copyright (c) 2018-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -74,16 +74,14 @@ CUDF_EXPORT trie create_serialized_trie(std::vector const& keys, /* * @brief Searches for a string in a serialized trie. * - * Can be executed on host or device, as long as the data is available - * * @param trie Pointer to the array of nodes that make up the trie * @param key Pointer to the start of the string to find * @param key_len Length of the string to find * * @return Boolean value; true if string is found, false otherwise */ -CUDF_HOST_DEVICE inline bool serialized_trie_contains(device_span trie, - device_span key) +__device__ inline bool serialized_trie_contains(device_span trie, + device_span key) { if (trie.empty()) { return false; } if (key.empty()) { return trie.front().is_leaf; } diff --git a/cpp/tests/transform/segmented_row_bit_count_test.cu b/cpp/tests/transform/segmented_row_bit_count_test.cu index 652b9053582..0e4f623f0a2 100644 --- a/cpp/tests/transform/segmented_row_bit_count_test.cu +++ b/cpp/tests/transform/segmented_row_bit_count_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -74,7 +74,7 @@ compute_segmented_row_bit_count(cudf::table_view const& input, cudf::size_type s // Since the number of rows may not divisible by segment_length, // the last segment may be shorter than the others. 
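  // Note on the change just below: this reduction lambda runs in device code, so
  // std::min (a host-only constexpr function) is swapped for cuda::std::min, which
  // remains callable on device once the relaxed-constexpr build option is dropped.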
     auto const size_begin = d_sizes + segment_idx * segment_length;
-    auto const size_end   = std::min(size_begin + segment_length, d_sizes + num_rows);
+    auto const size_end   = cuda::std::min(size_begin + segment_length, d_sizes + num_rows);
     return thrust::reduce(thrust::seq, size_begin, size_end);
   }));

diff --git a/cpp/tests/utilities/column_utilities.cu b/cpp/tests/utilities/column_utilities.cu
index fb9bdeb0b22..6888f26fd16 100644
--- a/cpp/tests/utilities/column_utilities.cu
+++ b/cpp/tests/utilities/column_utilities.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2025, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -37,6 +37,8 @@
 #include
 #include
+#include <cuda/std/cmath>
+#include <cuda/std/limits>
 #include
 #include
 #include
@@ -412,14 +414,16 @@ class corresponding_rows_not_equivalent {
       T const y = rhs.element<T>(rhs_index);

       // Must handle inf and nan separately
-      if (std::isinf(x) || std::isinf(y)) {
+      if (cuda::std::isinf(x) || cuda::std::isinf(y)) {
         return x != y;  // comparison of (inf==inf) returns true
-      } else if (std::isnan(x) || std::isnan(y)) {
-        return std::isnan(x) != std::isnan(y);  // comparison of (nan==nan) returns false
+      } else if (cuda::std::isnan(x) || cuda::std::isnan(y)) {
+        return cuda::std::isnan(x) !=
+               cuda::std::isnan(y);  // comparison of (nan==nan) returns false
       } else {
-        T const abs_x_minus_y = std::abs(x - y);
-        return abs_x_minus_y >= std::numeric_limits<T>::min() &&
-               abs_x_minus_y > std::numeric_limits<T>::epsilon() * std::abs(x + y) * fp_ulps;
+        T const abs_x_minus_y = cuda::std::abs(x - y);
+        return abs_x_minus_y >= cuda::std::numeric_limits<T>::min() &&
+               abs_x_minus_y >
+                 cuda::std::numeric_limits<T>::epsilon() * cuda::std::abs(x + y) * fp_ulps;
       }
     } else {
       // if either is null, then the inequality was checked already

From dc2a75cba40d38f4a6ba66e652764e96fa6b593d Mon Sep 17 00:00:00 2001
From: Tianyu Liu
Date: Fri, 10 Jan 2025 13:22:39 -0500
Subject: [PATCH 5/7] Add special orc test data: timestamp interspersed with
 null values (#17713)

Authors:
  - Tianyu Liu (https://github.com/kingcrimsontianyu)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Vukasin Milovanovic (https://github.com/vuule)

URL: https://github.com/rapidsai/cudf/pull/17713
---
 ...e.timestamp.desynced.snappy.RLEv2.hasNull.orc | Bin 0 -> 5951 bytes
 ...stamp.desynced.uncompressed.RLEv2.hasNull.orc | Bin 0 -> 6565 bytes
 python/cudf/cudf/tests/test_orc.py               |   6 ++++++
 3 files changed, 6 insertions(+)
 create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.timestamp.desynced.snappy.RLEv2.hasNull.orc
 create mode 100644 python/cudf/cudf/tests/data/orc/TestOrcFile.timestamp.desynced.uncompressed.RLEv2.hasNull.orc

diff --git a/python/cudf/cudf/tests/data/orc/TestOrcFile.timestamp.desynced.snappy.RLEv2.hasNull.orc b/python/cudf/cudf/tests/data/orc/TestOrcFile.timestamp.desynced.snappy.RLEv2.hasNull.orc
new file mode 100644
index 0000000000000000000000000000000000000000..8772f84c3ba2a7942323f49ac28c3e5b172a91fc
GIT binary patch
literal 5951
[base85-encoded binary data omitted]
z_SOv#Zyj8-b+_P^{qC)Y9@#qj=+@JZZJj8*Z>sktJn>TLcV8;^yrlj+p6z?QHGj`r z`xLK#Eq?WV-d->7z&hUHf8d??0si%A-j(&d*$sTb5BX(2{QEO}!?S$bkN9hzJ?*lAoP>fq!K)e|B)`(=QfX3kYOG0>c)8ZL46-OZae`pntny z`wqe0py0qT&hHeQ__5&JPXt$X31)ZW(mxiKg$iXa7aD$AXnUpbiGSL$Vb6}iy*qaQ zY{&kR6^Hii82$N<)4$j;@yi`kC1LCjmi{VO9to;n3$~PsUB3=K@h`y*uLlPY1b3H) z_eX<=4hBbm6FmKI!HIjsQ*RFA&~WL$50}S=)%Wac`R%alcf(Ko$MA-?hX+L~cmIBP ze|-4Rk>SyIhEI$3O&lGbI<^y|J4@f)S-$K6_3@o8f7t1IZ|4(#+_~XI(e^(T?M)UP zI9YV~&#cz}EIRi=(Unt0v!{y%AF}MJV%eEu!+#aq{!+Z=BUaDZ;{J2R+s_y8O&1@y z!1~T#i%(oEJ~vi;WxRNHg7w`?OUp7#WtW#4K3;13g!TPTm#+P6Y5$d_+dp5rH_Q6r z)uo5OSbE~irRT0Ky)wz#I8`FJUQ(7TkxiEvM3-K?S+XWGxpttO{;m>DQ$45T8cI-LhTNmH>atg+gv1<$c#^Ynk>oYIE z^A4-T>^Ghnd*kNJTO%XaCO2c~^0@N$%$cJjw; zTaNOk%mLG~i8r~kx_952+&x;GxV>j^OS0##Gn0?Hu^tWwW^d?28ip_4j`=@T1Ge9_*iaf6P4L+C8xK zW3y!A@)Kj$oHf*cf90|2jru>0Idj{7I`HdHnt6o}C&wPnz5h=Gzx(8va%10%k=|+k z-huZ%HETES`SaLQ6C1(fN+y zeEh-NGiQ(9y*)X$ba-K6@!Pe0biQz^cW7YM(DB0AhZFNFrpC7om@}*HnV9{j-KCr*k`>cefF5}tn;GvM@@fu#`xG%KN>mP*E@>mn$GtVn$`O~;+;NtYmhvUxYAG&8WDE9H8bQq`YQdE zfy!XP!OC!Dq%v9=tBhABDwCC|%5=e7mD$Q%Wxk4CC9D!viK`@4M8T0NvWlvrtC%Wx zm8Z&E<*V{n1*<|;;i^bgv?^8=uS!%U3r<$0t1?yDs$5mRnq4id7FCN2&QufC+G?_z zs-~-%YIn7#+FNj;+Fu>04pxV%!_|@MXmzYQUU0cOS)Hm*S7)lT)w$}tlr0q&T$PHY z5-B0oN=YdtrKOD2T`(>6N_|qlG$0L1L(;G`B8?WzN#oLlG$~C<)6$GIE6qvsoC29p zCX$I|5*Z=W%19X{qd7}tZkb2smHA|TSwI$)g=Aq_R2GxPWeHhQmXf7q8CjOIQkIvq zO{5M&VX?6kg6F3cn(t2r5E~up**}Dq@PbBB@9z(u#~C ztH>$xHS8K8XI+iBMp8r6XluwCs)nv%YTTS>YP>bR8h=foCRh`y3D-nwqMS`N@tQ#q&e25UpLVb067 z(b`yTyf#srtWDLXYcsW3&c52blC2afMM|+!q9l}BCCPbBNh=wpTj^1Hl|H3k8Bhi} z2bE!EL>X1alyPN3nN+5fX=PTKQ|47{l~5&8iB%F6!8xKLRg{WWF)Fvpqw=bJDnIAA zDyRyn!m5ZWs*0)Ns)Q=ZIjKskGODa9r^>6@YN1-B7IV(13AI*Dswp+CX4G!ANA2ZY zQ2W&Zbx<8rht&~vR2@^tIhWN*bxNI9XVh7BPMz1VHA2o+jaVbm5E`w9)KD5)!)V-` zX^mIo)A%(3O;8ikgf$UOlryJ^YZ98ICZ$PhGMcO=r^#!DT9H<)m1qgAR!eFrEzMn` zb!$CZuhysaYXjP#Hlz)6muaKgm^Q9WXp`EMHm%KQv)q;1ypF9C>O?xRPNE}pS{=!) z(9t?Z=hk_2UY$?p*9CMzu0j{qMIL`r7uCgdaa}@})TMN3u1=TL<#hQvcAcd9iMk}$QJ1dE)Me{(b@_UB zy|7+XFRmx*we@5@RZrJ5_3nC4y_fq)y}v$CAFL16hwCHt(fU|@ocm;bvOZOxuFuqG z>vQ$_26lsxyRJdpAZZ{Pv<+kf)j&5e4Q}o;4c-P{gTEor5NrrFgc~9aQSPROctfHg zxqbl+DKwKh4LLOA!5xBL2)zh;G4vAX3Fx)Zlh9Mp)8Gz6?}pw3y%%~P^nU0A z&0;O>Jy4+9$pAq*lI#4t!;AYjnKK*B)7 zz`)>!!2^RA1|JN57y>W^!955=7={Q8Q5a${#9>Ilkc1%xLmJ$-V93IdgCUPbHX4O! z6roX!MhO}TaF3voL?eYp8jTDZ-Dvcn(TheO8vWoNM`I9;AvA{37(rtcjWIOF(U?GE z65Nw$OrtS_#w;3hXv~9TgA{@kffR##1|$Je3z7s$fuun)Al)E6AidyT0O$UWHK%qXb3*MlFmaj1-JCj0}u! 
z7`-t1VD!TnfH4ST2*xms5g4Q3&cPUmF#%%|#uSWc7&9&!d^0a~jYrLbDjnlH8vcy+$;X)ACs~ z(`aUGX*mZGXt}G<|nf-d%=5T=8Hu>0CRAbUgX0tM_`W5 zzQ3sB;5~V}$^df;=Jf5rEX-M$b1>)8!bXb_ymk4<4x>ea79xM7A1x$WsJRsev@mFK zgZB(tylC;E#XmPaizb7;xm{cIMk zLbQs|Dn_dWtpr-N^NGV~rO-+*l=q|6jaJXX3kJ0M(CSBP0Ifl^hR_;DYZR?9w8qhz zKx-1MDYT~1nnCO0&F({M9uymt5R?d%7?cDQ0ZI#s1n)IaG$;m?87nBc_A5;KT z5WIt+!k{9cqM%}+;-C_slAuzc(%`)XDhnzHDh~@A79lJmSj4bMU?IRe0t*QX1q%%e z1B)9L4=i3-e6aYzI}S?_mJlpqSR$}QVTr*Kha~|^618G9NO||XQN#R-c_`V(Jnzdfp#t0NwiaFr_s)!-3{I}+P!G^q1})60NR6S z51~Db_6XXe;LV{uj`jrFlW0$&J&pDZ+OufSp*;_N0Xl@}5TQei4hcF4bZF5*qJu&Q z4gL~zxY6N3hZh|_bokK`Kt~WAA#{YnUxtn-I%4REqa%TiBsx;)NTVZzjx6{qdj~u6 zR<>1W64;tv;*Y8n6cW3TxOJu|};iYuuW!Cao!J+L~Sb z)6Co0Hla;q6Wb& zPubJ8Iw3a4mil%9XcJrU1y|j<^(*Zh2hv+aJp`-jwbevAm zNjgQR=?tBvb9CMzbch^chr~fRv<}iiIcWZ{!|m`mybhnk?+7@8j*uhFf7uar#2j%) z!jW{O9BD_!k>&4m5O*9I^&&*&SYn*GtGaiGuxT#%y+T7gk7R8ahIfv;2-HCyQnU@i|KNAdAht^zAiui zcvrA1)D`ZEbVa*jUGc6&SCW6SE8Ugp%68?t^4;uiVYjGT+)Z?AyUA{<)E@yCdDv?pSx6f4Mu^o$5|^XS%c9x$eA^?G*B_I>k;FIcHu_;1aq-F0o7EB3xP* z>7rb;V2R7^^0>S%pUdwGxPq>bD=b*%in?O1xGUjGx>ByRE91%vR=V;%>>go{s7Kr* z=^=WwJ!B8v!}Pd&JU!kXUyr{h&=V9Wdcr-Co@h_3C*G6jN%o|A(gIyiwkOwb literal 0 HcmV?d00001 diff --git a/python/cudf/cudf/tests/data/orc/TestOrcFile.timestamp.desynced.uncompressed.RLEv2.hasNull.orc b/python/cudf/cudf/tests/data/orc/TestOrcFile.timestamp.desynced.uncompressed.RLEv2.hasNull.orc new file mode 100644 index 0000000000000000000000000000000000000000..f5a1edbb10ee55f131764e18b638c35c8e06b0cf GIT binary patch literal 6565 zcmcI}eRLD|neHQ5Uls;CA_*YU;L0D8Mz%giwk3~b%O2U5JhCl$XcDS4b*gMPRM`fq zq_GiBaAXJ|vxQZfg^KQmD%%^b=uKFq-B1yOBbtVaZXlu!Ac}27K8Q%LkpUYS>~-7S zy}fsvJ@?#m`uk(v=ltI1eV_N8-^`pd=XJ7Hu~5Pn4HUWX2k@b20m#Kd{BcR{6HP8+ z?pb~+cx!*yP#Qb%(`eYB_*AGpce>QY5v#=FMjqz5Ur8<5Q6Tw}=$weTcX{CO?@6WC z-x)rzzT(}nYYd0Q|)KY48aZ$JO&?E>xtU$DOU1^W-Y^aYLw zzhM7?uYQr^_Y$sS^MfCKQ5`RRLFE6R)SpfE9}=!(dvE>j&(!f3kotWc3vK`Wga1qJ zFC_J+ZT}aEg*sk;?@#LZZ_nyaGyNxi=|9o`3I2=Jf4Kjwge&orAOG^->-c|s^1iZr zzAz^SL>w&VLQw!C4;(($2(VU&J`uJRqEL+ei*Tq2#~0)565LgcnG%lZF3xfZN3j$u z?&jF<;jForvvwJ$Z#llag0n};*}sx==swQzFF|=f=khAfOfDn(GIzNQx-zb@oNKS( zuBqg%m7`w4-KOO3A-MZh+(T7puI8Rqb1!SSGui@?uAr10)anPDY6e}kgOBo;t~U%0 z7zcON5ALG|5At6eF%O<<7#wdLoV4szfBa!d^X5`|v(~z~sb#ZE@cl>IHm|pB9md_KeQWWfZwdBz z1^b^69Qqr<@o(dQJ}J1oPB61xDEf|Yxex#Rl+gIJ(EeTFnrDP-d-3x=;kFIJJ$~W- zjlx6h_}kPcXP*;Z-XxqESn}laMOOnN#h}QzS!CZLTJr+l-zw_cCfc@Lv?nOqKZJ8T zM92SDboP6q%R5CgyKwRE3YUip6)zSVzh7v7sqoRi-@bnL_JKXycl}`dzTy=J_ii8g z;r3Jiuzma=w@(&_u`gKiqhM(ysC^~aR3df#IQZ!Q2(EuMIIusst0cTH8a#L)IP#O= zss9xmze_s#`VbBdmHhKiX>3S)*WRX|4!M3d^yt3~t$%Z9K(cbz&xiKKhYlVd8hLBz zlw|Mtk)g?>J20}NyH<0`&H4NWYPW;MTdUPY5v!u zv+osMK3Oz#YO&~jPJ3#x;`CzUZx-8syLin9obEG=`_3-jc5d;W^y2;JIbZ$V;^P+< zpB-I%d2I2_IOiJ|mn_dLQCwPL{BVi=BhI%zUb6O+C4HBdZ2NS{o-F4(SC$<5Y{~J@ zmz=%2 zzM|x{`2k>{jMt+zPE7QgBA z=xaBo-xwagIHX$hE@ zj=#>I(ZBr`zhu*9bJ;~r;mqp9k$WePayIKPzP@yG|!Ub0C0&O9$OTzmI>{olQG^!~o-cSkMbu3i0GKD5X-EIU4G z%h^JG_m&^6++g_Cs57_q`~5%us8LY(P;&I4+`E6@|Fe&dsyFmLAL*GA?&*K$V+*-q z_pe7EA73Bt|Lw;|>o&akucPaxw*Ij1ez}F(aOS1>^@*7Xj?YmLD zf7teRGn?Q0o4NfroYCQjvL_#${K^A!2XE~B$?)S>obLSj4{qOk&&k(@d#{}QdVcoI z+{n#h{ySSgb9!c1o}D{>(-|Ay{r2;JJ!?8Qck1TOpANtJxzjuOSMIs9H&6b{@SC5X zd~$Z<`P-L2ifp8M8R&))vA zPgd1mT>jqNhN-ca`{ysOYR)e^HDj;odF9zvpRQ`pKa`sL!S%6M?^v@IHvjT(8xP!A zcVKeQ6Bc)V>`dd~8xybJ8T`yFU%BdBGqiXR?msK$H&+G!|k~h z#kPxv({rzKpPL^kdhWAjM++9-vh_XMrSBKq;5@slr?2=TcV=YH)w6%uMEEgXL@@^@Jz$G-p4Pz`poe8-gOt=?KNX-?}bl# zhsSz1jSBl3F7|F6Dd`)&)Vp)kwxQvp-rb{*Z+MHd_2tp6{)SI`UmbnbKYXS4^^wwz z4WIYEIr_=Q;fda(W5WK1Ytz3P)AV1Mntp%G`RpS%CW}_Bd-lTg^u@8A&pmQ$`qQ!3 
zadCF~+StiWkKCEQH8!#7!u(`OY4O0U|GG*C}D&qy0DwCC|%5-I>GFzFe%*(lQalsY2R4$Vfa#Bvo zX*nZj(bU{)TNC*(NkvMLR%8@e-bzJY$yJJ#5~Wlr zQxZy2N%6{*jFMHll^&&6=~McZ0cDV8pJye4M2H9z5h6;&h&YiTk~{~ICNe~p$PsxJS0z?S zR8rn*6`>+kl!{g{DpuuIc~oBB!z#ZjpbDx&s<0}eimGC&xGJeisnV*9Dyzz=@>Sd_ zG4F{gX_c&ss3NPVD!PiPVyoP|r>eYFzAArJpek4ustQ*{s-nD&Rq?7sRkA8om9ENE zWvg;kdEQ{PxLQ&zt(H|2)nqkQO;M-xc>S%SWI$oWq zPFAO?)76>kEN^dhUd>gD)e^N-EmIR}Qcdw*Q8Q{*?N)o#UbRo{R|nKV-T`%39Z^Ts zF?C#>P$$(Xby}TO=hS%(S0mO)G*XRBL+}o3C=IP)G_1z0@o2mnpT^HSrU`07ny@CK ziE3h+xF(@V@=j>dnv5o^$!YRhu2!s-Xr;W^b1bga(Jo6>o8KAm3| z&;@lNU04^QcJ2E~Crpa=JVzCMBell#v8Uk`zgk41W>nCOxE=^pSot zKnBSW8Rjn~qhySXlL<0OrpPpzA+!9IWM0qJi}ez{R4>yLdQwmE%k+$%)w}f`y;twk z`}F~Rkgw8*^^vbXrjP1l`nW!!PwG?pG+(dJ>T~*h4Yx*IBdL+r$Z80_sfMbdYnU3g z#$Ds7@z(fi{CsOouqIR!u8Gt{YhpF=nnX>K@2E-FWNNZCxte?}w^m#$sg>3ewPY<- zOV={BY^}T2Q|skFTX^5U6jAEE?$?YORk$oT?%z+)MZeY zMO_YcdGH5e5W^sWK?;Km1_A~W1_}lm1_t~g7~C*;VDQ4=gTW6&0EQq8AsE8ozX(GV zh8PTS7!oifVMxJ{h9Lt(7W}<1KsF$Ih0RJ%R zDb&-bXHd_g-i>+>>b!Fp)4(FwrnEFtISXVe-P{gUJt50Hz>J zA(+B2MPQ18KMPYFrUXn$m{KsMVamXig((M99)bdx#V|`?mclH9nShyunSz;ynSo#t z%x;)HFneM4!R&`Q0CN!L5X@l+mckr`IRm_Y-92694n2!b*+Flb=W;6{T74c>`6eQ5BbAu#!{5dsw&!jp#<_)#>(&=5yM z0u4ztq^`ZV5UWQ+_S&@tejbh7oYRO#2^ytnl;wWC5NkpsHKm+EBZEeEYU?31deG=a zqYsUKGzPDG`p_6gW90hCLSPJy@f%-SASWSkpfQcc3>vd&%-xtd1Pd1y@y!SOV39(w z`sPoKu#m7&u+XqDu&}VWr@uD?ix+~2r$1ZZ2Ve=#Fbn)JED>0uGw&|oI0TQ~sxZQm zf+c+`Fat{#mK-d3G;z@+hTw_(BZtr=Llcod+=nI#P4w&vBbrz=xgmH8Og$JL9-OiGBgut zCg&1|&`hJ5nJ?`_vm4Ex`R9yi_MzF2<^Y<5Xbzz{jOHksV`z?}If3RRnp0>_qd9}- zg_GTj<~(RFXfbFBXenqJXaY0|nu6dJ&UVsv=_7wv>$W;bP$3Apu?aeprfE; zpyQwupp&3epwke%0Xhph2RaWc7gjN>5?H0M%3viRI1DQVD-A0HD+{X|Ru8ORSbebi zAvgwW5Y`Z^VOS%uMq!P?8izFjYZ8JJu%=|JqcwuoCbVyxosYs*XFbNZ2?v-^csd(a-ThwTx2)E=|P?FoBQ=&+~l8GF{Av*+8n?c#Px zyHvQkooFZ9sdl=ZX=mHr?Vfh8@ZolUd!Rko9%>J_N7|$9vG%y|vG!zpsy*GFY0tLj z+Vc#T5euJSq>PLq7?PnFnqe50aSNYfyo`_WGXW;ZgqScBVWPr~Oq@wDNhZalnGBO< za!lSKc1Rpjhs;4ZNC)Mh9gJ|u;dXc&UWd=&cLW?kN5~NtzUYWLVve{Y;Yd1Cj-)6_(})U!FISiJRRN+Ux&XV&=C|K=m>X2I-(u1j(A6+ zBiWJaNDJTS$ads9@}1mHai^qH+9~TKgoit+PP&unWINrRo=$J4uhTC))*0*!b%r}5 zozc!%XS_4fnG~MrOm}8Fvz@uld>6M%+$HIfb`f1<7u7{~FC=dIcX>3WSwr|l+)|< zIsMLnGw2LC!_J5^Dx7u3oe5{snR2F`8E4j+bLK?_F0o7ElDcFr!bQ3$7wuw1i(GD( z$K`eTTz*%;6?BDMVbM}o)D?5ZT?tpxm2#zB8CO=c(v|P#c8j|u-O_GZH_=UYQ{7BA z+wJc5bbGsf-Tv-CcTlA24tGboqusIYcz2>Z*`4Z6i}c;u?p$}C<+5T{!b({gONdM? z#nLRpvaFl+uwK^3`bAbY$cET38)2htjE%DiHYswjX*R=V*&Lf+&0Q^CEmvNHvBkoO;}25{Q9;SHPFl2J)-L~zw$!Bf@NU@(QIUkhkrsV>L(kLS`t~=U zdiopRUSA3Yw2+BJg?~ia5AUY_n4kJ@^EV9?**P9Qsn)9XqQ6<_Wnh3O Date: Mon, 13 Jan 2025 08:47:23 -0500 Subject: [PATCH 6/7] Add seed parameter to hash_character_ngrams (#17643) Adds a seed parameter to the `nvtext::hash_character_ngrams` API. Makes this more useful in conjunction with other nvtext APIs. 
Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Nghia Truong (https://github.com/ttnghia) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/17643 --- cpp/include/nvtext/detail/generate_ngrams.hpp | 39 ------------------- cpp/include/nvtext/generate_ngrams.hpp | 4 +- cpp/src/text/generate_ngrams.cu | 13 ++++--- cpp/tests/streams/text/ngrams_test.cpp | 4 +- cpp/tests/text/ngrams_tests.cpp | 13 ++++++- 5 files changed, 25 insertions(+), 48 deletions(-) delete mode 100644 cpp/include/nvtext/detail/generate_ngrams.hpp diff --git a/cpp/include/nvtext/detail/generate_ngrams.hpp b/cpp/include/nvtext/detail/generate_ngrams.hpp deleted file mode 100644 index ae48fed4e79..00000000000 --- a/cpp/include/nvtext/detail/generate_ngrams.hpp +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include - -#include - -#include - -namespace CUDF_EXPORT nvtext { -namespace detail { - -/** - * @copydoc hash_character_ngrams(cudf::strings_column_view const&, - * cudf::size_type, rmm::device_async_resource_ref) - * - * @param stream CUDA stream used for allocating/copying device memory and launching kernels - */ -std::unique_ptr hash_character_ngrams(cudf::strings_column_view const& strings, - cudf::size_type ngrams, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr); - -} // namespace detail -} // namespace CUDF_EXPORT nvtext diff --git a/cpp/include/nvtext/generate_ngrams.hpp b/cpp/include/nvtext/generate_ngrams.hpp index 54282b8ef3c..b2ba1798a8f 100644 --- a/cpp/include/nvtext/generate_ngrams.hpp +++ b/cpp/include/nvtext/generate_ngrams.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -117,6 +117,7 @@ std::unique_ptr generate_character_ngrams( * * @param input Strings column to produce ngrams from * @param ngrams The ngram number to generate. Default is 5. + * @param seed The seed value to use with the hash algorithm. Default is 0. * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory. * @return A lists column of hash values @@ -124,6 +125,7 @@ std::unique_ptr generate_character_ngrams( std::unique_ptr hash_character_ngrams( cudf::strings_column_view const& input, cudf::size_type ngrams = 5, + uint32_t seed = 0, rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); diff --git a/cpp/src/text/generate_ngrams.cu b/cpp/src/text/generate_ngrams.cu index 997b0278fe2..33d52ccd570 100644 --- a/cpp/src/text/generate_ngrams.cu +++ b/cpp/src/text/generate_ngrams.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. 
+ * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,7 +32,7 @@ #include #include -#include +#include #include #include @@ -315,6 +315,7 @@ namespace { */ CUDF_KERNEL void character_ngram_hash_kernel(cudf::column_device_view const d_strings, cudf::size_type ngrams, + uint32_t seed, cudf::size_type const* d_ngram_offsets, cudf::hash_value_type* d_results) { @@ -332,7 +333,7 @@ CUDF_KERNEL void character_ngram_hash_kernel(cudf::column_device_view const d_st __shared__ cudf::hash_value_type hvs[block_size]; // temp store for hash values auto const ngram_offset = d_ngram_offsets[str_idx]; - auto const hasher = cudf::hashing::detail::MurmurHash3_x86_32{0}; + auto const hasher = cudf::hashing::detail::MurmurHash3_x86_32{seed}; auto const end = d_str.data() + d_str.size_bytes(); auto const warp_count = (d_str.size_bytes() / cudf::detail::warp_size) + 1; @@ -368,6 +369,7 @@ CUDF_KERNEL void character_ngram_hash_kernel(cudf::column_device_view const d_st std::unique_ptr hash_character_ngrams(cudf::strings_column_view const& input, cudf::size_type ngrams, + uint32_t seed, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { @@ -400,7 +402,7 @@ std::unique_ptr hash_character_ngrams(cudf::strings_column_view co auto d_hashes = hashes->mutable_view().data(); character_ngram_hash_kernel<<>>( - *d_strings, ngrams, d_offsets, d_hashes); + *d_strings, ngrams, seed, d_offsets, d_hashes); return make_lists_column( input.size(), std::move(offsets), std::move(hashes), 0, rmm::device_buffer{}, stream, mr); @@ -419,11 +421,12 @@ std::unique_ptr generate_character_ngrams(cudf::strings_column_vie std::unique_ptr hash_character_ngrams(cudf::strings_column_view const& strings, cudf::size_type ngrams, + uint32_t seed, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - return detail::hash_character_ngrams(strings, ngrams, stream, mr); + return detail::hash_character_ngrams(strings, ngrams, seed, stream, mr); } } // namespace nvtext diff --git a/cpp/tests/streams/text/ngrams_test.cpp b/cpp/tests/streams/text/ngrams_test.cpp index 221c0a62f3e..47b9ac46d12 100644 --- a/cpp/tests/streams/text/ngrams_test.cpp +++ b/cpp/tests/streams/text/ngrams_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -45,7 +45,7 @@ TEST_F(TextNGramsTest, HashCharacterNgrams) auto input = cudf::test::strings_column_wrapper({"the quick brown fox", "jumped over the lazy dog."}); nvtext::hash_character_ngrams( - cudf::strings_column_view(input), 5, cudf::test::get_default_stream()); + cudf::strings_column_view(input), 5, 5, cudf::test::get_default_stream()); } TEST_F(TextNGramsTest, NgramsTokenize) diff --git a/cpp/tests/text/ngrams_tests.cpp b/cpp/tests/text/ngrams_tests.cpp index c72c7cfc80e..1a737231389 100644 --- a/cpp/tests/text/ngrams_tests.cpp +++ b/cpp/tests/text/ngrams_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -159,6 +159,17 @@ TEST_F(TextGenerateNgramsTest, NgramsHash)
                  2319357747u}});
   // clang-format on
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected);
+
+  results = nvtext::hash_character_ngrams(view, 10, 10);
+  // clang-format off
+  LCW expected2({LCW{2818025299u, 4026424618u,  578054337u, 2107870805u, 3942221995u,
+                     2802685757u, 2686450821u,  584898501u, 2206824201u,  487979059u},
+                 LCW{1154048732u, 3209682333u, 3246563372u, 3789750511u, 1287153502u,
+                     3759561568u, 1092423314u,  339538635u, 4265577390u,  879551618u,
+                     4222824617u, 1774528854u, 1028254379u,  485918316u,  879142987u, 3619248543u}
+                });
+  // clang-format on
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected2);
 }

 TEST_F(TextGenerateNgramsTest, NgramsHashErrors)

From 4ec389b4c515e4b3b85d6fd28b1471b1f1de830d Mon Sep 17 00:00:00 2001
From: Nghia Truong <7416935+ttnghia@users.noreply.github.com>
Date: Mon, 13 Jan 2025 09:27:13 -0800
Subject: [PATCH 7/7] Implement `HOST_UDF` aggregation for reduction and
 segmented reduction (#17645)

Following https://github.com/rapidsai/cudf/pull/17592, this enables `HOST_UDF`
aggregation in reduction and segmented reduction, allowing execution of a
host-side user-defined function (UDF) through the libcudf aggregation
framework.

Closes https://github.com/rapidsai/cudf/issues/16633.

Authors:
  - Nghia Truong (https://github.com/ttnghia)
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Robert (Bobby) Evans (https://github.com/revans2)
  - Kyle Edwards (https://github.com/KyleFromNVIDIA)
  - Yunsong Wang (https://github.com/PointKernel)
  - https://github.com/nvdbaranec

URL: https://github.com/rapidsai/cudf/pull/17645
---
 cpp/include/cudf/aggregation.hpp              |   4 +-
 cpp/include/cudf/aggregation/host_udf.hpp     | 478 +++++++++++-------
 .../cudf/detail/aggregation/aggregation.hpp   |   6 +-
 cpp/src/groupby/groupby.cu                    |   9 +-
 cpp/src/groupby/sort/aggregate.cpp            |  81 ++-
 cpp/src/groupby/sort/host_udf_aggregation.cpp |  48 +-
 cpp/src/reductions/reductions.cpp             |  16 +-
 cpp/src/reductions/segmented/reductions.cpp   |  17 +-
 cpp/tests/CMakeLists.txt                      |   3 +-
 cpp/tests/groupby/host_udf_example_tests.cu   |  75 +--
 cpp/tests/groupby/host_udf_tests.cpp          | 245 ++++-----
 .../reductions/host_udf_example_tests.cu      | 422 ++++++++++++++++
 .../main/java/ai/rapids/cudf/Aggregation.java |   2 +-
 .../ai/rapids/cudf/GroupByAggregation.java    |   2 +-
 .../ai/rapids/cudf/ReductionAggregation.java  |  17 +-
 .../cudf/SegmentedReductionAggregation.java   |  11 +-
 16 files changed, 941 insertions(+), 495 deletions(-)
 create mode 100644 cpp/tests/reductions/host_udf_example_tests.cu

diff --git a/cpp/include/cudf/aggregation.hpp b/cpp/include/cudf/aggregation.hpp
index a1b7db5e08a..2b2a660bed7 100644
--- a/cpp/include/cudf/aggregation.hpp
+++ b/cpp/include/cudf/aggregation.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2024, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2025, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -601,7 +601,7 @@ std::unique_ptr make_udf_aggregation(udf_type type,
                                      data_type output_type);

 // Forward declaration of `host_udf_base` for the factory function of `HOST_UDF` aggregation.
-struct host_udf_base;
+class host_udf_base;

 /**
  * @brief Factory to create a HOST_UDF aggregation.
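As a rough orientation (the UDF type and variable names below are assumed for
illustration and are not part of this patch), the factory composes with
cudf::reduce as follows:

    // `my_reduce_udf` is a user-defined type deriving from cudf::reduce_host_udf
    // (documented in host_udf.hpp below).
    auto agg = cudf::make_host_udf_aggregation<cudf::reduce_aggregation>(
      std::make_unique<my_reduce_udf>());
    auto result =
      cudf::reduce(input_column, *agg, cudf::data_type{cudf::type_id::FLOAT64});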
diff --git a/cpp/include/cudf/aggregation/host_udf.hpp b/cpp/include/cudf/aggregation/host_udf.hpp index bbce76dc5f3..451d75137e4 100644 --- a/cpp/include/cudf/aggregation/host_udf.hpp +++ b/cpp/include/cudf/aggregation/host_udf.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,18 +17,16 @@ #pragma once #include +#include #include #include #include -#include #include #include +#include #include -#include -#include -#include /** * @file host_udf.hpp @@ -43,49 +41,141 @@ namespace CUDF_EXPORT cudf { */ /** - * @brief The interface for host-based UDF implementation. + * @brief The fundamental interface for host-based UDF implementation. * - * An implementation of host-based UDF needs to be derived from this base class, defining - * its own version of the required functions. In particular: - * - The derived class is required to implement `get_empty_output`, `operator()`, `is_equal`, - * and `clone` functions. - * - If necessary, the derived class can also override `do_hash` to compute hashing for its - * instance, and `get_required_data` to selectively access to the input data as well as - * intermediate data provided by libcudf. + * This class declares the functions `do_hash`, `is_equal`, and `clone` that must be defined in + * the users' UDF implementation. These functions are required for libcudf aggregation framework + * to perform its operations. + */ +class host_udf_base { + // Declare constructor private to prevent the users from deriving from this class. + private: + host_udf_base() = default; ///< Default constructor + + // Only allow deriving from the structs below. + friend struct reduce_host_udf; + friend struct segmented_reduce_host_udf; + friend struct groupby_host_udf; + + public: + virtual ~host_udf_base() = default; ///< Default destructor + + /** + * @brief Computes hash value of the instance. + * + * Overriding this function is optional when the derived class has data members such that + * each instance needs to be differentiated from each other. + * + * @return The hash value of the instance + */ + [[nodiscard]] virtual std::size_t do_hash() const + { + return std::hash{}(static_cast(aggregation::Kind::HOST_UDF)); + } + + /** + * @brief Compares two instances of the derived class for equality. + * @param other The other instance to compare with + * @return True if the two instances are equal + */ + [[nodiscard]] virtual bool is_equal(host_udf_base const& other) const = 0; + + /** + * @brief Clones the instance. + * + * The instances of the derived class should be lightweight for efficient cloning. + * + * @return A new instance cloned from this one + */ + [[nodiscard]] virtual std::unique_ptr clone() const = 0; +}; + +/** + * @brief The interface for host-based UDF implementation for reduction contexts. + * + * An implementation of host-based UDF for reduction needs to be derived from this class. + * In addition to implementing the virtual functions declared in the base class `host_udf_base`, + * such derived classes must also define the `operator()` function to perform reduction + * operations. 
* - * Example of such implementation: + * Example: * @code{.cpp} - * struct my_udf_aggregation : cudf::host_udf_base { + * struct my_udf_aggregation : cudf::reduce_host_udf { * my_udf_aggregation() = default; * - * // This UDF aggregation needs `GROUPED_VALUES` and `GROUP_OFFSETS`, - * // and the result from groupby `MAX` aggregation. - * [[nodiscard]] data_attribute_set_t get_required_data() const override + * [[nodiscard]] std::unique_ptr operator()( + * column_view const& input, + * data_type output_dtype, + * std::optional> init, + * rmm::cuda_stream_view stream, + * rmm::device_async_resource_ref mr) const override * { - * return {groupby_data_attribute::GROUPED_VALUES, - * groupby_data_attribute::GROUP_OFFSETS, - * cudf::make_max_aggregation()}; + * // Perform reduction computation using the input data and return the reduction result. + * // This is where the actual reduction logic is implemented. * } * - * [[nodiscard]] output_t get_empty_output( - * [[maybe_unused]] std::optional output_dtype, - * [[maybe_unused]] rmm::cuda_stream_view stream, - * [[maybe_unused]] rmm::device_async_resource_ref mr) const override + * [[nodiscard]] bool is_equal(host_udf_base const& other) const override * { - * // This UDF aggregation always returns a column of type INT32. - * return cudf::make_empty_column(cudf::data_type{cudf::type_id::INT32}); + * // Check if the other object is also instance of this class. + * // If there are internal state variables, they may need to be checked for equality as well. + * return dynamic_cast(&other) != nullptr; * } * - * [[nodiscard]] output_t operator()(input_map_t const& input, - * rmm::cuda_stream_view stream, - * rmm::device_async_resource_ref mr) const override + * [[nodiscard]] std::unique_ptr clone() const override * { - * // Perform UDF computation using the input data and return the result. + * return std::make_unique(); + * } + * }; + * @endcode + */ +struct reduce_host_udf : host_udf_base { + /** + * @brief Perform reduction operations. + * + * @param input The input column for reduction + * @param output_dtype The data type for the final output scalar + * @param init The initial value of the reduction + * @param stream The CUDA stream to use for any kernel launches + * @param mr Device memory resource to use for any allocations + * @return The output result of the aggregation + */ + [[nodiscard]] virtual std::unique_ptr operator()( + column_view const& input, + data_type output_dtype, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const = 0; +}; + +/** + * @brief The interface for host-based UDF implementation for segmented reduction context. + * + * An implementation of host-based UDF for segmented reduction needs to be derived from this class. + * In addition to implementing the virtual functions declared in the base class `host_udf_base`, + * such derived class must also define the `operator()` function to perform segmented reduction. + * + * Example: + * @code{.cpp} + * struct my_udf_aggregation : cudf::segmented_reduce_host_udf { + * my_udf_aggregation() = default; + * + * [[nodiscard]] std::unique_ptr operator()( + * column_view const& input, + * device_span offsets, + * data_type output_dtype, + * null_policy null_handling, + * std::optional> init, + * rmm::cuda_stream_view stream, + * rmm::device_async_resource_ref mr) const override + * { + * // Perform computation using the input data and return the result. + * // This is where the actual segmented reduction logic is implemented. 
* } * * [[nodiscard]] bool is_equal(host_udf_base const& other) const override * { * // Check if the other object is also instance of this class. + * // If there are internal state variables, they may need to be checked for equality as well. * return dynamic_cast(&other) != nullptr; * } * @@ -96,198 +186,232 @@ namespace CUDF_EXPORT cudf { * }; * @endcode */ -struct host_udf_base { - host_udf_base() = default; - virtual ~host_udf_base() = default; - +struct segmented_reduce_host_udf : host_udf_base { /** - * @brief Define the possible data needed for groupby aggregations. + * @brief Perform segmented reduction operations. * - * Note that only sort-based groupby aggregations are supported. + * @param input The input column for reduction + * @param offsets A list of offsets defining the segments for reduction + * @param output_dtype The data type for the final output column + * @param null_handling If `INCLUDE` then the reduction result is valid only if all elements in + * the segment are valid, and if `EXCLUDE` then the reduction result is valid if any + * element in the segment is valid + * @param init The initial value of the reduction + * @param stream The CUDA stream to use for any kernel launches + * @param mr Device memory resource to use for any allocations + * @return The output result of the aggregation */ - enum class groupby_data_attribute : int32_t { - INPUT_VALUES, ///< The input values column. - GROUPED_VALUES, ///< The input values grouped according to the input `keys` for which the - ///< values within each group maintain their original order. - SORTED_GROUPED_VALUES, ///< The input values grouped according to the input `keys` and - ///< sorted within each group. - NUM_GROUPS, ///< The number of groups (i.e., number of distinct keys). - GROUP_OFFSETS, ///< The offsets separating groups. - GROUP_LABELS ///< Group labels (which is also the same as group indices). - }; + [[nodiscard]] virtual std::unique_ptr operator()( + column_view const& input, + device_span offsets, + data_type output_dtype, + null_policy null_handling, + std::optional> init, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const = 0; +}; +// Forward declaration. +namespace groupby ::detail { +struct aggregate_result_functor; +} + +/** + * @brief The interface for host-based UDF implementation for groupby aggregation context. + * + * An implementation of host-based UDF for groupby needs to be derived from this class. + * In addition to implementing the virtual functions declared in the base class `host_udf_base`, + * such a derived class must also define the functions `get_empty_output()` to return result when + * the input is empty, and ``operator()`` to perform its groupby operations. + * + * During execution, the derived class can access internal data provided by the libcudf groupby + * framework through a set of ``get*`` accessors, as well as calling other built-in groupby + * aggregations through the ``compute_aggregation`` function. + * + * @note The derived class can only perform sort-based groupby aggregations. Hash-based groupby + * aggregations require more complex data structure and is not yet supported. + * + * Example: + * @code{.cpp} + * struct my_udf_aggregation : cudf::groupby_host_udf { + * my_udf_aggregation() = default; + * + * [[nodiscard]] std::unique_ptr get_empty_output( + * rmm::cuda_stream_view stream, + * rmm::device_async_resource_ref mr) const override + * { + * // Return a column corresponding to the result when the input values column is empty. 
+ * } + * + * [[nodiscard]] std::unique_ptr operator()( + * rmm::cuda_stream_view stream, + * rmm::device_async_resource_ref mr) const override + * { + * // Perform UDF computation using the input data and return the result. + * } + * + * [[nodiscard]] bool is_equal(host_udf_base const& other) const override + * { + * // Check if the other object is also instance of this class. + * // If there are internal state variables, they may need to be checked for equality as well. + * return dynamic_cast(&other) != nullptr; + * } + * + * [[nodiscard]] std::unique_ptr clone() const override + * { + * return std::make_unique(); + * } + * }; + * @endcode + */ +struct groupby_host_udf : host_udf_base { /** - * @brief Describe possible data that may be needed in the derived class for its operations. + * @brief Get the output when the input values column is empty. * - * Such data can be either intermediate data such as sorted values or group labels etc, or the - * results of other aggregations. + * This is called in libcudf when the input values column is empty. In such situations libcudf + * tries to generate the output directly without unnecessarily evaluating the intermediate data. * - * Each derived host-based UDF class may need a different set of data. It is inefficient to - * evaluate and pass down all these possible data at once from libcudf. A solution for that is, - * the derived class can define a subset of data that it needs and libcudf will evaluate - * and pass down only data requested from that set. + * @param stream The CUDA stream to use for any kernel launches + * @param mr Device memory resource to use for any allocations + * @return The output result of the aggregation when the input values column is empty */ - struct data_attribute { - /** - * @brief Hold all possible data types for the input of the aggregation in the derived class. - */ - using value_type = std::variant>; - value_type value; ///< The actual data attribute, wrapped by this struct - ///< as a wrapper is needed to define `hash` and `equal_to` functors. - - data_attribute() = default; ///< Default constructor - data_attribute(data_attribute&&) = default; ///< Move constructor - - /** - * @brief Construct a new data attribute from an aggregation attribute. - * @param value_ An aggregation attribute - */ - template )> - data_attribute(T value_) : value{value_} - { - } - - /** - * @brief Construct a new data attribute from another aggregation request. - * @param value_ An aggregation request - */ - template || - std::is_same_v)> - data_attribute(std::unique_ptr value_) : value{std::move(value_)} - { - CUDF_EXPECTS(std::get>(value) != nullptr, - "Invalid aggregation request."); - if constexpr (std::is_same_v) { - CUDF_EXPECTS( - dynamic_cast(std::get>(value).get()) != nullptr, - "Requesting results from other aggregations is only supported in groupby " - "aggregations."); - } - } - - /** - * @brief Copy constructor. - * @param other The other data attribute to copy from - */ - data_attribute(data_attribute const& other); - - /** - * @brief Hash functor for `data_attribute`. - */ - struct hash { - /** - * @brief Compute the hash value of a data attribute. - * @param attr The data attribute to hash - * @return The hash value of the data attribute - */ - std::size_t operator()(data_attribute const& attr) const; - }; // struct hash - - /** - * @brief Equality comparison functor for `data_attribute`. - */ - struct equal_to { - /** - * @brief Check if two data attributes are equal. 
- * @param lhs The left-hand side data attribute - * @param rhs The right-hand side data attribute - * @return True if the two data attributes are equal - */ - bool operator()(data_attribute const& lhs, data_attribute const& rhs) const; - }; // struct equal_to - }; // struct data_attribute + [[nodiscard]] virtual std::unique_ptr get_empty_output( + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const = 0; /** - * @brief Set of attributes for the input data that is needed for computing the aggregation. + * @brief Perform the main groupby computation for the host-based UDF. + * + * @param stream The CUDA stream to use for any kernel launches + * @param mr Device memory resource to use for any allocations + * @return The output result of the aggregation */ - using data_attribute_set_t = - std::unordered_set; + [[nodiscard]] virtual std::unique_ptr operator()( + rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const = 0; + + private: + // Allow the struct `aggregate_result_functor` to set its private callback variables. + friend struct groupby::detail::aggregate_result_functor; /** - * @brief Return a set of attributes for the data that is needed for computing the aggregation. - * - * The derived class should return the attributes corresponding to only the data that it needs to - * avoid unnecessary computation performed in libcudf. If this function is not overridden, an - * empty set is returned. That means all the data attributes (except results from other - * aggregations in groupby) will be needed. - * - * @return A set of `data_attribute` + * @brief Callback to access the input values column. + */ + std::function callback_input_values; + + /** + * @brief Callback to access the input values grouped according to the input keys for which the + * values within each group maintain their original order. + */ + std::function callback_grouped_values; + + /** + * @brief Callback to access the input values grouped according to the input keys and sorted + * within each group. + */ + std::function callback_sorted_grouped_values; + + /** + * @brief Callback to access the number of groups (i.e., number of distinct keys). */ - [[nodiscard]] virtual data_attribute_set_t get_required_data() const { return {}; } + std::function callback_num_groups; /** - * @brief Hold all possible types of the data that is passed to the derived class for executing - * the aggregation. + * @brief Callback to access the offsets separating groups. */ - using input_data_t = std::variant>; + std::function(void)> callback_group_offsets; /** - * @brief Input to the aggregation, mapping from each data attribute to its actual data. + * @brief Callback to access the group labels (which is also the same as group indices). */ - using input_map_t = std:: - unordered_map; + std::function(void)> callback_group_labels; /** - * @brief Output type of the aggregation. + * @brief Callback to access the result from other groupby aggregations. + */ + std::function)> callback_compute_aggregation; + + protected: + /** + * @brief Access the input values column. * - * Currently only a single type is supported as the output of the aggregation, but it will hold - * more type in the future when reduction is supported. + * @return The input values column. 
*/ - using output_t = std::variant>; + [[nodiscard]] column_view get_input_values() const + { + CUDF_EXPECTS(callback_input_values, "Uninitialized callback_input_values."); + return callback_input_values(); + } /** - * @brief Get the output when the input values column is empty. + * @brief Access the input values grouped according to the input keys for which the values + * within each group maintain their original order. * - * This is called in libcudf when the input values column is empty. In such situations libcudf - * tries to generate the output directly without unnecessarily evaluating the intermediate data. + * @return The grouped values column. + */ + [[nodiscard]] column_view get_grouped_values() const + { + CUDF_EXPECTS(callback_grouped_values, "Uninitialized callback_grouped_values."); + return callback_grouped_values(); + } + + /** + * @brief Access the input values grouped according to the input keys and sorted within each + * group. * - * @param output_dtype The expected output data type - * @param stream The CUDA stream to use for any kernel launches - * @param mr Device memory resource to use for any allocations - * @return The output result of the aggregation when input values is empty + * @return The sorted grouped values column. */ - [[nodiscard]] virtual output_t get_empty_output(std::optional output_dtype, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) const = 0; + [[nodiscard]] column_view get_sorted_grouped_values() const + { + CUDF_EXPECTS(callback_sorted_grouped_values, "Uninitialized callback_sorted_grouped_values."); + return callback_sorted_grouped_values(); + } /** - * @brief Perform the main computation for the host-based UDF. + * @brief Access the number of groups (i.e., number of distinct keys). * - * @param input The input data needed for performing all computation - * @param stream The CUDA stream to use for any kernel launches - * @param mr Device memory resource to use for any allocations - * @return The output result of the aggregation + * @return The number of groups. */ - [[nodiscard]] virtual output_t operator()(input_map_t const& input, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) const = 0; + [[nodiscard]] size_type get_num_groups() const + { + CUDF_EXPECTS(callback_num_groups, "Uninitialized callback_num_groups."); + return callback_num_groups(); + } /** - * @brief Computes hash value of the class's instance. - * @return The hash value of the instance + * @brief Access the offsets separating groups. + * + * @return The array of group offsets. */ - [[nodiscard]] virtual std::size_t do_hash() const + [[nodiscard]] device_span get_group_offsets() const { - return std::hash{}(static_cast(aggregation::Kind::HOST_UDF)); + CUDF_EXPECTS(callback_group_offsets, "Uninitialized callback_group_offsets."); + return callback_group_offsets(); } /** - * @brief Compares two instances of the derived class for equality. - * @param other The other derived class's instance to compare with - * @return True if the two instances are equal + * @brief Access the group labels (which is also the same as group indices). + * + * @return The array of group labels. */ - [[nodiscard]] virtual bool is_equal(host_udf_base const& other) const = 0; + [[nodiscard]] device_span get_group_labels() const + { + CUDF_EXPECTS(callback_group_labels, "Uninitialized callback_group_labels."); + return callback_group_labels(); + } /** - * @brief Clones the instance. + * @brief Compute a built-in groupby aggregation and access its result. 
* - * A class derived from `host_udf_base` should not store too much data such that its instances - * remain lightweight for efficient cloning. + * This allows the derived class to call any other built-in groupby aggregations on the same input + * values column and access the output for its operations. * - * @return A new instance cloned from this + * @param other_agg An arbitrary built-in groupby aggregation + * @return A `column_view` object corresponding to the output result of the given aggregation */ - [[nodiscard]] virtual std::unique_ptr clone() const = 0; + [[nodiscard]] column_view compute_aggregation(std::unique_ptr other_agg) const + { + CUDF_EXPECTS(callback_compute_aggregation, "Uninitialized callback for computing aggregation."); + return callback_compute_aggregation(std::move(other_agg)); + } }; /** @} */ // end of group diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp index d873e93bd20..5574ed6ea6e 100644 --- a/cpp/include/cudf/detail/aggregation/aggregation.hpp +++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -967,7 +967,9 @@ class udf_aggregation final : public rolling_aggregation { /** * @brief Derived class for specifying host-based UDF aggregation. */ -class host_udf_aggregation final : public groupby_aggregation { +class host_udf_aggregation final : public groupby_aggregation, + public reduce_aggregation, + public segmented_reduce_aggregation { public: std::unique_ptr udf_ptr; diff --git a/cpp/src/groupby/groupby.cu b/cpp/src/groupby/groupby.cu index 4c90cd0eef5..6234148e9fa 100644 --- a/cpp/src/groupby/groupby.cu +++ b/cpp/src/groupby/groupby.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -145,8 +145,11 @@ struct empty_column_constructor { } if constexpr (k == aggregation::Kind::HOST_UDF) { - auto const& udf_ptr = dynamic_cast(agg).udf_ptr; - return std::get>(udf_ptr->get_empty_output(std::nullopt, stream, mr)); + auto const& udf_base_ptr = + dynamic_cast(agg).udf_ptr; + auto const udf_ptr = dynamic_cast(udf_base_ptr.get()); + CUDF_EXPECTS(udf_ptr != nullptr, "Invalid HOST_UDF instance for groupby aggregation."); + return udf_ptr->get_empty_output(stream, mr); } return make_empty_column(target_type(values.type(), k)); diff --git a/cpp/src/groupby/sort/aggregate.cpp b/cpp/src/groupby/sort/aggregate.cpp index 6480070e85a..fb3f7559d64 100644 --- a/cpp/src/groupby/sort/aggregate.cpp +++ b/cpp/src/groupby/sort/aggregate.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -795,58 +795,41 @@ void aggregate_result_functor::operator()(aggregation con { if (cache.has_result(values, agg)) { return; } - auto const& udf_ptr = dynamic_cast(agg).udf_ptr; - auto const data_attrs = [&]() -> host_udf_base::data_attribute_set_t { - if (auto tmp = udf_ptr->get_required_data(); !tmp.empty()) { return tmp; } - // Empty attribute set means everything. 
- return {host_udf_base::groupby_data_attribute::INPUT_VALUES, - host_udf_base::groupby_data_attribute::GROUPED_VALUES, - host_udf_base::groupby_data_attribute::SORTED_GROUPED_VALUES, - host_udf_base::groupby_data_attribute::NUM_GROUPS, - host_udf_base::groupby_data_attribute::GROUP_OFFSETS, - host_udf_base::groupby_data_attribute::GROUP_LABELS}; - }(); + auto const& udf_base_ptr = dynamic_cast(agg).udf_ptr; + auto const udf_ptr = dynamic_cast(udf_base_ptr.get()); + CUDF_EXPECTS(udf_ptr != nullptr, "Invalid HOST_UDF instance for groupby aggregation."); - // Do not cache udf_input, as the actual input data may change from run to run. - host_udf_base::input_map_t udf_input; - for (auto const& attr : data_attrs) { - CUDF_EXPECTS(std::holds_alternative(attr.value) || - std::holds_alternative>(attr.value), - "Invalid input data attribute for HOST_UDF groupby aggregation."); - if (std::holds_alternative(attr.value)) { - switch (std::get(attr.value)) { - case host_udf_base::groupby_data_attribute::INPUT_VALUES: - udf_input.emplace(attr, values); - break; - case host_udf_base::groupby_data_attribute::GROUPED_VALUES: - udf_input.emplace(attr, get_grouped_values()); - break; - case host_udf_base::groupby_data_attribute::SORTED_GROUPED_VALUES: - udf_input.emplace(attr, get_sorted_values()); - break; - case host_udf_base::groupby_data_attribute::NUM_GROUPS: - udf_input.emplace(attr, helper.num_groups(stream)); - break; - case host_udf_base::groupby_data_attribute::GROUP_OFFSETS: - udf_input.emplace(attr, helper.group_offsets(stream)); - break; - case host_udf_base::groupby_data_attribute::GROUP_LABELS: - udf_input.emplace(attr, helper.group_labels(stream)); - break; - default: CUDF_UNREACHABLE("Invalid input data attribute for HOST_UDF groupby aggregation."); - } - } else { // data is result from another aggregation - auto other_agg = std::get>(attr.value)->clone(); + if (!udf_ptr->callback_input_values) { + udf_ptr->callback_input_values = [&]() -> column_view { return values; }; + } + if (!udf_ptr->callback_grouped_values) { + udf_ptr->callback_grouped_values = [&]() -> column_view { return get_grouped_values(); }; + } + if (!udf_ptr->callback_sorted_grouped_values) { + udf_ptr->callback_sorted_grouped_values = [&]() -> column_view { return get_sorted_values(); }; + } + if (!udf_ptr->callback_num_groups) { + udf_ptr->callback_num_groups = [&]() -> size_type { return helper.num_groups(stream); }; + } + if (!udf_ptr->callback_group_offsets) { + udf_ptr->callback_group_offsets = [&]() -> device_span { + return helper.group_offsets(stream); + }; + } + if (!udf_ptr->callback_group_labels) { + udf_ptr->callback_group_labels = [&]() -> device_span { + return helper.group_labels(stream); + }; + } + if (!udf_ptr->callback_compute_aggregation) { + udf_ptr->callback_compute_aggregation = + [&](std::unique_ptr other_agg) -> column_view { cudf::detail::aggregation_dispatcher(other_agg->kind, *this, *other_agg); - auto result = cache.get_result(values, *other_agg); - udf_input.emplace(std::move(other_agg), std::move(result)); - } + return cache.get_result(values, *other_agg); + }; } - auto output = (*udf_ptr)(udf_input, stream, mr); - CUDF_EXPECTS(std::holds_alternative>(output), - "Invalid output type from HOST_UDF groupby aggregation."); - cache.add_result(values, agg, std::get>(std::move(output))); + cache.add_result(values, agg, (*udf_ptr)(stream, mr)); } } // namespace detail diff --git a/cpp/src/groupby/sort/host_udf_aggregation.cpp b/cpp/src/groupby/sort/host_udf_aggregation.cpp index 
0da47e17f48..6f1fe80c4bd 100644 --- a/cpp/src/groupby/sort/host_udf_aggregation.cpp +++ b/cpp/src/groupby/sort/host_udf_aggregation.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,51 +16,9 @@ #include #include -#include namespace cudf { -host_udf_base::data_attribute::data_attribute(data_attribute const& other) - : value{std::visit(cudf::detail::visitor_overload{[](auto const& val) { return value_type{val}; }, - [](std::unique_ptr const& val) { - return value_type{val->clone()}; - }}, - other.value)} -{ -} - -std::size_t host_udf_base::data_attribute::hash::operator()(data_attribute const& attr) const -{ - auto const hash_value = - std::visit(cudf::detail::visitor_overload{ - [](auto const& val) { return std::hash{}(static_cast(val)); }, - [](std::unique_ptr const& val) { return val->do_hash(); }}, - attr.value); - return std::hash{}(attr.value.index()) ^ hash_value; -} - -bool host_udf_base::data_attribute::equal_to::operator()(data_attribute const& lhs, - data_attribute const& rhs) const -{ - auto const& lhs_val = lhs.value; - auto const& rhs_val = rhs.value; - if (lhs_val.index() != rhs_val.index()) { return false; } - return std::visit( - cudf::detail::visitor_overload{ - [](auto const& lhs_val, auto const& rhs_val) { - if constexpr (std::is_same_v) { - return lhs_val == rhs_val; - } else { - return false; - } - }, - [](std::unique_ptr const& lhs_val, std::unique_ptr const& rhs_val) { - return lhs_val->is_equal(*rhs_val); - }}, - lhs_val, - rhs_val); -} - namespace detail { host_udf_aggregation::host_udf_aggregation(std::unique_ptr udf_ptr_) @@ -99,5 +57,9 @@ template CUDF_EXPORT std::unique_ptr make_host_udf_aggregation); template CUDF_EXPORT std::unique_ptr make_host_udf_aggregation(std::unique_ptr); +template CUDF_EXPORT std::unique_ptr + make_host_udf_aggregation(std::unique_ptr); +template CUDF_EXPORT std::unique_ptr + make_host_udf_aggregation(std::unique_ptr); } // namespace cudf diff --git a/cpp/src/reductions/reductions.cpp b/cpp/src/reductions/reductions.cpp index 75ebc078930..928625a7e8f 100644 --- a/cpp/src/reductions/reductions.cpp +++ b/cpp/src/reductions/reductions.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2024, NVIDIA CORPORATION. + * Copyright (c) 2019-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,6 +14,7 @@ * limitations under the License. 
*/ +#include #include #include #include @@ -144,6 +145,13 @@ struct reduce_dispatch_functor { auto td_agg = static_cast(agg); return tdigest::detail::reduce_merge_tdigest(col, td_agg.max_centroids, stream, mr); } + case aggregation::HOST_UDF: { + auto const& udf_base_ptr = + dynamic_cast(agg).udf_ptr; + auto const udf_ptr = dynamic_cast(udf_base_ptr.get()); + CUDF_EXPECTS(udf_ptr != nullptr, "Invalid HOST_UDF instance for reduction."); + return (*udf_ptr)(col, output_dtype, init, stream, mr); + } // case aggregation::HOST_UDF default: CUDF_FAIL("Unsupported reduction operator"); } } @@ -161,9 +169,11 @@ std::unique_ptr reduce(column_view const& col, cudf::data_type_error); if (init.has_value() && !(agg.kind == aggregation::SUM || agg.kind == aggregation::PRODUCT || agg.kind == aggregation::MIN || agg.kind == aggregation::MAX || - agg.kind == aggregation::ANY || agg.kind == aggregation::ALL)) { + agg.kind == aggregation::ANY || agg.kind == aggregation::ALL || + agg.kind == aggregation::HOST_UDF)) { CUDF_FAIL( - "Initial value is only supported for SUM, PRODUCT, MIN, MAX, ANY, and ALL aggregation types"); + "Initial value is only supported for SUM, PRODUCT, MIN, MAX, ANY, ALL, and HOST_UDF " + "aggregation types"); } // Returns default scalar if input column is empty or all null diff --git a/cpp/src/reductions/segmented/reductions.cpp b/cpp/src/reductions/segmented/reductions.cpp index 1c3a2b0c0f3..5835bfcf0a1 100644 --- a/cpp/src/reductions/segmented/reductions.cpp +++ b/cpp/src/reductions/segmented/reductions.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -13,6 +13,8 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + +#include #include #include #include @@ -98,6 +100,13 @@ struct segmented_reduce_dispatch_functor { } case segmented_reduce_aggregation::NUNIQUE: return segmented_nunique(col, offsets, null_handling, stream, mr); + case aggregation::HOST_UDF: { + auto const& udf_base_ptr = + dynamic_cast(agg).udf_ptr; + auto const udf_ptr = dynamic_cast(udf_base_ptr.get()); + CUDF_EXPECTS(udf_ptr != nullptr, "Invalid HOST_UDF instance for segmented reduction."); + return (*udf_ptr)(col, offsets, output_dtype, null_handling, init, stream, mr); + } // case aggregation::HOST_UDF default: CUDF_FAIL("Unsupported aggregation type."); } } @@ -117,9 +126,11 @@ std::unique_ptr segmented_reduce(column_view const& segmented_values, cudf::data_type_error); if (init.has_value() && !(agg.kind == aggregation::SUM || agg.kind == aggregation::PRODUCT || agg.kind == aggregation::MIN || agg.kind == aggregation::MAX || - agg.kind == aggregation::ANY || agg.kind == aggregation::ALL)) { + agg.kind == aggregation::ANY || agg.kind == aggregation::ALL || + agg.kind == aggregation::HOST_UDF)) { CUDF_FAIL( - "Initial value is only supported for SUM, PRODUCT, MIN, MAX, ANY, and ALL aggregation types"); + "Initial value is only supported for SUM, PRODUCT, MIN, MAX, ANY, ALL, and HOST_UDF " + "aggregation types"); } if (segmented_values.is_empty() && offsets.empty()) { diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 344979e1288..35877ac34b9 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -220,11 +220,12 @@ ConfigureTest( REDUCTIONS_TEST reductions/collect_ops_tests.cpp reductions/ewm_tests.cpp + reductions/host_udf_example_tests.cu + reductions/list_rank_test.cpp reductions/rank_tests.cpp reductions/reduction_tests.cpp reductions/scan_tests.cpp reductions/segmented_reduction_tests.cpp - reductions/list_rank_test.cpp reductions/tdigest_tests.cu GPUS 1 PERCENT 70 diff --git a/cpp/tests/groupby/host_udf_example_tests.cu b/cpp/tests/groupby/host_udf_example_tests.cu index a454bd692fc..e1ded37d8a7 100644 --- a/cpp/tests/groupby/host_udf_example_tests.cu +++ b/cpp/tests/groupby/host_udf_example_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,9 +21,7 @@ #include #include #include -#include #include -#include #include #include @@ -34,6 +32,9 @@ #include #include +using doubles_col = cudf::test::fixed_width_column_wrapper; +using int32s_col = cudf::test::fixed_width_column_wrapper; + namespace { /** * @brief A host-based UDF implementation for groupby. @@ -41,42 +42,21 @@ namespace { * For each group of values, the aggregation computes * `(group_idx + 1) * group_sum_of_squares - group_max * group_sum`. */ -struct host_udf_groupby_example : cudf::host_udf_base { +struct host_udf_groupby_example : cudf::groupby_host_udf { host_udf_groupby_example() = default; - [[nodiscard]] data_attribute_set_t get_required_data() const override - { - // We need grouped values, group offsets, group labels, and also results from groups' - // MAX and SUM aggregations. 
-    return {groupby_data_attribute::GROUPED_VALUES,
-            groupby_data_attribute::GROUP_OFFSETS,
-            groupby_data_attribute::GROUP_LABELS,
-            cudf::make_max_aggregation(),
-            cudf::make_sum_aggregation()};
-  }
-
-  [[nodiscard]] output_t get_empty_output(
-    [[maybe_unused]] std::optional output_dtype,
-    [[maybe_unused]] rmm::cuda_stream_view stream,
-    [[maybe_unused]] rmm::device_async_resource_ref mr) const override
+  [[nodiscard]] std::unique_ptr get_empty_output(
+    rmm::cuda_stream_view, rmm::device_async_resource_ref) const override
   {
     return cudf::make_empty_column(
       cudf::data_type{cudf::type_to_id()});
   }

-  [[nodiscard]] output_t operator()(input_map_t const& input,
-                                    rmm::cuda_stream_view stream,
-                                    rmm::device_async_resource_ref mr) const override
+  [[nodiscard]] std::unique_ptr operator()(
+    rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const override
   {
-    auto const& values =
-      std::get(input.at(groupby_data_attribute::GROUPED_VALUES));
-    return cudf::type_dispatcher(values.type(), groupby_fn{this}, input, stream, mr);
-  }
-
-  [[nodiscard]] std::size_t do_hash() const override
-  {
-    // Just return the same hash for all instances of this class.
-    return std::size_t{12345};
+    auto const values = get_grouped_values();
+    return cudf::type_dispatcher(values.type(), groupby_fn{*this}, stream, mr);
   }

   [[nodiscard]] bool is_equal(host_udf_base const& other) const override
@@ -92,37 +72,33 @@ struct host_udf_groupby_example : cudf::host_udf_base {
   struct groupby_fn {
     // Store a reference to the parent class so we can call its functions.
-    host_udf_groupby_example const* parent;
+    host_udf_groupby_example const& parent;

-    // For simplicity, this example only accepts double input and always produces double output.
+    // For simplicity, this example only accepts a single input type and a single output type.
     using InputType  = double;
     using OutputType = double;

     template )>
-    output_t operator()(Args...) const
+    std::unique_ptr operator()(Args...)
const { CUDF_FAIL("Unsupported input type."); } template )> - output_t operator()(input_map_t const& input, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) const + std::unique_ptr operator()(rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const { - auto const& values = - std::get(input.at(groupby_data_attribute::GROUPED_VALUES)); - if (values.size() == 0) { return parent->get_empty_output(std::nullopt, stream, mr); } + auto const values = parent.get_grouped_values(); + if (values.size() == 0) { return parent.get_empty_output(stream, mr); } - auto const offsets = std::get>( - input.at(groupby_data_attribute::GROUP_OFFSETS)); + auto const offsets = parent.get_group_offsets(); CUDF_EXPECTS(offsets.size() > 0, "Invalid offsets."); auto const num_groups = static_cast(offsets.size()) - 1; - auto const group_indices = std::get>( - input.at(groupby_data_attribute::GROUP_LABELS)); - auto const group_max = std::get( - input.at(cudf::make_max_aggregation())); - auto const group_sum = std::get( - input.at(cudf::make_sum_aggregation())); + auto const group_indices = parent.get_group_labels(); + auto const group_max = + parent.compute_aggregation(cudf::make_max_aggregation()); + auto const group_sum = + parent.compute_aggregation(cudf::make_sum_aggregation()); auto const values_dv_ptr = cudf::column_device_view::create(values, stream); auto const output = cudf::make_numeric_column(cudf::data_type{cudf::type_to_id()}, @@ -191,9 +167,6 @@ struct host_udf_groupby_example : cudf::host_udf_base { } // namespace -using doubles_col = cudf::test::fixed_width_column_wrapper; -using int32s_col = cudf::test::fixed_width_column_wrapper; - struct HostUDFGroupbyExampleTest : cudf::test::BaseFixture {}; TEST_F(HostUDFGroupbyExampleTest, SimpleInput) diff --git a/cpp/tests/groupby/host_udf_tests.cpp b/cpp/tests/groupby/host_udf_tests.cpp index 1a0f68c0c6c..17da28cdefc 100644 --- a/cpp/tests/groupby/host_udf_tests.cpp +++ b/cpp/tests/groupby/host_udf_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,178 +26,121 @@ #include namespace { + /** - * @brief A host-based UDF implementation used for unit tests. + * @brief Generate a random aggregation object from {min, max, sum, product}. */ -struct host_udf_test_base : cudf::host_udf_base { +std::unique_ptr get_random_agg() +{ + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution distr(1, 4); + switch (distr(gen)) { + case 1: return cudf::make_min_aggregation(); + case 2: return cudf::make_max_aggregation(); + case 3: return cudf::make_sum_aggregation(); + case 4: return cudf::make_product_aggregation(); + default: CUDF_UNREACHABLE("This should not be reached."); + } + return nullptr; +} + +/** + * @brief A host-based UDF implementation used for unit tests for groupby aggregation. 
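+ *
+ * The test verifies that each data getter exposed by `cudf::groupby_host_udf` returns
+ * data of the expected type, and, optionally, that other aggregations can be computed
+ * from inside the UDF via `compute_aggregation`.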
+ */
+struct host_udf_groupby_test : cudf::groupby_host_udf {
   int test_location_line;  // the location where testing is called
   bool* test_run;          // to check if the test is accidentally skipped
-  data_attribute_set_t input_attrs;
+  bool test_other_agg;     // whether to test calling other aggregations

-  host_udf_test_base(int test_location_line_, bool* test_run_, data_attribute_set_t input_attrs_)
-    : test_location_line{test_location_line_},
-      test_run{test_run_},
-      input_attrs(std::move(input_attrs_))
+  host_udf_groupby_test(int test_location_line_, bool* test_run_, bool test_other_agg_)
+    : test_location_line{test_location_line_}, test_run{test_run_}, test_other_agg{test_other_agg_}
   {
   }

-  [[nodiscard]] data_attribute_set_t get_required_data() const override { return input_attrs; }
-
-  // This is the main testing function, which checks for the correctness of input data.
-  // The rests are just to satisfy the interface.
-  [[nodiscard]] output_t operator()(input_map_t const& input,
-                                    rmm::cuda_stream_view stream,
-                                    rmm::device_async_resource_ref mr) const override
+  [[nodiscard]] std::size_t do_hash() const override { return 0; }
+  [[nodiscard]] bool is_equal(host_udf_base const& other) const override
   {
-    SCOPED_TRACE("Test instance created at line: " + std::to_string(test_location_line));
-
-    test_data_attributes(input, stream, mr);
-
-    *test_run = true;  // test is run successfully
-    return get_empty_output(std::nullopt, stream, mr);
+    // Just check if the other object is also an instance of this class.
+    return dynamic_cast(&other) != nullptr;
+  }
+  [[nodiscard]] std::unique_ptr clone() const override
+  {
+    return std::make_unique(test_location_line, test_run, test_other_agg);
   }

-  [[nodiscard]] output_t get_empty_output(
-    [[maybe_unused]] std::optional output_dtype,
+  [[nodiscard]] std::unique_ptr get_empty_output(
     [[maybe_unused]] rmm::cuda_stream_view stream,
     [[maybe_unused]] rmm::device_async_resource_ref mr) const override
   {
-    // Unused function - dummy output.
+    // Dummy output.
     return cudf::make_empty_column(cudf::data_type{cudf::type_id::INT32});
   }

-  [[nodiscard]] std::size_t do_hash() const override { return 0; }
-  [[nodiscard]] bool is_equal(host_udf_base const& other) const override { return true; }
+  [[nodiscard]] std::unique_ptr operator()(
+    rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) const override
+  {
+    SCOPED_TRACE("Test instance created at line: " + std::to_string(test_location_line));
+
-  // The main test function, which must be implemented for each kind of aggregations
-  // (groupby/reduction/segmented_reduction).
-  virtual void test_data_attributes(input_map_t const& input,
-                                    rmm::cuda_stream_view stream,
-                                    rmm::device_async_resource_ref mr) const = 0;
-};
+    // Perform tests on the types of the groupby data: we must ensure that the data
+    // returned by each getter has the correct type.

-/**
- * @brief A host-based UDF implementation used for unit tests for groupby aggregation.
- */ -struct host_udf_groupby_test : host_udf_test_base { - host_udf_groupby_test(int test_location_line_, - bool* test_run_, - data_attribute_set_t input_attrs_ = {}) - : host_udf_test_base(test_location_line_, test_run_, std::move(input_attrs_)) - { - } + { + auto const inp_data = get_input_values(); + EXPECT_TRUE((std::is_same_v>)); + } - [[nodiscard]] std::unique_ptr clone() const override - { - return std::make_unique(test_location_line, test_run, input_attrs); - } + { + auto const inp_data = get_grouped_values(); + EXPECT_TRUE((std::is_same_v>)); + } - void test_data_attributes(input_map_t const& input, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) const override - { - data_attribute_set_t check_attrs = input_attrs; - if (check_attrs.empty()) { - check_attrs = data_attribute_set_t{groupby_data_attribute::INPUT_VALUES, - groupby_data_attribute::GROUPED_VALUES, - groupby_data_attribute::SORTED_GROUPED_VALUES, - groupby_data_attribute::NUM_GROUPS, - groupby_data_attribute::GROUP_OFFSETS, - groupby_data_attribute::GROUP_LABELS}; + { + auto const inp_data = get_sorted_grouped_values(); + EXPECT_TRUE((std::is_same_v>)); } - EXPECT_EQ(input.size(), check_attrs.size()); - for (auto const& attr : check_attrs) { - EXPECT_TRUE(input.count(attr) > 0); - EXPECT_TRUE(std::holds_alternative(attr.value) || - std::holds_alternative>(attr.value)); - if (std::holds_alternative(attr.value)) { - switch (std::get(attr.value)) { - case groupby_data_attribute::INPUT_VALUES: - EXPECT_TRUE(std::holds_alternative(input.at(attr))); - break; - case groupby_data_attribute::GROUPED_VALUES: - EXPECT_TRUE(std::holds_alternative(input.at(attr))); - break; - case groupby_data_attribute::SORTED_GROUPED_VALUES: - EXPECT_TRUE(std::holds_alternative(input.at(attr))); - break; - case groupby_data_attribute::NUM_GROUPS: - EXPECT_TRUE(std::holds_alternative(input.at(attr))); - break; - case groupby_data_attribute::GROUP_OFFSETS: - EXPECT_TRUE( - std::holds_alternative>(input.at(attr))); - break; - case groupby_data_attribute::GROUP_LABELS: - EXPECT_TRUE( - std::holds_alternative>(input.at(attr))); - break; - default:; - } - } else { // std::holds_alternative>(attr.value) - EXPECT_TRUE(std::holds_alternative(input.at(attr))); - } + + { + auto const inp_data = get_num_groups(); + EXPECT_TRUE((std::is_same_v>)); } - } -}; -/** - * @brief Get a random subset of input data attributes. - */ -cudf::host_udf_base::data_attribute_set_t get_subset( - cudf::host_udf_base::data_attribute_set_t const& attrs) -{ - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution size_distr(1, attrs.size() - 1); - auto const subset_size = size_distr(gen); - auto const elements = - std::vector(attrs.begin(), attrs.end()); - std::uniform_int_distribution idx_distr(0, attrs.size() - 1); - cudf::host_udf_base::data_attribute_set_t output; - while (output.size() < subset_size) { - output.insert(elements[idx_distr(gen)]); - } - return output; -} + { + auto const inp_data = get_group_offsets(); + EXPECT_TRUE((std::is_same_v, + std::decay_t>)); + } -/** - * @brief Generate a random aggregation object from {min, max, sum, product}. 
- */ -std::unique_ptr get_random_agg() -{ - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution distr(1, 4); - switch (distr(gen)) { - case 1: return cudf::make_min_aggregation(); - case 2: return cudf::make_max_aggregation(); - case 3: return cudf::make_sum_aggregation(); - case 4: return cudf::make_product_aggregation(); - default: CUDF_UNREACHABLE("This should not be reached."); + { + auto const inp_data = get_group_labels(); + EXPECT_TRUE((std::is_same_v, + std::decay_t>)); + } + + // Perform tests on type of the result from computing other aggregations. + if (test_other_agg) { + auto const inp_data = compute_aggregation(get_random_agg()); + EXPECT_TRUE((std::is_same_v>)); + } + + *test_run = true; // test is run successfully + return get_empty_output(stream, mr); } - return nullptr; -} +}; } // namespace using int32s_col = cudf::test::fixed_width_column_wrapper; -// Number of randomly testing on the input data attributes. -// For each test, a subset of data attributes will be randomly generated from all the possible input -// data attributes. The input data corresponding to that subset passed from libcudf will be tested -// for correctness. -constexpr int NUM_RANDOM_TESTS = 20; - struct HostUDFTest : cudf::test::BaseFixture {}; -TEST_F(HostUDFTest, GroupbyAllInput) +TEST_F(HostUDFTest, GroupbyBuiltinInput) { bool test_run = false; auto const keys = int32s_col{0, 1, 2}; auto const vals = int32s_col{0, 1, 2}; auto agg = cudf::make_host_udf_aggregation( - std::make_unique(__LINE__, &test_run)); + std::make_unique(__LINE__, &test_run, /*test_other_agg*/ false)); std::vector requests; requests.emplace_back(); @@ -205,28 +148,22 @@ TEST_F(HostUDFTest, GroupbyAllInput) requests[0].aggregations.push_back(std::move(agg)); cudf::groupby::groupby gb_obj( cudf::table_view({keys}), cudf::null_policy::INCLUDE, cudf::sorted::NO, {}, {}); - [[maybe_unused]] auto const grp_result = - gb_obj.aggregate(requests, cudf::test::get_default_stream()); + [[maybe_unused]] auto const grp_result = gb_obj.aggregate( + requests, cudf::test::get_default_stream(), cudf::get_current_device_resource_ref()); EXPECT_TRUE(test_run); } -TEST_F(HostUDFTest, GroupbySomeInput) +TEST_F(HostUDFTest, GroupbyWithCallingOtherAggregations) { - auto const keys = int32s_col{0, 1, 2}; - auto const vals = int32s_col{0, 1, 2}; - auto const all_attrs = cudf::host_udf_base::data_attribute_set_t{ - cudf::host_udf_base::groupby_data_attribute::INPUT_VALUES, - cudf::host_udf_base::groupby_data_attribute::GROUPED_VALUES, - cudf::host_udf_base::groupby_data_attribute::SORTED_GROUPED_VALUES, - cudf::host_udf_base::groupby_data_attribute::NUM_GROUPS, - cudf::host_udf_base::groupby_data_attribute::GROUP_OFFSETS, - cudf::host_udf_base::groupby_data_attribute::GROUP_LABELS}; + auto const keys = int32s_col{0, 1, 2}; + auto const vals = int32s_col{0, 1, 2}; + + constexpr int NUM_RANDOM_TESTS = 20; + for (int i = 0; i < NUM_RANDOM_TESTS; ++i) { - bool test_run = false; - auto input_attrs = get_subset(all_attrs); - input_attrs.insert(get_random_agg()); - auto agg = cudf::make_host_udf_aggregation( - std::make_unique(__LINE__, &test_run, std::move(input_attrs))); + bool test_run = false; + auto agg = cudf::make_host_udf_aggregation( + std::make_unique(__LINE__, &test_run, /*test_other_agg*/ true)); std::vector requests; requests.emplace_back(); @@ -234,8 +171,8 @@ TEST_F(HostUDFTest, GroupbySomeInput) requests[0].aggregations.push_back(std::move(agg)); cudf::groupby::groupby gb_obj( cudf::table_view({keys}), 
   cudf::null_policy::INCLUDE, cudf::sorted::NO, {}, {});
-    [[maybe_unused]] auto const grp_result =
-      gb_obj.aggregate(requests, cudf::test::get_default_stream());
+    [[maybe_unused]] auto const grp_result = gb_obj.aggregate(
+      requests, cudf::test::get_default_stream(), cudf::get_current_device_resource_ref());
     EXPECT_TRUE(test_run);
   }
 }
diff --git a/cpp/tests/reductions/host_udf_example_tests.cu b/cpp/tests/reductions/host_udf_example_tests.cu
new file mode 100644
index 00000000000..67b88c5306b
--- /dev/null
+++ b/cpp/tests/reductions/host_udf_example_tests.cu
@@ -0,0 +1,422 @@
+/*
+ * Copyright (c) 2025, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+using doubles_col = cudf::test::fixed_width_column_wrapper;
+using int32s_col  = cudf::test::fixed_width_column_wrapper;
+using int64s_col  = cudf::test::fixed_width_column_wrapper;
+
+namespace {
+/**
+ * @brief A host-based UDF implementation for reduction.
+ *
+ * The aggregation computes `sum(value^2, for value in group)` (that is, the sum of squares).
+ */
+struct host_udf_reduction_example : cudf::reduce_host_udf {
+  host_udf_reduction_example() = default;
+
+  [[nodiscard]] std::unique_ptr operator()(
+    cudf::column_view const& input,
+    cudf::data_type output_dtype,
+    std::optional> init,
+    rmm::cuda_stream_view stream,
+    rmm::device_async_resource_ref mr) const override
+  {
+    return cudf::double_type_dispatcher(
+      input.type(), output_dtype, reduce_fn{}, input, output_dtype, init, stream, mr);
+  }
+
+  [[nodiscard]] bool is_equal(host_udf_base const& other) const override
+  {
+    // Just check if the other object is also an instance of this class.
+    return dynamic_cast(&other) != nullptr;
+  }
+
+  [[nodiscard]] std::unique_ptr clone() const override
+  {
+    return std::make_unique();
+  }
+
+  struct reduce_fn {
+    // For simplicity, this example only accepts a single input type and a single output type.
+    using InputType  = double;
+    using OutputType = int64_t;
+
+    template || !std::is_same_v)>
+    std::unique_ptr operator()(Args...) const
+    {
+      CUDF_FAIL("Unsupported input/output type.");
+    }
+
+    template && std::is_same_v)>
+    [[nodiscard]] std::unique_ptr operator()(
+      cudf::column_view const& input,
+      cudf::data_type output_dtype,
+      std::optional> init,
+      rmm::cuda_stream_view stream,
+      rmm::device_async_resource_ref mr) const
+    {
+      CUDF_EXPECTS(output_dtype == cudf::data_type{cudf::type_to_id()},
+                   "Invalid output type.");
+      if (input.size() == 0) {
+        return cudf::make_default_constructed_scalar(output_dtype, stream, mr);
+      }
+
+      auto const init_value = [&]() -> InputType {
+        if (init.has_value() && init.value().get().is_valid(stream)) {
+          auto const numeric_init_scalar =
+            dynamic_cast const*>(&init.value().get());
+          CUDF_EXPECTS(numeric_init_scalar != nullptr, "Invalid init scalar for reduction.");
+          return numeric_init_scalar->value(stream);
+        }
+        return InputType{0};
+      }();
+
+      auto const input_dv_ptr = cudf::column_device_view::create(input, stream);
+      auto const result = thrust::transform_reduce(rmm::exec_policy(stream),
+                                                   thrust::make_counting_iterator(0),
+                                                   thrust::make_counting_iterator(input.size()),
+                                                   transform_fn{*input_dv_ptr},
+                                                   static_cast(init_value),
+                                                   thrust::plus<>{});
+
+      auto output = cudf::make_numeric_scalar(output_dtype, stream, mr);
+      static_cast*>(output.get())->set_value(result, stream);
+      return output;
+    }
+
+    struct transform_fn {
+      cudf::column_device_view values;
+      OutputType __device__ operator()(cudf::size_type idx) const
+      {
+        if (values.is_null(idx)) { return OutputType{0}; }
+        auto const val = static_cast(values.element(idx));
+        return val * val;
+      }
+    };
+  };
+};
+
+}  // namespace
+
+struct HostUDFReductionExampleTest : cudf::test::BaseFixture {};
+
+TEST_F(HostUDFReductionExampleTest, SimpleInput)
+{
+  auto const vals = doubles_col{0.0, 1.0, 2.0, 3.0, 4.0, 5.0};
+  auto const agg  = cudf::make_host_udf_aggregation(
+    std::make_unique());
+  auto const reduced = cudf::reduce(vals,
+                                    *agg,
+                                    cudf::data_type{cudf::type_id::INT64},
+                                    cudf::get_default_stream(),
+                                    cudf::get_current_device_resource_ref());
+  EXPECT_TRUE(reduced->is_valid());
+  EXPECT_EQ(cudf::type_id::INT64, reduced->type().id());
+  auto const result =
+    static_cast*>(reduced.get())->value(cudf::get_default_stream());
+  auto constexpr expected = 55;  // 0^2 + 1^2 + 2^2 + 3^2 + 4^2 + 5^2 = 55
+  EXPECT_EQ(expected, result);
+}
+
+TEST_F(HostUDFReductionExampleTest, EmptyInput)
+{
+  auto const vals = doubles_col{};
+  auto const agg  = cudf::make_host_udf_aggregation(
+    std::make_unique());
+  auto const reduced = cudf::reduce(vals,
+                                    *agg,
+                                    cudf::data_type{cudf::type_id::INT64},
+                                    cudf::get_default_stream(),
+                                    cudf::get_current_device_resource_ref());
+  EXPECT_FALSE(reduced->is_valid());
+  EXPECT_EQ(cudf::type_id::INT64, reduced->type().id());
+}
+
+namespace {
+
+/**
+ * @brief A host-based UDF implementation for segmented reduction.
+ *
+ * The aggregation computes `sum(value^2, for value in group)` (that is, the sum of squares).
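+ *
+ * When nulls are included (`null_policy::INCLUDE`), each null row contributes the squared
+ * init value to its segment's sum; when nulls are excluded, null rows are skipped.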
+ */
+struct host_udf_segmented_reduction_example : cudf::segmented_reduce_host_udf {
+  host_udf_segmented_reduction_example() = default;
+
+  [[nodiscard]] std::unique_ptr operator()(
+    cudf::column_view const& input,
+    cudf::device_span offsets,
+    cudf::data_type output_dtype,
+    cudf::null_policy null_handling,
+    std::optional> init,
+    rmm::cuda_stream_view stream,
+    rmm::device_async_resource_ref mr) const override
+  {
+    return cudf::double_type_dispatcher(input.type(),
+                                        output_dtype,
+                                        segmented_reduce_fn{},
+                                        input,
+                                        offsets,
+                                        output_dtype,
+                                        null_handling,
+                                        init,
+                                        stream,
+                                        mr);
+  }
+
+  [[nodiscard]] bool is_equal(host_udf_base const& other) const override
+  {
+    // Just check if the other object is also an instance of this class.
+    return dynamic_cast(&other) != nullptr;
+  }
+
+  [[nodiscard]] std::unique_ptr clone() const override
+  {
+    return std::make_unique();
+  }
+
+  struct segmented_reduce_fn {
+    // For simplicity, this example only accepts a single input type and a single output type.
+    using InputType  = double;
+    using OutputType = int64_t;
+
+    template || !std::is_same_v)>
+    std::unique_ptr operator()(Args...) const
+    {
+      CUDF_FAIL("Unsupported input/output type.");
+    }
+
+    template && std::is_same_v)>
+    std::unique_ptr operator()(
+      cudf::column_view const& input,
+      cudf::device_span offsets,
+      cudf::data_type output_dtype,
+      cudf::null_policy null_handling,
+      std::optional> init,
+      rmm::cuda_stream_view stream,
+      rmm::device_async_resource_ref mr) const
+    {
+      CUDF_EXPECTS(output_dtype == cudf::data_type{cudf::type_to_id()},
+                   "Invalid output type.");
+      CUDF_EXPECTS(offsets.size() > 0, "Invalid offsets.");
+      auto const num_segments = static_cast(offsets.size()) - 1;
+
+      if (input.size() == 0) {
+        if (num_segments <= 0) { return cudf::make_empty_column(output_dtype); }
+        return cudf::make_numeric_column(
+          output_dtype, num_segments, cudf::mask_state::ALL_NULL, stream, mr);
+      }
+
+      auto const init_value = [&]() -> InputType {
+        if (init.has_value() && init.value().get().is_valid(stream)) {
+          auto const numeric_init_scalar =
+            dynamic_cast const*>(&init.value().get());
+          CUDF_EXPECTS(numeric_init_scalar != nullptr, "Invalid init scalar for reduction.");
+          return numeric_init_scalar->value(stream);
+        }
+        return InputType{0};
+      }();
+
+      auto const input_dv_ptr = cudf::column_device_view::create(input, stream);
+      auto output             = cudf::make_numeric_column(
+        output_dtype, num_segments, cudf::mask_state::UNALLOCATED, stream);
+
+      // Store row index if it is valid, otherwise store a negative value denoting a null row.
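+      // A later cudf::gather with `out_of_bounds_policy::NULLIFY` consumes these indices,
+      // so every segment marked with a negative index comes back as a null output row.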
+ rmm::device_uvector valid_idx(num_segments, stream); + + thrust::transform( + rmm::exec_policy(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_segments), + thrust::make_zip_iterator(output->mutable_view().begin(), valid_idx.begin()), + transform_fn{*input_dv_ptr, offsets, static_cast(init_value), null_handling}); + + auto const valid_idx_cv = cudf::column_view{ + cudf::data_type{cudf::type_id::INT32}, num_segments, valid_idx.begin(), nullptr, 0}; + return std::move(cudf::gather(cudf::table_view{{output->view()}}, + valid_idx_cv, + cudf::out_of_bounds_policy::NULLIFY, + stream, + mr) + ->release() + .front()); + } + + struct transform_fn { + cudf::column_device_view values; + cudf::device_span offsets; + OutputType init_value; + cudf::null_policy null_handling; + + thrust::tuple __device__ operator()(cudf::size_type idx) const + { + auto const start = offsets[idx]; + auto const end = offsets[idx + 1]; + + auto constexpr invalid_idx = cuda::std::numeric_limits::lowest(); + if (start == end) { return {OutputType{0}, invalid_idx}; } + + auto sum = init_value; + for (auto i = start; i < end; ++i) { + if (values.is_null(i)) { + if (null_handling == cudf::null_policy::INCLUDE) { sum += init_value * init_value; } + continue; + } + auto const val = static_cast(values.element(i)); + sum += val * val; + } + auto const segment_size = end - start; + return {static_cast(segment_size) * sum, idx}; + } + }; + }; +}; + +} // namespace + +struct HostUDFSegmentedReductionExampleTest : cudf::test::BaseFixture {}; + +TEST_F(HostUDFSegmentedReductionExampleTest, SimpleInput) +{ + double constexpr null = 0.0; + auto const vals = doubles_col{{0.0, null, 2.0, 3.0, null, 5.0, null, null, 8.0, 9.0}, + {true, false, true, true, false, true, false, false, true, true}}; + auto const offsets = int32s_col{0, 3, 5, 10}.release(); + auto const agg = cudf::make_host_udf_aggregation( + std::make_unique()); + + // Test without init value. + { + auto const result = cudf::segmented_reduce( + vals, + cudf::device_span(offsets->view().begin(), offsets->size()), + *agg, + cudf::data_type{cudf::type_id::INT64}, + cudf::null_policy::INCLUDE, + std::nullopt, // init value + cudf::get_default_stream(), + cudf::get_current_device_resource_ref()); + + // When null_policy is set to `INCLUDE`, the null values are replaced with the init value. + // Since init value is not given, it is set to 0. + // [ 3 * (0^2 + init^2 + 2^2), 2 * (3^2 + init^2), 5 * (5^2 + init^2 + init^2 + 8^2 + 9^2) ] + auto const expected = int64s_col{{12, 18, 850}, {true, true, true}}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); + } + + // Test with init value, and include nulls. + { + auto const init_scalar = cudf::make_fixed_width_scalar(3.0); + auto const result = cudf::segmented_reduce( + vals, + cudf::device_span(offsets->view().begin(), offsets->size()), + *agg, + cudf::data_type{cudf::type_id::INT64}, + cudf::null_policy::INCLUDE, + *init_scalar, + cudf::get_default_stream(), + cudf::get_current_device_resource_ref()); + + // When null_policy is set to `INCLUDE`, the null values are replaced with the init value. + // [ 3 * (3 + 0^2 + 3^2 + 2^2), 2 * (3 + 3^2 + 3^2), 5 * (3 + 5^2 + 3^2 + 3^2 + 8^2 + 9^2) ] + auto const expected = int64s_col{{48, 42, 955}, {true, true, true}}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); + } + + // Test with init value, and exclude nulls. 
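+  // With `EXCLUDE`, null rows are skipped entirely, so only the valid values (plus the
+  // init value once per segment) contribute to the expected sums below.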
+ { + auto const init_scalar = cudf::make_fixed_width_scalar(3.0); + auto const result = cudf::segmented_reduce( + vals, + cudf::device_span(offsets->view().begin(), offsets->size()), + *agg, + cudf::data_type{cudf::type_id::INT64}, + cudf::null_policy::EXCLUDE, + *init_scalar, + cudf::get_default_stream(), + cudf::get_current_device_resource_ref()); + + // [ 3 * (3 + 0^2 + 2^2), 2 * (3 + 3^2), 5 * (3 + 5^2 + 8^2 + 9^2) ] + auto const expected = int64s_col{{21, 24, 865}, {true, true, true}}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); + } +} + +TEST_F(HostUDFSegmentedReductionExampleTest, EmptySegments) +{ + auto const vals = doubles_col{}; + auto const offsets = int32s_col{0, 0, 0, 0}.release(); + auto const agg = cudf::make_host_udf_aggregation( + std::make_unique()); + auto const result = cudf::segmented_reduce( + vals, + cudf::device_span(offsets->view().begin(), offsets->size()), + *agg, + cudf::data_type{cudf::type_id::INT64}, + cudf::null_policy::INCLUDE, + std::nullopt, // init value + cudf::get_default_stream(), + cudf::get_current_device_resource_ref()); + auto const expected = int64s_col{{0, 0, 0}, {false, false, false}}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); +} + +TEST_F(HostUDFSegmentedReductionExampleTest, EmptyInput) +{ + auto const vals = doubles_col{}; + auto const offsets = int32s_col{}.release(); + auto const agg = cudf::make_host_udf_aggregation( + std::make_unique()); + auto const result = cudf::segmented_reduce( + vals, + cudf::device_span(offsets->view().begin(), offsets->size()), + *agg, + cudf::data_type{cudf::type_id::INT64}, + cudf::null_policy::INCLUDE, + std::nullopt, // init value + cudf::get_default_stream(), + cudf::get_current_device_resource_ref()); + auto const expected = int64s_col{}; + CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected, *result); +} diff --git a/java/src/main/java/ai/rapids/cudf/Aggregation.java b/java/src/main/java/ai/rapids/cudf/Aggregation.java index 2276b223740..c07a58ed8a5 100644 --- a/java/src/main/java/ai/rapids/cudf/Aggregation.java +++ b/java/src/main/java/ai/rapids/cudf/Aggregation.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/java/src/main/java/ai/rapids/cudf/GroupByAggregation.java b/java/src/main/java/ai/rapids/cudf/GroupByAggregation.java index 27966ddfdd4..234a9ec1ced 100644 --- a/java/src/main/java/ai/rapids/cudf/GroupByAggregation.java +++ b/java/src/main/java/ai/rapids/cudf/GroupByAggregation.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/java/src/main/java/ai/rapids/cudf/ReductionAggregation.java b/java/src/main/java/ai/rapids/cudf/ReductionAggregation.java index ba8ae379bae..4f047a68f06 100644 --- a/java/src/main/java/ai/rapids/cudf/ReductionAggregation.java +++ b/java/src/main/java/ai/rapids/cudf/ReductionAggregation.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -161,14 +161,14 @@ public static ReductionAggregation median() { /** * Aggregate to compute the specified quantiles. Uses linear interpolation by default. */ - public static ReductionAggregation quantile(double ... quantiles) { + public static ReductionAggregation quantile(double... quantiles) { return new ReductionAggregation(Aggregation.quantile(quantiles)); } /** * Aggregate to compute various quantiles. */ - public static ReductionAggregation quantile(QuantileMethod method, double ... quantiles) { + public static ReductionAggregation quantile(QuantileMethod method, double... quantiles) { return new ReductionAggregation(Aggregation.quantile(method, quantiles)); } @@ -256,7 +256,7 @@ public static ReductionAggregation collectSet() { * @param nanEquality Flag to specify whether NaN values in floating point column should be considered equal. */ public static ReductionAggregation collectSet(NullPolicy nullPolicy, - NullEquality nullEquality, NaNEquality nanEquality) { + NullEquality nullEquality, NaNEquality nanEquality) { return new ReductionAggregation(Aggregation.collectSet(nullPolicy, nullEquality, nanEquality)); } @@ -286,6 +286,15 @@ public static ReductionAggregation mergeSets(NullEquality nullEquality, NaNEqual return new ReductionAggregation(Aggregation.mergeSets(nullEquality, nanEquality)); } + /** + * Execute a reduction using a host-side user-defined function (UDF). + * @param wrapper The wrapper for the native host UDF instance. + * @return A new ReductionAggregation instance + */ + public static ReductionAggregation hostUDF(HostUDFWrapper wrapper) { + return new ReductionAggregation(Aggregation.hostUDF(wrapper)); + } + /** * Create HistogramAggregation, computing the frequencies for each unique row. * diff --git a/java/src/main/java/ai/rapids/cudf/SegmentedReductionAggregation.java b/java/src/main/java/ai/rapids/cudf/SegmentedReductionAggregation.java index 7ed150a2fec..18e7d874886 100644 --- a/java/src/main/java/ai/rapids/cudf/SegmentedReductionAggregation.java +++ b/java/src/main/java/ai/rapids/cudf/SegmentedReductionAggregation.java @@ -1,6 +1,6 @@ /* * - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -101,4 +101,13 @@ public static SegmentedReductionAggregation any() { public static SegmentedReductionAggregation all() { return new SegmentedReductionAggregation(Aggregation.all()); } + + /** + * Execute a reduction using a host-side user-defined function (UDF). + * @param wrapper The wrapper for the native host UDF instance. + * @return A new SegmentedReductionAggregation instance + */ + public static SegmentedReductionAggregation hostUDF(HostUDFWrapper wrapper) { + return new SegmentedReductionAggregation(Aggregation.hostUDF(wrapper)); + } }
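A note on the example functors: `reduce_fn` and `segmented_reduce_fn` above pair `cudf::double_type_dispatcher` with SFINAE guards so that only one (input, output) type combination is compiled with a real body, while every other pair falls into a runtime-failing catch-all. The following is a minimal, self-contained sketch of that guard pattern, not code from this patch; it assumes cudf's `CUDF_ENABLE_IF` helper (believed to live in cudf/utilities/traits.hpp) and the hypothetical functor name `sum_sq_shape`:

#include <cudf/utilities/error.hpp>
#include <cudf/utilities/traits.hpp>  // CUDF_ENABLE_IF (assumed location)

#include <cstdint>
#include <type_traits>

// Mirrors the shape of reduce_fn/segmented_reduce_fn: double_type_dispatcher
// instantiates operator()<T, U>; all pairs other than (double, int64_t) hit
// the catch-all overload and fail at runtime.
struct sum_sq_shape {
  using InputType  = double;
  using OutputType = int64_t;

  // Catch-all for unsupported (input, output) combinations.
  template <typename T,
            typename U,
            typename... Args,
            CUDF_ENABLE_IF(!std::is_same_v<T, InputType> || !std::is_same_v<U, OutputType>)>
  OutputType operator()(Args...) const
  {
    CUDF_FAIL("Unsupported input/output type.");
  }

  // The single supported combination.
  template <typename T,
            typename U,
            CUDF_ENABLE_IF(std::is_same_v<T, InputType> && std::is_same_v<U, OutputType>)>
  OutputType operator()(T value) const
  {
    return static_cast<OutputType>(value * value);
  }
};

A call such as `cudf::double_type_dispatcher(input_type, output_type, sum_sq_shape{}, value)` then selects whichever overload matches the two runtime type ids.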
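Finally, for orientation: with the interfaces introduced above, a reduction host UDF only has to implement `operator()`, `is_equal`, and `clone`. The sketch below is hypothetical and not part of this patch; it assumes the `cudf::reduce_host_udf` interface and `make_host_udf_aggregation` factory shown in the diffs, and the header path for them is an assumption.

#include <cudf/aggregation/host_udf.hpp>  // assumed location of reduce_host_udf
#include <cudf/column/column_view.hpp>
#include <cudf/reduction.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/scalar/scalar_factories.hpp>
#include <cudf/utilities/error.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/resource_ref.hpp>

#include <functional>
#include <memory>
#include <optional>

// Hypothetical example: reduce a column to its row count (including nulls).
struct row_count_udf : cudf::reduce_host_udf {
  std::unique_ptr<cudf::scalar> operator()(
    cudf::column_view const& input,
    cudf::data_type output_dtype,
    std::optional<std::reference_wrapper<cudf::scalar const>> init,  // unused here
    rmm::cuda_stream_view stream,
    rmm::device_async_resource_ref mr) const override
  {
    CUDF_EXPECTS(output_dtype == cudf::data_type{cudf::type_id::INT64},
                 "row_count_udf only produces INT64.");
    // No device work is needed: the row count is already known on the host.
    auto out = cudf::make_numeric_scalar(output_dtype, stream, mr);
    static_cast<cudf::numeric_scalar<int64_t>*>(out.get())
      ->set_value(static_cast<int64_t>(input.size()), stream);
    return out;
  }

  [[nodiscard]] bool is_equal(host_udf_base const& other) const override
  {
    return dynamic_cast<row_count_udf const*>(&other) != nullptr;
  }

  [[nodiscard]] std::unique_ptr<host_udf_base> clone() const override
  {
    return std::make_unique<row_count_udf>();
  }
};

It would be driven exactly like the tests above:

  auto agg = cudf::make_host_udf_aggregation<cudf::reduce_aggregation>(
    std::make_unique<row_count_udf>());
  auto result = cudf::reduce(column, *agg, cudf::data_type{cudf::type_id::INT64});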