From 958d312d265faa30d76c13d637595862ae8aaab1 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Fri, 10 Jan 2025 14:28:15 -0800
Subject: [PATCH] Add more aggregation methods in pylibcudf

---
Reviewer notes (this section is not part of the commit message):

aggregation.pyx gains cpdef wrappers histogram, m2, merge_histogram,
merge_lists, merge_m2, merge_sets, merge_tdigest and tdigest, each of which
forwards to the matching make_*_aggregation factory, and lists them in
__all__.

libcudf/aggregation.pxd declares those factories, names the parameter of
make_count_aggregation, and extends the Kind enum.

NOTE(review): leading whitespace on context lines was reconstructed from the
hunk headers; confirm against the target tree before applying.

 python/pylibcudf/pylibcudf/aggregation.pyx    | 165 +++++++++++++++++-
 .../pylibcudf/libcudf/aggregation.pxd         |  44 ++++-
 2 files changed, 205 insertions(+), 4 deletions(-)

diff --git a/python/pylibcudf/pylibcudf/aggregation.pyx b/python/pylibcudf/pylibcudf/aggregation.pyx
index 662f76d5c8e..b6160be66af 100644
--- a/python/pylibcudf/pylibcudf/aggregation.pyx
+++ b/python/pylibcudf/pylibcudf/aggregation.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
+# Copyright (c) 2024-2025, NVIDIA CORPORATION.
 
 from cython.operator cimport dereference
 from libcpp.cast cimport dynamic_cast
@@ -20,9 +20,16 @@ from pylibcudf.libcudf.aggregation cimport (
     make_count_aggregation,
     make_covariance_aggregation,
     make_ewma_aggregation,
+    make_histogram_aggregation,
+    make_m2_aggregation,
     make_max_aggregation,
     make_mean_aggregation,
     make_median_aggregation,
+    make_merge_histogram_aggregation,
+    make_merge_lists_aggregation,
+    make_merge_m2_aggregation,
+    make_merge_sets_aggregation,
+    make_merge_tdigest_aggregation,
     make_min_aggregation,
     make_nth_element_aggregation,
     make_nunique_aggregation,
@@ -32,6 +39,7 @@ from pylibcudf.libcudf.aggregation cimport (
     make_std_aggregation,
     make_sum_aggregation,
     make_sum_of_squares_aggregation,
+    make_tdigest_aggregation,
     make_udf_aggregation,
     make_variance_aggregation,
     rank_method,
@@ -82,9 +90,16 @@ __all__ = [
     "count",
     "covariance",
     "ewma",
+    "histogram",
+    "m2",
     "max",
     "mean",
     "median",
+    "merge_histogram",
+    "merge_lists",
+    "merge_m2",
+    "merge_sets",
+    "merge_tdigest",
     "min",
     "nth_element",
     "nunique",
@@ -94,6 +109,7 @@ __all__ = [
     "std",
     "sum",
     "sum_of_squares",
+    "tdigest",
     "udf",
     "variance",
 ]
@@ -639,3 +655,150 @@ cpdef Aggregation rank(
             )
         )
     )
+
+
+cpdef Aggregation histogram():
+    """Create a histogram aggregation.
+
+    For details, see :cpp:func:`make_histogram_aggregation`.
+
+    Returns
+    -------
+    Aggregation
+        The histogram aggregation.
+    """
+    return Aggregation.from_libcudf(
+        move(make_histogram_aggregation[aggregation]())
+    )
+
+
+cpdef Aggregation m2():
+    """Create an M2 aggregation.
+
+    For details, see :cpp:func:`make_m2_aggregation`.
+
+    Returns
+    -------
+    Aggregation
+        The M2 aggregation.
+    """
+    return Aggregation.from_libcudf(
+        move(make_m2_aggregation[aggregation]())
+    )
+
+
+cpdef Aggregation merge_m2():
+    """Create a merge M2 aggregation.
+
+    For details, see :cpp:func:`make_merge_m2_aggregation`.
+
+    Returns
+    -------
+    Aggregation
+        The merge M2 aggregation.
+    """
+    return Aggregation.from_libcudf(
+        move(make_merge_m2_aggregation[aggregation]())
+    )
+
+
+cpdef Aggregation merge_histogram():
+    """Create a merge histogram aggregation.
+
+    For details, see :cpp:func:`make_merge_histogram_aggregation`.
+
+    Returns
+    -------
+    Aggregation
+        The merge histogram aggregation.
+    """
+    return Aggregation.from_libcudf(
+        move(make_merge_histogram_aggregation[aggregation]())
+    )
+
+
+cpdef Aggregation merge_lists():
+    """Create a merge lists aggregation.
+
+    For details, see :cpp:func:`make_merge_lists_aggregation`.
+
+    Returns
+    -------
+    Aggregation
+        The merge lists aggregation.
+    """
+    return Aggregation.from_libcudf(
+        move(make_merge_lists_aggregation[aggregation]())
+    )
+
+
+cpdef Aggregation merge_sets(
+    null_equality nulls_equal = null_equality.EQUAL,
+    nan_equality nans_equal = nan_equality.ALL_EQUAL,
+):
+    """Create a merge sets aggregation.
+
+    For details, see :cpp:func:`make_merge_sets_aggregation`.
+
+    Parameters
+    ----------
+    nulls_equal : null_equality, default EQUAL
+        Whether or not nulls should be considered equal.
+    nans_equal : nan_equality, default ALL_EQUAL
+        Whether or not NaNs should be considered equal.
+
+    Returns
+    -------
+    Aggregation
+        The merge sets aggregation.
+    """
+    return Aggregation.from_libcudf(
+        move(
+            make_merge_sets_aggregation[aggregation](
+                nulls_equal,
+                nans_equal,
+            )
+        )
+    )
+
+
+cpdef Aggregation merge_tdigest(int max_centroids):
+    """Create a merge TDIGEST aggregation.
+
+    For details, see :cpp:func:`make_merge_tdigest_aggregation`.
+
+    Parameters
+    ----------
+    max_centroids : int
+        Parameter controlling compression level and accuracy
+        on subsequent queries on the output tdigest data.
+
+    Returns
+    -------
+    Aggregation
+        The merge TDIGEST aggregation.
+    """
+    return Aggregation.from_libcudf(
+        move(make_merge_tdigest_aggregation[aggregation](max_centroids))
+    )
+
+
+cpdef Aggregation tdigest(int max_centroids):
+    """Create a TDIGEST aggregation.
+
+    For details, see :cpp:func:`make_tdigest_aggregation`.
+
+    Parameters
+    ----------
+    max_centroids : int
+        Parameter controlling compression level and accuracy
+        on subsequent queries on the output tdigest data.
+
+    Returns
+    -------
+    Aggregation
+        The TDIGEST aggregation.
+    """
+    return Aggregation.from_libcudf(
+        move(make_tdigest_aggregation[aggregation](max_centroids))
+    )
diff --git a/python/pylibcudf/pylibcudf/libcudf/aggregation.pxd b/python/pylibcudf/pylibcudf/libcudf/aggregation.pxd
index 52d1e572ff3..eb900b414e6 100644
--- a/python/pylibcudf/pylibcudf/libcudf/aggregation.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/aggregation.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 from libc.stddef cimport size_t
 from libc.stdint cimport int32_t
 from libcpp cimport bool
@@ -33,6 +33,7 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil:
         ALL
         SUM_OF_SQUARES
         MEAN
+        M2
         VARIANCE
         STD
         MEDIAN
@@ -41,13 +42,25 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil:
         ARGMIN
         NUNIQUE
         NTH_ELEMENT
+        ROW_NUMBER
+        EWMA
         RANK
         COLLECT_LIST
         COLLECT_SET
+        LEAD
+        LAG
         PTX
         CUDA
-        CORRELATION
+        HOST_UDF
+        MERGE_LISTS
+        MERGE_SETS
+        MERGE_M2
         COVARIANCE
+        CORRELATION
+        TDIGEST
+        MERGE_TDIGEST
+        HISTOGRAM
+        MERGE_HISTOGRAM
 
     cdef cppclass aggregation:
         Kind kind
@@ -104,7 +117,7 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil:
     cdef unique_ptr[T] make_max_aggregation[T]() except +libcudf_exception_handler
 
     cdef unique_ptr[T] make_count_aggregation[T](
-        null_policy
+        null_policy null_handling
     ) except +libcudf_exception_handler
 
     cdef unique_ptr[T] make_any_aggregation[T]() except +libcudf_exception_handler
@@ -170,3 +183,28 @@ cdef extern from "cudf/aggregation.hpp" namespace "cudf" nogil:
         null_policy null_handling,
         null_order null_precedence,
         rank_percentage percentage) except +libcudf_exception_handler
+
+    cdef unique_ptr[T] make_tdigest_aggregation[T](
+        int max_centroids
+    ) except +libcudf_exception_handler
+
+    cdef unique_ptr[T] make_merge_tdigest_aggregation[T](
+        int max_centroids
+    ) except +libcudf_exception_handler
+
+    cdef unique_ptr[T] make_histogram_aggregation[T]() except +libcudf_exception_handler
+
+    cdef unique_ptr[T] make_merge_histogram_aggregation[T](
+    ) except +libcudf_exception_handler
+
+    cdef unique_ptr[T] make_merge_lists_aggregation[T](
+    ) except +libcudf_exception_handler
+
+    cdef unique_ptr[T] make_merge_sets_aggregation[T](
+        null_equality nulls_equal,
+        nan_equality nans_equal,
+    ) except +libcudf_exception_handler
+
+    cdef unique_ptr[T] make_merge_m2_aggregation[T]() except +libcudf_exception_handler
+
+    cdef unique_ptr[T] make_m2_aggregation[T]() except +libcudf_exception_handler