Skip to content

Commit

Permalink
Merge branch 'branch-25.02' into cudf-polars-multi-groupby
Browse files Browse the repository at this point in the history
  • Loading branch information
rjzamora authored Jan 9, 2025
2 parents 24b88f2 + f13d8fc commit 161a53b
Show file tree
Hide file tree
Showing 226 changed files with 5,610 additions and 5,240 deletions.
7 changes: 3 additions & 4 deletions .github/CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,9 @@ notebooks/ @rapidsai/cudf-python-codeowners
python/dask_cudf/ @rapidsai/cudf-dask-codeowners

#cmake code owners
cpp/CMakeLists.txt @rapidsai/cudf-cmake-codeowners
cpp/libcudf_kafka/CMakeLists.txt @rapidsai/cudf-cmake-codeowners
**/cmake/ @rapidsai/cudf-cmake-codeowners
*.cmake @rapidsai/cudf-cmake-codeowners
CMakeLists.txt @rapidsai/cudf-cmake-codeowners
**/cmake/ @rapidsai/cudf-cmake-codeowners
*.cmake @rapidsai/cudf-cmake-codeowners

#java code owners
java/ @rapidsai/cudf-java-codeowners
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ jobs:
arch: "amd64"
branch: ${{ inputs.branch }}
build_type: ${{ inputs.build_type || 'branch' }}
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
container_image: "rapidsai/ci-conda:latest"
date: ${{ inputs.date }}
node_type: "gpu-v100-latest-1"
run_script: "ci/build_docs.sh"
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ jobs:
build_type: pull-request
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/test_java.sh"
static-configure:
needs: checks
Expand All @@ -207,7 +207,7 @@ jobs:
build_type: pull-request
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/test_notebooks.sh"
docs-build:
needs: conda-python-build
Expand All @@ -217,7 +217,7 @@ jobs:
build_type: pull-request
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/build_docs.sh"
wheel-build-libcudf:
needs: checks
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ jobs:
sha: ${{ inputs.sha }}
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/test_cpp_memcheck.sh"
static-configure:
secrets: inherit
Expand Down Expand Up @@ -94,7 +94,7 @@ jobs:
sha: ${{ inputs.sha }}
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/test_java.sh"
conda-notebook-tests:
secrets: inherit
Expand All @@ -106,7 +106,7 @@ jobs:
sha: ${{ inputs.sha }}
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/test_notebooks.sh"
wheel-tests-cudf:
secrets: inherit
Expand Down
6 changes: 3 additions & 3 deletions ci/build_wheel_libcudf.sh
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
#!/bin/bash
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Copyright (c) 2023-2025, NVIDIA CORPORATION.

set -euo pipefail

package_name="libcudf"
package_dir="python/libcudf"

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"

rapids-logger "Generating build requirements"

rapids-dependency-file-generator \
Expand All @@ -28,8 +30,6 @@ export PIP_NO_BUILD_ISOLATION=0
export SKBUILD_CMAKE_ARGS="-DUSE_NVCOMP_RUNTIME_WHEEL=ON"
./ci/build_wheel.sh "${package_name}" "${package_dir}"

RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"

mkdir -p ${package_dir}/final_dist
python -m auditwheel repair \
--exclude libnvcomp.so.4 \
Expand Down
13 changes: 3 additions & 10 deletions ci/test_python_other.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
# Copyright (c) 2022-2025, NVIDIA CORPORATION.

# Support invoking test_python_cudf.sh outside the script directory
cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../
Expand All @@ -24,8 +24,8 @@ EXITCODE=0
trap "EXITCODE=1" ERR
set +e

rapids-logger "pytest dask_cudf (dask-expr)"
DASK_DATAFRAME__QUERY_PLANNING=True ./ci/run_dask_cudf_pytests.sh \
rapids-logger "pytest dask_cudf"
./ci/run_dask_cudf_pytests.sh \
--junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cudf.xml" \
--numprocesses=8 \
--dist=worksteal \
Expand All @@ -34,13 +34,6 @@ DASK_DATAFRAME__QUERY_PLANNING=True ./ci/run_dask_cudf_pytests.sh \
--cov-report=xml:"${RAPIDS_COVERAGE_DIR}/dask-cudf-coverage.xml" \
--cov-report=term

rapids-logger "pytest dask_cudf (legacy)"
DASK_DATAFRAME__QUERY_PLANNING=False ./ci/run_dask_cudf_pytests.sh \
--junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cudf-legacy.xml" \
--numprocesses=8 \
--dist=worksteal \
.

rapids-logger "pytest cudf_kafka"
./ci/run_cudf_kafka_pytests.sh \
--junitxml="${RAPIDS_TESTS_DIR}/junit-cudf-kafka.xml"
Expand Down
16 changes: 3 additions & 13 deletions ci/test_wheel_dask_cudf.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/bin/bash
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Copyright (c) 2023-2025, NVIDIA CORPORATION.

set -eou pipefail

Expand Down Expand Up @@ -30,21 +30,11 @@ RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${RESULTS_DIR}/test-results"}/
mkdir -p "${RAPIDS_TESTS_DIR}"

# Run tests in dask_cudf/tests and dask_cudf/io/tests
rapids-logger "pytest dask_cudf (dask-expr)"
rapids-logger "pytest dask_cudf"
pushd python/dask_cudf/dask_cudf
DASK_DATAFRAME__QUERY_PLANNING=True python -m pytest \
python -m pytest \
--junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cudf.xml" \
--numprocesses=8 \
--dist=worksteal \
.
popd

# Run tests in dask_cudf/tests and dask_cudf/io/tests (legacy)
rapids-logger "pytest dask_cudf (legacy)"
pushd python/dask_cudf/dask_cudf
DASK_DATAFRAME__QUERY_PLANNING=False python -m pytest \
--junitxml="${RAPIDS_TESTS_DIR}/junit-dask-cudf-legacy.xml" \
--numprocesses=8 \
--dist=worksteal \
.
popd
6 changes: 3 additions & 3 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ dependencies:
- nbsphinx
- ninja
- notebook
- numba-cuda>=0.0.13,<0.0.18
- numba-cuda>=0.2.0,<0.3.0
- numpy>=1.23,<3.0a0
- numpydoc
- nvcc_linux-64=11.8
Expand All @@ -66,12 +66,12 @@ dependencies:
- pandas
- pandas>=2.0,<2.2.4dev0
- pandoc
- polars>=1.11,<1.15
- polars>=1.11,<1.18
- pre-commit
- ptxcompiler
- pyarrow>=14.0.0,<19.0.0a0
- pydata-sphinx-theme!=0.14.2
- pynvml>=11.4.1,<12.0.0a0
- pynvml>=12.0.0,<13.0.0a0
- pytest-benchmark
- pytest-cases>=3.8.2
- pytest-cov
Expand Down
6 changes: 3 additions & 3 deletions conda/environments/all_cuda-125_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ dependencies:
- nbsphinx
- ninja
- notebook
- numba-cuda>=0.0.13,<0.0.18
- numba-cuda>=0.2.0,<0.3.0
- numpy>=1.23,<3.0a0
- numpydoc
- nvcomp==4.1.0.6
Expand All @@ -64,12 +64,12 @@ dependencies:
- pandas
- pandas>=2.0,<2.2.4dev0
- pandoc
- polars>=1.11,<1.15
- polars>=1.11,<1.18
- pre-commit
- pyarrow>=14.0.0,<19.0.0a0
- pydata-sphinx-theme!=0.14.2
- pynvjitlink>=0.0.0a0
- pynvml>=11.4.1,<12.0.0a0
- pynvml>=12.0.0,<13.0.0a0
- pytest-benchmark
- pytest-cases>=3.8.2
- pytest-cov
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/cudf-polars/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.

{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') %}
{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
Expand Down Expand Up @@ -43,7 +43,7 @@ requirements:
run:
- python
- pylibcudf ={{ version }}
- polars >=1.11,<1.15
- polars >=1.11,<1.18
- {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}

test:
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2018-2024, NVIDIA CORPORATION.
# Copyright (c) 2018-2025, NVIDIA CORPORATION.

{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') %}
{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
Expand Down Expand Up @@ -80,7 +80,7 @@ requirements:
- typing_extensions >=4.0.0
- pandas >=2.0,<2.2.4dev0
- cupy >=12.0.0
- numba-cuda >=0.0.13,<0.0.18
- numba-cuda >=0.2.0,<0.3.0
- numpy >=1.23,<3.0a0
- pyarrow>=14.0.0,<18.0.0a0
- libcudf ={{ version }}
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/dask-cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ requirements:
run:
- python
- cudf ={{ version }}
- pynvml >=11.4.1,<12.0.0a0
- pynvml >=12.0.0,<13.0.0a0
- rapids-dask-dependency ={{ minor_version }}
- {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}

Expand Down
15 changes: 7 additions & 8 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# =============================================================================
# Copyright (c) 2018-2024, NVIDIA CORPORATION.
# Copyright (c) 2018-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
Expand Down Expand Up @@ -274,12 +274,9 @@ endif()
# add third party dependencies using CPM
rapids_cpm_init()

# Not using rapids-cmake since we never want to find, always download.
CPMAddPackage(
NAME rapids_logger GITHUB_REPOSITORY rapidsai/rapids-logger GIT_SHALLOW FALSE GIT_TAG
c510947ae9d3a67530cfe3e5eaccb5a3b8ea0e55 VERSION c510947ae9d3a67530cfe3e5eaccb5a3b8ea0e55
)
rapids_make_logger(cudf EXPORT_SET cudf-exports)
include(${rapids-cmake-dir}/cpm/rapids_logger.cmake)
rapids_cpm_rapids_logger()
rapids_make_logger(cudf EXPORT_SET cudf-exports LOGGER_DEFAULT_LEVEL WARN)

# find jitify
include(cmake/thirdparty/get_jitify.cmake)
Expand Down Expand Up @@ -446,14 +443,15 @@ add_library(
src/groupby/sort/group_quantiles.cu
src/groupby/sort/group_std.cu
src/groupby/sort/group_sum.cu
src/groupby/sort/scan.cpp
src/groupby/sort/group_count_scan.cu
src/groupby/sort/group_max_scan.cu
src/groupby/sort/group_min_scan.cu
src/groupby/sort/group_product_scan.cu
src/groupby/sort/group_rank_scan.cu
src/groupby/sort/group_replace_nulls.cu
src/groupby/sort/group_sum_scan.cu
src/groupby/sort/host_udf_aggregation.cpp
src/groupby/sort/scan.cpp
src/groupby/sort/sort_helper.cu
src/hash/md5_hash.cu
src/hash/murmurhash3_x86_32.cu
Expand All @@ -463,6 +461,7 @@ add_library(
src/hash/sha256_hash.cu
src/hash/sha384_hash.cu
src/hash/sha512_hash.cu
src/hash/xxhash_32.cu
src/hash/xxhash_64.cu
src/interop/dlpack.cpp
src/interop/arrow_utilities.cpp
Expand Down
7 changes: 6 additions & 1 deletion cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# =============================================================================
# Copyright (c) 2018-2024, NVIDIA CORPORATION.
# Copyright (c) 2018-2025, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
Expand Down Expand Up @@ -425,6 +425,11 @@ ConfigureNVBench(DECIMAL_NVBENCH decimal/convert_floating.cpp)
# ---------------------------------------------------------------------------------
ConfigureNVBench(RESHAPE_NVBENCH reshape/interleave.cpp)

# ##################################################################################################
# * rolling benchmark
# ---------------------------------------------------------------------------------
ConfigureNVBench(ROLLING_NVBENCH rolling/grouped_rolling_sum.cpp rolling/rolling_sum.cpp)

add_custom_target(
run_benchmarks
DEPENDS CUDF_BENCHMARKS
Expand Down
20 changes: 5 additions & 15 deletions cpp/benchmarks/join/distinct_join.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
* Copyright (c) 2024-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -23,13 +23,8 @@ void distinct_inner_join(nvbench::state& state,
auto join = [](cudf::table_view const& probe_input,
cudf::table_view const& build_input,
cudf::null_equality compare_nulls) {
auto const has_nulls =
cudf::has_nested_nulls(build_input) || cudf::has_nested_nulls(probe_input)
? cudf::nullable_join::YES
: cudf::nullable_join::NO;
auto hj_obj = cudf::distinct_hash_join<cudf::has_nested::NO>{
build_input, probe_input, has_nulls, compare_nulls};
return hj_obj.inner_join();
auto hj_obj = cudf::distinct_hash_join{build_input, compare_nulls};
return hj_obj.inner_join(probe_input);
};

BM_join<Key, Nullable>(state, join);
Expand All @@ -42,13 +37,8 @@ void distinct_left_join(nvbench::state& state,
auto join = [](cudf::table_view const& probe_input,
cudf::table_view const& build_input,
cudf::null_equality compare_nulls) {
auto const has_nulls =
cudf::has_nested_nulls(build_input) || cudf::has_nested_nulls(probe_input)
? cudf::nullable_join::YES
: cudf::nullable_join::NO;
auto hj_obj = cudf::distinct_hash_join<cudf::has_nested::NO>{
build_input, probe_input, has_nulls, compare_nulls};
return hj_obj.left_join();
auto hj_obj = cudf::distinct_hash_join{build_input, compare_nulls};
return hj_obj.left_join(probe_input);
};

BM_join<Key, Nullable>(state, join);
Expand Down
Loading

0 comments on commit 161a53b

Please sign in to comment.