From dc99d2f9bc602e40c7bae894b6759e30a8efdddd Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Wed, 8 Jan 2025 13:00:30 +0000 Subject: [PATCH 1/9] Introduce some simple benchmarks for rolling window aggregations (#17613) Previously we did not have any benchmarks for rolling aggregations. Introduce some, so we can measure the effects of any performance improvements we might make. Authors: - Lawrence Mitchell (https://github.com/wence-) Approvers: - MithunR (https://github.com/mythrocks) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/17613 --- cpp/benchmarks/CMakeLists.txt | 7 +- .../rolling/grouped_rolling_sum.cpp | 70 +++++++++ cpp/benchmarks/rolling/rolling_sum.cpp | 134 ++++++++++++++++++ 3 files changed, 210 insertions(+), 1 deletion(-) create mode 100644 cpp/benchmarks/rolling/grouped_rolling_sum.cpp create mode 100644 cpp/benchmarks/rolling/rolling_sum.cpp diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 749e1b628ee..0ff712c1c77 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2018-2024, NVIDIA CORPORATION. +# Copyright (c) 2018-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -425,6 +425,11 @@ ConfigureNVBench(DECIMAL_NVBENCH decimal/convert_floating.cpp) # --------------------------------------------------------------------------------- ConfigureNVBench(RESHAPE_NVBENCH reshape/interleave.cpp) +# ################################################################################################## +# * rolling benchmark +# --------------------------------------------------------------------------------- +ConfigureNVBench(ROLLING_NVBENCH rolling/grouped_rolling_sum.cpp rolling/rolling_sum.cpp) + add_custom_target( run_benchmarks DEPENDS CUDF_BENCHMARKS diff --git a/cpp/benchmarks/rolling/grouped_rolling_sum.cpp b/cpp/benchmarks/rolling/grouped_rolling_sum.cpp new file mode 100644 index 00000000000..04afe5ac661 --- /dev/null +++ b/cpp/benchmarks/rolling/grouped_rolling_sum.cpp @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2024-2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include +#include +#include +#include + +#include + +template +void bench_row_grouped_rolling_sum(nvbench::state& state, nvbench::type_list) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const cardinality = static_cast(state.get_int64("cardinality")); + auto const preceding_size = static_cast(state.get_int64("preceding_size")); + auto const following_size = static_cast(state.get_int64("following_size")); + auto const min_periods = static_cast(state.get_int64("min_periods")); + + auto const keys = [&] { + data_profile const profile = + data_profile_builder() + .cardinality(cardinality) + .no_validity() + .distribution(cudf::type_to_id(), distribution_id::UNIFORM, 0, num_rows); + auto keys = create_random_column(cudf::type_to_id(), row_count{num_rows}, profile); + return cudf::sort(cudf::table_view{{keys->view()}}); + }(); + data_profile const profile = data_profile_builder().cardinality(0).no_validity().distribution( + cudf::type_to_id(), distribution_id::UNIFORM, 0, 100); + auto vals = create_random_column(cudf::type_to_id(), row_count{num_rows}, profile); + + auto req = cudf::make_sum_aggregation(); + + auto const mem_stats_logger = cudf::memory_stats_logger(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto const result = cudf::grouped_rolling_window( + keys->view(), vals->view(), preceding_size, following_size, min_periods, *req); + }); + auto const elapsed_time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); + state.add_element_count(static_cast(num_rows) / elapsed_time / 1'000'000., "Mrows/s"); + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); +} + +NVBENCH_BENCH_TYPES(bench_row_grouped_rolling_sum, + NVBENCH_TYPE_AXES(nvbench::type_list)) + .set_name("row_grouped_rolling_sum") + .add_int64_power_of_two_axis("num_rows", {14, 28}) + .add_int64_axis("preceding_size", {1, 10}) + .add_int64_axis("following_size", {2}) + .add_int64_axis("min_periods", {1}) + .add_int64_axis("cardinality", {10, 100, 1'000'000, 100'000'000}); diff --git a/cpp/benchmarks/rolling/rolling_sum.cpp b/cpp/benchmarks/rolling/rolling_sum.cpp new file mode 100644 index 00000000000..af9ecd6a26f --- /dev/null +++ b/cpp/benchmarks/rolling/rolling_sum.cpp @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2024-2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include + +#include + +template +void bench_row_fixed_rolling_sum(nvbench::state& state, nvbench::type_list) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const preceding_size = static_cast(state.get_int64("preceding_size")); + auto const following_size = static_cast(state.get_int64("following_size")); + auto const min_periods = static_cast(state.get_int64("min_periods")); + + data_profile const profile = data_profile_builder().cardinality(0).no_validity().distribution( + cudf::type_to_id(), distribution_id::UNIFORM, 0, 100); + auto vals = create_random_column(cudf::type_to_id(), row_count{num_rows}, profile); + + auto req = cudf::make_sum_aggregation(); + + auto const mem_stats_logger = cudf::memory_stats_logger(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto const result = + cudf::rolling_window(vals->view(), preceding_size, following_size, min_periods, *req); + }); + auto const elapsed_time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); + state.add_element_count(static_cast(num_rows) / elapsed_time / 1'000'000., "Mrows/s"); + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); +} + +template +void bench_row_variable_rolling_sum(nvbench::state& state, nvbench::type_list) +{ + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const preceding_size = static_cast(state.get_int64("preceding_size")); + auto const following_size = static_cast(state.get_int64("following_size")); + + auto vals = [&]() { + data_profile const profile = data_profile_builder().cardinality(0).no_validity().distribution( + cudf::type_to_id(), distribution_id::UNIFORM, 0, 100); + return create_random_column(cudf::type_to_id(), row_count{num_rows}, profile); + }(); + + auto preceding = [&]() { + auto data = std::vector(num_rows); + auto it = thrust::make_counting_iterator(0); + std::transform(it, it + num_rows, data.begin(), [num_rows, preceding_size](auto i) { + return std::min(i + 1, std::max(preceding_size, i + 1 - num_rows)); + }); + auto buf = rmm::device_buffer( + data.data(), num_rows * sizeof(cudf::size_type), cudf::get_default_stream()); + cudf::get_default_stream().synchronize(); + return std::make_unique(cudf::data_type(cudf::type_to_id()), + num_rows, + std::move(buf), + rmm::device_buffer{}, + 0); + }(); + + auto following = [&]() { + auto data = std::vector(num_rows); + auto it = thrust::make_counting_iterator(0); + std::transform(it, it + num_rows, data.begin(), [num_rows, following_size](auto i) { + return std::max(-i - 1, std::min(following_size, num_rows - i - 1)); + }); + auto buf = rmm::device_buffer( + data.data(), num_rows * sizeof(cudf::size_type), cudf::get_default_stream()); + cudf::get_default_stream().synchronize(); + return std::make_unique(cudf::data_type(cudf::type_to_id()), + num_rows, + std::move(buf), + rmm::device_buffer{}, + 0); + }(); + + auto req = cudf::make_sum_aggregation(); + + auto const mem_stats_logger = cudf::memory_stats_logger(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto const result = + cudf::rolling_window(vals->view(), preceding->view(), following->view(), 1, *req); + }); + auto 
const elapsed_time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); + state.add_element_count(static_cast(num_rows) / elapsed_time / 1'000'000., "Mrows/s"); + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); +} + +NVBENCH_BENCH_TYPES(bench_row_fixed_rolling_sum, + NVBENCH_TYPE_AXES(nvbench::type_list)) + .set_name("row_fixed_rolling_sum") + .add_int64_power_of_two_axis("num_rows", {14, 22, 28}) + .add_int64_axis("preceding_size", {1, 10, 100}) + .add_int64_axis("following_size", {2}) + .add_int64_axis("min_periods", {1, 20}); + +NVBENCH_BENCH_TYPES(bench_row_variable_rolling_sum, + NVBENCH_TYPE_AXES(nvbench::type_list)) + .set_name("row_variable_rolling_sum") + .add_int64_power_of_two_axis("num_rows", {14, 22, 28}) + .add_int64_axis("preceding_size", {10, 100}) + .add_int64_axis("following_size", {2}); From d05b78b13e3cce55e1691de86b0c6020d4f1b0cd Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Wed, 8 Jan 2025 08:54:22 -0500 Subject: [PATCH 2/9] Fix build metrics report format with long placehold filenames (#17679) Truncates filenames that appear as multiple `placehold_placedhold_...` in the Build Metrics Report. Example show here: https://downloads.rapids.ai/ci/cudf/pull-request/17669/0710ad6/cuda12_x86_64.ninja_log.html (requires VPN). Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Bradley Dice (https://github.com/bdice) - Muhammad Haseeb (https://github.com/mhaseeb123) URL: https://github.com/rapidsai/cudf/pull/17679 --- cpp/scripts/sort_ninja_log.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/cpp/scripts/sort_ninja_log.py b/cpp/scripts/sort_ninja_log.py index 42f84e4d0c7..e111367d191 100755 --- a/cpp/scripts/sort_ninja_log.py +++ b/cpp/scripts/sort_ninja_log.py @@ -1,8 +1,9 @@ # -# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# Copyright (c) 2021-2025, NVIDIA CORPORATION. # import argparse import os +import re import sys import xml.etree.ElementTree as ET from pathlib import Path @@ -144,6 +145,16 @@ def format_file_size(input_size): return file_size_str +def replace_placeholder_patterns(input_string: str) -> str: + pattern = r'(_h_env_placehold)[_placehold]+' + return re.sub(pattern, r'\1...', input_string) + + +# adjust name for display +def format_file_name(name: str) -> str: + return replace_placeholder_patterns(name) + + # Output chart results in HTML format # Builds a standalone html file with no javascript or styles def output_html(entries, sorted_list, cmp_entries, args): @@ -223,7 +234,8 @@ def output_html(entries, sorted_list, cmp_entries, args): print("", end="") # use a slightly smaller, fixed-width font @@ -265,7 +277,8 @@ def output_html(entries, sorted_list, cmp_entries, args): file_size_str = format_file_size(file_size) # output entry row - print("", name, "", sep="", end="") + display_name = format_file_name(name) + print("", display_name, "", sep="", end="") print("", build_time_str, "", sep="", end="") print("", file_size_str, "", sep="", end="") # output diff column From a38ce0a2447e9bca15f3a904c54fe1eba27e5940 Mon Sep 17 00:00:00 2001 From: David Wendt <45795991+davidwendt@users.noreply.github.com> Date: Wed, 8 Jan 2025 08:55:09 -0500 Subject: [PATCH 3/9] Remove pragma GCC diagnostic from source files (#17637) Removes the `#pragma GCC diagnostic` from several source files. These do not seem to be necessary and could suppress useful compile warnings. 
Authors: - David Wendt (https://github.com/davidwendt) Approvers: - Nghia Truong (https://github.com/ttnghia) - Karthikeyan (https://github.com/karthikeyann) URL: https://github.com/rapidsai/cudf/pull/17637 --- cpp/src/join/mixed_join_kernel.cuh | 4 +--- cpp/src/join/mixed_join_kernels_semi.cu | 4 +--- cpp/src/join/mixed_join_size_kernel.cuh | 4 +--- 3 files changed, 3 insertions(+), 9 deletions(-) diff --git a/cpp/src/join/mixed_join_kernel.cuh b/cpp/src/join/mixed_join_kernel.cuh index 368b1fba870..4565626edad 100644 --- a/cpp/src/join/mixed_join_kernel.cuh +++ b/cpp/src/join/mixed_join_kernel.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -37,8 +37,6 @@ namespace detail { namespace cg = cooperative_groups; -#pragma GCC diagnostic ignored "-Wattributes" - template CUDF_KERNEL void __launch_bounds__(block_size) mixed_join(table_device_view left_table, diff --git a/cpp/src/join/mixed_join_kernels_semi.cu b/cpp/src/join/mixed_join_kernels_semi.cu index a4ec97af235..4c063b6202e 100644 --- a/cpp/src/join/mixed_join_kernels_semi.cu +++ b/cpp/src/join/mixed_join_kernels_semi.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,8 +30,6 @@ namespace detail { namespace cg = cooperative_groups; -#pragma GCC diagnostic ignored "-Wattributes" - template CUDF_KERNEL void __launch_bounds__(block_size) mixed_join_semi(table_device_view left_table, diff --git a/cpp/src/join/mixed_join_size_kernel.cuh b/cpp/src/join/mixed_join_size_kernel.cuh index 98170ed719a..869d05ce4d3 100644 --- a/cpp/src/join/mixed_join_size_kernel.cuh +++ b/cpp/src/join/mixed_join_size_kernel.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * Copyright (c) 2022-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,8 +34,6 @@ namespace cudf { namespace detail { namespace cg = cooperative_groups; -#pragma GCC diagnostic ignored "-Wattributes" - template CUDF_KERNEL void __launch_bounds__(block_size) compute_mixed_join_output_size(table_device_view left_table, From da316860211281807e39fadb2a543bcdd6f56abb Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Wed, 8 Jan 2025 16:25:14 +0000 Subject: [PATCH 4/9] Skip polars test that can generate timezones that chrono_tz doesn't know (#17694) On Ubuntu 20.04, the tzdata package contains a bunch of symlinks for obsolete timezone names. However, the chrono_tz package that polars uses doesn't read /usr/share/zoneinfo, instead packaging the current zoneinfo database from IANA. Consequently, when this hypothesis-generated test runs and generates timezones from the available zoneinfo-reported timezones, we can get an error from polars that the requested timezone is unknown. Since this is random, just skip it, rather than xfailing. 
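To make the mismatch concrete, a small sketch (illustrative only, not code from this PR); `bundled_zones` is a hypothetical stand-in for the zone list that chrono_tz bundles from IANA:

    import zoneinfo

    # Names the system tzdata exposes; on Ubuntu 20.04 this includes legacy
    # aliases such as "US/Eastern" that exist only as symlinks. The
    # hypothesis-generated test samples its timezones from this set.
    system_zones = zoneinfo.available_timezones()

    # Hypothetical stand-in for the zone list chrono_tz ships.
    bundled_zones = {"America/New_York", "Europe/London", "UTC"}

    # Anything left here can be generated by the test yet rejected by polars
    # as an unknown timezone, which is why the test is skipped rather than
    # xfailed.
    unknown_to_polars = sorted(system_zones - bundled_zones)
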
Authors: - Lawrence Mitchell (https://github.com/wence-) Approvers: - Matthew Murray (https://github.com/Matt711) - Peter Andreas Entschev (https://github.com/pentschev) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/17694 --- .../cudf_polars/cudf_polars/testing/plugin.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/python/cudf_polars/cudf_polars/testing/plugin.py b/python/cudf_polars/cudf_polars/testing/plugin.py index 7a759eea2e9..87628242838 100644 --- a/python/cudf_polars/cudf_polars/testing/plugin.py +++ b/python/cudf_polars/cudf_polars/testing/plugin.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. # SPDX-License-Identifier: Apache-2.0 """Plugin for running polars test suite setting GPU engine as default.""" @@ -174,6 +174,19 @@ def pytest_configure(config: pytest.Config) -> None: } +TESTS_TO_SKIP: Mapping[str, str] = { + # On Ubuntu 20.04, the tzdata package contains a bunch of symlinks + # for obsolete timezone names. However, the chrono_tz package that + # polars uses doesn't read /usr/share/zoneinfo, instead packaging + # the current zoneinfo database from IANA. Consequently, when this + # hypothesis-generated test runs and generates timezones from the + # available zoneinfo-reported timezones, we can get an error from + # polars that the requested timezone is unknown. + # Since this is random, just skip it, rather than xfailing. + "tests/unit/lazyframe/test_serde.py::test_lf_serde_roundtrip_binary": "chrono_tz doesn't have all tzdata symlink names", +} + + def pytest_collection_modifyitems( session: pytest.Session, config: pytest.Config, items: list[pytest.Item] ) -> None: @@ -182,5 +195,7 @@ def pytest_collection_modifyitems( # Don't xfail tests if running without fallback return for item in items: - if item.nodeid in EXPECTED_FAILURES: + if item.nodeid in TESTS_TO_SKIP: + item.add_marker(pytest.mark.skip(reason=TESTS_TO_SKIP[item.nodeid])) + elif item.nodeid in EXPECTED_FAILURES: item.add_marker(pytest.mark.xfail(reason=EXPECTED_FAILURES[item.nodeid])) From f017f869829cb05694d195aab0f118357c0dbbd8 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Wed, 8 Jan 2025 09:30:40 -0800 Subject: [PATCH 5/9] Control pinned memory use with environment variables (#17657) Adds `LIBCUDF_KERNEL_PINNED_COPY_THRESHOLD` and `LIBCUDF_ALLOCATE_HOST_AS_PINNED_THRESHOLD` environment variables to set the pinned memory optimizations' thresholds. Authors: - Vukasin Milovanovic (https://github.com/vuule) Approvers: - Nghia Truong (https://github.com/ttnghia) - David Wendt (https://github.com/davidwendt) URL: https://github.com/rapidsai/cudf/pull/17657 --- cpp/src/utilities/host_memory.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cpp/src/utilities/host_memory.cpp b/cpp/src/utilities/host_memory.cpp index 73c4567d3a4..94d27d976c3 100644 --- a/cpp/src/utilities/host_memory.cpp +++ b/cpp/src/utilities/host_memory.cpp @@ -14,6 +14,8 @@ * limitations under the License. 
*/ +#include "io/utilities/getenv_or.hpp" + #include #include #include @@ -277,7 +279,7 @@ bool config_default_pinned_memory_resource(pinned_mr_options const& opts) CUDF_EXPORT auto& kernel_pinned_copy_threshold() { // use cudaMemcpyAsync for all pinned copies - static std::atomic threshold = 0; + static std::atomic threshold = getenv_or("LIBCUDF_KERNEL_PINNED_COPY_THRESHOLD", 0); return threshold; } @@ -291,7 +293,7 @@ size_t get_kernel_pinned_copy_threshold() { return kernel_pinned_copy_threshold( CUDF_EXPORT auto& allocate_host_as_pinned_threshold() { // use pageable memory for all host allocations - static std::atomic threshold = 0; + static std::atomic threshold = getenv_or("LIBCUDF_ALLOCATE_HOST_AS_PINNED_THRESHOLD", 0); return threshold; } From f1cb88df8eb7862a82969dfdfd746886198a9b22 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 8 Jan 2025 09:41:20 -0800 Subject: [PATCH 6/9] Define cudf repr methods on the Column (#17675) Refactors cudf Python objects' repr handling to define the core conversion of "cleaning" nulls at the column level and then rolling up the conversions at the `Frame` and its subclasses level. Notable positive changes: * `repr(cudf.Series)` no longer deep copies * Fixes a bug when `repr(cudf.Series)` with a timedelta type to better match pandas (adjusted unit tests accordingly) Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Matthew Murray (https://github.com/Matt711) URL: https://github.com/rapidsai/cudf/pull/17675 --- python/cudf/cudf/core/_base_index.py | 4 +- python/cudf/cudf/core/column/column.py | 16 +++++- python/cudf/cudf/core/column/datetime.py | 4 +- python/cudf/cudf/core/column/lists.py | 13 ++++- python/cudf/cudf/core/column/struct.py | 13 ++++- python/cudf/cudf/core/column/timedelta.py | 2 + python/cudf/cudf/core/dataframe.py | 53 +++---------------- python/cudf/cudf/core/frame.py | 9 +++- python/cudf/cudf/core/index.py | 30 ++--------- python/cudf/cudf/core/indexed_frame.py | 6 +++ python/cudf/cudf/core/multiindex.py | 19 +++---- python/cudf/cudf/core/series.py | 29 ++--------- python/cudf/cudf/tests/test_repr.py | 62 +++++++++++------------ 13 files changed, 117 insertions(+), 143 deletions(-) diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py index c2f3c782d10..2806a1f6c23 100644 --- a/python/cudf/cudf/core/_base_index.py +++ b/python/cudf/cudf/core/_base_index.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# Copyright (c) 2021-2025, NVIDIA CORPORATION. from __future__ import annotations @@ -350,7 +350,7 @@ def names(self, values): self.name = values[0] - def _clean_nulls_from_index(self): + def _pandas_repr_compatible(self): """ Convert all na values(if any) in Index object to `` as a preprocessing step to `__repr__` methods. diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 24b657f1c32..ef815e44d9d 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2024, NVIDIA CORPORATION. +# Copyright (c) 2018-2025, NVIDIA CORPORATION. 
from __future__ import annotations @@ -77,6 +77,7 @@ from cudf._typing import ColumnLike, Dtype, ScalarLike from cudf.core.column.numerical import NumericalColumn + from cudf.core.column.strings import StringColumn if PANDAS_GE_210: NumpyExtensionArray = pd.arrays.NumpyExtensionArray @@ -92,6 +93,8 @@ class ColumnBase(Column, Serializable, BinaryOperand, Reducible): "min", } + _PANDAS_NA_REPR = str(pd.NA) + def data_array_view( self, *, mode: Literal["write", "read"] = "write" ) -> "cuda.devicearray.DeviceNDArray": @@ -176,6 +179,17 @@ def __repr__(self): f"dtype: {self.dtype}" ) + def _prep_pandas_compat_repr(self) -> StringColumn | Self: + """ + Preprocess Column to be compatible with pandas repr, namely handling nulls. + + * null (datetime/timedelta) = str(pd.NaT) + * null (other types)= str(pd.NA) + """ + if self.has_nulls(): + return self.astype("str").fillna(self._PANDAS_NA_REPR) + return self + def to_pandas( self, *, diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index b6a4122ebb9..80551e33115 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# Copyright (c) 2019-2025, NVIDIA CORPORATION. from __future__ import annotations @@ -212,6 +212,8 @@ class DatetimeColumn(column.ColumnBase): "__rsub__", } + _PANDAS_NA_REPR = str(pd.NaT) + def __init__( self, data: Buffer, diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index 6283e498842..9c5041df521 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. from __future__ import annotations @@ -28,6 +28,7 @@ from cudf._typing import ColumnBinaryOperand, ColumnLike, Dtype, ScalarLike from cudf.core.buffer import Buffer + from cudf.core.column.string import StringColumn class ListColumn(ColumnBase): @@ -67,6 +68,16 @@ def __init__( children=children, ) + def _prep_pandas_compat_repr(self) -> StringColumn | Self: + """ + Preprocess Column to be compatible with pandas repr, namely handling nulls. + + * null (datetime/timedelta) = str(pd.NaT) + * null (other types)= str(pd.NA) + """ + # TODO: handle if self.has_nulls(): case + return self + @cached_property def memory_usage(self): n = super().memory_usage diff --git a/python/cudf/cudf/core/column/struct.py b/python/cudf/cudf/core/column/struct.py index ba765b50729..052a68cec98 100644 --- a/python/cudf/cudf/core/column/struct.py +++ b/python/cudf/cudf/core/column/struct.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. from __future__ import annotations from functools import cached_property @@ -18,6 +18,7 @@ from cudf._typing import Dtype from cudf.core.buffer import Buffer + from cudf.core.column.string import StringColumn class StructColumn(ColumnBase): @@ -51,6 +52,16 @@ def __init__( children=children, ) + def _prep_pandas_compat_repr(self) -> StringColumn | Self: + """ + Preprocess Column to be compatible with pandas repr, namely handling nulls. 
+ + * null (datetime/timedelta) = str(pd.NaT) + * null (other types)= str(pd.NA) + """ + # TODO: handle if self.has_nulls(): case + return self + @staticmethod def _validate_dtype_instance(dtype: StructDtype) -> StructDtype: # IntervalDtype is a subclass of StructDtype, so compare types exactly diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index 749ab8e837a..302178ea277 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -81,6 +81,8 @@ class TimeDeltaColumn(ColumnBase): "__rfloordiv__", } + _PANDAS_NA_REPR = str(pd.NaT) + def __init__( self, data: Buffer, diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index b2121511a14..40d36a6ff56 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2024, NVIDIA CORPORATION. +# Copyright (c) 2018-2025, NVIDIA CORPORATION. from __future__ import annotations @@ -1894,7 +1894,7 @@ def astype( dtype = {cc: dtype for cc in self._column_names} return super().astype(dtype, copy, errors) - def _clean_renderable_dataframe(self, output): + def _clean_renderable_dataframe(self, output: Self) -> str: """ This method takes in partial/preprocessed dataframe and returns correct representation of it with correct @@ -1929,41 +1929,7 @@ def _clean_renderable_dataframe(self, output): ) return "\n".join(lines) - def _clean_nulls_from_dataframe(self, df): - """ - This function converts all ``null`` values to ```` for - representation as a string in `__repr__`. - - Since we utilize Pandas `__repr__` at all places in our code - for formatting purposes, we convert columns to `str` dtype for - filling with `` values. - """ - for col in df._data: - if isinstance( - df._data[col].dtype, (cudf.StructDtype, cudf.ListDtype) - ): - # TODO we need to handle this - pass - elif df._data[col].has_nulls(): - fill_value = ( - str(cudf.NaT) - if isinstance( - df._data[col], - ( - cudf.core.column.DatetimeColumn, - cudf.core.column.TimeDeltaColumn, - ), - ) - else str(cudf.NA) - ) - - df[col] = df._data[col].astype("str").fillna(fill_value) - else: - df[col] = df._data[col] - - return df - - def _get_renderable_dataframe(self): + def _get_renderable_dataframe(self) -> Self: """ Takes rows and columns from pandas settings or estimation from size. pulls quadrants based off of some known parameters then style for @@ -1971,9 +1937,9 @@ def _get_renderable_dataframe(self): for printing with the dataframe. 
""" max_rows = pd.options.display.max_rows - nrows = np.max([len(self) if max_rows is None else max_rows, 1]) - if pd.options.display.max_rows == 0: - nrows = len(self) + if max_rows in {0, None}: + max_rows = len(self) + nrows = max(max_rows, 1) ncols = ( pd.options.display.max_columns if pd.options.display.max_columns @@ -1981,7 +1947,7 @@ def _get_renderable_dataframe(self): ) if len(self) <= nrows and self._num_columns <= ncols: - output = self.copy(deep=False) + output = self elif self.empty and len(self.index) > 0: max_seq_items = pd.options.display.max_seq_items # In case of Empty DataFrame with index, Pandas prints @@ -2041,10 +2007,7 @@ def _get_renderable_dataframe(self): lower = cudf.concat([lower_left, lower_right], axis=1) output = cudf.concat([upper, lower]) - output = self._clean_nulls_from_dataframe(output) - output.index = output.index._clean_nulls_from_index() - - return output + return output._pandas_repr_compatible() @_performance_tracking def __repr__(self): diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 8f45c6f0115..abf9f7b3686 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. from __future__ import annotations @@ -820,6 +820,13 @@ def fillna( inplace=inplace, ) + def _pandas_repr_compatible(self) -> Self: + """Return Self but with columns prepared for a pandas-like repr.""" + columns = (col._prep_pandas_compat_repr() for col in self._columns) + return self._from_data_like_self( + self._data._from_columns_like_self(columns, verify=False) + ) + @_performance_tracking def _drop_column( self, name: abc.Hashable, errors: Literal["ignore", "raise"] = "raise" diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 85be8d21d27..54635b162bc 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -339,7 +339,7 @@ def _values(self) -> ColumnBase: else: return column.column_empty(0, dtype=self.dtype) - def _clean_nulls_from_index(self) -> Self: + def _pandas_repr_compatible(self) -> Self: return self def _is_numeric(self) -> bool: @@ -1127,15 +1127,9 @@ def _from_data(cls, data: MutableMapping, name: Any = no_default) -> Self: out.name = name return out - @classmethod @_performance_tracking - def _from_data_like_self( - cls, data: MutableMapping, name: Any = no_default - ) -> Self: - out = _index_from_data(data, name) - if name is not no_default: - out.name = name - return out + def _from_data_like_self(self, data: MutableMapping) -> Self: + return _index_from_data(data, self.name) @classmethod @_performance_tracking @@ -1494,7 +1488,7 @@ def __repr__(self) -> str: if isinstance(self._values, StringColumn): output = repr(self.to_pandas(nullable=True)) else: - output = repr(self._clean_nulls_from_index().to_pandas()) + output = repr(self._pandas_repr_compatible().to_pandas()) # We should remove all the single quotes # from the output due to the type-cast to # object dtype happening above. 
@@ -1650,20 +1644,6 @@ def __contains__(self, item) -> bool: hash(item) return item in self._column - def _clean_nulls_from_index(self) -> Index: - if self._values.has_nulls(): - fill_value = ( - str(cudf.NaT) - if isinstance(self, (DatetimeIndex, TimedeltaIndex)) - else str(cudf.NA) - ) - return Index._from_column( - self._column.astype("str").fillna(fill_value), - name=self.name, - ) - - return self - def any(self) -> bool: return self._column.any() @@ -3615,7 +3595,7 @@ def _is_interval(self) -> bool: def _is_boolean(self) -> bool: return False - def _clean_nulls_from_index(self) -> Self: + def _pandas_repr_compatible(self) -> Self: return self @property diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index e9ed74f804b..c779e1ebe97 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -4410,6 +4410,12 @@ def _apply_boolean_mask(self, boolean_mask: BooleanMask, keep_index=True): index_names=self.index.names if keep_index else None, ) + def _pandas_repr_compatible(self) -> Self: + """Return Self but with columns prepared for a pandas-like repr.""" + result = super()._pandas_repr_compatible() + result.index = self.index._pandas_repr_compatible() + return result + def take(self, indices, axis=0): """Return a new frame containing the rows specified by *indices*. diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 1e613e49ffc..e7efd01ca85 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# Copyright (c) 2019-2025, NVIDIA CORPORATION. from __future__ import annotations @@ -361,6 +361,13 @@ def _from_data( name=name, ) + @_performance_tracking + def _from_data_like_self(self, data: MutableMapping) -> Self: + mi = type(self)._from_data(data, name=self.name) + if mi.nlevels == self.nlevels: + mi.names = self.names + return mi + @classmethod def _simple_new( cls, @@ -1753,16 +1760,6 @@ def nunique(self, dropna: bool = True) -> int: mi = self.dropna(how="all") if dropna else self return len(mi.unique()) - def _clean_nulls_from_index(self) -> Self: - """ - Convert all na values(if any) in MultiIndex object - to `` as a preprocessing step to `__repr__` methods. 
- """ - index_df = self.to_frame(index=False, name=list(range(self.nlevels))) - return MultiIndex.from_frame( - index_df._clean_nulls_from_dataframe(index_df), names=self.names - ) - @_performance_tracking def memory_usage(self, deep: bool = False) -> int: usage = sum(col.memory_usage for col in self._columns) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 49c2c8cf387..3b047ee5ed4 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -1449,35 +1449,16 @@ def __repr__(self): warnings.simplefilter("ignore", FutureWarning) preprocess = cudf.concat([top, bottom]) else: - preprocess = self.copy() - preprocess.index = preprocess.index._clean_nulls_from_index() - if ( - preprocess.nullable - and not isinstance( - preprocess.dtype, - ( - cudf.CategoricalDtype, - cudf.ListDtype, - cudf.StructDtype, - cudf.core.dtypes.DecimalDtype, - ), - ) - ) or preprocess.dtype.kind == "m": - fill_value = ( - str(cudf.NaT) - if preprocess.dtype.kind in "mM" - else str(cudf.NA) - ) - output = repr( - preprocess.astype("str").fillna(fill_value).to_pandas() - ) - elif isinstance(preprocess.dtype, cudf.CategoricalDtype): + preprocess = self + if isinstance(preprocess.dtype, cudf.CategoricalDtype): min_rows = ( height if pd.get_option("display.min_rows") == 0 else pd.get_option("display.min_rows") ) show_dimensions = pd.get_option("display.show_dimensions") + preprocess = preprocess.copy(deep=False) + preprocess.index = preprocess.index._pandas_repr_compatible() if preprocess.dtype.categories.dtype.kind == "f": pd_series = ( preprocess.astype("str") @@ -1502,7 +1483,7 @@ def __repr__(self): na_rep=str(cudf.NA), ) else: - output = repr(preprocess.to_pandas()) + output = repr(preprocess._pandas_repr_compatible().to_pandas()) lines = output.split("\n") if isinstance(preprocess.dtype, cudf.CategoricalDtype): diff --git a/python/cudf/cudf/tests/test_repr.py b/python/cudf/cudf/tests/test_repr.py index bf0c97adb00..2cb742727cc 100644 --- a/python/cudf/cudf/tests/test_repr.py +++ b/python/cudf/cudf/tests/test_repr.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# Copyright (c) 2019-2025, NVIDIA CORPORATION. 
import textwrap @@ -618,9 +618,9 @@ def test_timedelta_series_s_us_repr(data, dtype): cudf.Series([1000000, 200000, 3000000], dtype="timedelta64[ns]"), textwrap.dedent( """ - 0 0 days 00:00:00.001000000 - 1 0 days 00:00:00.000200000 - 2 0 days 00:00:00.003000000 + 0 0 days 00:00:00.001000 + 1 0 days 00:00:00.000200 + 2 0 days 00:00:00.003000 dtype: timedelta64[ns] """ ), @@ -710,12 +710,12 @@ def test_timedelta_series_s_us_repr(data, dtype): ), textwrap.dedent( """ - 0 0 days 00:00:00.012 - 1 0 days 00:00:00.012 - 2 0 days 00:00:00.022 - 3 0 days 00:00:00.343 - 4 0 days 01:12:33.534 - 5 0 days 00:07:15.342 + 0 0 days 00:00:00.012000 + 1 0 days 00:00:00.012000 + 2 0 days 00:00:00.022000 + 3 0 days 00:00:00.343000 + 4 0 days 01:12:33.534000 + 5 0 days 00:07:15.342000 dtype: timedelta64[ms] """ ), @@ -745,13 +745,13 @@ def test_timedelta_series_s_us_repr(data, dtype): ), textwrap.dedent( """ - 0 0 days 00:00:00.001 - 1 0 days 00:00:01.132 - 2 0 days 06:27:03.231 - 3 0 days 00:00:00.233 - 4 0 days 00:00:00 - 5 0 days 00:00:00.332 - 6 0 days 00:00:00.323 + 0 0 days 00:00:00.001000 + 1 0 days 00:00:01.132000 + 2 0 days 06:27:03.231000 + 3 0 days 00:00:00.233000 + 4 0 days 00:00:00 + 5 0 days 00:00:00.332000 + 6 0 days 00:00:00.323000 dtype: timedelta64[ms] """ ), @@ -771,13 +771,13 @@ def test_timedelta_series_s_us_repr(data, dtype): ), textwrap.dedent( """ - 0 157937 days 02:23:52.432 - 1 1 days 13:25:36.784 - 2 2 days 20:09:05.345 - 3 2 days 14:03:52.411 - 4 11573 days 23:39:03.241 - 5 42 days 01:35:48.734 - 6 0 days 00:00:23.234 + 0 157937 days 02:23:52.432000 + 1 1 days 13:25:36.784000 + 2 2 days 20:09:05.345000 + 3 2 days 14:03:52.411000 + 4 11573 days 23:39:03.241000 + 5 42 days 01:35:48.734000 + 6 0 days 00:00:23.234000 dtype: timedelta64[ms] """ ), @@ -824,13 +824,13 @@ def test_timedelta_series_s_us_repr(data, dtype): ), textwrap.dedent( """ - 0 157937 days 02:23:52.432 - 1 1 days 13:25:36.784 - 2 2 days 20:09:05.345 - 3 2 days 14:03:52.411 - 4 11573 days 23:39:03.241 - 5 42 days 01:35:48.734 - 6 0 days 00:00:23.234 + 0 157937 days 02:23:52.432000 + 1 1 days 13:25:36.784000 + 2 2 days 20:09:05.345000 + 3 2 days 14:03:52.411000 + 4 11573 days 23:39:03.241000 + 5 42 days 01:35:48.734000 + 6 0 days 00:00:23.234000 Name: abc, dtype: timedelta64[ms] """ ), From 2c385c456d71fddb74298871b0918b0fb7ad72f4 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 8 Jan 2025 12:58:58 -0800 Subject: [PATCH 7/9] Convert cudf.Scalar usage to pylibcudf and pyarrow usage (#17686) A lot of `cudf.Scalar` usage is to eventually end up with a device scalar object (`pylibcudf.Scalar`) to pass to a pylibcudf routine. The conversion logic to get there can be achieved by converting to a `pyarrow.Scalar` and using `pylibcudf.interop.from_arrow`. This way we offload a lot of scalar-conversion-logic in `cudf.Scalar` to `pyarrow.Scalar` which can further be converted using the interop method. This PR just tackles some straightforward cases of the above. 
Another PR will tackle the more involved cases Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Matthew Murray (https://github.com/Matt711) URL: https://github.com/rapidsai/cudf/pull/17686 --- python/cudf/cudf/api/types.py | 4 +- python/cudf/cudf/core/byte_pair_encoding.py | 5 +- python/cudf/cudf/core/column/column.py | 22 ++- python/cudf/cudf/core/column/datetime.py | 4 +- python/cudf/cudf/core/column/lists.py | 14 +- python/cudf/cudf/core/column/numerical.py | 11 +- python/cudf/cudf/core/column/string.py | 190 ++++++++++--------- python/cudf/cudf/core/dataframe.py | 4 +- python/cudf/cudf/core/groupby/groupby.py | 6 +- python/cudf/cudf/core/index.py | 3 +- python/cudf/cudf/core/indexed_frame.py | 2 +- python/cudf/cudf/core/series.py | 4 +- python/cudf/cudf/core/tokenize_vocabulary.py | 5 +- python/cudf/cudf/core/window/rolling.py | 11 +- python/cudf/cudf/tests/test_list.py | 21 +- 15 files changed, 167 insertions(+), 139 deletions(-) diff --git a/python/cudf/cudf/api/types.py b/python/cudf/cudf/api/types.py index 9c436dfad18..cad4b1aa72c 100644 --- a/python/cudf/cudf/api/types.py +++ b/python/cudf/cudf/api/types.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# Copyright (c) 2021-2025, NVIDIA CORPORATION. """Define common type operations.""" @@ -13,6 +13,7 @@ import cupy as cp import numpy as np import pandas as pd +import pyarrow as pa from pandas.api import types as pd_types import cudf @@ -144,6 +145,7 @@ def is_scalar(val): cudf.Scalar, cudf._lib.scalar.DeviceScalar, cudf.core.tools.datetimes.DateOffset, + pa.Scalar, ), ) or ( pd_types.is_scalar(val) diff --git a/python/cudf/cudf/core/byte_pair_encoding.py b/python/cudf/cudf/core/byte_pair_encoding.py index b49f5154697..0fe47255368 100644 --- a/python/cudf/cudf/core/byte_pair_encoding.py +++ b/python/cudf/cudf/core/byte_pair_encoding.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Copyright (c) 2023-2025, NVIDIA CORPORATION. from __future__ import annotations @@ -53,7 +53,6 @@ def __call__(self, text: cudf.Series, separator: str = " ") -> cudf.Series: 1 this is it dtype: object """ - sep = cudf.Scalar(separator, dtype="str") return cudf.Series._from_column( - text._column.byte_pair_encoding(self.merge_pairs, sep) + text._column.byte_pair_encoding(self.merge_pairs, separator) ) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index ef815e44d9d..e23ca810065 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -253,8 +253,12 @@ def find_and_replace( def clip(self, lo: ScalarLike, hi: ScalarLike) -> Self: plc_column = plc.replace.clamp( self.to_pylibcudf(mode="read"), - cudf.Scalar(lo, self.dtype).device_value.c_value, - cudf.Scalar(hi, self.dtype).device_value.c_value, + plc.interop.from_arrow( + pa.scalar(lo, type=cudf_dtype_to_pa_type(self.dtype)) + ), + plc.interop.from_arrow( + pa.scalar(hi, type=cudf_dtype_to_pa_type(self.dtype)) + ), ) return type(self).from_pylibcudf(plc_column) # type: ignore[return-value] @@ -1029,7 +1033,7 @@ def _obtain_isin_result(self, rhs: ColumnBase) -> ColumnBase: # https://github.com/rapidsai/cudf/issues/14515 by # providing a mode in which cudf::contains does not mask # the result. 
- result = result.fillna(cudf.Scalar(rhs.null_count > 0)) + result = result.fillna(rhs.null_count > 0) return result def as_mask(self) -> Buffer: @@ -1995,12 +1999,12 @@ def as_column( column = Column.from_pylibcudf( plc.filling.sequence( len(arbitrary), - cudf.Scalar( - arbitrary.start, dtype=np.dtype(np.int64) - ).device_value.c_value, - cudf.Scalar( - arbitrary.step, dtype=np.dtype(np.int64) - ).device_value.c_value, + plc.interop.from_arrow( + pa.scalar(arbitrary.start, type=pa.int64()) + ), + plc.interop.from_arrow( + pa.scalar(arbitrary.step, type=pa.int64()) + ), ) ) if cudf.get_option("default_integer_bitwidth") and dtype is None: diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 80551e33115..1bde7d27700 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -353,8 +353,8 @@ def is_year_end(self) -> ColumnBase: day_of_year = self.day_of_year leap_dates = self.is_leap_year - leap = day_of_year == cudf.Scalar(366) - non_leap = day_of_year == cudf.Scalar(365) + leap = day_of_year == 366 + non_leap = day_of_year == 365 return leap.copy_if_else(non_leap, leap_dates).fillna(False) @property diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index 9c5041df521..6fc2b5d4ca2 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -285,7 +285,7 @@ def as_string_column(self) -> cudf.core.column.StringColumn: with acquire_spill_lock(): plc_column = plc.strings.convert.convert_lists.format_list_column( lc.to_pylibcudf(mode="read"), - cudf.Scalar("None").device_value.c_value, + plc.interop.from_arrow(pa.scalar("None")), separators.to_pylibcudf(mode="read"), ) return type(self).from_pylibcudf(plc_column) # type: ignore[return-value] @@ -391,20 +391,20 @@ def extract_element_column(self, index: ColumnBase) -> ColumnBase: ) @acquire_spill_lock() - def contains_scalar(self, search_key: cudf.Scalar) -> ColumnBase: + def contains_scalar(self, search_key: pa.Scalar) -> ColumnBase: return type(self).from_pylibcudf( plc.lists.contains( self.to_pylibcudf(mode="read"), - search_key.device_value.c_value, + plc.interop.from_arrow(search_key), ) ) @acquire_spill_lock() - def index_of_scalar(self, search_key: cudf.Scalar) -> ColumnBase: + def index_of_scalar(self, search_key: pa.Scalar) -> ColumnBase: return type(self).from_pylibcudf( plc.lists.index_of( self.to_pylibcudf(mode="read"), - search_key.device_value.c_value, + plc.interop.from_arrow(search_key), plc.lists.DuplicateFindOption.FIND_FIRST, ) ) @@ -569,7 +569,7 @@ def contains(self, search_key: ScalarLike) -> ParentType: dtype: bool """ return self._return_or_inplace( - self._column.contains_scalar(cudf.Scalar(search_key)) + self._column.contains_scalar(pa.scalar(search_key)) ) def index(self, search_key: ScalarLike | ColumnLike) -> ParentType: @@ -618,7 +618,7 @@ def index(self, search_key: ScalarLike | ColumnLike) -> ParentType: """ if is_scalar(search_key): - result = self._column.index_of_scalar(cudf.Scalar(search_key)) + result = self._column.index_of_scalar(pa.scalar(search_key)) else: result = self._column.index_of_column(as_column(search_key)) return self._return_or_inplace(result) diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index 8fe5299fcdd..70103745926 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2024, NVIDIA 
CORPORATION. +# Copyright (c) 2018-2025, NVIDIA CORPORATION. from __future__ import annotations @@ -7,6 +7,7 @@ import numpy as np import pandas as pd +import pyarrow as pa from numba.np import numpy_support from typing_extensions import Self @@ -382,12 +383,8 @@ def as_string_column(self) -> cudf.core.column.StringColumn: elif self.dtype.kind == "b": conv_func = functools.partial( plc.strings.convert.convert_booleans.from_booleans, - true_string=cudf.Scalar( - "True", dtype="str" - ).device_value.c_value, - false_string=cudf.Scalar( - "False", dtype="str" - ).device_value.c_value, + true_string=plc.interop.from_arrow(pa.scalar("True")), + false_string=plc.interop.from_arrow(pa.scalar("False")), ) elif self.dtype.kind in {"i", "u"}: conv_func = plc.strings.convert.convert_integers.from_integers diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index fcdcb789f23..20eded9a27f 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION. +# Copyright (c) 2019-2025, NVIDIA CORPORATION. from __future__ import annotations @@ -302,8 +302,10 @@ def cat(self, others=None, sep=None, na_rep=None): with acquire_spill_lock(): plc_column = plc.strings.combine.join_strings( self._column.to_pylibcudf(mode="read"), - cudf.Scalar(sep).device_value.c_value, - cudf.Scalar(na_rep, "str").device_value.c_value, + plc.interop.from_arrow(pa.scalar(sep)), + plc.interop.from_arrow( + pa.scalar(na_rep, type=pa.string()) + ), ) data = Column.from_pylibcudf(plc_column) else: @@ -359,8 +361,10 @@ def cat(self, others=None, sep=None, na_rep=None): ) ] ), - cudf.Scalar(sep).device_value.c_value, - cudf.Scalar(na_rep, "str").device_value.c_value, + plc.interop.from_arrow(pa.scalar(sep)), + plc.interop.from_arrow( + pa.scalar(na_rep, type=pa.string()) + ), ) data = Column.from_pylibcudf(plc_column) @@ -522,11 +526,9 @@ def join( with acquire_spill_lock(): plc_column = plc.strings.combine.join_list_elements( strings_column.to_pylibcudf(mode="read"), - cudf.Scalar(sep).device_value.c_value, - cudf.Scalar(string_na_rep).device_value.c_value, - cudf._lib.scalar.DeviceScalar( - "", cudf.dtype("object") - ).c_value, + plc.interop.from_arrow(pa.scalar(sep)), + plc.interop.from_arrow(pa.scalar(string_na_rep)), + plc.interop.from_arrow(pa.scalar("")), plc.strings.combine.SeparatorOnNulls.YES, plc.strings.combine.OutputIfEmptyList.NULL_ELEMENT, ) @@ -547,8 +549,8 @@ def join( plc_column = plc.strings.combine.join_list_elements( strings_column.to_pylibcudf(mode="read"), sep_column.to_pylibcudf(mode="read"), - cudf.Scalar(sep_na_rep).device_value.c_value, - cudf.Scalar(string_na_rep).device_value.c_value, + plc.interop.from_arrow(pa.scalar(sep_na_rep)), + plc.interop.from_arrow(pa.scalar(string_na_rep)), plc.strings.combine.SeparatorOnNulls.YES, plc.strings.combine.OutputIfEmptyList.NULL_ELEMENT, ) @@ -800,14 +802,14 @@ def contains( else: if case is False: input_column = self.lower()._column # type: ignore[union-attr] - plc_pat = cudf.Scalar(pat.lower(), dtype="str") # type: ignore[union-attr] + pat_normed = pat.lower() # type: ignore[union-attr] else: input_column = self._column - plc_pat = cudf.Scalar(pat, dtype="str") + pat_normed = pat with acquire_spill_lock(): plc_result = plc.strings.find.contains( input_column.to_pylibcudf(mode="read"), - plc_pat.device_value.c_value, + plc.interop.from_arrow(pa.scalar(pat_normed)), ) result_col = Column.from_pylibcudf(plc_result) else: @@ -892,8 +894,8 
@@ def like(self, pat: str, esc: str | None = None) -> SeriesOrIndex: with acquire_spill_lock(): plc_result = plc.strings.contains.like( self._column.to_pylibcudf(mode="read"), - cudf.Scalar(pat, "str").device_value.c_value, - cudf.Scalar(esc, "str").device_value.c_value, + plc.interop.from_arrow(pa.scalar(pat)), + plc.interop.from_arrow(pa.scalar(esc)), ) result = Column.from_pylibcudf(plc_result) @@ -1071,14 +1073,14 @@ def replace( plc.strings.regex_program.RegexProgram.create( pat, plc.strings.regex_flags.RegexFlags.DEFAULT ), - cudf.Scalar(repl, "str").device_value.c_value, + plc.interop.from_arrow(pa.scalar(repl)), n, ) else: plc_result = plc.strings.replace.replace( self._column.to_pylibcudf(mode="read"), - cudf.Scalar(pat).device_value.c_value, - cudf.Scalar(repl).device_value.c_value, + plc.interop.from_arrow(pa.scalar(pat)), + plc.interop.from_arrow(pa.scalar(repl)), n, ) result = Column.from_pylibcudf(plc_result) @@ -1194,13 +1196,13 @@ def slice( 2 cm dtype: object """ - param_dtype = np.dtype(np.int32) + param_dtype = pa.int32() with acquire_spill_lock(): plc_result = plc.strings.slice.slice_strings( self._column.to_pylibcudf(mode="read"), - cudf.Scalar(start, param_dtype).device_value.c_value, - cudf.Scalar(stop, param_dtype).device_value.c_value, - cudf.Scalar(step, param_dtype).device_value.c_value, + plc.interop.from_arrow(pa.scalar(start, param_dtype)), + plc.interop.from_arrow(pa.scalar(stop, param_dtype)), + plc.interop.from_arrow(pa.scalar(step, param_dtype)), ) result = Column.from_pylibcudf(plc_result) return self._return_or_inplace(result) @@ -2174,7 +2176,7 @@ def filter_alphanum( plc.strings.char_types.StringCharacterTypes.ALL_TYPES if keep else plc.strings.char_types.StringCharacterTypes.ALPHANUM, - cudf.Scalar(repl, "str").device_value.c_value, + plc.interop.from_arrow(pa.scalar(repl, type=pa.string())), plc.strings.char_types.StringCharacterTypes.ALPHANUM if keep else plc.strings.char_types.StringCharacterTypes.ALL_TYPES, @@ -2318,7 +2320,7 @@ def slice_replace( with acquire_spill_lock(): plc_result = plc.strings.replace.replace_slice( self._column.to_pylibcudf(mode="read"), - cudf.Scalar(repl, "str").device_value.c_value, + plc.interop.from_arrow(pa.scalar(repl, type=pa.string())), start, stop, ) @@ -2499,7 +2501,7 @@ def get_json_object( with acquire_spill_lock(): plc_result = plc.json.get_json_object( self._column.to_pylibcudf(mode="read"), - cudf.Scalar(json_path, "str").device_value.c_value, + plc.interop.from_arrow(pa.scalar(json_path)), options, ) result = Column.from_pylibcudf(plc_result) @@ -2657,7 +2659,12 @@ def split( if regex is True: data = self._column.split_re(pat, n) else: - data = self._column.split(cudf.Scalar(pat, "str"), n) + data = self._column.split( + plc.interop.from_arrow( + pa.scalar(pat, type=pa.string()) + ), + n, + ) if len(data) == 1 and data[0].null_count == len(self._column): result_table = {} else: @@ -2667,7 +2674,7 @@ def split( result_table = self._column.split_record_re(pat, n) else: result_table = self._column.split_record( - cudf.Scalar(pat, "str"), n + plc.interop.from_arrow(pa.scalar(pat, type=pa.string())), n ) return self._return_or_inplace(result_table, expand=expand) @@ -2829,7 +2836,12 @@ def rsplit( if regex is True: data = self._column.rsplit_re(pat, n) else: - data = self._column.rsplit(cudf.Scalar(pat, "str"), n) + data = self._column.rsplit( + plc.interop.from_arrow( + pa.scalar(pat, type=pa.string()) + ), + n, + ) if len(data) == 1 and data[0].null_count == len(self._column): result_table = {} else: @@ 
-2839,7 +2851,7 @@ def rsplit( result_table = self._column.rsplit_record_re(pat, n) else: result_table = self._column.rsplit_record( - cudf.Scalar(pat, "str"), n + plc.interop.from_arrow(pa.scalar(pat, type=pa.string())), n ) return self._return_or_inplace(result_table, expand=expand) @@ -2924,7 +2936,9 @@ def partition(self, sep: str = " ", expand: bool = True) -> SeriesOrIndex: sep = " " return self._return_or_inplace( - self._column.partition(cudf.Scalar(sep, "str")), + self._column.partition( + plc.interop.from_arrow(pa.scalar(sep, type=pa.string())) + ), expand=expand, ) @@ -2989,7 +3003,9 @@ def rpartition(self, sep: str = " ", expand: bool = True) -> SeriesOrIndex: sep = " " return self._return_or_inplace( - self._column.rpartition(cudf.Scalar(sep, "str")), + self._column.rpartition( + plc.interop.from_arrow(pa.scalar(sep, type=pa.string())) + ), expand=expand, ) @@ -3303,7 +3319,7 @@ def _strip( plc_result = plc.strings.strip.strip( self._column.to_pylibcudf(mode="read"), side, - cudf.Scalar(to_strip, "str").device_value.c_value, + plc.interop.from_arrow(pa.scalar(to_strip, type=pa.string())), ) result = Column.from_pylibcudf(plc_result) return self._return_or_inplace(result) @@ -3920,7 +3936,7 @@ def _starts_ends_with( f"{type(pat).__name__}" ) elif is_scalar(pat): - plc_pat = cudf.Scalar(pat, "str").device_value.c_value + plc_pat = plc.interop.from_arrow(pa.scalar(pat, type=pa.string())) else: plc_pat = column.as_column(pat, dtype="str").to_pylibcudf( mode="read" @@ -4120,7 +4136,7 @@ def _find( with acquire_spill_lock(): plc_result = method( self._column.to_pylibcudf(mode="read"), - cudf.Scalar(sub, "str").device_value.c_value, + plc.interop.from_arrow(pa.scalar(sub, type=pa.string())), start, end, ) @@ -4603,7 +4619,7 @@ def filter_characters( plc.strings.translate.FilterType.KEEP if keep else plc.strings.translate.FilterType.REMOVE, - cudf.Scalar(repl, "str").device_value.c_value, + plc.interop.from_arrow(pa.scalar(repl, type=pa.string())), ) result = Column.from_pylibcudf(plc_result) return self._return_or_inplace(result) @@ -4710,10 +4726,10 @@ def tokenize(self, delimiter: str = " ") -> SeriesOrIndex: if isinstance(delim, Column): result = self._return_or_inplace( - self._column.tokenize_column(delim), + self._column.tokenize_column(delim), # type: ignore[arg-type] retain_index=False, ) - elif isinstance(delim, cudf.Scalar): + elif isinstance(delim, plc.Scalar): result = self._return_or_inplace( self._column.tokenize_scalar(delim), retain_index=False, @@ -4851,10 +4867,10 @@ def token_count(self, delimiter: str = " ") -> SeriesOrIndex: delim = _massage_string_arg(delimiter, "delimiter", allow_col=True) if isinstance(delim, Column): return self._return_or_inplace( - self._column.count_tokens_column(delim) + self._column.count_tokens_column(delim) # type: ignore[arg-type] ) - elif isinstance(delim, cudf.Scalar): + elif isinstance(delim, plc.Scalar): return self._return_or_inplace( self._column.count_tokens_scalar(delim) # type: ignore[arg-type] ) @@ -5112,7 +5128,7 @@ def replace_tokens( self._column.replace_tokens( targets_column, # type: ignore[arg-type] replacements_column, # type: ignore[arg-type] - cudf.Scalar(delimiter, dtype="str"), + plc.interop.from_arrow(pa.scalar(delimiter, type=pa.string())), ), ) @@ -5181,8 +5197,10 @@ def filter_tokens( return self._return_or_inplace( self._column.filter_tokens( min_token_length, - cudf.Scalar(replacement, dtype="str"), - cudf.Scalar(delimiter, dtype="str"), + plc.interop.from_arrow( + pa.scalar(replacement, type=pa.string()) + 
), + plc.interop.from_arrow(pa.scalar(delimiter, type=pa.string())), ), ) @@ -5501,12 +5519,12 @@ def jaccard_index(self, input: cudf.Series, width: int) -> SeriesOrIndex: def _massage_string_arg( value, name, allow_col: bool = False -) -> StringColumn | cudf.Scalar: +) -> StringColumn | plc.Scalar: if isinstance(value, cudf.Scalar): return value if isinstance(value, str): - return cudf.Scalar(value, dtype="str") + return plc.interop.from_arrow(pa.scalar(value, type=pa.string())) allowed_types = ["Scalar"] @@ -5747,8 +5765,8 @@ def sum( with acquire_spill_lock(): plc_column = plc.strings.combine.join_strings( result_col.to_pylibcudf(mode="read"), - cudf.Scalar("").device_value.c_value, - cudf.Scalar(None, "str").device_value.c_value, + plc.interop.from_arrow(pa.scalar("")), + plc.interop.from_arrow(pa.scalar(None, type=pa.string())), ) return Column.from_pylibcudf(plc_column).element_indexing(0) else: @@ -5766,7 +5784,7 @@ def as_numerical_column(self, dtype: Dtype) -> NumericalColumn: self.to_pylibcudf(mode="read") ) result = Column.from_pylibcudf(plc_column) - return (result > cudf.Scalar(0, dtype="int8")).fillna(False) + return (result > np.int8(0)).fillna(False) elif out_dtype.kind in {"i", "u"}: if not self.is_integer().all(): raise ValueError( @@ -6033,8 +6051,10 @@ def _binaryop( rhs.to_pylibcudf(mode="read"), ] ), - cudf.Scalar("").device_value.c_value, - cudf.Scalar(None, "str").device_value.c_value, + plc.interop.from_arrow(pa.scalar("")), + plc.interop.from_arrow( + pa.scalar(None, type=pa.string()) + ), ) return Column.from_pylibcudf(plc_column) elif op in { @@ -6120,11 +6140,11 @@ def jaccard_index(self, other: Self, width: int) -> NumericalColumn: return type(self).from_pylibcudf(result) # type: ignore[return-value] @acquire_spill_lock() - def generate_ngrams(self, ngrams: int, separator: cudf.Scalar) -> Self: + def generate_ngrams(self, ngrams: int, separator: plc.Scalar) -> Self: result = plc.nvtext.generate_ngrams.generate_ngrams( self.to_pylibcudf(mode="read"), ngrams, - separator.device_value.c_value, + separator, ) return type(self).from_pylibcudf(result) # type: ignore[return-value] @@ -6160,13 +6180,13 @@ def edit_distance_matrix(self) -> ListColumn: def byte_pair_encoding( self, merge_pairs: plc.nvtext.byte_pair_encode.BPEMergePairs, - separator: cudf.Scalar, + separator: str, ) -> Self: return type(self).from_pylibcudf( # type: ignore[return-value] plc.nvtext.byte_pair_encode.byte_pair_encoding( self.to_pylibcudf(mode="read"), merge_pairs, - separator.device_value.c_value, + plc.interop.from_arrow(pa.scalar(separator)), ) ) @@ -6174,15 +6194,15 @@ def byte_pair_encoding( def ngrams_tokenize( self, ngrams: int, - delimiter: cudf.Scalar, - separator: cudf.Scalar, + delimiter: plc.Scalar, + separator: plc.Scalar, ) -> Self: return type(self).from_pylibcudf( # type: ignore[return-value] plc.nvtext.ngrams_tokenize.ngrams_tokenize( self.to_pylibcudf(mode="read"), ngrams, - delimiter.device_value.c_value, - separator.device_value.c_value, + delimiter, + separator, ) ) @@ -6205,14 +6225,14 @@ def normalize_characters(self, do_lower: bool = True) -> Self: @acquire_spill_lock() def replace_tokens( - self, targets: Self, replacements: Self, delimiter: cudf.Scalar + self, targets: Self, replacements: Self, delimiter: plc.Scalar ) -> Self: return type(self).from_pylibcudf( # type: ignore[return-value] plc.nvtext.replace.replace_tokens( self.to_pylibcudf(mode="read"), targets.to_pylibcudf(mode="read"), replacements.to_pylibcudf(mode="read"), - delimiter.device_value.c_value, + 
delimiter, ) ) @@ -6220,15 +6240,15 @@ def replace_tokens( def filter_tokens( self, min_token_length: int, - replacement: cudf.Scalar, - delimiter: cudf.Scalar, + replacement: plc.Scalar, + delimiter: plc.Scalar, ) -> Self: return type(self).from_pylibcudf( # type: ignore[return-value] plc.nvtext.replace.filter_tokens( self.to_pylibcudf(mode="read"), min_token_length, - replacement.device_value.c_value, - delimiter.device_value.c_value, + replacement, + delimiter, ) ) @@ -6279,10 +6299,10 @@ def subword_tokenize( return tokens, masks, metadata @acquire_spill_lock() - def tokenize_scalar(self, delimiter: cudf.Scalar) -> Self: + def tokenize_scalar(self, delimiter: plc.Scalar) -> Self: return type(self).from_pylibcudf( # type: ignore[return-value] plc.nvtext.tokenize.tokenize_scalar( - self.to_pylibcudf(mode="read"), delimiter.device_value.c_value + self.to_pylibcudf(mode="read"), delimiter ) ) @@ -6296,10 +6316,10 @@ def tokenize_column(self, delimiters: Self) -> Self: ) @acquire_spill_lock() - def count_tokens_scalar(self, delimiter: cudf.Scalar) -> NumericalColumn: + def count_tokens_scalar(self, delimiter: plc.Scalar) -> NumericalColumn: return type(self).from_pylibcudf( # type: ignore[return-value] plc.nvtext.tokenize.count_tokens_scalar( - self.to_pylibcudf(mode="read"), delimiter.device_value.c_value + self.to_pylibcudf(mode="read"), delimiter ) ) @@ -6324,25 +6344,25 @@ def character_tokenize(self) -> Self: def tokenize_with_vocabulary( self, vocabulary: plc.nvtext.tokenize.TokenizeVocabulary, - delimiter: cudf.Scalar, + delimiter: str, default_id: int, ) -> Self: return type(self).from_pylibcudf( # type: ignore[return-value] plc.nvtext.tokenize.tokenize_with_vocabulary( self.to_pylibcudf(mode="read"), vocabulary, - delimiter.device_value.c_value, + plc.interop.from_arrow(pa.scalar(delimiter)), default_id, ) ) @acquire_spill_lock() - def detokenize(self, indices: ColumnBase, separator: cudf.Scalar) -> Self: + def detokenize(self, indices: ColumnBase, separator: plc.Scalar) -> Self: return type(self).from_pylibcudf( # type: ignore[return-value] plc.nvtext.tokenize.detokenize( self.to_pylibcudf(mode="read"), indices.to_pylibcudf(mode="read"), - separator.device_value.c_value, + separator, ) ) @@ -6491,23 +6511,23 @@ def rsplit_re(self, pattern: str, maxsplit: int) -> dict[int, Self]: @acquire_spill_lock() def _split_record( self, - delimiter: cudf.Scalar, + delimiter: plc.Scalar, maxsplit: int, method: Callable[[plc.Column, plc.Scalar, int], plc.Column], ) -> Self: plc_column = method( self.to_pylibcudf(mode="read"), - delimiter.device_value.c_value, + delimiter, maxsplit, ) return type(self).from_pylibcudf(plc_column) # type: ignore[return-value] - def split_record(self, delimiter: cudf.Scalar, maxsplit: int) -> Self: + def split_record(self, delimiter: plc.Scalar, maxsplit: int) -> Self: return self._split_record( delimiter, maxsplit, plc.strings.split.split.split_record ) - def rsplit_record(self, delimiter: cudf.Scalar, maxsplit: int) -> Self: + def rsplit_record(self, delimiter: plc.Scalar, maxsplit: int) -> Self: return self._split_record( delimiter, maxsplit, plc.strings.split.split.rsplit_record ) @@ -6515,13 +6535,13 @@ def rsplit_record(self, delimiter: cudf.Scalar, maxsplit: int) -> Self: @acquire_spill_lock() def _split( self, - delimiter: cudf.Scalar, + delimiter: plc.Scalar, maxsplit: int, method: Callable[[plc.Column, plc.Scalar, int], plc.Column], ) -> dict[int, Self]: plc_table = method( self.to_pylibcudf(mode="read"), - delimiter.device_value.c_value, + delimiter, 
maxsplit, ) return dict( @@ -6531,21 +6551,21 @@ def _split( ) ) - def split(self, delimiter: cudf.Scalar, maxsplit: int) -> dict[int, Self]: + def split(self, delimiter: plc.Scalar, maxsplit: int) -> dict[int, Self]: return self._split(delimiter, maxsplit, plc.strings.split.split.split) - def rsplit(self, delimiter: cudf.Scalar, maxsplit: int) -> dict[int, Self]: + def rsplit(self, delimiter: plc.Scalar, maxsplit: int) -> dict[int, Self]: return self._split(delimiter, maxsplit, plc.strings.split.split.rsplit) @acquire_spill_lock() def _partition( self, - delimiter: cudf.Scalar, + delimiter: plc.Scalar, method: Callable[[plc.Column, plc.Scalar], plc.Column], ) -> dict[int, Self]: plc_table = method( self.to_pylibcudf(mode="read"), - delimiter.device_value.c_value, + delimiter, ) return dict( enumerate( @@ -6554,12 +6574,12 @@ def _partition( ) ) - def partition(self, delimiter: cudf.Scalar) -> dict[int, Self]: + def partition(self, delimiter: plc.Scalar) -> dict[int, Self]: return self._partition( delimiter, plc.strings.split.partition.partition ) - def rpartition(self, delimiter: cudf.Scalar) -> dict[int, Self]: + def rpartition(self, delimiter: plc.Scalar) -> dict[int, Self]: return self._partition( delimiter, plc.strings.split.partition.rpartition ) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 40d36a6ff56..5cea35ac0d6 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -6229,10 +6229,8 @@ def isin(self, values): # TODO: propagate nulls through isin # https://github.com/rapidsai/cudf/issues/7556 - fill_value = cudf.Scalar(False) - def make_false_column_like_self(): - return column.as_column(fill_value, length=len(self), dtype="bool") + return column.as_column(False, length=len(self), dtype="bool") # Preprocess different input types into a mapping from column names to # a list of values to check. diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 6ae524d6346..17302311a7e 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -14,6 +14,7 @@ import cupy as cp import numpy as np import pandas as pd +import pyarrow as pa import pylibcudf as plc @@ -45,6 +46,7 @@ from cudf.core.mixins import Reducible, Scannable from cudf.core.multiindex import MultiIndex from cudf.core.udf.groupby_utils import _can_be_jitted, jit_groupby_apply +from cudf.utils.dtypes import cudf_dtype_to_pa_type from cudf.utils.performance_tracking import _performance_tracking from cudf.utils.utils import GetAttrGetItemMixin @@ -852,7 +854,9 @@ def _shift( plc.table.Table([col.to_pylibcudf(mode="read") for col in values]), [periods] * len(values), [ - cudf.Scalar(val, dtype=col.dtype).device_value.c_value + plc.interop.from_arrow( + pa.scalar(val, type=cudf_dtype_to_pa_type(col.dtype)) + ) for val, col in zip(fill_values, values) ], ) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 54635b162bc..b535e8aabd2 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -2327,8 +2327,7 @@ def microsecond(self) -> Index: # Need to manually promote column to int32 because # pandas-matching binop behaviour requires that this # __mul__ returns an int16 column. 
- self._column.millisecond.astype("int32") - * cudf.Scalar(1000, dtype="int32") + self._column.millisecond.astype("int32") * np.int32(1000) ) + self._column.microsecond, name=self.name, diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index c779e1ebe97..eded681baf0 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -3255,7 +3255,7 @@ def duplicated( ) distinct = libcudf.column.Column.from_pylibcudf(plc_column) result = copying.scatter( - [cudf.Scalar(False, dtype=bool)], + [cudf.Scalar(False)], distinct, [as_column(True, length=len(self), dtype=bool)], bounds_check=False, diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 3b047ee5ed4..805f9f9a9f9 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -4106,8 +4106,8 @@ def microsecond(self) -> Series: # Need to manually promote column to int32 because # pandas-matching binop behaviour requires that this # __mul__ returns an int16 column. - extra = self.series._column.millisecond.astype("int32") * cudf.Scalar( - 1000, dtype="int32" + extra = self.series._column.millisecond.astype("int32") * np.int32( + 1000 ) return self._return_result_like_self(micro + extra) diff --git a/python/cudf/cudf/core/tokenize_vocabulary.py b/python/cudf/cudf/core/tokenize_vocabulary.py index fb8b9b3131c..58dabc85491 100644 --- a/python/cudf/cudf/core/tokenize_vocabulary.py +++ b/python/cudf/cudf/core/tokenize_vocabulary.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. +# Copyright (c) 2023-2025, NVIDIA CORPORATION. from __future__ import annotations @@ -42,9 +42,8 @@ def tokenize( """ if delimiter is None: delimiter = "" - delim = cudf.Scalar(delimiter, dtype="str") result = text._column.tokenize_with_vocabulary( - self.vocabulary, delim, default_id + self.vocabulary, delimiter, default_id ) return cudf.Series._from_column(result) diff --git a/python/cudf/cudf/core/window/rolling.py b/python/cudf/cudf/core/window/rolling.py index 2f8a6d9e5e7..e2c332f34f5 100644 --- a/python/cudf/cudf/core/window/rolling.py +++ b/python/cudf/cudf/core/window/rolling.py @@ -1,10 +1,11 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION +# Copyright (c) 2020-2025, NVIDIA CORPORATION from __future__ import annotations import warnings from typing import TYPE_CHECKING import numba +import numpy as np import pandas as pd from pandas.api.indexers import BaseIndexer @@ -273,12 +274,8 @@ def _apply_agg_column(self, source_column, agg_name): end = as_column(end, dtype="int32") idx = as_column(range(len(start))) - preceding_window = (idx - start + cudf.Scalar(1, "int32")).astype( - "int32" - ) - following_window = (end - idx - cudf.Scalar(1, "int32")).astype( - "int32" - ) + preceding_window = (idx - start + np.int32(1)).astype("int32") + following_window = (end - idx - np.int32(1)).astype("int32") window = None else: preceding_window = as_column(self.window) diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py index da0aa5be6f5..b1f81edfc54 100644 --- a/python/cudf/cudf/tests/test_list.py +++ b/python/cudf/cudf/tests/test_list.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. 
import functools import operator @@ -14,6 +14,7 @@ from cudf.core.column.column import column_empty from cudf.testing import assert_eq from cudf.testing._utils import DATETIME_TYPES, NUMERIC_TYPES, TIMEDELTA_TYPES +from cudf.utils.dtypes import cudf_dtype_to_pa_type @pytest.mark.parametrize( @@ -423,7 +424,9 @@ def test_get_ind_sequence(): def test_contains_scalar(data, scalar, expect): sr = cudf.Series(data) expect = cudf.Series(expect) - got = sr.list.contains(cudf.Scalar(scalar, sr.dtype.element_type)) + got = sr.list.contains( + pa.scalar(scalar, type=cudf_dtype_to_pa_type(sr.dtype.element_type)) + ) assert_eq(expect, got) @@ -455,7 +458,9 @@ def test_contains_scalar(data, scalar, expect): def test_contains_null_search_key(data, expect): sr = cudf.Series(data) expect = cudf.Series(expect, dtype="bool") - got = sr.list.contains(cudf.Scalar(cudf.NA, sr.dtype.element_type)) + got = sr.list.contains( + pa.scalar(None, type=cudf_dtype_to_pa_type(sr.dtype.element_type)) + ) assert_eq(expect, got) @@ -518,12 +523,12 @@ def test_contains_invalid(data, scalar): ), ( [["d", None, "e"], [None, "f"], []], - cudf.Scalar(cudf.NA, "O"), + pa.scalar(None, type=pa.string()), [None, None, None], ), ( [None, [10, 9, 8], [5, 8, None]], - cudf.Scalar(cudf.NA, "int64"), + pa.scalar(None, type=pa.int64()), [None, None, None], ), ], @@ -532,7 +537,11 @@ def test_index(data, search_key, expect): sr = cudf.Series(data) expect = cudf.Series(expect, dtype="int32") if is_scalar(search_key): - got = sr.list.index(cudf.Scalar(search_key, sr.dtype.element_type)) + got = sr.list.index( + pa.scalar( + search_key, type=cudf_dtype_to_pa_type(sr.dtype.element_type) + ) + ) else: got = sr.list.index( cudf.Series(search_key, dtype=sr.dtype.element_type) From 76f1c8ba9f2fd7ab6a6f3fd017ce11dd27963827 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Wed, 8 Jan 2025 15:02:10 -0600 Subject: [PATCH 8/9] Use latest ci-conda images (#17690) Use `ci-conda:latest` tags for all jobs. All jobs should now support `ci-conda:latest`, and older pinnings are probably not necessary anymore. 
Authors: - Bradley Dice (https://github.com/bdice) Approvers: - James Lamb (https://github.com/jameslamb) URL: https://github.com/rapidsai/cudf/pull/17690 --- .github/workflows/build.yaml | 2 +- .github/workflows/pr.yaml | 6 +++--- .github/workflows/test.yaml | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index fb7182f4133..65aebfb7f8c 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -62,7 +62,7 @@ jobs: arch: "amd64" branch: ${{ inputs.branch }} build_type: ${{ inputs.build_type || 'branch' }} - container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11" + container_image: "rapidsai/ci-conda:latest" date: ${{ inputs.date }} node_type: "gpu-v100-latest-1" run_script: "ci/build_docs.sh" diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 9d79733703c..e955b8f1f80 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -186,7 +186,7 @@ jobs: build_type: pull-request node_type: "gpu-v100-latest-1" arch: "amd64" - container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11" + container_image: "rapidsai/ci-conda:latest" run_script: "ci/test_java.sh" static-configure: needs: checks @@ -207,7 +207,7 @@ jobs: build_type: pull-request node_type: "gpu-v100-latest-1" arch: "amd64" - container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11" + container_image: "rapidsai/ci-conda:latest" run_script: "ci/test_notebooks.sh" docs-build: needs: conda-python-build @@ -217,7 +217,7 @@ jobs: build_type: pull-request node_type: "gpu-v100-latest-1" arch: "amd64" - container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11" + container_image: "rapidsai/ci-conda:latest" run_script: "ci/build_docs.sh" wheel-build-libcudf: needs: checks diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 858352f515d..dc82c17022a 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -41,7 +41,7 @@ jobs: sha: ${{ inputs.sha }} node_type: "gpu-v100-latest-1" arch: "amd64" - container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11" + container_image: "rapidsai/ci-conda:latest" run_script: "ci/test_cpp_memcheck.sh" static-configure: secrets: inherit @@ -94,7 +94,7 @@ jobs: sha: ${{ inputs.sha }} node_type: "gpu-v100-latest-1" arch: "amd64" - container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11" + container_image: "rapidsai/ci-conda:latest" run_script: "ci/test_java.sh" conda-notebook-tests: secrets: inherit @@ -106,7 +106,7 @@ jobs: sha: ${{ inputs.sha }} node_type: "gpu-v100-latest-1" arch: "amd64" - container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11" + container_image: "rapidsai/ci-conda:latest" run_script: "ci/test_notebooks.sh" wheel-tests-cudf: secrets: inherit From cb77046d8baad31f4856c097f7052b3a3858c363 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Wed, 8 Jan 2025 21:05:41 -0500 Subject: [PATCH 9/9] Bump Polars version to <1.18 (#17632) This PR upgrades the Polars version to 1.17. It xfails some polars tests due to known issues and adds the `maintain_order` param to joins (not implemented yet). 
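As an illustrative sketch (not from this patch) of the user-visible behaviour, assuming polars 1.17 with this cudf-polars build installed: a join with the default `maintain_order="none"` still translates and runs on the GPU, while any other value reaches the new `NotImplementedError` guard in the IR. The frame contents and the broad exception catch (polars typically re-surfaces the failure as a `ComputeError`) are assumptions made only for this example.

```python
import polars as pl

left = pl.LazyFrame({"a": [1, 2, 3], "b": [10, 20, 30]})
right = pl.LazyFrame({"a": [2, 3], "c": ["x", "y"]})
gpu = pl.GPUEngine(raise_on_fail=True)

# Default maintain_order="none": translated and executed by cudf-polars.
print(left.join(right, on="a", how="inner").collect(engine=gpu))

# Any other value is rejected by the GPU engine (without raise_on_fail=True,
# polars would instead fall back to its CPU engine).
try:
    left.join(right, on="a", how="inner", maintain_order="left").collect(engine=gpu)
except Exception as err:  # typically surfaced as polars.exceptions.ComputeError
    print(f"maintain_order='left' is not supported on the GPU yet: {err}")
```
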
Notable change Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/17632 --- .../all_cuda-118_arch-x86_64.yaml | 2 +- .../all_cuda-125_arch-x86_64.yaml | 2 +- conda/recipes/cudf-polars/meta.yaml | 4 +- dependencies.yaml | 2 +- python/cudf_polars/cudf_polars/dsl/ir.py | 42 ++++++++++++++++--- .../cudf_polars/cudf_polars/dsl/translate.py | 4 +- .../cudf_polars/cudf_polars/testing/plugin.py | 21 ++++++++++ python/cudf_polars/pyproject.toml | 4 +- python/cudf_polars/tests/test_join.py | 11 ++++- 9 files changed, 76 insertions(+), 16 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index a4b3f4fe174..6ff9a5f832b 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -66,7 +66,7 @@ dependencies: - pandas - pandas>=2.0,<2.2.4dev0 - pandoc -- polars>=1.11,<1.15 +- polars>=1.11,<1.18 - pre-commit - ptxcompiler - pyarrow>=14.0.0,<19.0.0a0 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 7173c955116..e82192b8cdb 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -64,7 +64,7 @@ dependencies: - pandas - pandas>=2.0,<2.2.4dev0 - pandoc -- polars>=1.11,<1.15 +- polars>=1.11,<1.18 - pre-commit - pyarrow>=14.0.0,<19.0.0a0 - pydata-sphinx-theme!=0.14.2 diff --git a/conda/recipes/cudf-polars/meta.yaml b/conda/recipes/cudf-polars/meta.yaml index b6c03dc1bc2..7a0005497df 100644 --- a/conda/recipes/cudf-polars/meta.yaml +++ b/conda/recipes/cudf-polars/meta.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. {% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') %} {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} @@ -43,7 +43,7 @@ requirements: run: - python - pylibcudf ={{ version }} - - polars >=1.11,<1.15 + - polars >=1.11,<1.18 - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} test: diff --git a/dependencies.yaml b/dependencies.yaml index b0f217a6770..50b4cd3c372 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -747,7 +747,7 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - polars>=1.11,<1.15 + - polars>=1.11,<1.18 run_cudf_polars_experimental: common: - output_types: [conda, requirements, pyproject] diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py index 1c1d4860eec..fd56329a48e 100644 --- a/python/cudf_polars/cudf_polars/dsl/ir.py +++ b/python/cudf_polars/cudf_polars/dsl/ir.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. # SPDX-License-Identifier: Apache-2.0 """ DSL nodes for the LogicalPlan of polars. 
@@ -34,9 +34,11 @@ from cudf_polars.utils.versions import POLARS_VERSION_GT_112 if TYPE_CHECKING: - from collections.abc import Callable, Hashable, MutableMapping, Sequence + from collections.abc import Callable, Hashable, Iterable, MutableMapping, Sequence from typing import Literal + from polars.polars import _expr_nodes as pl_expr + from cudf_polars.typing import Schema @@ -1019,7 +1021,27 @@ class ConditionalJoin(IR): __slots__ = ("ast_predicate", "options", "predicate") _non_child = ("schema", "predicate", "options") predicate: expr.Expr - options: tuple + """Expression predicate to join on""" + options: tuple[ + tuple[ + str, + pl_expr.Operator | Iterable[pl_expr.Operator], + ], + bool, + tuple[int, int] | None, + str, + bool, + Literal["none", "left", "right", "left_right", "right_left"], + ] + """ + tuple of options: + - predicates: tuple of ir join type (eg. ie_join) and (In)Equality conditions + - join_nulls: do nulls compare equal? + - slice: optional slice to perform after joining. + - suffix: string suffix for right columns if names match + - coalesce: should key columns be coalesced (only makes sense for outer joins) + - maintain_order: which DataFrame row order to preserve, if any + """ def __init__( self, schema: Schema, predicate: expr.Expr, options: tuple, left: IR, right: IR @@ -1029,15 +1051,16 @@ def __init__( self.options = options self.children = (left, right) self.ast_predicate = to_ast(predicate) - _, join_nulls, zlice, suffix, coalesce = self.options + _, join_nulls, zlice, suffix, coalesce, maintain_order = self.options # Preconditions from polars assert not join_nulls assert not coalesce + assert maintain_order == "none" if self.ast_predicate is None: raise NotImplementedError( f"Conditional join with predicate {predicate}" ) # pragma: no cover; polars never delivers expressions we can't handle - self._non_child_args = (self.ast_predicate, zlice, suffix) + self._non_child_args = (self.ast_predicate, zlice, suffix, maintain_order) @classmethod def do_evaluate( @@ -1045,6 +1068,7 @@ def do_evaluate( predicate: plc.expressions.Expression, zlice: tuple[int, int] | None, suffix: str, + maintain_order: Literal["none", "left", "right", "left_right", "right_left"], left: DataFrame, right: DataFrame, ) -> DataFrame: @@ -1088,6 +1112,7 @@ class Join(IR): tuple[int, int] | None, str, bool, + Literal["none", "left", "right", "left_right", "right_left"], ] """ tuple of options: @@ -1096,6 +1121,7 @@ class Join(IR): - slice: optional slice to perform after joining. 
- suffix: string suffix for right columns if names match - coalesce: should key columns be coalesced (only makes sense for outer joins) + - maintain_order: which DataFrame row order to preserve, if any """ def __init__( @@ -1113,6 +1139,9 @@ def __init__( self.options = options self.children = (left, right) self._non_child_args = (self.left_on, self.right_on, self.options) + # TODO: Implement maintain_order + if options[5] != "none": + raise NotImplementedError("maintain_order not implemented yet") if any( isinstance(e.value, expr.Literal) for e in itertools.chain(self.left_on, self.right_on) @@ -1222,12 +1251,13 @@ def do_evaluate( tuple[int, int] | None, str, bool, + Literal["none", "left", "right", "left_right", "right_left"], ], left: DataFrame, right: DataFrame, ) -> DataFrame: """Evaluate and return a dataframe.""" - how, join_nulls, zlice, suffix, coalesce = options + how, join_nulls, zlice, suffix, coalesce, _ = options if how == "cross": # Separate implementation, since cross_join returns the # result, not the gather maps diff --git a/python/cudf_polars/cudf_polars/dsl/translate.py b/python/cudf_polars/cudf_polars/dsl/translate.py index 37cf36dc4dd..2138ac0c700 100644 --- a/python/cudf_polars/cudf_polars/dsl/translate.py +++ b/python/cudf_polars/cudf_polars/dsl/translate.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. # SPDX-License-Identifier: Apache-2.0 """Translate polars IR representation to ours.""" @@ -84,7 +84,7 @@ def translate_ir(self, *, n: int | None = None) -> ir.IR: # IR is versioned with major.minor, minor is bumped for backwards # compatible changes (e.g. adding new nodes), major is bumped for # incompatible changes (e.g. renaming nodes). - if (version := self.visitor.version()) >= (4, 0): + if (version := self.visitor.version()) >= (4, 3): e = NotImplementedError( f"No support for polars IR {version=}" ) # pragma: no cover; no such version for now. 
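As a minimal sketch (values invented for illustration, not taken from the patch), the six-element `options` tuple documented in the `Join` and `ConditionalJoin` docstrings above unpacks like this:

```python
# (how, join_nulls, slice, suffix, coalesce, maintain_order) -- example values only
options = ("inner", False, None, "_right", False, "none")
how, join_nulls, zlice, suffix, coalesce, maintain_order = options
# Anything other than "none" for maintain_order raises NotImplementedError in Join.__init__.
assert maintain_order == "none"
```
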
diff --git a/python/cudf_polars/cudf_polars/testing/plugin.py b/python/cudf_polars/cudf_polars/testing/plugin.py index 87628242838..c16df320ceb 100644 --- a/python/cudf_polars/cudf_polars/testing/plugin.py +++ b/python/cudf_polars/cudf_polars/testing/plugin.py @@ -123,6 +123,11 @@ def pytest_configure(config: pytest.Config) -> None: "tests/unit/io/test_scan.py::test_scan_include_file_name[False-scan_parquet-write_parquet]": "Need to add include_file_path to IR", "tests/unit/io/test_scan.py::test_scan_include_file_name[False-scan_csv-write_csv]": "Need to add include_file_path to IR", "tests/unit/io/test_scan.py::test_scan_include_file_name[False-scan_ndjson-write_ndjson]": "Need to add include_file_path to IR", + "tests/unit/io/test_write.py::test_write_async[read_parquet-write_parquet]": "Need to add include_file_path to IR", + "tests/unit/io/test_write.py::test_write_async[-write_csv]": "Need to add include_file_path to IR", + "tests/unit/io/test_write.py::test_write_async[read_parquet-]": "Need to add include_file_path to IR", + "tests/unit/io/test_write.py::test_write_async[-0]": "Need to add include_file_path to IR", + "tests/unit/io/test_write.py::test_write_async[-2]": "Need to add include_file_path to IR", "tests/unit/lazyframe/test_engine_selection.py::test_engine_import_error_raises[gpu]": "Expect this to pass because cudf-polars is installed", "tests/unit/lazyframe/test_engine_selection.py::test_engine_import_error_raises[engine1]": "Expect this to pass because cudf-polars is installed", "tests/unit/lazyframe/test_lazyframe.py::test_round[dtype1-123.55-1-123.6]": "Rounding midpoints is handled incorrectly", @@ -140,6 +145,22 @@ def pytest_configure(config: pytest.Config) -> None: "tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func1-func1-none]": "cudf-polars doesn't nullify division by zero", "tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func1-func2-none]": "cudf-polars doesn't nullify division by zero", "tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func1-func3-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr-broadcast_left-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr-broadcast_right-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr-broadcast_both-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr-broadcast_none-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_left-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_right-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_both-none]": "cudf-polars doesn't nullify division by zero", + 
"tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_none-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr-broadcast_left-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr-broadcast_right-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr-broadcast_both-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr-broadcast_none-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_left-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_right-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_both-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_none-none]": "cudf-polars doesn't nullify division by zero", "tests/unit/operations/test_abs.py::test_abs_duration": "Need to raise for unsupported uops on timelike values", "tests/unit/operations/test_group_by.py::test_group_by_mean_by_dtype[input7-expected7-Float32-Float32]": "Mismatching dtypes, needs cudf#15852", "tests/unit/operations/test_group_by.py::test_group_by_mean_by_dtype[input10-expected10-Date-output_dtype10]": "Unsupported groupby-agg for a particular dtype", diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml index 5904942aea2..9fb9bbf391e 100644 --- a/python/cudf_polars/pyproject.toml +++ b/python/cudf_polars/pyproject.toml @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. [build-system] build-backend = "rapids_build_backend.build" @@ -19,7 +19,7 @@ authors = [ license = { text = "Apache 2.0" } requires-python = ">=3.10" dependencies = [ - "polars>=1.11,<1.15", + "polars>=1.11,<1.18", "pylibcudf==25.2.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ diff --git a/python/cudf_polars/tests/test_join.py b/python/cudf_polars/tests/test_join.py index 2fcbbf21f1c..f1f47bfb9f1 100644 --- a/python/cudf_polars/tests/test_join.py +++ b/python/cudf_polars/tests/test_join.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. 
# SPDX-License-Identifier: Apache-2.0 from __future__ import annotations @@ -53,6 +53,15 @@ def right(): ) +@pytest.mark.parametrize( + "maintain_order", ["left", "left_right", "right_left", "right"] +) +def test_join_maintain_order_param_unsupported(left, right, maintain_order): + q = left.join(right, on=pl.col("a"), how="inner", maintain_order=maintain_order) + + assert_ir_translation_raises(q, NotImplementedError) + + @pytest.mark.parametrize( "join_expr", [