Merge branch 'branch-25.02' into streams-copying

rapidsai · Jan 14, 2025 · b28009b · b28009b
2 parents c4efa52 + 8470291
commit b28009b
Show file tree

Hide file tree

Showing 4 changed files with 32 additions and 3 deletions.
diff --git a/java/src/main/java/ai/rapids/cudf/HostUDFWrapper.java b/java/src/main/java/ai/rapids/cudf/HostUDFWrapper.java
@@ -24,8 +24,10 @@
  * <p>
  * A new host UDF aggregation implementation must extend this class and override the
  * {@code hashCode} and {@code equals} methods for such purposes.
+ * In addition, since this class implements {@code AutoCloseable}, the {@code close} method must
+ * also be overridden to delete the native UDF instance when the wrapper is closed.
  */
-public abstract class HostUDFWrapper {
+public abstract class HostUDFWrapper implements AutoCloseable {
   public final long udfNativeHandle;
 
   public HostUDFWrapper(long udfNativeHandle) {

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
@@ -2413,7 +2413,11 @@ def as_column(
                 and pa.types.is_integer(arbitrary.type)
                 and arbitrary.null_count > 0
             ):
+                # TODO: Need to re-visit this cast and fill_null
+                # calls while addressing the following issue:
+                # https://github.com/rapidsai/cudf/issues/14149
                 arbitrary = arbitrary.cast(pa.float64())
+                arbitrary = pc.fill_null(arbitrary, np.nan)
             if (
                 cudf.get_option("default_integer_bitwidth")
                 and pa.types.is_integer(arbitrary.type)

diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 import datetime
 import decimal
 import hashlib
@@ -3003,3 +3003,12 @@ def test_dtype_dtypes_equal():
     ser = cudf.Series([0])
     assert ser.dtype is ser.dtypes
     assert ser.dtypes is ser.to_pandas().dtypes
+
+
+def test_null_like_to_nan_pandas_compat():
+    # pandas-compatible mode: NaN/None input must mirror pandas.
+    values = [1, 2, np.nan, 10, None]
+    with cudf.option_context("mode.pandas_compatible", True):
+        got, expected = cudf.Series(values), pd.Series(values)
+        assert expected.dtype == got.dtype
+        assert_eq(got, expected)
diff --git a/python/cudf_polars/cudf_polars/testing/plugin.py b/python/cudf_polars/cudf_polars/testing/plugin.py
@@ -8,7 +8,9 @@
 from functools import partialmethod
 from typing import TYPE_CHECKING
 
+import fastexcel
 import pytest
+from packaging import version
 
 import polars
 
@@ -44,7 +46,7 @@ def pytest_configure(config: pytest.Config) -> None:
     )
 
 
-EXPECTED_FAILURES: Mapping[str, str] = {
+EXPECTED_FAILURES: Mapping[str, str | tuple[str, bool]] = {
     "tests/unit/io/test_csv.py::test_compressed_csv": "Need to determine if file is compressed",
     "tests/unit/io/test_csv.py::test_read_csv_only_loads_selected_columns": "Memory usage won't be correct due to GPU",
     "tests/unit/io/test_delta.py::test_scan_delta_version": "Need to expose hive partitioning",
@@ -192,6 +194,10 @@ def pytest_configure(config: pytest.Config) -> None:
     # Maybe flaky, order-dependent?
     "tests/unit/test_projections.py::test_schema_full_outer_join_projection_pd_13287": "Order-specific result check, query is correct but in different order",
     "tests/unit/test_queries.py::test_group_by_agg_equals_zero_3535": "libcudf sums all nulls to null, not zero",
+    "tests/unit/io/test_spreadsheet.py::test_write_excel_bytes[calamine]": (
+        "Fails when fastexcel version >= 0.12.1. tracking issue: https://github.com/pola-rs/polars/issues/20698",
+        version.parse(fastexcel.__version__) >= version.parse("0.12.1"),
+    ),
 }
 
 
@@ -219,4 +225,17 @@ def pytest_collection_modifyitems(
         if item.nodeid in TESTS_TO_SKIP:
             item.add_marker(pytest.mark.skip(reason=TESTS_TO_SKIP[item.nodeid]))
         elif item.nodeid in EXPECTED_FAILURES:
-            item.add_marker(pytest.mark.xfail(reason=EXPECTED_FAILURES[item.nodeid]))
+            if isinstance(EXPECTED_FAILURES[item.nodeid], tuple):
+                # the second entry in the tuple is the condition to xfail on
+                item.add_marker(
+                    pytest.mark.xfail(
+                        condition=EXPECTED_FAILURES[item.nodeid][1],
+                        reason=EXPECTED_FAILURES[item.nodeid][0],
+                    ),
+                )
+            else:
+                # plain-string entries are unconditional expected failures; this
+                # must stay in the else so tuple entries keep only their conditional marker
+                item.add_marker(
+                    pytest.mark.xfail(reason=EXPECTED_FAILURES[item.nodeid])
+                )