diff --git a/java/src/main/java/ai/rapids/cudf/HostUDFWrapper.java b/java/src/main/java/ai/rapids/cudf/HostUDFWrapper.java
index 0b6ecf2e140..124f2c99188 100644
--- a/java/src/main/java/ai/rapids/cudf/HostUDFWrapper.java
+++ b/java/src/main/java/ai/rapids/cudf/HostUDFWrapper.java
@@ -24,8 +24,10 @@
  *
  * A new host UDF aggregation implementation must extend this class and override the
  * {@code hashCode} and {@code equals} methods for such purposes.
+ * In addition, since this class implements {@code AutoCloseable}, the {@code close} method must
+ * also be overridden to automatically delete the native UDF instance upon class destruction.
  */
-public abstract class HostUDFWrapper {
+public abstract class HostUDFWrapper implements AutoCloseable {
   public final long udfNativeHandle;
 
   public HostUDFWrapper(long udfNativeHandle) {
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 30da8727366..19f2802553d 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -2413,7 +2413,11 @@ def as_column(
         and pa.types.is_integer(arbitrary.type)
         and arbitrary.null_count > 0
     ):
+        # TODO: Need to re-visit this cast and fill_null
+        # calls while addressing the following issue:
+        # https://github.com/rapidsai/cudf/issues/14149
         arbitrary = arbitrary.cast(pa.float64())
+        arbitrary = pc.fill_null(arbitrary, np.nan)
     if (
         cudf.get_option("default_integer_bitwidth")
         and pa.types.is_integer(arbitrary.type)
diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py
index f8697c5c6b8..891c0ede9a4 100644
--- a/python/cudf/cudf/tests/test_series.py
+++ b/python/cudf/cudf/tests/test_series.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.
 import datetime
 import decimal
 import hashlib
@@ -3003,3 +3003,12 @@ def test_dtype_dtypes_equal():
     ser = cudf.Series([0])
     assert ser.dtype is ser.dtypes
     assert ser.dtypes is ser.to_pandas().dtypes
+
+
+def test_null_like_to_nan_pandas_compat():
+    with cudf.option_context("mode.pandas_compatible", True):
+        ser = cudf.Series([1, 2, np.nan, 10, None])
+        pser = pd.Series([1, 2, np.nan, 10, None])
+
+        assert pser.dtype == ser.dtype
+        assert_eq(ser, pser)
diff --git a/python/cudf_polars/cudf_polars/testing/plugin.py b/python/cudf_polars/cudf_polars/testing/plugin.py
index c16df320ceb..e453a8b89b9 100644
--- a/python/cudf_polars/cudf_polars/testing/plugin.py
+++ b/python/cudf_polars/cudf_polars/testing/plugin.py
@@ -8,7 +8,9 @@
 from functools import partialmethod
 from typing import TYPE_CHECKING
 
+import fastexcel
 import pytest
+from packaging import version
 
 import polars
 
@@ -44,7 +46,7 @@ def pytest_configure(config: pytest.Config) -> None:
     )
 
 
-EXPECTED_FAILURES: Mapping[str, str] = {
+EXPECTED_FAILURES: Mapping[str, str | tuple[str, bool]] = {
     "tests/unit/io/test_csv.py::test_compressed_csv": "Need to determine if file is compressed",
     "tests/unit/io/test_csv.py::test_read_csv_only_loads_selected_columns": "Memory usage won't be correct due to GPU",
     "tests/unit/io/test_delta.py::test_scan_delta_version": "Need to expose hive partitioning",
@@ -192,6 +194,10 @@ def pytest_configure(config: pytest.Config) -> None:
     # Maybe flaky, order-dependent?
     "tests/unit/test_projections.py::test_schema_full_outer_join_projection_pd_13287": "Order-specific result check, query is correct but in different order",
     "tests/unit/test_queries.py::test_group_by_agg_equals_zero_3535": "libcudf sums all nulls to null, not zero",
+    "tests/unit/io/test_spreadsheet.py::test_write_excel_bytes[calamine]": (
+        "Fails when fastexcel version >= 0.12.1. tracking issue: https://github.com/pola-rs/polars/issues/20698",
+        version.parse(fastexcel.__version__) >= version.parse("0.12.1"),
+    ),
 }
 
 
@@ -219,4 +225,13 @@ def pytest_collection_modifyitems(
         if item.nodeid in TESTS_TO_SKIP:
             item.add_marker(pytest.mark.skip(reason=TESTS_TO_SKIP[item.nodeid]))
         elif item.nodeid in EXPECTED_FAILURES:
+            if isinstance(EXPECTED_FAILURES[item.nodeid], tuple):
+                # the second entry in the tuple is the condition to xfail on
+                item.add_marker(
+                    pytest.mark.xfail(
+                        condition=EXPECTED_FAILURES[item.nodeid][1],
+                        reason=EXPECTED_FAILURES[item.nodeid][0],
+                    ),
+                )
+                continue  # don't also apply the unconditional xfail below
             item.add_marker(pytest.mark.xfail(reason=EXPECTED_FAILURES[item.nodeid]))
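
For context on the HostUDFWrapper change above, here is a minimal sketch of what a subclass might look like once the class implements AutoCloseable. MyHostUDF and the deleteUDFInstance native helper are hypothetical placeholders (not cudf APIs), and the hashCode/equals bodies are illustrative only; the real class may carry additional abstract methods not shown in this hunk.

// Minimal sketch only. MyHostUDF and deleteUDFInstance() are hypothetical
// illustrations, not cudf APIs; a real subclass would free the native UDF
// instance through its own JNI binding.
import ai.rapids.cudf.HostUDFWrapper;

public final class MyHostUDF extends HostUDFWrapper {

  public MyHostUDF(long udfNativeHandle) {
    super(udfNativeHandle);
  }

  @Override
  public int hashCode() {
    // Illustrative only: real implementations typically hash the underlying UDF state.
    return Long.hashCode(udfNativeHandle);
  }

  @Override
  public boolean equals(Object obj) {
    // Illustrative only: real implementations compare the underlying UDF state.
    return obj instanceof MyHostUDF
        && ((MyHostUDF) obj).udfNativeHandle == udfNativeHandle;
  }

  @Override
  public void close() {
    // Hypothetical JNI call that deletes the native UDF instance.
    deleteUDFInstance(udfNativeHandle);
  }

  private static native void deleteUDFInstance(long handle);
}

With AutoCloseable in place, callers can release the native instance deterministically, e.g. try (MyHostUDF udf = new MyHostUDF(handle)) { ... }.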