From cb77046d8baad31f4856c097f7052b3a3858c363 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Wed, 8 Jan 2025 21:05:41 -0500 Subject: [PATCH] Bump Polars version to <1.18 (#17632) This PR upgrades the Polars version to 1.17. It xfails some polars tests due to known issues and adds the `maintain_order` param to joins (not implemented yet). Notable change Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/17632 --- .../all_cuda-118_arch-x86_64.yaml | 2 +- .../all_cuda-125_arch-x86_64.yaml | 2 +- conda/recipes/cudf-polars/meta.yaml | 4 +- dependencies.yaml | 2 +- python/cudf_polars/cudf_polars/dsl/ir.py | 42 ++++++++++++++++--- .../cudf_polars/cudf_polars/dsl/translate.py | 4 +- .../cudf_polars/cudf_polars/testing/plugin.py | 21 ++++++++++ python/cudf_polars/pyproject.toml | 4 +- python/cudf_polars/tests/test_join.py | 11 ++++- 9 files changed, 76 insertions(+), 16 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index a4b3f4fe174..6ff9a5f832b 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -66,7 +66,7 @@ dependencies: - pandas - pandas>=2.0,<2.2.4dev0 - pandoc -- polars>=1.11,<1.15 +- polars>=1.11,<1.18 - pre-commit - ptxcompiler - pyarrow>=14.0.0,<19.0.0a0 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 7173c955116..e82192b8cdb 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -64,7 +64,7 @@ dependencies: - pandas - pandas>=2.0,<2.2.4dev0 - pandoc -- polars>=1.11,<1.15 +- polars>=1.11,<1.18 - pre-commit - pyarrow>=14.0.0,<19.0.0a0 - pydata-sphinx-theme!=0.14.2 diff --git a/conda/recipes/cudf-polars/meta.yaml b/conda/recipes/cudf-polars/meta.yaml index b6c03dc1bc2..7a0005497df 100644 --- a/conda/recipes/cudf-polars/meta.yaml +++ b/conda/recipes/cudf-polars/meta.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. {% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') %} {% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %} @@ -43,7 +43,7 @@ requirements: run: - python - pylibcudf ={{ version }} - - polars >=1.11,<1.15 + - polars >=1.11,<1.18 - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }} test: diff --git a/dependencies.yaml b/dependencies.yaml index b0f217a6770..50b4cd3c372 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -747,7 +747,7 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - polars>=1.11,<1.15 + - polars>=1.11,<1.18 run_cudf_polars_experimental: common: - output_types: [conda, requirements, pyproject] diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py index 1c1d4860eec..fd56329a48e 100644 --- a/python/cudf_polars/cudf_polars/dsl/ir.py +++ b/python/cudf_polars/cudf_polars/dsl/ir.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. # SPDX-License-Identifier: Apache-2.0 """ DSL nodes for the LogicalPlan of polars. @@ -34,9 +34,11 @@ from cudf_polars.utils.versions import POLARS_VERSION_GT_112 if TYPE_CHECKING: - from collections.abc import Callable, Hashable, MutableMapping, Sequence + from collections.abc import Callable, Hashable, Iterable, MutableMapping, Sequence from typing import Literal + from polars.polars import _expr_nodes as pl_expr + from cudf_polars.typing import Schema @@ -1019,7 +1021,27 @@ class ConditionalJoin(IR): __slots__ = ("ast_predicate", "options", "predicate") _non_child = ("schema", "predicate", "options") predicate: expr.Expr - options: tuple + """Expression predicate to join on""" + options: tuple[ + tuple[ + str, + pl_expr.Operator | Iterable[pl_expr.Operator], + ], + bool, + tuple[int, int] | None, + str, + bool, + Literal["none", "left", "right", "left_right", "right_left"], + ] + """ + tuple of options: + - predicates: tuple of ir join type (eg. ie_join) and (In)Equality conditions + - join_nulls: do nulls compare equal? + - slice: optional slice to perform after joining. + - suffix: string suffix for right columns if names match + - coalesce: should key columns be coalesced (only makes sense for outer joins) + - maintain_order: which DataFrame row order to preserve, if any + """ def __init__( self, schema: Schema, predicate: expr.Expr, options: tuple, left: IR, right: IR @@ -1029,15 +1051,16 @@ def __init__( self.options = options self.children = (left, right) self.ast_predicate = to_ast(predicate) - _, join_nulls, zlice, suffix, coalesce = self.options + _, join_nulls, zlice, suffix, coalesce, maintain_order = self.options # Preconditions from polars assert not join_nulls assert not coalesce + assert maintain_order == "none" if self.ast_predicate is None: raise NotImplementedError( f"Conditional join with predicate {predicate}" ) # pragma: no cover; polars never delivers expressions we can't handle - self._non_child_args = (self.ast_predicate, zlice, suffix) + self._non_child_args = (self.ast_predicate, zlice, suffix, maintain_order) @classmethod def do_evaluate( @@ -1045,6 +1068,7 @@ def do_evaluate( predicate: plc.expressions.Expression, zlice: tuple[int, int] | None, suffix: str, + maintain_order: Literal["none", "left", "right", "left_right", "right_left"], left: DataFrame, right: DataFrame, ) -> DataFrame: @@ -1088,6 +1112,7 @@ class Join(IR): tuple[int, int] | None, str, bool, + Literal["none", "left", "right", "left_right", "right_left"], ] """ tuple of options: @@ -1096,6 +1121,7 @@ class Join(IR): - slice: optional slice to perform after joining. - suffix: string suffix for right columns if names match - coalesce: should key columns be coalesced (only makes sense for outer joins) + - maintain_order: which DataFrame row order to preserve, if any """ def __init__( @@ -1113,6 +1139,9 @@ def __init__( self.options = options self.children = (left, right) self._non_child_args = (self.left_on, self.right_on, self.options) + # TODO: Implement maintain_order + if options[5] != "none": + raise NotImplementedError("maintain_order not implemented yet") if any( isinstance(e.value, expr.Literal) for e in itertools.chain(self.left_on, self.right_on) @@ -1222,12 +1251,13 @@ def do_evaluate( tuple[int, int] | None, str, bool, + Literal["none", "left", "right", "left_right", "right_left"], ], left: DataFrame, right: DataFrame, ) -> DataFrame: """Evaluate and return a dataframe.""" - how, join_nulls, zlice, suffix, coalesce = options + how, join_nulls, zlice, suffix, coalesce, _ = options if how == "cross": # Separate implementation, since cross_join returns the # result, not the gather maps diff --git a/python/cudf_polars/cudf_polars/dsl/translate.py b/python/cudf_polars/cudf_polars/dsl/translate.py index 37cf36dc4dd..2138ac0c700 100644 --- a/python/cudf_polars/cudf_polars/dsl/translate.py +++ b/python/cudf_polars/cudf_polars/dsl/translate.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. # SPDX-License-Identifier: Apache-2.0 """Translate polars IR representation to ours.""" @@ -84,7 +84,7 @@ def translate_ir(self, *, n: int | None = None) -> ir.IR: # IR is versioned with major.minor, minor is bumped for backwards # compatible changes (e.g. adding new nodes), major is bumped for # incompatible changes (e.g. renaming nodes). - if (version := self.visitor.version()) >= (4, 0): + if (version := self.visitor.version()) >= (4, 3): e = NotImplementedError( f"No support for polars IR {version=}" ) # pragma: no cover; no such version for now. diff --git a/python/cudf_polars/cudf_polars/testing/plugin.py b/python/cudf_polars/cudf_polars/testing/plugin.py index 87628242838..c16df320ceb 100644 --- a/python/cudf_polars/cudf_polars/testing/plugin.py +++ b/python/cudf_polars/cudf_polars/testing/plugin.py @@ -123,6 +123,11 @@ def pytest_configure(config: pytest.Config) -> None: "tests/unit/io/test_scan.py::test_scan_include_file_name[False-scan_parquet-write_parquet]": "Need to add include_file_path to IR", "tests/unit/io/test_scan.py::test_scan_include_file_name[False-scan_csv-write_csv]": "Need to add include_file_path to IR", "tests/unit/io/test_scan.py::test_scan_include_file_name[False-scan_ndjson-write_ndjson]": "Need to add include_file_path to IR", + "tests/unit/io/test_write.py::test_write_async[read_parquet-write_parquet]": "Need to add include_file_path to IR", + "tests/unit/io/test_write.py::test_write_async[-write_csv]": "Need to add include_file_path to IR", + "tests/unit/io/test_write.py::test_write_async[read_parquet-]": "Need to add include_file_path to IR", + "tests/unit/io/test_write.py::test_write_async[-0]": "Need to add include_file_path to IR", + "tests/unit/io/test_write.py::test_write_async[-2]": "Need to add include_file_path to IR", "tests/unit/lazyframe/test_engine_selection.py::test_engine_import_error_raises[gpu]": "Expect this to pass because cudf-polars is installed", "tests/unit/lazyframe/test_engine_selection.py::test_engine_import_error_raises[engine1]": "Expect this to pass because cudf-polars is installed", "tests/unit/lazyframe/test_lazyframe.py::test_round[dtype1-123.55-1-123.6]": "Rounding midpoints is handled incorrectly", @@ -140,6 +145,22 @@ def pytest_configure(config: pytest.Config) -> None: "tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func1-func1-none]": "cudf-polars doesn't nullify division by zero", "tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func1-func2-none]": "cudf-polars doesn't nullify division by zero", "tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func1-func3-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr-broadcast_left-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr-broadcast_right-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr-broadcast_both-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr-broadcast_none-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_left-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_right-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_both-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_none-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr-broadcast_left-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr-broadcast_right-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr-broadcast_both-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr-broadcast_none-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_left-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_right-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_both-none]": "cudf-polars doesn't nullify division by zero", + "tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_none-none]": "cudf-polars doesn't nullify division by zero", "tests/unit/operations/test_abs.py::test_abs_duration": "Need to raise for unsupported uops on timelike values", "tests/unit/operations/test_group_by.py::test_group_by_mean_by_dtype[input7-expected7-Float32-Float32]": "Mismatching dtypes, needs cudf#15852", "tests/unit/operations/test_group_by.py::test_group_by_mean_by_dtype[input10-expected10-Date-output_dtype10]": "Unsupported groupby-agg for a particular dtype", diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml index 5904942aea2..9fb9bbf391e 100644 --- a/python/cudf_polars/pyproject.toml +++ b/python/cudf_polars/pyproject.toml @@ -1,4 +1,4 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. +# Copyright (c) 2024-2025, NVIDIA CORPORATION. [build-system] build-backend = "rapids_build_backend.build" @@ -19,7 +19,7 @@ authors = [ license = { text = "Apache 2.0" } requires-python = ">=3.10" dependencies = [ - "polars>=1.11,<1.15", + "polars>=1.11,<1.18", "pylibcudf==25.2.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ diff --git a/python/cudf_polars/tests/test_join.py b/python/cudf_polars/tests/test_join.py index 2fcbbf21f1c..f1f47bfb9f1 100644 --- a/python/cudf_polars/tests/test_join.py +++ b/python/cudf_polars/tests/test_join.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES. # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations @@ -53,6 +53,15 @@ def right(): ) +@pytest.mark.parametrize( + "maintain_order", ["left", "left_right", "right_left", "right"] +) +def test_join_maintain_order_param_unsupported(left, right, maintain_order): + q = left.join(right, on=pl.col("a"), how="inner", maintain_order=maintain_order) + + assert_ir_translation_raises(q, NotImplementedError) + + @pytest.mark.parametrize( "join_expr", [