Skip to content

Commit

Permalink
Merge branch 'branch-25.02' into mukernels_strings
Browse files Browse the repository at this point in the history
  • Loading branch information
pmattione-nvidia authored Jan 9, 2025
2 parents 1a7eb2a + f13d8fc commit 8712b53
Show file tree
Hide file tree
Showing 14 changed files with 88 additions and 33 deletions.
4 changes: 2 additions & 2 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ dependencies:
- nbsphinx
- ninja
- notebook
- numba-cuda>=0.0.13,<0.0.18
- numba-cuda>=0.2.0,<0.3.0
- numpy>=1.23,<3.0a0
- numpydoc
- nvcc_linux-64=11.8
Expand All @@ -66,7 +66,7 @@ dependencies:
- pandas
- pandas>=2.0,<2.2.4dev0
- pandoc
- polars>=1.11,<1.15
- polars>=1.11,<1.18
- pre-commit
- ptxcompiler
- pyarrow>=14.0.0,<19.0.0a0
Expand Down
4 changes: 2 additions & 2 deletions conda/environments/all_cuda-125_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ dependencies:
- nbsphinx
- ninja
- notebook
- numba-cuda>=0.0.13,<0.0.18
- numba-cuda>=0.2.0,<0.3.0
- numpy>=1.23,<3.0a0
- numpydoc
- nvcomp==4.1.0.6
Expand All @@ -64,7 +64,7 @@ dependencies:
- pandas
- pandas>=2.0,<2.2.4dev0
- pandoc
- polars>=1.11,<1.15
- polars>=1.11,<1.18
- pre-commit
- pyarrow>=14.0.0,<19.0.0a0
- pydata-sphinx-theme!=0.14.2
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/cudf-polars/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.

{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') %}
{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
Expand Down Expand Up @@ -43,7 +43,7 @@ requirements:
run:
- python
- pylibcudf ={{ version }}
- polars >=1.11,<1.15
- polars >=1.11,<1.18
- {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}

test:
Expand Down
4 changes: 2 additions & 2 deletions conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2018-2024, NVIDIA CORPORATION.
# Copyright (c) 2018-2025, NVIDIA CORPORATION.

{% set version = environ['RAPIDS_PACKAGE_VERSION'].lstrip('v') %}
{% set minor_version = version.split('.')[0] + '.' + version.split('.')[1] %}
Expand Down Expand Up @@ -80,7 +80,7 @@ requirements:
- typing_extensions >=4.0.0
- pandas >=2.0,<2.2.4dev0
- cupy >=12.0.0
- numba-cuda >=0.0.13,<0.0.18
- numba-cuda >=0.2.0,<0.3.0
- numpy >=1.23,<3.0a0
- pyarrow>=14.0.0,<18.0.0a0
- libcudf ={{ version }}
Expand Down
3 changes: 0 additions & 3 deletions cpp/src/io/parquet/decode_fixed.cu
Original file line number Diff line number Diff line change
Expand Up @@ -991,9 +991,6 @@ CUDF_KERNEL void __launch_bounds__(decode_block_size_t, 8)
return;
}

// if we have no work to do (eg, in a skip_rows/num_rows case) in this page.
if (s->num_rows == 0) { return; }

bool const should_process_nulls = is_nullable(s) && maybe_has_nulls(s);

// shared buffer. all shared memory is suballocated out of here
Expand Down
8 changes: 4 additions & 4 deletions dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -688,7 +688,7 @@ dependencies:
- output_types: [conda, requirements, pyproject]
packages:
- cachetools
- &numba-cuda-dep numba-cuda>=0.0.13,<0.0.18
- &numba-cuda-dep numba-cuda>=0.2.0,<0.3.0
- nvtx>=0.2.1
- packaging
- rich
Expand Down Expand Up @@ -747,7 +747,7 @@ dependencies:
common:
- output_types: [conda, requirements, pyproject]
packages:
- polars>=1.11,<1.15
- polars>=1.11,<1.18
run_cudf_polars_experimental:
common:
- output_types: [conda, requirements, pyproject]
Expand Down Expand Up @@ -810,11 +810,11 @@ dependencies:
matrices:
- matrix: {dependencies: "oldest"}
packages:
- *numba-cuda-dep
- numba-cuda==0.2.0
- pandas==2.0.*
- matrix: {dependencies: "latest"}
packages:
- numba-cuda==0.0.15
- *numba-cuda-dep
- pandas==2.2.3
- matrix:
packages:
Expand Down
6 changes: 2 additions & 4 deletions python/cudf/cudf/utils/_numba.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Copyright (c) 2023-2025, NVIDIA CORPORATION.

import glob
import os
Expand Down Expand Up @@ -130,9 +130,7 @@ def _setup_numba():
if driver_version < (12, 0):
patch_numba_linker_cuda_11()
else:
from pynvjitlink.patch import patch_numba_linker

patch_numba_linker()
numba_config.CUDA_ENABLE_PYNVJITLINK = True


class _CUDFNumbaConfig:
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2021-2024, NVIDIA CORPORATION.
# Copyright (c) 2021-2025, NVIDIA CORPORATION.

[build-system]
build-backend = "rapids_build_backend.build"
Expand All @@ -24,7 +24,7 @@ dependencies = [
"cupy-cuda11x>=12.0.0",
"fsspec>=0.6.0",
"libcudf==25.2.*,>=0.0.0a0",
"numba-cuda>=0.0.13,<0.0.18",
"numba-cuda>=0.2.0,<0.3.0",
"numpy>=1.23,<3.0a0",
"nvtx>=0.2.1",
"packaging",
Expand Down
42 changes: 36 additions & 6 deletions python/cudf_polars/cudf_polars/dsl/ir.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: Apache-2.0
"""
DSL nodes for the LogicalPlan of polars.
Expand Down Expand Up @@ -34,9 +34,11 @@
from cudf_polars.utils.versions import POLARS_VERSION_GT_112

if TYPE_CHECKING:
from collections.abc import Callable, Hashable, MutableMapping, Sequence
from collections.abc import Callable, Hashable, Iterable, MutableMapping, Sequence
from typing import Literal

from polars.polars import _expr_nodes as pl_expr

from cudf_polars.typing import Schema


Expand Down Expand Up @@ -1019,7 +1021,27 @@ class ConditionalJoin(IR):
__slots__ = ("ast_predicate", "options", "predicate")
_non_child = ("schema", "predicate", "options")
predicate: expr.Expr
options: tuple
"""Expression predicate to join on"""
options: tuple[
tuple[
str,
pl_expr.Operator | Iterable[pl_expr.Operator],
],
bool,
tuple[int, int] | None,
str,
bool,
Literal["none", "left", "right", "left_right", "right_left"],
]
"""
tuple of options:
- predicates: tuple of ir join type (eg. ie_join) and (In)Equality conditions
- join_nulls: do nulls compare equal?
- slice: optional slice to perform after joining.
- suffix: string suffix for right columns if names match
- coalesce: should key columns be coalesced (only makes sense for outer joins)
- maintain_order: which DataFrame row order to preserve, if any
"""

def __init__(
self, schema: Schema, predicate: expr.Expr, options: tuple, left: IR, right: IR
Expand All @@ -1029,22 +1051,24 @@ def __init__(
self.options = options
self.children = (left, right)
self.ast_predicate = to_ast(predicate)
_, join_nulls, zlice, suffix, coalesce = self.options
_, join_nulls, zlice, suffix, coalesce, maintain_order = self.options
# Preconditions from polars
assert not join_nulls
assert not coalesce
assert maintain_order == "none"
if self.ast_predicate is None:
raise NotImplementedError(
f"Conditional join with predicate {predicate}"
) # pragma: no cover; polars never delivers expressions we can't handle
self._non_child_args = (self.ast_predicate, zlice, suffix)
self._non_child_args = (self.ast_predicate, zlice, suffix, maintain_order)

@classmethod
def do_evaluate(
cls,
predicate: plc.expressions.Expression,
zlice: tuple[int, int] | None,
suffix: str,
maintain_order: Literal["none", "left", "right", "left_right", "right_left"],
left: DataFrame,
right: DataFrame,
) -> DataFrame:
Expand Down Expand Up @@ -1088,6 +1112,7 @@ class Join(IR):
tuple[int, int] | None,
str,
bool,
Literal["none", "left", "right", "left_right", "right_left"],
]
"""
tuple of options:
Expand All @@ -1096,6 +1121,7 @@ class Join(IR):
- slice: optional slice to perform after joining.
- suffix: string suffix for right columns if names match
- coalesce: should key columns be coalesced (only makes sense for outer joins)
- maintain_order: which DataFrame row order to preserve, if any
"""

def __init__(
Expand All @@ -1113,6 +1139,9 @@ def __init__(
self.options = options
self.children = (left, right)
self._non_child_args = (self.left_on, self.right_on, self.options)
# TODO: Implement maintain_order
if options[5] != "none":
raise NotImplementedError("maintain_order not implemented yet")
if any(
isinstance(e.value, expr.Literal)
for e in itertools.chain(self.left_on, self.right_on)
Expand Down Expand Up @@ -1222,12 +1251,13 @@ def do_evaluate(
tuple[int, int] | None,
str,
bool,
Literal["none", "left", "right", "left_right", "right_left"],
],
left: DataFrame,
right: DataFrame,
) -> DataFrame:
"""Evaluate and return a dataframe."""
how, join_nulls, zlice, suffix, coalesce = options
how, join_nulls, zlice, suffix, coalesce, _ = options
if how == "cross":
# Separate implementation, since cross_join returns the
# result, not the gather maps
Expand Down
4 changes: 2 additions & 2 deletions python/cudf_polars/cudf_polars/dsl/translate.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: Apache-2.0

"""Translate polars IR representation to ours."""
Expand Down Expand Up @@ -84,7 +84,7 @@ def translate_ir(self, *, n: int | None = None) -> ir.IR:
# IR is versioned with major.minor: minor is bumped for backwards
# compatible changes (e.g. adding new nodes), major is bumped for
# incompatible changes (e.g. renaming nodes).
if (version := self.visitor.version()) >= (4, 0):
if (version := self.visitor.version()) >= (4, 3):
e = NotImplementedError(
f"No support for polars IR {version=}"
) # pragma: no cover; no such version for now.
Expand Down
21 changes: 21 additions & 0 deletions python/cudf_polars/cudf_polars/testing/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,11 @@ def pytest_configure(config: pytest.Config) -> None:
"tests/unit/io/test_scan.py::test_scan_include_file_name[False-scan_parquet-write_parquet]": "Need to add include_file_path to IR",
"tests/unit/io/test_scan.py::test_scan_include_file_name[False-scan_csv-write_csv]": "Need to add include_file_path to IR",
"tests/unit/io/test_scan.py::test_scan_include_file_name[False-scan_ndjson-write_ndjson]": "Need to add include_file_path to IR",
"tests/unit/io/test_write.py::test_write_async[read_parquet-write_parquet]": "Need to add include_file_path to IR",
"tests/unit/io/test_write.py::test_write_async[<lambda>-write_csv]": "Need to add include_file_path to IR",
"tests/unit/io/test_write.py::test_write_async[read_parquet-<lambda>]": "Need to add include_file_path to IR",
"tests/unit/io/test_write.py::test_write_async[<lambda>-<lambda>0]": "Need to add include_file_path to IR",
"tests/unit/io/test_write.py::test_write_async[<lambda>-<lambda>2]": "Need to add include_file_path to IR",
"tests/unit/lazyframe/test_engine_selection.py::test_engine_import_error_raises[gpu]": "Expect this to pass because cudf-polars is installed",
"tests/unit/lazyframe/test_engine_selection.py::test_engine_import_error_raises[engine1]": "Expect this to pass because cudf-polars is installed",
"tests/unit/lazyframe/test_lazyframe.py::test_round[dtype1-123.55-1-123.6]": "Rounding midpoints is handled incorrectly",
Expand All @@ -140,6 +145,22 @@ def pytest_configure(config: pytest.Config) -> None:
"tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func1-func1-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func1-func2-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/arithmetic/test_list_arithmetic.py::test_list_arithmetic_values[func1-func3-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr-broadcast_left-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr-broadcast_right-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr-broadcast_both-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr-broadcast_none-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_left-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_right-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_both-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/arithmetic/test_array.py::test_array_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_none-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr-broadcast_left-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr-broadcast_right-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr-broadcast_both-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr-broadcast_none-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_left-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_right-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_both-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/arithmetic/test_list.py::test_list_arithmetic_values[exec_op_with_expr_no_type_coercion-broadcast_none-none]": "cudf-polars doesn't nullify division by zero",
"tests/unit/operations/test_abs.py::test_abs_duration": "Need to raise for unsupported uops on timelike values",
"tests/unit/operations/test_group_by.py::test_group_by_mean_by_dtype[input7-expected7-Float32-Float32]": "Mismatching dtypes, needs cudf#15852",
"tests/unit/operations/test_group_by.py::test_group_by_mean_by_dtype[input10-expected10-Date-output_dtype10]": "Unsupported groupby-agg for a particular dtype",
Expand Down
4 changes: 2 additions & 2 deletions python/cudf_polars/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Copyright (c) 2024-2025, NVIDIA CORPORATION.

[build-system]
build-backend = "rapids_build_backend.build"
Expand All @@ -19,7 +19,7 @@ authors = [
license = { text = "Apache 2.0" }
requires-python = ">=3.10"
dependencies = [
"polars>=1.11,<1.15",
"polars>=1.11,<1.18",
"pylibcudf==25.2.*,>=0.0.0a0",
] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
classifiers = [
Expand Down
11 changes: 10 additions & 1 deletion python/cudf_polars/tests/test_join.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
# SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations

Expand Down Expand Up @@ -53,6 +53,15 @@ def right():
)


@pytest.mark.parametrize(
    "maintain_order", ["left", "left_right", "right_left", "right"]
)
def test_join_maintain_order_param_unsupported(left, right, maintain_order):
    """Check that an inner join with a non-"none" maintain_order fails IR translation."""
    # Build the lazy query first so the assertion line only does the checking.
    query = left.join(
        right,
        on=pl.col("a"),
        how="inner",
        maintain_order=maintain_order,
    )
    assert_ir_translation_raises(query, NotImplementedError)


@pytest.mark.parametrize(
"join_expr",
[
Expand Down
2 changes: 1 addition & 1 deletion python/dask_cudf/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ cudf = "dask_cudf.backends:CudfBackendEntrypoint"
[project.optional-dependencies]
test = [
"dask-cuda==25.2.*,>=0.0.0a0",
"numba-cuda>=0.0.13,<0.0.18",
"numba-cuda>=0.2.0,<0.3.0",
"pytest-cov",
"pytest-xdist",
"pytest<8",
Expand Down

0 comments on commit 8712b53

Please sign in to comment.