Commit

merge conflict
Matt711 committed Jan 9, 2025
2 parents 6f1741f + cb77046 commit 6a10590
Showing 27 changed files with 325 additions and 292 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build.yaml
@@ -62,7 +62,7 @@ jobs:
arch: "amd64"
branch: ${{ inputs.branch }}
build_type: ${{ inputs.build_type || 'branch' }}
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
container_image: "rapidsai/ci-conda:latest"
date: ${{ inputs.date }}
node_type: "gpu-v100-latest-1"
run_script: "ci/build_docs.sh"
6 changes: 3 additions & 3 deletions .github/workflows/pr.yaml
@@ -186,7 +186,7 @@ jobs:
build_type: pull-request
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/test_java.sh"
static-configure:
needs: checks
@@ -207,7 +207,7 @@
build_type: pull-request
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/test_notebooks.sh"
docs-build:
needs: conda-python-build
@@ -217,7 +217,7 @@
build_type: pull-request
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/build_docs.sh"
wheel-build-libcudf:
needs: checks
6 changes: 3 additions & 3 deletions .github/workflows/test.yaml
@@ -41,7 +41,7 @@ jobs:
sha: ${{ inputs.sha }}
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/test_cpp_memcheck.sh"
static-configure:
secrets: inherit
@@ -94,7 +94,7 @@ jobs:
sha: ${{ inputs.sha }}
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/test_java.sh"
conda-notebook-tests:
secrets: inherit
@@ -106,7 +106,7 @@
sha: ${{ inputs.sha }}
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci-conda:cuda12.5.1-ubuntu22.04-py3.11"
container_image: "rapidsai/ci-conda:latest"
run_script: "ci/test_notebooks.sh"
wheel-tests-cudf:
secrets: inherit
6 changes: 4 additions & 2 deletions cpp/src/utilities/host_memory.cpp
@@ -14,6 +14,8 @@
* limitations under the License.
*/

#include "io/utilities/getenv_or.hpp"

#include <cudf/detail/utilities/stream_pool.hpp>
#include <cudf/logger.hpp>
#include <cudf/utilities/error.hpp>
@@ -277,7 +279,7 @@ bool config_default_pinned_memory_resource(pinned_mr_options const& opts)
CUDF_EXPORT auto& kernel_pinned_copy_threshold()
{
// use cudaMemcpyAsync for all pinned copies
-static std::atomic<size_t> threshold = 0;
+static std::atomic<size_t> threshold = getenv_or("LIBCUDF_KERNEL_PINNED_COPY_THRESHOLD", 0);
return threshold;
}

@@ -291,7 +293,7 @@ size_t get_kernel_pinned_copy_threshold() { return kernel_pinned_copy_threshold(
CUDF_EXPORT auto& allocate_host_as_pinned_threshold()
{
// use pageable memory for all host allocations
-static std::atomic<size_t> threshold = 0;
+static std::atomic<size_t> threshold = getenv_or("LIBCUDF_ALLOCATE_HOST_AS_PINNED_THRESHOLD", 0);
return threshold;
}

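The two thresholds above now fall back to environment variables instead of hard-coded zeros. A minimal usage sketch, assuming libcudf reads each variable lazily the first time the corresponding threshold is queried; the byte values are purely illustrative:

import os

# Illustrative values; both variables default to 0 when unset.
os.environ["LIBCUDF_KERNEL_PINNED_COPY_THRESHOLD"] = str(1 << 20)        # 1 MiB
os.environ["LIBCUDF_ALLOCATE_HOST_AS_PINNED_THRESHOLD"] = str(1 << 16)   # 64 KiB

import cudf  # import after setting the variables so libcudf sees them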
4 changes: 3 additions & 1 deletion python/cudf/cudf/api/types.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2024, NVIDIA CORPORATION.
+# Copyright (c) 2021-2025, NVIDIA CORPORATION.

"""Define common type operations."""

@@ -13,6 +13,7 @@
import cupy as cp
import numpy as np
import pandas as pd
+import pyarrow as pa
from pandas.api import types as pd_types

import cudf
@@ -144,6 +145,7 @@ def is_scalar(val):
cudf.Scalar,
cudf._lib.scalar.DeviceScalar,
cudf.core.tools.datetimes.DateOffset,
+pa.Scalar,
),
) or (
pd_types.is_scalar(val)
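With pa.Scalar added to the isinstance check, is_scalar should now recognize pyarrow scalars directly. A quick sanity check, with the expected results inferred from the hunk above:

import pyarrow as pa
import cudf

cudf.api.types.is_scalar(pa.scalar(1))      # expected: True after this change
cudf.api.types.is_scalar(pa.scalar("abc"))  # expected: True as well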
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/_base_index.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2021-2024, NVIDIA CORPORATION.
+# Copyright (c) 2021-2025, NVIDIA CORPORATION.

from __future__ import annotations

@@ -350,7 +350,7 @@ def names(self, values):

self.name = values[0]

-def _clean_nulls_from_index(self):
+def _pandas_repr_compatible(self):
"""
Convert all na values(if any) in Index object
to `<NA>` as a preprocessing step to `__repr__` methods.
5 changes: 2 additions & 3 deletions python/cudf/cudf/core/byte_pair_encoding.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023-2024, NVIDIA CORPORATION.
+# Copyright (c) 2023-2025, NVIDIA CORPORATION.

from __future__ import annotations

@@ -53,7 +53,6 @@ def __call__(self, text: cudf.Series, separator: str = " ") -> cudf.Series:
1 this is it
dtype: object
"""
-sep = cudf.Scalar(separator, dtype="str")
return cudf.Series._from_column(
-text._column.byte_pair_encoding(self.merge_pairs, sep)
+text._column.byte_pair_encoding(self.merge_pairs, separator)
)
38 changes: 28 additions & 10 deletions python/cudf/cudf/core/column/column.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2024, NVIDIA CORPORATION.
+# Copyright (c) 2018-2025, NVIDIA CORPORATION.

from __future__ import annotations

@@ -77,6 +77,7 @@

from cudf._typing import ColumnLike, Dtype, ScalarLike
from cudf.core.column.numerical import NumericalColumn
+from cudf.core.column.strings import StringColumn

if PANDAS_GE_210:
NumpyExtensionArray = pd.arrays.NumpyExtensionArray
@@ -92,6 +93,8 @@ class ColumnBase(Column, Serializable, BinaryOperand, Reducible):
"min",
}

+_PANDAS_NA_REPR = str(pd.NA)

def data_array_view(
self, *, mode: Literal["write", "read"] = "write"
) -> "cuda.devicearray.DeviceNDArray":
@@ -176,6 +179,17 @@ def __repr__(self):
f"dtype: {self.dtype}"
)

+def _prep_pandas_compat_repr(self) -> StringColumn | Self:
+    """
+    Preprocess Column to be compatible with pandas repr, namely handling nulls.
+    * null (datetime/timedelta) = str(pd.NaT)
+    * null (other types)= str(pd.NA)
+    """
+    if self.has_nulls():
+        return self.astype("str").fillna(self._PANDAS_NA_REPR)
+    return self

def to_pandas(
self,
*,
@@ -239,8 +253,12 @@ def find_and_replace(
def clip(self, lo: ScalarLike, hi: ScalarLike) -> Self:
plc_column = plc.replace.clamp(
self.to_pylibcudf(mode="read"),
-cudf.Scalar(lo, self.dtype).device_value.c_value,
-cudf.Scalar(hi, self.dtype).device_value.c_value,
+plc.interop.from_arrow(
+    pa.scalar(lo, type=cudf_dtype_to_pa_type(self.dtype))
+),
+plc.interop.from_arrow(
+    pa.scalar(hi, type=cudf_dtype_to_pa_type(self.dtype))
+),
)
return type(self).from_pylibcudf(plc_column) # type: ignore[return-value]

@@ -1015,7 +1033,7 @@ def _obtain_isin_result(self, rhs: ColumnBase) -> ColumnBase:
# https://github.com/rapidsai/cudf/issues/14515 by
# providing a mode in which cudf::contains does not mask
# the result.
-result = result.fillna(cudf.Scalar(rhs.null_count > 0))
+result = result.fillna(rhs.null_count > 0)
return result

def as_mask(self) -> Buffer:
@@ -1981,12 +1999,12 @@ def as_column(
column = Column.from_pylibcudf(
plc.filling.sequence(
len(arbitrary),
-cudf.Scalar(
-    arbitrary.start, dtype=np.dtype(np.int64)
-).device_value.c_value,
-cudf.Scalar(
-    arbitrary.step, dtype=np.dtype(np.int64)
-).device_value.c_value,
+plc.interop.from_arrow(
+    pa.scalar(arbitrary.start, type=pa.int64())
+),
+plc.interop.from_arrow(
+    pa.scalar(arbitrary.step, type=pa.int64())
+),
)
)
if cudf.get_option("default_integer_bitwidth") and dtype is None:
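The recurring pattern in this file swaps cudf.Scalar(...).device_value.c_value for a pyarrow scalar converted through pylibcudf. A minimal sketch of that conversion on its own, using only the APIs already visible in the hunks above (the values are illustrative):

import pyarrow as pa
import pylibcudf as plc

# Host-side values become pyarrow scalars with an explicit type, then
# pylibcudf scalars via interop; the same shape as the clamp and sequence calls.
lo = plc.interop.from_arrow(pa.scalar(0, type=pa.int64()))
hi = plc.interop.from_arrow(pa.scalar(100, type=pa.int64()))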
8 changes: 5 additions & 3 deletions python/cudf/cudf/core/column/datetime.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2024, NVIDIA CORPORATION.
+# Copyright (c) 2019-2025, NVIDIA CORPORATION.

from __future__ import annotations

@@ -212,6 +212,8 @@ class DatetimeColumn(column.ColumnBase):
"__rsub__",
}

+_PANDAS_NA_REPR = str(pd.NaT)

def __init__(
self,
data: Buffer,
@@ -351,8 +353,8 @@ def is_year_end(self) -> ColumnBase:
day_of_year = self.day_of_year
leap_dates = self.is_leap_year

-leap = day_of_year == cudf.Scalar(366)
-non_leap = day_of_year == cudf.Scalar(365)
+leap = day_of_year == 366
+non_leap = day_of_year == 365
return leap.copy_if_else(non_leap, leap_dates).fillna(False)

@property
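A quick behavioral check for the is_year_end change above; the dates and expected output are illustrative, assuming the usual Series datetime accessor:

import cudf

s = cudf.Series(cudf.date_range("2020-12-30", periods=3, freq="D"))
s.dt.is_year_end  # expected: [False, True, False]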
27 changes: 19 additions & 8 deletions python/cudf/cudf/core/column/lists.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+# Copyright (c) 2020-2025, NVIDIA CORPORATION.

from __future__ import annotations

@@ -28,6 +28,7 @@

from cudf._typing import ColumnBinaryOperand, ColumnLike, Dtype, ScalarLike
from cudf.core.buffer import Buffer
+from cudf.core.column.string import StringColumn


class ListColumn(ColumnBase):
@@ -67,6 +68,16 @@ def __init__(
children=children,
)

+def _prep_pandas_compat_repr(self) -> StringColumn | Self:
+    """
+    Preprocess Column to be compatible with pandas repr, namely handling nulls.
+    * null (datetime/timedelta) = str(pd.NaT)
+    * null (other types)= str(pd.NA)
+    """
+    # TODO: handle if self.has_nulls(): case
+    return self

@cached_property
def memory_usage(self):
n = super().memory_usage
@@ -274,7 +285,7 @@ def as_string_column(self) -> cudf.core.column.StringColumn:
with acquire_spill_lock():
plc_column = plc.strings.convert.convert_lists.format_list_column(
lc.to_pylibcudf(mode="read"),
cudf.Scalar("None").device_value.c_value,
plc.interop.from_arrow(pa.scalar("None")),
separators.to_pylibcudf(mode="read"),
)
return type(self).from_pylibcudf(plc_column) # type: ignore[return-value]
@@ -380,20 +391,20 @@ def extract_element_column(self, index: ColumnBase) -> ColumnBase:
)

@acquire_spill_lock()
-def contains_scalar(self, search_key: cudf.Scalar) -> ColumnBase:
+def contains_scalar(self, search_key: pa.Scalar) -> ColumnBase:
return type(self).from_pylibcudf(
plc.lists.contains(
self.to_pylibcudf(mode="read"),
-search_key.device_value.c_value,
+plc.interop.from_arrow(search_key),
)
)

@acquire_spill_lock()
-def index_of_scalar(self, search_key: cudf.Scalar) -> ColumnBase:
+def index_of_scalar(self, search_key: pa.Scalar) -> ColumnBase:
return type(self).from_pylibcudf(
plc.lists.index_of(
self.to_pylibcudf(mode="read"),
-search_key.device_value.c_value,
+plc.interop.from_arrow(search_key),
plc.lists.DuplicateFindOption.FIND_FIRST,
)
)
@@ -558,7 +569,7 @@ def contains(self, search_key: ScalarLike) -> ParentType:
dtype: bool
"""
return self._return_or_inplace(
-self._column.contains_scalar(cudf.Scalar(search_key))
+self._column.contains_scalar(pa.scalar(search_key))
)

def index(self, search_key: ScalarLike | ColumnLike) -> ParentType:
Expand Down Expand Up @@ -607,7 +618,7 @@ def index(self, search_key: ScalarLike | ColumnLike) -> ParentType:
"""

if is_scalar(search_key):
-result = self._column.index_of_scalar(cudf.Scalar(search_key))
+result = self._column.index_of_scalar(pa.scalar(search_key))
else:
result = self._column.index_of_column(as_column(search_key))
return self._return_or_inplace(result)
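The public list accessor is unchanged by the scalar plumbing above; a short usage sketch of the two affected methods:

import cudf

s = cudf.Series([[1, 2, 3], [3, 4], None])
s.list.contains(3)  # the search key now flows through pa.scalar internally
s.list.index(3)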
11 changes: 4 additions & 7 deletions python/cudf/cudf/core/column/numerical.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2024, NVIDIA CORPORATION.
+# Copyright (c) 2018-2025, NVIDIA CORPORATION.

from __future__ import annotations

@@ -7,6 +7,7 @@

import numpy as np
import pandas as pd
+import pyarrow as pa
from numba.np import numpy_support
from typing_extensions import Self

@@ -382,12 +383,8 @@ def as_string_column(self) -> cudf.core.column.StringColumn:
elif self.dtype.kind == "b":
conv_func = functools.partial(
plc.strings.convert.convert_booleans.from_booleans,
-true_string=cudf.Scalar(
-    "True", dtype="str"
-).device_value.c_value,
-false_string=cudf.Scalar(
-    "False", dtype="str"
-).device_value.c_value,
+true_string=plc.interop.from_arrow(pa.scalar("True")),
+false_string=plc.interop.from_arrow(pa.scalar("False")),
)
elif self.dtype.kind in {"i", "u"}:
conv_func = plc.strings.convert.convert_integers.from_integers
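The boolean-to-string path above should keep the same user-visible result; a quick check, assuming standard Series casting:

import cudf

cudf.Series([True, False, None]).astype("str")  # expected: "True", "False", <NA>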