Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/branch-25.02' into rm/scalar/devicescalar
Browse files Browse the repository at this point in the history
  • Loading branch information
mroeschke committed Jan 9, 2025
2 parents 9fbe28f + a8a4197 commit 133256b
Show file tree
Hide file tree
Showing 25 changed files with 236 additions and 279 deletions.
2 changes: 1 addition & 1 deletion python/cudf/cudf/_lib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# the License.
# =============================================================================

set(cython_sources column.pyx strings_udf.pyx types.pyx)
set(cython_sources column.pyx strings_udf.pyx)
set(linked_libraries cudf::cudf)

rapids_cython_create_modules(
Expand Down
4 changes: 3 additions & 1 deletion python/cudf/cudf/_lib/column.pxd
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
# Copyright (c) 2020-2025, NVIDIA CORPORATION.

from typing import Literal

Expand All @@ -13,6 +13,8 @@ from pylibcudf.libcudf.column.column_view cimport (
from pylibcudf.libcudf.types cimport size_type
from rmm.librmm.device_buffer cimport device_buffer

cdef dtype_from_lists_column_view(column_view cv)
cdef dtype_from_column_view(column_view cv)

cdef class Column:
cdef public:
Expand Down
100 changes: 88 additions & 12 deletions python/cudf/cudf/_lib/column.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -19,24 +19,25 @@ from cudf.core.buffer import (
as_buffer,
cuda_array_interface_wrapper,
)
from cudf.utils.dtypes import _get_base_dtype
from cudf.utils.dtypes import (
_get_base_dtype,
dtype_to_pylibcudf_type,
PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES,
)

from cpython.buffer cimport PyObject_CheckBuffer
from libc.stdint cimport uintptr_t
from libcpp.memory cimport make_unique, unique_ptr
from libc.stdint cimport uintptr_t, int32_t
from libcpp.memory cimport make_shared, make_unique, shared_ptr, unique_ptr
from libcpp.utility cimport move
from libcpp.vector cimport vector

from rmm.pylibrmm.device_buffer cimport DeviceBuffer

from cudf._lib.types cimport (
dtype_from_column_view,
dtype_to_pylibcudf_type,
from pylibcudf cimport (
DataType as plc_DataType,
Column as plc_Column,
Scalar as plc_Scalar,
)

from cudf._lib.types import dtype_from_pylibcudf_column

from pylibcudf cimport DataType as plc_DataType, Scalar as plc_Scalar
cimport pylibcudf.libcudf.copying as cpp_copying
cimport pylibcudf.libcudf.types as libcudf_types
cimport pylibcudf.libcudf.unary as libcudf_unary
Expand All @@ -45,6 +46,7 @@ from pylibcudf.libcudf.column.column_factories cimport (
make_numeric_column
)
from pylibcudf.libcudf.column.column_view cimport column_view
from pylibcudf.libcudf.lists.lists_column_view cimport lists_column_view
from pylibcudf.libcudf.null_mask cimport null_count as cpp_null_count
from pylibcudf.libcudf.scalar.scalar cimport scalar

Expand All @@ -60,6 +62,80 @@ cdef get_element(column_view col_view, size_type index):
return pylibcudf.interop.to_arrow(plc_scalar).as_py()


def dtype_from_pylibcudf_column(plc_Column col not None):
    """
    Return the cudf dtype that corresponds to a pylibcudf Column.

    Parameters
    ----------
    col : pylibcudf.Column
        Column whose type is inspected. ``None`` is rejected by the
        ``not None`` clause in the signature.

    Returns
    -------
    A ``cudf.ListDtype``, ``cudf.StructDtype`` or one of the cudf decimal
    dtypes for LIST, STRUCT and DECIMAL32/64/128 columns respectively;
    otherwise the entry of ``PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES`` for the
    column's type id.
    """
    plc_type = col.type()
    type_id = plc_type.id()

    if type_id == pylibcudf.TypeId.LIST:
        # The element dtype is derived recursively from the list's child.
        element_col = col.list_view().child()
        return cudf.ListDtype(dtype_from_pylibcudf_column(element_col))

    if type_id == pylibcudf.TypeId.STRUCT:
        # Field names are the stringified child positions ("0", "1", ...).
        return cudf.StructDtype(
            {
                str(idx): dtype_from_pylibcudf_column(col.child(idx))
                for idx in range(col.num_children())
            }
        )

    if type_id == pylibcudf.TypeId.DECIMAL64:
        decimal_cls = cudf.Decimal64Dtype
    elif type_id == pylibcudf.TypeId.DECIMAL32:
        decimal_cls = cudf.Decimal32Dtype
    elif type_id == pylibcudf.TypeId.DECIMAL128:
        decimal_cls = cudf.Decimal128Dtype
    else:
        return PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[type_id]

    # Every decimal dtype is built with its class's MAX_PRECISION and the
    # negated scale of the column type.
    # NOTE(review): presumably cudf and libcudf use opposite sign
    # conventions for scale -- confirm.
    return decimal_cls(
        precision=decimal_cls.MAX_PRECISION,
        scale=-plc_type.scale()
    )


cdef dtype_from_lists_column_view(column_view cv):
    """
    Return the ``cudf.ListDtype`` for a column_view holding a LIST column.

    The element dtype is resolved from the child of the list view:
    a LIST child recurses into this function, any other child is handled
    by ``dtype_from_column_view``.
    """
    # lists_column_view has no default constructor, and Cython requires
    # one for stack allocated objects, so the view is heap allocated
    # behind a shared_ptr instead.
    cdef shared_ptr[lists_column_view] list_view
    cdef column_view element_view

    list_view = make_shared[lists_column_view](cv)
    element_view = list_view.get()[0].child()

    if element_view.type().id() == libcudf_types.type_id.LIST:
        return cudf.ListDtype(dtype_from_lists_column_view(element_view))
    return cudf.ListDtype(dtype_from_column_view(element_view))


cdef dtype_from_column_view(column_view cv):
    """
    Return the cudf dtype that corresponds to a libcudf column_view.

    LIST views are delegated to ``dtype_from_lists_column_view``, STRUCT
    views become a ``cudf.StructDtype`` built from their children, and
    DECIMAL32/64/128 views become the matching cudf decimal dtype. Any
    other type id is looked up in ``PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES``.
    """
    cdef libcudf_types.type_id type_id = cv.type().id()

    if type_id == libcudf_types.type_id.LIST:
        return dtype_from_lists_column_view(cv)

    if type_id == libcudf_types.type_id.STRUCT:
        # Field names are the stringified child positions ("0", "1", ...).
        return cudf.StructDtype(
            {
                str(idx): dtype_from_column_view(cv.child(idx))
                for idx in range(cv.num_children())
            }
        )

    if type_id == libcudf_types.type_id.DECIMAL64:
        decimal_cls = cudf.Decimal64Dtype
    elif type_id == libcudf_types.type_id.DECIMAL32:
        decimal_cls = cudf.Decimal32Dtype
    elif type_id == libcudf_types.type_id.DECIMAL128:
        decimal_cls = cudf.Decimal128Dtype
    else:
        # The mapping is indexed with the enum value cast to int32_t.
        return PYLIBCUDF_TO_SUPPORTED_NUMPY_TYPES[<int32_t>(type_id)]

    # Every decimal dtype is built with its class's MAX_PRECISION and the
    # negated scale of the view's type.
    # NOTE(review): presumably cudf and libcudf use opposite sign
    # conventions for scale -- confirm.
    return decimal_cls(
        precision=decimal_cls.MAX_PRECISION,
        scale=-cv.type().scale()
    )


cdef class Column:
"""
A Column stores columnar data in device memory.
Expand Down Expand Up @@ -357,7 +433,7 @@ cdef class Column:
col = self
data_dtype = col.dtype

cdef plc_DataType dtype = dtype_to_pylibcudf_type(data_dtype)
cdef plc_DataType dtype = <plc_DataType?>dtype_to_pylibcudf_type(data_dtype)
cdef libcudf_types.size_type offset = self.offset
cdef vector[mutable_column_view] children
cdef void* data
Expand Down Expand Up @@ -420,7 +496,7 @@ cdef class Column:
col = self
data_dtype = col.dtype

cdef plc_DataType dtype = dtype_to_pylibcudf_type(data_dtype)
cdef plc_DataType dtype = <plc_DataType?>dtype_to_pylibcudf_type(data_dtype)
cdef libcudf_types.size_type offset = self.offset
cdef vector[column_view] children
cdef void* data
Expand Down
11 changes: 0 additions & 11 deletions python/cudf/cudf/_lib/types.pxd

This file was deleted.

172 changes: 0 additions & 172 deletions python/cudf/cudf/_lib/types.pyx

This file was deleted.

9 changes: 6 additions & 3 deletions python/cudf/cudf/core/_base_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from typing_extensions import Self

import cudf
from cudf._lib.types import size_type_dtype
from cudf.api.extensions import no_default
from cudf.api.types import is_integer, is_list_like, is_scalar
from cudf.core._internals import copying
Expand All @@ -24,7 +23,11 @@
from cudf.core.copy_types import GatherMap
from cudf.errors import MixedTypeError
from cudf.utils import ioutils
from cudf.utils.dtypes import can_convert_to_column, is_mixed_with_object_dtype
from cudf.utils.dtypes import (
SIZE_TYPE_DTYPE,
can_convert_to_column,
is_mixed_with_object_dtype,
)
from cudf.utils.utils import _is_same_name

if TYPE_CHECKING:
Expand Down Expand Up @@ -2047,7 +2050,7 @@ def _gather(self, gather_map, nullify=False, check_bounds=True):
# TODO: For performance, the check and conversion of gather map should
# be done by the caller. This check will be removed in future release.
if gather_map.dtype.kind not in "iu":
gather_map = gather_map.astype(size_type_dtype)
gather_map = gather_map.astype(SIZE_TYPE_DTYPE)

GatherMap(gather_map, len(self), nullify=not check_bounds or nullify)
return self._from_columns_like_self(
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/core/_internals/aggregation.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION.
# Copyright (c) 2020-2025, NVIDIA CORPORATION.
from __future__ import annotations

from typing import TYPE_CHECKING, Literal
Expand All @@ -8,9 +8,9 @@
import pylibcudf as plc

import cudf
from cudf._lib.types import SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES
from cudf.api.types import is_scalar
from cudf.utils import cudautils
from cudf.utils.dtypes import SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES

if TYPE_CHECKING:
from collections.abc import Callable
Expand Down
Loading

0 comments on commit 133256b

Please sign in to comment.