Skip to content

Commit

Permalink
Merge branch 'branch-24.12' into strings-benchmarks
Browse files Browse the repository at this point in the history
  • Loading branch information
lamarrr authored Oct 28, 2024
2 parents 7d7fcd2 + 8c4d1f2 commit f7f6084
Show file tree
Hide file tree
Showing 121 changed files with 470 additions and 457 deletions.
2 changes: 1 addition & 1 deletion .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,6 @@ ENV PYTHONDONTWRITEBYTECODE="1"

ENV SCCACHE_REGION="us-east-2"
ENV SCCACHE_BUCKET="rapids-sccache-devs"
ENV VAULT_HOST="https://vault.ops.k8s.rapids.ai"
ENV AWS_ROLE_ARN="arn:aws:iam::279114543810:role/nv-gha-token-sccache-devs"
ENV HISTFILE="/home/coder/.cache/._bash_history"
ENV LIBCUDF_KERNEL_CACHE_PATH="/home/coder/cudf/cpp/build/${PYTHON_PACKAGE_MANAGER}/cuda-${CUDA_VERSION}/latest/jitify_cache"
12 changes: 1 addition & 11 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,6 @@ repos:
^cpp/cmake/thirdparty/patches/.*|
^python/cudf/cudf/tests/data/subword_tokenizer_data/.*
)
- repo: https://github.com/PyCQA/isort
rev: 5.13.2
hooks:
- id: isort
# Use the config file specific to each subproject so that each
# project can specify its own first/third-party packages.
args: ["--config-root=python/", "--resolve-all-configs"]
files: python/.*
exclude: |
(?x)^(^python/cudf_polars/.*)
types_or: [python, cython, pyi]
- repo: https://github.com/MarcoGorelli/cython-lint
rev: v0.16.2
hooks:
Expand Down Expand Up @@ -150,6 +139,7 @@ repos:
rev: v0.4.8
hooks:
- id: ruff
args: ["--fix"]
files: python/.*$
- id: ruff-format
files: python/.*$
Expand Down
4 changes: 2 additions & 2 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -293,8 +293,8 @@ In order to run doxygen as a linter on C++/CUDA code, run
./ci/checks/doxygen.sh
```

Python code runs several linters including [Black](https://black.readthedocs.io/en/stable/),
[isort](https://pycqa.github.io/isort/), and [flake8](https://flake8.pycqa.org/en/latest/).
Python code runs several linters including [Ruff](https://docs.astral.sh/ruff/),
which provides rules for Black-like formatting and isort-style import sorting.

cuDF also uses [codespell](https://github.com/codespell-project/codespell) to find spelling
mistakes, and this check is run as a pre-commit hook. To apply the suggested spelling fixes,
Expand Down
13 changes: 3 additions & 10 deletions cpp/include/cudf/detail/null_mask.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -166,16 +166,9 @@ size_type inplace_bitmask_binop(Binop op,

rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref();
cudf::detail::device_scalar<size_type> d_counter{0, stream, mr};
rmm::device_uvector<bitmask_type const*> d_masks(masks.size(), stream, mr);
rmm::device_uvector<size_type> d_begin_bits(masks_begin_bits.size(), stream, mr);

CUDF_CUDA_TRY(cudaMemcpyAsync(
d_masks.data(), masks.data(), masks.size_bytes(), cudaMemcpyDefault, stream.value()));
CUDF_CUDA_TRY(cudaMemcpyAsync(d_begin_bits.data(),
masks_begin_bits.data(),
masks_begin_bits.size_bytes(),
cudaMemcpyDefault,
stream.value()));

auto d_masks = cudf::detail::make_device_uvector_async(masks, stream, mr);
auto d_begin_bits = cudf::detail::make_device_uvector_async(masks_begin_bits, stream, mr);

auto constexpr block_size = 256;
cudf::detail::grid_1d config(dest_mask.size(), block_size);
Expand Down
3 changes: 1 addition & 2 deletions docs/cudf/source/developer_guide/contributing_guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@ Developers are strongly recommended to set up `pre-commit` prior to any developm
The `.pre-commit-config.yaml` file at the root of the repo is the primary source of truth for linting.
Specifically, cuDF uses the following tools:

- [`ruff`](https://beta.ruff.rs/) checks for general code formatting compliance.
- [`isort`](https://pycqa.github.io/isort/) ensures imports are sorted consistently.
- [`ruff`](https://docs.astral.sh/ruff/) checks for general code formatting compliance.
- [`mypy`](http://mypy-lang.org/) performs static type checking.
In conjunction with [type hints](https://docs.python.org/3/library/typing.html),
`mypy` can help catch various bugs that are otherwise difficult to find.
Expand Down
2 changes: 1 addition & 1 deletion docs/cudf/source/user_guide/10min.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,10 @@
"import os\n",
"\n",
"import cupy as cp\n",
"import dask_cudf\n",
"import pandas as pd\n",
"\n",
"import cudf\n",
"import dask_cudf\n",
"\n",
"cp.random.seed(12)\n",
"\n",
Expand Down
6 changes: 6 additions & 0 deletions docs/cudf/source/user_guide/guide-to-udfs.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@
"outputs": [],
"source": [
"# define a scalar function\n",
"\n",
"\n",
"def f(x):\n",
" return x + 1"
]
Expand Down Expand Up @@ -247,6 +249,8 @@
"outputs": [],
"source": [
"# redefine the same function from above\n",
"\n",
"\n",
"def f(x):\n",
" return x + 1"
]
Expand Down Expand Up @@ -1622,6 +1626,8 @@
"outputs": [],
"source": [
"# a user defined aggregation function.\n",
"\n",
"\n",
"def udaf(df):\n",
" return df[\"b\"].max() - df[\"b\"].min() / 2"
]
Expand Down
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ select = [
"F",
# pycodestyle Warning
"W",
# isort
"I",
# no-blank-line-before-function
"D201",
# one-blank-line-after-class
Expand Down
18 changes: 7 additions & 11 deletions python/cudf/benchmarks/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,27 +56,23 @@
# into the main repo.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "common"))

from config import cudf # noqa: W0611, E402, F401
from utils import ( # noqa: E402
OrderedSet,
collapse_fixtures,
column_generators,
make_fixture,
)

# Turn off isort until we upgrade to 5.8.0
# https://github.com/pycqa/isort/issues/1594
# isort: off
from config import ( # noqa: W0611, E402, F401
NUM_COLS,
NUM_ROWS,
collect_ignore,
cudf, # noqa: W0611, E402, F401
pytest_collection_modifyitems,
pytest_sessionfinish,
pytest_sessionstart,
)

# isort: on
from utils import ( # noqa: E402
OrderedSet,
collapse_fixtures,
column_generators,
make_fixture,
)


@pytest_cases.fixture(params=[0, 1], ids=["AxisIndex", "AxisColumn"])
Expand Down
69 changes: 8 additions & 61 deletions python/cudf/cudf/_lib/interop.pyx
Original file line number Diff line number Diff line change
@@ -1,49 +1,22 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION.

from cpython cimport pycapsule
from libcpp.memory cimport unique_ptr
from libcpp.utility cimport move

import pylibcudf

from pylibcudf.libcudf.interop cimport (
DLManagedTensor,
from_dlpack as cpp_from_dlpack,
to_dlpack as cpp_to_dlpack,
)
from pylibcudf.libcudf.table.table cimport table
from pylibcudf.libcudf.table.table_view cimport table_view

from cudf._lib.utils cimport (
columns_from_pylibcudf_table,
columns_from_unique_ptr,
table_view_from_columns,
)
from cudf._lib.utils cimport columns_from_pylibcudf_table

from cudf.core.buffer import acquire_spill_lock
from cudf.core.dtypes import ListDtype, StructDtype


def from_dlpack(dlpack_capsule):
def from_dlpack(object dlpack_capsule):
"""
Converts a DLPack Tensor PyCapsule into a list of columns.
DLPack Tensor PyCapsule is expected to have the name "dltensor".
"""
cdef DLManagedTensor* dlpack_tensor = <DLManagedTensor*>pycapsule.\
PyCapsule_GetPointer(dlpack_capsule, 'dltensor')
pycapsule.PyCapsule_SetName(dlpack_capsule, 'used_dltensor')

cdef unique_ptr[table] c_result

with nogil:
c_result = move(
cpp_from_dlpack(dlpack_tensor)
)

res = columns_from_unique_ptr(move(c_result))
dlpack_tensor.deleter(dlpack_tensor)
return res
return columns_from_pylibcudf_table(
pylibcudf.interop.from_dlpack(dlpack_capsule)
)


def to_dlpack(list source_columns):
Expand All @@ -52,39 +25,13 @@ def to_dlpack(list source_columns):
DLPack Tensor PyCapsule will have the name "dltensor".
"""
if any(column.null_count for column in source_columns):
raise ValueError(
"Cannot create a DLPack tensor with null values. \
Input is required to have null count as zero."
)

cdef DLManagedTensor *dlpack_tensor
cdef table_view source_table_view = table_view_from_columns(source_columns)

with nogil:
dlpack_tensor = cpp_to_dlpack(
source_table_view
return pylibcudf.interop.to_dlpack(
pylibcudf.Table(
[col.to_pylibcudf(mode="read") for col in source_columns]
)

return pycapsule.PyCapsule_New(
dlpack_tensor,
'dltensor',
dlmanaged_tensor_pycapsule_deleter
)


cdef void dlmanaged_tensor_pycapsule_deleter(object pycap_obj) noexcept:
cdef DLManagedTensor* dlpack_tensor = <DLManagedTensor*>0
try:
dlpack_tensor = <DLManagedTensor*>pycapsule.PyCapsule_GetPointer(
pycap_obj, 'used_dltensor')
return # we do not call a used capsule's deleter
except Exception:
dlpack_tensor = <DLManagedTensor*>pycapsule.PyCapsule_GetPointer(
pycap_obj, 'dltensor')
dlpack_tensor.deleter(dlpack_tensor)


def gather_metadata(object cols_dtypes):
"""
Generates a ColumnMetadata vector for each column.
Expand Down
10 changes: 5 additions & 5 deletions python/cudf/cudf/_typing.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# Copyright (c) 2021-2024, NVIDIA CORPORATION.

import sys
from collections.abc import Callable
from typing import TYPE_CHECKING, Any, Dict, Iterable, TypeVar, Union
from collections.abc import Callable, Iterable
from typing import TYPE_CHECKING, Any, TypeVar, Union

import numpy as np
from pandas import Period, Timedelta, Timestamp
Expand Down Expand Up @@ -42,7 +42,7 @@
SeriesOrSingleColumnIndex = Union["cudf.Series", "cudf.core.index.Index"]

# Groupby aggregation
AggType = Union[str, Callable]
MultiColumnAggType = Union[
AggType, Iterable[AggType], Dict[Any, Iterable[AggType]]
AggType = Union[str, Callable] # noqa: UP007
MultiColumnAggType = Union[ # noqa: UP007
AggType, Iterable[AggType], dict[Any, Iterable[AggType]]
]
5 changes: 4 additions & 1 deletion python/cudf/cudf/core/buffer/buffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import pickle
import weakref
from types import SimpleNamespace
from typing import Any, Literal, Mapping
from typing import TYPE_CHECKING, Any, Literal

import numpy
from typing_extensions import Self
Expand All @@ -18,6 +18,9 @@
from cudf.core.abc import Serializable
from cudf.utils.string import format_bytes

if TYPE_CHECKING:
from collections.abc import Mapping


def host_memory_allocation(nbytes: int) -> memoryview:
"""Allocate host memory using NumPy
Expand Down
5 changes: 4 additions & 1 deletion python/cudf/cudf/core/buffer/exposure_tracked_buffer.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,16 @@

from __future__ import annotations

from typing import Literal, Mapping
from typing import TYPE_CHECKING, Literal

from typing_extensions import Self

import cudf
from cudf.core.buffer.buffer import Buffer, BufferOwner

if TYPE_CHECKING:
from collections.abc import Mapping


class ExposureTrackedBuffer(Buffer):
"""An exposure tracked buffer.
Expand Down
1 change: 0 additions & 1 deletion python/cudf/cudf/core/column/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,3 @@
Decimal128Column,
DecimalBaseColumn,
)
from cudf.core.column.interval import IntervalColumn # noqa: F401
3 changes: 2 additions & 1 deletion python/cudf/cudf/core/column/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import warnings
from functools import cached_property
from typing import TYPE_CHECKING, Any, Mapping, Sequence, cast
from typing import TYPE_CHECKING, Any, cast

import numpy as np
import pandas as pd
Expand All @@ -26,6 +26,7 @@

if TYPE_CHECKING:
from collections import abc
from collections.abc import Mapping, Sequence

import numba.cuda

Expand Down
3 changes: 2 additions & 1 deletion python/cudf/cudf/core/column/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,11 @@

import pickle
from collections import abc
from collections.abc import MutableSequence, Sequence
from functools import cached_property
from itertools import chain
from types import SimpleNamespace
from typing import TYPE_CHECKING, Any, Literal, MutableSequence, Sequence, cast
from typing import TYPE_CHECKING, Any, Literal, cast

import cupy
import numpy as np
Expand Down
4 changes: 3 additions & 1 deletion python/cudf/cudf/core/column/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import locale
import re
from locale import nl_langinfo
from typing import TYPE_CHECKING, Literal, Sequence, cast
from typing import TYPE_CHECKING, Literal, cast

import numpy as np
import pandas as pd
Expand All @@ -31,6 +31,8 @@
from cudf.utils.utils import _all_bools_with_nulls

if TYPE_CHECKING:
from collections.abc import Sequence

from cudf._typing import (
ColumnBinaryOperand,
DatetimeLikeScalar,
Expand Down
3 changes: 2 additions & 1 deletion python/cudf/cudf/core/column/decimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
from __future__ import annotations

import warnings
from collections.abc import Sequence
from decimal import Decimal
from typing import TYPE_CHECKING, Sequence, cast
from typing import TYPE_CHECKING, cast

import cupy as cp
import numpy as np
Expand Down
4 changes: 3 additions & 1 deletion python/cudf/cudf/core/column/lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from __future__ import annotations

from functools import cached_property
from typing import TYPE_CHECKING, Sequence, cast
from typing import TYPE_CHECKING, cast

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -34,6 +34,8 @@
from cudf.core.missing import NA

if TYPE_CHECKING:
from collections.abc import Sequence

from cudf._typing import ColumnBinaryOperand, ColumnLike, Dtype, ScalarLike
from cudf.core.buffer import Buffer

Expand Down
Loading

0 comments on commit f7f6084

Please sign in to comment.