cuda.parallel: Forbid non-contiguous arrays as inputs (or outputs) (#3233)

* Forbid non-contiguous arrays as inputs (or outputs)

* Implement a more robust way to check for contiguity

* Don't bother if cublas unavailable

* Fix how we check for zero-element arrays

* sort imports

---------

Co-authored-by: Ashwin Srinath <[email protected]>
shwina authored Jan 4, 2025
1 parent 36e27f7 commit 7b8563f
Showing 3 changed files with 100 additions and 2 deletions.
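For context, the "more robust" contiguity check mentioned in the commit message boils down to a stride rule: an array is contiguous when each stride equals the running product of the item size and the trailing dimensions (C order) or the leading dimensions (F order). The sketch below is illustrative only and not part of the change; it reproduces the C-order half of that rule with host-side NumPy arrays, whereas the real helper reads shape and strides from `__cuda_array_interface__` on device arrays.

```python
# Illustrative sketch (not part of the commit): the C-order stride rule that
# the new is_contiguous helper applies, shown with host-side NumPy arrays.
import numpy as np


def expected_c_strides(shape, itemsize):
    # Walk the shape right to left, accumulating the expected byte stride.
    strides, stride = [], itemsize
    for dim in reversed(shape):
        strides.append(stride)
        stride *= dim
    return tuple(reversed(strides))


a = np.arange(12, dtype=np.int64).reshape(3, 4)  # C-contiguous
view = a[:, ::2]                                 # every other column: strided view

print(expected_c_strides(a.shape, a.itemsize) == a.strides)           # True
print(expected_c_strides(view.shape, view.itemsize) == view.strides)  # False
```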
7 changes: 5 additions & 2 deletions python/cuda_parallel/cuda/parallel/experimental/_cccl.py
@@ -10,6 +10,7 @@
import numpy as np
from numba import cuda, types

from ._utils.cai import DeviceArrayLike, get_dtype, is_contiguous
from .iterators._iterators import IteratorBase


@@ -131,8 +132,10 @@ def _numpy_type_to_info(numpy_type: np.dtype) -> TypeInfo:
return _numba_type_to_info(numba_type)


def _device_array_to_cccl_iter(array) -> Iterator:
info = _numpy_type_to_info(array.dtype)
def _device_array_to_cccl_iter(array: DeviceArrayLike) -> Iterator:
if not is_contiguous(array):
raise ValueError("Non-contiguous arrays are not supported.")
info = _numpy_type_to_info(get_dtype(array))
return Iterator(
info.size,
info.alignment,
47 changes: 47 additions & 0 deletions python/cuda_parallel/cuda/parallel/experimental/_utils/cai.py
@@ -7,10 +7,57 @@
Utilities for extracting information from `__cuda_array_interface__`.
"""

from typing import Optional, Tuple

import numpy as np

from ..typing import DeviceArrayLike


def get_dtype(arr: DeviceArrayLike) -> np.dtype:
return np.dtype(arr.__cuda_array_interface__["typestr"])


def get_strides(arr: DeviceArrayLike) -> Optional[Tuple]:
return arr.__cuda_array_interface__["strides"]


def get_shape(arr: DeviceArrayLike) -> Tuple:
return arr.__cuda_array_interface__["shape"]


def is_contiguous(arr: DeviceArrayLike) -> bool:
shape, strides = get_shape(arr), get_strides(arr)

if strides is None:
return True

if any(dim == 0 for dim in shape):
# array has no elements
return True

if all(dim == 1 for dim in shape):
# there is a single element:
return True

itemsize = get_dtype(arr).itemsize

if strides[-1] == itemsize:
# assume C-contiguity
expected_stride = itemsize
for dim, stride in zip(reversed(shape), reversed(strides)):
if stride != expected_stride:
return False
expected_stride *= dim
return True
elif strides[0] == itemsize:
# assume F-contiguity
expected_stride = itemsize
for dim, stride in zip(shape, strides):
if stride != expected_stride:
return False
expected_stride *= dim
return True
else:
# not contiguous
return False
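A quick usage sketch of the new helper (assumes CuPy is installed; the import path is the private `_utils.cai` module added in this commit and may change):

```python
# Sketch: how is_contiguous classifies a few common CuPy array layouts.
import cupy as cp

from cuda.parallel.experimental._utils.cai import is_contiguous

a = cp.arange(12, dtype=cp.int64).reshape(3, 4)

assert is_contiguous(a)                  # C-contiguous
assert is_contiguous(a.T)                # transpose is F-contiguous
assert is_contiguous(cp.empty((0, 4)))   # zero-element arrays are accepted
assert not is_contiguous(a[:, ::2])      # strided column view is rejected
```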
48 changes: 48 additions & 0 deletions python/cuda_parallel/tests/test_reduce.py
@@ -502,3 +502,51 @@ def op3(x):
np.zeros([0], dtype="int64"),
)
assert reducer_1 is not reducer_2


@pytest.fixture(params=[True, False])
def array_2d(request):
f_contiguous = request.param
arr = cp.random.rand(5, 10)
if f_contiguous:
try:
return cp.asfortranarray(arr)
except ImportError: # cublas unavailable
return arr
else:
return arr


def test_reduce_2d_array(array_2d):
def binary_op(x, y):
return x + y

d_out = cp.empty(1, dtype=array_2d.dtype)
h_init = np.asarray([0], dtype=array_2d.dtype)
d_in = array_2d
reduce_into = algorithms.reduce_into(
d_in=d_in, d_out=d_out, op=binary_op, h_init=h_init
)
temp_storage_size = reduce_into(
None, d_in=d_in, d_out=d_out, num_items=d_in.size, h_init=h_init
)
d_temp_storage = cp.empty(temp_storage_size, dtype=np.uint8)
reduce_into(d_temp_storage, d_in, d_out, d_in.size, h_init)
np.testing.assert_allclose(d_in.sum().get(), d_out.get())


def test_reduce_non_contiguous():
def binary_op(x, y):
return x + y

size = 10
d_out = cp.empty(1, dtype="int64")
h_init = np.asarray([0], dtype="int64")

d_in = cp.zeros((size, 2))[:, 0]
with pytest.raises(ValueError, match="Non-contiguous arrays are not supported."):
_ = algorithms.reduce_into(d_in, d_out, binary_op, h_init)

d_in = cp.zeros(size)[::2]
with pytest.raises(ValueError, match="Non-contiguous arrays are not supported."):
_ = algorithms.reduce_into(d_in, d_out, binary_op, h_init)
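
If existing code trips over the new ValueError with a strided view, one possible workaround (a sketch reusing `d_out`, `binary_op`, and `h_init` from the test above; not part of this change) is to materialize a contiguous copy before constructing the reducer:

```python
# Sketch: copy a non-contiguous view into contiguous device memory first.
d_view = cp.zeros(10, dtype="int64")[::2]   # non-contiguous view
d_in = cp.ascontiguousarray(d_view)         # contiguous copy on the device
reduce_into = algorithms.reduce_into(d_in, d_out, binary_op, h_init)
```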
