diff --git a/cpp/include/cuvs/core/detail/interop.hpp b/cpp/include/cuvs/core/detail/interop.hpp index 2ed0b330d..19e4a922c 100644 --- a/cpp/include/cuvs/core/detail/interop.hpp +++ b/cpp/include/cuvs/core/detail/interop.hpp @@ -86,7 +86,6 @@ inline MdspanType from_dlpack(DLManagedTensor* managed_tensor) RAFT_EXPECTS(to_data_type.lanes == tensor.dtype.lanes, "lanes mismatch between return mdspan and DLTensor"); RAFT_EXPECTS(tensor.dtype.lanes == 1, "More than 1 DLTensor lanes not supported"); - RAFT_EXPECTS(tensor.strides == nullptr, "Strided memory layout for DLTensor not supported"); auto to_device = accessor_type_to_DLDevice(); if (to_device.device_type == kDLCUDA) { @@ -110,4 +109,48 @@ inline MdspanType from_dlpack(DLManagedTensor* managed_tensor) return MdspanType{reinterpret_cast(tensor.data), exts}; } +/** + * @brief Check whether a DLManagedTensor describes a compact col-major (f-contiguous) layout + * + * A tensor without stride information is reported as not f-contiguous. The stride of a + * dimension with extent 1 is ignored, since it never contributes to an element offset. + */ +inline bool is_f_contiguous(DLManagedTensor* managed_tensor) +{ + const auto& tensor = managed_tensor->dl_tensor; + + if (!tensor.strides) { return false; } + int64_t expected_stride = 1; + for (int64_t i = 0; i < tensor.ndim; ++i) { + if (tensor.shape[i] != 1 && tensor.strides[i] != expected_stride) { return false; } + expected_stride *= tensor.shape[i]; + } + + return true; +} + +/** + * @brief Check whether a DLManagedTensor describes a compact row-major (c-contiguous) layout + * + * A tensor without stride information is treated as row-major. The stride of a dimension + * with extent 1 is ignored, since it never contributes to an element offset. + */ +inline bool is_c_contiguous(DLManagedTensor* managed_tensor) +{ + const auto& tensor = managed_tensor->dl_tensor; + + if (!tensor.strides) { + // no stride information indicates a row-major tensor according to the dlpack spec + return true; + } + + int64_t expected_stride = 1; + for (int64_t i = tensor.ndim - 1; i >= 0; --i) { + if (tensor.shape[i] != 1 && tensor.strides[i] != expected_stride) { return false; } + expected_stride *= tensor.shape[i]; + } + + return true; +} + } // namespace cuvs::core::detail diff --git a/cpp/include/cuvs/core/interop.hpp b/cpp/include/cuvs/core/interop.hpp index 2462f02ec..096885f2f 100644 --- a/cpp/include/cuvs/core/interop.hpp +++ b/cpp/include/cuvs/core/interop.hpp @@ -51,9 +51,25 @@ inline bool is_dlpack_host_compatible(DLTensor tensor) return detail::is_dlpack_host_compatible(tensor); } +/** + * @brief 
Check if DLManagedTensor has a row-major (c-contiguous) layout + * + * @param tensor DLManagedTensor object to check + * @return bool + */ +inline bool is_c_contiguous(DLManagedTensor* tensor) { return detail::is_c_contiguous(tensor); } + +/** + * @brief Check if DLManagedTensor has a col-major (f-contiguous) layout + * + * @param tensor DLManagedTensor object to check + * @return true only if the tensor has strides and they describe a compact col-major layout + */ +inline bool is_f_contiguous(DLManagedTensor* tensor) { return detail::is_f_contiguous(tensor); } + /** * @brief Convert a DLManagedTensor to an mdspan - * NOTE: This function only supports compact row-major layouts. + * NOTE: This function only supports compact row-major and col-major layouts. * * @code {.cpp} * #include diff --git a/cpp/src/distance/pairwise_distance_c.cpp b/cpp/src/distance/pairwise_distance_c.cpp index 061adaa2c..5344a554c 100644 --- a/cpp/src/distance/pairwise_distance_c.cpp +++ b/cpp/src/distance/pairwise_distance_c.cpp @@ -29,7 +29,7 @@ namespace { -template +template void _pairwise_distance(cuvsResources_t res, DLManagedTensor* x_tensor, DLManagedTensor* y_tensor, @@ -39,8 +39,8 @@ void _pairwise_distance(cuvsResources_t res, { auto res_ptr = reinterpret_cast(res); - using mdspan_type = raft::device_matrix_view; - using distances_mdspan_type = raft::device_matrix_view; + using mdspan_type = raft::device_matrix_view; + using distances_mdspan_type = raft::device_matrix_view; auto x_mds = cuvs::core::from_dlpack(x_tensor); auto y_mds = cuvs::core::from_dlpack(y_tensor); @@ -70,17 +70,64 @@ extern "C" cuvsError_t cuvsPairwiseDistance(cuvsResources_t res, RAFT_FAIL("Inputs to cuvsPairwiseDistance must all have the same dtype"); } - if (x_dt.bits == 32) { - _pairwise_distance( - res, x_tensor, y_tensor, distances_tensor, metric, metric_arg); - } else if (x_dt.bits == 16) { - _pairwise_distance( - res, x_tensor, y_tensor, distances_tensor, metric, metric_arg); - } else if (x_dt.bits == 64) { - _pairwise_distance( - res, x_tensor, y_tensor, distances_tensor, 
metric, metric_arg); + bool x_row_major; + if (cuvs::core::is_c_contiguous(x_tensor)) { + x_row_major = true; + } else if (cuvs::core::is_f_contiguous(x_tensor)) { + x_row_major = false; } else { - RAFT_FAIL("Unsupported DLtensor dtype: %d and bits: %d", x_dt.code, x_dt.bits); + RAFT_FAIL("X input to cuvsPairwiseDistance must be contiguous (non-strided)"); + } + + bool y_row_major; + if (cuvs::core::is_c_contiguous(y_tensor)) { + y_row_major = true; + } else if (cuvs::core::is_f_contiguous(y_tensor)) { + y_row_major = false; + } else { + RAFT_FAIL("Y input to cuvsPairwiseDistance must be contiguous (non-strided)"); + } + + bool distances_row_major; + if (cuvs::core::is_c_contiguous(distances_tensor)) { + distances_row_major = true; + } else if (cuvs::core::is_f_contiguous(distances_tensor)) { + distances_row_major = false; + } else { + RAFT_FAIL("distances input to cuvsPairwiseDistance must be contiguous (non-strided)"); + } + + if ((x_row_major != y_row_major) || (x_row_major != distances_row_major)) { + RAFT_FAIL( + "Inputs to cuvsPairwiseDistance must all have the same layout (row-major or col-major)"); + } + + if (x_row_major) { + if (x_dt.bits == 32) { + _pairwise_distance( + res, x_tensor, y_tensor, distances_tensor, metric, metric_arg); + } else if (x_dt.bits == 16) { + _pairwise_distance( + res, x_tensor, y_tensor, distances_tensor, metric, metric_arg); + } else if (x_dt.bits == 64) { + _pairwise_distance( + res, x_tensor, y_tensor, distances_tensor, metric, metric_arg); + } else { + RAFT_FAIL("Unsupported DLtensor dtype: %d and bits: %d", x_dt.code, x_dt.bits); + } + } else { + if (x_dt.bits == 32) { + _pairwise_distance( + res, x_tensor, y_tensor, distances_tensor, metric, metric_arg); + } else if (x_dt.bits == 16) { + _pairwise_distance( + res, x_tensor, y_tensor, distances_tensor, metric, metric_arg); + } else if (x_dt.bits == 64) { + _pairwise_distance( + res, x_tensor, y_tensor, distances_tensor, metric, metric_arg); + } else { + RAFT_FAIL("Unsupported 
DLtensor dtype: %d and bits: %d", x_dt.code, x_dt.bits); + } } }); } diff --git a/python/cuvs/cuvs/common/cydlpack.pyx b/python/cuvs/cuvs/common/cydlpack.pyx index 79f88cddc..bee8d9afa 100644 --- a/python/cuvs/cuvs/common/cydlpack.pyx +++ b/python/cuvs/cuvs/common/cydlpack.pyx @@ -25,6 +25,8 @@ cdef void deleter(DLManagedTensor* tensor) noexcept: if tensor.manager_ctx is NULL: return stdlib.free(tensor.dl_tensor.shape) + if tensor.dl_tensor.strides is not NULL: + stdlib.free(tensor.dl_tensor.strides) tensor.manager_ctx = NULL stdlib.free(tensor) @@ -95,11 +97,20 @@ cdef DLManagedTensor* dlpack_c(ary): tensor.data = tensor_ptr tensor.device = dev tensor.dtype = dtype - tensor.strides = NULL tensor.ndim = ndim tensor.shape = shape tensor.byte_offset = 0 + if ary.c_contiguous: + tensor.strides = NULL + elif ary.f_contiguous: + tensor.strides = stdlib.malloc(ndim * sizeof(int64_t)) + tensor.strides[0] = 1 + for i in range(1, ndim): + tensor.strides[i] = tensor.strides[i-1] * tensor.shape[i-1] + else: + raise ValueError("Input data must be contiguous") + dlm.dl_tensor = tensor dlm.manager_ctx = NULL dlm.deleter = deleter diff --git a/python/cuvs/cuvs/distance/distance.pyx b/python/cuvs/cuvs/distance/distance.pyx index 187532bfe..d00e6b1b1 100644 --- a/python/cuvs/cuvs/distance/distance.pyx +++ b/python/cuvs/cuvs/distance/distance.pyx @@ -103,7 +103,9 @@ def pairwise_distance(X, Y, out=None, metric="euclidean", metric_arg=2.0, output_dtype = y_cai.dtype if np.issubdtype(y_cai.dtype, np.float16): output_dtype = np.float32 - out = device_ndarray.empty((m, n), dtype=output_dtype) + + order = "C" if getattr(X, "flags", X).c_contiguous else "F" + out = device_ndarray.empty((m, n), dtype=output_dtype, order=order) out_cai = wrap_array(out) x_k = x_cai.shape[1] diff --git a/python/cuvs/cuvs/test/test_distance.py b/python/cuvs/cuvs/test/test_distance.py index f466c2743..9f206064c 100644 --- a/python/cuvs/cuvs/test/test_distance.py +++ b/python/cuvs/cuvs/test/test_distance.py @@ 
-40,10 +40,11 @@ ], ) @pytest.mark.parametrize("inplace", [True, False]) +@pytest.mark.parametrize("order", ["F", "C"]) @pytest.mark.parametrize("dtype", [np.float32, np.float64, np.float16]) -def test_distance(n_rows, n_cols, inplace, metric, dtype): +def test_distance(n_rows, n_cols, inplace, order, metric, dtype): input1 = np.random.random_sample((n_rows, n_cols)) - input1 = np.asarray(input1).astype(dtype) + input1 = np.asarray(input1, order=order).astype(dtype) # RussellRao expects boolean arrays if metric == "russellrao": @@ -58,7 +59,7 @@ def test_distance(n_rows, n_cols, inplace, metric, dtype): output_dtype = dtype if np.issubdtype(dtype, np.float16): output_dtype = np.float32 - output = np.zeros((n_rows, n_rows), dtype=output_dtype) + output = np.zeros((n_rows, n_rows), dtype=output_dtype, order=order) if metric == "inner_product": expected = np.matmul(input1, input1.T)