rapidsai · benfred · Jan 15, 2025 · Jan 15, 2025 · Jan 15, 2025 · Jan 15, 2025
@@ -86,7 +86,6 @@ inline MdspanType from_dlpack(DLManagedTensor* managed_tensor)
   RAFT_EXPECTS(to_data_type.lanes == tensor.dtype.lanes,
                "lanes mismatch between return mdspan and DLTensor");
   RAFT_EXPECTS(tensor.dtype.lanes == 1, "More than 1 DLTensor lanes not supported");
-  RAFT_EXPECTS(tensor.strides == nullptr, "Strided memory layout for DLTensor not supported");
 
   auto to_device = accessor_type_to_DLDevice<typename MdspanType::accessor_type>();
   if (to_device.device_type == kDLCUDA) {
@@ -110,4 +109,36 @@ inline MdspanType from_dlpack(DLManagedTensor* managed_tensor)
   return MdspanType{reinterpret_cast<typename MdspanType::data_handle_type>(tensor.data), exts};
 }
 
+/**
+ * @brief Check if a DLManagedTensor has a compact col-major (f-contiguous) layout
+ *
+ * Strides are compared, in units of elements, against the strides a dense
+ * col-major tensor of the same shape would have. Dimensions of extent 1 are
+ * ignored because their stride never contributes to an element address, and
+ * a tensor holding no elements is reported as contiguous.
+ *
+ * @param managed_tensor DLManagedTensor object to check (must not be null)
+ * @return true if the tensor can be viewed as a dense col-major array
+ */
+inline bool is_f_contiguous(DLManagedTensor* managed_tensor)
+{
+  // bind by reference: the descriptor is only inspected, so copying it is wasted work
+  const auto& tensor = managed_tensor->dl_tensor;
+
+  // a tensor without elements is trivially contiguous, whatever its strides say
+  for (int64_t i = 0; i < tensor.ndim; ++i) {
+    if (tensor.shape[i] == 0) { return true; }
+  }
+
+  if (!tensor.strides) {
+    // no stride information indicates a compact row-major tensor; that memory
+    // layout coincides with col-major only when at most one extent exceeds 1
+    int64_t non_unit_dims = 0;
+    for (int64_t i = 0; i < tensor.ndim; ++i) {
+      if (tensor.shape[i] != 1) { ++non_unit_dims; }
+    }
+    return non_unit_dims <= 1;
+  }
+
+  int64_t expected_stride = 1;
+  for (int64_t i = 0; i < tensor.ndim; ++i) {
+    // the stride of a unit extent is never used for addressing: accept any value
+    if (tensor.shape[i] == 1) { continue; }
+    if (tensor.strides[i] != expected_stride) { return false; }
+    expected_stride *= tensor.shape[i];
+  }
+
+  return true;
+}
+
+/**
+ * @brief Check if a DLManagedTensor has a compact row-major (c-contiguous) layout
+ *
+ * Strides are compared, in units of elements, against the strides a dense
+ * row-major tensor of the same shape would have. Dimensions of extent 1 are
+ * ignored because their stride never contributes to an element address, and
+ * a tensor holding no elements is reported as contiguous.
+ *
+ * @param managed_tensor DLManagedTensor object to check (must not be null)
+ * @return true if the tensor can be viewed as a dense row-major array
+ */
+inline bool is_c_contiguous(DLManagedTensor* managed_tensor)
+{
+  // bind by reference: the descriptor is only inspected, so copying it is wasted work
+  const auto& tensor = managed_tensor->dl_tensor;
+
+  if (!tensor.strides) {
+    // no stride information indicates a row-major tensor according to the dlpack spec
+    return true;
+  }
+
+  // a tensor without elements is trivially contiguous, whatever its strides say
+  for (int64_t i = 0; i < tensor.ndim; ++i) {
+    if (tensor.shape[i] == 0) { return true; }
+  }
+
+  int64_t expected_stride = 1;
+  // walk from the innermost (fastest varying) dimension outwards
+  for (int64_t i = tensor.ndim - 1; i >= 0; --i) {
+    // the stride of a unit extent is never used for addressing: accept any value
+    if (tensor.shape[i] == 1) { continue; }
+    if (tensor.strides[i] != expected_stride) { return false; }
+    expected_stride *= tensor.shape[i];
+  }
+
+  return true;
+}
+
 }  // namespace cuvs::core::detail
@@ -51,9 +51,25 @@ inline bool is_dlpack_host_compatible(DLTensor tensor)
   return detail::is_dlpack_host_compatible(tensor);
 }
 
+/**
+ * @brief Report whether a DLManagedTensor is laid out in row-major (c-contiguous) order
+ *
+ * Thin public entry point that forwards to detail::is_c_contiguous.
+ *
+ * @param tensor DLManagedTensor object to inspect
+ * @return bool result of the detail-level layout check
+ */
+inline bool is_c_contiguous(DLManagedTensor* tensor)
+{
+  const bool row_major = detail::is_c_contiguous(tensor);
+  return row_major;
+}
+
+/**
+ * @brief Report whether a DLManagedTensor is laid out in col-major (f-contiguous) order
+ *
+ * Thin public entry point that forwards to detail::is_f_contiguous.
+ *
+ * @param tensor DLManagedTensor object to inspect
+ * @return bool result of the detail-level layout check
+ */
+inline bool is_f_contiguous(DLManagedTensor* tensor)
+{
+  const bool col_major = detail::is_f_contiguous(tensor);
+  return col_major;
+}
+
 /**
  * @brief Convert a DLManagedTensor to an mdspan
- * NOTE: This function only supports compact row-major layouts.
+ * NOTE: This function only supports compact row-major and col-major layouts.
  *
  * @code {.cpp}
  * #include <raft/core/device_mdspan.hpp>

@@ -29,7 +29,7 @@
 
 namespace {
 
-template <typename T, typename DistT>
+template <typename T, typename DistT, typename LayoutT = raft::row_major>
 void _pairwise_distance(cuvsResources_t res,
                         DLManagedTensor* x_tensor,
                         DLManagedTensor* y_tensor,
@@ -39,8 +39,8 @@ void _pairwise_distance(cuvsResources_t res,
 {
   auto res_ptr = reinterpret_cast<raft::resources*>(res);
 
-  using mdspan_type           = raft::device_matrix_view<T const, int64_t, raft::row_major>;
-  using distances_mdspan_type = raft::device_matrix_view<DistT, int64_t, raft::row_major>;
+  using mdspan_type           = raft::device_matrix_view<T const, int64_t, LayoutT>;
+  using distances_mdspan_type = raft::device_matrix_view<DistT, int64_t, LayoutT>;
 
   auto x_mds         = cuvs::core::from_dlpack<mdspan_type>(x_tensor);
   auto y_mds         = cuvs::core::from_dlpack<mdspan_type>(y_tensor);
@@ -70,17 +70,64 @@ extern "C" cuvsError_t cuvsPairwiseDistance(cuvsResources_t res,
       RAFT_FAIL("Inputs to cuvsPairwiseDistance must all have the same dtype");
     }
 
-    if (x_dt.bits == 32) {
-      _pairwise_distance<float, float>(
-        res, x_tensor, y_tensor, distances_tensor, metric, metric_arg);
-    } else if (x_dt.bits == 16) {
-      _pairwise_distance<half, float>(
-        res, x_tensor, y_tensor, distances_tensor, metric, metric_arg);
-    } else if (x_dt.bits == 64) {
-      _pairwise_distance<double, double>(
-        res, x_tensor, y_tensor, distances_tensor, metric, metric_arg);
+    bool x_row_major;
+    if (cuvs::core::is_c_contiguous(x_tensor)) {
+      x_row_major = true;
+    } else if (cuvs::core::is_f_contiguous(x_tensor)) {
+      x_row_major = false;
     } else {
-      RAFT_FAIL("Unsupported DLtensor dtype: %d and bits: %d", x_dt.code, x_dt.bits);
+      RAFT_FAIL("X input to cuvsPairwiseDistance must be contiguous (non-strided)");
+    }
+
+    bool y_row_major;
+    if (cuvs::core::is_c_contiguous(y_tensor)) {
+      y_row_major = true;
+    } else if (cuvs::core::is_f_contiguous(y_tensor)) {
+      y_row_major = false;
+    } else {
+      RAFT_FAIL("Y input to cuvsPairwiseDistance must be contiguous (non-strided)");
+    }
+
+    bool distances_row_major;
+    if (cuvs::core::is_c_contiguous(distances_tensor)) {
+      distances_row_major = true;
+    } else if (cuvs::core::is_f_contiguous(distances_tensor)) {
+      distances_row_major = false;
+    } else {
+      RAFT_FAIL("distances input to cuvsPairwiseDistance must be contiguous (non-strided)");
+    }
+
+    if ((x_row_major != y_row_major) || (x_row_major != distances_row_major)) {
+      RAFT_FAIL(
+        "Inputs to cuvsPairwiseDistance must all have the same layout (row-major or col-major");
+    }
+
+    if (x_row_major) {
+      if (x_dt.bits == 32) {
+        _pairwise_distance<float, float>(
+          res, x_tensor, y_tensor, distances_tensor, metric, metric_arg);
+      } else if (x_dt.bits == 16) {
+        _pairwise_distance<half, float>(
+          res, x_tensor, y_tensor, distances_tensor, metric, metric_arg);
+      } else if (x_dt.bits == 64) {
+        _pairwise_distance<double, double>(
+          res, x_tensor, y_tensor, distances_tensor, metric, metric_arg);
+      } else {
+        RAFT_FAIL("Unsupported DLtensor dtype: %d and bits: %d", x_dt.code, x_dt.bits);
+      }
+    } else {
+      if (x_dt.bits == 32) {
+        _pairwise_distance<float, float, raft::col_major>(
+          res, x_tensor, y_tensor, distances_tensor, metric, metric_arg);
+      } else if (x_dt.bits == 16) {
+        _pairwise_distance<half, float, raft::col_major>(
+          res, x_tensor, y_tensor, distances_tensor, metric, metric_arg);
+      } else if (x_dt.bits == 64) {
+        _pairwise_distance<double, double, raft::col_major>(
+          res, x_tensor, y_tensor, distances_tensor, metric, metric_arg);
+      } else {
+        RAFT_FAIL("Unsupported DLtensor dtype: %d and bits: %d", x_dt.code, x_dt.bits);
+      }
     }
   });
 }
@@ -25,6 +25,8 @@ cdef void deleter(DLManagedTensor* tensor) noexcept:
     if tensor.manager_ctx is NULL:
         return
     stdlib.free(tensor.dl_tensor.shape)
+    if tensor.dl_tensor.strides is not NULL:
+        stdlib.free(tensor.dl_tensor.strides)
     tensor.manager_ctx = NULL
     stdlib.free(tensor)
 
@@ -95,11 +97,20 @@ cdef DLManagedTensor* dlpack_c(ary):
     tensor.data = <void*> tensor_ptr
     tensor.device = dev
     tensor.dtype = dtype
-    tensor.strides = NULL
     tensor.ndim = ndim
     tensor.shape = shape
     tensor.byte_offset = 0
 
+    if ary.c_contiguous:
+        tensor.strides = NULL
+    elif ary.f_contiguous:
+        tensor.strides = <int64_t*>stdlib.malloc(ndim * sizeof(int64_t))
+        tensor.strides[0] = 1
+        for i in range(1, ndim):
+            tensor.strides[i] = tensor.strides[i-1] * tensor.shape[i-1]
+    else:
+        raise ValueError("Input data must be contiguous")
+
     dlm.dl_tensor = tensor
     dlm.manager_ctx = NULL
     dlm.deleter = deleter

@@ -103,7 +103,9 @@ def pairwise_distance(X, Y, out=None, metric="euclidean", metric_arg=2.0,
         output_dtype = y_cai.dtype
         if np.issubdtype(y_cai.dtype, np.float16):
             output_dtype = np.float32
-        out = device_ndarray.empty((m, n), dtype=output_dtype)
+
+        order = "C" if getattr(X, "flags", X).c_contiguous else "F"
+        out = device_ndarray.empty((m, n), dtype=output_dtype, order=order)
     out_cai = wrap_array(out)
 
     x_k = x_cai.shape[1]

@@ -40,10 +40,11 @@
     ],
 )
 @pytest.mark.parametrize("inplace", [True, False])
+@pytest.mark.parametrize("order", ["F", "C"])
 @pytest.mark.parametrize("dtype", [np.float32, np.float64, np.float16])
-def test_distance(n_rows, n_cols, inplace, metric, dtype):
+def test_distance(n_rows, n_cols, inplace, order, metric, dtype):
     input1 = np.random.random_sample((n_rows, n_cols))
-    input1 = np.asarray(input1).astype(dtype)
+    input1 = np.asarray(input1, order=order).astype(dtype)
 
     # RussellRao expects boolean arrays
     if metric == "russellrao":
@@ -58,7 +59,7 @@ def test_distance(n_rows, n_cols, inplace, metric, dtype):
     output_dtype = dtype
     if np.issubdtype(dtype, np.float16):
         output_dtype = np.float32
-    output = np.zeros((n_rows, n_rows), dtype=output_dtype)
+    output = np.zeros((n_rows, n_rows), dtype=output_dtype, order=order)
 
     if metric == "inner_product":
         expected = np.matmul(input1, input1.T)