Bind mem_allocation_handle_type. Expand supported_handle_types
lukstafi committed Jul 21, 2024
1 parent e243e6a commit 7492340
Showing 3 changed files with 71 additions and 16 deletions.
43 changes: 38 additions & 5 deletions cuda_ffi/bindings_types.ml
@@ -348,6 +348,16 @@ type cu_flush_GPU_direct_RDMA_writes_options =
| CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_UNCATEGORIZED of int64
[@@deriving sexp]

+type cu_mem_allocation_handle_type =
+| CU_MEM_HANDLE_TYPE_NONE
+| CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR
+| CU_MEM_HANDLE_TYPE_WIN32
+| CU_MEM_HANDLE_TYPE_WIN32_KMT
+| CU_MEM_HANDLE_TYPE_FABRIC
+| CU_MEM_HANDLE_TYPE_MAX
+| CU_MEM_HANDLE_TYPE_UNCATEGORIZED of int64
+[@@deriving sexp]
+
type cu_limit =
| CU_LIMIT_STACK_SIZE
| CU_LIMIT_PRINTF_FIFO_SIZE
@@ -1299,11 +1309,11 @@ module Types (T : Ctypes.TYPE) = struct
( CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM,
cu_device_attribute_can_use_host_pointer_for_registered_mem );
(* ( CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS_V1,
-cu_device_attribute_can_use_stream_mem_ops_v1 );
-( CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS_V1,
-cu_device_attribute_can_use_64_bit_stream_mem_ops_v1 );
-( CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR_V1,
-cu_device_attribute_can_use_stream_wait_value_nor_v1 ); *)
+cu_device_attribute_can_use_stream_mem_ops_v1 ); (
+CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS_V1,
+cu_device_attribute_can_use_64_bit_stream_mem_ops_v1 ); (
+CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR_V1,
+cu_device_attribute_can_use_stream_wait_value_nor_v1 ); *)
(CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH, cu_device_attribute_cooperative_launch);
( CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH,
cu_device_attribute_cooperative_multi_device_launch );
@@ -1429,6 +1439,29 @@ module Types (T : Ctypes.TYPE) = struct
cu_flush_gpu_direct_rdma_writes_option_memops );
]

+let cu_mem_handle_type_none = T.constant "CU_MEM_HANDLE_TYPE_NONE" T.int64_t
+
+let cu_mem_handle_type_posix_file_descriptor =
+T.constant "CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR" T.int64_t
+
+let cu_mem_handle_type_win32 = T.constant "CU_MEM_HANDLE_TYPE_WIN32" T.int64_t
+let cu_mem_handle_type_win32_kmt = T.constant "CU_MEM_HANDLE_TYPE_WIN32_KMT" T.int64_t
+let cu_mem_handle_type_fabric = T.constant "CU_MEM_HANDLE_TYPE_FABRIC" T.int64_t
+let cu_mem_handle_type_max = T.constant "CU_MEM_HANDLE_TYPE_MAX" T.int64_t
+
+let cu_mem_allocation_handle_type =
+T.enum ~typedef:true
+~unexpected:(fun error_code -> CU_MEM_HANDLE_TYPE_UNCATEGORIZED error_code)
+"CUmemAllocationHandleType"
+[
+(CU_MEM_HANDLE_TYPE_NONE, cu_mem_handle_type_none);
+(CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR, cu_mem_handle_type_posix_file_descriptor);
+(CU_MEM_HANDLE_TYPE_WIN32, cu_mem_handle_type_win32);
+(CU_MEM_HANDLE_TYPE_WIN32_KMT, cu_mem_handle_type_win32_kmt);
+(CU_MEM_HANDLE_TYPE_FABRIC, cu_mem_handle_type_fabric);
+(CU_MEM_HANDLE_TYPE_MAX, cu_mem_handle_type_max);
+]
+
let cu_limit_stack_size = T.constant "CU_LIMIT_STACK_SIZE" T.int64_t
let cu_limit_printf_fifo_size = T.constant "CU_LIMIT_PRINTF_FIFO_SIZE" T.int64_t
let cu_limit_malloc_heap_size = T.constant "CU_LIMIT_MALLOC_HEAP_SIZE" T.int64_t
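The ctypes pattern above imports each C enum value with T.constant and then assembles them into a typed view with T.enum; the ~unexpected callback keeps the binding total by funneling values this binding does not know into CU_MEM_HANDLE_TYPE_UNCATEGORIZED. A minimal sketch of what [@@deriving sexp] buys for the new variant — the Cuda_ffi.Bindings_types module path is an assumption, not something the diff shows:

(* Sketch only, not part of the commit. Assumes the cuda_ffi library
   exposes bindings_types.ml as Cuda_ffi.Bindings_types. *)
let () =
  let open Cuda_ffi.Bindings_types in
  (* The derived serializer prints the constructor name as an s-expression. *)
  print_endline
    (Sexplib0.Sexp.to_string
       (sexp_of_cu_mem_allocation_handle_type CU_MEM_HANDLE_TYPE_FABRIC));
  (* Unknown enum values are preserved rather than lost; this is the shape
     the ~unexpected fallback produces at decode time. *)
  print_endline
    (Sexplib0.Sexp.to_string
       (sexp_of_cu_mem_allocation_handle_type (CU_MEM_HANDLE_TYPE_UNCATEGORIZED 42L)))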
33 changes: 25 additions & 8 deletions cudajit.ml
@@ -612,6 +612,18 @@ let int_of_flush_GPU_direct_RDMA_writes_options =

(* TODO: export CUmemAllocationHandleType to use in mempool_supported_handle_types. *)

+type mem_allocation_handle_type = NONE | POSIX_FILE_DESCRIPTOR | WIN32 | WIN32_KMT | FABRIC
+[@@deriving sexp]
+
+let int_of_mem_allocation_handle_type =
+let open Cuda_ffi.Types_generated in
+function
+| NONE -> Int64.to_int cu_mem_handle_type_none
+| POSIX_FILE_DESCRIPTOR -> Int64.to_int cu_mem_handle_type_posix_file_descriptor
+| WIN32 -> Int64.to_int cu_mem_handle_type_win32
+| WIN32_KMT -> Int64.to_int cu_mem_handle_type_win32_kmt
+| FABRIC -> Int64.to_int cu_mem_handle_type_fabric
+
type device_attributes = {
name : string;
max_threads_per_block : int;
@@ -725,7 +737,7 @@ type device_attributes = {
gpu_direct_rdma_supported : bool;
gpu_direct_rdma_flush_writes_options : flush_GPU_direct_RDMA_writes_options list;
gpu_direct_rdma_writes_ordering : bool;
-mempool_supported_handle_types : int;
+mempool_supported_handle_types : mem_allocation_handle_type list;
cluster_launch : bool;
deferred_mapping_cuda_array_supported : bool;
can_use_64_bit_stream_mem_ops : bool;
@@ -1266,7 +1278,7 @@ let device_get_attributes device =
@@ Cuda.cu_device_get_attribute gpu_direct_rdma_supported
CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED device;
let gpu_direct_rdma_supported = 0 <> !@gpu_direct_rdma_supported in
-let rec unfold flags remaining =
+let rec unfold f flags remaining =
let open Int in
match remaining with
| [] ->
@@ -1275,16 +1287,18 @@
| flag :: remaining ->
if equal flags zero then []
else
-let uflag = int_of_flush_GPU_direct_RDMA_writes_options flag in
-if equal (flags land uflag) zero then unfold flags remaining
-else flag :: unfold (flags lxor uflag) remaining
+let uflag = f flag in
+if equal (flags land uflag) zero then unfold f flags remaining
+else flag :: unfold f (flags lxor uflag) remaining
in
let gpu_direct_rdma_flush_writes_options = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute gpu_direct_rdma_flush_writes_options
CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS device;
let gpu_direct_rdma_flush_writes_options =
-unfold !@gpu_direct_rdma_flush_writes_options [ HOST; MEMOPS ]
+unfold int_of_flush_GPU_direct_RDMA_writes_options
+!@gpu_direct_rdma_flush_writes_options
+[ HOST; MEMOPS ]
in
let gpu_direct_rdma_writes_ordering = allocate int 0 in
check "cu_device_get_attribute"
@@ -1295,8 +1309,11 @@
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute mempool_supported_handle_types
CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES device;
-(* TODO: flesh out as a separate type. *)
-let mempool_supported_handle_types = !@mempool_supported_handle_types in
+let mempool_supported_handle_types =
+unfold int_of_mem_allocation_handle_type !@mempool_supported_handle_types
+[ NONE; POSIX_FILE_DESCRIPTOR; WIN32; WIN32_KMT; FABRIC ]
+in
+
let cluster_launch = allocate int 0 in
check "cu_device_get_attribute"
@@ Cuda.cu_device_get_attribute cluster_launch CU_DEVICE_ATTRIBUTE_CLUSTER_LAUNCH device;
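The unfold helper above is now a generic bitmask decoder: it takes the flag-to-integer encoding as a parameter f, tests each candidate flag against the mask, and clears matched bits so it can stop as soon as the mask is exhausted. This lets the same decoder serve both flush_GPU_direct_RDMA_writes_options and the new mem_allocation_handle_type list. A standalone sketch of the same logic, using the CUmemAllocationHandleType values from cuda.h (0x0, 0x1, 0x2, 0x4, 0x8 — quoted from memory, so verify against the header):

(* Self-contained sketch of the decoder generalized in this commit. *)
type handle_type = NONE | POSIX_FILE_DESCRIPTOR | WIN32 | WIN32_KMT | FABRIC

let to_bit = function
  | NONE -> 0x0
  | POSIX_FILE_DESCRIPTOR -> 0x1
  | WIN32 -> 0x2
  | WIN32_KMT -> 0x4
  | FABRIC -> 0x8

(* Return the subset of [remaining] whose encoded bit is set in [flags],
   clearing each matched bit so an exhausted mask short-circuits the walk. *)
let rec unfold f flags remaining =
  match remaining with
  | [] -> []
  | flag :: remaining ->
      if flags = 0 then []
      else
        let uflag = f flag in
        if flags land uflag = 0 then unfold f flags remaining
        else flag :: unfold f (flags lxor uflag) remaining

let () =
  (* 0b1001 sets the POSIX_FILE_DESCRIPTOR and FABRIC bits. *)
  let decoded =
    unfold to_bit 0b1001 [ NONE; POSIX_FILE_DESCRIPTOR; WIN32; WIN32_KMT; FABRIC ]
  in
  assert (decoded = [ POSIX_FILE_DESCRIPTOR; FABRIC ])

Note that a zero-valued flag like NONE can never be selected, since flags land 0 is always zero; passing it in the candidate list is harmless but redundant.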
11 changes: 8 additions & 3 deletions cudajit.mli
@@ -539,6 +539,12 @@ type computemode =
CUflushGPUDirectRDMAWritesOptions}. *)
type flush_GPU_direct_RDMA_writes_options = HOST | MEMOPS [@@deriving sexp]

+(** See
+{{:https://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__TYPES.html#group__CUDA__TYPES_1g450a23153d86fce0afe30e25d63caef9}
+CUmemAllocationHandleType}. *)
+type mem_allocation_handle_type = NONE | POSIX_FILE_DESCRIPTOR | WIN32 | WIN32_KMT | FABRIC
+[@@deriving sexp]
+
type device_attributes = {
name : string;
(** See
@@ -659,9 +665,8 @@ type device_attributes = {
(** See {{:https://docs.nvidia.com/cuda/gpudirect-rdma/} GPUDirect RDMA}. *)
gpu_direct_rdma_flush_writes_options : flush_GPU_direct_RDMA_writes_options list;
gpu_direct_rdma_writes_ordering : bool;
-mempool_supported_handle_types : int;
-(** Bitmask of handle types supported with mempool based IPC. TODO: flesh out as a separate
-type. *)
+mempool_supported_handle_types : mem_allocation_handle_type list;
+(** Handle types supported with mempool based IPC. *)
cluster_launch : bool;
deferred_mapping_cuda_array_supported : bool;
can_use_64_bit_stream_mem_ops : bool;
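After this interface change, callers receive a typed list rather than a raw bitmask. A hedged usage sketch — init and device_get ~ordinal are assumed to be exposed elsewhere in this interface with these shapes:

(* Usage sketch against the updated cudajit.mli; the init/device_get
   signatures are assumptions about the surrounding API, not shown here. *)
let () =
  Cudajit.init ();
  let dev = Cudajit.device_get ~ordinal:0 in
  let attrs = Cudajit.device_get_attributes dev in
  (* Formerly an opaque int bitmask; now a typed, sexp-printable list. *)
  attrs.Cudajit.mempool_supported_handle_types
  |> List.iter (fun h ->
         print_endline
           (Sexplib0.Sexp.to_string (Cudajit.sexp_of_mem_allocation_handle_type h)))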
