Skip to content

Commit

Permalink
Untested: missing new primitive ops for optional backends CUDA, GCC
Browse files Browse the repository at this point in the history
  • Loading branch information
lukstafi committed Feb 2, 2025
1 parent ae845d0 commit b4fa5c3
Show file tree
Hide file tree
Showing 2 changed files with 156 additions and 8 deletions.
70 changes: 69 additions & 1 deletion arrayjit/lib/cuda_backend.cudajit.ml
Original file line number Diff line number Diff line change
Expand Up @@ -309,10 +309,25 @@ module Fresh () = struct
| Satur01_gate, Byte_prec _ -> ("(abs(", ") > 0 ? 0 : (", ")")
| Satur01_gate, Half_prec _ ->
( "(__hgt(__habs(htrunc(",
")), __ushort_as_half((unsigned short)0x0000U)) ? __ushort_as_half((unsigned short)0x0000U) : (",
")), __ushort_as_half((unsigned short)0x0000U)) ? __ushort_as_half((unsigned \
short)0x0000U) : (",
"))" )
| Satur01_gate, Double_prec _ -> ("(fabs(trunc(", ")) > 0.0 ? 0.0 : (", "))")
| Satur01_gate, Single_prec _ -> ("(fabsf(truncf(", ")) > 0.0 ? 0.0 : (", "))")
| Max, Byte_prec _ -> ("max(", ", ", ")")
| Max, Half_prec _ -> ("__hmax(", ", ", ")")
| Max, Double_prec _ -> ("fmax(", ", ", ")")
| Max, Single_prec _ -> ("fmaxf(", ", ", ")")
| Min, Byte_prec _ -> ("min(", ", ", ")")
| Min, Half_prec _ -> ("__hmin(", ", ", ")")
| Min, Double_prec _ -> ("fmin(", ", ", ")")
| Min, Single_prec _ -> ("fminf(", ", ", ")")
| Mod, Byte_prec _ -> ("(", " % ", ")")
| Mod, _ -> ("fmod(", ", ", ")")
| Cmplt, _ -> ("(", " < ", ")")
| Cmpeq, _ -> ("(", " == ", ")")
| Or, _ -> ("(", " || ", ")")
| And, _ -> ("(", " && ", ")")

let unop_syntax prec v =
match (v, prec) with
Expand All @@ -321,6 +336,59 @@ module Fresh () = struct
| Relu, Ops.Half_prec _ -> ("__hmax_nan(__ushort_as_half((unsigned short)0x0000U), ", ")")
| Relu, Ops.Byte_prec _ -> ("fmax(0, ", ")")
| Relu, _ -> ("fmax(0.0, ", ")")
| Satur01, Byte_prec _ -> ("fmax(0, fmin(1, ", "))")
| Satur01, Half_prec _ ->
( "__hmax_nan(__ushort_as_half((unsigned short)0x0000U), \
__hmin_nan(__ushort_as_half((unsigned short)0x3C00U), ",
"))" )
| Satur01, Single_prec _ -> ("fmaxf(0.0f, fminf(1.0f, ", "))")
| Satur01, _ -> ("fmax(0.0, fmin(1.0, ", "))")
| Exp, Half_prec _ -> ("hexp(", ")")
| Exp, Double_prec _ -> ("exp(", ")")
| Exp, _ -> ("expf(", ")")
| Log, Half_prec _ -> ("hlog(", ")")
| Log, Double_prec _ -> ("log(", ")")
| Log, _ -> ("logf(", ")")
| Exp2, Half_prec _ -> ("hexp2(", ")")
| Exp2, Double_prec _ -> ("exp2(", ")")
| Exp2, _ -> ("exp2f(", ")")
| Log2, Half_prec _ -> ("hlog2(", ")")
| Log2, Double_prec _ -> ("log2(", ")")
| Log2, _ -> ("log2f(", ")")
| Sin, Half_prec _ -> ("hsin(", ")")
| Sin, Double_prec _ -> ("sin(", ")")
| Sin, _ -> ("sinf(", ")")
| Cos, Half_prec _ -> ("hcos(", ")")
| Cos, Double_prec _ -> ("cos(", ")")
| Cos, _ -> ("cosf(", ")")
| Sqrt, Half_prec _ -> ("hsqrt(", ")")
| Sqrt, Double_prec _ -> ("sqrt(", ")")
| Sqrt, _ -> ("sqrtf(", ")")
| Recip, Byte_prec _ ->
invalid_arg "Cuda_backend.unop_syntax: Recip not supported for byte/integer precisions"
| Recip, Half_prec _ -> ("hrcp(", ")")
| Recip, _ -> ("(1.0 / (", "))")
| Recip_sqrt, Byte_prec _ ->
invalid_arg
"Cuda_backend.unop_syntax: Recip_sqrt not supported for byte/integer precisions"
| Recip_sqrt, Half_prec _ -> ("hrsqrt(", ")")
| Recip_sqrt, Double_prec _ -> ("(1.0 / sqrt(", "))")
| Recip_sqrt, _ -> ("(1.0 / sqrtf(", "))")
| Neg, _ -> ("(-(", "))")
| Tanh_approx, Byte_prec _ ->
invalid_arg
"Cuda_backend.unop_syntax: Tanh_approx not supported for byte/integer precisions"
| Tanh_approx, Half_prec _ -> ("htanh_approx(", ")")
| Tanh_approx, Single_prec _ -> ("__tanhf(", ")")
| Tanh_approx, _ -> ("tanh(", ")")
| Not, _ -> ("(", " == 0.0 ? 1.0 : 0.0)")

let ternop_syntax prec v =
match (v, prec) with
| Ops.Where, _ -> ("(", " ? ", " : ", ")")
| FMA, Ops.Half_prec _ -> ("__hfma(", ", ", ", ", ")")
| FMA, Ops.Single_prec _ -> ("fmaf(", ", ", ", ", ")")
| FMA, _ -> ("fma(", ", ", ", ", ")")

let convert_precision ~from ~to_ =
match (from, to_) with
Expand Down
94 changes: 87 additions & 7 deletions arrayjit/lib/gcc_backend.gccjit.ml
Original file line number Diff line number Diff line change
Expand Up @@ -166,14 +166,15 @@ let prec_to_kind prec =

let is_builtin_op = function
| Ops.Add | Sub | Mul | Div -> true
| ToPowOf | Relu_gate | Satur01_gate | Arg2 | Arg1 -> false
| ToPowOf | Relu_gate | Satur01_gate | Arg2 | Arg1 | Max | Min | Mod | Cmplt | Cmpeq | Or | And ->
false

let builtin_op = function
| Ops.Add -> Gccjit.Plus
| Sub -> Gccjit.Minus
| Mul -> Gccjit.Mult
| Div -> Gccjit.Divide
| ToPowOf | Relu_gate | Satur01_gate | Arg2 | Arg1 ->
| ToPowOf | Relu_gate | Satur01_gate | Arg2 | Arg1 | Max | Min | Mod | Cmplt | Cmpeq | Or | And ->
invalid_arg "Exec_as_gccjit.builtin_op: not a builtin"

let node_debug_name get_ident node = get_ident node.tn
Expand Down Expand Up @@ -278,13 +279,34 @@ let compile_main ~name ~log_functions ~env { ctx; nodes; get_ident; merge_node;
| Satur01_gate, _ ->
let cmp =
cast_bool num_typ
@@ RValue.binary_op ctx And
@@ RValue.binary_op ctx Logical_and (Type.get ctx Type.Bool)
(RValue.comparison ctx Lt (RValue.zero ctx num_typ) v1)
(RValue.comparison ctx Lt v1 (RValue.one ctx num_typ))
in
RValue.binary_op ctx Mult num_typ cmp @@ v2
| Arg2, _ -> v2
| Arg1, _ -> v1
| Max, Double_prec _ ->
RValue.cast ctx (RValue.call ctx (Function.builtin ctx "fmax") [ to_d v1; to_d v2 ]) num_typ
| Max, Single_prec _ ->
RValue.cast ctx (RValue.call ctx (Function.builtin ctx "fmaxf") [ v1; v2 ]) num_typ
| Max, Half_prec _ ->
RValue.cast ctx (RValue.call ctx (Function.builtin ctx "fmaxf") [ v1; v2 ]) num_typ
| Max, Byte_prec _ ->
RValue.cast ctx (RValue.call ctx (Function.builtin ctx "max") [ v1; v2 ]) num_typ
| Min, Double_prec _ ->
RValue.cast ctx (RValue.call ctx (Function.builtin ctx "fmin") [ to_d v1; to_d v2 ]) num_typ
| Min, Single_prec _ ->
RValue.cast ctx (RValue.call ctx (Function.builtin ctx "fminf") [ v1; v2 ]) num_typ
| Min, Half_prec _ ->
RValue.cast ctx (RValue.call ctx (Function.builtin ctx "fminf") [ v1; v2 ]) num_typ
| Min, Byte_prec _ ->
RValue.cast ctx (RValue.call ctx (Function.builtin ctx "min") [ v1; v2 ]) num_typ
| Mod, _ -> RValue.binary_op ctx Modulo num_typ v1 v2
| Cmplt, _ -> RValue.comparison ctx Lt v1 v2
| Cmpeq, _ -> RValue.comparison ctx Eq v1 v2
| Or, _ -> RValue.binary_op ctx Logical_or num_typ v1 v2
| And, _ -> RValue.binary_op ctx Logical_and num_typ v1 v2
in
let log_comment c =
match log_functions with
Expand Down Expand Up @@ -343,6 +365,17 @@ let compile_main ~name ~log_functions ~env { ctx; nodes; get_ident; merge_node;
| Unop (Relu, v) ->
let v, fillers = loop v in
(String.concat [ "("; v; " > 0.0 ? "; v; " : 0.0)" ], fillers @ fillers)
| Unop (op, v) ->
let prefix, postfix = Ops.unop_c_syntax prec op in
let v, fillers = loop v in
(String.concat [ prefix; v; postfix ], fillers)
| Ternop (op, cond_v, then_v, else_v) ->
let prefix, infix1, infix2, postfix = Ops.ternop_c_syntax prec op in
let cond, fillers1 = loop cond_v in
let then_, fillers2 = loop then_v in
let else_, fillers3 = loop else_v in
( String.concat [ prefix; cond; infix1; then_; infix2; else_; postfix ],
fillers1 @ fillers2 @ fillers3 )
in
let debug_log_assignment ~env debug idcs node accum_op value v_code =
match log_functions with
Expand Down Expand Up @@ -484,12 +517,59 @@ let compile_main ~name ~log_functions ~env { ctx; nodes; get_ident; merge_node;
| Binop (op, c1, c2) -> loop_binop op ~num_typ prec ~v1:(loop c1) ~v2:(loop c2)
| Unop (Identity, c) -> loop c
| Unop (Relu, c) ->
(* FIXME: don't recompute c *)
let cmp =
cast_bool num_typ @@ RValue.comparison ctx Lt (RValue.zero ctx num_typ) @@ loop c
let v = loop c in
let cmp = cast_bool num_typ @@ RValue.comparison ctx Lt (RValue.zero ctx num_typ) v in
RValue.binary_op ctx Mult num_typ cmp v
| Unop (Satur01, c) ->
let v = loop c in
let zero = RValue.zero ctx num_typ in
let one = RValue.double ctx num_typ 1.0 in
let min =
RValue.binary_op ctx Plus num_typ zero
(RValue.binary_op ctx Mult num_typ (RValue.comparison ctx Lt v zero)
(RValue.binary_op ctx Minus num_typ zero v))
in
RValue.binary_op ctx Mult num_typ cmp @@ loop c
RValue.binary_op ctx Plus num_typ min
(RValue.binary_op ctx Mult num_typ (RValue.comparison ctx Gt v one)
(RValue.binary_op ctx Minus num_typ one v))
| Unop (((Exp | Log | Exp2 | Log2 | Sin | Cos | Sqrt | Tanh_approx) as op), c) ->
let prefix, suffix = Ops.unop_c_syntax prec op in
assert (
String.is_suffix ~suffix:"(" prefix
&& String.equal suffix ")"
&& not (String.is_prefix ~prefix:"(" prefix));
let f = Function.builtin ctx (String.drop_suffix prefix 1) in
RValue.call ctx f [ loop c ]
| Ternop (FMA as op, c1, c2, c3) ->
let prefix, _, _, _ = Ops.ternop_c_syntax prec op in
let f = Function.builtin ctx (String.drop_suffix prefix 1) in
RValue.call ctx f [ loop c1; loop c2; loop c3 ]
| Ternop (Where, c1, c2, c3) ->
let cond = loop c1 in
let zero = RValue.zero ctx num_typ in
let cmp = RValue.comparison ctx Eq cond zero in
let v1 = loop c2 in
let v2 = loop c3 in
RValue.binary_op ctx Plus num_typ
(RValue.binary_op ctx Mult num_typ cmp v2)
(RValue.binary_op ctx Mult num_typ
(RValue.binary_op ctx Minus num_typ (RValue.one ctx num_typ) cmp)
v1)
| Constant v -> RValue.double ctx num_typ v
| Unop (Recip, c) ->
let v = loop c in
RValue.binary_op ctx Divide num_typ (RValue.one ctx num_typ) v
| Unop (Recip_sqrt, c) ->
let v = loop c in
RValue.binary_op ctx Divide num_typ (RValue.one ctx num_typ)
(RValue.call ctx (Function.builtin ctx "sqrtf") [ v ])
| Unop (Neg, c) ->
let v = loop c in
RValue.unary_op ctx Negate num_typ v
| Unop (Not, c) ->
let v = loop c in
cast_bool num_typ @@ RValue.unary_op ctx Logical_negate (Type.get ctx Type.Bool)
(RValue.comparison ctx Eq v (RValue.zero ctx num_typ))
and loop_for_loop ~toplevel ~env key ~from_ ~to_ body =
let open Gccjit in
let i = Indexing.symbol_ident key in
Expand Down

0 comments on commit b4fa5c3

Please sign in to comment.