Skip to content

Commit

Permalink
Changes to ensure dependency lower bounds are sufficient
Browse files (browse the repository at this point in the history)
  • Loading branch information
lukstafi committed Sep 30, 2024
1 parent 2dd9527 commit 559fc9d
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 65 deletions.
2 changes: 1 addition & 1 deletion cudajit.opam
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ homepage: "https://github.com/lukstafi/ocaml-cudajit"
doc: "https://github.com/lukstafi/ocaml-cudajit/blob/master/README.md"
bug-reports: "https://github.com/lukstafi/ocaml-cudajit/issues"
depends: [
- "ocaml" {>= "4.08"}
+ "ocaml" {>= "4.12"}
"dune" {>= "3.11"}
"ctypes" {>= "0.14.0"}
"ctypes-foreign"
Expand Down
2 changes: 1 addition & 1 deletion dune-project
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
"Bindings to manually selected parts of `lcuda` and `lnvrtc`, with a few types and conversion functions to facilitate use.")
(depends
(ocaml
- (>= 4.08))
+ (>= 4.12))
dune
(ctypes
(>= 0.14.0))
Expand Down
126 changes: 63 additions & 63 deletions test_no_device/saxpy_ptx.ml
Original file line number Diff line number Diff line change
Expand Up @@ -133,75 +133,75 @@ let%expect_test "SAXPY half precision compilation" =
NNN
.address_size 64

- // .globl saxpy
+ // .globl saxpy

.visible .entry saxpy(
- .param .align 2 .b8 saxpy_param_0[2],
- .param .u64 saxpy_param_1,
- .param .u64 saxpy_param_2,
- .param .u64 saxpy_param_3,
- .param .u64 saxpy_param_4
+ .param .align 2 .b8 saxpy_param_0[2],
+ .param .u64 saxpy_param_1,
+ .param .u64 saxpy_param_2,
+ .param .u64 saxpy_param_3,
+ .param .u64 saxpy_param_4
)
{
- .reg .pred %p<2>;
- .reg .b16 %rs<8>;
- .reg .f32 %f<7>;
- .reg .b32 %r<5>;
- .reg .b64 %rd<13>;
-
-
- ld.param.u16 %rs1, [saxpy_param_0];
- ld.param.u64 %rd2, [saxpy_param_1];
- ld.param.u64 %rd3, [saxpy_param_2];
- ld.param.u64 %rd4, [saxpy_param_3];
- ld.param.u64 %rd5, [saxpy_param_4];
- mov.u32 %r1, %ctaid.x;
- mov.u32 %r2, %ntid.x;
- mov.u32 %r3, %tid.x;
- mad.lo.s32 %r4, %r1, %r2, %r3;
- cvt.u64.u32 %rd1, %r4;
- setp.ge.u64 %p1, %rd1, %rd5;
- @%p1 bra $L__BB0_2;
-
- cvta.to.global.u64 %rd6, %rd2;
- shl.b64 %rd7, %rd1, 1;
- add.s64 %rd8, %rd6, %rd7;
- ld.global.u16 %rs3, [%rd8];
- // begin inline asm
- { cvt.f32.f16 %f1, %rs1;}
-
- // end inline asm
- // begin inline asm
- { cvt.f32.f16 %f2, %rs3;}
-
- // end inline asm
- mul.ftz.f32 %f3, %f1, %f2;
- // begin inline asm
- { cvt.rn.f16.f32 %rs4, %f3;}
-
- // end inline asm
- cvta.to.global.u64 %rd9, %rd3;
- add.s64 %rd10, %rd9, %rd7;
- ld.global.u16 %rs6, [%rd10];
- // begin inline asm
- { cvt.f32.f16 %f4, %rs4;}
-
- // end inline asm
- // begin inline asm
- { cvt.f32.f16 %f5, %rs6;}
-
- // end inline asm
- add.ftz.f32 %f6, %f4, %f5;
- // begin inline asm
- { cvt.rn.f16.f32 %rs7, %f6;}
-
- // end inline asm
- cvta.to.global.u64 %rd11, %rd4;
- add.s64 %rd12, %rd11, %rd7;
- st.global.u16 [%rd12], %rs7;
+ .reg .pred %p<2>;
+ .reg .b16 %rs<8>;
+ .reg .f32 %f<7>;
+ .reg .b32 %r<5>;
+ .reg .b64 %rd<13>;
+
+
+ ld.param.u16 %rs1, [saxpy_param_0];
+ ld.param.u64 %rd2, [saxpy_param_1];
+ ld.param.u64 %rd3, [saxpy_param_2];
+ ld.param.u64 %rd4, [saxpy_param_3];
+ ld.param.u64 %rd5, [saxpy_param_4];
+ mov.u32 %r1, %ctaid.x;
+ mov.u32 %r2, %ntid.x;
+ mov.u32 %r3, %tid.x;
+ mad.lo.s32 %r4, %r1, %r2, %r3;
+ cvt.u64.u32 %rd1, %r4;
+ setp.ge.u64 %p1, %rd1, %rd5;
+ @%p1 bra $L__BB0_2;
+
+ cvta.to.global.u64 %rd6, %rd2;
+ shl.b64 %rd7, %rd1, 1;
+ add.s64 %rd8, %rd6, %rd7;
+ ld.global.u16 %rs3, [%rd8];
+ // begin inline asm
+ { cvt.f32.f16 %f1, %rs1;}
+
+ // end inline asm
+ // begin inline asm
+ { cvt.f32.f16 %f2, %rs3;}
+
+ // end inline asm
+ mul.ftz.f32 %f3, %f1, %f2;
+ // begin inline asm
+ { cvt.rn.f16.f32 %rs4, %f3;}
+
+ // end inline asm
+ cvta.to.global.u64 %rd9, %rd3;
+ add.s64 %rd10, %rd9, %rd7;
+ ld.global.u16 %rs6, [%rd10];
+ // begin inline asm
+ { cvt.f32.f16 %f4, %rs4;}
+
+ // end inline asm
+ // begin inline asm
+ { cvt.f32.f16 %f5, %rs6;}
+
+ // end inline asm
+ add.ftz.f32 %f6, %f4, %f5;
+ // begin inline asm
+ { cvt.rn.f16.f32 %rs7, %f6;}
+
+ // end inline asm
+ cvta.to.global.u64 %rd11, %rd4;
+ add.s64 %rd12, %rd11, %rd7;
+ st.global.u16 [%rd12], %rs7;

$L__BB0_2:
- ret;
+ ret;

}
|}]

0 comments on commit 559fc9d

Please sign in to comment.