Skip to content

Commit

Permalink
Merge pull request #1606 from FL33TW00D/feature/larger-batches
Browse files Browse the repository at this point in the history
fix: larger batches
  • Loading branch information
FL33TW00D authored Jan 29, 2024
2 parents fd7c856 + b6afb46 commit 6d83d42
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 7 deletions.
13 changes: 6 additions & 7 deletions candle-metal-kernels/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1364,13 +1364,12 @@ pub fn call_gemm(
// TODO byte_stride_d
let byte_stride_d = 0;

- let mut buffer: Vec<u64> = Vec::with_capacity(b * 4);
- for i in 0..b {
- buffer.push((i * byte_stride_a) as u64);
- buffer.push((i * byte_stride_b) as u64);
- buffer.push((i * byte_stride_c) as u64);
- buffer.push((i * byte_stride_d) as u64);
- }
+ let buffer: Vec<u64> = vec![
+ byte_stride_a as _,
+ byte_stride_b as _,
+ byte_stride_c as _,
+ byte_stride_d as _,
+ ];
encoder.set_bytes(
10,
(buffer.len() * core::mem::size_of::<u64>()) as NSUInteger,
Expand Down
Binary file modified candle-metal-kernels/src/libMetalFlashAttention.metallib
Binary file not shown.

0 comments on commit 6d83d42

Please sign in to comment.