diff --git a/candle-nn/src/layer_norm.rs b/candle-nn/src/layer_norm.rs index 65cd17292c..c7ef0bf5fb 100644 --- a/candle-nn/src/layer_norm.rs +++ b/candle-nn/src/layer_norm.rs @@ -145,7 +145,7 @@ impl crate::Module for LayerNorm { #[cfg(feature = "cuda")] impl crate::Module for LayerNorm { fn forward(&self, x: &Tensor) -> Result { - const K_CUDABLOCK_REDUCE_NUM_THREADS: i32 = 512; + const K_CUDABLOCK_REDUCE_NUM_THREADS: u32 = 512; let cuda_dev = match x.device() { Device::Cpu | Device::Metal(_) => return self.forward_slow(x), Device::Cuda(dev) => dev @@ -171,7 +171,7 @@ impl crate::Module for LayerNorm { }.slice; let cfg_1 = LaunchConfig { - grid_dim: (m,1,1), + grid_dim: (m as u32,1,1), block_dim: (K_CUDABLOCK_REDUCE_NUM_THREADS,1,1), shared_mem_bytes: 0, };