Enable cudnn dropout
hanzhi713 committed Jan 28, 2025
1 parent b125f00 commit 5d50977
Showing 1 changed file with 1 addition and 2 deletions.
3 changes: 1 addition & 2 deletions axlearn/common/flash_attention/utils.py
@@ -224,7 +224,6 @@ def get_segment_ids(segment_ids: SegmentIdAttentionBias) -> Optional[Tensor]:
             or explicit_bias.has_value()
             or jnp.float32 in (query.dtype, key.dtype, value.dtype)
             or query.shape[1] != key.shape[1]
-            or dropout_rate != 0.0
         ):
             logging.warning("Flash attention falling back to Triton GPU kernel.")
             return gpu_flash_attention(
@@ -248,7 +247,7 @@ def get_segment_ids(segment_ids: SegmentIdAttentionBias) -> Optional[Tensor]:
             bias=explicit_bias.value(),
             softmax_scale=softmax_scale,
             causal=causal.has_value(),
-            dropout_rate=0.0,
+            dropout_rate=dropout_rate,
         )

     elif backend == "tpu":
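The change above removes dropout as a reason to fall back to the Triton kernel and forwards the configured rate to the cuDNN call instead of hard-coding 0.0. Below is a minimal, hypothetical sketch of how the GPU kernel selection treats dropout_rate after this commit; the function and argument names are invented for illustration and are not the repository's API.

# Hypothetical sketch of the GPU dispatch behavior after this commit.
# Names are invented for illustration; the real logic lives in
# axlearn/common/flash_attention/utils.py.

def pick_gpu_flash_kernel(
    uses_float32: bool,
    has_explicit_bias: bool,
    query_key_lengths_match: bool,
    dropout_rate: float,
) -> tuple[str, float]:
    """Returns (kernel_name, dropout_rate forwarded to that kernel)."""
    if (
        has_explicit_bias
        or uses_float32
        or not query_key_lengths_match
        # Before this commit, `or dropout_rate != 0.0` appeared here, so any
        # nonzero dropout forced the Triton fallback.
    ):
        return "triton", dropout_rate
    # After this commit the cuDNN kernel receives the configured dropout rate
    # instead of a hard-coded 0.0.
    return "cudnn", dropout_rate


if __name__ == "__main__":
    # With dropout enabled and no other fallback condition, dispatch now
    # stays on the cuDNN kernel.
    print(pick_gpu_flash_kernel(False, False, True, 0.1))  # ('cudnn', 0.1)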
