PR #21123: Disable cuDNN fusions explicitly in tests that are testing…

… the Triton path

Imported from GitHub PR #21123

cuDNN fusions are OFF by default, and some tests that exercise the Triton codegen path implicitly rely on this. It is best to turn off cuDNN fusions explicitly in these tests: for example, NVIDIA has internal builds that turn on cuDNN fusions, and with those builds these tests suddenly start to fail in CI.

Copybara import of the project:

-- ab98276 by Dimitris Vardoulakis <[email protected]>:

Disable cuDNN fusions explicitly in tests that are testing the Triton path.

Merging this change closes #21123

COPYBARA_INTEGRATE_REVIEW=#21123 from dimvar:disable-cudnn-in-triton-tests ab98276
PiperOrigin-RevId: 714287755
openxla · Jan 11, 2025 · 1f4bd66 · 1f4bd66
1 parent fcb4fc0
commit 1f4bd66
Show file tree

Hide file tree

Showing 3 changed files with 4 additions and 1 deletion.
diff --git a/xla/service/gpu/determinism_test.cc b/xla/service/gpu/determinism_test.cc
@@ -223,6 +223,7 @@ TEST_F(DeterminismTest, ExcludingNonDeterministicOpsDoesNotDisableAutotuning) {
   }
 
   debug_options_.set_xla_gpu_cublas_fallback(false);
+  debug_options_.set_xla_gpu_cudnn_gemm_fusion_level(0);
   ASSERT_TRUE(debug_options_.xla_gpu_exclude_nondeterministic_ops());
   ASSERT_FALSE(debug_options_.xla_gpu_deterministic_ops());
   AutotunerUtil::ClearAutotuneResults();

diff --git a/xla/service/gpu/fusions/triton/triton_fusion_emitter_device_legacy_test.cc b/xla/service/gpu/fusions/triton/triton_fusion_emitter_device_legacy_test.cc
@@ -95,8 +95,9 @@ class TritonGemmTest : public TritonTest {
  public:
   DebugOptions GetDebugOptionsForTest() const override {
     DebugOptions debug_options = TritonTest::GetDebugOptionsForTest();
-    // Do not fall back to cuBLAS, we are testing Triton.
+    // Do not fall back to cuBLAS and disable cuDNN; we are testing Triton.
     debug_options.set_xla_gpu_cublas_fallback(false);
+    debug_options.set_xla_gpu_cudnn_gemm_fusion_level(0);
     // Do not autotune split-k by default, since this prevents deterministically
     // matching the optimized HLO.
     debug_options.set_xla_gpu_enable_split_k_autotuning(false);

diff --git a/xla/service/gpu/gpu_compiler_test.cc b/xla/service/gpu/gpu_compiler_test.cc
@@ -680,6 +680,7 @@ ENTRY main {
     DebugOptions debug_options = GetDebugOptionsForTest();
     debug_options.set_xla_gpu_cublas_fallback(enable_blas_fallback);
     debug_options.set_xla_gpu_enable_triton_gemm(enable_triton);
+    debug_options.set_xla_gpu_cudnn_gemm_fusion_level(0);
     if (!enable_blas) {
       debug_options.add_xla_disable_hlo_passes("cublas-gemm-rewriter");
     }