diff --git a/xla/service/gpu/gpu_compiler.cc b/xla/service/gpu/gpu_compiler.cc index f4e87d417eec5..faeaa7a6c4667 100755 --- a/xla/service/gpu/gpu_compiler.cc +++ b/xla/service/gpu/gpu_compiler.cc @@ -1584,9 +1584,6 @@ absl::Status GpuCompiler::OptimizeHloPostLayoutAssignment( if ((cuda_cc != nullptr && cuda_cc->IsAtLeast(se::CudaComputeCapability::AMPERE)) || rocm_cc != nullptr) { - // Triton compilation needs normalized operations on bf16 (i.e. converted - // to f32). - add_float_normalization(pipeline); pipeline.AddPass>(simplifier_options, gpu_version); pipeline.AddPass(/*is_layout_sensitive=*/true);