From 88f21ec75c369f5ea2166e664edada95bb39cf93 Mon Sep 17 00:00:00 2001
From: Penporn Koanantakool
Date: Fri, 10 Jan 2025 05:22:46 -0800
Subject: [PATCH] Roll back https://github.com/openxla/xla/pull/19067 because
 it broke tests.

Reverts d5450b47643a45750c67608208d1e0ea40c7b7ec

PiperOrigin-RevId: 714013905
---
 xla/service/cpu/cpu_compiler.cc              | 48 +++++++++----------
 .../cpu/tests/onednn_convolution_test.cc     | 31 ++----------
 2 files changed, 27 insertions(+), 52 deletions(-)

diff --git a/xla/service/cpu/cpu_compiler.cc b/xla/service/cpu/cpu_compiler.cc
index f7546234d447f..5c28de6021def 100644
--- a/xla/service/cpu/cpu_compiler.cc
+++ b/xla/service/cpu/cpu_compiler.cc
@@ -791,30 +791,6 @@ absl::Status CpuCompiler::RunHloPassesAfterLayoutAssn(
 
   pipeline.AddPass();
 
-  // The LayoutAssignment pass may leave behind kCopy instructions which are
-  // duplicate or NOPs, so remove them with algebraic simplification and CSE.
-  // Run this to a fixed point.
-  [&pipeline = pipeline.AddPass<HloPassFix<HloPassPipeline>>(
-       "simplification after layout assignment"),
-   this] {
-    AddHloVerifier(
-        &pipeline,
-        HloVerifierOpts{}.MakeLayoutSensitive().WithInstructionCanChangeLayout(
-            LayoutAssignment::InstructionCanChangeLayout),
-        /*debug_only=*/true);
-    AlgebraicSimplifierOptions options;
-    options.set_is_layout_sensitive(true);
-    options.set_supports_non_canonical_dots(false);
-    options.set_enable_dot_strength_reduction(false);
-    // TODO(b/209827141): XLA:CPU doesn't propagate NaN through min/max, but
-    // other platforms do, so it should be changed.
-    options.set_minmax_propagate_nan(false);
-    options.set_executing_on_cpu(true);
-    pipeline.AddPass<AlgebraicSimplifier>(options);
-    pipeline.AddPass<HloDCE>();
-    pipeline.AddPass<HloCSE>(/*is_layout_sensitive=*/true);
-  }();
-
   const int max_parallelism =
       module->config().intra_op_parallelism_threads() > 0
           ? module->config().intra_op_parallelism_threads()
@@ -846,6 +822,30 @@ absl::Status CpuCompiler::RunHloPassesAfterLayoutAssn(
   // Add a fusion pass now that layout assignment is done.
   pipeline.AddPass<CpuInstructionFusion>();
 
+  // The LayoutAssignment pass may leave behind kCopy instructions which are
+  // duplicate or NOPs, so remove them with algebraic simplification and CSE.
+  // Run this to a fixed point.
+  [&pipeline = pipeline.AddPass<HloPassFix<HloPassPipeline>>(
+       "simplification after layout assignment"),
+   this] {
+    AddHloVerifier(
+        &pipeline,
+        HloVerifierOpts{}.MakeLayoutSensitive().WithInstructionCanChangeLayout(
+            LayoutAssignment::InstructionCanChangeLayout),
+        /*debug_only=*/true);
+    AlgebraicSimplifierOptions options;
+    options.set_is_layout_sensitive(true);
+    options.set_supports_non_canonical_dots(false);
+    options.set_enable_dot_strength_reduction(false);
+    // TODO(b/209827141): XLA:CPU doesn't propagate NaN through min/max, but
+    // other platforms do, so it should be changed.
+    options.set_minmax_propagate_nan(false);
+    options.set_executing_on_cpu(true);
+    pipeline.AddPass<AlgebraicSimplifier>(options);
+    pipeline.AddPass<HloDCE>();
+    pipeline.AddPass<HloCSE>(/*is_layout_sensitive=*/true);
+  }();
+
   // Outline ops in the entry computation into calls to subcomputations.
   if (!is_aot_compile) {
     // Run ParallelTaskAssigner to assign parallel tasks to HLOs in module.
diff --git a/xla/service/cpu/tests/onednn_convolution_test.cc b/xla/service/cpu/tests/onednn_convolution_test.cc
index 4c011af8eabcb..c94ada9dda190 100644
--- a/xla/service/cpu/tests/onednn_convolution_test.cc
+++ b/xla/service/cpu/tests/onednn_convolution_test.cc
@@ -141,18 +141,13 @@ class ConvolutionTest : public HloTestBase,
 
   void RunCompareAndMatchOptimizedHlo(
       const absl::string_view outline,
-      const std::vector<absl::string_view> fused_ops,
-      const absl::string_view custom_match = "") {
+      const std::vector<absl::string_view> fused_ops) {
     const std::string convolution_module_str = absl::StrReplaceAll(
         outline,
         {{"$dtype", dtypeString_}, {"$pdtype", PromotedDtypeToString()}});
     EXPECT_TRUE(RunAndCompare(convolution_module_str, ErrorSpec{atol_, rtol_}));
-    if (custom_match.empty()) {
-      MatchOptimizedHlo(convolution_module_str,
-                        ConvStringWithOptimizations(fused_ops));
-    } else {
-      MatchOptimizedHlo(convolution_module_str, custom_match);
-    }
+    MatchOptimizedHlo(convolution_module_str,
+                      ConvStringWithOptimizations(fused_ops));
   }
 };
 
@@ -598,26 +593,6 @@ TEST_P(ConvolutionTest, Conv2DWithBiasAndGeluExactPattern2Test) {
   RunCompareAndMatchOptimizedHlo(outline, {"BIAS", "GELU_ERF"});
 }
 
-TEST_P(ConvolutionTest, TransposeSimplifiedToBitcast) {
-  const char* outline = R"(
-  HloModule convolution.test.with.transpose
-
-  ENTRY convolution.test.with.transpose {
-    param_inp = $dtype[1,3,224,224] parameter(0)
-    transpose = $dtype[1,224,224,3] transpose(param_inp), dimensions={0,2,3,1}
-    param_wei = $dtype[64,3,7,7] parameter(1)
-    transpose.1 = $dtype[7,7,3,64] transpose(param_wei), dimensions={2,3,1,0}
-    ROOT convolution = $dtype[1,112,112,64] convolution(transpose, transpose.1),
-        window={size=7x7 stride=2x2 pad=3_3x3_3}, dim_labels=b01f_01io->b01f
-  })";
-
-  constexpr static const char* kBitcastCopyStr = R"(
-  ; CHECK: bitcast
-  ; CHECK: copy
-  ; CHECK: custom_call_target="__onednn$convolution")";
-  RunCompareAndMatchOptimizedHlo(outline, {}, kBitcastCopyStr);
-}
-
 INSTANTIATE_TEST_SUITE_P(
     OneDnnConvolutionTestSuite, ConvolutionTest,
     ::testing::Values(F32, BF16, F16),