PR #19067: [XLA:CPU][oneDNN] Move simplification pass before oneDNN pass
Imported from GitHub PR #19067

This PR moves the simplification pass before the oneDNN rewriter pass, which simplifies pattern matching for quantization support by getting rid of redundant copy ops.
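For context, the "redundant copy ops" are the layout-related copies and transposes that LayoutAssignment leaves behind. With the simplification pipeline running first, a layout-changing transpose like the one in the new unit test (shapes borrowed from that test; the concrete layout below is illustrative, since LayoutAssignment chooses layouts):

  transpose = f32[1,224,224,3] transpose(param_inp), dimensions={0,2,3,1}

can fold into a plain bitcast before the oneDNN rewriter ever runs:

  bitcast = f32[1,224,224,3]{3,2,1,0} bitcast(param_inp)

so the rewriter's pattern matcher sees the convolution operands directly instead of having to look through copy/transpose chains.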
Copybara import of the project:

--
57f2f3b by Mahmoud Abuzaina <[email protected]>:

Move simplification pass before oneDNN pass

--
5248e33 by Mahmoud Abuzaina <[email protected]>:

Added a unit test

Merging this change closes #19067

COPYBARA_INTEGRATE_REVIEW=#19067 from Intel-tensorflow:mabuzain/reorder-passes 5248e33
PiperOrigin-RevId: 713956033
mahmoud-abuzaina authored and Google-ML-Automation committed Jan 10, 2025
1 parent e6a6acf commit d5450b4
Showing 2 changed files with 52 additions and 27 deletions.
48 changes: 24 additions & 24 deletions xla/service/cpu/cpu_compiler.cc
@@ -791,6 +791,30 @@ absl::Status CpuCompiler::RunHloPassesAfterLayoutAssn(
 
   pipeline.AddPass<ReshapeDecomposer>();
 
+  // The LayoutAssignment pass may leave behind kCopy instructions which are
+  // duplicate or NOPs, so remove them with algebraic simplification and CSE.
+  // Run this to a fixed point.
+  [&pipeline = pipeline.AddPass<HloPassFix<HloPassPipeline>>(
+       "simplification after layout assignment"),
+   this] {
+    AddHloVerifier(
+        &pipeline,
+        HloVerifierOpts{}.MakeLayoutSensitive().WithInstructionCanChangeLayout(
+            LayoutAssignment::InstructionCanChangeLayout),
+        /*debug_only=*/true);
+    AlgebraicSimplifierOptions options;
+    options.set_is_layout_sensitive(true);
+    options.set_supports_non_canonical_dots(false);
+    options.set_enable_dot_strength_reduction(false);
+    // TODO(b/209827141): XLA:CPU doesn't propagate NaN through min/max, but
+    // other platforms do, so it should be changed.
+    options.set_minmax_propagate_nan(false);
+    options.set_executing_on_cpu(true);
+    pipeline.AddPass<AlgebraicSimplifier>(options);
+    pipeline.AddPass<HloDCE>();
+    pipeline.AddPass<HloCSE>(/*is_layout_sensitive=*/true);
+  }();
+
   const int max_parallelism =
       module->config().intra_op_parallelism_threads() > 0
           ? module->config().intra_op_parallelism_threads()
@@ -822,30 +846,6 @@ absl::Status CpuCompiler::RunHloPassesAfterLayoutAssn(
   // Add a fusion pass now that layout assignment is done.
   pipeline.AddPass<CpuInstructionFusion>();
 
-  // The LayoutAssignment pass may leave behind kCopy instructions which are
-  // duplicate or NOPs, so remove them with algebraic simplification and CSE.
-  // Run this to a fixed point.
-  [&pipeline = pipeline.AddPass<HloPassFix<HloPassPipeline>>(
-       "simplification after layout assignment"),
-   this] {
-    AddHloVerifier(
-        &pipeline,
-        HloVerifierOpts{}.MakeLayoutSensitive().WithInstructionCanChangeLayout(
-            LayoutAssignment::InstructionCanChangeLayout),
-        /*debug_only=*/true);
-    AlgebraicSimplifierOptions options;
-    options.set_is_layout_sensitive(true);
-    options.set_supports_non_canonical_dots(false);
-    options.set_enable_dot_strength_reduction(false);
-    // TODO(b/209827141): XLA:CPU doesn't propagate NaN through min/max, but
-    // other platforms do, so it should be changed.
-    options.set_minmax_propagate_nan(false);
-    options.set_executing_on_cpu(true);
-    pipeline.AddPass<AlgebraicSimplifier>(options);
-    pipeline.AddPass<HloDCE>();
-    pipeline.AddPass<HloCSE>(/*is_layout_sensitive=*/true);
-  }();
-
   // Outline ops in the entry computation into calls to subcomputations.
   if (!is_aot_compile) {
     // Run ParallelTaskAssigner to assign parallel tasks to HLOs in module.
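As the moved comment says, LayoutAssignment can leave behind kCopy instructions that are NOPs or duplicates. A hypothetical HLO fragment (not from this change) showing both kinds that the fixed-point pipeline removes:

  p0 = f32[16,32]{1,0} parameter(0)
  nop = f32[16,32]{1,0} copy(p0)
  copy.0 = f32[16,32]{0,1} copy(p0)
  copy.1 = f32[16,32]{0,1} copy(p0)

Here nop has the same shape and layout as p0, so the layout-sensitive AlgebraicSimplifier replaces it with p0, while copy.0 and copy.1 do change the layout but are identical, so layout-sensitive HloCSE merges them. Running this cleanup before the oneDNN rewriter, rather than after fusion as previously, is the point of the reordering above.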
31 changes: 28 additions & 3 deletions xla/service/cpu/tests/onednn_convolution_test.cc
@@ -141,13 +141,18 @@ class ConvolutionTest : public HloTestBase,
 
   void RunCompareAndMatchOptimizedHlo(
       const absl::string_view outline,
-      const std::vector<absl::string_view> fused_ops) {
+      const std::vector<absl::string_view> fused_ops,
+      const absl::string_view custom_match = "") {
     const std::string convolution_module_str = absl::StrReplaceAll(
         outline,
         {{"$dtype", dtypeString_}, {"$pdtype", PromotedDtypeToString()}});
     EXPECT_TRUE(RunAndCompare(convolution_module_str, ErrorSpec{atol_, rtol_}));
-    MatchOptimizedHlo(convolution_module_str,
-                      ConvStringWithOptimizations(fused_ops));
+    if (custom_match.empty()) {
+      MatchOptimizedHlo(convolution_module_str,
+                        ConvStringWithOptimizations(fused_ops));
+    } else {
+      MatchOptimizedHlo(convolution_module_str, custom_match);
+    }
   }
 };
 
@@ -593,6 +598,26 @@ TEST_P(ConvolutionTest, Conv2DWithBiasAndGeluExactPattern2Test) {
   RunCompareAndMatchOptimizedHlo(outline, {"BIAS", "GELU_ERF"});
 }
 
+TEST_P(ConvolutionTest, TransposeSimplifiedToBitcast) {
+  const char* outline = R"(
+  HloModule convolution.test.with.transpose
+  ENTRY convolution.test.with.transpose {
+    param_inp = $dtype[1,3,224,224] parameter(0)
+    transpose = $dtype[1,224,224,3] transpose(param_inp), dimensions={0,2,3,1}
+    param_wei = $dtype[64,3,7,7] parameter(1)
+    transpose.1 = $dtype[7,7,3,64] transpose(param_wei), dimensions={2,3,1,0}
+    ROOT convolution = $dtype[1,112,112,64] convolution(transpose, transpose.1),
+        window={size=7x7 stride=2x2 pad=3_3x3_3}, dim_labels=b01f_01io->b01f
+  })";
+
+  constexpr static const char* kBitcastCopyStr = R"(
+  ; CHECK: bitcast
+  ; CHECK: copy
+  ; CHECK: custom_call_target="__onednn$convolution")";
+  RunCompareAndMatchOptimizedHlo(outline, {}, kBitcastCopyStr);
+}
+
 INSTANTIATE_TEST_SUITE_P(
     OneDnnConvolutionTestSuite, ConvolutionTest,
     ::testing::Values(F32, BF16, F16),

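For orientation, the three CHECK lines in the new test accept optimized HLO of roughly the following shape. This is a hypothetical sketch only; the real output's instruction names, layouts, and which operand retains a copy are determined by the compiler, not by the pattern (shown for the F32 instantiation):

  bitcast.0 = f32[1,224,224,3]{3,2,1,0} bitcast(param_inp)
  copy.0 = f32[7,7,3,64]{3,2,1,0} copy(...)
  conv = f32[1,112,112,64]{3,2,1,0} custom-call(bitcast.0, copy.0), custom_call_target="__onednn$convolution"

That is, at least one input transpose now folds to a bitcast, which is the behavior this reordering enables, while the convolution itself is rewritten into a oneDNN custom call.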