-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[AIEX] Propagate MMO for loads without this information
Attaching memory operands (MMOs) to loads that lack them can prevent MachinePipeliner from treating those loads as dependence barriers (isDependenceBarrier).
- Loading branch information
1 parent
800ab8d
commit 093ac91
Showing
5 changed files
with
334 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
123 changes: 123 additions & 0 deletions
123
llvm/test/CodeGen/AIE/aie2/GlobalISel/propagate-mmo-noptr.mir
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py | ||
# | ||
# This file is licensed under the Apache License v2.0 with LLVM Exceptions. | ||
# See https://llvm.org/LICENSE.txt for license information. | ||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
# | ||
# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates | ||
|
||
# RUN: llc -mtriple aie2 -run-pass=aie-post-select-optimize %s -o - | FileCheck %s | ||
# RUN: llc -mtriple aie2 -run-pass=aie-post-select-optimize \ | ||
# RUN: -aie-post-load-mmo-deep-search=true %s -o - | FileCheck %s | ||
|
||
--- | | ||
; Embedded IR module: it only declares the symbols that the MIR function | ||
; further down refers to by name. | ||
target datalayout = "e-m:e-p:20:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-f32:32:32-i64:32-f64:32-a:0:32-n32" | ||
target triple = "aie2" | ||
|
||
; Globals whose addresses the MIR body materializes with MOVXM_lng_cg. | ||
; The CHECK lines expect @softmax_ilut_ab and @softmax_ilut_cd to show up in | ||
; the memory operands attached to the VLDB_4x16 loads. | ||
@softmax_ilut_ab = dso_local global [1 x i16] [i16 16256], align 32 | ||
@softmax_ilut_cd = dso_local global [1 x i16] [i16 16256], align 32 | ||
@softmax_flut_ab = dso_local global [1 x i16] [i16 16256], align 32 | ||
@softmax_flut_cd = dso_local global [1 x i16] [i16 16256], align 32 | ||
|
||
; Empty IR stub; the MIR document named test_4x16_load supplies the machine | ||
; instructions that are actually tested. | ||
define void @test_4x16_load() { ret void } | ||
... | ||
--- | ||
# Input: the four VLDB_4x16_LO/HI loads at the end of bb.0 carry no memory | ||
# operand. Expected (see the CHECK lines): after aie-post-select-optimize each | ||
# of them lists memory operands for @softmax_ilut_ab and @softmax_ilut_cd. | ||
name: test_4x16_load | ||
alignment: 16 | ||
legalized: true | ||
regBankSelected: true | ||
selected: true | ||
tracksRegLiveness: true | ||
body: | | ||
bb.0: | ||
liveins: $p0, $wl0 | ||
; CHECK-LABEL: name: test_4x16_load | ||
; CHECK: liveins: $p0, $wl0 | ||
; CHECK-NEXT: {{ $}} | ||
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 | ||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec256 = COPY $wl0 | ||
; CHECK-NEXT: [[MOVXM_lng_cg:%[0-9]+]]:ep_as_32bit = MOVXM_lng_cg target-flags(aie2-global) @softmax_ilut_ab | ||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY [[MOVXM_lng_cg]] | ||
; CHECK-NEXT: [[MOVXM_lng_cg1:%[0-9]+]]:ep_as_32bit = MOVXM_lng_cg target-flags(aie2-global) @softmax_ilut_cd | ||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:er = COPY [[MOVXM_lng_cg1]] | ||
; CHECK-NEXT: [[MOVXM_lng_cg2:%[0-9]+]]:ep_as_32bit = MOVXM_lng_cg target-flags(aie2-global) @softmax_flut_ab | ||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:er = COPY [[MOVXM_lng_cg2]] | ||
; CHECK-NEXT: [[MOVXM_lng_cg3:%[0-9]+]]:ep_as_32bit = MOVXM_lng_cg target-flags(aie2-global) @softmax_flut_cd | ||
; CHECK-NEXT: [[COPY5:%[0-9]+]]:er = COPY [[MOVXM_lng_cg3]] | ||
; CHECK-NEXT: [[COPY6:%[0-9]+]]:mdm = COPY [[COPY2]] | ||
; CHECK-NEXT: [[COPY7:%[0-9]+]]:er = COPY [[COPY6]] | ||
; CHECK-NEXT: [[COPY8:%[0-9]+]]:mdm = COPY [[COPY3]] | ||
; CHECK-NEXT: [[COPY9:%[0-9]+]]:er = COPY [[COPY8]] | ||
; CHECK-NEXT: [[MOV_RLC_imm10_pseudo:%[0-9]+]]:ers4 = MOV_RLC_imm10_pseudo 0 | ||
; CHECK-NEXT: [[MOV_RLC_imm10_pseudo1:%[0-9]+]]:er = MOV_RLC_imm10_pseudo 2 | ||
; CHECK-NEXT: [[MOV_RLC_imm10_pseudo2:%[0-9]+]]:er = MOV_RLC_imm10_pseudo 6 | ||
; CHECK-NEXT: [[COPY10:%[0-9]+]]:mss = COPY [[COPY]] | ||
; CHECK-NEXT: [[VFLOOR_S32_BF16_mFl2FxSrc_W:%[0-9]+]]:vec512 = VFLOOR_S32_BF16_mFl2FxSrc_W [[COPY1]], [[COPY10]], implicit-def dead $srf2iflags, implicit $crf2imask | ||
; CHECK-NEXT: [[VSHUFFLE:%[0-9]+]]:vec512 = VSHUFFLE [[VFLOOR_S32_BF16_mFl2FxSrc_W]], [[VFLOOR_S32_BF16_mFl2FxSrc_W]], [[MOV_RLC_imm10_pseudo1]] | ||
; CHECK-NEXT: [[COPY11:%[0-9]+]]:ewl = COPY [[VSHUFFLE]].sub_256_lo | ||
; CHECK-NEXT: [[COPY12:%[0-9]+]]:vec256 = COPY [[COPY11]] | ||
; CHECK-NEXT: [[COPY13:%[0-9]+]]:mss = COPY [[MOV_RLC_imm10_pseudo]] | ||
; CHECK-NEXT: [[VUPS_S64_D16_mv_ups_w2c:%[0-9]+]]:acc1024 = VUPS_S64_D16_mv_ups_w2c [[COPY12]], [[COPY13]], implicit-def dead $srups_of, implicit $crsat, implicit $crupssign | ||
; CHECK-NEXT: [[COPY14:%[0-9]+]]:mss = COPY [[MOV_RLC_imm10_pseudo2]] | ||
; CHECK-NEXT: [[VSRS_S32_S64_mv_x_srs:%[0-9]+]]:vec512 = VSRS_S32_S64_mv_x_srs [[VUPS_S64_D16_mv_ups_w2c]], [[COPY14]], implicit-def dead $srsrs_of, implicit $crsat, implicit $crrnd | ||
; CHECK-NEXT: [[VBCST_32_:%[0-9]+]]:vec512 = VBCST_32 [[COPY7]] | ||
; CHECK-NEXT: [[VBCST_32_1:%[0-9]+]]:vec512 = VBCST_32 [[COPY9]] | ||
; CHECK-NEXT: [[MOVXM_lng_cg4:%[0-9]+]]:ers8 = MOVXM_lng_cg 52428 | ||
; CHECK-NEXT: [[VSEL_32_:%[0-9]+]]:vec512 = VSEL_32 [[VBCST_32_]], [[VBCST_32_1]], [[MOVXM_lng_cg4]] | ||
; CHECK-NEXT: [[VADD_32_:%[0-9]+]]:vec512 = VADD_32 [[VSEL_32_]], [[VSRS_S32_S64_mv_x_srs]] | ||
; CHECK-NEXT: [[COPY15:%[0-9]+]]:ewl = COPY [[VADD_32_]].sub_256_lo | ||
; CHECK-NEXT: [[COPY16:%[0-9]+]]:ewh = COPY [[VADD_32_]].sub_256_hi | ||
; CHECK-NEXT: [[VLDB_4x16_LO:%[0-9]+]]:vec256 = VLDB_4x16_LO [[COPY15]] :: (load unknown-size from @softmax_ilut_ab, align 1), (load unknown-size from @softmax_ilut_cd, align 1) | ||
; CHECK-NEXT: [[VLDB_4x16_HI:%[0-9]+]]:vec256 = VLDB_4x16_HI [[COPY15]] :: (load unknown-size from @softmax_ilut_ab, align 1), (load unknown-size from @softmax_ilut_cd, align 1) | ||
; CHECK-NEXT: [[VLDB_4x16_LO1:%[0-9]+]]:vec256 = VLDB_4x16_LO [[COPY16]] :: (load unknown-size from @softmax_ilut_ab, align 1), (load unknown-size from @softmax_ilut_cd, align 1) | ||
; CHECK-NEXT: [[VLDB_4x16_HI1:%[0-9]+]]:vec256 = VLDB_4x16_HI [[COPY16]] :: (load unknown-size from @softmax_ilut_ab, align 1), (load unknown-size from @softmax_ilut_cd, align 1) | ||
; CHECK-NEXT: $wl0 = COPY [[VLDB_4x16_LO]] | ||
; CHECK-NEXT: $wl1 = COPY [[VLDB_4x16_HI]] | ||
; CHECK-NEXT: $wl2 = COPY [[VLDB_4x16_LO1]] | ||
; CHECK-NEXT: $wl3 = COPY [[VLDB_4x16_HI1]] | ||
; CHECK-NEXT: PseudoRET implicit $lr | ||
%0:ep = COPY $p0 | ||
%121:vec256 = COPY $wl0 | ||
; Addresses of the four globals. Only the two "ilut" ones (%96, %99) are used | ||
; by later instructions; %102 and %105 have no further uses in this block. | ||
%97:ep_as_32bit = MOVXM_lng_cg target-flags(aie2-global) @softmax_ilut_ab | ||
%96:er = COPY %97 | ||
%100:ep_as_32bit = MOVXM_lng_cg target-flags(aie2-global) @softmax_ilut_cd | ||
%99:er = COPY %100 | ||
%103:ep_as_32bit = MOVXM_lng_cg target-flags(aie2-global) @softmax_flut_ab | ||
%102:er = COPY %103 | ||
%106:ep_as_32bit = MOVXM_lng_cg target-flags(aie2-global) @softmax_flut_cd | ||
%105:er = COPY %106 | ||
%284:mdm = COPY %96 | ||
%98:er = COPY %284 | ||
%285:mdm = COPY %99 | ||
%101:er = COPY %285 | ||
%95:ers4 = MOV_RLC_imm10_pseudo 0 | ||
%127:er = MOV_RLC_imm10_pseudo 2 | ||
%134:er = MOV_RLC_imm10_pseudo 6 | ||
%297:mss = COPY %0 | ||
%122:vec512 = VFLOOR_S32_BF16_mFl2FxSrc_W %121, %297, implicit-def dead $srf2iflags, implicit $crf2imask | ||
%126:vec512 = VSHUFFLE %122, %122, %127 | ||
%128:ewl = COPY %126.sub_256_lo | ||
%129:vec256 = COPY %128 | ||
%296:mss = COPY %95 | ||
%132:acc1024 = VUPS_S64_D16_mv_ups_w2c %129, %296, implicit-def dead $srups_of, implicit $crsat, implicit $crupssign | ||
%295:mss = COPY %134 | ||
%133:vec512 = VSRS_S32_S64_mv_x_srs %132, %295, implicit-def dead $srsrs_of, implicit $crsat, implicit $crrnd | ||
; %98 and %101 trace back through COPYs to the addresses of @softmax_ilut_ab | ||
; and @softmax_ilut_cd; they are broadcast, selected between, and added to | ||
; %133 to form the vector %140 that feeds the loads. | ||
%136:vec512 = VBCST_32 %98 | ||
%137:vec512 = VBCST_32 %101 | ||
%139:ers8 = MOVXM_lng_cg 52428 | ||
%138:vec512 = VSEL_32 %136, %137, %139 | ||
%140:vec512 = VADD_32 %138, %133 | ||
%141:ewl = COPY %140.sub_256_lo | ||
%147:ewh = COPY %140.sub_256_hi | ||
; Loads under test: none of them has a memory operand on input. | ||
%142:vec256 = VLDB_4x16_LO %141 | ||
%145:vec256 = VLDB_4x16_HI %141 | ||
%148:vec256 = VLDB_4x16_LO %147 | ||
%150:vec256 = VLDB_4x16_HI %147 | ||
$wl0 = COPY %142 | ||
$wl1 = COPY %145 | ||
$wl2 = COPY %148 | ||
$wl3 = COPY %150 | ||
PseudoRET implicit $lr | ||
... | ||
|