Skip to content

Commit

Permalink
Add region liveness in WAW DAG mutator
Browse files Browse the repository at this point in the history
  • Loading branch information
abhinay-anubola committed Jun 18, 2024
1 parent 56eba40 commit 156e566
Show file tree
Hide file tree
Showing 12 changed files with 83 additions and 60 deletions.
60 changes: 53 additions & 7 deletions llvm/lib/Target/AIE/AIEBaseSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,8 @@ void dumpDependencies(ScheduleDAGInstrs *DAG, SDep::Kind depType,
/// live set of MBB, backtrack the DAG and update the live set. Whenever an edge
/// points to a non-live write, it is updated to the subsequent live write.
class WAWEdges : public ScheduleDAGMutation {

AIEPostRASchedStrategy *Scheduler = nullptr;
// Collect all edges in a separate vector. This allows modifying SU.Preds
// without invalidating iterators.
SmallVector<SDep, 4> getPreds(SUnit &SU) {
Expand All @@ -446,17 +448,41 @@ class WAWEdges : public ScheduleDAGMutation {
}
}
}

public:
void setScheduler(AIEPostRASchedStrategy *Scheduler) {
this->Scheduler = Scheduler;
}

void apply(ScheduleDAGInstrs *DAG) override {
MachineFunction &MF = DAG->MF;
MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
auto *RI = static_cast<const AIEBaseRegisterInfo *>(TRI);
LivePhysRegs LiveRegs;
LiveRegs.init(*TRI);
// Reserved registers are considered always live
for (MCPhysReg PhysReg : MRI.getReservedRegs().set_bits()) {
if (RI->isSimplifiableReservedReg(PhysReg))
LiveRegs.addReg(PhysReg);
bool AddReservedRegs = true;
if (Scheduler) {
MachineBasicBlock *MBB = DAG->getBB();
const BlockState &BS = Scheduler->getInterBlock().getBlockState(MBB);
auto Region = BS.getCurrentRegion();
auto BottomRegion = BS.getBottom();
if (*Region.begin() == *BottomRegion.begin()) {
// If the region is bottom region, liveouts of region are same as
// liveouts of the MBB
for (const MCPhysReg Reg : BS.LiveOuts) {
LiveRegs.addReg(Reg);
}
AddReservedRegs = false;
}
}

if (AddReservedRegs) {
// Reserved registers are considered always live
for (const MCPhysReg PhysReg : MRI.getReservedRegs().set_bits()) {
if (RI->isSimplifiableReservedReg(PhysReg))
LiveRegs.addReg(PhysReg);
}
}
// Stores latest live write of physical register.
std::map<Register, SUnit *> PhysRegWriters;
Expand All @@ -482,6 +508,26 @@ class WAWEdges : public ScheduleDAGMutation {
};
};

// Adds WAW edges for scheduling in the context of the Scheduler.
// This class extends WAWEdges to apply WAW edges using a Scheduler if available
// It overrides the apply method to retrieve the Scheduler from the DAG if a
// BasicBlock is present, otherwise, it uses nullptr.
class SchedWAWEdges : public WAWEdges {
void apply(ScheduleDAGInstrs *DAG) override {
AIEPostRASchedStrategy *Scheduler =
DAG->getBB() ? static_cast<AIEScheduleDAGMI *>(DAG)->getSchedImpl()
: nullptr;
setScheduler(Scheduler);
WAWEdges::apply(DAG);
}
};

// Scheduler-free flavour of the WAW mutation, intended for software
// pipelining (SWP), which runs independently of the Scheduler.
// It never calls setScheduler(), so the base class runs with whatever
// Scheduler pointer it already holds (nullptr by default).
class SWPWAWEdges : public WAWEdges {
  void apply(ScheduleDAGInstrs *DAG) override {
    // Pure forwarding; no scheduler lookup is performed here.
    WAWEdges::apply(DAG);
  }
};
} // namespace

std::vector<std::unique_ptr<ScheduleDAGMutation>>
Expand All @@ -491,7 +537,7 @@ AIEBaseSubtarget::getPostRAMutationsImpl(const Triple &TT) {
if (!TT.isAIE1()) {
Mutations.emplace_back(std::make_unique<RegionEndEdges>());
Mutations.emplace_back(std::make_unique<MemoryEdges>());
Mutations.emplace_back(std::make_unique<WAWEdges>());
Mutations.emplace_back(std::make_unique<SchedWAWEdges>());
}
return Mutations;
}
Expand All @@ -504,7 +550,7 @@ AIEBaseSubtarget::getInterBlockMutationsImpl(const Triple &TT) {
if (!TT.isAIE1()) {
Mutations.emplace_back(std::make_unique<RegionEndEdges>());
Mutations.emplace_back(std::make_unique<MemoryEdges>());
Mutations.emplace_back(std::make_unique<WAWEdges>());
Mutations.emplace_back(std::make_unique<SchedWAWEdges>());
}
return Mutations;
}
Expand All @@ -523,7 +569,7 @@ std::vector<std::unique_ptr<ScheduleDAGMutation>>
AIEBaseSubtarget::getSMSMutationsImpl(const Triple &TT) {
std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
if (!TT.isAIE1()) {
Mutations.emplace_back(std::make_unique<WAWEdges>());
Mutations.emplace_back(std::make_unique<SWPWAWEdges>());
if (EnablePipelinerSchedPropagateIncomingLatencies)
Mutations.emplace_back(std::make_unique<PropagateIncomingLatencies>());
}
Expand Down
16 changes: 5 additions & 11 deletions llvm/test/CodeGen/AIE/aie2/hardware-loops/nested.ll
Original file line number Diff line number Diff line change
Expand Up @@ -39,32 +39,26 @@ define void @nested(ptr nocapture %out, ptr nocapture readonly %in, i32 noundef
; CHECK-NEXT: .LBB0_2: // %for.body6
; CHECK-NEXT: // Parent Loop BB0_1 Depth=1
; CHECK-NEXT: // => This Inner Loop Header: Depth=2
; CHECK-NEXT: nopa ; lshl r7, r6, r4
; CHECK-NEXT: nopb ; nopa ; nops ; lshl r7, r6, r4; nopm ; nopv
; CHECK-NEXT: mov dj0, r7
; CHECK-NEXT: lda r7, [p4, dj0]
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: add r6, r6, #1
; CHECK-NEXT: add r2, r2, r7
; CHECK-NEXT: jnzd r5, r5, p2
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 3
; CHECK-NEXT: nop // Delay Slot 2
; CHECK-NEXT: add r6, r6, #1 // Delay Slot 3
; CHECK-NEXT: add r2, r2, r7 // Delay Slot 2
; CHECK-NEXT: st r2, [p0, #0] // Delay Slot 1
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: // %bb.3: // %for.cond3.for.cond.cleanup5_crit_edge
; CHECK-NEXT: // in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: nopb ; nopa ; nops ; add r3, r3, #1; nopm ; nopv
; CHECK-NEXT: nopa ; jnzd r0, r0, p3
; CHECK-NEXT: jnzd r0, r0, p3
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 3
; CHECK-NEXT: nop // Delay Slot 2
; CHECK-NEXT: nop // Delay Slot 1
; CHECK-NEXT: add r3, r3, #1 // Delay Slot 1
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: // %bb.4: // %for.cond.cleanup
; CHECK-NEXT: nopa ; ret lr
Expand Down
22 changes: 6 additions & 16 deletions llvm/test/CodeGen/AIE/aie2/hardware-loops/sibling.ll
Original file line number Diff line number Diff line change
Expand Up @@ -20,21 +20,16 @@ define void @sibling(ptr nocapture %out, ptr nocapture readonly %in, i32 noundef
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_1: // %for.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: nopa ; lshl r6, r5, r4
; CHECK-NEXT: nopb ; nopa ; nops ; lshl r6, r5, r4; nopm ; nopv
; CHECK-NEXT: mov dj0, r6
; CHECK-NEXT: lda r6, [p1, dj0]
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: add r5, r5, #1
; CHECK-NEXT: add r3, r3, r6
; CHECK-NEXT: jnzd r0, r0, p2
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 3
; CHECK-NEXT: nop // Delay Slot 2
; CHECK-NEXT: add r5, r5, #1 // Delay Slot 3
; CHECK-NEXT: add r3, r3, r6 // Delay Slot 2
; CHECK-NEXT: st r3, [p0, #0] // Delay Slot 1
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: // %bb.2: // %for.body6.lr.ph
Expand All @@ -45,21 +40,16 @@ define void @sibling(ptr nocapture %out, ptr nocapture readonly %in, i32 noundef
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_3: // %for.body6
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: nopa ; lshl r4, r2, r3
; CHECK-NEXT: nopb ; nopa ; nops ; lshl r4, r2, r3; nopm ; nopv
; CHECK-NEXT: mov dj0, r4
; CHECK-NEXT: lda r4, [p1, dj0]
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: add r2, r2, #1
; CHECK-NEXT: add r0, r0, r4
; CHECK-NEXT: jnzd r1, r1, p2
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 3
; CHECK-NEXT: nop // Delay Slot 2
; CHECK-NEXT: add r2, r2, #1 // Delay Slot 3
; CHECK-NEXT: add r0, r0, r4 // Delay Slot 2
; CHECK-NEXT: st r0, [p0, #0] // Delay Slot 1
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: // %bb.4: // %for.cond.cleanup5
Expand Down
11 changes: 3 additions & 8 deletions llvm/test/CodeGen/AIE/aie2/hardware-loops/simple.ll
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,16 @@ define void @simple(ptr nocapture %out, ptr nocapture readonly %in, i32 noundef
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_1: // %for.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: nopa ; lshl r4, r2, r3
; CHECK-NEXT: nopb ; nopa ; nops ; lshl r4, r2, r3; nopm ; nopv
; CHECK-NEXT: mov dj0, r4
; CHECK-NEXT: lda r4, [p1, dj0]
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK-NEXT: add r2, r2, #1
; CHECK-NEXT: add r1, r1, r4
; CHECK-NEXT: jnzd r0, r0, p2
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 3
; CHECK-NEXT: nop // Delay Slot 2
; CHECK-NEXT: add r2, r2, #1 // Delay Slot 3
; CHECK-NEXT: add r1, r1, r4 // Delay Slot 2
; CHECK-NEXT: st r1, [p0, #0] // Delay Slot 1
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: // %bb.2: // %for.cond.cleanup
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $crsat = MOVX_mvx_scl killed $r0
; CHECK-NEXT: $crsat = MOV_mv_scl killed $r1
; CHECK-NEXT: $r2 = MOV_D1 $crsat
; CHECK-NEXT: NOP
$crsat = MOVX_mvx_scl $r0
$crsat = MOV_mv_scl $r1
$r2 = MOV_D1 $crsat
...
3 changes: 2 additions & 1 deletion llvm/test/CodeGen/AIE/aie2/schedule/interblock/liveness.mir
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
# RUN: cat %t.log | FileCheck %s --check-prefix=CHECK-LIVENESS
# REQUIRES: asserts

# These tests verify liveouts of every MachineBasicBlock in MachineFunction
# These tests verify liveouts of every MachineBasicBlock in MachineFunction and
# liveins of every Region in MachineBasicBlock.

# bb.0
# / \
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AIE/aie2/schedule/loopaware/latency.mir
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,8 @@ body: |
; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
; CHECK-NEXT: liveins: $p0, $r0, $r1, $r2, $r3
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $r0 = ADD_add_r_ri killed $r0, -11, implicit-def $srcarry
; CHECK-NEXT: $r10 = MOVX_alu_cg 4
; CHECK-NEXT: $r0 = ADD_add_r_ri killed $r0, -11, implicit-def $srcarry
; CHECK-NEXT: JNZ $r0, %bb.1
; CHECK-NEXT: $r2 = ADD_add_r_ri killed $r2, 1, implicit-def $srcarry
; CHECK-NEXT: $r2 = MUL_mul_r_rr killed $r2, killed $r10
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AIE/aie2/schedule/resource/ups_unit.mir
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@ body: |
; CHECK-NEXT: VMOV_mv_mcd killed $bmh4, implicit $crmcden
; CHECK-NEXT: VMOV_mv_mcd killed $bmh5, implicit $crmcden
; CHECK-NEXT: VMOV_mv_mcd killed $bmh6, implicit $crmcden
; CHECK-NEXT: NOP
; CHECK-NEXT: $cm8 = VUPS_S64_D32_mv_ups_x2c killed $x2, killed $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign
; CHECK-NEXT: NOP
; CHECK-NEXT: NOP
; CHECK-NEXT: NOP
; CHECK-NEXT: NOP
; CHECK-NEXT: NOP
$bmh0 = VLDA_UPS_S32_S16_ag_idx_imm $s0, $p0, 0, implicit-def $srups_of, implicit $crsat
VMOV_mv_mcd $bmh1, implicit $crmcden
VMOV_mv_mcd $bmh2, implicit $crmcden
Expand All @@ -53,12 +53,12 @@ body: |
; CHECK-NEXT: VMOV_mv_mcd killed $bmh4, implicit $crmcden
; CHECK-NEXT: VMOV_mv_mcd killed $bmh5, implicit $crmcden
; CHECK-NEXT: VMOV_mv_mcd killed $bmh6, implicit $crmcden
; CHECK-NEXT: NOP
; CHECK-NEXT: $cm8 = VUPS_S64_D32_mv_ups_x2c killed $x2, killed $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign
; CHECK-NEXT: NOP
; CHECK-NEXT: NOP
; CHECK-NEXT: NOP
; CHECK-NEXT: NOP
; CHECK-NEXT: NOP
$bmh0, $p0 = VLDA_UPS_S32_D16_ag_pstm_nrm_imm $s0, $p0, 0, implicit-def $srups_of, implicit $crsat, implicit $crupssign
VMOV_mv_mcd $bmh1, implicit $crmcden
VMOV_mv_mcd $bmh2, implicit $crmcden
Expand All @@ -82,12 +82,12 @@ body: |
; CHECK-NEXT: VMOV_mv_mcd killed $bmh4, implicit $crmcden
; CHECK-NEXT: VMOV_mv_mcd killed $bmh5, implicit $crmcden
; CHECK-NEXT: VMOV_mv_mcd killed $bmh6, implicit $crmcden
; CHECK-NEXT: NOP
; CHECK-NEXT: $cm8 = VUPS_S64_D32_mv_ups_x2c killed $x2, killed $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign
; CHECK-NEXT: NOP
; CHECK-NEXT: NOP
; CHECK-NEXT: NOP
; CHECK-NEXT: NOP
; CHECK-NEXT: NOP
$bmh0, $p0, $dc4 = VLDA_2D_UPS_S32_D16 $s0, $p0, $d4, implicit-def $srups_of, implicit $crsat, implicit $crupssign
VMOV_mv_mcd $bmh1, implicit $crmcden
VMOV_mv_mcd $bmh2, implicit $crmcden
Expand All @@ -113,12 +113,12 @@ body: |
; CHECK-NEXT: VMOV_mv_mcd killed $bmh4, implicit $crmcden
; CHECK-NEXT: VMOV_mv_mcd killed $bmh5, implicit $crmcden
; CHECK-NEXT: VMOV_mv_mcd killed $bmh6, implicit $crmcden
; CHECK-NEXT: NOP
; CHECK-NEXT: $cm8 = VUPS_S64_D32_mv_ups_x2c killed $x2, killed $s2, implicit-def $srups_of, implicit $crsat, implicit $crupssign
; CHECK-NEXT: NOP
; CHECK-NEXT: NOP
; CHECK-NEXT: NOP
; CHECK-NEXT: NOP
; CHECK-NEXT: NOP
$bmh0, $p0, $dc3, $dc7 = VLDA_3D_UPS_S32_D16 $s0, $p0, $d3_3d, implicit-def $srups_of, implicit $crsat, implicit $crupssign
VMOV_mv_mcd $bmh1, implicit $crmcden
VMOV_mv_mcd $bmh2, implicit $crmcden
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/AIE/aie2/schedule/resource/w_wa.mir
Original file line number Diff line number Diff line change
Expand Up @@ -146,13 +146,12 @@ body: |
; CHECK: $wl0 = VLDA_dmw_lda_w_ag_idx_imm killed $p0, 0
; CHECK-NEXT: $x1 = VFLOOR_S32_BF16_mFl2FxSrc_W $wl2, $s1, implicit-def $srf2iflags, implicit $crf2imask
; CHECK-NEXT: $x1 = VFLOOR_S32_BF16_mFl2FxSrc_W $wl2, $s1, implicit-def $srf2iflags, implicit $crf2imask
; CHECK-NEXT: NOP
; CHECK-NEXT: $x1 = VFLOOR_S32_BF16_mFl2FxSrc_W killed $wl2, killed $s1, implicit-def $srf2iflags, implicit $crf2imask
; CHECK-NEXT: NOP
; CHECK-NEXT: NOP
; CHECK-NEXT: $x0 = VFLOOR_S32_BF16_mFl2FxSrc_AM $amll3, $s0, implicit-def $srf2iflags, implicit $crf2imask
; CHECK-NEXT: $x0 = VFLOOR_S32_BF16_mFl2FxSrc_AM $amll3, $s0, implicit-def $srf2iflags, implicit $crf2imask
; CHECK-NEXT: $x0 = VFLOOR_S32_BF16_mFl2FxSrc_AM killed $amll3, killed $s0, implicit-def $srf2iflags, implicit $crf2imask
; CHECK-NEXT: $x1 = VFLOOR_S32_BF16_mFl2FxSrc_W killed $wl2, killed $s1, implicit-def $srf2iflags, implicit $crf2imask
; CHECK-NEXT: NOP
$wl0 = VLDA_dmw_lda_w_ag_idx_imm $p0, 0
$x0 = VFLOOR_S32_BF16_mFl2FxSrc_AM $amll3, $s0, implicit-def $srf2iflags, implicit $crf2imask
Expand Down
6 changes: 1 addition & 5 deletions llvm/test/CodeGen/AIE/aie2/schedule/status_regs/srWAW.mir
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ body: |
; CHECK-WAW-NEXT: SU(0)->SU(2) Out Latency=1 $srcarry
; CHECK-WAW-NEXT: SU(1)->SU(2) Out Latency=1 $srcarry
; CHECK-WAW-NEXT: SU(2)->SU(3) Out Latency=1 $r0
; CHECK-WAW-NEXT: SU(2)->SU(3) Out Latency=1 $srcarry
renamable $r0 = nsw ADD $r1, $r2, implicit-def $srcarry
renamable $r1 = nsw ADD $r3, $r4, implicit-def $srcarry
Expand All @@ -48,7 +47,6 @@ body: |
; CHECK-WAW-NEXT: SU(1)->SU(3) Out Latency=1 $srcarry
; CHECK-WAW-NEXT: SU(2)->SU(4) Out Latency=1 $r0
; CHECK-WAW-NEXT: SU(2)->SU(3) Out Latency=1 $srcarry
; CHECK-WAW-NEXT: SU(3)->SU(4) Out Latency=1 $srcarry
renamable $r0 = nsw ADD $r1, $r2, implicit-def $srcarry
renamable $r1 = nsw ADD $r3, $r4, implicit-def $srcarry
Expand All @@ -73,8 +71,7 @@ body: |
; CHECK-WAW-NEXT: SU(1)->SU(3) Out Latency=1 $r1
; CHECK-WAW-NEXT: SU(1)->SU(2) Out Latency=1 $srcarry
; CHECK-WAW-NEXT: SU(2)->SU(4) Out Latency=1 $r0
; CHECK-WAW-NEXT: SU(2)->SU(4) Out Latency=1 $srcarry
renamable $r0 = nsw ADD $r1, $r2, implicit-def $srcarry
renamable $r1 = nsw ADD $r2, $r3, implicit-def $srcarry
renamable $r0 = nsw ADD killed renamable $r0, killed renamable $r1, implicit-def $srcarry
Expand All @@ -100,7 +97,6 @@ body: |
; CHECK-WAW-NEXT: SU(2)->SU(3) Out Latency=1 $r0
; CHECK-WAW-NEXT: SU(2)->SU(3) Out Latency=1 $srcarry
; CHECK-WAW-NEXT: SU(3)->SU(5) Out Latency=1 $r0
; CHECK-WAW-NEXT: SU(3)->SU(5) Out Latency=1 $srcarry
renamable $r0 = nsw ADD $r1, $r2, implicit-def $srcarry
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AIE/aie2/vextract.ll
Original file line number Diff line number Diff line change
Expand Up @@ -429,8 +429,8 @@ define dso_local noundef signext i8 @_Z5test1Dv128_a(<128 x i8> noundef %vec) {
; CHECK-NEXT: movx r2, #64
; CHECK-NEXT: lt r27, r0, r2
; CHECK-NEXT: sel.nez r1, r1, r2, r27
; CHECK-NEXT: sub r16, r0, r1
; CHECK-NEXT: add r24, r27, #-1
; CHECK-NEXT: sub r16, r0, r1
; CHECK-NEXT: ret lr
; CHECK-NEXT: mov r25, r24 // Delay Slot 5
; CHECK-NEXT: vsel.8 x0, x4, x5, r25:r24 // Delay Slot 4
Expand Down Expand Up @@ -460,8 +460,8 @@ define dso_local noundef signext i16 @_Z5test2Dv64_s(<64 x i16> noundef %vec) {
; CHECK-NEXT: movx r2, #32
; CHECK-NEXT: lt r27, r0, r2
; CHECK-NEXT: sel.nez r1, r1, r2, r27
; CHECK-NEXT: sub r16, r0, r1
; CHECK-NEXT: add r17, r27, #-1
; CHECK-NEXT: sub r16, r0, r1
; CHECK-NEXT: ret lr
; CHECK-NEXT: vsel.16 x0, x4, x5, r17 // Delay Slot 5
; CHECK-NEXT: mov r17, r4 // Delay Slot 4
Expand Down Expand Up @@ -491,8 +491,8 @@ define dso_local noundef i32 @_Z5test3Dv32_i(<32 x i32> noundef %vec) {
; CHECK-NEXT: movx r2, #16
; CHECK-NEXT: lt r27, r0, r2
; CHECK-NEXT: sel.nez r1, r1, r2, r27
; CHECK-NEXT: sub r16, r0, r1
; CHECK-NEXT: add r17, r27, #-1
; CHECK-NEXT: sub r16, r0, r1
; CHECK-NEXT: ret lr
; CHECK-NEXT: vsel.32 x0, x4, x5, r17 // Delay Slot 5
; CHECK-NEXT: mov r17, r4 // Delay Slot 4
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/AIE/aie2/vinsert.ll
Original file line number Diff line number Diff line change
Expand Up @@ -229,10 +229,9 @@ define <32 x i32> @test_1024bit_dyn_idx(<32 x i32> %vec, i32 %b, i32 %c) {
; CHECK-NEXT: nopb ; mova r2, #16; nops ; nopxm ; nopv
; CHECK-NEXT: mova r3, #0; nopb ; lt r27, r0, r2
; CHECK-NEXT: sel.nez r2, r3, r2, r27
; CHECK-NEXT: sub r29, r0, r2
; CHECK-NEXT: add r16, r27, #-1; mov r4, r16
; CHECK-NEXT: ret lr
; CHECK-NEXT: vsel.32 x0, x6, x7, r16 // Delay Slot 5
; CHECK-NEXT: sub r29, r0, r2; vsel.32 x0, x6, x7, r16 // Delay Slot 5
; CHECK-NEXT: vinsert.32 x0, x0, r29, r1 // Delay Slot 4
; CHECK-NEXT: vsel.32 x4, x0, x6, r16 // Delay Slot 3
; CHECK-NEXT: vsel.32 x5, x7, x0, r16 // Delay Slot 2
Expand Down

0 comments on commit 156e566

Please sign in to comment.