Skip to content

Commit

Permalink
[AIE2P] Combine G_SHUFFLE_VECTOR into G_AIE_VSEL (#272)
Browse files Browse the repository at this point in the history
* [AIE2P]  Combine G_SHUFFLE_VECTOR into G_AIE_VSEL

* [AIE2P] Instruction selection for G_AIE_VSEL
  • Loading branch information
katerynamuts authored Jan 20, 2025
1 parent 74d329a commit 8bb96b0
Show file tree
Hide file tree
Showing 10 changed files with 294 additions and 4 deletions.
4 changes: 4 additions & 0 deletions llvm/lib/Target/AIE/AIEBaseInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,10 @@ struct AIEBaseInstrInfo : public TargetInstrInfo {
llvm_unreachable(
"Target didn't implement getGenericBroadcastVectorOpcode!");
}
/// Return the generic opcode used to select, word by word, between two
/// vectors. This base implementation aborts via llvm_unreachable, so any
/// target on which this hook is queried has to override it.
virtual unsigned getGenericVSelOpcode() const {
llvm_unreachable("Target didn't implement getGenericVSelOpcode!");
}
/// Check whether Opc represents a lock instruction
virtual bool isLock(unsigned Opc) const { return false; }
/// Check whether this is a delayed scheduling barrier induced from
Expand Down
10 changes: 9 additions & 1 deletion llvm/lib/Target/AIE/AIECombine.td
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,13 @@ def combine_vector_shuffle_broadcast : GICombineRule<
[{ return matchShuffleToBroadcast(*${root}, MRI, ${matchinfo}); }]),
(apply [{ applySplatVector(*${root}, MRI, B, ${matchinfo}); }])>;

// Match data shared by the matcher and the apply step of the VSel combine:
// (destination register, first source, second source, select bit mask).
def combine_vsel_matchdata: GIDefMatchData<"std::tuple<Register, Register, Register, uint64_t>">;
// Rewrite a G_SHUFFLE_VECTOR as the target's generic vector select whenever
// matchShuffleToVSel accepts it; applyVSel then builds the replacement from
// the recorded match data.
def combine_vector_shuffle_vsel : GICombineRule<
(defs root:$root, combine_vsel_matchdata:$matchinfo),
(match (wip_match_opcode G_SHUFFLE_VECTOR): $root,
[{ return matchShuffleToVSel(*${root}, MRI, ${matchinfo}); }]),
(apply [{ applyVSel(*${root}, MRI, B, ${matchinfo}); }])>;

def AIE2PreLegalizerCombiner
: GICombiner<"AIE2PreLegalizerCombinerImpl", [ combine_unpad_vector, combine_pad_vector,
all_combines, combine_S20NarrowingOpt,
Expand All @@ -92,7 +99,8 @@ def AIE2PPreLegalizerCombiner
combine_extract_vector_elt_and_zsa_ext,
combine_splat_vector, combine_vector_broadcast,
combine_concat_to_pad_vector,
combine_vector_shuffle_broadcast]> {
combine_vector_shuffle_broadcast,
combine_vector_shuffle_vsel]> {
let CombineAllMethodName = "tryCombineAllImpl";
}

Expand Down
58 changes: 58 additions & 0 deletions llvm/lib/Target/AIE/AIECombinerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1261,6 +1261,17 @@ void llvm::applyPadVector(MachineInstr &MI, MachineRegisterInfo &MRI,
MI.eraseFromParent();
}

/// Replace \p MI with the target's generic vector-select instruction.
///
/// \p MatchInfo carries, in order, the destination register, the two source
/// vector registers and the select bit mask; the mask is emitted as an
/// immediate operand. \p MI is erased once the replacement has been built.
void llvm::applyVSel(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
    std::tuple<Register, Register, Register, uint64_t> &MatchInfo) {
  // Insert the new instruction at MI's position, inheriting its debug location.
  B.setInstrAndDebugLoc(MI);

  const auto &[Dst, FirstSrc, SecondSrc, SelMask] = MatchInfo;
  const auto &TII = static_cast<const AIEBaseInstrInfo &>(B.getTII());
  const unsigned VSelOpc = TII.getGenericVSelOpcode();

  B.buildInstr(VSelOpc, {Dst}, {FirstSrc, SecondSrc, SelMask});
  MI.eraseFromParent();
}

/// Match something like this:
/// %68:_(s32) = G_CONSTANT i32 0
/// %93:_(s32) = G_CONSTANT i32 1
Expand Down Expand Up @@ -1768,3 +1779,50 @@ bool llvm::matchShuffleToBroadcast(MachineInstr &MI, MachineRegisterInfo &MRI,
MatchInfo = std::make_pair(DstReg, Src1Reg);
return true;
}

/// Check whether a G_SHUFFLE_VECTOR is a pure lane-wise select between its two
/// 512-bit sources and, if so, record what is needed to rewrite it as a VSel.
///
/// The shuffle qualifies when every result lane I is taken from lane I of
/// either the first or the second source, i.e. Mask[I] is I or
/// I + NumSrcElems. Undefined mask lanes (negative indices) place no
/// constraint on the result and are served from the first source.
///
/// \param MI        the G_SHUFFLE_VECTOR to inspect.
/// \param MRI       used to query the register types.
/// \param MatchInfo on success receives (Dst, Src1, Src2, SelMask), where bit I
///                  of SelMask is set iff lane I comes from the second source.
/// \return true if the shuffle can be replaced by a vector select.
bool llvm::matchShuffleToVSel(
    MachineInstr &MI, MachineRegisterInfo &MRI,
    std::tuple<Register, Register, Register, uint64_t> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  const Register DstReg = MI.getOperand(0).getReg();
  const Register Src1Reg = MI.getOperand(1).getReg();
  const Register Src2Reg = MI.getOperand(2).getReg();
  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT Src1Ty = MRI.getType(Src1Reg);
  if (!Src1Ty.isVector() || Src1Ty.getSizeInBits() != 512)
    return false;

  // The select uses a single type for the result and both sources, so a
  // shuffle that widens, narrows or re-types its inputs cannot be expressed.
  // Bail out instead of asserting: such shuffles are valid input.
  if (DstTy != Src1Ty)
    return false;

  const unsigned NumSrcElems = Src1Ty.getNumElements();
  const size_t NumElems = Mask.size();
  // One select bit per lane has to fit into the 64-bit mask.
  if (NumElems != NumSrcElems || NumElems > 64)
    return false;

  uint64_t DstMask = 0;
  for (unsigned I = 0; I < NumElems; ++I) {
    const int Idx = Mask[I];
    // Undefined lane: any value is acceptable, keep the first source.
    if (Idx < 0)
      continue;

    // Lane I must come from position I of one of the two sources; anything
    // else is a real permutation that a select cannot perform.
    const unsigned SrcIdx = static_cast<unsigned>(Idx);
    if (SrcIdx % NumSrcElems != I)
      return false;

    // Indices in the upper half address the second source. Shift in 64 bits
    // so that lanes 31..63 of byte vectors are encoded correctly.
    if (SrcIdx >= NumSrcElems)
      DstMask |= uint64_t(1) << I;
  }

  MatchInfo = std::make_tuple(DstReg, Src1Reg, Src2Reg, DstMask);
  return true;
}
7 changes: 7 additions & 0 deletions llvm/lib/Target/AIE/AIECombinerHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,11 @@ bool matchBroadcastElement(MachineInstr &MI, MachineRegisterInfo &MRI,
std::pair<Register, Register> &MatchInfo);
bool matchShuffleToBroadcast(MachineInstr &MI, MachineRegisterInfo &MRI,
std::pair<Register, Register> &MatchInfo);
/// Match a G_SHUFFLE_VECTOR of two 512-bit vectors in which every result
/// element i is taken from element i of either the first or the second source,
/// so that it can be rewritten as G_AIE_VSEL. On success \p MatchInfo holds
/// the destination register, both source registers and the select mask, in
/// which bit i is set when element i comes from the second source.
bool matchShuffleToVSel(
    MachineInstr &MI, MachineRegisterInfo &MRI,
    std::tuple<Register, Register, Register, uint64_t> &MatchInfo);
/// \return true if \a MemI can be moved just before \a Dest in order to allow
/// post-increment combining
bool canDelayMemOp(MachineInstr &MemI, MachineInstr &Dest,
Expand Down Expand Up @@ -156,6 +161,8 @@ bool matchConcatPadVector(MachineInstr &MI, MachineRegisterInfo &MRI,
Register &MatchedInputVector);
void applyPadVector(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &B, Register MatchedInputVector);
/// Replace \p MI by the target's generic vector-select instruction built from
/// the destination register, the two source registers and the select mask
/// stored in \p MatchInfo, then erase \p MI.
void applyVSel(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
std::tuple<Register, Register, Register, uint64_t> &MatchInfo);
bool tryToCombineVectorShiftsByZero(MachineInstr &MI, MachineRegisterInfo &MRI);

bool matchExtractConcat(MachineInstr &MI, MachineRegisterInfo &MRI,
Expand Down
9 changes: 9 additions & 0 deletions llvm/lib/Target/AIE/AIEInstrGISel.td
Original file line number Diff line number Diff line change
Expand Up @@ -109,3 +109,12 @@ def G_AIE_UNPAD_VECTOR : AIEGenericInstruction {
let InOperandList = (ins type1:$src);
let hasSideEffects = false;
}

// Select between the words of two vectors of the same type. Bit i of the
// sel operand decides where word i of the result comes from: as produced by
// the G_SHUFFLE_VECTOR combine, a clear bit takes word i of $src1 and a set
// bit takes word i of $src2.
def G_AIE_VSEL : AIEGenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2, type1:$sel);
let hasSideEffects = false;
}
4 changes: 4 additions & 0 deletions llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1575,6 +1575,10 @@ unsigned AIE2PInstrInfo::getGenericBroadcastVectorOpcode() const {
return AIE2P::G_AIE_BROADCAST_VECTOR;
}

/// AIE2P implements the generic vector select with G_AIE_VSEL.
unsigned AIE2PInstrInfo::getGenericVSelOpcode() const {
return AIE2P::G_AIE_VSEL;
}

Register AIE2PInstrInfo::getSSStatusReg() const { return AIE2P::srSS0; }

Register AIE2PInstrInfo::getMSStatusReg() const { return AIE2P::srMS0; }
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ class AIE2PInstrInfo : public AIE2PGenInstrInfo {
unsigned getGenericPadVectorOpcode() const override;
unsigned getGenericUnpadVectorOpcode() const override;
unsigned getGenericBroadcastVectorOpcode() const override;
unsigned getGenericVSelOpcode() const override;
bool isLock(unsigned Opc) const override;
bool isDelayedSchedBarrier(const MachineInstr &MI) const override;
bool isSchedBarrier(const MachineInstr &MI) const override;
Expand Down
13 changes: 12 additions & 1 deletion llvm/lib/Target/AIE/aie2p/AIE2PInstrPatterns.td
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
// (c) Copyright 2024-2025 Advanced Micro Devices, Inc. or its affiliates
//
//===----------------------------------------------------------------------===//
//
Expand Down Expand Up @@ -908,6 +908,17 @@ def : Pat<(int_aie2p_vsel16 VEC512:$src1, VEC512:$src2, eRS16:$rsel),
def : Pat<(int_aie2p_vsel32 VEC512:$src1, VEC512:$src2, eRS16:$rsel),
(VSEL_32 VEC512:$src1, VEC512:$src2, eRS16:$rsel)>;

// Expose G_AIE_VSEL to the pattern matcher through an equivalent SDNode so
// the generic select can be mapped onto the VSEL_* instructions below.
def vsel_node : SDNode<"AIE2P::G_AIE_VSEL",
SDTypeProfile<1, 3, []>>;
def : GINodeEquiv<G_AIE_VSEL, vsel_node>;

// One pattern per element width. The 64-lane byte variant takes its select
// mask as a v2i32 value in an eL register; the 16- and 32-bit variants take
// an i32 in an eRS16 register.
def : Pat<(v64i8(vsel_node (v64i8 VEC512:$src1), (v64i8 VEC512:$src2), (v2i32 eL:$lsel))),
(VSEL_8 VEC512:$src1, VEC512:$src2, eL:$lsel)>;
def : Pat<(v32i16(vsel_node (v32i16 VEC512:$src1), (v32i16 VEC512:$src2), (i32 eRS16:$rsel))),
(VSEL_16 VEC512:$src1, VEC512:$src2, eRS16:$rsel)>;
def : Pat<(v16i32(vsel_node (v16i32 VEC512:$src1), (v16i32 VEC512:$src2), (i32 eRS16:$rsel))),
(VSEL_32 VEC512:$src1, VEC512:$src2, eRS16:$rsel)>;

// VSUB_LT/VSUB_GE
// Note : Non-constant sign is handled in .cpp
class VSUB_LT_GE_INSTR<SDPatternOperator OpNode, AIE2PInst Inst, bits<1> sgn>:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
# (c) Copyright 2024-2025 Advanced Micro Devices, Inc. or its affiliates
#
# RUN: llc -mtriple aie2p -run-pass=aie2p-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s

Expand Down Expand Up @@ -119,3 +119,111 @@ body: |
%0:_(<32 x s8>) = G_SHUFFLE_VECTOR %1(<4 x s8>), %2, shufflemask(0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3)
PseudoRET implicit $lr, implicit %0
...
---
name: shuffle_vector_vsel_valid_mask_ordered_indices
alignment: 16
exposesReturnsTwice: false
legalized: false
body: |
bb.1.entry:
liveins: $r0, $wl4, $x2
; CHECK-LABEL: name: shuffle_vector_vsel_valid_mask_ordered_indices
; CHECK: liveins: $r0, $wl4, $x2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $wl4
; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_AIE_UNPAD_VECTOR [[COPY1]](<8 x s32>)
; CHECK-NEXT: [[AIE_PAD_VECTOR_UNDEF:%[0-9]+]]:_(<16 x s32>) = G_AIE_PAD_VECTOR_UNDEF [[AIE_UNPAD_VECTOR]](<4 x s32>)
; CHECK-NEXT: [[AIE_VSEL:%[0-9]+]]:_(<16 x s32>) = G_AIE_VSEL [[AIE_PAD_VECTOR_UNDEF]], [[COPY]], 65520
; CHECK-NEXT: $x0 = COPY [[AIE_VSEL]](<16 x s32>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0
%1:_(<16 x s32>) = COPY $x2
%4:_(<8 x s32>) = COPY $wl4
%3:_(<4 x s32>) = G_AIE_UNPAD_VECTOR %4(<8 x s32>)
%8:_(<16 x s32>) = G_AIE_PAD_VECTOR_UNDEF %3(<4 x s32>)
%0:_(<16 x s32>) = G_SHUFFLE_VECTOR %8(<16 x s32>), %1, shufflemask(0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)
$x0 = COPY %0(<16 x s32>)
PseudoRET implicit $lr, implicit $x0
...
---
name: shuffle_vector_vsel_valid_mask_mixed_indices
alignment: 16
exposesReturnsTwice: false
legalized: false
body: |
bb.1.entry:
liveins: $r0, $wl4, $x2
; CHECK-LABEL: name: shuffle_vector_vsel_valid_mask_mixed_indices
; CHECK: liveins: $r0, $wl4, $x2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $wl4
; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_AIE_UNPAD_VECTOR [[COPY1]](<8 x s32>)
; CHECK-NEXT: [[AIE_PAD_VECTOR_UNDEF:%[0-9]+]]:_(<16 x s32>) = G_AIE_PAD_VECTOR_UNDEF [[AIE_UNPAD_VECTOR]](<4 x s32>)
; CHECK-NEXT: [[AIE_VSEL:%[0-9]+]]:_(<16 x s32>) = G_AIE_VSEL [[AIE_PAD_VECTOR_UNDEF]], [[COPY]], 65521
; CHECK-NEXT: $x0 = COPY [[AIE_VSEL]](<16 x s32>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0
%1:_(<16 x s32>) = COPY $x2
%4:_(<8 x s32>) = COPY $wl4
%3:_(<4 x s32>) = G_AIE_UNPAD_VECTOR %4(<8 x s32>)
%8:_(<16 x s32>) = G_AIE_PAD_VECTOR_UNDEF %3(<4 x s32>)
%0:_(<16 x s32>) = G_SHUFFLE_VECTOR %8(<16 x s32>), %1, shufflemask(16, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)
$x0 = COPY %0(<16 x s32>)
PseudoRET implicit $lr, implicit $x0
...
---
name: shuffle_vector_vsel_invalid_mask_with_repeated_index
alignment: 16
exposesReturnsTwice: false
legalized: false
body: |
bb.1.entry:
liveins: $r0, $wl4, $x2
; CHECK-LABEL: name: shuffle_vector_vsel_invalid_mask_with_repeated_index
; CHECK: liveins: $r0, $wl4, $x2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $wl4
; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_AIE_UNPAD_VECTOR [[COPY1]](<8 x s32>)
; CHECK-NEXT: [[AIE_PAD_VECTOR_UNDEF:%[0-9]+]]:_(<16 x s32>) = G_AIE_PAD_VECTOR_UNDEF [[AIE_UNPAD_VECTOR]](<4 x s32>)
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<16 x s32>) = G_SHUFFLE_VECTOR [[AIE_PAD_VECTOR_UNDEF]](<16 x s32>), [[COPY]], shufflemask(0, 1, 2, 3, 16, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)
; CHECK-NEXT: $x0 = COPY [[SHUF]](<16 x s32>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0
%1:_(<16 x s32>) = COPY $x2
%4:_(<8 x s32>) = COPY $wl4
%3:_(<4 x s32>) = G_AIE_UNPAD_VECTOR %4(<8 x s32>)
%8:_(<16 x s32>) = G_AIE_PAD_VECTOR_UNDEF %3(<4 x s32>)
%0:_(<16 x s32>) = G_SHUFFLE_VECTOR %8(<16 x s32>), %1, shufflemask(0, 1, 2, 3, 16, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)
$x0 = COPY %0(<16 x s32>)
PseudoRET implicit $lr, implicit $x0
...
---
name: shuffle_vector_vsel_invalid_mask_with_unordered_indices
alignment: 16
exposesReturnsTwice: false
legalized: false
body: |
bb.1.entry:
liveins: $r0, $wl4, $x2
; CHECK-LABEL: name: shuffle_vector_vsel_invalid_mask_with_unordered_indices
; CHECK: liveins: $r0, $wl4, $x2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<8 x s32>) = COPY $wl4
; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_AIE_UNPAD_VECTOR [[COPY1]](<8 x s32>)
; CHECK-NEXT: [[AIE_PAD_VECTOR_UNDEF:%[0-9]+]]:_(<16 x s32>) = G_AIE_PAD_VECTOR_UNDEF [[AIE_UNPAD_VECTOR]](<4 x s32>)
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<16 x s32>) = G_SHUFFLE_VECTOR [[AIE_PAD_VECTOR_UNDEF]](<16 x s32>), [[COPY]], shufflemask(1, 0, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)
; CHECK-NEXT: $x0 = COPY [[SHUF]](<16 x s32>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0
%1:_(<16 x s32>) = COPY $x2
%4:_(<8 x s32>) = COPY $wl4
%3:_(<4 x s32>) = G_AIE_UNPAD_VECTOR %4(<8 x s32>)
%8:_(<16 x s32>) = G_AIE_PAD_VECTOR_UNDEF %3(<4 x s32>)
%0:_(<16 x s32>) = G_SHUFFLE_VECTOR %8(<16 x s32>), %1, shufflemask(1, 0, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)
$x0 = COPY %0(<16 x s32>)
PseudoRET implicit $lr, implicit $x0
...
82 changes: 81 additions & 1 deletion llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-vsel.mir
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
# (c) Copyright 2024-2025 Advanced Micro Devices, Inc. or its affiliates
# RUN: llc -mtriple aie2p -run-pass=instruction-select %s -verify-machineinstrs -o - | FileCheck %s

---
Expand Down Expand Up @@ -86,3 +86,83 @@ body: |
$x0 = COPY %0:vregbank(<16 x s32>)
PseudoRET implicit $lr, implicit $x0
...

---
name: g_aie_vsel_8
alignment: 16
legalized: true
regBankSelected: true
body: |
bb.1.entry:
liveins: $r0, $r1, $x2, $x4
; CHECK-LABEL: name: g_aie_vsel_8
; CHECK: liveins: $r0, $r1, $x2, $x4
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec512 = COPY $x4
; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r0
; CHECK-NEXT: [[COPY3:%[0-9]+]]:er = COPY $r1
; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:el = REG_SEQUENCE [[COPY2]], %subreg.sub_l_even, [[COPY3]], %subreg.sub_l_odd
; CHECK-NEXT: [[VSEL_8_:%[0-9]+]]:vec512 = VSEL_8 [[COPY]], [[COPY1]], [[REG_SEQUENCE]]
; CHECK-NEXT: $x0 = COPY [[VSEL_8_]]
; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0
%1:vregbank(<64 x s8>) = COPY $x2
%2:vregbank(<64 x s8>) = COPY $x4
%4:gprregbank(s32) = COPY $r0
%5:gprregbank(s32) = COPY $r1
%3:gprregbank(s64) = G_MERGE_VALUES %4:gprregbank(s32), %5:gprregbank(s32)
%6:gprregbank(<2 x s32>) = G_BITCAST %3:gprregbank(s64)
%0:vregbank(<64 x s8>) = G_AIE_VSEL %1:vregbank(<64 x s8>), %2:vregbank(<64 x s8>), %6:gprregbank(<2 x s32>)
$x0 = COPY %0:vregbank(<64 x s8>)
PseudoRET implicit $lr, implicit $x0
...

---
name: g_aie_vsel_16
alignment: 16
legalized: true
regBankSelected: true
body: |
bb.1.entry:
liveins: $r0, $r1, $x2, $x4
; CHECK-LABEL: name: g_aie_vsel_16
; CHECK: liveins: $r0, $r1, $x2, $x4
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec512 = COPY $x4
; CHECK-NEXT: [[COPY2:%[0-9]+]]:ers16 = COPY $r0
; CHECK-NEXT: [[VSEL_16_:%[0-9]+]]:vec512 = VSEL_16 [[COPY]], [[COPY1]], [[COPY2]]
; CHECK-NEXT: $x0 = COPY [[VSEL_16_]]
; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0
%1:vregbank(<32 x s16>) = COPY $x2
%2:vregbank(<32 x s16>) = COPY $x4
%3:gprregbank(s32) = COPY $r0
%0:vregbank(<32 x s16>) = G_AIE_VSEL %1:vregbank(<32 x s16>), %2:vregbank(<32 x s16>), %3:gprregbank(s32)
$x0 = COPY %0:vregbank(<32 x s16>)
PseudoRET implicit $lr, implicit $x0
...

---
name: g_aie_vsel_32
alignment: 16
legalized: true
regBankSelected: true
body: |
bb.1.entry:
liveins: $r0, $r1, $x2, $x4
; CHECK-LABEL: name: g_aie_vsel_32
; CHECK: liveins: $r0, $r1, $x2, $x4
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x2
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec512 = COPY $x4
; CHECK-NEXT: [[COPY2:%[0-9]+]]:ers16 = COPY $r0
; CHECK-NEXT: [[VSEL_32_:%[0-9]+]]:vec512 = VSEL_32 [[COPY]], [[COPY1]], [[COPY2]]
; CHECK-NEXT: $x0 = COPY [[VSEL_32_]]
; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0
%1:vregbank(<16 x s32>) = COPY $x2
%2:vregbank(<16 x s32>) = COPY $x4
%3:gprregbank(s32) = COPY $r0
%0:vregbank(<16 x s32>) = G_AIE_VSEL %1:vregbank(<16 x s32>), %2:vregbank(<16 x s32>), %3:gprregbank(s32)
$x0 = COPY %0:vregbank(<16 x s32>)
PseudoRET implicit $lr, implicit $x0
...

0 comments on commit 8bb96b0

Please sign in to comment.