Skip to content

Commit

Permalink
[AIEX] Combine G_SHUFFLE_VECTOR to UNMERGE
Browse files Browse the repository at this point in the history
  • Loading branch information
katerynamuts committed Feb 7, 2025
1 parent 099ac64 commit dbb4889
Show file tree
Hide file tree
Showing 4 changed files with 411 additions and 227 deletions.
148 changes: 135 additions & 13 deletions llvm/lib/Target/AIE/AIECombinerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1874,8 +1874,8 @@ static bool checkExtractSubvectorPrerequisites(const AIEBaseInstrInfo &TII,
(DstTySize != ScalarRegSize && DstTySize != 2 * ScalarRegSize))
return false;

// Currently, we cannot extract vectors of the size less than vector register
// size.
// Currently, we cannot extract vectors for the case when the size of the
// source vector is less than the basic vector register size (of the target).
if (SrcTySize < VecRegSize)
return false;

Expand Down Expand Up @@ -1944,6 +1944,38 @@ buildExtractSubvector(MachineIRBuilder &B, MachineRegisterInfo &MRI,
return B.buildInstr(Opc, {DstVecReg}, {NewSrcReg, Cst});
}

/// Match something like this:
///   %1:_(<16 x s32>) = COPY $x0
///   %2:_(<16 x s32>) = COPY $x1
///   %0:_(<8 x s32>) = G_SHUFFLE_VECTOR %1(<16 x s32>), %2(<16 x s32>),
///                       shufflemask(8, 9, 10, 11, 12, 13, 14, 15)
///   PseudoRET implicit $lr, implicit %0
///
/// To convert to:
///   %1:_(<16 x s32>) = COPY $x0
///   %2:_(<8 x s32>), %3:_(<8 x s32>) = G_UNMERGE_VALUES %1(<16 x s32>)
///   PseudoRET implicit $lr, implicit %3(<8 x s32>)
///
/// \param MI the G_SHUFFLE_VECTOR to rewrite.
/// \param SubIdx index of the extracted subvector within the first shuffle
///        source (the caller has already proven the mask is a sequential
///        extract at this index).
/// \param NumSubVectors number of equally-sized subvectors the first shuffle
///        source splits into.
/// \return always true; the rewrite itself cannot fail once the caller has
///         validated the mask.
static bool matchShuffleToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
                                  BuildFnTy &MatchInfo, unsigned SubIdx,
                                  unsigned NumSubVectors) {
  const Register DstReg = MI.getOperand(0).getReg();
  const Register Src1Reg = MI.getOperand(1).getReg();
  const LLT DstTy = MRI.getType(DstReg);

  // TODO: Select into G_EXTRACT_SUBVECTOR once it is more widely supported
  MatchInfo = [=, &MRI](MachineIRBuilder &B) {
    // One destination per subvector: the shuffle's original destination takes
    // the slot of the extracted piece; all other slots get fresh (dead)
    // virtual registers of the same type.
    SmallVector<Register, 4> SubVecs;
    for (unsigned I = 0; I < NumSubVectors; I++) {
      // SubIdx is already 'unsigned'; no cast is needed for the comparison.
      if (I == SubIdx)
        SubVecs.push_back(DstReg);
      else
        SubVecs.push_back(MRI.createGenericVirtualRegister(DstTy));
    }
    B.buildUnmerge(SubVecs, Src1Reg);
  };
  return true;
}

/// Match something like this:
/// %1:_(<16 x s16>) = COPY $wl0
/// %2:_(<16 x s16>) = COPY $wl1
Expand All @@ -1954,6 +1986,81 @@ buildExtractSubvector(MachineIRBuilder &B, MachineRegisterInfo &MRI,
/// %1:_(<16 x s16>) = COPY $wl0
/// %2:_(s32) = G_CONSTANT i32 1
/// %3:_(<4 x s16>) = G_AIE_EXTRACT_SUBVECTOR %1(<16 x s16>), %2(s32)
/// NOTE: This combine works ONLY for 32- and 64-bit outputs!
static bool matchShuffleToAIEExtractSubvec(
    MachineInstr &MI, MachineRegisterInfo &MRI, const AIEBaseInstrInfo &TII,
    BuildFnTy &MatchInfo, unsigned SubIdx, unsigned NumSubVectors) {
  // Scalar (GPR) register size; used as the bit width of the extract-index
  // constant built below.
  const unsigned GPRSize = TII.getScalarRegSize();
  // Source-vector bit size natively supported by G_AIE_EXTRACT_SUBVECTOR.
  const unsigned ExtractSubvecNativeSrcSize = TII.getBasicVectorBitSize();

  const Register DstReg = MI.getOperand(0).getReg();
  const Register Src1Reg = MI.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT Src1Ty = MRI.getType(Src1Reg);
  const unsigned Src1TySize = Src1Ty.getSizeInBits();

  // Bail out early if the destination/source sizes cannot be handled by the
  // extract-subvector pseudo (see checkExtractSubvectorPrerequisites).
  if (!checkExtractSubvectorPrerequisites(TII, DstTy, Src1Ty))
    return false;

  const unsigned Opc = TII.getGenericExtractSubvectorOpcode();

  // Natively supported source vector type
  if (Src1TySize == ExtractSubvecNativeSrcSize) {
    MatchInfo = [=](MachineIRBuilder &B) {
      auto Cst = B.buildConstant(LLT::scalar(GPRSize), SubIdx);
      B.buildInstr(Opc, {DstReg}, {Src1Reg, Cst});
    };

    return true;
  }

  // Source vectors of a non-native size are converted to vectors of the native
  // size
  const unsigned Src1ElmtSize = Src1Ty.getElementType().getSizeInBits();
  // Element count of a native-size vector with Src1's element type.
  // NOTE(review): the name suggests a fixed 512 bits, but the width actually
  // comes from getBasicVectorBitSize() — consider renaming.
  const unsigned Src1Vec512BitLen = ExtractSubvecNativeSrcSize / Src1ElmtSize;
  const LLT NewSrc1Ty = LLT::fixed_vector(Src1Vec512BitLen, Src1ElmtSize);
  // NOTE(review): this virtual register is created during the match phase; if
  // the combine is ultimately not applied it remains unused — confirm this is
  // acceptable for this combiner.
  const Register NewSrcReg = MRI.createGenericVirtualRegister(NewSrc1Ty);

  if (Src1TySize < ExtractSubvecNativeSrcSize) {
    // Widen the source up to the native size by concatenating it with undef
    // padding, then extract from the widened vector. SubIdx is unchanged
    // because the original source occupies the low part of the concat.
    MatchInfo = [=](MachineIRBuilder &B) {
      const Register ImplicitDef = B.buildUndef(Src1Ty).getReg(0);
      SmallVector<Register, 15> ConcatOps = {Src1Reg};
      unsigned NumImplicitDef = ExtractSubvecNativeSrcSize / Src1TySize - 1;
      while (NumImplicitDef-- > 0) {
        ConcatOps.push_back(ImplicitDef);
      }
      B.buildConcatVectors({NewSrcReg}, ConcatOps);
      auto Cst = B.buildConstant(LLT::scalar(GPRSize), SubIdx);
      B.buildInstr(Opc, {DstReg}, {NewSrcReg, Cst});
    };
    return true;
  }

  // Source vectors with the size greater than the native source vector size
  MatchInfo = [=, &MRI](MachineIRBuilder &B) {
    // The oversized source is split into SizeCoefficient native-size pieces
    // via G_UNMERGE_VALUES; the extract then targets the piece that contains
    // the requested subvector, with the index rebased into that piece.
    const unsigned SizeCoefficient = Src1TySize / ExtractSubvecNativeSrcSize;
    const unsigned NumSubVectorsNativeSize = NumSubVectors / SizeCoefficient;
    unsigned NewSubIdx = SubIdx % NumSubVectorsNativeSize;

    SmallVector<Register, 4> SubRegs;
    // Index of the native-size piece that holds subvector SubIdx.
    unsigned NewSrcRegPosition = SubIdx / NumSubVectorsNativeSize;
    for (unsigned I = 0; I < SizeCoefficient; ++I) {
      if (I == NewSrcRegPosition)
        SubRegs.push_back(NewSrcReg);
      else
        SubRegs.push_back(MRI.createGenericVirtualRegister(NewSrc1Ty));
    }

    B.buildUnmerge(SubRegs, Src1Reg);
    auto Cst = B.buildConstant(LLT::scalar(GPRSize), NewSubIdx);
    B.buildInstr(Opc, {DstReg}, {NewSrcReg, Cst});
  };
  return true;
}

/// Performs common legality checks and then calls
/// matchShuffleToAIEExtractSubvec and matchShuffleToUnmerge, which extract
/// subvectors if possible.
bool llvm::matchShuffleToExtractSubvec(MachineInstr &MI,
MachineRegisterInfo &MRI,
const AIEBaseInstrInfo &TII,
Expand All @@ -1962,45 +2069,60 @@ bool llvm::matchShuffleToExtractSubvec(MachineInstr &MI,

const Register DstReg = MI.getOperand(0).getReg();
const Register Src1Reg = MI.getOperand(1).getReg();
const ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();

const LLT DstTy = MRI.getType(DstReg);
const LLT Src1Ty = MRI.getType(Src1Reg);
const unsigned Src1TySize = Src1Ty.getSizeInBits();

if (!checkExtractSubvectorPrerequisites(TII, DstTy, Src1Ty))
if (!DstTy.isVector() || !Src1Ty.isVector())
return false;

// Boolean vectors are unlikely to select into subregister copy
if (DstTy.getElementType() == LLT::scalar(1))
return false;

// This should be handled by a separate combine that copies Src1Reg to
// DstReg.
if (Src1TySize == DstTy.getSizeInBits())
return false;

const unsigned NumDstElems = DstTy.getNumElements();
const unsigned NumSrc1Elems = Src1Ty.getNumElements();
const unsigned NumSubVectors = NumSrc1Elems / NumDstElems;

// Not an extract pattern
if (NumSrc1Elems < NumDstElems)
if (NumSrc1Elems <= NumDstElems)
return false;

// Unlikely to select into a subregister copy
if (NumSrc1Elems % NumDstElems != 0)
return false;

auto GetSubIdx = [=, &Mask]() -> std::optional<unsigned> {
const unsigned NumSubVectors = NumSrc1Elems / NumDstElems;
auto GetSubvecExtractIdx = [=, &Mask]() -> std::optional<unsigned> {
for (unsigned SubVecIdx = 0; SubVecIdx < NumSubVectors; ++SubVecIdx) {
if (checkSequentialMask(Mask, SubVecIdx * NumDstElems, NumDstElems)) {
return SubVecIdx;
}
}

return std::nullopt;
};

std::optional<unsigned> SubIdx = GetSubIdx();
std::optional<unsigned> SubvecExtractIdx = GetSubvecExtractIdx();

// Not an extract pattern
if (!SubIdx)
if (!SubvecExtractIdx)
return false;

MatchInfo = [=, &MRI, &TII](MachineIRBuilder &B) {
buildExtractSubvector(B, MRI, TII, DstReg, Src1Reg, SubIdx.value());
};
return true;
if (matchShuffleToAIEExtractSubvec(MI, MRI, TII, MatchInfo,
SubvecExtractIdx.value(), NumSubVectors))
return true;
if (matchShuffleToUnmerge(MI, MRI, MatchInfo, SubvecExtractIdx.value(),
NumSubVectors))
return true;

return false;
}

/// Match something like this:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -784,3 +784,147 @@ body: |
%0:_(<64 x s32>) = G_SHUFFLE_VECTOR %1(<64 x s32>), %2(<64 x s32>), shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1)
PseudoRET implicit $lr, implicit %0
...

# Test G_SHUFFLE_VECTOR to UNMERGE
# Note: G_UNMERGE_VALUES is itself combined to G_AIE_UNPAD_VECTOR

# Extract the low <8 x s32> half of a <16 x s32> source; the resulting
# two-output G_UNMERGE_VALUES whose low half is used folds to
# G_AIE_UNPAD_VECTOR.
---
name: shuffle_vector_unmerge_lo
tracksRegLiveness: true
body: |
  bb.1:
    liveins: $x0, $x1
    ; CHECK-LABEL: name: shuffle_vector_unmerge_lo
    ; CHECK: liveins: $x0, $x1
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0
    ; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_AIE_UNPAD_VECTOR [[COPY]](<16 x s32>)
    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_UNPAD_VECTOR]](<8 x s32>)
    %1:_(<16 x s32>) = COPY $x0
    %2:_(<16 x s32>) = COPY $x1
    %0:_(<8 x s32>) = G_SHUFFLE_VECTOR %1(<16 x s32>), %2(<16 x s32>), shufflemask(0, 1, 2, 3, 4, 5, 6, 7)
    PseudoRET implicit $lr, implicit %0
...
# Extract the lowest <8 x s32> quarter of a <32 x s32> source: the unmerge
# has four outputs, so it is NOT folded to G_AIE_UNPAD_VECTOR.
---
name: shuffle_vector_unmerge_lo_4_unmerge_outputs
tracksRegLiveness: true
body: |
  bb.1:
    liveins: $y0, $y1
    ; CHECK-LABEL: name: shuffle_vector_unmerge_lo_4_unmerge_outputs
    ; CHECK: liveins: $y0, $y1
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $y0
    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s32>), [[UV1:%[0-9]+]]:_(<8 x s32>), [[UV2:%[0-9]+]]:_(<8 x s32>), [[UV3:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>)
    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV]](<8 x s32>)
    %1:_(<32 x s32>) = COPY $y0
    %2:_(<32 x s32>) = COPY $y1
    %0:_(<8 x s32>) = G_SHUFFLE_VECTOR %1(<32 x s32>), %2(<32 x s32>), shufflemask(0, 1, 2, 3, 4, 5, 6, 7)
    PseudoRET implicit $lr, implicit %0
...
# Extract the high half of a 256-bit (<8 x s32>) source.
---
name: shuffle_vector_unmerge_hi_128
tracksRegLiveness: true
body: |
  bb.1:
    liveins: $wl0, $wl1
    ; CHECK-LABEL: name: shuffle_vector_unmerge_hi_128
    ; CHECK: liveins: $wl0, $wl1
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $wl0
    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s32>), [[UV1:%[0-9]+]]:_(<4 x s32>) = G_UNMERGE_VALUES [[COPY]](<8 x s32>)
    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV1]](<4 x s32>)
    %1:_(<8 x s32>) = COPY $wl0
    %2:_(<8 x s32>) = COPY $wl1
    %0:_(<4 x s32>) = G_SHUFFLE_VECTOR %1(<8 x s32>), %2(<8 x s32>), shufflemask(4, 5, 6, 7)
    PseudoRET implicit $lr, implicit %0
...
# Extract the high half of a 512-bit (<16 x s32>) source.
---
name: shuffle_vector_unmerge_hi_256
tracksRegLiveness: true
body: |
  bb.1:
    liveins: $x0, $x1
    ; CHECK-LABEL: name: shuffle_vector_unmerge_hi_256
    ; CHECK: liveins: $x0, $x1
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0
    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<8 x s32>), [[UV1:%[0-9]+]]:_(<8 x s32>) = G_UNMERGE_VALUES [[COPY]](<16 x s32>)
    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV1]](<8 x s32>)
    %1:_(<16 x s32>) = COPY $x0
    %2:_(<16 x s32>) = COPY $x1
    %0:_(<8 x s32>) = G_SHUFFLE_VECTOR %1(<16 x s32>), %2(<16 x s32>), shufflemask(8, 9, 10, 11, 12, 13, 14, 15)
    PseudoRET implicit $lr, implicit %0
...
# Extract the high half of a 1024-bit (<32 x s32>) source.
---
name: shuffle_vector_unmerge_hi_512
tracksRegLiveness: true
body: |
  bb.1:
    liveins: $y0, $y1
    ; CHECK-LABEL: name: shuffle_vector_unmerge_hi_512
    ; CHECK: liveins: $y0, $y1
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s32>) = COPY $y0
    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[COPY]](<32 x s32>)
    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV1]](<16 x s32>)
    %1:_(<32 x s32>) = COPY $y0
    %2:_(<32 x s32>) = COPY $y1
    %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1(<32 x s32>), %2(<32 x s32>), shufflemask(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)
    PseudoRET implicit $lr, implicit %0
...
# Extract the high half of a 2048-bit (<32 x s64>) source.
---
name: shuffle_vector_unmerge_hi_1024
tracksRegLiveness: true
body: |
  bb.1:
    liveins: $dm0, $dm1
    ; CHECK-LABEL: name: shuffle_vector_unmerge_hi_1024
    ; CHECK: liveins: $dm0, $dm1
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<32 x s64>) = COPY $dm0
    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s64>), [[UV1:%[0-9]+]]:_(<16 x s64>) = G_UNMERGE_VALUES [[COPY]](<32 x s64>)
    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[UV1]](<16 x s64>)
    %1:_(<32 x s64>) = COPY $dm0
    %2:_(<32 x s64>) = COPY $dm1
    %0:_(<16 x s64>) = G_SHUFFLE_VECTOR %1(<32 x s64>), %2(<32 x s64>), shufflemask(16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)
    PseudoRET implicit $lr, implicit %0
...
# Negative test: the mask (4..11) crosses a subvector boundary, so it is not
# a sequential extract at any subvector index and the shuffle is kept.
---
name: shuffle_vector_subreg_boundary_cross_invalid
tracksRegLiveness: true
body: |
  bb.1:
    liveins: $x0, $x1
    ; CHECK-LABEL: name: shuffle_vector_subreg_boundary_cross_invalid
    ; CHECK: liveins: $x0, $x1
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0
    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s32>) = COPY $x1
    ; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s32>) = G_SHUFFLE_VECTOR [[COPY]](<16 x s32>), [[COPY1]], shufflemask(4, 5, 6, 7, 8, 9, 10, 11)
    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[SHUF]](<8 x s32>)
    %1:_(<16 x s32>) = COPY $x0
    %2:_(<16 x s32>) = COPY $x1
    %0:_(<8 x s32>) = G_SHUFFLE_VECTOR %1(<16 x s32>), %2(<16 x s32>), shufflemask(4, 5, 6, 7, 8, 9, 10, 11)
    PseudoRET implicit $lr, implicit %0
...
# Note: currently it is combined to G_AIE_VSEL but it should be combined to COPY
# which is not implemented yet.
---
name: shuffle_vector_to_copy
tracksRegLiveness: true
body: |
  bb.1:
    liveins: $x0, $x1
    ; CHECK-LABEL: name: shuffle_vector_to_copy
    ; CHECK: liveins: $x0, $x1
    ; CHECK-NEXT: {{  $}}
    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $x0
    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<16 x s32>) = COPY $x1
    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
    ; CHECK-NEXT: [[AIE_VSEL:%[0-9]+]]:_(<16 x s32>) = G_AIE_VSEL [[COPY]], [[COPY1]], [[C]](s32)
    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_VSEL]](<16 x s32>)
    %1:_(<16 x s32>) = COPY $x0
    %2:_(<16 x s32>) = COPY $x1
    %0:_(<16 x s32>) = G_SHUFFLE_VECTOR %1(<16 x s32>), %2(<16 x s32>), shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
    PseudoRET implicit $lr, implicit %0
...
Loading

0 comments on commit dbb4889

Please sign in to comment.