Skip to content

Commit

Permalink
[AIE2P] Legalize G_TRUNC for vector types
Browse files Browse the repository at this point in the history
  • Loading branch information
niwinanto committed Feb 7, 2025
1 parent 482f102 commit 2f8c42a
Show file tree
Hide file tree
Showing 6 changed files with 233 additions and 27 deletions.
32 changes: 32 additions & 0 deletions llvm/lib/Target/AIE/AIELegalizerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1529,4 +1529,36 @@ bool AIELegalizerHelper::legalizeG_AIE_EXTRACT_VECTOR_ELT(
return true;
}

/// Custom legalization of a G_TRUNC whose source is a 256-bit vector.
///
/// The truncation is rewritten as a three-instruction sequence operating on
/// vectors with twice as many elements:
///
///   %Pad:<2N x SrcElt>   = <generic pad-vector opcode>   %Src:<N x SrcElt>
///   %Trunc:<2N x DstElt> = G_TRUNC                       %Pad
///   %Dst:<N x DstElt>    = <generic unpad-vector opcode> %Trunc
///
/// The pad/unpad opcodes are queried from the subtarget's instruction info.
///
/// \param Helper Legalizer helper that provides the MachineIRBuilder.
/// \param MI     The G_TRUNC to rewrite; it is erased before returning.
/// \returns true (the rewrite is unconditional once the assertion holds).
bool AIELegalizerHelper::legalizeG_TRUNC(LegalizerHelper &Helper,
                                         MachineInstr &MI) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();

  const Register DstReg = MI.getOperand(0).getReg();
  const Register SrcReg = MI.getOperand(1).getReg();
  const LLT SrcVecTy = MRI.getType(SrcReg);
  const LLT DstVecTy = MRI.getType(DstReg);

  // The size is queried directly inside the assert: a local that is only read
  // by an assert becomes an unused variable (and a warning) in NDEBUG builds.
  assert(SrcVecTy.getSizeInBits() == 256 &&
         "Expected G_TRUNC input vector size is 256!");

  const AIEBaseInstrInfo *II = ST.getInstrInfo();
  const unsigned PadOpc = II->getGenericPadVectorOpcode();
  const unsigned UnpadOpc = II->getGenericUnpadVectorOpcode();

  // Intermediate types keep the original element types but double the element
  // count on both the source and the destination side of the truncation.
  const LLT NewPadRegTy = LLT::fixed_vector(SrcVecTy.getNumElements() * 2,
                                            SrcVecTy.getScalarType());
  const Register NewPadReg = MRI.createGenericVirtualRegister(NewPadRegTy);
  const LLT NewUnPadRegTy = LLT::fixed_vector(DstVecTy.getNumElements() * 2,
                                              DstVecTy.getScalarType());
  const Register NewUnPadReg = MRI.createGenericVirtualRegister(NewUnPadRegTy);

  // Pad -> truncate at the wider size -> unpad back into the original def.
  MIRBuilder.buildInstr(PadOpc, {NewPadReg}, {SrcReg});
  MIRBuilder.buildTrunc(NewUnPadReg, NewPadReg);
  MIRBuilder.buildInstr(UnpadOpc, {DstReg}, {NewUnPadReg});

  MI.eraseFromParent();
  return true;
}

} // namespace llvm
1 change: 1 addition & 0 deletions llvm/lib/Target/AIE/AIELegalizerHelper.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ class AIELegalizerHelper {
bool legalizeG_AIE_EXTRACT_VECTOR_ELT(LegalizerHelper &Helper,
MachineInstr &MI,
const unsigned LegalVectorSize) const;
bool legalizeG_TRUNC(LegalizerHelper &Helper, MachineInstr &MI) const;
};

} // namespace llvm
Expand Down
32 changes: 31 additions & 1 deletion llvm/lib/Target/AIE/aie2p/AIE2PLegalizerInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,35 @@ AIE2PLegalizerInfo::AIE2PLegalizerInfo(const AIE2PSubtarget &ST)
.clampScalar(0, S32, S32)
.clampScalar(1, S32, S32);

getActionDefinitionsBuilder(G_TRUNC).alwaysLegal();
getActionDefinitionsBuilder(G_TRUNC)
.legalIf([=](const LegalityQuery &Query) {
const LLT &SrcTy = Query.Types[1];
const LLT &DstTy = Query.Types[0];
return SrcTy.isVector() && DstTy.isVector() &&
SrcTy.getSizeInBits() > 256 && SrcTy.getSizeInBits() < 2048 &&
DstTy.getElementType().getSizeInBits() * 2 ==
SrcTy.getElementType().getSizeInBits();
})
.legalIf([=](const LegalityQuery &Query) {
const LLT &SrcTy = Query.Types[1];
const LLT &DstTy = Query.Types[0];
return SrcTy.isScalar() && DstTy.isScalar();
})
.customIf([=](const LegalityQuery &Query) {
const LLT &SrcTy = Query.Types[1];
return SrcTy.isVector() && SrcTy.getSizeInBits() == 256;
})
.fewerElementsIf(
[=](const LegalityQuery &Query) {
const LLT &SrcTy = Query.Types[1];
return SrcTy.isVector() && SrcTy.getSizeInBits() == 2048;
},
[=](const LegalityQuery &Query) {
const LLT &SrcTy = Query.Types[1];
return std::make_pair(1,
LLT::fixed_vector(SrcTy.getNumElements() / 2,
SrcTy.getElementType()));
});

getActionDefinitionsBuilder(G_SELECT)
.legalFor({{S32, S32}, {P0, S32}})
Expand Down Expand Up @@ -676,6 +704,8 @@ bool AIE2PLegalizerInfo::legalizeCustom(
case TargetOpcode::G_SUB:
case TargetOpcode::G_XOR:
return AIEHelper.legalizeBinOp(Helper, MI);
case TargetOpcode::G_TRUNC:
return AIEHelper.legalizeG_TRUNC(Helper, MI);
}
llvm_unreachable("Un-expected custom legalization");
}
Expand Down
135 changes: 135 additions & 0 deletions llvm/test/CodeGen/AIE/aie2p/GlobalIsel/legalize-trunc.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
#
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates
# RUN: llc -mtriple aie2p -run-pass=legalizer %s -verify-machineinstrs -o - | FileCheck %s


---
name: v16s32_trunc_v16s64_acc1024
body: |
bb.1.entry:
; CHECK-LABEL: name: v16s32_trunc_v16s64_acc1024
; CHECK: [[DEF:%[0-9]+]]:_(<16 x s64>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<16 x s32>) = G_TRUNC [[DEF]](<16 x s64>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[TRUNC]](<16 x s32>)
%1:_(<16 x s64>) = G_IMPLICIT_DEF
%0:_(<16 x s32>) = G_TRUNC %1(<16 x s64>)
PseudoRET implicit $lr, implicit %0
...
---
name: v32s16_trunc_v32s32_acc1024
body: |
bb.1.entry:
; CHECK-LABEL: name: v32s16_trunc_v32s32_acc1024
; CHECK: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<32 x s16>) = G_TRUNC [[DEF]](<32 x s32>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[TRUNC]](<32 x s16>)
%1:_(<32 x s32>) = G_IMPLICIT_DEF
%0:_(<32 x s16>) = G_TRUNC %1(<32 x s32>)
PseudoRET implicit $lr, implicit %0
...
---
name: v8s32_trunc_v8s64_acc512
body: |
bb.1.entry:
; CHECK-LABEL: name: v8s32_trunc_v8s64_acc512
; CHECK: [[DEF:%[0-9]+]]:_(<8 x s64>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<8 x s32>) = G_TRUNC [[DEF]](<8 x s64>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[TRUNC]](<8 x s32>)
%1:_(<8 x s64>) = G_IMPLICIT_DEF
%0:_(<8 x s32>) = G_TRUNC %1(<8 x s64>)
PseudoRET implicit $lr, implicit %0
...
---
name: v16s16_trunc_v16s32_acc512
body: |
bb.1.entry:
; CHECK-LABEL: name: v16s16_trunc_v16s32_acc512
; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<16 x s16>) = G_TRUNC [[DEF]](<16 x s32>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[TRUNC]](<16 x s16>)
%1:_(<16 x s32>) = G_IMPLICIT_DEF
%0:_(<16 x s16>) = G_TRUNC %1(<16 x s32>)
PseudoRET implicit $lr, implicit %0
...
---
name: v32s16_trunc_v32s32_vec1024
body: |
bb.1.entry:
; CHECK-LABEL: name: v32s16_trunc_v32s32_vec1024
; CHECK: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<32 x s16>) = G_TRUNC [[DEF]](<32 x s32>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[TRUNC]](<32 x s16>)
%1:_(<32 x s32>) = G_IMPLICIT_DEF
%0:_(<32 x s16>) = G_TRUNC %1(<32 x s32>)
PseudoRET implicit $lr, implicit %0
...
---
name: v64s8_trunc_v64s16_vec1024
body: |
bb.1.entry:
; CHECK-LABEL: name: v64s8_trunc_v64s16_vec1024
; CHECK: [[DEF:%[0-9]+]]:_(<64 x s16>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<64 x s8>) = G_TRUNC [[DEF]](<64 x s16>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[TRUNC]](<64 x s8>)
%1:_(<64 x s16>) = G_IMPLICIT_DEF
%0:_(<64 x s8>) = G_TRUNC %1(<64 x s16>)
PseudoRET implicit $lr, implicit %0
...
---
name: v16s16_trunc_v16s32_vec512
body: |
bb.1.entry:
; CHECK-LABEL: name: v16s16_trunc_v16s32_vec512
; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<16 x s16>) = G_TRUNC [[DEF]](<16 x s32>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[TRUNC]](<16 x s16>)
%1:_(<16 x s32>) = G_IMPLICIT_DEF
%0:_(<16 x s16>) = G_TRUNC %1(<16 x s32>)
PseudoRET implicit $lr, implicit %0
...
---
name: v32s8_trunc_v32s16_vec512
body: |
bb.1.entry:
; CHECK-LABEL: name: v32s8_trunc_v32s16_vec512
; CHECK: [[DEF:%[0-9]+]]:_(<32 x s16>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<32 x s8>) = G_TRUNC [[DEF]](<32 x s16>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[TRUNC]](<32 x s8>)
%1:_(<32 x s16>) = G_IMPLICIT_DEF
%0:_(<32 x s8>) = G_TRUNC %1(<32 x s16>)
PseudoRET implicit $lr, implicit %0
...
---
name: v32s32_trunc_v32s64_acc2048
body: |
bb.1.entry:
; CHECK-LABEL: name: v32s32_trunc_v32s64_acc2048
; CHECK: [[DEF:%[0-9]+]]:_(<32 x s64>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[UV:%[0-9]+]]:_(<16 x s64>), [[UV1:%[0-9]+]]:_(<16 x s64>) = G_UNMERGE_VALUES [[DEF]](<32 x s64>)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<16 x s32>) = G_TRUNC [[UV]](<16 x s64>)
; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<16 x s32>) = G_TRUNC [[UV1]](<16 x s64>)
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[TRUNC]](<16 x s32>), [[TRUNC1]](<16 x s32>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[CONCAT_VECTORS]](<32 x s32>)
%1:_(<32 x s64>) = G_IMPLICIT_DEF
%0:_(<32 x s32>) = G_TRUNC %1(<32 x s64>)
PseudoRET implicit $lr, implicit %0
...
---
name: v16s8_trunc_v16s16_vec256
body: |
bb.1.entry:
; CHECK-LABEL: name: v16s8_trunc_v16s16_vec256
; CHECK: [[DEF:%[0-9]+]]:_(<16 x s16>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[AIE_PAD_VECTOR_UNDEF:%[0-9]+]]:_(<32 x s16>) = G_AIE_PAD_VECTOR_UNDEF [[DEF]](<16 x s16>)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<32 x s8>) = G_TRUNC [[AIE_PAD_VECTOR_UNDEF]](<32 x s16>)
; CHECK-NEXT: [[AIE_UNPAD_VECTOR:%[0-9]+]]:_(<16 x s8>) = G_AIE_UNPAD_VECTOR [[TRUNC]](<32 x s8>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_UNPAD_VECTOR]](<16 x s8>)
%1:_(<16 x s16>) = G_IMPLICIT_DEF
%0:_(<16 x s8>) = G_TRUNC %1(<16 x s16>)
PseudoRET implicit $lr, implicit %0
...
24 changes: 16 additions & 8 deletions llvm/test/CodeGen/AIE/aie2p/load-store-aligned.ll
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,29 @@ define dso_local void @test_load_store_aligned(<8 x i16> noundef %a, <4 x i32> n
; CHECK-LABEL: test_load_store_aligned:
; CHECK: .p2align 4
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: nopx ; vmov q0, wl2
; CHECK-NEXT: mova r0, #2; nopx
; CHECK-NEXT: paddxm [sp], #768
; CHECK-NEXT: vmov x2, bmll0
; CHECK-NEXT: vst.128 wl0, [sp, #-768]
; CHECK-NEXT: vst.128 wl4, [sp, #-736]
; CHECK-NEXT: vmov x1, bmll0
; CHECK-NEXT: vshuffle x0, x0, x0, r0
; CHECK-NEXT: mova r0, #0
; CHECK-NEXT: vst wl6, [sp, #-704]
; CHECK-NEXT: vst bmll1, [sp, #-512]
; CHECK-NEXT: vlda bmll0, [sp, #-512]
; CHECK-NEXT: vst wl8, [sp, #-640]
; CHECK-NEXT: vst bmll1, [sp, #-512]
; CHECK-NEXT: vst x10, [sp, #-448]
; CHECK-NEXT: vmov q0, wl0
; CHECK-NEXT: vshuffle x4, x4, x4, r0
; CHECK-NEXT: vst wl1, [sp, #-576]
; CHECK-NEXT: vlda bmll0, [sp, #-512]
; CHECK-NEXT: vmov q0, wl2
; CHECK-NEXT: vmov wh0, q0
; CHECK-NEXT: vmov wh2, q0
; CHECK-NEXT: vmov q0, wl4
; CHECK-NEXT: vst.128 wh0, [sp, #-768]
; CHECK-NEXT: vlda.128 wh0, [sp, #-768]
; CHECK-NEXT: vst wl2, [sp, #-576]
; CHECK-NEXT: vst.128 wh0, [sp, #-752]
; CHECK-NEXT: vmov wh4, q0
; CHECK-NEXT: vst.128 wh2, [sp, #-752]
; CHECK-NEXT: vst bmll0, [sp, #-128]
; CHECK-NEXT: vst.128 wh4, [sp, #-736]
; CHECK-NEXT: vlda.128 wh0, [sp, #-752]
; CHECK-NEXT: vlda.128 wh0, [sp, #-736]
; CHECK-NEXT: vlda wh0, [sp, #-704]
Expand Down
36 changes: 18 additions & 18 deletions llvm/test/CodeGen/AIE/extractelement.ll
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,10 @@ define signext i8 @extract_v16i8_signext(<16 x i8> %v) nounwind {
; AIE2P-LABEL: extract_v16i8_signext:
; AIE2P: .p2align 4
; AIE2P-NEXT: // %bb.0:
; AIE2P-NEXT: ret lr
; AIE2P-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv
; AIE2P-NEXT: nop // Delay Slot 5
; AIE2P-NEXT: nop // Delay Slot 4
; AIE2P-NEXT: nop // Delay Slot 3
; AIE2P-NEXT: mova r0, #0 // Delay Slot 4
; AIE2P-NEXT: vshuffle x0, x0, x0, r0 // Delay Slot 3
; AIE2P-NEXT: vextract.8 r0, x0, #0, vaddsign1 // Delay Slot 2
; AIE2P-NEXT: nop // Delay Slot 1
%1 = extractelement <16 x i8> %v, i32 0
Expand All @@ -96,10 +96,10 @@ define zeroext i8 @extract_v16i8_zeroext(<16 x i8> %v) nounwind {
; AIE2P-LABEL: extract_v16i8_zeroext:
; AIE2P: .p2align 4
; AIE2P-NEXT: // %bb.0:
; AIE2P-NEXT: ret lr
; AIE2P-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv
; AIE2P-NEXT: nop // Delay Slot 5
; AIE2P-NEXT: nop // Delay Slot 4
; AIE2P-NEXT: nop // Delay Slot 3
; AIE2P-NEXT: mova r0, #0 // Delay Slot 4
; AIE2P-NEXT: vshuffle x0, x0, x0, r0 // Delay Slot 3
; AIE2P-NEXT: vextract.8 r0, x0, #0, vaddsign0 // Delay Slot 2
; AIE2P-NEXT: nop // Delay Slot 1
%1 = extractelement <16 x i8> %v, i32 0
Expand All @@ -120,10 +120,10 @@ define zeroext i8 @extract_v16i8_dyn(<16 x i8> %v, i32 %idx) nounwind {
; AIE2P-LABEL: extract_v16i8_dyn:
; AIE2P: .p2align 4
; AIE2P-NEXT: // %bb.0:
; AIE2P-NEXT: ret lr
; AIE2P-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv
; AIE2P-NEXT: nop // Delay Slot 5
; AIE2P-NEXT: nop // Delay Slot 4
; AIE2P-NEXT: nop // Delay Slot 3
; AIE2P-NEXT: mova r0, #0 // Delay Slot 4
; AIE2P-NEXT: vshuffle x0, x0, x0, r0 // Delay Slot 3
; AIE2P-NEXT: vextract.8 r0, x0, r1, vaddsign0 // Delay Slot 2
; AIE2P-NEXT: nop // Delay Slot 1
%1 = extractelement <16 x i8> %v, i32 %idx
Expand All @@ -144,10 +144,10 @@ define signext i16 @extract_v8i16_signext(<8 x i16> %v) nounwind {
; AIE2P-LABEL: extract_v8i16_signext:
; AIE2P: .p2align 4
; AIE2P-NEXT: // %bb.0:
; AIE2P-NEXT: ret lr
; AIE2P-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv
; AIE2P-NEXT: nop // Delay Slot 5
; AIE2P-NEXT: nop // Delay Slot 4
; AIE2P-NEXT: nop // Delay Slot 3
; AIE2P-NEXT: mova r0, #2 // Delay Slot 4
; AIE2P-NEXT: vshuffle x0, x0, x0, r0 // Delay Slot 3
; AIE2P-NEXT: vextract.16 r0, x0, #0, vaddsign1 // Delay Slot 2
; AIE2P-NEXT: nop // Delay Slot 1
%1 = extractelement <8 x i16> %v, i32 0
Expand All @@ -168,10 +168,10 @@ define zeroext i16 @extract_v8i16_zeroext(<8 x i16> %v) nounwind {
; AIE2P-LABEL: extract_v8i16_zeroext:
; AIE2P: .p2align 4
; AIE2P-NEXT: // %bb.0:
; AIE2P-NEXT: ret lr
; AIE2P-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv
; AIE2P-NEXT: nop // Delay Slot 5
; AIE2P-NEXT: nop // Delay Slot 4
; AIE2P-NEXT: nop // Delay Slot 3
; AIE2P-NEXT: mova r0, #2 // Delay Slot 4
; AIE2P-NEXT: vshuffle x0, x0, x0, r0 // Delay Slot 3
; AIE2P-NEXT: vextract.16 r0, x0, #0, vaddsign0 // Delay Slot 2
; AIE2P-NEXT: nop // Delay Slot 1
%1 = extractelement <8 x i16> %v, i32 0
Expand All @@ -192,10 +192,10 @@ define signext i16 @extract_v8i16_dyn(<8 x i16> %v, i32 %idx) nounwind {
; AIE2P-LABEL: extract_v8i16_dyn:
; AIE2P: .p2align 4
; AIE2P-NEXT: // %bb.0:
; AIE2P-NEXT: ret lr
; AIE2P-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv
; AIE2P-NEXT: nop // Delay Slot 5
; AIE2P-NEXT: nop // Delay Slot 4
; AIE2P-NEXT: nop // Delay Slot 3
; AIE2P-NEXT: mova r0, #2 // Delay Slot 4
; AIE2P-NEXT: vshuffle x0, x0, x0, r0 // Delay Slot 3
; AIE2P-NEXT: vextract.16 r0, x0, r1, vaddsign1 // Delay Slot 2
; AIE2P-NEXT: nop // Delay Slot 1
%1 = extractelement <8 x i16> %v, i32 %idx
Expand Down

0 comments on commit 2f8c42a

Please sign in to comment.