Skip to content

Commit

Permalink
Fix G_BUILD_VECTOR legalization
Browse files: browse the repository at this point in the history
  • Loading branch information
abhinay-anubola committed Jan 8, 2025
1 parent d0eaceb commit 7909ccf
Show file tree
Hide file tree
Showing 5 changed files with 208 additions and 42 deletions.
4 changes: 2 additions & 2 deletions llvm/lib/Target/AIE/AIELegalizerHelper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,13 +161,13 @@ bool AIELegalizerHelper::legalizeG_BUILD_VECTOR(LegalizerHelper &Helper,
MIRBuilder.buildUndef(Src);

const AIEBaseInstrInfo *II = ST.getInstrInfo();
- MachineOperand *OperandBegin = MI.operands_begin() + 1;
+ MachineOperand *OperandEnd = std::prev(MI.operands_end());
for (auto &Operand : drop_begin(MI.operands(), 1)) {
Register EltReg = Operand.getReg();
LLT EltRegTy = MRI.getType(EltReg);
Register Dst = MRI.createGenericVirtualRegister(VecTy);

- if (DstVecSize == 512 && &Operand == OperandBegin) {
+ if (DstVecSize == 512 && &Operand == OperandEnd) {
Dst = DstReg;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ body: |
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI30:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI29]], [[C3]](s32)
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI31:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI30]], [[C3]](s32)
; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
; CHECK-NEXT: G_STORE [[AIE_ADD_VECTOR_ELT_HI]](<32 x s16>), [[FRAME_INDEX]](p0) :: (store (<32 x s16>), align 32)
; CHECK-NEXT: G_STORE [[AIE_ADD_VECTOR_ELT_HI31]](<32 x s16>), [[FRAME_INDEX]](p0) :: (store (<32 x s16>), align 32)
%2:_(s32) = G_CONSTANT i32 4
%3:_(s32) = G_CONSTANT i32 12
%4:_(s32) = G_CONSTANT i32 9
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ body: |
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI14:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI13]], [[C3]](s32)
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI15:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI14]], [[C3]](s32)
; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
; CHECK-NEXT: G_STORE [[AIE_ADD_VECTOR_ELT_HI]](<16 x s32>), [[FRAME_INDEX]](p0) :: (store (<16 x s32>), align 32)
; CHECK-NEXT: G_STORE [[AIE_ADD_VECTOR_ELT_HI15]](<16 x s32>), [[FRAME_INDEX]](p0) :: (store (<16 x s32>), align 32)
%2:_(s32) = G_CONSTANT i32 4
%3:_(s32) = G_CONSTANT i32 12
%4:_(s32) = G_CONSTANT i32 9
Expand Down Expand Up @@ -138,7 +138,7 @@ body: |
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI29:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI28]], [[C3]](s32)
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI30:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI29]], [[C3]](s32)
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI31:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI30]], [[C3]](s32)
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[AIE_ADD_VECTOR_ELT_HI]](<16 x s32>), [[AIE_ADD_VECTOR_ELT_HI16]](<16 x s32>)
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[AIE_ADD_VECTOR_ELT_HI15]](<16 x s32>), [[AIE_ADD_VECTOR_ELT_HI31]](<16 x s32>)
; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<32 x s32>), [[FRAME_INDEX]](p0) :: (store (<32 x s32>), align 32)
%2:_(s32) = G_CONSTANT i32 42
Expand Down Expand Up @@ -371,7 +371,7 @@ body: |
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI62:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI61]], [[COPY59]](s32)
; CHECK-NEXT: [[COPY60:%[0-9]+]]:_(s32) = COPY [[C3]](s32)
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI63:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI62]], [[COPY60]](s32)
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s16>) = G_CONCAT_VECTORS [[AIE_ADD_VECTOR_ELT_HI]](<32 x s16>), [[AIE_ADD_VECTOR_ELT_HI32]](<32 x s16>)
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s16>) = G_CONCAT_VECTORS [[AIE_ADD_VECTOR_ELT_HI31]](<32 x s16>), [[AIE_ADD_VECTOR_ELT_HI63]](<32 x s16>)
; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
; CHECK-NEXT: G_STORE [[CONCAT_VECTORS]](<64 x s16>), [[FRAME_INDEX]](p0) :: (store (<64 x s16>), align 32)
%2:_(s16) = G_CONSTANT i16 19
Expand Down Expand Up @@ -421,7 +421,7 @@ body: |
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI13:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI12]], [[C13]](s32)
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI14:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI13]], [[C14]](s32)
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI15:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI14]], [[C15]](s32)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_ADD_VECTOR_ELT_HI]](<16 x s32>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_ADD_VECTOR_ELT_HI15]](<16 x s32>)
%1:_(s32) = G_CONSTANT i32 1
%2:_(s32) = G_CONSTANT i32 2
%3:_(s32) = G_CONSTANT i32 3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ body: |
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI13:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI12]], [[AIE_SEXT_EXTRACT_VECTOR_ELT13]](s32)
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI14:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI13]], [[AIE_SEXT_EXTRACT_VECTOR_ELT14]](s32)
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI15:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI14]], [[AIE_SEXT_EXTRACT_VECTOR_ELT15]](s32)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_ADD_VECTOR_ELT_HI]](<16 x s32>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_ADD_VECTOR_ELT_HI15]](<16 x s32>)
%0:_(<8 x s32>) = G_IMPLICIT_DEF
%1:_(<16 x s32>) = G_SHUFFLE_VECTOR %0(<8 x s32>), %0(<8 x s32>), shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
PseudoRET implicit $lr, implicit %1
Expand Down Expand Up @@ -186,7 +186,7 @@ body: |
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI29:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI28]], [[ASSERT_SEXT29]](s32)
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI30:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI29]], [[ASSERT_SEXT30]](s32)
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI31:%[0-9]+]]:_(<32 x s16>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI30]], [[ASSERT_SEXT31]](s32)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_ADD_VECTOR_ELT_HI]](<32 x s16>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_ADD_VECTOR_ELT_HI31]](<32 x s16>)
%0:_(<16 x s16>) = G_IMPLICIT_DEF
%1:_(<16 x s16>) = G_IMPLICIT_DEF
%2:_(<32 x s16>) = G_SHUFFLE_VECTOR %0(<16 x s16>), %1(<16 x s16>), shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)
Expand Down Expand Up @@ -428,7 +428,7 @@ body: |
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI61:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI60]], [[ASSERT_SEXT61]](s32)
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI62:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI61]], [[ASSERT_SEXT62]](s32)
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI63:%[0-9]+]]:_(<64 x s8>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI62]], [[ASSERT_SEXT63]](s32)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_ADD_VECTOR_ELT_HI]](<64 x s8>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_ADD_VECTOR_ELT_HI63]](<64 x s8>)
%0:_(<32 x s8>) = G_IMPLICIT_DEF
%1:_(<32 x s8>) = G_IMPLICIT_DEF
%2:_(<64 x s8>) = G_SHUFFLE_VECTOR %0(<32 x s8>), %1(<32 x s8>), shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)
Expand Down Expand Up @@ -569,7 +569,7 @@ body: |
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI29:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI28]], [[AIE_SEXT_EXTRACT_VECTOR_ELT29]](s32)
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI30:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI29]], [[AIE_SEXT_EXTRACT_VECTOR_ELT30]](s32)
; CHECK-NEXT: [[AIE_ADD_VECTOR_ELT_HI31:%[0-9]+]]:_(<16 x s32>) = G_AIE_ADD_VECTOR_ELT_HI [[AIE_ADD_VECTOR_ELT_HI30]], [[AIE_SEXT_EXTRACT_VECTOR_ELT31]](s32)
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[AIE_ADD_VECTOR_ELT_HI]](<16 x s32>), [[AIE_ADD_VECTOR_ELT_HI16]](<16 x s32>)
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[AIE_ADD_VECTOR_ELT_HI15]](<16 x s32>), [[AIE_ADD_VECTOR_ELT_HI31]](<16 x s32>)
; CHECK-NEXT: PseudoRET implicit $lr, implicit [[CONCAT_VECTORS]](<32 x s32>)
%0:_(<16 x s32>) = G_IMPLICIT_DEF
%1:_(<32 x s32>) = G_SHUFFLE_VECTOR %0(<16 x s32>), %0(<16 x s32>), shufflemask(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)
Expand Down
228 changes: 197 additions & 31 deletions llvm/test/CodeGen/AIE/aie2/intrinsics-shufflevec.ll
Original file line number Diff line number Diff line change
Expand Up @@ -98,28 +98,129 @@ define <16 x i32> @test_insert_vector(<16 x i32> noundef %a, i32 noundef %idx, <
; CHECK-LABEL: test_insert_vector:
; CHECK: .p2align 4
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: nopb ; nopa ; nops ; jz r0, #.LBB1_2; nopv
; CHECK-NEXT: nopx // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 3
; CHECK-NEXT: mov r1, r16 // Delay Slot 2
; CHECK-NEXT: mova r16, #0 // Delay Slot 1
; CHECK-NEXT: // %bb.1: // %if.end
; CHECK-NEXT: nopb ; nopa ; nops ; ret lr ; nopm ; nopv
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: vextract.s32 r0, x2, r16 // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 3
; CHECK-NEXT: nopa ; nopb ; nopx ; mov r24, r16; nops
; CHECK-NEXT: mov r25, r17
; CHECK-NEXT: mov r26, r18
; CHECK-NEXT: mov r27, r19
; CHECK-NEXT: mova r19, #0
; CHECK-NEXT: mova r18, #1
; CHECK-NEXT: mova r17, #2
; CHECK-NEXT: mova r16, #3
; CHECK-NEXT: vextract.s32 r4, x4, r16
; CHECK-NEXT: mova r16, #4
; CHECK-NEXT: vextract.s32 r1, x4, r19
; CHECK-NEXT: vextract.s32 r2, x4, r18
; CHECK-NEXT: vextract.s32 r3, x4, r17
; CHECK-NEXT: vextract.s32 r5, x4, r16
; CHECK-NEXT: mova r16, #5
; CHECK-NEXT: vextract.s32 r6, x4, r16
; CHECK-NEXT: mova r16, #6
; CHECK-NEXT: vpush.hi.32 x0, x0, r1
; CHECK-NEXT: vextract.s32 r7, x4, r16
; CHECK-NEXT: mova r16, #7
; CHECK-NEXT: vpush.hi.32 x0, x0, r2
; CHECK-NEXT: vextract.s32 r8, x4, r16
; CHECK-NEXT: vpush.hi.32 x0, x0, r3
; CHECK-NEXT: vpush.hi.32 x0, x0, r4
; CHECK-NEXT: vpush.hi.32 x0, x0, r5
; CHECK-NEXT: vpush.hi.32 x0, x0, r6
; CHECK-NEXT: vpush.hi.32 x0, x0, r7
; CHECK-NEXT: vpush.hi.32 x0, x0, r8
; CHECK-NEXT: vpush.hi.32 x0, x0, r0
; CHECK-NEXT: vpush.hi.32 x0, x0, r0
; CHECK-NEXT: vpush.hi.32 x0, x0, r0
; CHECK-NEXT: jz r0, #.LBB1_2
; CHECK-NEXT: vpush.hi.32 x0, x0, r0 // Delay Slot 5
; CHECK-NEXT: vpush.hi.32 x0, x0, r0 // Delay Slot 4
; CHECK-NEXT: vpush.hi.32 x0, x0, r0 // Delay Slot 3
; CHECK-NEXT: vpush.hi.32 x0, x0, r0 // Delay Slot 2
; CHECK-NEXT: mov r16, r1 // Delay Slot 1
; CHECK-NEXT: vpush.hi.32 x0, x0, r0 // Delay Slot 1
; CHECK-NEXT: // %bb.1: // %if.end
; CHECK-NEXT: mova r16, #3; nopxm
; CHECK-NEXT: vextract.s32 r0, x2, r19
; CHECK-NEXT: vextract.s32 r1, x0, r19
; CHECK-NEXT: vextract.s32 r2, x2, r18
; CHECK-NEXT: vextract.s32 r3, x0, r18
; CHECK-NEXT: vextract.s32 r4, x2, r17
; CHECK-NEXT: vextract.s32 r5, x0, r17
; CHECK-NEXT: vextract.s32 r6, x2, r16
; CHECK-NEXT: vextract.s32 r7, x0, r16
; CHECK-NEXT: mova r16, #4
; CHECK-NEXT: vextract.s32 r8, x2, r16
; CHECK-NEXT: vextract.s32 r9, x0, r16
; CHECK-NEXT: mova r16, #5
; CHECK-NEXT: vextract.s32 r10, x2, r16
; CHECK-NEXT: vextract.s32 r11, x0, r16
; CHECK-NEXT: mova r16, #6
; CHECK-NEXT: vextract.s32 r12, x2, r16
; CHECK-NEXT: vextract.s32 r13, x0, r16
; CHECK-NEXT: mova r16, #7
; CHECK-NEXT: vextract.s32 r14, x2, r16
; CHECK-NEXT: vextract.s32 r15, x0, r16
; CHECK-NEXT: vpush.hi.32 x0, x0, r0
; CHECK-NEXT: vpush.hi.32 x0, x0, r2
; CHECK-NEXT: vpush.hi.32 x0, x0, r4
; CHECK-NEXT: vpush.hi.32 x0, x0, r6
; CHECK-NEXT: vpush.hi.32 x0, x0, r8
; CHECK-NEXT: vpush.hi.32 x0, x0, r10
; CHECK-NEXT: vpush.hi.32 x0, x0, r12
; CHECK-NEXT: vpush.hi.32 x0, x0, r14
; CHECK-NEXT: vpush.hi.32 x0, x0, r1
; CHECK-NEXT: vpush.hi.32 x0, x0, r3
; CHECK-NEXT: vpush.hi.32 x0, x0, r5
; CHECK-NEXT: j #.LBB1_3
; CHECK-NEXT: vpush.hi.32 x0, x0, r7 // Delay Slot 5
; CHECK-NEXT: vpush.hi.32 x0, x0, r9 // Delay Slot 4
; CHECK-NEXT: vpush.hi.32 x0, x0, r11 // Delay Slot 3
; CHECK-NEXT: vpush.hi.32 x0, x0, r13 // Delay Slot 2
; CHECK-NEXT: vpush.hi.32 x0, x0, r15 // Delay Slot 1
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB1_2: // %if.then
; CHECK-NEXT: nopa ; nopx ; vextract.s32 r0, x4, r16
; CHECK-NEXT: ret lr
; CHECK-NEXT: vpush.hi.32 x0, x0, r0 // Delay Slot 5
; CHECK-NEXT: vextract.s32 r0, x0, r16 // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 3
; CHECK-NEXT: vpush.hi.32 x0, x0, r0 // Delay Slot 2
; CHECK-NEXT: mov r16, r1 // Delay Slot 1
; CHECK-NEXT: nopb ; mova r16, #3; nops ; nopxm ; nopv
; CHECK-NEXT: vextract.s32 r0, x0, r19
; CHECK-NEXT: vextract.s32 r1, x2, r19
; CHECK-NEXT: vextract.s32 r2, x0, r18
; CHECK-NEXT: vextract.s32 r3, x2, r18
; CHECK-NEXT: vextract.s32 r4, x0, r17
; CHECK-NEXT: vextract.s32 r5, x2, r17
; CHECK-NEXT: vextract.s32 r6, x0, r16
; CHECK-NEXT: vextract.s32 r7, x2, r16
; CHECK-NEXT: mova r16, #4
; CHECK-NEXT: vextract.s32 r8, x0, r16
; CHECK-NEXT: vextract.s32 r9, x2, r16
; CHECK-NEXT: mova r16, #5
; CHECK-NEXT: vextract.s32 r10, x0, r16
; CHECK-NEXT: vextract.s32 r11, x2, r16
; CHECK-NEXT: mova r16, #6
; CHECK-NEXT: vextract.s32 r12, x0, r16
; CHECK-NEXT: vextract.s32 r13, x2, r16
; CHECK-NEXT: mova r16, #7
; CHECK-NEXT: vextract.s32 r14, x0, r16
; CHECK-NEXT: vextract.s32 r15, x2, r16
; CHECK-NEXT: vpush.hi.32 x0, x0, r0
; CHECK-NEXT: vpush.hi.32 x0, x0, r2
; CHECK-NEXT: vpush.hi.32 x0, x0, r4
; CHECK-NEXT: vpush.hi.32 x0, x0, r6
; CHECK-NEXT: vpush.hi.32 x0, x0, r8
; CHECK-NEXT: vpush.hi.32 x0, x0, r10
; CHECK-NEXT: vpush.hi.32 x0, x0, r12
; CHECK-NEXT: vpush.hi.32 x0, x0, r14
; CHECK-NEXT: vpush.hi.32 x0, x0, r1
; CHECK-NEXT: vpush.hi.32 x0, x0, r3
; CHECK-NEXT: vpush.hi.32 x0, x0, r5
; CHECK-NEXT: vpush.hi.32 x0, x0, r7
; CHECK-NEXT: vpush.hi.32 x0, x0, r9
; CHECK-NEXT: vpush.hi.32 x0, x0, r11
; CHECK-NEXT: vpush.hi.32 x0, x0, r13
; CHECK-NEXT: vpush.hi.32 x0, x0, r15
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB1_3: // %cleanup
; CHECK-NEXT: nopa ; nopb ; ret lr ; nopm ; nops
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: mov r19, r27 // Delay Slot 4
; CHECK-NEXT: mov r18, r26 // Delay Slot 3
; CHECK-NEXT: mov r17, r25 // Delay Slot 2
; CHECK-NEXT: mov r16, r24 // Delay Slot 1
entry:
%shuffle = shufflevector <8 x i32> %b, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%cmp = icmp eq i32 %idx, 0
Expand All @@ -142,13 +243,49 @@ define <16 x i32> @test_concat_vector(<8 x i32> noundef %a, <8 x i32> noundef %b
; CHECK-LABEL: test_concat_vector:
; CHECK: .p2align 4
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: nopa ; nopx ; mov r1, r16
; CHECK-NEXT: nopx ; mov r24, r16
; CHECK-NEXT: mova r16, #0
; CHECK-NEXT: vextract.s32 r0, x2, r16
; CHECK-NEXT: vextract.s32 r1, x4, r16
; CHECK-NEXT: mova r16, #1
; CHECK-NEXT: vextract.s32 r2, x2, r16
; CHECK-NEXT: vextract.s32 r3, x4, r16
; CHECK-NEXT: mova r16, #2
; CHECK-NEXT: vextract.s32 r4, x2, r16
; CHECK-NEXT: vextract.s32 r5, x4, r16
; CHECK-NEXT: mova r16, #3
; CHECK-NEXT: vpush.hi.32 x0, x0, r0
; CHECK-NEXT: vextract.s32 r6, x2, r16
; CHECK-NEXT: vextract.s32 r7, x4, r16
; CHECK-NEXT: mova r16, #4
; CHECK-NEXT: vpush.hi.32 x0, x0, r2
; CHECK-NEXT: vextract.s32 r8, x2, r16
; CHECK-NEXT: vextract.s32 r9, x4, r16
; CHECK-NEXT: mova r16, #5
; CHECK-NEXT: vpush.hi.32 x0, x0, r4
; CHECK-NEXT: vextract.s32 r10, x2, r16
; CHECK-NEXT: vextract.s32 r11, x4, r16
; CHECK-NEXT: mova r16, #6
; CHECK-NEXT: vpush.hi.32 x0, x0, r6
; CHECK-NEXT: vextract.s32 r12, x2, r16
; CHECK-NEXT: vextract.s32 r13, x4, r16
; CHECK-NEXT: mova r16, #7
; CHECK-NEXT: vpush.hi.32 x0, x0, r8
; CHECK-NEXT: vextract.s32 r14, x2, r16
; CHECK-NEXT: vextract.s32 r15, x4, r16
; CHECK-NEXT: vpush.hi.32 x0, x0, r10
; CHECK-NEXT: vpush.hi.32 x0, x0, r12
; CHECK-NEXT: vpush.hi.32 x0, x0, r14
; CHECK-NEXT: vpush.hi.32 x0, x0, r1
; CHECK-NEXT: vpush.hi.32 x0, x0, r3
; CHECK-NEXT: vpush.hi.32 x0, x0, r5
; CHECK-NEXT: vpush.hi.32 x0, x0, r7
; CHECK-NEXT: ret lr
; CHECK-NEXT: mova r16, #0 // Delay Slot 5
; CHECK-NEXT: vextract.s32 r0, x2, r16 // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 3
; CHECK-NEXT: vpush.hi.32 x0, x0, r0 // Delay Slot 2
; CHECK-NEXT: mov r16, r1 // Delay Slot 1
; CHECK-NEXT: vpush.hi.32 x0, x0, r9 // Delay Slot 5
; CHECK-NEXT: vpush.hi.32 x0, x0, r11 // Delay Slot 4
; CHECK-NEXT: vpush.hi.32 x0, x0, r13 // Delay Slot 3
; CHECK-NEXT: vpush.hi.32 x0, x0, r15 // Delay Slot 2
; CHECK-NEXT: mov r16, r24 // Delay Slot 1
entry:
%shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x i32> %shuffle
Expand All @@ -158,15 +295,44 @@ define <16 x i32> @test_set_vector(i32 noundef %idx, <8 x i32> noundef %a) {
; CHECK-LABEL: test_set_vector:
; CHECK: .p2align 4
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: nopb ; nopa ; nops ; nopx ; mov r2, r16; nopv
; CHECK-NEXT: nopa ; nopb ; nopx ; mov r9, r16
; CHECK-NEXT: mova r16, #0
; CHECK-NEXT: eqz r0, r0
; CHECK-NEXT: vextract.s32 r1, x2, r16
; CHECK-NEXT: mova r16, #1
; CHECK-NEXT: vextract.s32 r2, x2, r16
; CHECK-NEXT: mova r16, #2
; CHECK-NEXT: vextract.s32 r3, x2, r16
; CHECK-NEXT: mova r16, #3
; CHECK-NEXT: vpush.hi.32 x0, x0, r1
; CHECK-NEXT: vextract.s32 r4, x2, r16
; CHECK-NEXT: mova r16, #4
; CHECK-NEXT: vpush.hi.32 x0, x0, r2
; CHECK-NEXT: vextract.s32 r5, x2, r16
; CHECK-NEXT: mova r16, #5
; CHECK-NEXT: vpush.hi.32 x0, x0, r3
; CHECK-NEXT: vextract.s32 r6, x2, r16
; CHECK-NEXT: mova r16, #6
; CHECK-NEXT: vpush.hi.32 x0, x0, r4
; CHECK-NEXT: vextract.s32 r7, x2, r16
; CHECK-NEXT: mova r16, #7
; CHECK-NEXT: vpush.hi.32 x0, x0, r5
; CHECK-NEXT: vextract.s32 r8, x2, r16
; CHECK-NEXT: add r16, r0, #-1
; CHECK-NEXT: vpush.hi.32 x0, x0, r6
; CHECK-NEXT: vpush.hi.32 x0, x0, r7
; CHECK-NEXT: vpush.hi.32 x0, x0, r8
; CHECK-NEXT: vpush.hi.32 x2, x0, r0
; CHECK-NEXT: vpush.hi.32 x2, x2, r0
; CHECK-NEXT: vpush.hi.32 x2, x2, r0
; CHECK-NEXT: vpush.hi.32 x2, x2, r0
; CHECK-NEXT: vpush.hi.32 x2, x2, r0
; CHECK-NEXT: ret lr
; CHECK-NEXT: vextract.s32 r1, x2, r16 // Delay Slot 5
; CHECK-NEXT: add r16, r0, #-1 // Delay Slot 4
; CHECK-NEXT: vpush.hi.32 x0, x0, r1 // Delay Slot 3
; CHECK-NEXT: vsel.32 x0, x0, x0, r16 // Delay Slot 2
; CHECK-NEXT: mov r16, r2 // Delay Slot 1
; CHECK-NEXT: vpush.hi.32 x2, x2, r0 // Delay Slot 5
; CHECK-NEXT: vpush.hi.32 x2, x2, r0 // Delay Slot 4
; CHECK-NEXT: vpush.hi.32 x2, x2, r0 // Delay Slot 3
; CHECK-NEXT: vsel.32 x0, x2, x0, r16 // Delay Slot 2
; CHECK-NEXT: mov r16, r9 // Delay Slot 1
entry:
%cmp = icmp eq i32 %idx, 0
%shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
Expand Down

0 comments on commit 7909ccf

Please sign in to comment.