diff --git a/llvm/lib/Target/AIE/AIE2InstrPatterns.td b/llvm/lib/Target/AIE/AIE2InstrPatterns.td index 2d9409264d24..d6c7db3766b2 100644 --- a/llvm/lib/Target/AIE/AIE2InstrPatterns.td +++ b/llvm/lib/Target/AIE/AIE2InstrPatterns.td @@ -4,7 +4,7 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates +// (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its affiliates // //===----------------------------------------------------------------------===// // @@ -116,33 +116,6 @@ class SelectPat : def : SelectPat; def : SelectPat; -// For vector select, VSEL instruction is used, since it does select word by word -// sel input is modified, "sel ? v1 : v2 is equivalent to vsel(sel-1, v1, v2)" -// rs1(sel) is zero-extended from the original 1 bit type -foreach vec256Ty = [v32i8, v16i16, v8i32] in { -def : Pat<(vec256Ty (select (i32 eRS8:$rs1), VEC256:$rs2, VEC256:$rs3)), - (vec256Ty (EXTRACT_SUBREG (VSEL_32 - (v32i16 (REG_SEQUENCE VEC512, VEC256:$rs2, sub_256_lo)), - (v32i16 (REG_SEQUENCE VEC512, VEC256:$rs3, sub_256_lo)), - (ADD_add_r_ri eR:$rs1, (i32 -1))), - sub_256_lo))>; -} -foreach vec512Ty = [v64i8, v32i16, v16i32] in { - def : Pat<(vec512Ty (select (i32 eRS8:$rs1), VEC512:$rs2, VEC512:$rs3)), - (vec512Ty (VSEL_32 VEC512:$rs2, VEC512:$rs3, (ADD_add_r_ri eR:$rs1, (i32 -1))))>; -} -foreach vec1024Ty = [v128i8, v64i16, v32i32] in { -def : Pat<(vec1024Ty (select (i32 eRS8:$rs1), VEC1024:$rs2, VEC1024:$rs3)), - (REG_SEQUENCE VEC1024, - (VSEL_32 (v16i32 (EXTRACT_SUBREG VEC1024:$rs2, sub_512_lo)), - (v16i32 (EXTRACT_SUBREG VEC1024:$rs3, sub_512_lo)), - (ADD_add_r_ri eR:$rs1, (i32 -1))), sub_512_lo, - (VSEL_32 (v16i32 (EXTRACT_SUBREG VEC1024:$rs2, sub_512_hi)), - (v16i32 (EXTRACT_SUBREG VEC1024:$rs3, sub_512_hi)), - (ADD_add_r_ri eR:$rs1, (i32 -1))), sub_512_hi - )>; -} - class SelectNezPat : Pat<(type (select (i32 (setne eR27:$rs1, (i32 0))), (type eR:$rs2), (type eR:$rs3))), (SELNEZ eR:$rs2, eR:$rs3, eR27:$rs1)>; diff --git a/llvm/lib/Target/AIE/AIEBaseInstrPatterns.td b/llvm/lib/Target/AIE/AIEBaseInstrPatterns.td index 3f12e1d3068f..261366bc279b 100644 --- a/llvm/lib/Target/AIE/AIEBaseInstrPatterns.td +++ b/llvm/lib/Target/AIE/AIEBaseInstrPatterns.td @@ -4,7 +4,7 @@ // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // -// (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates +// (c) Copyright 2024-2025 Advanced Micro Devices, Inc. or its affiliates // //===----------------------------------------------------------------------===// // @@ -69,3 +69,11 @@ def : Pat<(vec1024Ty (and VEC1024:$src1, VEC1024:$src2)), (v16i32 (EXTRACT_SUBREG VEC1024:$src2, sub_512_hi))), sub_512_hi )>; } + +// For vector select, VSEL instruction is used, since it does select word by word +// sel input is modified, "sel ? v1 : v2 is equivalent to vsel(sel-1, v1, v2)" +// rs1(sel) is zero-extended from the original 1 bit type +foreach vec512Ty = [v64i8, v32i16, v16i32] in { + def : Pat<(vec512Ty (select (i32 eR:$rs1), VEC512:$rs2, VEC512:$rs3)), + (vec512Ty (VSEL_32 VEC512:$rs2, VEC512:$rs3, (ADD_add_r_ri eR:$rs1, (i32 -1))))>; +} diff --git a/llvm/test/CodeGen/AIE/GlobalISel/inst-select-select.mir b/llvm/test/CodeGen/AIE/GlobalISel/inst-select-select.mir index 31ee8603a015..a2eedc996c4f 100644 --- a/llvm/test/CodeGen/AIE/GlobalISel/inst-select-select.mir +++ b/llvm/test/CodeGen/AIE/GlobalISel/inst-select-select.mir @@ -4,8 +4,10 @@ # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # -# (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates -# RUN: llc -mtriple aie -run-pass=instruction-select %s -verify-machineinstrs -o - | FileCheck %s +# (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its affiliates + +# RUN: llc -mtriple aie2 -run-pass=instruction-select %s -verify-machineinstrs -o - | FileCheck --check-prefix=AIE2 %s +# RUN: llc -mtriple aie2p -run-pass=instruction-select %s -verify-machineinstrs -o - | FileCheck --check-prefix=AIE2P %s --- name: select_s32 @@ -15,12 +17,23 @@ regBankSelected: true body: | bb.0: liveins: $r6, $r7, $r8 - ; CHECK-LABEL: name: select_s32 - ; CHECK: [[COPY:%[0-9]+]]:gpr0 = COPY $r6 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $r7 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $r8 - ; CHECK-NEXT: [[ITE_NEZ:%[0-9]+]]:gpr = ITE_NEZ [[COPY1]], [[COPY2]], [[COPY]] - ; CHECK-NEXT: $r0 = COPY [[ITE_NEZ]] + ; AIE2-LABEL: name: select_s32 + ; AIE2: liveins: $r6, $r7, $r8 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:er27 = COPY $r6 + ; AIE2-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r7 + ; AIE2-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r8 + ; AIE2-NEXT: [[SELNEZ:%[0-9]+]]:er = SELNEZ [[COPY1]], [[COPY2]], [[COPY]] + ; AIE2-NEXT: $r0 = COPY [[SELNEZ]] + ; + ; AIE2P-LABEL: name: select_s32 + ; AIE2P: liveins: $r6, $r7, $r8 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:mr27_select = COPY $r6 + ; AIE2P-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r7 + ; AIE2P-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r8 + ; AIE2P-NEXT: [[SEL_NEZ:%[0-9]+]]:er = SEL_NEZ [[COPY1]], [[COPY2]], [[COPY]] + ; AIE2P-NEXT: $r0 = COPY [[SEL_NEZ]] %0:gprregbank(s32) = COPY $r6 %1:gprregbank(s32) = COPY $r7 %2:gprregbank(s32) = COPY $r8 @@ -36,14 +49,27 @@ regBankSelected: true body: | bb.0: liveins: $p0, $p1, $r6 - ; CHECK-LABEL: name: select_p0 - ; CHECK: [[COPY:%[0-9]+]]:gpr0 = COPY $r6 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ptr = COPY $p0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ptr = COPY $p1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY [[COPY2]] - ; CHECK-NEXT: [[ITE_NEZ:%[0-9]+]]:gpr = ITE_NEZ [[COPY3]], [[COPY4]], [[COPY]] - ; CHECK-NEXT: $p0 = COPY [[ITE_NEZ]] + ; AIE2-LABEL: name: select_p0 + ; AIE2: liveins: $p0, $p1, $r6 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:er27 = COPY $r6 + ; AIE2-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 + ; AIE2-NEXT: [[COPY2:%[0-9]+]]:ep = COPY $p1 + ; AIE2-NEXT: [[COPY3:%[0-9]+]]:er = COPY [[COPY1]] + ; AIE2-NEXT: [[COPY4:%[0-9]+]]:er = COPY [[COPY2]] + ; AIE2-NEXT: [[SELNEZ:%[0-9]+]]:er = SELNEZ [[COPY3]], [[COPY4]], [[COPY]] + ; AIE2-NEXT: $p0 = COPY [[SELNEZ]] + ; + ; AIE2P-LABEL: name: select_p0 + ; AIE2P: liveins: $p0, $p1, $r6 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:mr27_select = COPY $r6 + ; AIE2P-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 + ; AIE2P-NEXT: [[COPY2:%[0-9]+]]:ep = COPY $p1 + ; AIE2P-NEXT: [[COPY3:%[0-9]+]]:er = COPY [[COPY1]] + ; AIE2P-NEXT: [[COPY4:%[0-9]+]]:er = COPY [[COPY2]] + ; AIE2P-NEXT: [[SEL_NEZ:%[0-9]+]]:er = SEL_NEZ [[COPY3]], [[COPY4]], [[COPY]] + ; AIE2P-NEXT: $p0 = COPY [[SEL_NEZ]] %0:gprregbank(s32) = COPY $r6 %1:ptrregbank(p0) = COPY $p0 %2:ptrregbank(p0) = COPY $p1 @@ -52,3 +78,40 @@ body: | %5:gprregbank(p0) = G_SELECT %0, %3, %4 $p0 = COPY %5 ... + +--- +name: select_v512 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $r0, $x2, $x4 + ; AIE2-LABEL: name: select_v512 + ; AIE2: liveins: $r0, $x2, $x4 + ; AIE2-NEXT: {{ $}} + ; AIE2-NEXT: [[COPY:%[0-9]+]]:er = COPY $r0 + ; AIE2-NEXT: [[COPY1:%[0-9]+]]:vec512 = COPY $x2 + ; AIE2-NEXT: [[COPY2:%[0-9]+]]:vec512 = COPY $x4 + ; AIE2-NEXT: [[ADD_add_r_ri:%[0-9]+]]:ers8 = ADD_add_r_ri [[COPY]], -1, implicit-def dead $srcarry + ; AIE2-NEXT: [[VSEL_32_:%[0-9]+]]:vec512 = VSEL_32 [[COPY1]], [[COPY2]], [[ADD_add_r_ri]] + ; AIE2-NEXT: $x0 = COPY [[VSEL_32_]] + ; AIE2-NEXT: PseudoRET implicit $lr, implicit $x0 + ; + ; AIE2P-LABEL: name: select_v512 + ; AIE2P: liveins: $r0, $x2, $x4 + ; AIE2P-NEXT: {{ $}} + ; AIE2P-NEXT: [[COPY:%[0-9]+]]:er = COPY $r0 + ; AIE2P-NEXT: [[COPY1:%[0-9]+]]:vec512 = COPY $x2 + ; AIE2P-NEXT: [[COPY2:%[0-9]+]]:vec512 = COPY $x4 + ; AIE2P-NEXT: [[ADD_add_r_ri:%[0-9]+]]:ers16 = ADD_add_r_ri [[COPY]], -1, implicit-def dead $srcarry + ; AIE2P-NEXT: [[VSEL_32_:%[0-9]+]]:vec512 = VSEL_32 [[COPY1]], [[COPY2]], [[ADD_add_r_ri]] + ; AIE2P-NEXT: $x0 = COPY [[VSEL_32_]] + ; AIE2P-NEXT: PseudoRET implicit $lr, implicit $x0 + %4:gprregbank(s32) = COPY $r0 + %5:gprregbank(s32) = G_ASSERT_ZEXT %4:gprregbank, 1 + %2:vregbank(<16 x s32>) = COPY $x2 + %3:vregbank(<16 x s32>) = COPY $x4 + %0:vregbank(<16 x s32>) = G_SELECT %5:gprregbank(s32), %2:vregbank, %3:vregbank + $x0 = COPY %0:vregbank(<16 x s32>) + PseudoRET implicit $lr, implicit $x0 +... diff --git a/llvm/test/CodeGen/AIE/aie2/GlobalISel/inst-select-select.mir b/llvm/test/CodeGen/AIE/aie2/GlobalISel/inst-select-select.mir deleted file mode 100644 index 66160c80e856..000000000000 --- a/llvm/test/CodeGen/AIE/aie2/GlobalISel/inst-select-select.mir +++ /dev/null @@ -1,146 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# -# This file is licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -# (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates -# RUN: llc -mtriple aie2 -run-pass=instruction-select %s -verify-machineinstrs -o - | FileCheck %s - ---- -name: select_s32 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $r6, $r7, $r8 - ; CHECK-LABEL: name: select_s32 - ; CHECK: liveins: $r6, $r7, $r8 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:er27 = COPY $r6 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r7 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r8 - ; CHECK-NEXT: [[SELNEZ:%[0-9]+]]:er = SELNEZ [[COPY1]], [[COPY2]], [[COPY]] - ; CHECK-NEXT: $r0 = COPY [[SELNEZ]] - %0:gprregbank(s32) = COPY $r6 - %1:gprregbank(s32) = COPY $r7 - %2:gprregbank(s32) = COPY $r8 - %3:gprregbank(s32) = G_SELECT %0, %1, %2 - $r0 = COPY %3 -... - ---- -name: select_p0 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $p0, $p1, $r6 - ; CHECK-LABEL: name: select_p0 - ; CHECK: liveins: $p0, $p1, $r6 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:er27 = COPY $r6 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ep = COPY $p1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:er = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:er = COPY [[COPY2]] - ; CHECK-NEXT: [[SELNEZ:%[0-9]+]]:er = SELNEZ [[COPY3]], [[COPY4]], [[COPY]] - ; CHECK-NEXT: $p0 = COPY [[SELNEZ]] - %0:gprregbank(s32) = COPY $r6 - %1:ptrregbank(p0) = COPY $p0 - %2:ptrregbank(p0) = COPY $p1 - %3:gprregbank(p0) = COPY %1 - %4:gprregbank(p0) = COPY %2 - %5:gprregbank(p0) = G_SELECT %0, %3, %4 - $p0 = COPY %5 -... - ---- -name: select_v256 -legalized: true -regBankSelected: true -body: | - bb.1.entry: - liveins: $r0, $wl2, $wl4 - ; CHECK-LABEL: name: select_v256 - ; CHECK: liveins: $r0, $wl2, $wl4 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:er = COPY $r0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ewl = COPY $wl2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ewl = COPY $wl4 - ; CHECK-NEXT: [[ADD_add_r_ri:%[0-9]+]]:ers8 = ADD_add_r_ri [[COPY]], -1, implicit-def dead $srcarry - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vec512 = REG_SEQUENCE [[COPY2]], %subreg.sub_256_lo - ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vec512 = REG_SEQUENCE [[COPY1]], %subreg.sub_256_lo - ; CHECK-NEXT: [[VSEL_32_:%[0-9]+]]:mxm = VSEL_32 [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[ADD_add_r_ri]] - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ewl = COPY [[VSEL_32_]].sub_256_lo - ; CHECK-NEXT: $wl0 = COPY [[COPY3]] - ; CHECK-NEXT: PseudoRET implicit $lr, implicit $wl0 - %4:gprregbank(s32) = COPY $r0 - %5:gprregbank(s32) = G_ASSERT_ZEXT %4:gprregbank, 1 - %2:vregbank(<8 x s32>) = COPY $wl2 - %3:vregbank(<8 x s32>) = COPY $wl4 - %0:vregbank(<8 x s32>) = G_SELECT %5:gprregbank(s32), %2:vregbank, %3:vregbank - $wl0 = COPY %0:vregbank(<8 x s32>) - PseudoRET implicit $lr, implicit $wl0 -... - ---- -name: select_v512 -legalized: true -regBankSelected: true -body: | - bb.1.entry: - liveins: $r0, $x2, $x4 - ; CHECK-LABEL: name: select_v512 - ; CHECK: liveins: $r0, $x2, $x4 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:er = COPY $r0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec512 = COPY $x2 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vec512 = COPY $x4 - ; CHECK-NEXT: [[ADD_add_r_ri:%[0-9]+]]:ers8 = ADD_add_r_ri [[COPY]], -1, implicit-def dead $srcarry - ; CHECK-NEXT: [[VSEL_32_:%[0-9]+]]:vec512 = VSEL_32 [[COPY1]], [[COPY2]], [[ADD_add_r_ri]] - ; CHECK-NEXT: $x0 = COPY [[VSEL_32_]] - ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 - %4:gprregbank(s32) = COPY $r0 - %5:gprregbank(s32) = G_ASSERT_ZEXT %4:gprregbank, 1 - %2:vregbank(<16 x s32>) = COPY $x2 - %3:vregbank(<16 x s32>) = COPY $x4 - %0:vregbank(<16 x s32>) = G_SELECT %5:gprregbank(s32), %2:vregbank, %3:vregbank - $x0 = COPY %0:vregbank(<16 x s32>) - PseudoRET implicit $lr, implicit $x0 -... - ---- -name: select_v1024 -legalized: true -regBankSelected: true -body: | - bb.1.entry: - liveins: $r0, $y3, $y4 - ; CHECK-LABEL: name: select_v1024 - ; CHECK: liveins: $r0, $y3, $y4 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:er = COPY $r0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec1024 = COPY $y3 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vec1024 = COPY $y4 - ; CHECK-NEXT: [[ADD_add_r_ri:%[0-9]+]]:ers8 = ADD_add_r_ri [[COPY]], -1, implicit-def dead $srcarry - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:exo = COPY [[COPY2]].sub_512_hi - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:exo = COPY [[COPY1]].sub_512_hi - ; CHECK-NEXT: [[VSEL_32_:%[0-9]+]]:exo = VSEL_32 [[COPY4]], [[COPY3]], [[ADD_add_r_ri]] - ; CHECK-NEXT: [[ADD_add_r_ri1:%[0-9]+]]:ers8 = ADD_add_r_ri [[COPY]], -1, implicit-def dead $srcarry - ; CHECK-NEXT: [[COPY5:%[0-9]+]]:exe = COPY [[COPY2]].sub_512_lo - ; CHECK-NEXT: [[COPY6:%[0-9]+]]:exe = COPY [[COPY1]].sub_512_lo - ; CHECK-NEXT: [[VSEL_32_1:%[0-9]+]]:exe = VSEL_32 [[COPY6]], [[COPY5]], [[ADD_add_r_ri1]] - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vec1024 = REG_SEQUENCE [[VSEL_32_1]], %subreg.sub_512_lo, [[VSEL_32_]], %subreg.sub_512_hi - ; CHECK-NEXT: $y2 = COPY [[REG_SEQUENCE]] - ; CHECK-NEXT: PseudoRET implicit $lr, implicit $y2 - %4:gprregbank(s32) = COPY $r0 - %5:gprregbank(s32) = G_ASSERT_ZEXT %4:gprregbank, 1 - %2:vregbank(<32 x s32>) = COPY $y3 - %3:vregbank(<32 x s32>) = COPY $y4 - %0:vregbank(<32 x s32>) = G_SELECT %5:gprregbank(s32), %2:vregbank, %3:vregbank - $y2 = COPY %0:vregbank(<32 x s32>) - PseudoRET implicit $lr, implicit $y2 -... diff --git a/llvm/test/CodeGen/AIE/aie2/vector_select.ll b/llvm/test/CodeGen/AIE/aie2/vector_select.ll deleted file mode 100644 index ced1d7e79eef..000000000000 --- a/llvm/test/CodeGen/AIE/aie2/vector_select.ll +++ /dev/null @@ -1,53 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; -; This file is licensed under the Apache License v2.0 with LLVM Exceptions. -; See https://llvm.org/LICENSE.txt for license information. -; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -; -; (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates -; RUN: llc -O2 -mtriple=aie2 --issue-limit=1 %s -o - | FileCheck %s - -define dso_local noundef <8 x i32> @_Z8test_andbDv8_iS_(i1 noundef zeroext %sel, <8 x i32> noundef %a, <8 x i32> noundef %b) local_unnamed_addr #0 { -; CHECK-LABEL: _Z8test_andbDv8_iS_: -; CHECK: .p2align 4 -; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: nopa ; nopb ; ret lr ; nopm ; nops -; CHECK-NEXT: nop // Delay Slot 5 -; CHECK-NEXT: mov r1, r16 // Delay Slot 4 -; CHECK-NEXT: add r16, r0, #-1 // Delay Slot 3 -; CHECK-NEXT: vsel.32 x0, x4, x2, r16 // Delay Slot 2 -; CHECK-NEXT: mov r16, r1 // Delay Slot 1 -entry: - %cond = select i1 %sel, <8 x i32> %b, <8 x i32> %a - ret <8 x i32> %cond -} - -define dso_local noundef <16 x i32> @_Z8test_andbDv16_iS_(i1 noundef zeroext %sel, <16 x i32> noundef %a, <16 x i32> noundef %b) local_unnamed_addr #0 { -; CHECK-LABEL: _Z8test_andbDv16_iS_: -; CHECK: .p2align 4 -; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: nopa ; nopb ; ret lr ; nopm ; nops -; CHECK-NEXT: nop // Delay Slot 5 -; CHECK-NEXT: mov r1, r16 // Delay Slot 4 -; CHECK-NEXT: add r16, r0, #-1 // Delay Slot 3 -; CHECK-NEXT: vsel.32 x0, x2, x4, r16 // Delay Slot 2 -; CHECK-NEXT: mov r16, r1 // Delay Slot 1 -entry: - %cond = select i1 %sel, <16 x i32> %a, <16 x i32> %b - ret <16 x i32> %cond -} - -define dso_local noundef <32 x i32> @_Z8test_andbDv32_iS_(i1 noundef zeroext %sel, <32 x i32> noundef %a, <32 x i32> noundef %b) local_unnamed_addr #0 { -; CHECK-LABEL: _Z8test_andbDv32_iS_: -; CHECK: .p2align 4 -; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: nopa ; nopb ; ret lr ; nopm -; CHECK-NEXT: mov r1, r16 // Delay Slot 5 -; CHECK-NEXT: add r16, r0, #-1 // Delay Slot 4 -; CHECK-NEXT: vsel.32 x5, x7, x9, r16 // Delay Slot 3 -; CHECK-NEXT: vsel.32 x4, x6, x8, r16 // Delay Slot 2 -; CHECK-NEXT: mov r16, r1 // Delay Slot 1 -entry: - %cond = select i1 %sel, <32 x i32> %a, <32 x i32> %b - ret <32 x i32> %cond -} diff --git a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-select.mir b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-select.mir deleted file mode 100644 index afb2f175d29e..000000000000 --- a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-select.mir +++ /dev/null @@ -1,59 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# -# This file is licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates -# RUN: llc -mtriple aie2p -run-pass=instruction-select %s -verify-machineinstrs -o - | FileCheck %s - ---- -name: select_s32 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $r6, $r7, $r8 - ; CHECK-LABEL: name: select_s32 - ; CHECK: liveins: $r6, $r7, $r8 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:mr27_select = COPY $r6 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r7 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r8 - ; CHECK-NEXT: [[SEL_NEZ:%[0-9]+]]:er = SEL_NEZ [[COPY1]], [[COPY2]], [[COPY]] - ; CHECK-NEXT: $r0 = COPY [[SEL_NEZ]] - %0:gprregbank(s32) = COPY $r6 - %1:gprregbank(s32) = COPY $r7 - %2:gprregbank(s32) = COPY $r8 - %3:gprregbank(s32) = G_SELECT %0, %1, %2 - $r0 = COPY %3 -... - ---- -name: select_p0 -legalized: true -regBankSelected: true - -body: | - bb.0: - liveins: $p0, $p1, $r6 - ; CHECK-LABEL: name: select_p0 - ; CHECK: liveins: $p0, $p1, $r6 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[COPY:%[0-9]+]]:mr27_select = COPY $r6 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:ep = COPY $p1 - ; CHECK-NEXT: [[COPY3:%[0-9]+]]:er = COPY [[COPY1]] - ; CHECK-NEXT: [[COPY4:%[0-9]+]]:er = COPY [[COPY2]] - ; CHECK-NEXT: [[SEL_NEZ:%[0-9]+]]:er = SEL_NEZ [[COPY3]], [[COPY4]], [[COPY]] - ; CHECK-NEXT: $p0 = COPY [[SEL_NEZ]] - %0:gprregbank(s32) = COPY $r6 - %1:ptrregbank(p0) = COPY $p0 - %2:ptrregbank(p0) = COPY $p1 - %3:gprregbank(p0) = COPY %1 - %4:gprregbank(p0) = COPY %2 - %5:gprregbank(p0) = G_SELECT %0, %3, %4 - $p0 = COPY %5 -... - diff --git a/llvm/test/CodeGen/AIE/aie2p/vector_select.ll b/llvm/test/CodeGen/AIE/aie2p/vector_select.ll new file mode 100644 index 000000000000..5ec0a77fff35 --- /dev/null +++ b/llvm/test/CodeGen/AIE/aie2p/vector_select.ll @@ -0,0 +1,37 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; +; This file is licensed under the Apache License v2.0 with LLVM Exceptions. +; See https://llvm.org/LICENSE.txt for license information. +; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +; +; (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its affiliates + +; RUN: llc -O2 -mtriple=aie2p --issue-limit=1 %s -o - | FileCheck --check-prefix=AIE2P %s + +define noundef inreg <64 x i32> @test_select_64xi32(i1 noundef zeroext %sel, <64 x i32> inreg noundef %a, <64 x i32> inreg noundef %b) { +; AIE2P-LABEL: test_select_64xi32: +; AIE2P: .p2align 4 +; AIE2P-NEXT: // %bb.0: // %entry +; AIE2P-NEXT: nopa ; add r16, r0, #-1; nopm +; AIE2P-NEXT: vmov x0, bmlh2 +; AIE2P-NEXT: vmov x2, bmhl2 +; AIE2P-NEXT: vmov x4, bmhh2 +; AIE2P-NEXT: vmov x6, bmll2 +; AIE2P-NEXT: vmov x8, bmlh1 +; AIE2P-NEXT: vmov x10, bmhl1 +; AIE2P-NEXT: vmov x1, bmhh1 +; AIE2P-NEXT: vmov x3, bmll1 +; AIE2P-NEXT: vsel.32 x6, x6, x3, r16 +; AIE2P-NEXT: vsel.32 x7, x0, x8, r16 +; AIE2P-NEXT: vsel.32 x0, x2, x10, r16 +; AIE2P-NEXT: vsel.32 x1, x4, x1, r16 +; AIE2P-NEXT: ret lr +; AIE2P-NEXT: vmov bmll0, x6 // Delay Slot 5 +; AIE2P-NEXT: vmov bmlh0, x7 // Delay Slot 4 +; AIE2P-NEXT: vmov bmhl0, x0 // Delay Slot 3 +; AIE2P-NEXT: vmov bmhh0, x1 // Delay Slot 2 +; AIE2P-NEXT: nop // Delay Slot 1 +entry: + %cond = select i1 %sel, <64 x i32> %b, <64 x i32> %a + ret <64 x i32> %cond +} diff --git a/llvm/test/CodeGen/AIE/vector_select.ll b/llvm/test/CodeGen/AIE/vector_select.ll new file mode 100644 index 000000000000..a82bd3e5ae52 --- /dev/null +++ b/llvm/test/CodeGen/AIE/vector_select.ll @@ -0,0 +1,235 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; +; This file is licensed under the Apache License v2.0 with LLVM Exceptions. +; See https://llvm.org/LICENSE.txt for license information. +; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +; +; (c) Copyright 2023-2025 Advanced Micro Devices, Inc. or its affiliates + +; RUN: llc -O2 -mtriple=aie2 --issue-limit=1 %s -o - | FileCheck --check-prefix=AIE2 %s +; RUN: llc -O2 -mtriple=aie2p --issue-limit=1 %s -o - | FileCheck --check-prefix=AIE2P %s + +define <32 x i8> @test_select_32xi8(i1 noundef zeroext %sel, <32 x i8> noundef %a, <32 x i8> noundef %b) { +; AIE2-LABEL: test_select_32xi8: +; AIE2: .p2align 4 +; AIE2-NEXT: // %bb.0: // %entry +; AIE2-NEXT: nopa ; nopb ; ret lr ; nopm ; nops +; AIE2-NEXT: nop // Delay Slot 5 +; AIE2-NEXT: mov r1, r16 // Delay Slot 4 +; AIE2-NEXT: add r16, r0, #-1 // Delay Slot 3 +; AIE2-NEXT: vsel.32 x0, x2, x4, r16 // Delay Slot 2 +; AIE2-NEXT: mov r16, r1 // Delay Slot 1 +; +; AIE2P-LABEL: test_select_32xi8: +; AIE2P: .p2align 4 +; AIE2P-NEXT: // %bb.0: // %entry +; AIE2P-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv +; AIE2P-NEXT: nopx // Delay Slot 5 +; AIE2P-NEXT: nop // Delay Slot 4 +; AIE2P-NEXT: add r16, r0, #-1 // Delay Slot 3 +; AIE2P-NEXT: vsel.32 x0, x2, x4, r16 // Delay Slot 2 +; AIE2P-NEXT: nop // Delay Slot 1 +entry: + %cond = select i1 %sel, <32 x i8> %a, <32 x i8> %b + ret <32 x i8> %cond +} + +define <16 x i16> @test_select_16xi16(i1 noundef zeroext %sel, <16 x i16> noundef %a, <16 x i16> noundef %b) { +; AIE2-LABEL: test_select_16xi16: +; AIE2: .p2align 4 +; AIE2-NEXT: // %bb.0: // %entry +; AIE2-NEXT: nopa ; nopb ; ret lr ; nopm ; nops +; AIE2-NEXT: nop // Delay Slot 5 +; AIE2-NEXT: mov r1, r16 // Delay Slot 4 +; AIE2-NEXT: add r16, r0, #-1 // Delay Slot 3 +; AIE2-NEXT: vsel.32 x0, x2, x4, r16 // Delay Slot 2 +; AIE2-NEXT: mov r16, r1 // Delay Slot 1 +; +; AIE2P-LABEL: test_select_16xi16: +; AIE2P: .p2align 4 +; AIE2P-NEXT: // %bb.0: // %entry +; AIE2P-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv +; AIE2P-NEXT: nopx // Delay Slot 5 +; AIE2P-NEXT: nop // Delay Slot 4 +; AIE2P-NEXT: add r16, r0, #-1 // Delay Slot 3 +; AIE2P-NEXT: vsel.32 x0, x2, x4, r16 // Delay Slot 2 +; AIE2P-NEXT: nop // Delay Slot 1 +entry: + %cond = select i1 %sel, <16 x i16> %a, <16 x i16> %b + ret <16 x i16> %cond +} + +define <8 x i32> @test_select_8xi32(i1 noundef zeroext %sel, <8 x i32> noundef %a, <8 x i32> noundef %b) { +; AIE2-LABEL: test_select_8xi32: +; AIE2: .p2align 4 +; AIE2-NEXT: // %bb.0: // %entry +; AIE2-NEXT: nopa ; nopb ; ret lr ; nopm ; nops +; AIE2-NEXT: nop // Delay Slot 5 +; AIE2-NEXT: mov r1, r16 // Delay Slot 4 +; AIE2-NEXT: add r16, r0, #-1 // Delay Slot 3 +; AIE2-NEXT: vsel.32 x0, x2, x4, r16 // Delay Slot 2 +; AIE2-NEXT: mov r16, r1 // Delay Slot 1 +; +; AIE2P-LABEL: test_select_8xi32: +; AIE2P: .p2align 4 +; AIE2P-NEXT: // %bb.0: // %entry +; AIE2P-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv +; AIE2P-NEXT: nopx // Delay Slot 5 +; AIE2P-NEXT: nop // Delay Slot 4 +; AIE2P-NEXT: add r16, r0, #-1 // Delay Slot 3 +; AIE2P-NEXT: vsel.32 x0, x2, x4, r16 // Delay Slot 2 +; AIE2P-NEXT: nop // Delay Slot 1 +entry: + %cond = select i1 %sel, <8 x i32> %a, <8 x i32> %b + ret <8 x i32> %cond +} + +define <64 x i8> @test_select_64xi8(i1 noundef zeroext %sel, <64 x i8> noundef %a, <64 x i8> noundef %b) { +; AIE2-LABEL: test_select_64xi8: +; AIE2: .p2align 4 +; AIE2-NEXT: // %bb.0: // %entry +; AIE2-NEXT: nopa ; nopb ; ret lr ; nopm ; nops +; AIE2-NEXT: nop // Delay Slot 5 +; AIE2-NEXT: mov r1, r16 // Delay Slot 4 +; AIE2-NEXT: add r16, r0, #-1 // Delay Slot 3 +; AIE2-NEXT: vsel.32 x0, x2, x4, r16 // Delay Slot 2 +; AIE2-NEXT: mov r16, r1 // Delay Slot 1 +; +; AIE2P-LABEL: test_select_64xi8: +; AIE2P: .p2align 4 +; AIE2P-NEXT: // %bb.0: // %entry +; AIE2P-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv +; AIE2P-NEXT: nopx // Delay Slot 5 +; AIE2P-NEXT: nop // Delay Slot 4 +; AIE2P-NEXT: add r16, r0, #-1 // Delay Slot 3 +; AIE2P-NEXT: vsel.32 x0, x2, x4, r16 // Delay Slot 2 +; AIE2P-NEXT: nop // Delay Slot 1 +entry: + %cond = select i1 %sel, <64 x i8> %a, <64 x i8> %b + ret <64 x i8> %cond +} + +define <32 x i16> @test_select_32xi16(i1 noundef zeroext %sel, <32 x i16> noundef %a, <32 x i16> noundef %b) { +; AIE2-LABEL: test_select_32xi16: +; AIE2: .p2align 4 +; AIE2-NEXT: // %bb.0: // %entry +; AIE2-NEXT: nopa ; nopb ; ret lr ; nopm ; nops +; AIE2-NEXT: nop // Delay Slot 5 +; AIE2-NEXT: mov r1, r16 // Delay Slot 4 +; AIE2-NEXT: add r16, r0, #-1 // Delay Slot 3 +; AIE2-NEXT: vsel.32 x0, x2, x4, r16 // Delay Slot 2 +; AIE2-NEXT: mov r16, r1 // Delay Slot 1 +; +; AIE2P-LABEL: test_select_32xi16: +; AIE2P: .p2align 4 +; AIE2P-NEXT: // %bb.0: // %entry +; AIE2P-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv +; AIE2P-NEXT: nopx // Delay Slot 5 +; AIE2P-NEXT: nop // Delay Slot 4 +; AIE2P-NEXT: add r16, r0, #-1 // Delay Slot 3 +; AIE2P-NEXT: vsel.32 x0, x2, x4, r16 // Delay Slot 2 +; AIE2P-NEXT: nop // Delay Slot 1 +entry: + %cond = select i1 %sel, <32 x i16> %a, <32 x i16> %b + ret <32 x i16> %cond +} + +define <16 x i32> @test_select_16xi32(i1 noundef zeroext %sel, <16 x i32> noundef %a, <16 x i32> noundef %b) { +; AIE2-LABEL: test_select_16xi32: +; AIE2: .p2align 4 +; AIE2-NEXT: // %bb.0: // %entry +; AIE2-NEXT: nopa ; nopb ; ret lr ; nopm ; nops +; AIE2-NEXT: nop // Delay Slot 5 +; AIE2-NEXT: mov r1, r16 // Delay Slot 4 +; AIE2-NEXT: add r16, r0, #-1 // Delay Slot 3 +; AIE2-NEXT: vsel.32 x0, x2, x4, r16 // Delay Slot 2 +; AIE2-NEXT: mov r16, r1 // Delay Slot 1 +; +; AIE2P-LABEL: test_select_16xi32: +; AIE2P: .p2align 4 +; AIE2P-NEXT: // %bb.0: // %entry +; AIE2P-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv +; AIE2P-NEXT: nopx // Delay Slot 5 +; AIE2P-NEXT: nop // Delay Slot 4 +; AIE2P-NEXT: add r16, r0, #-1 // Delay Slot 3 +; AIE2P-NEXT: vsel.32 x0, x2, x4, r16 // Delay Slot 2 +; AIE2P-NEXT: nop // Delay Slot 1 +entry: + %cond = select i1 %sel, <16 x i32> %a, <16 x i32> %b + ret <16 x i32> %cond +} + +define <128 x i8> @test_select_128xi8(i1 noundef zeroext %sel, <128 x i8> noundef %a, <128 x i8> noundef %b) { +; AIE2-LABEL: test_select_128xi8: +; AIE2: .p2align 4 +; AIE2-NEXT: // %bb.0: // %entry +; AIE2-NEXT: nopa ; nopb ; ret lr ; nopm +; AIE2-NEXT: mov r1, r16 // Delay Slot 5 +; AIE2-NEXT: add r16, r0, #-1 // Delay Slot 4 +; AIE2-NEXT: vsel.32 x4, x6, x8, r16 // Delay Slot 3 +; AIE2-NEXT: vsel.32 x5, x7, x9, r16 // Delay Slot 2 +; AIE2-NEXT: mov r16, r1 // Delay Slot 1 +; +; AIE2P-LABEL: test_select_128xi8: +; AIE2P: .p2align 4 +; AIE2P-NEXT: // %bb.0: // %entry +; AIE2P-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv +; AIE2P-NEXT: nop // Delay Slot 5 +; AIE2P-NEXT: add r16, r0, #-1 // Delay Slot 4 +; AIE2P-NEXT: vsel.32 x4, x6, x8, r16 // Delay Slot 3 +; AIE2P-NEXT: vsel.32 x5, x7, x9, r16 // Delay Slot 2 +; AIE2P-NEXT: nop // Delay Slot 1 +entry: + %cond = select i1 %sel, <128 x i8> %a, <128 x i8> %b + ret <128 x i8> %cond +} + +define <64 x i16> @test_select_64xi16(i1 noundef zeroext %sel, <64 x i16> noundef %a, <64 x i16> noundef %b) { +; AIE2-LABEL: test_select_64xi16: +; AIE2: .p2align 4 +; AIE2-NEXT: // %bb.0: // %entry +; AIE2-NEXT: nopa ; nopb ; ret lr ; nopm +; AIE2-NEXT: mov r1, r16 // Delay Slot 5 +; AIE2-NEXT: add r16, r0, #-1 // Delay Slot 4 +; AIE2-NEXT: vsel.32 x4, x6, x8, r16 // Delay Slot 3 +; AIE2-NEXT: vsel.32 x5, x7, x9, r16 // Delay Slot 2 +; AIE2-NEXT: mov r16, r1 // Delay Slot 1 +; +; AIE2P-LABEL: test_select_64xi16: +; AIE2P: .p2align 4 +; AIE2P-NEXT: // %bb.0: // %entry +; AIE2P-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv +; AIE2P-NEXT: nop // Delay Slot 5 +; AIE2P-NEXT: add r16, r0, #-1 // Delay Slot 4 +; AIE2P-NEXT: vsel.32 x4, x6, x8, r16 // Delay Slot 3 +; AIE2P-NEXT: vsel.32 x5, x7, x9, r16 // Delay Slot 2 +; AIE2P-NEXT: nop // Delay Slot 1 +entry: + %cond = select i1 %sel, <64 x i16> %a, <64 x i16> %b + ret <64 x i16> %cond +} + +define <32 x i32> @test_select_32xi32(i1 noundef zeroext %sel, <32 x i32> noundef %a, <32 x i32> noundef %b) { +; AIE2-LABEL: test_select_32xi32: +; AIE2: .p2align 4 +; AIE2-NEXT: // %bb.0: // %entry +; AIE2-NEXT: nopa ; nopb ; ret lr ; nopm +; AIE2-NEXT: mov r1, r16 // Delay Slot 5 +; AIE2-NEXT: add r16, r0, #-1 // Delay Slot 4 +; AIE2-NEXT: vsel.32 x4, x6, x8, r16 // Delay Slot 3 +; AIE2-NEXT: vsel.32 x5, x7, x9, r16 // Delay Slot 2 +; AIE2-NEXT: mov r16, r1 // Delay Slot 1 +; +; AIE2P-LABEL: test_select_32xi32: +; AIE2P: .p2align 4 +; AIE2P-NEXT: // %bb.0: // %entry +; AIE2P-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv +; AIE2P-NEXT: nop // Delay Slot 5 +; AIE2P-NEXT: add r16, r0, #-1 // Delay Slot 4 +; AIE2P-NEXT: vsel.32 x4, x6, x8, r16 // Delay Slot 3 +; AIE2P-NEXT: vsel.32 x5, x7, x9, r16 // Delay Slot 2 +; AIE2P-NEXT: nop // Delay Slot 1 +entry: + %cond = select i1 %sel, <32 x i32> %a, <32 x i32> %b + ret <32 x i32> %cond +}