diff --git a/llvm/lib/Target/AIE/AIE2InstrPatterns.td b/llvm/lib/Target/AIE/AIE2InstrPatterns.td index 2d9409264d24..04c03d950bf9 100644 --- a/llvm/lib/Target/AIE/AIE2InstrPatterns.td +++ b/llvm/lib/Target/AIE/AIE2InstrPatterns.td @@ -116,33 +116,6 @@ class SelectPat : def : SelectPat; def : SelectPat; -// For vector select, VSEL instruction is used, since it does select word by word -// sel input is modified, "sel ? v1 : v2 is equivalent to vsel(sel-1, v1, v2)" -// rs1(sel) is zero-extended from the original 1 bit type -foreach vec256Ty = [v32i8, v16i16, v8i32] in { -def : Pat<(vec256Ty (select (i32 eRS8:$rs1), VEC256:$rs2, VEC256:$rs3)), - (vec256Ty (EXTRACT_SUBREG (VSEL_32 - (v32i16 (REG_SEQUENCE VEC512, VEC256:$rs2, sub_256_lo)), - (v32i16 (REG_SEQUENCE VEC512, VEC256:$rs3, sub_256_lo)), - (ADD_add_r_ri eR:$rs1, (i32 -1))), - sub_256_lo))>; -} -foreach vec512Ty = [v64i8, v32i16, v16i32] in { - def : Pat<(vec512Ty (select (i32 eRS8:$rs1), VEC512:$rs2, VEC512:$rs3)), - (vec512Ty (VSEL_32 VEC512:$rs2, VEC512:$rs3, (ADD_add_r_ri eR:$rs1, (i32 -1))))>; -} -foreach vec1024Ty = [v128i8, v64i16, v32i32] in { -def : Pat<(vec1024Ty (select (i32 eRS8:$rs1), VEC1024:$rs2, VEC1024:$rs3)), - (REG_SEQUENCE VEC1024, - (VSEL_32 (v16i32 (EXTRACT_SUBREG VEC1024:$rs2, sub_512_lo)), - (v16i32 (EXTRACT_SUBREG VEC1024:$rs3, sub_512_lo)), - (ADD_add_r_ri eR:$rs1, (i32 -1))), sub_512_lo, - (VSEL_32 (v16i32 (EXTRACT_SUBREG VEC1024:$rs2, sub_512_hi)), - (v16i32 (EXTRACT_SUBREG VEC1024:$rs3, sub_512_hi)), - (ADD_add_r_ri eR:$rs1, (i32 -1))), sub_512_hi - )>; -} - class SelectNezPat : Pat<(type (select (i32 (setne eR27:$rs1, (i32 0))), (type eR:$rs2), (type eR:$rs3))), (SELNEZ eR:$rs2, eR:$rs3, eR27:$rs1)>; diff --git a/llvm/lib/Target/AIE/AIEBaseInstrPatterns.td b/llvm/lib/Target/AIE/AIEBaseInstrPatterns.td index 3f12e1d3068f..01a26ee0142a 100644 --- a/llvm/lib/Target/AIE/AIEBaseInstrPatterns.td +++ b/llvm/lib/Target/AIE/AIEBaseInstrPatterns.td @@ -69,3 +69,30 @@ def : Pat<(vec1024Ty (and 
VEC1024:$src1, VEC1024:$src2)), (v16i32 (EXTRACT_SUBREG VEC1024:$src2, sub_512_hi))), sub_512_hi )>; } + +// Vector select uses the VSEL_32 instruction, which selects word by word. +// The sel input is adjusted first: "sel ? v1 : v2" is emitted as +// VSEL_32(v1, v2, sel - 1); rs1 (sel) is zero-extended from its 1-bit type. +foreach vec256Ty = [v32i8, v16i16, v8i32] in { +def : Pat<(vec256Ty (select (i32 eR:$rs1), VEC256:$rs2, VEC256:$rs3)), + (vec256Ty (EXTRACT_SUBREG (VSEL_32 + (v32i16 (REG_SEQUENCE VEC512, VEC256:$rs2, sub_256_lo)), + (v32i16 (REG_SEQUENCE VEC512, VEC256:$rs3, sub_256_lo)), + (ADD_add_r_ri eR:$rs1, (i32 -1))), + sub_256_lo))>; +} +foreach vec512Ty = [v64i8, v32i16, v16i32] in { + def : Pat<(vec512Ty (select (i32 eR:$rs1), VEC512:$rs2, VEC512:$rs3)), + (vec512Ty (VSEL_32 VEC512:$rs2, VEC512:$rs3, (ADD_add_r_ri eR:$rs1, (i32 -1))))>; +} +foreach vec1024Ty = [v128i8, v64i16, v32i32] in { // NOTE(review): (sel - 1) is emitted once per 512-bit half below. +def : Pat<(vec1024Ty (select (i32 eR:$rs1), VEC1024:$rs2, VEC1024:$rs3)), + (REG_SEQUENCE VEC1024, + (VSEL_32 (v16i32 (EXTRACT_SUBREG VEC1024:$rs2, sub_512_lo)), + (v16i32 (EXTRACT_SUBREG VEC1024:$rs3, sub_512_lo)), + (ADD_add_r_ri eR:$rs1, (i32 -1))), sub_512_lo, + (VSEL_32 (v16i32 (EXTRACT_SUBREG VEC1024:$rs2, sub_512_hi)), + (v16i32 (EXTRACT_SUBREG VEC1024:$rs3, sub_512_hi)), + (ADD_add_r_ri eR:$rs1, (i32 -1))), sub_512_hi + )>; +} diff --git a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-vsel.mir b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-vsel.mir index 6a6c062a7493..f777b31eb7ea 100644 --- a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-vsel.mir +++ b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-vsel.mir @@ -166,3 +166,91 @@ body: | $x0 = COPY %0:vregbank(<16 x s32>) PseudoRET implicit $lr, implicit $x0 ... 
+ +--- +name: gselect_v8s32 # 256-bit G_SELECT: CHECK lines expect both inputs placed in vec512 via REG_SEQUENCE, one VSEL_32, then a sub_256_lo copy +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $r0, $wh0, $wh1 + ; CHECK-LABEL: name: gselect_v8s32 + ; CHECK: liveins: $r0, $wh0, $wh1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ewl = COPY $wh0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ewl = COPY $wh1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: [[ADD_add_r_ri:%[0-9]+]]:ers16 = ADD_add_r_ri [[COPY2]], -1, implicit-def dead $srcarry + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vec512 = REG_SEQUENCE [[COPY1]], %subreg.sub_256_lo + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vec512 = REG_SEQUENCE [[COPY]], %subreg.sub_256_lo + ; CHECK-NEXT: [[VSEL_32_:%[0-9]+]]:mxm = VSEL_32 [[REG_SEQUENCE1]], [[REG_SEQUENCE]], [[ADD_add_r_ri]] + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:ewl = COPY [[VSEL_32_]].sub_256_lo + ; CHECK-NEXT: $wh0 = COPY [[COPY3]] + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $wh0 + %1:vregbank(<8 x s32>) = COPY $wh0 + %2:vregbank(<8 x s32>) = COPY $wh1 + %3:gprregbank(s32) = COPY $r0 + %0:vregbank(<8 x s32>) = G_SELECT %3:gprregbank(s32), %1:vregbank(<8 x s32>), %2:vregbank(<8 x s32>) + $wh0 = COPY %0:vregbank(<8 x s32>) + PseudoRET implicit $lr, implicit $wh0 +... 
+ +--- +name: gselect_v32s16 # 512-bit G_SELECT: CHECK lines expect a single VSEL_32 fed by ADD_add_r_ri (cond + -1) +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $r0, $x0, $x1 + ; CHECK-LABEL: name: gselect_v32s16 + ; CHECK: liveins: $r0, $x0, $x1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec512 = COPY $x1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: [[ADD_add_r_ri:%[0-9]+]]:ers16 = ADD_add_r_ri [[COPY2]], -1, implicit-def dead $srcarry + ; CHECK-NEXT: [[VSEL_32_:%[0-9]+]]:vec512 = VSEL_32 [[COPY]], [[COPY1]], [[ADD_add_r_ri]] + ; CHECK-NEXT: $x0 = COPY [[VSEL_32_]] + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $x0 + %1:vregbank(<32 x s16>) = COPY $x0 + %2:vregbank(<32 x s16>) = COPY $x1 + %3:gprregbank(s32) = COPY $r0 + %0:vregbank(<32 x s16>) = G_SELECT %3:gprregbank(s32), %1:vregbank(<32 x s16>), %2:vregbank(<32 x s16>) + $x0 = COPY %0:vregbank(<32 x s16>) + PseudoRET implicit $lr, implicit $x0 +... + +--- +name: gselect_v128s8 # 1024-bit G_SELECT: CHECK lines expect one ADD_add_r_ri + VSEL_32 per 512-bit half, recombined by REG_SEQUENCE +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $r0, $y1, $y2 + ; CHECK-LABEL: name: gselect_v128s8 + ; CHECK: liveins: $r0, $y1, $y2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec1024 = COPY $y1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec1024 = COPY $y2 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: [[ADD_add_r_ri:%[0-9]+]]:ers16 = ADD_add_r_ri [[COPY2]], -1, implicit-def dead $srcarry + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:exo = COPY [[COPY1]].sub_512_hi + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:exo = COPY [[COPY]].sub_512_hi + ; CHECK-NEXT: [[VSEL_32_:%[0-9]+]]:exo = VSEL_32 [[COPY4]], [[COPY3]], [[ADD_add_r_ri]] + ; CHECK-NEXT: [[ADD_add_r_ri1:%[0-9]+]]:ers16 = ADD_add_r_ri [[COPY2]], -1, implicit-def dead $srcarry + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:exe = COPY [[COPY1]].sub_512_lo + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:exe = COPY [[COPY]].sub_512_lo + ; CHECK-NEXT: [[VSEL_32_1:%[0-9]+]]:exe = VSEL_32 [[COPY6]], [[COPY5]], [[ADD_add_r_ri1]] + ; 
CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vec1024 = REG_SEQUENCE [[VSEL_32_1]], %subreg.sub_512_lo, [[VSEL_32_]], %subreg.sub_512_hi + ; CHECK-NEXT: $y1 = COPY [[REG_SEQUENCE]] + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $y1 + %1:vregbank(<128 x s8>) = COPY $y1 + %2:vregbank(<128 x s8>) = COPY $y2 + %3:gprregbank(s32) = COPY $r0 + %0:vregbank(<128 x s8>) = G_SELECT %3:gprregbank(s32), %1:vregbank(<128 x s8>), %2:vregbank(<128 x s8>) + $y1 = COPY %0:vregbank(<128 x s8>) + PseudoRET implicit $lr, implicit $y1 +...