Skip to content

Commit

Permalink
[AArch64] Add patterns for 64bit vector addp
Browse files Browse the repository at this point in the history
This extends the existing patterns for addp to 64bit outputs with a single
input. Whilst the general pattern is similar to the 128bit patterns
(add(uzp1(extract_lo, extract_hi), uzp2(extract_lo, extract_hi))), at the late
stage other optimizations have happened to turn the first uzp1 into trunc and
the second into extract(uzp2) with undef.

Fixes llvm#109108
  • Loading branch information
davemgreen committed Sep 19, 2024
1 parent 4ec4ac1 commit 4c50112
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 4 deletions.
12 changes: 12 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -9634,6 +9634,18 @@ def : Pat<(v16i8 (add (AArch64uzp1 (v16i8 FPR128:$Rn), (v16i8 FPR128:$Rm)),
(AArch64uzp2 (v16i8 FPR128:$Rn), (v16i8 FPR128:$Rm)))),
(v16i8 (ADDPv16i8 $Rn, $Rm))>;

// 64-bit pairwise add with a single 128-bit input (v2i32 result).
// Matches add(zip1(lo, hi), zip2(lo, hi)), where lo and hi are the subvectors
// of the same v4i32 $Rn extracted at element index 0 and 2. For two-element
// vectors zip1/zip2 pick the same lanes as uzp1/uzp2, so this is the
// deinterleave-and-add form. It is selected as ADDPv4i32 with $Rn as both
// operands, and the result is read back through the dsub subregister.
def : Pat<(v2i32 (add (AArch64zip1 (extract_subvector (v4i32 FPR128:$Rn), (i64 0)),
(extract_subvector (v4i32 FPR128:$Rn), (i64 2))),
(AArch64zip2 (extract_subvector (v4i32 FPR128:$Rn), (i64 0)),
(extract_subvector (v4i32 FPR128:$Rn), (i64 2))))),
(EXTRACT_SUBREG (ADDPv4i32 $Rn, $Rn), dsub)>;
// 64-bit pairwise add with a single 128-bit input (v4i16 result).
// By the time selection runs, the uzp1 half of the add has already been
// rewritten as a trunc of $Rn reinterpreted as v4i32, and the uzp2 half as the
// subvector at index 0 of uzp2($Rn, undef); match that shape directly.
// It is selected as ADDPv8i16 with $Rn as both operands, and the result is
// read back through the dsub subregister.
// NOTE(review): the trunc-of-bitconvert form presumably relies on
// little-endian lane layout - confirm behaviour for big-endian targets.
def : Pat<(v4i16 (add (trunc (v4i32 (bitconvert FPR128:$Rn))),
(extract_subvector (AArch64uzp2 (v8i16 FPR128:$Rn), undef), (i64 0)))),
(EXTRACT_SUBREG (ADDPv8i16 $Rn, $Rn), dsub)>;
// 64-bit pairwise add with a single 128-bit input (v8i8 result).
// Same shape as the v4i16 case one element size down: one add operand is a
// trunc of $Rn reinterpreted as v8i16, the other is the subvector at index 0
// of uzp2($Rn, undef) on v16i8.
// It is selected as ADDPv16i8 with $Rn as both operands, and the result is
// read back through the dsub subregister.
// NOTE(review): the trunc-of-bitconvert form presumably relies on
// little-endian lane layout - confirm behaviour for big-endian targets.
def : Pat<(v8i8 (add (trunc (v8i16 (bitconvert FPR128:$Rn))),
(extract_subvector (AArch64uzp2 (v16i8 FPR128:$Rn), undef), (i64 0)))),
(EXTRACT_SUBREG (ADDPv16i8 $Rn, $Rn), dsub)>;

// Floating-point pairwise add on full 128-bit vectors (v2f64).
// Matches fadd(zip1($Rn, $Rm), zip2($Rn, $Rm)) and selects a single
// FADDPv2f64 taking $Rn and $Rm as its two operands.
def : Pat<(v2f64 (fadd (AArch64zip1 (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm)),
(AArch64zip2 (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm)))),
(v2f64 (FADDPv2f64 $Rn, $Rm))>;
Expand Down
30 changes: 26 additions & 4 deletions llvm/test/CodeGen/AArch64/addp-shuffle.ll
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,8 @@ define <4 x i32> @deinterleave_shuffle_v8i32_c(<8 x i32> %a) {
define <2 x i32> @deinterleave_shuffle_v4i32(<4 x i32> %a) {
; CHECK-LABEL: deinterleave_shuffle_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: zip1 v2.2s, v0.2s, v1.2s
; CHECK-NEXT: zip2 v0.2s, v0.2s, v1.2s
; CHECK-NEXT: add v0.2s, v2.2s, v0.2s
; CHECK-NEXT: addp v0.4s, v0.4s, v0.4s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%r0 = shufflevector <4 x i32> %a, <4 x i32> poison, <2 x i32> <i32 0, i32 2>
%r1 = shufflevector <4 x i32> %a, <4 x i32> poison, <2 x i32> <i32 1, i32 3>
Expand All @@ -49,6 +47,18 @@ define <8 x i16> @deinterleave_shuffle_v16i16(<16 x i16> %a) {
ret <8 x i16> %o
}

; Single-input deinterleave-and-add producing a 64-bit <4 x i16> result.
; %r0 takes the even lanes (0, 2, 4, 6) of %a, %r1 takes the odd lanes
; (1, 3, 5, 7), and the two are added lane-wise. The expected codegen is one
; addp on the full 8h vector with %a as both operands, with the result read
; from the low 64 bits (d0).
define <4 x i16> @deinterleave_shuffle_v8i16(<8 x i16> %a) {
; CHECK-LABEL: deinterleave_shuffle_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: addp v0.8h, v0.8h, v0.8h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%r0 = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
%r1 = shufflevector <8 x i16> %a, <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
%o = add <4 x i16> %r0, %r1
ret <4 x i16> %o
}

define <16 x i8> @deinterleave_shuffle_v32i8(<32 x i8> %a) {
; CHECK-LABEL: deinterleave_shuffle_v32i8:
; CHECK: // %bb.0:
Expand All @@ -60,6 +70,18 @@ define <16 x i8> @deinterleave_shuffle_v32i8(<32 x i8> %a) {
ret <16 x i8> %o
}

; Single-input deinterleave-and-add producing a 64-bit <8 x i8> result.
; %r0 takes the even lanes (0, 2, ..., 14) of %a, %r1 takes the odd lanes
; (1, 3, ..., 15), and the two are added lane-wise. The expected codegen is one
; addp on the full 16b vector with %a as both operands, with the result read
; from the low 64 bits (d0).
define <8 x i8> @deinterleave_shuffle_v16i8(<16 x i8> %a) {
; CHECK-LABEL: deinterleave_shuffle_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: addp v0.16b, v0.16b, v0.16b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%r0 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
%r1 = shufflevector <16 x i8> %a, <16 x i8> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
%o = add <8 x i8> %r0, %r1
ret <8 x i8> %o
}

define <4 x i64> @deinterleave_shuffle_v8i64(<8 x i64> %a) {
; CHECK-LABEL: deinterleave_shuffle_v8i64:
; CHECK: // %bb.0:
Expand Down

0 comments on commit 4c50112

Please sign in to comment.