From 099ac6446b68b9214cba706ca8b91b8dd53d4226 Mon Sep 17 00:00:00 2001
From: Kateryna Muts
Date: Fri, 7 Feb 2025 10:28:10 +0000
Subject: [PATCH] [AIE2P] Add LLVM IR end-to-end tests for shufflevector combines

---
NOTE(review): this patch adds one llc + FileCheck test for -mtriple=aie2p.
Each function in it holds a single shufflevector whose result is returned,
and the CHECK lines pin the scheduled code emitted for that function.
Every shufflevector in the test ends at its mask type (e.g. "<16 x i32>")
with no mask constant after it, and the From: header carries no address.
Presumably the angle-bracket contents were stripped when this patch was
extracted - TODO confirm against the original commit. The masks have to be
restored before llc can parse the test.

 llvm/test/CodeGen/AIE/aie2p/shufflevec.ll | 425 ++++++++++++++++++++++
 1 file changed, 425 insertions(+)
 create mode 100644 llvm/test/CodeGen/AIE/aie2p/shufflevec.ll

diff --git a/llvm/test/CodeGen/AIE/aie2p/shufflevec.ll b/llvm/test/CodeGen/AIE/aie2p/shufflevec.ll
new file mode 100644
index 000000000000..234b257ae9f3
--- /dev/null
+++ b/llvm/test/CodeGen/AIE/aie2p/shufflevec.ll
@@ -0,0 +1,425 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+;
+; This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+; See https://llvm.org/LICENSE.txt for license information.
+; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+;
+; (c) Copyright 2025 Advanced Micro Devices, Inc. or its affiliates
+; RUN: llc -O2 -mtriple=aie2p -verify-machineinstrs --issue-limit=1 %s -o - | FileCheck %s
+
+define <16 x i32> @test_shuffle_vector_to_concat_vector(<8 x i32> noundef %a, <8 x i32> noundef %b) {
+; CHECK-LABEL: test_shuffle_vector_to_concat_vector:
+; CHECK: .p2align 4
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv
+; CHECK-NEXT: nopx // Delay Slot 5
+; CHECK-NEXT: nop // Delay Slot 4
+; CHECK-NEXT: vmov wl0, wl2 // Delay Slot 3
+; CHECK-NEXT: vmov wh0, wl4 // Delay Slot 2
+; CHECK-NEXT: nop // Delay Slot 1
+entry:
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32>
+  ret <16 x i32> %shuffle
+}
+
+define <16 x i32> @test_shuffle_vector_2x32Src(<2 x i32> noundef %a, <2 x i32> noundef %b) {
+; CHECK-LABEL: test_shuffle_vector_2x32Src:
+; CHECK: .p2align 4
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: ret lr
+; CHECK-NEXT: nop // Delay Slot 5
+; CHECK-NEXT: nop // Delay Slot 4
+; CHECK-NEXT: nop // Delay Slot 3
+; CHECK-NEXT: vbcst.64 x0, r1:r0 // Delay Slot 2
+; CHECK-NEXT: nop // Delay Slot 1
+entry:
+  %shuffle = shufflevector <2 x i32> %a, <2 x i32> %b, <16 x i32>
+  ret <16 x i32> %shuffle
+}
+
+define <16 x i16> @test_shuffle_vector_4x16Src(<4 x i16> noundef %a, <4 x i16> noundef %b) {
+; CHECK-LABEL: test_shuffle_vector_4x16Src:
+; CHECK: .p2align 4
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: ret lr
+; CHECK-NEXT: nop // Delay Slot 5
+; CHECK-NEXT: nop // Delay Slot 4
+; CHECK-NEXT: nop // Delay Slot 3
+; CHECK-NEXT: vbcst.64 x0, r1:r0 // Delay Slot 2
+; CHECK-NEXT: nop // Delay Slot 1
+entry:
+  %shuffle = shufflevector <4 x i16> %a, <4 x i16> %b, <16 x i32>
+  ret <16 x i16> %shuffle
+}
+
+define <32 x i8> @test_shuffle_vector_8x8Src(<8 x i8> noundef %a, <8 x i8> noundef %b) {
+; CHECK-LABEL: test_shuffle_vector_8x8Src:
+; CHECK: .p2align 4
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: ret lr
+; CHECK-NEXT: nop // Delay Slot 5
+; CHECK-NEXT: nop // Delay Slot 4
+; CHECK-NEXT: nop // Delay Slot 3
+; CHECK-NEXT: vbcst.64 x0, r1:r0 // Delay Slot 2
+; CHECK-NEXT: nop // Delay Slot 1
+entry:
+  %shuffle = shufflevector <8 x i8> %a, <8 x i8> %b, <32 x i32>
+  ret <32 x i8> %shuffle
+}
+
+define <16 x i16> @test_shuffle_vector_2x16Src(<2 x i16> noundef %a, <2 x i16> noundef %b) {
+; CHECK-LABEL: test_shuffle_vector_2x16Src:
+; CHECK: .p2align 4
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: ret lr
+; CHECK-NEXT: nop // Delay Slot 5
+; CHECK-NEXT: nop // Delay Slot 4
+; CHECK-NEXT: nop // Delay Slot 3
+; CHECK-NEXT: vbcst.32 x0, r0 // Delay Slot 2
+; CHECK-NEXT: nop // Delay Slot 1
+entry:
+  %shuffle = shufflevector <2 x i16> %a, <2 x i16> %b, <16 x i32>
+  ret <16 x i16> %shuffle
+}
+
+define <32 x i8> @test_shuffle_vector_4x8Src(<4 x i8> noundef %a, <4 x i8> noundef %b) {
+; CHECK-LABEL: test_shuffle_vector_4x8Src:
+; CHECK: .p2align 4
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: ret lr
+; CHECK-NEXT: nop // Delay Slot 5
+; CHECK-NEXT: nop // Delay Slot 4
+; CHECK-NEXT: nop // Delay Slot 3
+; CHECK-NEXT: vbcst.32 x0, r0 // Delay Slot 2
+; CHECK-NEXT: nop // Delay Slot 1
+entry:
+  %shuffle = shufflevector <4 x i8> %a, <4 x i8> %b, <32 x i32>
+  ret <32 x i8> %shuffle
+}
+
+define <16 x i32> @test_shuffle_vector_all_undef(<16 x i32> noundef %a, <16 x i32> noundef %b) {
+; CHECK-LABEL: test_shuffle_vector_all_undef:
+; CHECK: .p2align 4
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: nopa ; ret lr
+; CHECK-NEXT: nop // Delay Slot 5
+; CHECK-NEXT: nop // Delay Slot 4
+; CHECK-NEXT: nop // Delay Slot 3
+; CHECK-NEXT: nop // Delay Slot 2
+; CHECK-NEXT: nop // Delay Slot 1
+entry:
+  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32>
+  ret <16 x i32> %shuffle
+}
+
+define <16 x i32> @test_shuffle_vector_vsel_valid_mask_16_s32(<16 x i32> noundef %a, <16 x i32> noundef %b) {
+; CHECK-LABEL: test_shuffle_vector_vsel_valid_mask_16_s32:
+; CHECK: .p2align 4
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv
+; CHECK-NEXT: nop // Delay Slot 5
+; CHECK-NEXT: nop // Delay Slot 4
+; CHECK-NEXT: movxm r16, #65520 // Delay Slot 3
+; CHECK-NEXT: vsel.32 x0, x2, x4, r16 // Delay Slot 2
+; CHECK-NEXT: nop // Delay Slot 1
+entry:
+  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32>
+  ret <16 x i32> %shuffle
+}
+
+define <32 x i16> @test_shuffle_vector_vsel_valid_mask_32_s16(<32 x i16> noundef %a, <32 x i16> noundef %b) {
+; CHECK-LABEL: test_shuffle_vector_vsel_valid_mask_32_s16:
+; CHECK: .p2align 4
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv
+; CHECK-NEXT: nopx // Delay Slot 5
+; CHECK-NEXT: nop // Delay Slot 4
+; CHECK-NEXT: mova r16, #10 // Delay Slot 3
+; CHECK-NEXT: vsel.16 x0, x2, x4, r16 // Delay Slot 2
+; CHECK-NEXT: nop // Delay Slot 1
+entry:
+  %shuffle = shufflevector <32 x i16> %a, <32 x i16> %b, <32 x i32>
+  ret <32 x i16> %shuffle
+}
+
+define <64 x i8> @test_shuffle_vector_vsel_valid_mask_64_s8(<64 x i8> noundef %a, <64 x i8> noundef %b) {
+; CHECK-LABEL: test_shuffle_vector_vsel_valid_mask_64_s8:
+; CHECK: .p2align 4
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv
+; CHECK-NEXT: nop // Delay Slot 5
+; CHECK-NEXT: mova r0, #5 // Delay Slot 4
+; CHECK-NEXT: mova r1, #0 // Delay Slot 3
+; CHECK-NEXT: vsel.8 x0, x2, x4, r1:r0 // Delay Slot 2
+; CHECK-NEXT: nop // Delay Slot 1
+entry:
+  %shuffle = shufflevector <64 x i8> %a, <64 x i8> %b, <64 x i32>
+  ret <64 x i8> %shuffle
+}
+
+define <16 x i32> @test_shuffle_vector_vsel_valid_mask_ordered_indices_with_undef(<16 x i32> noundef %a, <16 x i32> noundef %b) {
+; CHECK-LABEL: test_shuffle_vector_vsel_valid_mask_ordered_indices_with_undef:
+; CHECK: .p2align 4
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv
+; CHECK-NEXT: nop // Delay Slot 5
+; CHECK-NEXT: nop // Delay Slot 4
+; CHECK-NEXT: movxm r16, #65520 // Delay Slot 3
+; CHECK-NEXT: vsel.32 x0, x2, x4, r16 // Delay Slot 2
+; CHECK-NEXT: nop // Delay Slot 1
+entry:
+  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32>
+  ret <16 x i32> %shuffle
+}
+
+define <16 x i32> @test_shuffle_vector_vsel_valid_mask_with_undef(<16 x i32> noundef %a, <16 x i32> noundef %b) {
+; CHECK-LABEL: test_shuffle_vector_vsel_valid_mask_with_undef:
+; CHECK: .p2align 4
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv
+; CHECK-NEXT: nop // Delay Slot 5
+; CHECK-NEXT: nop // Delay Slot 4
+; CHECK-NEXT: movxm r16, #64992 // Delay Slot 3
+; CHECK-NEXT: vsel.32 x0, x2, x4, r16 // Delay Slot 2
+; CHECK-NEXT: nop // Delay Slot 1
+entry:
+  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32>
+  ret <16 x i32> %shuffle
+}
+
+define <16 x i32> @test_shuffle_vector_vsel_valid_mask_mixed_indices(<16 x i32> noundef %a, <16 x i32> noundef %b) {
+; CHECK-LABEL: test_shuffle_vector_vsel_valid_mask_mixed_indices:
+; CHECK: .p2align 4
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv
+; CHECK-NEXT: nop // Delay Slot 5
+; CHECK-NEXT: nop // Delay Slot 4
+; CHECK-NEXT: movxm r16, #65521 // Delay Slot 3
+; CHECK-NEXT: vsel.32 x0, x2, x4, r16 // Delay Slot 2
+; CHECK-NEXT: nop // Delay Slot 1
+entry:
+  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32>
+  ret <16 x i32> %shuffle
+}
+
+define <16 x i32> @test_shuffle_vector_vsel_valid_mask_mixed_indices_with_undef(<16 x i32> noundef %a, <16 x i32> noundef %b) {
+; CHECK-LABEL: test_shuffle_vector_vsel_valid_mask_mixed_indices_with_undef:
+; CHECK: .p2align 4
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv
+; CHECK-NEXT: nop // Delay Slot 5
+; CHECK-NEXT: nop // Delay Slot 4
+; CHECK-NEXT: movxm r16, #64465 // Delay Slot 3
+; CHECK-NEXT: vsel.32 x0, x2, x4, r16 // Delay Slot 2
+; CHECK-NEXT: nop // Delay Slot 1
+entry:
+  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32>
+  ret <16 x i32> %shuffle
+}
+
+define <16 x i32> @test_shuffle_vector_vsel_invalid_mask(<16 x i32> noundef %a, <16 x i32> noundef %b) {
+; CHECK-LABEL: test_shuffle_vector_vsel_invalid_mask:
+; CHECK: .p2align 4
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: nopx ; vextract.32 r0, x2, #0, vaddsign1
+; CHECK-NEXT: vextract.32 r1, x2, #1, vaddsign1
+; CHECK-NEXT: vpush.hi.32 x0, x0, r0
+; CHECK-NEXT: vpush.hi.32 x0, x0, r1
+; CHECK-NEXT: vpush.hi.32 x0, x0, r1
+; CHECK-NEXT: vpush.hi.32 x0, x0, r1
+; CHECK-NEXT: vpush.hi.32 x0, x0, r1
+; CHECK-NEXT: vpush.hi.32 x0, x0, r1
+; CHECK-NEXT: vpush.hi.32 x0, x0, r1
+; CHECK-NEXT: vpush.hi.32 x0, x0, r1
+; CHECK-NEXT: vpush.hi.32 x0, x0, r1
+; CHECK-NEXT: vpush.hi.32 x0, x0, r1
+; CHECK-NEXT: vpush.hi.32 x0, x0, r1
+; CHECK-NEXT: vpush.hi.32 x0, x0, r1
+; CHECK-NEXT: ret lr
+; CHECK-NEXT: vpush.hi.32 x0, x0, r1 // Delay Slot 5
+; CHECK-NEXT: vpush.hi.32 x0, x0, r1 // Delay Slot 4
+; CHECK-NEXT: vpush.hi.32 x0, x0, r1 // Delay Slot 3
+; CHECK-NEXT: vpush.hi.32 x0, x0, r1 // Delay Slot 2
+; CHECK-NEXT: nop // Delay Slot 1
+entry:
+  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32>
+  ret <16 x i32> %shuffle
+}
+
+define <8 x i32> @test_shuffle_vector_to_extract_broadcast_vecElem(<8 x i32> noundef %a, <8 x i32> noundef %b) {
+; CHECK-LABEL: test_shuffle_vector_to_extract_broadcast_vecElem:
+; CHECK: .p2align 4
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv
+; CHECK-NEXT: nopx // Delay Slot 5
+; CHECK-NEXT: vextract.32 r0, x2, #2, vaddsign1 // Delay Slot 4
+; CHECK-NEXT: nop // Delay Slot 3
+; CHECK-NEXT: vbcst.32 x0, r0 // Delay Slot 2
+; CHECK-NEXT: nop // Delay Slot 1
+entry:
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32>
+  ret <8 x i32> %shuffle
+}
+
+define <16 x i16> @test_shuffle_vector_to_extract_broadcast_s16(<16 x i16> noundef %a, <16 x i16> noundef %b) {
+; CHECK-LABEL: test_shuffle_vector_to_extract_broadcast_s16:
+; CHECK: .p2align 4
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv
+; CHECK-NEXT: nopx // Delay Slot 5
+; CHECK-NEXT: vextract.16 r0, x2, #2, vaddsign1 // Delay Slot 4
+; CHECK-NEXT: nop // Delay Slot 3
+; CHECK-NEXT: vbcst.16 x0, r0 // Delay Slot 2
+; CHECK-NEXT: nop // Delay Slot 1
+entry:
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32>
+  ret <16 x i16> %shuffle
+}
+
+; Tests for combining shuffle_vector into extract_subvector.
+; Destination vector types with a native (512-bit) source vector type.
+
+define <4 x i8> @test_shuffle_vector_to_extract_subvec_4xs8Dst(<64 x i8> noundef %a, <64 x i8> noundef %b) {
+; CHECK-LABEL: test_shuffle_vector_to_extract_subvec_4xs8Dst:
+; CHECK: .p2align 4
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: ret lr
+; CHECK-NEXT: nop // Delay Slot 5
+; CHECK-NEXT: nop // Delay Slot 4
+; CHECK-NEXT: nop // Delay Slot 3
+; CHECK-NEXT: vextract.32 r0, x0, #1, vaddsign1 // Delay Slot 2
+; CHECK-NEXT: nop // Delay Slot 1
+entry:
+  %shuffle = shufflevector <64 x i8> %a, <64 x i8> %b, <4 x i32>
+  ret <4 x i8> %shuffle
+}
+
+define <2 x i16> @test_shuffle_vector_to_extract_subvec_2xs16Dst(<32 x i16> noundef %a, <32 x i16> noundef %b) {
+; CHECK-LABEL: test_shuffle_vector_to_extract_subvec_2xs16Dst:
+; CHECK: .p2align 4
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: ret lr
+; CHECK-NEXT: nop // Delay Slot 5
+; CHECK-NEXT: nop // Delay Slot 4
+; CHECK-NEXT: nop // Delay Slot 3
+; CHECK-NEXT: vextract.32 r0, x0, #0, vaddsign1 // Delay Slot 2
+; CHECK-NEXT: nop // Delay Slot 1
+entry:
+  %shuffle = shufflevector <32 x i16> %a, <32 x i16> %b, <2 x i32>
+  ret <2 x i16> %shuffle
+}
+
+define <8 x i8> @test_shuffle_vector_to_extract_subvec_8xs8Dst(<64 x i8> noundef %a, <64 x i8> noundef %b) {
+; CHECK-LABEL: test_shuffle_vector_to_extract_subvec_8xs8Dst:
+; CHECK: .p2align 4
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: ret lr
+; CHECK-NEXT: nop // Delay Slot 5
+; CHECK-NEXT: nop // Delay Slot 4
+; CHECK-NEXT: nop // Delay Slot 3
+; CHECK-NEXT: vextract.64 r1:r0, x0, #0, vaddsign1 // Delay Slot 2
+; CHECK-NEXT: nop // Delay Slot 1
+entry:
+  %shuffle = shufflevector <64 x i8> %a, <64 x i8> %b, <8 x i32>
+  ret <8 x i8> %shuffle
+}
+
+define <4 x i16> @test_shuffle_vector_to_extract_subvec_4xs16Dst(<32 x i16> noundef %a, <32 x i16> noundef %b) {
+; CHECK-LABEL: test_shuffle_vector_to_extract_subvec_4xs16Dst:
+; CHECK: .p2align 4
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: ret lr
+; CHECK-NEXT: nop // Delay Slot 5
+; CHECK-NEXT: nop // Delay Slot 4
+; CHECK-NEXT: nop // Delay Slot 3
+; CHECK-NEXT: vextract.64 r1:r0, x0, #1, vaddsign1 // Delay Slot 2
+; CHECK-NEXT: nop // Delay Slot 1
+entry:
+  %shuffle = shufflevector <32 x i16> %a, <32 x i16> %b, <4 x i32>
+  ret <4 x i16> %shuffle
+}
+
+define <2 x i32> @test_shuffle_vector_to_extract_subvec_2xs32Dst(<16 x i32> noundef %a, <16 x i32> noundef %b) {
+; CHECK-LABEL: test_shuffle_vector_to_extract_subvec_2xs32Dst:
+; CHECK: .p2align 4
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: ret lr
+; CHECK-NEXT: nop // Delay Slot 5
+; CHECK-NEXT: nop // Delay Slot 4
+; CHECK-NEXT: nop // Delay Slot 3
+; CHECK-NEXT: vextract.64 r1:r0, x0, #0, vaddsign1 // Delay Slot 2
+; CHECK-NEXT: nop // Delay Slot 1
+entry:
+  %shuffle = shufflevector <16 x i32> %a, <16 x i32> %b, <2 x i32>
+  ret <2 x i32> %shuffle
+}
+
+; Non-native source vector types (256-, 1024- and 2048-bit sources).
+
+define <2 x i32> @test_shuffle_vector_to_extract_subvec_256BitSrc(<8 x i32> noundef %a, <8 x i32> noundef %b) {
+; CHECK-LABEL: test_shuffle_vector_to_extract_subvec_256BitSrc:
+; CHECK: .p2align 4
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: ret lr
+; CHECK-NEXT: nop // Delay Slot 5
+; CHECK-NEXT: nop // Delay Slot 4
+; CHECK-NEXT: nop // Delay Slot 3
+; CHECK-NEXT: vextract.64 r1:r0, x0, #3, vaddsign1 // Delay Slot 2
+; CHECK-NEXT: nop // Delay Slot 1
+entry:
+  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <2 x i32>
+  ret <2 x i32> %shuffle
+}
+
+define <2 x i32> @test_shuffle_vector_to_extract_subvec_1024BitSrc(<32 x i32> noundef %a, <32 x i32> noundef %b) {
+; CHECK-LABEL: test_shuffle_vector_to_extract_subvec_1024BitSrc:
+; CHECK: .p2align 4
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: ret lr
+; CHECK-NEXT: nop // Delay Slot 5
+; CHECK-NEXT: nop // Delay Slot 4
+; CHECK-NEXT: nop // Delay Slot 3
+; CHECK-NEXT: vextract.64 r1:r0, x5, #0, vaddsign1 // Delay Slot 2
+; CHECK-NEXT: nop // Delay Slot 1
+entry:
+  %shuffle = shufflevector <32 x i32> %a, <32 x i32> %b, <2 x i32>
+  ret <2 x i32> %shuffle
+}
+
+define <2 x i32> @test_shuffle_vector_to_extract_subvec_2048BitSrc(<64 x i32> noundef %a, <64 x i32> noundef %b) {
+; CHECK-LABEL: test_shuffle_vector_to_extract_subvec_2048BitSrc:
+; CHECK: .p2align 4
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: nopa ; nopb ; nops ; nopx ; mov p0, sp; nopv
+; CHECK-NEXT: padda [p0], #-256
+; CHECK-NEXT: vlda bmll0, [p0, #0]
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: nop
+; CHECK-NEXT: ret lr
+; CHECK-NEXT: nop // Delay Slot 5
+; CHECK-NEXT: nop // Delay Slot 4
+; CHECK-NEXT: vmov x0, bmll0 // Delay Slot 3
+; CHECK-NEXT: vextract.64 r1:r0, x0, #0, vaddsign1 // Delay Slot 2
+; CHECK-NEXT: nop // Delay Slot 1
+entry:
+  %shuffle = shufflevector <64 x i32> %a, <64 x i32> %b, <2 x i32>
+  ret <2 x i32> %shuffle
+}
+
+; Extract a subvector and broadcast it.
+
+define <16 x i16> @test_shuffle_vector_to_extract_broadcast_subvec(<16 x i16> noundef %a, <16 x i16> noundef %b) {
+; CHECK-LABEL: test_shuffle_vector_to_extract_broadcast_subvec:
+; CHECK: .p2align 4
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: nopa ; nopb ; nops ; ret lr; nopm ; nopv
+; CHECK-NEXT: nopx // Delay Slot 5
+; CHECK-NEXT: vextract.64 r1:r0, x2, #1, vaddsign1 // Delay Slot 4
+; CHECK-NEXT: nop // Delay Slot 3
+; CHECK-NEXT: vbcst.64 x0, r1:r0 // Delay Slot 2
+; CHECK-NEXT: nop // Delay Slot 1
+entry:
+  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32>
+  ret <16 x i16> %shuffle
+}