Combine (shl (and x, imm1), imm2) to (shl x, imm2) #246

Merged
merged 1 commit into from
Jan 9, 2025
5 changes: 5 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -399,6 +399,11 @@ class CombinerHelper {
void applyCombineTruncOfExt(MachineInstr &MI,
std::pair<Register, unsigned> &MatchInfo);

/// Transform (shl (and x, imm1), imm2) to (shl x, imm2)
/// if (~imm1 << imm2) = 0
bool matchCombineShlOfAnd(MachineInstr &MI, Register &Reg);
void applyCombineShlOfAnd(MachineInstr &MI, Register &Reg);

/// Transform trunc (shl x, K) to shl (trunc x), K
/// if K < VT.getScalarSizeInBits().
///
11 changes: 10 additions & 1 deletion llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -822,6 +822,15 @@ def trunc_ext_fold: GICombineRule <
(apply [{ Helper.applyCombineTruncOfExt(*${root}, ${matchinfo}); }])
>;

// Under certain conditions, transform:
// (shl (and x, imm1), imm2) -> (shl x, imm2), when (~imm1 << imm2) == 0
def shl_and_fold: GICombineRule <
(defs root:$root, register_matchinfo:$matchinfo),
(match (wip_match_opcode G_SHL):$root,
[{ return Helper.matchCombineShlOfAnd(*${root}, ${matchinfo}); }]),
(apply [{ Helper.applyCombineShlOfAnd(*${root}, ${matchinfo}); }])
>;

// Under certain conditions, transform:
// trunc (shl x, K) -> shl (trunc x), K
// trunc ([al]shr x, K) -> (trunc ([al]shr (trunc x), K))
@@ -1588,7 +1597,7 @@ def identity_combines : GICombineGroup<[select_same_val, right_identity_zero,
bitcast_bitcast_fold, fptrunc_fpext_fold,
right_identity_neg_zero_fp,
right_identity_neg_one_fp,
combine_inttoptr_constant]>;
combine_inttoptr_constant, shl_and_fold]>;

def const_combines : GICombineGroup<[constant_fold_fp_ops, const_ptradd_to_i2p,
overlapping_and, mulo_by_2, mulo_by_0,
33 changes: 33 additions & 0 deletions llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -2553,6 +2553,39 @@ void CombinerHelper::applyCombineTruncOfExt(
MI.eraseFromParent();
}

bool CombinerHelper::matchCombineShlOfAnd(MachineInstr &MI, Register &Reg) {
// We're trying to match the following pattern:
// %t = G_AND %x, imm1
// %root = G_SHL %t, imm2
// -->
// %root = G_SHL %x, imm2
// Where (~imm1 << imm2) = 0
assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected a G_SHL");
const Register DstReg = MI.getOperand(0).getReg();
const Register SrcReg = MI.getOperand(1).getReg();
const LLT SrcTy = MRI.getType(SrcReg);
const unsigned Size = SrcTy.getSizeInBits();

// Try to match shl (and x, imm1), imm2
int64_t ShiftImm, AndImm;
if (!mi_match(DstReg, MRI,
m_GShl(m_OneNonDBGUse(m_GAnd(m_Reg(Reg), m_ICst(AndImm))),
@martien-de-jong (Collaborator) commented on Jan 9, 2025:
nit: I don't think the m_OneNonDBGUse condition is tested, but I also don't think it's necessary. We just remove a use of the AND. If it's the last use, DCE will remove it; if it's not, it will serve the other uses.
m_ICst(ShiftImm))))
return false;
// The AND can be removed only if every bit it clears (every zero bit in
// AndImm) is shifted out of the value by ShiftImm. If any cleared bit
// survives the shift, the AND is still significant and must be kept.
uint64_t Mask = ~0ULL >> (64 - Size);
return !((~AndImm << ShiftImm) & Mask);
}
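For illustration only, not part of the patch: a minimal sketch of the same matcher with the m_OneNonDBGUse wrapper dropped, along the lines of the reviewer's note above. If the G_AND has other users it simply keeps serving them, and DCE deletes it once this shift was its last use. The surrounding names (MI, MRI, Reg) are assumed to be the ones from matchCombineShlOfAnd.

// Sketch: match shl (and x, imm1), imm2 without requiring the G_AND to have
// a single non-debug use.
int64_t ShiftImm, AndImm;
if (!mi_match(MI.getOperand(0).getReg(), MRI,
              m_GShl(m_GAnd(m_Reg(Reg), m_ICst(AndImm)),
                     m_ICst(ShiftImm))))
  return false;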

void CombinerHelper::applyCombineShlOfAnd(MachineInstr &MI, Register &Reg) {
assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected a G_SHL");
Observer.changingInstr(MI);
MI.getOperand(1).setReg(Reg);
Observer.changedInstr(MI);
}

static LLT getMidVTForTruncRightShiftCombine(LLT ShiftTy, LLT TruncTy) {
const unsigned ShiftSize = ShiftTy.getScalarSizeInBits();
const unsigned TruncSize = TruncTy.getScalarSizeInBits();
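Before the new MIR tests, a small standalone check of the legality condition, assuming nothing beyond plain unsigned 64-bit arithmetic; the helper name shlOfAndIsRedundant is made up for illustration and mirrors the masking done in matchCombineShlOfAnd above.

#include <cassert>
#include <cstdint>

// The AND in shl (and x, AndImm), ShiftImm is redundant iff every bit it
// clears is shifted out of an integer of width Size, i.e. (~AndImm << ShiftImm)
// has no bits left inside the type.
static bool shlOfAndIsRedundant(uint64_t AndImm, uint64_t ShiftImm,
                                unsigned Size) {
  uint64_t Mask = (Size == 64) ? ~0ULL : ((1ULL << Size) - 1);
  return ((~AndImm << ShiftImm) & Mask) == 0;
}

int main() {
  assert(shlOfAndIsRedundant(255, 8, 16));     // s16: and 255, shl 8   -> fold
  assert(!shlOfAndIsRedundant(15, 4, 16));     // s16: and 15,  shl 4   -> keep
  assert(shlOfAndIsRedundant(65535, 16, 32));  // s32: and 65535, shl 16 -> fold
  assert(!shlOfAndIsRedundant(255, 16, 32));   // s32: and 255, shl 16  -> keep
  return 0;
}

The four cases correspond to the positive and negative MIR tests that follow.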
152 changes: 152 additions & 0 deletions llvm/test/CodeGen/AArch64/GlobalISel/combine-shl-and.mir
@@ -0,0 +1,152 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# Modifications (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s

---
name: test_combine_shl_of_and_I16_shift_8
legalized: true
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $w0
; CHECK-LABEL: name: test_combine_shl_of_and_I16_shift_8
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[C]](s16)
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[SHL]](s16)
; CHECK-NEXT: $w0 = COPY [[SEXT]](s32)
%0:_(s32) = COPY $w0
%1:_(s16) = G_CONSTANT i16 8
%2:_(s16) = G_CONSTANT i16 255
%3:_(s16) = G_TRUNC %0
%4:_(s16) = G_AND %3, %2
%5:_(s16) = G_SHL %4, %1
%6:_(s32) = G_SEXT %5
$w0 = COPY %6(s32)
...
---
# Negative test case: the shift amount (4) does not push all of the bits cleared
# by the mask (15) out of the 16-bit value, so (~imm1 << imm2) != 0 and the
# combine must not fire.
name: test_combine_shl_of_and_I16_shift_4_neg
legalized: true
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $w0
; CHECK-LABEL: name: test_combine_shl_of_and_I16_shift_4_neg
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 4
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 15
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C1]]
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[AND]], [[C]](s16)
; CHECK-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[SHL]](s16)
; CHECK-NEXT: $w0 = COPY [[SEXT]](s32)
%0:_(s32) = COPY $w0
%1:_(s16) = G_CONSTANT i16 4
%2:_(s16) = G_CONSTANT i16 15
%3:_(s16) = G_TRUNC %0
%4:_(s16) = G_AND %3, %2
%5:_(s16) = G_SHL %4, %1
%6:_(s32) = G_SEXT %5
$w0 = COPY %6(s32)
...
---
name: test_combine_shl_of_and_I32_shift_16
legalized: true
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $w0
; CHECK-LABEL: name: test_combine_shl_of_and_I32_shift_16
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
; CHECK-NEXT: $w0 = COPY [[SHL]](s32)
%0:_(s32) = COPY $w0
%1:_(s32) = G_CONSTANT i32 16
%2:_(s32) = G_CONSTANT i32 65535
%3:_(s32) = G_AND %0, %2
%4:_(s32) = G_SHL %3, %1
$w0 = COPY %4(s32)
...
---
name: test_combine_shl_of_and_I32_shift_24
legalized: true
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $w0
; CHECK-LABEL: name: test_combine_shl_of_and_I32_shift_24
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
; CHECK-NEXT: $w0 = COPY [[SHL]](s32)
%0:_(s32) = COPY $w0
%1:_(s32) = G_CONSTANT i32 24
%2:_(s32) = G_CONSTANT i32 16777215
%3:_(s32) = G_AND %0, %2
%4:_(s32) = G_SHL %3, %1
$w0 = COPY %4(s32)
...
---
# Negative test case: the shift amount (8) does not push all of the bits cleared
# by the mask (255) out of the 32-bit value, so (~imm1 << imm2) != 0 and the
# combine must not fire.
name: test_combine_shl_of_and_I32_shift_8_neg
legalized: true
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $w0
; CHECK-LABEL: name: test_combine_shl_of_and_I32_shift_8_neg
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]]
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
; CHECK-NEXT: $w0 = COPY [[SHL]](s32)
%0:_(s32) = COPY $w0
%1:_(s32) = G_CONSTANT i32 8
%2:_(s32) = G_CONSTANT i32 255
%3:_(s32) = G_AND %0, %2
%4:_(s32) = G_SHL %3, %1
$w0 = COPY %4(s32)
...
---
# Negative test case: The AND and SHL operations cannot be combined because imm1 (255) and imm2 (16) do not satisfy the condition (~imm1 << imm2) = 0
name: test_combine_shl_of_and_I32_shift_16_neg
legalized: true
tracksRegLiveness: true
body: |
bb.0.entry:
liveins: $w0
; CHECK-LABEL: name: test_combine_shl_of_and_I32_shift_16_neg
; CHECK: liveins: $w0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]]
; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s32)
; CHECK-NEXT: $w0 = COPY [[SHL]](s32)
%0:_(s32) = COPY $w0
%1:_(s32) = G_CONSTANT i32 16
%2:_(s32) = G_CONSTANT i32 255
%3:_(s32) = G_AND %0, %2
%4:_(s32) = G_SHL %3, %1
$w0 = COPY %4(s32)
...
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AIE/aie2/bfloat16_to_float.ll
@@ -14,10 +14,10 @@ define dso_local noundef float @bfloat16_to_float_test(%class.bfloat16 %bf.coerc
; CHECK: .p2align 4
; CHECK-NEXT: // %bb.0: // %entry
; CHECK-NEXT: nopb ; nopa ; nops ; ret lr ; nopm ; nopv
; CHECK-NEXT: nop // Delay Slot 5
; CHECK-NEXT: nopx // Delay Slot 5
; CHECK-NEXT: nop // Delay Slot 4
; CHECK-NEXT: nop // Delay Slot 3
; CHECK-NEXT: mova r0, #16; extend.u16 r1, r1 // Delay Slot 2
; CHECK-NEXT: mova r0, #16 // Delay Slot 2
; CHECK-NEXT: lshl r0, r1, r0 // Delay Slot 1
entry:
%bf.coerce.fca.0.extract = extractvalue %class.bfloat16 %bf.coerce, 0
29 changes: 11 additions & 18 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/add.v2i16.ll
@@ -262,11 +262,10 @@ define <2 x i16> @v_add_v2i16_neg_inline_imm_hi(<2 x i16> %a) {
define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_splat(<2 x i16> inreg %a) {
; GFX7-LABEL: s_add_v2i16_neg_inline_imm_splat:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_sub_i32 s1, s1, 64
; GFX7-NEXT: s_sub_i32 s0, s0, 64
; GFX7-NEXT: s_and_b32 s1, s1, 0xffff
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
; GFX7-NEXT: s_add_i32 s1, s1, 0xffc00000
; GFX7-NEXT: s_or_b32 s0, s0, s1
; GFX7-NEXT: ; return to shader part epilog
;
@@ -304,11 +303,10 @@ define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_splat(<2 x i16> inreg %a) {
define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_lo(<2 x i16> inreg %a) {
; GFX7-LABEL: s_add_v2i16_neg_inline_imm_lo:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_add_i32 s1, s1, 4
; GFX7-NEXT: s_sub_i32 s0, s0, 64
; GFX7-NEXT: s_and_b32 s1, s1, 0xffff
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
; GFX7-NEXT: s_add_i32 s1, s1, 0x40000
; GFX7-NEXT: s_or_b32 s0, s0, s1
; GFX7-NEXT: ; return to shader part epilog
;
@@ -346,11 +344,10 @@ define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_lo(<2 x i16> inreg %a) {
define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_hi(<2 x i16> inreg %a) {
; GFX7-LABEL: s_add_v2i16_neg_inline_imm_hi:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_sub_i32 s1, s1, 64
; GFX7-NEXT: s_add_i32 s0, s0, 4
; GFX7-NEXT: s_and_b32 s1, s1, 0xffff
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
; GFX7-NEXT: s_add_i32 s1, s1, 0xffc00000
; GFX7-NEXT: s_or_b32 s0, s0, s1
; GFX7-NEXT: ; return to shader part epilog
;
@@ -388,9 +385,8 @@ define amdgpu_ps i32 @s_add_v2i16_neg_inline_imm_hi(<2 x i16> inreg %a) {
define amdgpu_ps i32 @s_add_v2i16(<2 x i16> inreg %a, <2 x i16> inreg %b) {
; GFX7-LABEL: s_add_v2i16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_add_i32 s1, s1, s3
; GFX7-NEXT: s_add_i32 s0, s0, s2
; GFX7-NEXT: s_and_b32 s1, s1, 0xffff
; GFX7-NEXT: s_add_i32 s1, s1, s3
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
; GFX7-NEXT: s_or_b32 s0, s0, s1
@@ -439,9 +435,8 @@ define amdgpu_ps i32 @s_add_v2i16_fneg_lhs(<2 x half> inreg %a, <2 x i16> inreg
; GFX7-NEXT: s_or_b32 s0, s1, s0
; GFX7-NEXT: s_xor_b32 s0, s0, 0x80008000
; GFX7-NEXT: s_lshr_b32 s1, s0, 16
; GFX7-NEXT: s_add_i32 s1, s1, s3
; GFX7-NEXT: s_add_i32 s0, s0, s2
; GFX7-NEXT: s_and_b32 s1, s1, 0xffff
; GFX7-NEXT: s_add_i32 s1, s1, s3
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
; GFX7-NEXT: s_or_b32 s0, s0, s1
@@ -495,9 +490,8 @@ define amdgpu_ps i32 @s_add_v2i16_fneg_rhs(<2 x i16> inreg %a, <2 x half> inreg
; GFX7-NEXT: s_or_b32 s2, s3, s2
; GFX7-NEXT: s_xor_b32 s2, s2, 0x80008000
; GFX7-NEXT: s_lshr_b32 s3, s2, 16
; GFX7-NEXT: s_add_i32 s1, s1, s3
; GFX7-NEXT: s_add_i32 s0, s0, s2
; GFX7-NEXT: s_and_b32 s1, s1, 0xffff
; GFX7-NEXT: s_add_i32 s1, s1, s3
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
; GFX7-NEXT: s_or_b32 s0, s0, s1
@@ -556,11 +550,10 @@ define amdgpu_ps i32 @s_add_v2i16_fneg_lhs_fneg_rhs(<2 x half> inreg %a, <2 x ha
; GFX7-NEXT: s_xor_b32 s1, s1, 0x80008000
; GFX7-NEXT: s_lshr_b32 s2, s0, 16
; GFX7-NEXT: s_lshr_b32 s3, s1, 16
; GFX7-NEXT: s_add_i32 s2, s2, s3
; GFX7-NEXT: s_add_i32 s0, s0, s1
; GFX7-NEXT: s_and_b32 s1, s2, 0xffff
; GFX7-NEXT: s_add_i32 s2, s2, s3
; GFX7-NEXT: s_and_b32 s0, s0, 0xffff
; GFX7-NEXT: s_lshl_b32 s1, s1, 16
; GFX7-NEXT: s_lshl_b32 s1, s2, 16
; GFX7-NEXT: s_or_b32 s0, s0, s1
; GFX7-NEXT: ; return to shader part epilog
;
10 changes: 1 addition & 9 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/andn2.ll
@@ -688,11 +688,9 @@ define <2 x i16> @v_andn2_v2i16(<2 x i16> %src0, <2 x i16> %src1) {
define amdgpu_ps i48 @s_andn2_v3i16(<3 x i16> inreg %src0, <3 x i16> inreg %src1) {
; GFX6-LABEL: s_andn2_v3i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_and_b32 s6, s6, 0xffff
; GFX6-NEXT: s_mov_b32 s0, -1
; GFX6-NEXT: s_and_b32 s5, s5, 0xffff
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
; GFX6-NEXT: s_and_b32 s3, s3, 0xffff
; GFX6-NEXT: s_mov_b32 s1, 0xffff
; GFX6-NEXT: s_or_b32 s6, s5, s6
; GFX6-NEXT: s_and_b32 s7, s7, 0xffff
@@ -741,11 +739,9 @@ define amdgpu_ps i48 @s_andn2_v3i16(<3 x i16> inreg %src0, <3 x i16> inreg %src1
define amdgpu_ps i48 @s_andn2_v3i16_commute(<3 x i16> inreg %src0, <3 x i16> inreg %src1) {
; GFX6-LABEL: s_andn2_v3i16_commute:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_and_b32 s6, s6, 0xffff
; GFX6-NEXT: s_mov_b32 s0, -1
; GFX6-NEXT: s_and_b32 s5, s5, 0xffff
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
; GFX6-NEXT: s_and_b32 s3, s3, 0xffff
; GFX6-NEXT: s_mov_b32 s1, 0xffff
; GFX6-NEXT: s_or_b32 s6, s5, s6
; GFX6-NEXT: s_and_b32 s7, s7, 0xffff
@@ -794,17 +790,15 @@ define amdgpu_ps i48 @s_andn2_v3i16_multi_use(<3 x i16> inreg %src0, <3 x i16> inr
define amdgpu_ps { i48, i48 } @s_andn2_v3i16_multi_use(<3 x i16> inreg %src0, <3 x i16> inreg %src1) {
; GFX6-LABEL: s_andn2_v3i16_multi_use:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_and_b32 s6, s6, 0xffff
; GFX6-NEXT: s_mov_b32 s0, -1
; GFX6-NEXT: s_and_b32 s5, s5, 0xffff
; GFX6-NEXT: s_lshl_b32 s6, s6, 16
; GFX6-NEXT: s_mov_b32 s1, 0xffff
; GFX6-NEXT: s_or_b32 s6, s5, s6
; GFX6-NEXT: s_and_b32 s7, s7, 0xffff
; GFX6-NEXT: s_xor_b64 s[6:7], s[6:7], s[0:1]
; GFX6-NEXT: s_and_b32 s1, s3, 0xffff
; GFX6-NEXT: s_and_b32 s0, s2, 0xffff
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_lshl_b32 s1, s3, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_and_b32 s1, s4, 0xffff
; GFX6-NEXT: s_and_b64 s[0:1], s[0:1], s[6:7]
@@ -866,10 +860,8 @@ define <3 x i16> @v_andn2_v3i16(<3 x i16> %src0, <3 x i16> %src1) {
; GFX6-LABEL: v_andn2_v3i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4
; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT: v_or_b32_e32 v3, v3, v4
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1