Skip to content

Commit

Permalink
[ARM] Add mayStore to more store instructions
Browse files Browse the repository at this point in the history
As in llvm#121565, we need to mark all stores as mayStore; hasSideEffects alone
is not enough to prevent loads from being moved past the instructions. Marking
the instructions as mayStore is also a sensible thing to do on its own.
  • Loading branch information
davemgreen committed Jan 13, 2025
1 parent 7e19103 commit a10ce71
Show file tree
Hide file tree
Showing 10 changed files with 59 additions and 32 deletions.
2 changes: 2 additions & 0 deletions llvm/lib/Target/ARM/ARMInstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -686,6 +686,8 @@ class AIstr_ex_or_rel<bits<2> opcod, bits<2> opcod2, dag oops, dag iops, InstrIt
let Inst{9-8} = opcod2;
let Inst{7-4} = 0b1001;
let Inst{3-0} = Rt;

let mayStore = 1;
}
// Atomic load/store instructions
class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
Expand Down
11 changes: 8 additions & 3 deletions llvm/lib/Target/ARM/ARMInstrInfo.td
Original file line number Diff line number Diff line change
Expand Up @@ -3388,6 +3388,8 @@ def STRD_POST: AI3ldstidx<0b1111, 0, 0, (outs GPR:$Rn_wb),

// STRT, STRBT, and STRHT

let mayStore = 1, hasSideEffects = 0 in {

def STRBT_POST_REG : AI2ldstidx<0, 1, 0, (outs GPR:$Rn_wb),
(ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset),
IndexModePost, StFrm, IIC_iStore_bh_ru,
Expand Down Expand Up @@ -3428,7 +3430,6 @@ def STRBT_POST
: ARMAsmPseudo<"strbt${q} $Rt, $addr",
(ins GPR:$Rt, addr_offset_none:$addr, pred:$q)>;

let mayStore = 1, hasSideEffects = 0 in {
def STRT_POST_REG : AI2ldstidx<0, 0, 0, (outs GPR:$Rn_wb),
(ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset),
IndexModePost, StFrm, IIC_iStore_ru,
Expand Down Expand Up @@ -3464,7 +3465,6 @@ def STRT_POST_IMM
let Inst{11-0} = offset{11-0};
let DecoderMethod = "DecodeAddrMode2IdxInstruction";
}
}

def STRT_POST
: ARMAsmPseudo<"strt${q} $Rt, $addr",
Expand Down Expand Up @@ -3493,7 +3493,6 @@ multiclass AI3strT<bits<4> op, string opc> {
}
}


defm STRHT : AI3strT<0b1011, "strht">;

def STL : AIstrrel<0b00, (outs), (ins GPR:$Rt, addr_offset_none:$addr),
Expand All @@ -3503,6 +3502,8 @@ def STLB : AIstrrel<0b10, (outs), (ins GPR:$Rt, addr_offset_none:$addr),
def STLH : AIstrrel<0b11, (outs), (ins GPR:$Rt, addr_offset_none:$addr),
NoItinerary, "stlh", "\t$Rt, $addr", []>;

} // mayStore = 1, hasSideEffects = 0

//===----------------------------------------------------------------------===//
// Load / store multiple Instructions.
//
Expand Down Expand Up @@ -5633,15 +5634,19 @@ multiclass LdSt2Cop<bit load, bit Dbit, string asm, list<dag> pattern> {
}
}

// Coprocessor load instructions (LDC, LDCL, LDC2, LDC2L). The enclosing `let`
// sets mayLoad = 1 on every record produced by these defm expansions.
// The LDC2/LDC2L forms additionally carry the IsARM and PreV8 predicates.
let mayLoad = 1 in {
defm LDC : LdStCop <1, 0, "ldc", [(int_arm_ldc timm:$cop, timm:$CRd, addrmode5:$addr)]>;
defm LDCL : LdStCop <1, 1, "ldcl", [(int_arm_ldcl timm:$cop, timm:$CRd, addrmode5:$addr)]>;
defm LDC2 : LdSt2Cop<1, 0, "ldc2", [(int_arm_ldc2 timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
defm LDC2L : LdSt2Cop<1, 1, "ldc2l", [(int_arm_ldc2l timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
}

// Coprocessor store instructions (STC, STCL, STC2, STC2L). The enclosing `let`
// sets mayStore = 1 on every record produced by these defm expansions, so the
// stores are explicitly modelled as writing memory.
// The STC2/STC2L forms additionally carry the IsARM and PreV8 predicates.
let mayStore = 1 in {
defm STC : LdStCop <0, 0, "stc", [(int_arm_stc timm:$cop, timm:$CRd, addrmode5:$addr)]>;
defm STCL : LdStCop <0, 1, "stcl", [(int_arm_stcl timm:$cop, timm:$CRd, addrmode5:$addr)]>;
defm STC2 : LdSt2Cop<0, 0, "stc2", [(int_arm_stc2 timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
defm STC2L : LdSt2Cop<0, 1, "stc2l", [(int_arm_stc2l timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[IsARM,PreV8]>;
}

} // DecoderNamespace = "CoProc"

Expand Down
13 changes: 13 additions & 0 deletions llvm/lib/Target/ARM/ARMInstrNEON.td
Original file line number Diff line number Diff line change
Expand Up @@ -8186,6 +8186,7 @@ def VLD1LNdWB_register_Asm_32 :
(ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
rGPR:$Rm, pred:$p)>;

let mayStore = 1 in {

// VST1 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
Expand Down Expand Up @@ -8224,6 +8225,8 @@ def VST1LNdWB_register_Asm_32 :
(ins VecListOneDWordIndexed:$list, addrmode6align32:$addr,
rGPR:$Rm, pred:$p)>;

}

// VLD2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD2LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vld2${p}", ".8", "$list, $addr",
Expand Down Expand Up @@ -8282,6 +8285,7 @@ def VLD2LNqWB_register_Asm_32 :
(ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;

let mayStore = 1 in {

// VST2 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
Expand Down Expand Up @@ -8342,6 +8346,8 @@ def VST2LNqWB_register_Asm_32 :
(ins VecListTwoQWordIndexed:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;

}

// VLD3 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD3DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld3${p}", ".8", "$list, $addr",
Expand Down Expand Up @@ -8531,6 +8537,8 @@ def VLD3qWB_register_Asm_32 :
(ins VecListThreeQ:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;

let mayStore = 1 in {

// VST3 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST3LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst3${p}", ".8", "$list, $addr",
Expand Down Expand Up @@ -8650,6 +8658,8 @@ def VST3qWB_register_Asm_32 :
(ins VecListThreeQ:$list, addrmode6align64:$addr,
rGPR:$Rm, pred:$p)>;

}

// VLD4 all-lanes pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VLD4DUPdAsm_8 : NEONDataTypeAsmPseudoInst<"vld4${p}", ".8", "$list, $addr",
Expand Down Expand Up @@ -8853,6 +8863,8 @@ def VLD4qWB_register_Asm_32 :
(ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
rGPR:$Rm, pred:$p)>;

let mayStore = 1 in {

// VST4 single-lane pseudo-instructions. These need special handling for
// the lane index that an InstAlias can't handle, so we use these instead.
def VST4LNdAsm_8 : NEONDataTypeAsmPseudoInst<"vst4${p}", ".8", "$list, $addr",
Expand Down Expand Up @@ -8983,6 +8995,7 @@ def VST4qWB_register_Asm_32 :
NEONDataTypeAsmPseudoInst<"vst4${p}", ".32", "$list, $addr, $Rm",
(ins VecListFourQ:$list, addrmode6align64or128or256:$addr,
rGPR:$Rm, pred:$p)>;
}

// VMOV/VMVN takes an optional datatype suffix
defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm",
Expand Down
9 changes: 8 additions & 1 deletion llvm/lib/Target/ARM/ARMInstrThumb2.td
Original file line number Diff line number Diff line change
Expand Up @@ -1795,6 +1795,8 @@ def t2STRH_preidx: t2PseudoInst<(outs GPRnopc:$Rn_wb),
Sched<[WriteST]>;
}

let mayStore = 1, hasSideEffects = 0 in {

// F5.1.229 STR (immediate) T4
// .w suffixes; Constraints can't be used on t2InstAlias to describe
// "$Rn = $Rn_wb,@earlyclobber $Rn_wb" on POST or
Expand Down Expand Up @@ -1850,6 +1852,8 @@ def t2STRT : T2IstT<0b10, "strt", IIC_iStore_i>;
def t2STRBT : T2IstT<0b00, "strbt", IIC_iStore_bh_i>;
def t2STRHT : T2IstT<0b01, "strht", IIC_iStore_bh_i>;

} // mayStore = 1, hasSideEffects = 0

// ldrd / strd pre / post variants

let mayLoad = 1, hasSideEffects = 0 in
Expand Down Expand Up @@ -4482,16 +4486,19 @@ multiclass t2LdStCop<bits<4> op31_28, bit load, bit Dbit, string asm, list<dag>
}

// Thumb2 coprocessor load/store instructions. Everything defined here is
// placed in the "Thumb2CoProc" decoder namespace.
let DecoderNamespace = "Thumb2CoProc" in {
// Coprocessor loads: mayLoad = 1 is applied to every record these defm
// expansions produce. The LDC2/LDC2L forms also require PreV8 and IsThumb2.
let mayLoad = 1 in {
defm t2LDC : t2LdStCop<0b1110, 1, 0, "ldc", [(int_arm_ldc timm:$cop, timm:$CRd, addrmode5:$addr)]>;
defm t2LDCL : t2LdStCop<0b1110, 1, 1, "ldcl", [(int_arm_ldcl timm:$cop, timm:$CRd, addrmode5:$addr)]>;
defm t2LDC2 : t2LdStCop<0b1111, 1, 0, "ldc2", [(int_arm_ldc2 timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
defm t2LDC2L : t2LdStCop<0b1111, 1, 1, "ldc2l", [(int_arm_ldc2l timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;

}
// Coprocessor stores: mayStore = 1 is applied to every record these defm
// expansions produce. The STC2/STC2L forms also require PreV8 and IsThumb2.
let mayStore = 1 in {
defm t2STC : t2LdStCop<0b1110, 0, 0, "stc", [(int_arm_stc timm:$cop, timm:$CRd, addrmode5:$addr)]>;
defm t2STCL : t2LdStCop<0b1110, 0, 1, "stcl", [(int_arm_stcl timm:$cop, timm:$CRd, addrmode5:$addr)]>;
defm t2STC2 : t2LdStCop<0b1111, 0, 0, "stc2", [(int_arm_stc2 timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
defm t2STC2L : t2LdStCop<0b1111, 0, 1, "stc2l", [(int_arm_stc2l timm:$cop, timm:$CRd, addrmode5:$addr)]>, Requires<[PreV8,IsThumb2]>;
}
}


//===----------------------------------------------------------------------===//
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/tools/llvm-mca/ARM/cortex-a57-memory-instructions.s
Original file line number Diff line number Diff line change
Expand Up @@ -306,10 +306,10 @@
# CHECK-NEXT: 2 2 1.00 * strb r6, [r2], -r4
# CHECK-NEXT: 2 3 1.00 * strb r7, [r12, -r3, lsl #5]
# CHECK-NEXT: 2 2 1.00 * strb sp, [r7], r2, asr #12
# CHECK-NEXT: 2 1 1.00 U strbt r6, [r2], #12
# CHECK-NEXT: 2 1 1.00 U strbt r5, [r6], #-13
# CHECK-NEXT: 2 2 1.00 U strbt r4, [r9], r5
# CHECK-NEXT: 2 2 1.00 U strbt r3, [r8], -r2, lsl #3
# CHECK-NEXT: 2 1 1.00 * strbt r6, [r2], #12
# CHECK-NEXT: 2 1 1.00 * strbt r5, [r6], #-13
# CHECK-NEXT: 2 2 1.00 * strbt r4, [r9], r5
# CHECK-NEXT: 2 2 1.00 * strbt r3, [r8], -r2, lsl #3
# CHECK-NEXT: 1 1 1.00 * strd r0, r1, [r4]
# CHECK-NEXT: 1 1 1.00 * strd r2, r3, [r6, #1]
# CHECK-NEXT: 1 1 1.00 * strd r2, r3, [r6, r2]
Expand All @@ -332,10 +332,10 @@
# CHECK-NEXT: 2 1 1.00 * strh r1, [r2, -r1]!
# CHECK-NEXT: 2 1 1.00 * strh r9, [r7], r2
# CHECK-NEXT: 2 1 1.00 * strh r4, [r3], -r2
# CHECK-NEXT: 2 1 1.00 U strht r2, [r5], #76
# CHECK-NEXT: 2 1 1.00 U strht r8, [r1], #-25
# CHECK-NEXT: 2 1 1.00 U strht r5, [r3], r4
# CHECK-NEXT: 2 1 1.00 U strht r6, [r8], -r0
# CHECK-NEXT: 2 1 1.00 * strht r2, [r5], #76
# CHECK-NEXT: 2 1 1.00 * strht r8, [r1], #-25
# CHECK-NEXT: 2 1 1.00 * strht r5, [r3], r4
# CHECK-NEXT: 2 1 1.00 * strht r6, [r8], -r0

# CHECK: Resources:
# CHECK-NEXT: [0] - A57UnitB
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/tools/llvm-mca/ARM/cortex-a57-thumb.s
Original file line number Diff line number Diff line change
Expand Up @@ -1594,14 +1594,14 @@
# CHECK-NEXT: 1 1 1.00 * strh.w r8, [r8, r2, lsl #2]
# CHECK-NEXT: 1 1 1.00 * strh.w r7, [sp, r2, lsl #1]
# CHECK-NEXT: 1 1 1.00 * strh.w r7, [sp, r2]
# CHECK-NEXT: 2 1 1.00 U strht r1, [r2]
# CHECK-NEXT: 2 1 1.00 U strht r1, [r8]
# CHECK-NEXT: 2 1 1.00 U strht r1, [r8, #3]
# CHECK-NEXT: 2 1 1.00 U strht r1, [r8, #255]
# CHECK-NEXT: 1 1 1.00 U strt r1, [r2]
# CHECK-NEXT: 1 1 1.00 U strt r1, [r8]
# CHECK-NEXT: 1 1 1.00 U strt r1, [r8, #3]
# CHECK-NEXT: 1 1 1.00 U strt r1, [r8, #255]
# CHECK-NEXT: 2 1 1.00 * strht r1, [r2]
# CHECK-NEXT: 2 1 1.00 * strht r1, [r8]
# CHECK-NEXT: 2 1 1.00 * strht r1, [r8, #3]
# CHECK-NEXT: 2 1 1.00 * strht r1, [r8, #255]
# CHECK-NEXT: 1 1 1.00 * strt r1, [r2]
# CHECK-NEXT: 1 1 1.00 * strt r1, [r8]
# CHECK-NEXT: 1 1 1.00 * strt r1, [r8, #3]
# CHECK-NEXT: 1 1 1.00 * strt r1, [r8, #255]
# CHECK-NEXT: 0 0 0.00 U itet eq
# CHECK-NEXT: 1 1 0.50 subeq r1, r2, #4
# CHECK-NEXT: 1 1 0.50 subwne r5, r3, #1023
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/tools/llvm-mca/ARM/m4-int.s
Original file line number Diff line number Diff line change
Expand Up @@ -776,7 +776,7 @@ yield
# CHECK-NEXT: 1 1 1.00 * strb r0, [r1, r2]
# CHECK-NEXT: 1 1 1.00 * strb.w r0, [r1, r2]
# CHECK-NEXT: 1 1 1.00 * strb.w r0, [r1, r2, lsl #1]
# CHECK-NEXT: 1 1 1.00 U strbt r0, [r1, #1]
# CHECK-NEXT: 1 1 1.00 * strbt r0, [r1, #1]
# CHECK-NEXT: 1 1 1.00 * strd r0, r1, [r2, #4]
# CHECK-NEXT: 1 1 1.00 * strd r0, r1, [r2], #4
# CHECK-NEXT: 1 1 1.00 * strd r0, r1, [r2, #4]!
Expand All @@ -793,8 +793,8 @@ yield
# CHECK-NEXT: 1 1 1.00 * strh r0, [r1, r2]
# CHECK-NEXT: 1 1 1.00 * strh.w r0, [r1, r2]
# CHECK-NEXT: 1 1 1.00 * strh.w r0, [r1, r2, lsl #1]
# CHECK-NEXT: 1 1 1.00 U strht r0, [r1, #1]
# CHECK-NEXT: 1 1 1.00 U strt r0, [r1, #1]
# CHECK-NEXT: 1 1 1.00 * strht r0, [r1, #1]
# CHECK-NEXT: 1 1 1.00 * strt r0, [r1, #1]
# CHECK-NEXT: 1 1 1.00 U sub sp, #4
# CHECK-NEXT: 1 1 1.00 sub.w r0, sp, #1
# CHECK-NEXT: 1 1 1.00 subs.w r0, sp, #1
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/tools/llvm-mca/ARM/m55-int.s
Original file line number Diff line number Diff line change
Expand Up @@ -836,7 +836,7 @@ yield
# CHECK-NEXT: 1 1 1.00 * strb r0, [r1, r2]
# CHECK-NEXT: 1 1 1.00 * strb.w r0, [r1, r2]
# CHECK-NEXT: 1 1 1.00 * strb.w r0, [r1, r2, lsl #1]
# CHECK-NEXT: 1 1 1.00 U strbt r0, [r1, #1]
# CHECK-NEXT: 1 1 1.00 * strbt r0, [r1, #1]
# CHECK-NEXT: 1 2 1.00 * strd r0, r1, [r2, #4]
# CHECK-NEXT: 1 2 1.00 * strd r0, r1, [r2], #4
# CHECK-NEXT: 1 2 1.00 * strd r0, r1, [r2, #4]!
Expand All @@ -853,8 +853,8 @@ yield
# CHECK-NEXT: 1 1 1.00 * strh r0, [r1, r2]
# CHECK-NEXT: 1 1 1.00 * strh.w r0, [r1, r2]
# CHECK-NEXT: 1 1 1.00 * strh.w r0, [r1, r2, lsl #1]
# CHECK-NEXT: 1 1 1.00 U strht r0, [r1, #1]
# CHECK-NEXT: 1 1 1.00 U strt r0, [r1, #1]
# CHECK-NEXT: 1 1 1.00 * strht r0, [r1, #1]
# CHECK-NEXT: 1 1 1.00 * strt r0, [r1, #1]
# CHECK-NEXT: 1 1 1.00 U sub sp, #4
# CHECK-NEXT: 1 1 0.50 sub.w r0, sp, #1
# CHECK-NEXT: 1 1 0.50 subs.w r0, sp, #1
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/tools/llvm-mca/ARM/m7-int.s
Original file line number Diff line number Diff line change
Expand Up @@ -752,7 +752,7 @@ yield
# CHECK-NEXT: 1 3 1.00 * strb r0, [r1, r2]
# CHECK-NEXT: 1 3 1.00 * strb.w r0, [r1, r2]
# CHECK-NEXT: 1 3 1.00 * strb.w r0, [r1, r2, lsl #1]
# CHECK-NEXT: 1 3 1.00 U strbt r0, [r1, #1]
# CHECK-NEXT: 1 3 1.00 * strbt r0, [r1, #1]
# CHECK-NEXT: 1 3 1.00 * strd r0, r1, [r2, #4]
# CHECK-NEXT: 1 3 1.00 * strd r0, r1, [r2], #4
# CHECK-NEXT: 1 3 1.00 * strd r0, r1, [r2, #4]!
Expand All @@ -769,8 +769,8 @@ yield
# CHECK-NEXT: 1 3 1.00 * strh r0, [r1, r2]
# CHECK-NEXT: 1 3 1.00 * strh.w r0, [r1, r2]
# CHECK-NEXT: 1 3 1.00 * strh.w r0, [r1, r2, lsl #1]
# CHECK-NEXT: 1 3 1.00 U strht r0, [r1, #1]
# CHECK-NEXT: 1 3 1.00 U strt r0, [r1, #1]
# CHECK-NEXT: 1 3 1.00 * strht r0, [r1, #1]
# CHECK-NEXT: 1 3 1.00 * strt r0, [r1, #1]
# CHECK-NEXT: 1 1 0.50 subs r0, r1, #1
# CHECK-NEXT: 1 1 0.50 subs r0, #1
# CHECK-NEXT: 1 1 0.50 sub.w r0, r1, #1
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/tools/llvm-mca/ARM/m85-int.s
Original file line number Diff line number Diff line change
Expand Up @@ -865,7 +865,7 @@ yield.w
# CHECK-NEXT: 1 3 0.50 * strb r0, [r1, r2]
# CHECK-NEXT: 1 3 0.50 * strb.w r0, [r1, r2]
# CHECK-NEXT: 1 3 0.50 * strb.w r0, [r1, r2, lsl #1]
# CHECK-NEXT: 1 3 0.50 U strbt r0, [r1, #1]
# CHECK-NEXT: 1 3 0.50 * strbt r0, [r1, #1]
# CHECK-NEXT: 1 3 1.00 * strd r0, r1, [r2, #4]
# CHECK-NEXT: 1 3 1.00 * strd r0, r1, [r2], #4
# CHECK-NEXT: 1 3 1.00 * strd r0, r1, [r2, #4]!
Expand All @@ -882,8 +882,8 @@ yield.w
# CHECK-NEXT: 1 3 0.50 * strh r0, [r1, r2]
# CHECK-NEXT: 1 3 0.50 * strh.w r0, [r1, r2]
# CHECK-NEXT: 1 3 0.50 * strh.w r0, [r1, r2, lsl #1]
# CHECK-NEXT: 1 3 0.50 U strht r0, [r1, #1]
# CHECK-NEXT: 1 3 0.50 U strt r0, [r1, #1]
# CHECK-NEXT: 1 3 0.50 * strht r0, [r1, #1]
# CHECK-NEXT: 1 3 0.50 * strt r0, [r1, #1]
# CHECK-NEXT: 1 2 0.50 U sub sp, #4
# CHECK-NEXT: 1 1 0.50 sub.w r0, sp, #1
# CHECK-NEXT: 1 1 0.50 subs.w r0, sp, #1
Expand Down

0 comments on commit a10ce71

Please sign in to comment.