diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp index bfbf428376cc..5b57b02111c2 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp +++ b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp @@ -763,6 +763,8 @@ Register AIE2PInstrInfo::isLoadFromStackSlot(const MachineInstr &MI, case AIE2P::VLDA_PLFR_SPILL: case AIE2P::LDA_D_SPILL: case AIE2P::LDA_DS_SPILL: + case AIE2P::VLDA_EX_SPILL: + case AIE2P::VLDA_E_SPILL: break; } @@ -794,6 +796,8 @@ Register AIE2PInstrInfo::isStoreToStackSlot(const MachineInstr &MI, case AIE2P::VST_DM_SPILL: case AIE2P::VST_L_SPILL: case AIE2P::VST_Y_SPILL: + case AIE2P::VST_E_SPILL: + case AIE2P::VST_EX_SPILL: break; } @@ -865,6 +869,10 @@ void AIE2PInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, Opcode = AIE2P::ST_D_SPILL; } else if (regClassMatches(AIE2P::eDSRegClass, RC, SrcReg)) { Opcode = AIE2P::ST_DS_SPILL; + } else if (regClassMatches(AIE2P::EXPVEC64RegClass, RC, SrcReg)) { + Opcode = AIE2P::VST_E_SPILL; + } else if (regClassMatches(AIE2P::VEC576RegClass, RC, SrcReg)) { + Opcode = AIE2P::VST_EX_SPILL; } else if (regClassMatches(AIE2P::eSRegClass, RC, SrcReg) || regClassMatches(AIE2P::spill_eS_to_eRRegClass, RC, SrcReg)) { // Can't spill these directly. Need to bounce through a GPR. @@ -947,6 +955,10 @@ void AIE2PInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, Opcode = AIE2P::LDA_D_SPILL; } else if (regClassMatches(AIE2P::eDSRegClass, RC, DstReg)) { Opcode = AIE2P::LDA_DS_SPILL; + } else if (regClassMatches(AIE2P::EXPVEC64RegClass, RC, DstReg)) { + Opcode = AIE2P::VLDA_E_SPILL; + } else if (regClassMatches(AIE2P::VEC576RegClass, RC, DstReg)) { + Opcode = AIE2P::VLDA_EX_SPILL; } else if (regClassMatches(AIE2P::eSRegClass, RC, DstReg) || regClassMatches(AIE2P::spill_eS_to_eRRegClass, RC, DstReg)) { // Can't spill these directly. Need to bounce through a GPR. @@ -1053,6 +1065,18 @@ AIE2PInstrInfo::getSpillPseudoExpandInfo(const MachineInstr &MI) const { {AIE2P::LDA_dms_lda_spill, AIE2P::sub_hi_dim_then_sub_dim_size}, {AIE2P::LDA_dms_lda_spill, AIE2P::sub_hi_dim_then_sub_dim_stride}, {AIE2P::LDA_dms_lda_spill, AIE2P::sub_hi_dim_then_sub_dim_count}}; + case AIE2P::VLDA_E_SPILL: + return {{AIE2P::LDA_dms_lda_spill, AIE2P::sub_lo_exp}, + {AIE2P::LDA_dms_lda_spill, AIE2P::sub_hi_exp}}; + case AIE2P::VLDA_EX_SPILL: + return {{AIE2P::VLDA_dmx_lda_x_spill, AIE2P::sub_bfp16_x}, + {AIE2P::VLDA_E_SPILL, AIE2P::sub_bfp16_e}}; + case AIE2P::VST_E_SPILL: + return {{AIE2P::ST_dms_sts_spill, AIE2P::sub_lo_exp}, + {AIE2P::ST_dms_sts_spill, AIE2P::sub_hi_exp}}; + case AIE2P::VST_EX_SPILL: + return {{AIE2P::VST_dmx_sts_x_spill, AIE2P::sub_bfp16_x}, + {AIE2P::VST_E_SPILL, AIE2P::sub_bfp16_e}}; } llvm_unreachable("Un-implemented"); } diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.td b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.td index 1a6d785713cd..c641ab3d18f9 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.td +++ b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.td @@ -159,6 +159,8 @@ def ST_D_SPILL : Pseudo<(outs ), (ins eD:$src, c12n_step4:$imm), "st_d_spill", " def ST_DS_SPILL : Pseudo<(outs ), (ins eDS:$src, c12n_step4:$imm), "st_ds_spill", "$src, [sp, $imm]">; def VST_FIFO_SPILL : Pseudo<(outs ), (ins FIFO1024:$src, c16n_step64:$imm), "vst_fifo_spill", "${src}, [sp, $imm]">; def VST_PLFR_SPILL : Pseudo<(outs ), (ins ePSRFLdF:$src, c16n_step64:$imm), "vst_plfr_spill", "${src}, [sp, $imm]">; +def VST_EX_SPILL : Pseudo<(outs ), (ins VEC576:$src, c16n_step64:$imm), "vst_ex_spill", "${src}, [sp, $imm]">; +def VST_E_SPILL : Pseudo<(outs ), (ins EXPVEC64:$src, c12n_step4:$imm), "vst_e_spill", "$src, [sp, $imm]">; } let mayLoad = true, mayStore = false in { @@ -171,6 +173,8 @@ def VLDA_PLFR_SPILL : Pseudo<(outs ePSRFLdF:$dst), (ins c16n_step64:$imm), "vlda def VLDA_DM_SPILL : Pseudo<(outs ACC2048:$dst), (ins c16n_step64:$imm), "vlda_dm_spill", "${dst}, [sp, $imm]">; def LDA_D_SPILL : Pseudo<(outs eD:$dst), (ins c12n_step4:$imm), "lda_d_spill", "${dst}, [sp, $imm]">; def LDA_DS_SPILL : Pseudo<(outs eDS:$dst), (ins c12n_step4:$imm), "lda_ds_spill", "${dst}, [sp, $imm]">; +def VLDA_EX_SPILL : Pseudo<(outs VEC576:$dst), (ins c16n_step64:$imm), "vlda_ex_spill", "${dst}, [sp, $imm]">; +def VLDA_E_SPILL : Pseudo<(outs EXPVEC64:$dst), (ins c12n_step4:$imm), "vlda_e_spill", "${dst}, [sp, $imm]">; } } diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.cpp b/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.cpp index c249c7b7bb0a..6c8d537b3ded 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.cpp +++ b/llvm/lib/Target/AIE/aie2p/AIE2PRegisterInfo.cpp @@ -208,6 +208,10 @@ bool AIE2PRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, case AIE2P::VLDA_FIFO_SPILL: case AIE2P::VLDA_PLFR_SPILL: case AIE2P::VLDA_Y_SPILL: + case AIE2P::VST_EX_SPILL: + case AIE2P::VST_E_SPILL: + case AIE2P::VLDA_E_SPILL: + case AIE2P::VLDA_EX_SPILL: MI.getOperand(FIOperandNum).ChangeToImmediate(Offset); TII->expandSpillPseudo(MI, TRI, /*SubRegOffsetAlign=*/Align(4)); return true; diff --git a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-vconv-bfp16.mir b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-vconv-bfp16.mir index 667084827773..56d7b928c309 100644 --- a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-vconv-bfp16.mir +++ b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-vconv-bfp16.mir @@ -76,7 +76,7 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec512 = COPY $x1 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:expvec64 = COPY $e1 - ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:mbp2bp = REG_SEQUENCE [[COPY]], %subreg.sub_bfp16_x, [[COPY1]], %subreg.sub_bfp16_e + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vec576 = REG_SEQUENCE [[COPY]], %subreg.sub_bfp16_x, [[COPY1]], %subreg.sub_bfp16_e ; CHECK-NEXT: [[VCONV_bfp16ebs16_ebs8_:%[0-9]+]]:mexa = VCONV_bfp16ebs16_ebs8 [[REG_SEQUENCE]], implicit-def $srf2bflags, implicit $crf2bmask, implicit $crrnd ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vec512 = COPY [[VCONV_bfp16ebs16_ebs8_]].sub_bfp16_x ; CHECK-NEXT: [[COPY3:%[0-9]+]]:expvec64 = COPY [[VCONV_bfp16ebs16_ebs8_]].sub_bfp16_e diff --git a/llvm/test/CodeGen/AIE/aie2p/eliminate-frame-index.mir b/llvm/test/CodeGen/AIE/aie2p/eliminate-frame-index.mir index 639e2a6800a8..5219309c4a91 100644 --- a/llvm/test/CodeGen/AIE/aie2p/eliminate-frame-index.mir +++ b/llvm/test/CodeGen/AIE/aie2p/eliminate-frame-index.mir @@ -189,6 +189,43 @@ body: | VST_L_SPILL $l7, %stack.1, implicit $sp ... +--- +name: test_vec64_exp +alignment: 16 +stack: + - { id: 0, name: '', type: spill-slot, offset: 0, size: 8, alignment: 4 } +body: | + bb.0 (align 16): + ; CHECK-LABEL: name: test_vec64_exp + ; CHECK: frame-setup PADDXM_pstm_sp_imm 64, implicit-def $sp, implicit $sp + ; CHECK-NEXT: $el0 = LDA_dms_lda_spill -64, implicit $sp + ; CHECK-NEXT: $eh0 = LDA_dms_lda_spill -60, implicit $sp + ; CHECK-NEXT: ST_dms_sts_spill $el0, -64, implicit $sp + ; CHECK-NEXT: ST_dms_sts_spill $eh0, -60, implicit $sp + $e0 = VLDA_E_SPILL %stack.0, implicit $sp + VST_E_SPILL $e0, %stack.0, implicit $sp +... + + +--- +name: test_vec576 +alignment: 16 +stack: + - { id: 0, name: '', type: spill-slot, offset: 0, size: 72, alignment: 4 } +body: | + bb.0 (align 16): + ; CHECK-LABEL: name: test_vec576 + ; CHECK: frame-setup PADDXM_pstm_sp_imm 128, implicit-def $sp, implicit $sp + ; CHECK-NEXT: $x0 = VLDA_dmx_lda_x_spill -128, implicit $sp + ; CHECK-NEXT: $el0 = LDA_dms_lda_spill -64, implicit $sp + ; CHECK-NEXT: $eh0 = LDA_dms_lda_spill -60, implicit $sp + ; CHECK-NEXT: VST_dmx_sts_x_spill $x0, -128, implicit $sp + ; CHECK-NEXT: ST_dms_sts_spill $el0, -64, implicit $sp + ; CHECK-NEXT: ST_dms_sts_spill $eh0, -60, implicit $sp + $ex0 = VLDA_EX_SPILL %stack.0, implicit $sp + VST_EX_SPILL $ex0, %stack.0, implicit $sp +... + --- name: test_fifo_lf diff --git a/llvm/test/CodeGen/AIE/aie2p/spill/spill-reload-vregs.mir b/llvm/test/CodeGen/AIE/aie2p/spill/spill-reload-vregs.mir index 4f40f4bc145f..4863869be3df 100644 --- a/llvm/test/CodeGen/AIE/aie2p/spill/spill-reload-vregs.mir +++ b/llvm/test/CodeGen/AIE/aie2p/spill/spill-reload-vregs.mir @@ -114,3 +114,54 @@ body: | PseudoRET implicit $lr, implicit %0 ... +--- +name: test_576_caller_saved +alignment: 16 +legalized: true +regBankSelected: true +selected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $lf0, $ex2 + + ; CHECK-LABEL: name: test_576_caller_saved + ; CHECK: liveins: $ex2, $lf0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: VST_EX_SPILL $ex2, %stack.0, implicit $sp :: (store (s576) into %stack.0, align 64) + ; CHECK-NEXT: PseudoJL 32, csr_aie2p, implicit-def $lr + ; CHECK-NEXT: renamable $ex0 = VLDA_EX_SPILL %stack.0, implicit $sp :: (load (s576) from %stack.0, align 64) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $ex0 + %0:vec576 = COPY $ex2 + PseudoJL 32, csr_aie2p, implicit-def $lr + PseudoRET implicit $lr, implicit %0 +... + +--- +name: test_576_scarce_regs +alignment: 16 +legalized: true +regBankSelected: true +selected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $bmll0, $r0, $p0, $ex1, $ex2, $ex3, $ex4, $ex5, $ex6, $ex7, $ex8, $ex9, $ex10, $ex11 + + ; CHECK-LABEL: name: test_576_scarce_regs + ; CHECK: liveins: $bmll0, $ex1, $ex2, $ex3, $ex4, $ex5, $ex6, $ex7, $ex8, $ex9, $ex10, $ex11, $p0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $ex0 = COPY $bmll0 + ; CHECK-NEXT: VST_EX_SPILL killed renamable $ex0, %stack.0, implicit $sp :: (store (s576) into %stack.0, align 64) + ; CHECK-NEXT: $ex0 = COPY $ex1 + ; CHECK-NEXT: ST_dms_sts_idx_imm $r0, $p0, 0, implicit $ex0, implicit $ex1, implicit $ex2, implicit $ex3, implicit $ex4, implicit $ex5, implicit $ex6, implicit $ex7, implicit $ex8, implicit $ex9, implicit $ex10, implicit $ex11 + ; CHECK-NEXT: renamable $ex0 = VLDA_EX_SPILL %stack.0, implicit $sp :: (load (s576) from %stack.0, align 64) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit killed renamable $ex0 + %0:vec576 = COPY $bmll0 ; -> only $ex0 is available + + $ex0 = COPY $ex1 + ST_dms_sts_idx_imm $r0, $p0, 0, implicit $ex0, implicit $ex1, implicit $ex2, implicit $ex3, implicit $ex4, implicit $ex5, implicit $ex6, implicit $ex7, implicit $ex8, implicit $ex9, implicit $ex10, implicit $ex11 + + PseudoRET implicit $lr, implicit %0 +... +