diff --git a/llvm/lib/Target/AIE/AIE2InstrInfo.cpp b/llvm/lib/Target/AIE/AIE2InstrInfo.cpp index 6a094d173d14..ff929385659c 100644 --- a/llvm/lib/Target/AIE/AIE2InstrInfo.cpp +++ b/llvm/lib/Target/AIE/AIE2InstrInfo.cpp @@ -223,6 +223,13 @@ unsigned AIE2InstrInfo::getOffsetMemOpcode(unsigned BaseMemOpcode) const { llvm_unreachable("not a generic load/store"); } +bool AIE2InstrInfo::isGenericOffsetMemOpcode(unsigned Opcode) const { + return ((Opcode == AIE2::G_AIE_OFFSET_STORE) || + (Opcode == AIE2::G_AIE_OFFSET_LOAD) || + (Opcode == AIE2::G_AIE_OFFSET_SEXTLOAD) || + (Opcode == AIE2::G_AIE_OFFSET_ZEXTLOAD)); +} + std::optional AIE2InstrInfo::getCombinedPostIncOpcode( MachineInstr &BaseMemI, MachineInstr &PostIncI, TypeSize Size) const { switch (PostIncI.getOpcode()) { diff --git a/llvm/lib/Target/AIE/AIE2InstrInfo.h b/llvm/lib/Target/AIE/AIE2InstrInfo.h index 21a2b4a1186a..0cbed40e2a67 100644 --- a/llvm/lib/Target/AIE/AIE2InstrInfo.h +++ b/llvm/lib/Target/AIE/AIE2InstrInfo.h @@ -83,6 +83,7 @@ class AIE2InstrInfo : public AIE2GenInstrInfo { bool isBooleanNoOp(unsigned Opc) const override; bool isBooleanNot(unsigned Opc) const override; bool isConstStep(const MachineInstr &MI, int64_t &Step) const override; + bool isGenericOffsetMemOpcode(unsigned Opcode) const override; bool verifyGenericInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override; diff --git a/llvm/lib/Target/AIE/AIE2InstructionSelector.cpp b/llvm/lib/Target/AIE/AIE2InstructionSelector.cpp index 75606807153b..5c87c3dc1023 100644 --- a/llvm/lib/Target/AIE/AIE2InstructionSelector.cpp +++ b/llvm/lib/Target/AIE/AIE2InstructionSelector.cpp @@ -52,7 +52,6 @@ class AIE2InstructionSelector : public AIEBaseInstructionSelector { MachineRegisterInfo &MRI) override; Register createSparseRegSequence(Register Vec, Register Mask, MachineRegisterInfo &MRI); - void insertPtrAddForOffset(MachineRegisterInfo &MRI, MachineInstr &MemI); void setCtrlRegister(MachineInstr &I, MachineRegisterInfo &MRI, Register 
CRReg, Register ValueReg); @@ -994,37 +993,6 @@ bool AIE2InstructionSelector::selectG_AIE_LOAD_UNPACK( return constrainSelectedInstRegOperands(*NewInstr.getInstr(), TII, TRI, RBI); } -void AIE2InstructionSelector::insertPtrAddForOffset(MachineRegisterInfo &MRI, - MachineInstr &MemI) { - // The offset is not an immediate or the immediate does not fit the immediate - // range. Instruction select PTR_ADD for the splitting of instruction. E.g.: - // $x0 = G_AIE_OFFSET_LOAD %ptr, %offset has to be selected to - // %new_ptr = PTR_ADD %ptr, %offset - // $wh0 = VLDA_dmw_lda_w_ag_idx_imm %new_ptr, #32 - // $wl0 = VLDA_dmw_lda_w_ag_idx_imm %new_ptr, #0 - - // This function only gets called for G_AIE_OFFSET_LOAD AND G_AIE_OFFSET_STORE - // Both instruction have the pointer and the offset in the same operands - assert((MemI.getOpcode() == AIE2::G_AIE_OFFSET_LOAD || - MemI.getOpcode() == AIE2::G_AIE_OFFSET_STORE) && - "Unexpected instruction in instrPtrAddForOffset"); - const unsigned PointerRegIndex = 1; - const unsigned OffsetRegIndex = 2; - - Register NewPtrReg = - MRI.cloneVirtualRegister(MemI.getOperand(PointerRegIndex).getReg()); - MachineInstrBuilder NewPtr = - MIB.buildInstr(TargetOpcode::G_PTR_ADD) - .addDef(NewPtrReg) - .addReg(MemI.getOperand(PointerRegIndex).getReg()) - .addReg(MemI.getOperand(OffsetRegIndex).getReg()); - - if (!selectImpl(*NewPtr.getInstr(), *CoverageInfo)) - llvm_unreachable("Unexpected failure selecting G_PTR_ADD"); - - MemI.getOperand(PointerRegIndex).setReg(NewPtrReg); -} - std::optional AIE2InstructionSelector::getCombinedOpcodeSRSUPS(const MachineInstr &MemOp, const MachineInstr &CombOp, diff --git a/llvm/lib/Target/AIE/AIEBaseInstrInfo.h b/llvm/lib/Target/AIE/AIEBaseInstrInfo.h index 9b948701f4bf..4d5becd74726 100644 --- a/llvm/lib/Target/AIE/AIEBaseInstrInfo.h +++ b/llvm/lib/Target/AIE/AIEBaseInstrInfo.h @@ -232,6 +232,8 @@ struct AIEBaseInstrInfo : public TargetInstrInfo { return false; } + virtual bool isGenericOffsetMemOpcode(unsigned 
Opcode) const { return false; } + // Used for Load/Store combiners virtual unsigned getOffsetMemOpcode(unsigned BaseMemOpcode) const { llvm_unreachable("Target didn't implement getOffsetMemOpcode"); diff --git a/llvm/lib/Target/AIE/AIEBaseInstructionSelector.cpp b/llvm/lib/Target/AIE/AIEBaseInstructionSelector.cpp index f7332f8b1c74..bd0911908507 100644 --- a/llvm/lib/Target/AIE/AIEBaseInstructionSelector.cpp +++ b/llvm/lib/Target/AIE/AIEBaseInstructionSelector.cpp @@ -755,3 +755,33 @@ void AIEBaseInstructionSelector::makeDeadMI(MachineInstr &MI, Def->setReg(NewReg); } } + +void AIEBaseInstructionSelector::insertPtrAddForOffset(MachineRegisterInfo &MRI, + MachineInstr &MemI) { + // The offset is not an immediate or the immediate does not fit the immediate + // range. Instruction select PTR_ADD for the splitting of instruction. E.g.: + // $x0 = G_AIE_OFFSET_LOAD %ptr, %offset has to be selected to + // %new_ptr = PTR_ADD %ptr, %offset + // $wh0 = VLDA_dmw_lda_w_ag_idx_imm %new_ptr, #32 + // $wl0 = VLDA_dmw_lda_w_ag_idx_imm %new_ptr, #0 + + // Only called for opcodes accepted by TII.isGenericOffsetMemOpcode(); the + // pointer is read from operand 1 and the offset from operand 2. + assert(TII.isGenericOffsetMemOpcode(MemI.getOpcode()) && + "Unexpected instruction in instrPtrAddForOffset"); + const unsigned PointerRegIndex = 1; + const unsigned OffsetRegIndex = 2; + + Register NewPtrReg = + MRI.cloneVirtualRegister(MemI.getOperand(PointerRegIndex).getReg()); + MachineInstrBuilder NewPtr = + MIB.buildInstr(TargetOpcode::G_PTR_ADD) + .addDef(NewPtrReg) + .addReg(MemI.getOperand(PointerRegIndex).getReg()) + .addReg(MemI.getOperand(OffsetRegIndex).getReg()); + + if (!selectImpl(*NewPtr.getInstr(), *CoverageInfo)) + llvm_unreachable("Unexpected failure selecting G_PTR_ADD"); + + MemI.getOperand(PointerRegIndex).setReg(NewPtrReg); +} diff --git a/llvm/lib/Target/AIE/AIEBaseInstructionSelector.h b/llvm/lib/Target/AIE/AIEBaseInstructionSelector.h index
500b562e976c..2a3679904a6d 100644 --- a/llvm/lib/Target/AIE/AIEBaseInstructionSelector.h +++ b/llvm/lib/Target/AIE/AIEBaseInstructionSelector.h @@ -157,6 +157,8 @@ class AIEBaseInstructionSelector : public InstructionSelector { bool selectVSUB_MIN_MAX(MachineInstr &I, MachineRegisterInfo &MRI, MachineIRBuilder &MIB); + void insertPtrAddForOffset(MachineRegisterInfo &MRI, MachineInstr &MemI); + protected: void makeDeadMI(MachineInstr &MI, MachineRegisterInfo &MRI); virtual std::optional diff --git a/llvm/lib/Target/AIE/AIECombine.td b/llvm/lib/Target/AIE/AIECombine.td index 17400173c079..5fb5ad8e6e2a 100644 --- a/llvm/lib/Target/AIE/AIECombine.td +++ b/llvm/lib/Target/AIE/AIECombine.td @@ -203,5 +203,7 @@ def AIE2PostLegalizerCustomCombiner def AIE2PPostLegalizerCustomCombiner : GICombiner<"AIE2PPostLegalizerCustomCombinerImpl", [ combine_load_store_increment, - combine_add_vector_elt_undef ]> { + combine_offset_load_store_ptradd, + combine_offset_load_store_share_ptradd, + combine_add_vector_elt_undef ]> { } diff --git a/llvm/lib/Target/AIE/AIECombinerHelper.cpp b/llvm/lib/Target/AIE/AIECombinerHelper.cpp index b61ab364c843..d7d8f48e86ba 100644 --- a/llvm/lib/Target/AIE/AIECombinerHelper.cpp +++ b/llvm/lib/Target/AIE/AIECombinerHelper.cpp @@ -58,7 +58,11 @@ MachineInstr *findPreIncMatch(MachineInstr &MemI, MachineRegisterInfo &MRI, const AIEBaseInstrInfo &TII) { // This is currently done with patterns in instruction selection. // No need to do it here. 
- if (MRI.getType(MemI.getOperand(0).getReg()).getSizeInBits() >= 1024) + MachineFunction &MF = *MemI.getMF(); + const Triple &TT = MF.getTarget().getTargetTriple(); + const unsigned VecSize = + MRI.getType(MemI.getOperand(0).getReg()).getSizeInBits(); + if ((TT.isAIE2() && VecSize >= 1024) || (TT.isAIE2P() && VecSize > 2048)) return nullptr; if (!EnableOffsetCombine) return nullptr; @@ -320,9 +324,12 @@ MachineInstr *findPostIncMatch(MachineInstr &MemI, MachineRegisterInfo &MRI, const AIEBaseInstrInfo &TII) { if (!EnablePostIncCombine) return nullptr; - if (MRI.getType(MemI.getOperand(0).getReg()).getSizeInBits() >= 1024) + MachineFunction &MF = *MemI.getMF(); + const Triple &TT = MF.getTarget().getTargetTriple(); + const unsigned VecSize = + MRI.getType(MemI.getOperand(0).getReg()).getSizeInBits(); + if ((TT.isAIE2() && VecSize >= 1024) || (TT.isAIE2P() && VecSize > 2048)) return nullptr; - Register Addr = MemI.getOperand(1).getReg(); for (auto &PtrInc : MRI.use_nodbg_instructions(Addr)) { if (MemI.getParent() != PtrInc.getParent()) diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp index 90ab6f14f05b..e0d6b0fcbfd6 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp +++ b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.cpp @@ -253,6 +253,13 @@ unsigned AIE2PInstrInfo::getOffsetMemOpcode(unsigned BaseMemOpcode) const { llvm_unreachable("not a generic load/store"); } +bool AIE2PInstrInfo::isGenericOffsetMemOpcode(unsigned Opcode) const { + return ((Opcode == AIE2P::G_AIE_OFFSET_STORE) || + (Opcode == AIE2P::G_AIE_OFFSET_LOAD) || + (Opcode == AIE2P::G_AIE_OFFSET_SEXTLOAD) || + (Opcode == AIE2P::G_AIE_OFFSET_ZEXTLOAD)); +} + std::optional AIE2PInstrInfo::getCombinedPostIncOpcode( MachineInstr &BaseMemI, MachineInstr &PostIncI, TypeSize Size) const { switch (PostIncI.getOpcode()) { diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.h b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.h index 832091aa340c..80af79d5e6e9 
100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.h +++ b/llvm/lib/Target/AIE/aie2p/AIE2PInstrInfo.h @@ -83,6 +83,7 @@ class AIE2PInstrInfo : public AIE2PGenInstrInfo { bool isBooleanNoOp(unsigned Opc) const override; bool isBooleanNot(unsigned Opc) const override; bool isConstStep(const MachineInstr &MI, int64_t &Step) const override; + bool isGenericOffsetMemOpcode(unsigned Opcode) const override; bool verifyGenericInstruction(const MachineInstr &MI, StringRef &ErrInfo) const override; diff --git a/llvm/lib/Target/AIE/aie2p/AIE2PInstructionSelector.cpp b/llvm/lib/Target/AIE/aie2p/AIE2PInstructionSelector.cpp index 36b1199c4c22..4212ccd1cd81 100644 --- a/llvm/lib/Target/AIE/aie2p/AIE2PInstructionSelector.cpp +++ b/llvm/lib/Target/AIE/aie2p/AIE2PInstructionSelector.cpp @@ -1774,7 +1774,6 @@ LoadStoreOpcodes AIE2PInstructionSelector::getLoadStoreOpcode( // Scalar loads are handled in tablegen patterns mostly and loads to ptr in // selectG_LOAD MachineInstr *PtrDef = MRI.getVRegDef(I.getOperand(1).getReg()); - // TODO: handle remaining load sizes if (LoadStoreSize == 128) { assert(RBID == AIE2P::VRegBankID && "128-bit vectors should be in the Vector Register Bank"); @@ -1812,7 +1811,7 @@ LoadStoreOpcodes AIE2PInstructionSelector::getLoadStoreOpcode( } llvm_unreachable( "512-bit vector type must be in AccRegBank or VRegBank " - "or FifoRegBankID"); + "or FifoRegBank"); } if (RBID == AIE2P::AccRegBankID) { return {/*ISelOpcode=*/AIE2P::VLDA_dmx_lda_bm_idx_imm, @@ -1830,7 +1829,7 @@ LoadStoreOpcodes AIE2PInstructionSelector::getLoadStoreOpcode( /*OffsetOpcode=*/{}}; } llvm_unreachable("512-bit vector type must be in AccRegBank or VRegBank " - "or FifoRegBankID"); + "or FifoRegBank"); } if (LoadStoreSize == 1024) { unsigned RBID = deriveRegBankID(I.getOperand(0).getReg(), MRI, RBI); @@ -1850,7 +1849,7 @@ LoadStoreOpcodes AIE2PInstructionSelector::getLoadStoreOpcode( /*OffsetOpcode=*/AIE2P::VLDA_dmx_lda_x_idx_imm}; } llvm_unreachable("1024-bit vector type must be in 
AccRegBank or VRegBank " - "or FifoRegBankID"); + "or FifoRegBank"); } else if (LoadStoreSize == 2048) { if (RBID == AIE2P::AccRegBankID) { return {/*ISelOpcode=*/AIE2P::VLDA_dmx_lda_bm_idx_imm, @@ -1862,7 +1861,6 @@ LoadStoreOpcodes AIE2PInstructionSelector::getLoadStoreOpcode( break; } case AIE2P::G_AIE_OFFSET_LOAD: { - // TODO: handle remaining load sizes if (LoadStoreSize == 128) { assert(RBID == AIE2P::VRegBankID && "128-bit vectors should be in the Vector Register Bank"); @@ -1894,7 +1892,36 @@ LoadStoreOpcodes AIE2PInstructionSelector::getLoadStoreOpcode( return {ISelOpcode, FitsImmediateRange, /*OffsetOpcode=*/AIE2P::VLDA_dmx_lda_x_idx_imm}; } - llvm_unreachable("512-bit vector type must be in AccRegBank or VRegBank"); + if (RBID == AIE2P::FifoRegBankID) { + ISelOpcode = FitsImmediateRange ? AIE2P::VLDA_dmx_lda_fifohl_idx_imm + : AIE2P::VLDA_dmx_lda_fifohl_idx; + return {ISelOpcode, FitsImmediateRange, + /*OffsetOpcode=*/AIE2P::VLDA_dmx_lda_fifohl_idx_imm}; + } + llvm_unreachable("Vector type must be in AccRegBank or VRegBank " + "or FifoRegBank"); + } else if (LoadStoreSize == 1024 || LoadStoreSize == 2048) { + FitsImmediateRange = + (LoadStoreSize == 1024) + ? 
checkImmediateRangeSplitting<4, 64, 64>(Offset) + : checkImmediateRangeSplitting<4, 64, 192>(Offset); + if (RBID == AIE2P::AccRegBankID) { + return {/*ISelOpcode=*/AIE2P::VLDA_dmx_lda_bm_idx_imm, + FitsImmediateRange, + /*OffsetOpcode=*/AIE2P::VLDA_dmx_lda_bm_idx_imm}; + } + if (RBID == AIE2P::VRegBankID) { + return {/*ISelOpcode=*/AIE2P::VLDA_dmx_lda_x_idx_imm, + FitsImmediateRange, + /*OffsetOpcode=*/AIE2P::VLDA_dmx_lda_x_idx_imm}; + } + if (RBID == AIE2P::FifoRegBankID) { + return {/*ISelOpcode=*/AIE2P::VLDA_dmx_lda_fifohl_idx_imm, + FitsImmediateRange, + /*OffsetOpcode=*/AIE2P::VLDA_dmx_lda_fifohl_idx_imm}; + } + llvm_unreachable("Vector type must be in AccRegBank or VRegBank " + "or FifoRegBank"); } if (LoadStoreSize == 20 || LoadStoreSize == 32) { FitsImmediateRange = checkImmediateRange<4, 4>(Offset); @@ -1935,7 +1962,6 @@ LoadStoreOpcodes AIE2PInstructionSelector::getLoadStoreOpcode( break; } case AIE2P::G_AIE_POSTINC_LOAD: { - // TODO: handle remaining load sizes if (LoadStoreSize == 128) { assert(RBID == AIE2P::VRegBankID && "128-bit vectors should be in the Vector Register Bank"); @@ -1953,7 +1979,8 @@ LoadStoreOpcodes AIE2PInstructionSelector::getLoadStoreOpcode( : AIE2P::VLDA_dmw_lda_w_pstm_nrm; return {ISelOpcode, FitsImmediateRange, /*OffsetOpcode=*/AIE2P::VLDA_dmw_lda_w_idx_imm}; - } else if (LoadStoreSize == 512) { + } else if (LoadStoreSize == 512 || LoadStoreSize == 1024 || + LoadStoreSize == 2048) { FitsImmediateRange = checkImmediateRange<4, 64>(Offset); if (RBID == AIE2P::AccRegBankID) { ISelOpcode = FitsImmediateRange ? AIE2P::VLDA_dmx_lda_bm_pstm_nrm_imm @@ -1967,7 +1994,15 @@ LoadStoreOpcodes AIE2PInstructionSelector::getLoadStoreOpcode( return {ISelOpcode, FitsImmediateRange, /*OffsetOpcode=*/AIE2P::VLDA_dmx_lda_x_idx_imm}; } - llvm_unreachable("512-bit vector type must be in AccRegBank or VRegBank"); + if (RBID == AIE2P::FifoRegBankID) { + ISelOpcode = FitsImmediateRange + ? 
AIE2P::VLDA_dmx_lda_fifohl_pstm_nrm_imm + : AIE2P::VLDA_dmx_lda_fifohl_pstm_nrm; + return {ISelOpcode, FitsImmediateRange, + /*OffsetOpcode=*/AIE2P::VLDA_dmx_lda_fifohl_idx_imm}; + } + llvm_unreachable("Vector type must be in AccRegBank or VRegBank " + "or FifoRegBank"); } if (LoadStoreSize == 20 || LoadStoreSize == 32) { FitsImmediateRange = checkImmediateRange<4, 4>(Offset); @@ -2019,14 +2054,19 @@ LoadStoreOpcodes AIE2PInstructionSelector::getLoadStoreOpcode( "256-bit vectors should be in the Vector Register Bank"); return {/*ISelOpcode=*/AIE2P::VLDA_2D_dmw_lda_w, NoImmediate, /*OffsetOpcode=*/{}}; - } else if (LoadStoreSize == 512) { + } else if (LoadStoreSize == 512 || LoadStoreSize == 1024 || + LoadStoreSize == 2048) { if (RBID == AIE2P::AccRegBankID) return {/*ISelOpcode=*/AIE2P::VLDA_2D_dmx_lda_bm, NoImmediate, - /*OffsetOpcode=*/{}}; + /*OffsetOpcode=*/{AIE2P::VLDA_dmx_lda_bm_idx_imm}}; if (RBID == AIE2P::VRegBankID) return {/*ISelOpcode=*/AIE2P::VLDA_2D_dmx_lda_x, NoImmediate, - /*OffsetOpcode=*/{}}; - llvm_unreachable("512-bit vector type must be in AccRegBank or VRegBank"); + /*OffsetOpcode=*/{AIE2P::VLDA_dmx_lda_x_idx_imm}}; + if (RBID == AIE2P::FifoRegBankID) + return {/*ISelOpcode=*/AIE2P::VLDA_2D_dmx_lda_fifohl, NoImmediate, + /*OffsetOpcode=*/{AIE2P::VLDA_dmx_lda_fifohl_idx_imm}}; + llvm_unreachable("Vector type must be in AccRegBank or VRegBank " + "or FifoRegBank"); } if (LoadStoreSize == 20 || LoadStoreSize == 32) { return {/*ISelOpcode=*/AIE2P::LDA_2D_dms_lda, NoImmediate, @@ -2066,14 +2106,19 @@ LoadStoreOpcodes AIE2PInstructionSelector::getLoadStoreOpcode( "256-bit vectors should be in the Vector Register Bank"); return {/*ISelOpcode=*/AIE2P::VLDA_3D_dmw_lda_w, NoImmediate, /*OffsetOpcode=*/{}}; - } else if (LoadStoreSize == 512) { + } else if (LoadStoreSize == 512 || LoadStoreSize == 1024 || + LoadStoreSize == 2048) { if (RBID == AIE2P::AccRegBankID) return {/*ISelOpcode=*/AIE2P::VLDA_3D_dmx_lda_bm, NoImmediate, - /*OffsetOpcode=*/{}}; +
/*OffsetOpcode=*/{AIE2P::VLDA_dmx_lda_bm_idx_imm}}; if (RBID == AIE2P::VRegBankID) return {/*ISelOpcode=*/AIE2P::VLDA_3D_dmx_lda_x, NoImmediate, - /*OffsetOpcode=*/{}}; - llvm_unreachable("512-bit vector type must be in AccRegBank or VRegBank"); + /*OffsetOpcode=*/{AIE2P::VLDA_dmx_lda_x_idx_imm}}; + if (RBID == AIE2P::FifoRegBankID) + return {/*ISelOpcode=*/AIE2P::VLDA_3D_dmx_lda_fifohl, NoImmediate, + /*OffsetOpcode=*/{AIE2P::VLDA_dmx_lda_fifohl_idx_imm}}; + llvm_unreachable("Vector type must be in AccRegBank or VRegBank " + "or FifoRegBank"); } if (LoadStoreSize == 20 || LoadStoreSize == 32) { return {/*ISelOpcode=*/AIE2P::LDA_3D_dms_lda, NoImmediate, @@ -2104,7 +2149,6 @@ LoadStoreOpcodes AIE2PInstructionSelector::getLoadStoreOpcode( case AIE2P::G_STORE: { // Scalar stores are handled in tablegen patterns MachineInstr *PtrDef = MRI.getVRegDef(I.getOperand(1).getReg()); - // TODO: handle remaining store sizes if (LoadStoreSize == 128) { assert(RBID == AIE2P::VRegBankID && "128-bit vectors should be in the Vector Register Bank"); @@ -2141,7 +2185,7 @@ LoadStoreOpcodes AIE2PInstructionSelector::getLoadStoreOpcode( /*OffsetOpcode=*/{}}; } llvm_unreachable("512-bit vector type must be in AccRegBank or " - "VRegBank or FifoRegBankID"); + "VRegBank or FifoRegBank"); } else { if (RBID == AIE2P::AccRegBankID) { return {/*ISelOpcode=*/AIE2P::VST_dmx_sts_bm_idx_imm, @@ -2159,7 +2203,7 @@ LoadStoreOpcodes AIE2PInstructionSelector::getLoadStoreOpcode( /*OffsetOpcode=*/AIE2P::VST_dmx_sts_fifohl_idx_imm}; } llvm_unreachable("512-bit vector type must be in AccRegBank or " - "VRegBank or FifoRegBankID"); + "VRegBank or FifoRegBank"); } } else if (LoadStoreSize == 1024) { if (RBID == AIE2P::FifoRegBankID) { @@ -2178,7 +2222,7 @@ LoadStoreOpcodes AIE2PInstructionSelector::getLoadStoreOpcode( /*OffsetOpcode=*/AIE2P::VST_dmx_sts_x_idx_imm}; } llvm_unreachable("1024-bit vector type must be in AccRegBank or " - "VRegBank or FifoRegBankID"); + "VRegBank or FifoRegBank"); } else if
(LoadStoreSize == 2048) { assert(RBID == AIE2P::AccRegBankID && "2048-bit vectors should be in the Accumulator Register Bank"); @@ -2191,7 +2235,6 @@ LoadStoreOpcodes AIE2PInstructionSelector::getLoadStoreOpcode( break; } case AIE2P::G_AIE_OFFSET_STORE: { - // TODO: handle remaining store sizes if (LoadStoreSize == 128) { assert(RBID == AIE2P::VRegBankID && "128-bit vectors should be in the Vector Register Bank"); @@ -2223,7 +2266,35 @@ LoadStoreOpcodes AIE2PInstructionSelector::getLoadStoreOpcode( return {ISelOpcode, FitsImmediateRange, /*OffsetOpcode=*/AIE2P::VST_dmx_sts_x_idx_imm}; } - llvm_unreachable("512-bit vector type must be in AccRegBank or VRegBank"); + if (RBID == AIE2P::FifoRegBankID) { + ISelOpcode = FitsImmediateRange ? AIE2P::VST_dmx_sts_fifohl_idx_imm + : AIE2P::VST_dmx_sts_fifohl_idx; + return {ISelOpcode, FitsImmediateRange, + /*OffsetOpcode=*/AIE2P::VST_dmx_sts_fifohl_idx_imm}; + } + llvm_unreachable("512-bit vector type must be in AccRegBank or " + "VRegBank or FifoRegBank"); + } else if (LoadStoreSize == 1024 || LoadStoreSize == 2048) { + FitsImmediateRange = + (LoadStoreSize == 1024) + ? 
checkImmediateRangeSplitting<4, 64, 64>(Offset) + : checkImmediateRangeSplitting<4, 64, 192>(Offset); + if (RBID == AIE2P::AccRegBankID) { + return {/*ISelOpcode=*/AIE2P::VST_dmx_sts_bm_idx_imm, + FitsImmediateRange, + /*OffsetOpcode=*/AIE2P::VST_dmx_sts_bm_idx_imm}; + } + if (RBID == AIE2P::VRegBankID) { + return {/*ISelOpcode=*/AIE2P::VST_dmx_sts_x_idx_imm, FitsImmediateRange, + /*OffsetOpcode=*/AIE2P::VST_dmx_sts_x_idx_imm}; + } + if (RBID == AIE2P::FifoRegBankID) { + return {/*ISelOpcode=*/AIE2P::VST_dmx_sts_fifohl_idx_imm, + FitsImmediateRange, + /*OffsetOpcode=*/AIE2P::VST_dmx_sts_fifohl_idx_imm}; + } + llvm_unreachable("Vector type must be in AccRegBank or " + "VRegBank or FifoRegBank"); } if (LoadStoreSize == 20 || LoadStoreSize == 32) { FitsImmediateRange = checkImmediateRange<4, 4>(Offset); @@ -2246,7 +2317,6 @@ LoadStoreOpcodes AIE2PInstructionSelector::getLoadStoreOpcode( } case AIE2P::G_AIE_POSTINC_STORE: { RBID = deriveRegBankID(I.getOperand(1).getReg(), MRI, RBI); - // TODO: handle remaining store sizes if (LoadStoreSize == 128) { assert(RBID == AIE2P::VRegBankID && "128-bit vectors should be in the Vector Register Bank"); @@ -2264,21 +2334,29 @@ LoadStoreOpcodes AIE2PInstructionSelector::getLoadStoreOpcode( : AIE2P::VST_dmw_sts_w_pstm_nrm; return {ISelOpcode, FitsImmediateRange, /*OffsetOpcode=*/AIE2P::VST_dmw_sts_w_pstm_nrm_imm}; - } else if (LoadStoreSize == 512) { + } else if (LoadStoreSize == 512 || LoadStoreSize == 1024 || + LoadStoreSize == 2048) { FitsImmediateRange = checkImmediateRange<4, 64>(Offset); if (RBID == AIE2P::AccRegBankID) { ISelOpcode = FitsImmediateRange ? AIE2P::VST_dmx_sts_bm_pstm_nrm_imm : AIE2P::VST_dmx_sts_bm_pstm_nrm; return {ISelOpcode, FitsImmediateRange, - /*OffsetOpcode=*/AIE2P::VST_dmx_sts_bm_pstm_nrm_imm}; + /*OffsetOpcode=*/AIE2P::VST_dmx_sts_bm_idx_imm}; } if (RBID == AIE2P::VRegBankID) { ISelOpcode = FitsImmediateRange ? 
AIE2P::VST_dmx_sts_x_pstm_nrm_imm : AIE2P::VST_dmx_sts_x_pstm_nrm; return {ISelOpcode, FitsImmediateRange, - /*OffsetOpcode=*/AIE2P::VST_dmx_sts_x_pstm_nrm_imm}; + /*OffsetOpcode=*/AIE2P::VST_dmx_sts_x_idx_imm}; } - llvm_unreachable("512-bit vector type must be in AccRegBank or VRegBank"); + if (RBID == AIE2P::FifoRegBankID) { + ISelOpcode = FitsImmediateRange ? AIE2P::VST_dmx_sts_fifohl_pstm_nrm_imm + : AIE2P::VST_dmx_sts_fifohl_pstm_nrm; + return {ISelOpcode, FitsImmediateRange, + /*OffsetOpcode=*/AIE2P::VST_dmx_sts_fifohl_idx_imm}; + } + llvm_unreachable("Vector type must be in AccRegBank or VRegBank " + "or FifoRegBank"); } if (LoadStoreSize == 20 || LoadStoreSize == 32) { FitsImmediateRange = checkImmediateRange<4, 4>(Offset); @@ -2302,7 +2380,6 @@ LoadStoreOpcodes AIE2PInstructionSelector::getLoadStoreOpcode( } case AIE2P::G_AIE_POSTINC_2D_STORE: { RBID = deriveRegBankID(I.getOperand(2).getReg(), MRI, RBI); - // TODO: handle remaining store sizes if (LoadStoreSize == 128) { assert(RBID == AIE2P::VRegBankID && "128-bit vectors should be in the Vector Register Bank"); @@ -2314,16 +2391,22 @@ LoadStoreOpcodes AIE2PInstructionSelector::getLoadStoreOpcode( "256-bit vectors should be in the Vector Register Bank"); return {AIE2P::VST_2D_dmw_sts_w, NoImmediate, /*OffsetOpcode=*/{}}; - } else if (LoadStoreSize == 512) { + } else if (LoadStoreSize == 512 || LoadStoreSize == 1024 || + LoadStoreSize == 2048) { if (RBID == AIE2P::AccRegBankID) { return {AIE2P::VST_2D_dmx_sts_bm, NoImmediate, - /*OffsetOpcode=*/{}}; + /*OffsetOpcode=*/{AIE2P::VST_dmx_sts_bm_idx_imm}}; } if (RBID == AIE2P::VRegBankID) { return {AIE2P::VST_2D_dmx_sts_x, NoImmediate, - /*OffsetOpcode=*/{}}; + /*OffsetOpcode=*/{AIE2P::VST_dmx_sts_x_idx_imm}}; + } + if (RBID == AIE2P::FifoRegBankID) { + return {AIE2P::VST_2D_dmx_sts_fifohl, NoImmediate, + /*OffsetOpcode=*/{AIE2P::VST_dmx_sts_fifohl_idx_imm}}; } - llvm_unreachable("512-bit vector type must be in AccRegBank or VRegBank"); + llvm_unreachable("Vector
type must be in AccRegBank or VRegBank " + "or FifoRegBank"); } if (LoadStoreSize == 20 || LoadStoreSize == 32) { return {/*ISelOpcode=*/AIE2P::ST_2D_dms_sts, NoImmediate, @@ -2341,7 +2424,6 @@ LoadStoreOpcodes AIE2PInstructionSelector::getLoadStoreOpcode( } case AIE2P::G_AIE_POSTINC_3D_STORE: { RBID = deriveRegBankID(I.getOperand(3).getReg(), MRI, RBI); - // TODO: handle remaining store sizes if (LoadStoreSize == 128) { assert(RBID == AIE2P::VRegBankID && "128-bit vectors should be in the Vector Register Bank"); @@ -2353,16 +2435,22 @@ LoadStoreOpcodes AIE2PInstructionSelector::getLoadStoreOpcode( "256-bit vectors should be in the Vector Register Bank"); return {AIE2P::VST_3D_dmw_sts_w, NoImmediate, /*OffsetOpcode=*/{}}; - } else if (LoadStoreSize == 512) { + } else if (LoadStoreSize == 512 || LoadStoreSize == 1024 || + LoadStoreSize == 2048) { if (RBID == AIE2P::AccRegBankID) { return {AIE2P::VST_3D_dmx_sts_bm, NoImmediate, - /*OffsetOpcode=*/{}}; + /*OffsetOpcode=*/{AIE2P::VST_dmx_sts_bm_idx_imm}}; } if (RBID == AIE2P::VRegBankID) { return {AIE2P::VST_3D_dmx_sts_x, NoImmediate, - /*OffsetOpcode=*/{}}; + /*OffsetOpcode=*/{AIE2P::VST_dmx_sts_x_idx_imm}}; } - llvm_unreachable("512-bit vector type must be in AccRegBank or VRegBank"); + if (RBID == AIE2P::FifoRegBankID) { + return {AIE2P::VST_3D_dmx_sts_fifohl, NoImmediate, + /*OffsetOpcode=*/{AIE2P::VST_dmx_sts_fifohl_idx_imm}}; + } + llvm_unreachable("Vector type must be in AccRegBank or VRegBank " + "or FifoRegBank"); } if (LoadStoreSize == 20 || LoadStoreSize == 32) { return {/*ISelOpcode=*/AIE2P::ST_3D_dms_sts, NoImmediate, @@ -2391,7 +2479,8 @@ bool AIE2PInstructionSelector::selectWideG_AIE_LOAD_STORE( LLT SrcDstTy = MRI.getType(AMI.SrcDstOp.getReg()); const unsigned SrcDstTySize = SrcDstTy.getSizeInBits(); const unsigned SplitFactor = (SrcDstTySize == 1024) ? 2 : 4; - const unsigned RBID = deriveRegBankID(I.getOperand(0).getReg(), MRI, RBI); + const unsigned OpIdx = AMI.MemI.mayStore() ?
I.getNumExplicitDefs() : 0; + const unsigned RBID = deriveRegBankID(I.getOperand(OpIdx).getReg(), MRI, RBI); const TargetRegisterClass *RC512 = nullptr; const TargetRegisterClass *RC1024 = nullptr; const TargetRegisterClass *RC2048 = &AIE2P::ACC2048RegClass; @@ -2437,25 +2526,28 @@ bool AIE2PInstructionSelector::selectWideG_AIE_LOAD_STORE( SmallVector SplitInstrs; switch (AMI.MemI.getOpcode()) { - case AIE2P::G_STORE: { + case AIE2P::G_STORE: + case AIE2P::G_AIE_POSTINC_STORE: + case AIE2P::G_AIE_POSTINC_2D_STORE: + case AIE2P::G_AIE_POSTINC_3D_STORE: { for (unsigned SubRegIdx = 0; SubRegIdx < SplitFactor; ++SubRegIdx) { const unsigned Offset = SubRegIdx * 64; auto Copy = MIB.buildInstr(TargetOpcode::COPY, {SubRegs[SubRegIdx]}, {}) .addReg(AMI.SrcDstOp.getReg(), 0, SubRegIdxes[SubRegIdx % SubRegIdxes.size()]); - - auto StoreInstr = (SubRegIdx == 0) - ? MIB.buildInstr(LSO.ISelOpcode, {}, {}) - : MIB.buildInstr(*LSO.OffsetOpcode, {}, {}) - .addReg(Copy.getReg(0)) - .addReg(AMI.PtrOp.getReg()) - .addImm(Offset); - + MachineInstrBuilder StoreInstr; if (SubRegIdx == 0) { + StoreInstr = MIB.buildInstr(LSO.ISelOpcode, {}, {}); for (auto Def : AMI.MemI.defs()) StoreInstr.addDef(Def.getReg()); StoreInstr.addReg(Copy.getReg(0)); addAddressingMode(StoreInstr, AMI, LSO.FitsImmediateRange, false, MRI); + } else { + + StoreInstr = MIB.buildInstr(*LSO.OffsetOpcode, {}, {}) + .addReg(Copy.getReg(0)) + .addReg(AMI.PtrOp.getReg()) + .addImm(Offset); } SplitInstrs.push_back(StoreInstr); } @@ -2463,22 +2555,114 @@ bool AIE2PInstructionSelector::selectWideG_AIE_LOAD_STORE( handleSplitMemOperands(SplitInstrs); break; } - case AIE2P::G_LOAD: { + case AIE2P::G_AIE_OFFSET_STORE: { + if (!LSO.FitsImmediateRange) { + // Emit an PTR_ADD to evaluate the offset + insertPtrAddForOffset(MRI, AMI.MemI); + } + for (unsigned SubRegIdx = 0; SubRegIdx < SplitFactor; ++SubRegIdx) { + const unsigned Offset = SubRegIdx * 64; + auto Copy = MIB.buildInstr(TargetOpcode::COPY, {SubRegs[SubRegIdx]}, {}) + 
.addReg(AMI.SrcDstOp.getReg(), 0, + SubRegIdxes[SubRegIdx % SubRegIdxes.size()]); + MachineInstrBuilder StoreInstr; + if (SubRegIdx == 0) { + StoreInstr = MIB.buildInstr(LSO.ISelOpcode, {}, {}); + StoreInstr.addReg(Copy.getReg(0)); + + StoreInstr.addUse(AMI.PtrOp.getReg()); + + if (LSO.FitsImmediateRange) { + StoreInstr.addImm(AMI.ImmediateOffset->getSExtValue()); // Offset + } else { + // In this case we have already inserted a PTR_ADD to add the offset + // to the base pointer + StoreInstr.addImm(0); // Offset + } + } else { // SubRegIdx != 0 + if (LSO.FitsImmediateRange) { + StoreInstr = MIB.buildInstr(*LSO.OffsetOpcode, {}, {}) + .addReg(Copy.getReg(0)) + .addReg(AMI.PtrOp.getReg()) + .addImm(AMI.ImmediateOffset->getSExtValue() + + Offset); // Offset + } else { + StoreInstr = MIB.buildInstr(*LSO.OffsetOpcode, {}, {}) + .addReg(Copy.getReg(0)) + .addReg(AMI.PtrOp.getReg()) + .addImm(Offset); // Offset + } + } + SplitInstrs.push_back(StoreInstr); + } + handleSplitMemOperands(SplitInstrs); + break; + } + case AIE2P::G_LOAD: + case AIE2P::G_AIE_POSTINC_LOAD: + case AIE2P::G_AIE_POSTINC_2D_LOAD: + case AIE2P::G_AIE_POSTINC_3D_LOAD: { for (unsigned SubRegIdx = 0; SubRegIdx < SplitFactor; ++SubRegIdx) { - auto Load = (SubRegIdx == 0) - ? 
MIB.buildInstr(LSO.ISelOpcode, {SubRegs[SubRegIdx]}, {}) - : MIB.buildInstr(*LSO.OffsetOpcode, {}, {}) - .addDef(SubRegs[SubRegIdx]) - .addUse(AMI.PtrOp.getReg()) - .addImm(SubRegIdx * 64); + MachineInstrBuilder Load; if (SubRegIdx == 0) { + Load = MIB.buildInstr(LSO.ISelOpcode, {SubRegs[0]}, {}); for (auto *Def = AMI.MemI.defs().begin() + 1; Def != AMI.MemI.defs().end(); Def++) { Load.addDef(Def->getReg()); } addAddressingMode(Load, AMI, LSO.FitsImmediateRange, false, MRI); + } else { + Load = MIB.buildInstr(*LSO.OffsetOpcode, {}, {}) + .addDef(SubRegs[SubRegIdx]) + .addUse(AMI.PtrOp.getReg()) + .addImm(SubRegIdx * 64); } + SplitInstrs.push_back(Load); + } + auto RegSeq = + MIB.buildInstr(AIE2P::REG_SEQUENCE, {AMI.SrcDstOp.getReg()}, {}); + for (unsigned SubRegIdx = 0; SubRegIdx < SplitFactor; ++SubRegIdx) { + RegSeq.addReg(SubRegs[SubRegIdx]).addImm(SubRegIdxes[SubRegIdx]); + } + Register SrcDstReg = AMI.SrcDstOp.getReg(); + if (!RBI.constrainGenericRegister( + SrcDstReg, *(SrcDstTySize == 2048 ? 
RC2048 : RC1024), MRI)) + return false; + handleSplitMemOperands(SplitInstrs); + break; + } + case AIE2P::G_AIE_OFFSET_LOAD: { + if (!LSO.FitsImmediateRange) { + // Emit a PTR_ADD to evaluate the offset + insertPtrAddForOffset(MRI, AMI.MemI); + } + for (unsigned SubRegIdx = 0; SubRegIdx < SplitFactor; ++SubRegIdx) { + MachineInstrBuilder Load; + if (SubRegIdx == 0) { + Load = MIB.buildInstr(LSO.ISelOpcode, {SubRegs[0]}, {}) + .addUse(AMI.PtrOp.getReg()); + if (LSO.FitsImmediateRange) { + Load.addImm(AMI.ImmediateOffset->getSExtValue()); // Offset + } else { + // In this case we have already inserted a PTR_ADD to add the offset + // to the base pointer + Load.addImm(0); // Offset + } + } else { // SubRegIdx != 0 + if (LSO.FitsImmediateRange) { + Load = MIB.buildInstr(*LSO.OffsetOpcode, {}, {}) + .addDef(SubRegs[SubRegIdx]) + .addUse(AMI.PtrOp.getReg()) + .addImm(AMI.ImmediateOffset->getSExtValue() + + SubRegIdx * 64); // Offset + } else { + Load = MIB.buildInstr(*LSO.OffsetOpcode, {}, {}) + .addDef(SubRegs[SubRegIdx]) + .addUse(AMI.PtrOp.getReg()) + .addImm(SubRegIdx * 64); // Offset + } + } SplitInstrs.push_back(Load); } auto RegSeq = diff --git a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-vector-indexed-load-store.mir b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-vector-indexed-load-store.mir index 9f595931909b..ddbbc55c154f 100644 --- a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-vector-indexed-load-store.mir +++ b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-vector-indexed-load-store.mir @@ -4,7 +4,7 @@ # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # -# (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates +# (c) Copyright 2023-2025 Advanced Micro Devices, Inc.
or its affiliates # RUN: llc -mtriple aie2p -run-pass=instruction-select %s -verify-machineinstrs -o - | FileCheck %s --- @@ -498,3 +498,936 @@ body: | %2:modregbank(s20) = G_CONSTANT i20 -576 G_AIE_OFFSET_STORE %0:accregbank(<8 x s64>), %1:ptrregbank(p0), %2:modregbank(s20) :: (store (<8 x s64>)) ... + +--- +name: FIFO512_LOAD_maxOffset +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0 + ; CHECK-LABEL: name: FIFO512_LOAD_maxOffset + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_idx_imm:%[0-9]+]]:fifo512 = VLDA_dmx_lda_fifohl_idx_imm [[COPY]], 448 :: (load (<16 x s32>)) + ; CHECK-NEXT: $lfl0 = COPY [[VLDA_dmx_lda_fifohl_idx_imm]] + %1:ptrregbank(p0) = COPY $p0 + %2:modregbank(s20) = G_CONSTANT i20 448 + %0:fiforegbank(<16 x s32>) = G_AIE_OFFSET_LOAD %1:ptrregbank(p0), %2:modregbank(s20) :: (load (<16 x s32>)) + $lfl0 = COPY %0:fiforegbank(<16 x s32>) +... + +--- +name: FIFO512_LOAD_minOffset +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0 + ; CHECK-LABEL: name: FIFO512_LOAD_minOffset + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_idx_imm:%[0-9]+]]:fifo512 = VLDA_dmx_lda_fifohl_idx_imm [[COPY]], -512 :: (load (<16 x s32>)) + ; CHECK-NEXT: $lfl0 = COPY [[VLDA_dmx_lda_fifohl_idx_imm]] + %1:ptrregbank(p0) = COPY $p0 + %2:modregbank(s20) = G_CONSTANT i20 -512 + %0:fiforegbank(<16 x s32>) = G_AIE_OFFSET_LOAD %1:ptrregbank(p0), %2:modregbank(s20) :: (load (<16 x s32>)) + $lfl0 = COPY %0:fiforegbank(<16 x s32>) +... 
+ +--- +name: FIFO512_LOAD_overMaxOffset +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0 + ; CHECK-LABEL: name: FIFO512_LOAD_overMaxOffset + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 512 + ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_idx:%[0-9]+]]:fifo512 = VLDA_dmx_lda_fifohl_idx [[COPY]], [[MOV_PD_imm11_pseudo]] :: (load (<16 x s32>)) + ; CHECK-NEXT: $lfl0 = COPY [[VLDA_dmx_lda_fifohl_idx]] + %1:ptrregbank(p0) = COPY $p0 + %2:modregbank(s20) = G_CONSTANT i20 512 + %0:fiforegbank(<16 x s32>) = G_AIE_OFFSET_LOAD %1:ptrregbank(p0), %2:modregbank(s20) :: (load (<16 x s32>)) + $lfl0 = COPY %0:fiforegbank(<16 x s32>) +... + +--- +name: FIFO512_LOAD_overMinOffset +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0 + ; CHECK-LABEL: name: FIFO512_LOAD_overMinOffset + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:edj = MOV_PD_imm11_pseudo -576 + ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_idx:%[0-9]+]]:fifo512 = VLDA_dmx_lda_fifohl_idx [[COPY]], [[MOV_PD_imm11_pseudo]] :: (load (<16 x s32>)) + ; CHECK-NEXT: $lfl0 = COPY [[VLDA_dmx_lda_fifohl_idx]] + %1:ptrregbank(p0) = COPY $p0 + %2:modregbank(s20) = G_CONSTANT i20 -576 + %0:fiforegbank(<16 x s32>) = G_AIE_OFFSET_LOAD %1:ptrregbank(p0), %2:modregbank(s20) :: (load (<16 x s32>)) + $lfl0 = COPY %0:fiforegbank(<16 x s32>) +... 
+ +--- +name: FIFO512_STORE_maxOffset +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $lfl0 + ; CHECK-LABEL: name: FIFO512_STORE_maxOffset + ; CHECK: liveins: $p0, $lfl0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:fifo512 = COPY $lfl0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: VST_dmx_sts_fifohl_idx_imm [[COPY]], [[COPY1]], 448 :: (store (<16 x s32>)) + %0:fiforegbank(<16 x s32>) = COPY $lfl0 + %1:ptrregbank(p0) = COPY $p0 + %2:modregbank(s20) = G_CONSTANT i20 448 + G_AIE_OFFSET_STORE %0:fiforegbank(<16 x s32>), %1:ptrregbank(p0), %2:modregbank(s20) :: (store (<16 x s32>)) +... + +--- +name: FIFO512_STORE_minOffset +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $lfl0 + ; CHECK-LABEL: name: FIFO512_STORE_minOffset + ; CHECK: liveins: $p0, $lfl0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:fifo512 = COPY $lfl0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: VST_dmx_sts_fifohl_idx_imm [[COPY]], [[COPY1]], -512 :: (store (<16 x s32>)) + %0:fiforegbank(<16 x s32>) = COPY $lfl0 + %1:ptrregbank(p0) = COPY $p0 + %2:modregbank(s20) = G_CONSTANT i20 -512 + G_AIE_OFFSET_STORE %0:fiforegbank(<16 x s32>), %1:ptrregbank(p0), %2:modregbank(s20) :: (store (<16 x s32>)) +... 
+ +--- +name: FIFO512_STORE_overMaxOffset +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $lfl0 + ; CHECK-LABEL: name: FIFO512_STORE_overMaxOffset + ; CHECK: liveins: $p0, $lfl0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:fifo512 = COPY $lfl0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 512 + ; CHECK-NEXT: VST_dmx_sts_fifohl_idx [[COPY]], [[COPY1]], [[MOV_PD_imm11_pseudo]] :: (store (<16 x s32>)) + %0:fiforegbank(<16 x s32>) = COPY $lfl0 + %1:ptrregbank(p0) = COPY $p0 + %2:modregbank(s20) = G_CONSTANT i20 512 + G_AIE_OFFSET_STORE %0:fiforegbank(<16 x s32>), %1:ptrregbank(p0), %2:modregbank(s20) :: (store (<16 x s32>)) +... + +--- +name: FIFO512_STORE_overMinOffset +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $lfl0 + ; CHECK-LABEL: name: FIFO512_STORE_overMinOffset + ; CHECK: liveins: $p0, $lfl0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:fifo512 = COPY $lfl0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:edj = MOV_PD_imm11_pseudo -576 + ; CHECK-NEXT: VST_dmx_sts_fifohl_idx [[COPY]], [[COPY1]], [[MOV_PD_imm11_pseudo]] :: (store (<16 x s32>)) + %0:fiforegbank(<16 x s32>) = COPY $lfl0 + %1:ptrregbank(p0) = COPY $p0 + %2:modregbank(s20) = G_CONSTANT i20 -576 + G_AIE_OFFSET_STORE %0:fiforegbank(<16 x s32>), %1:ptrregbank(p0), %2:modregbank(s20) :: (store (<16 x s32>)) +... 
+ +--- +name: VEC1024_LOAD_maxOffset +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0 + ; CHECK-LABEL: name: VEC1024_LOAD_maxOffset + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[VLDA_dmx_lda_x_idx_imm:%[0-9]+]]:vec512 = VLDA_dmx_lda_x_idx_imm [[COPY]], 384 :: (load (<16 x s32>), align 128) + ; CHECK-NEXT: [[VLDA_dmx_lda_x_idx_imm1:%[0-9]+]]:vec512 = VLDA_dmx_lda_x_idx_imm [[COPY]], 448 :: (load (<16 x s32>) from unknown-address + 64) + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vec1024 = REG_SEQUENCE [[VLDA_dmx_lda_x_idx_imm]], %subreg.sub_512_lo, [[VLDA_dmx_lda_x_idx_imm1]], %subreg.sub_512_hi + ; CHECK-NEXT: $y0 = COPY [[REG_SEQUENCE]] + %1:ptrregbank(p0) = COPY $p0 + %2:modregbank(s20) = G_CONSTANT i20 384 + %0:vregbank(<32 x s32>) = G_AIE_OFFSET_LOAD %1:ptrregbank(p0), %2:modregbank(s20) :: (load (<32 x s32>)) + $y0 = COPY %0:vregbank(<32 x s32>) +... + +--- +name: VEC1024_LOAD_minOffset +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0 + ; CHECK-LABEL: name: VEC1024_LOAD_minOffset + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[VLDA_dmx_lda_x_idx_imm:%[0-9]+]]:vec512 = VLDA_dmx_lda_x_idx_imm [[COPY]], -448 :: (load (<16 x s32>), align 128) + ; CHECK-NEXT: [[VLDA_dmx_lda_x_idx_imm1:%[0-9]+]]:vec512 = VLDA_dmx_lda_x_idx_imm [[COPY]], -384 :: (load (<16 x s32>) from unknown-address + 64) + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vec1024 = REG_SEQUENCE [[VLDA_dmx_lda_x_idx_imm]], %subreg.sub_512_lo, [[VLDA_dmx_lda_x_idx_imm1]], %subreg.sub_512_hi + ; CHECK-NEXT: $y0 = COPY [[REG_SEQUENCE]] + %1:ptrregbank(p0) = COPY $p0 + %2:modregbank(s20) = G_CONSTANT i20 -448 + %0:vregbank(<32 x s32>) = G_AIE_OFFSET_LOAD %1:ptrregbank(p0), %2:modregbank(s20) :: (load (<32 x s32>)) + $y0 = COPY %0:vregbank(<32 x s32>) +... 
+ +--- +name: VEC1024_LOAD_overMaxOffset +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0 + ; CHECK-LABEL: name: VEC1024_LOAD_overMaxOffset + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[PADD_imm_pseudo:%[0-9]+]]:ep = PADD_imm_pseudo [[COPY]], 448 + ; CHECK-NEXT: [[VLDA_dmx_lda_x_idx_imm:%[0-9]+]]:vec512 = VLDA_dmx_lda_x_idx_imm [[PADD_imm_pseudo]], 0 :: (load (<16 x s32>), align 128) + ; CHECK-NEXT: [[VLDA_dmx_lda_x_idx_imm1:%[0-9]+]]:vec512 = VLDA_dmx_lda_x_idx_imm [[PADD_imm_pseudo]], 64 :: (load (<16 x s32>) from unknown-address + 64) + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vec1024 = REG_SEQUENCE [[VLDA_dmx_lda_x_idx_imm]], %subreg.sub_512_lo, [[VLDA_dmx_lda_x_idx_imm1]], %subreg.sub_512_hi + ; CHECK-NEXT: $y0 = COPY [[REG_SEQUENCE]] + %1:ptrregbank(p0) = COPY $p0 + %2:modregbank(s20) = G_CONSTANT i20 448 + %0:vregbank(<32 x s32>) = G_AIE_OFFSET_LOAD %1:ptrregbank(p0), %2:modregbank(s20) :: (load (<32 x s32>)) + $y0 = COPY %0:vregbank(<32 x s32>) +... 
+ +--- +name: VEC1024_LOAD_overMinOffset +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0 + ; CHECK-LABEL: name: VEC1024_LOAD_overMinOffset + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo -576 + ; CHECK-NEXT: [[PADD_mod_pseudo:%[0-9]+]]:ep = PADD_mod_pseudo [[COPY]], [[MOV_PD_imm11_pseudo]] + ; CHECK-NEXT: [[VLDA_dmx_lda_x_idx_imm:%[0-9]+]]:vec512 = VLDA_dmx_lda_x_idx_imm [[PADD_mod_pseudo]], 0 :: (load (<16 x s32>), align 128) + ; CHECK-NEXT: [[VLDA_dmx_lda_x_idx_imm1:%[0-9]+]]:vec512 = VLDA_dmx_lda_x_idx_imm [[PADD_mod_pseudo]], 64 :: (load (<16 x s32>) from unknown-address + 64) + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vec1024 = REG_SEQUENCE [[VLDA_dmx_lda_x_idx_imm]], %subreg.sub_512_lo, [[VLDA_dmx_lda_x_idx_imm1]], %subreg.sub_512_hi + ; CHECK-NEXT: $y0 = COPY [[REG_SEQUENCE]] + %1:ptrregbank(p0) = COPY $p0 + %2:modregbank(s20) = G_CONSTANT i20 -576 + %0:vregbank(<32 x s32>) = G_AIE_OFFSET_LOAD %1:ptrregbank(p0), %2:modregbank(s20) :: (load (<32 x s32>)) + $y0 = COPY %0:vregbank(<32 x s32>) +... 
+ +--- +name: VEC1024_STORE_maxOffset +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $y0 + ; CHECK-LABEL: name: VEC1024_STORE_maxOffset + ; CHECK: liveins: $p0, $y0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec1024 = COPY $y0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vec512 = COPY [[COPY]].sub_512_lo + ; CHECK-NEXT: VST_dmx_sts_x_idx_imm [[COPY2]], [[COPY1]], 384 :: (store (<16 x s32>), align 128) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vec512 = COPY [[COPY]].sub_512_hi + ; CHECK-NEXT: VST_dmx_sts_x_idx_imm [[COPY3]], [[COPY1]], 448 :: (store (<16 x s32>) into unknown-address + 64) + %0:vregbank(<32 x s32>) = COPY $y0 + %1:ptrregbank(p0) = COPY $p0 + %2:modregbank(s20) = G_CONSTANT i20 384 + G_AIE_OFFSET_STORE %0:vregbank(<32 x s32>), %1:ptrregbank(p0), %2:modregbank(s20) :: (store (<32 x s32>)) +... + +--- +name: VEC1024_STORE_minOffset +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $y0 + ; CHECK-LABEL: name: VEC1024_STORE_minOffset + ; CHECK: liveins: $p0, $y0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec1024 = COPY $y0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vec512 = COPY [[COPY]].sub_512_lo + ; CHECK-NEXT: VST_dmx_sts_x_idx_imm [[COPY2]], [[COPY1]], -512 :: (store (<16 x s32>), align 128) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vec512 = COPY [[COPY]].sub_512_hi + ; CHECK-NEXT: VST_dmx_sts_x_idx_imm [[COPY3]], [[COPY1]], -448 :: (store (<16 x s32>) into unknown-address + 64) + %0:vregbank(<32 x s32>) = COPY $y0 + %1:ptrregbank(p0) = COPY $p0 + %2:modregbank(s20) = G_CONSTANT i20 -512 + G_AIE_OFFSET_STORE %0:vregbank(<32 x s32>), %1:ptrregbank(p0), %2:modregbank(s20) :: (store (<32 x s32>)) +... 
+ +--- +name: VEC1024_STORE_overMaxOffset +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $y0 + ; CHECK-LABEL: name: VEC1024_STORE_overMaxOffset + ; CHECK: liveins: $p0, $y0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec1024 = COPY $y0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[PADD_imm_pseudo:%[0-9]+]]:ep = PADD_imm_pseudo [[COPY1]], 448 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vec512 = COPY [[COPY]].sub_512_lo + ; CHECK-NEXT: VST_dmx_sts_x_idx_imm [[COPY2]], [[PADD_imm_pseudo]], 0 :: (store (<16 x s32>), align 128) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vec512 = COPY [[COPY]].sub_512_hi + ; CHECK-NEXT: VST_dmx_sts_x_idx_imm [[COPY3]], [[PADD_imm_pseudo]], 64 :: (store (<16 x s32>) into unknown-address + 64) + %0:vregbank(<32 x s32>) = COPY $y0 + %1:ptrregbank(p0) = COPY $p0 + %2:modregbank(s20) = G_CONSTANT i20 448 + G_AIE_OFFSET_STORE %0:vregbank(<32 x s32>), %1:ptrregbank(p0), %2:modregbank(s20) :: (store (<32 x s32>)) +... 
+ +--- +name: VEC1024_STORE_overMinOffset +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $y0 + ; CHECK-LABEL: name: VEC1024_STORE_overMinOffset + ; CHECK: liveins: $p0, $y0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vec1024 = COPY $y0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo -576 + ; CHECK-NEXT: [[PADD_mod_pseudo:%[0-9]+]]:ep = PADD_mod_pseudo [[COPY1]], [[MOV_PD_imm11_pseudo]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vec512 = COPY [[COPY]].sub_512_lo + ; CHECK-NEXT: VST_dmx_sts_x_idx_imm [[COPY2]], [[PADD_mod_pseudo]], 0 :: (store (<16 x s32>), align 128) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vec512 = COPY [[COPY]].sub_512_hi + ; CHECK-NEXT: VST_dmx_sts_x_idx_imm [[COPY3]], [[PADD_mod_pseudo]], 64 :: (store (<16 x s32>) into unknown-address + 64) + %0:vregbank(<32 x s32>) = COPY $y0 + %1:ptrregbank(p0) = COPY $p0 + %2:modregbank(s20) = G_CONSTANT i20 -576 + G_AIE_OFFSET_STORE %0:vregbank(<32 x s32>), %1:ptrregbank(p0), %2:modregbank(s20) :: (store (<32 x s32>)) +... 
+ +--- +name: ACC1024_LOAD_maxOffset +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0 + ; CHECK-LABEL: name: ACC1024_LOAD_maxOffset + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[COPY]], 384 :: (load (<16 x s32>), align 128) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm1:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[COPY]], 448 :: (load (<16 x s32>) from unknown-address + 64) + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:acc1024 = REG_SEQUENCE [[VLDA_dmx_lda_bm_idx_imm]], %subreg.sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm1]], %subreg.sub_512_acc_hi + ; CHECK-NEXT: $cml0 = COPY [[REG_SEQUENCE]] + %1:ptrregbank(p0) = COPY $p0 + %2:modregbank(s20) = G_CONSTANT i20 384 + %0:accregbank(<32 x s32>) = G_AIE_OFFSET_LOAD %1:ptrregbank(p0), %2:modregbank(s20) :: (load (<32 x s32>)) + $cml0 = COPY %0:accregbank(<32 x s32>) +... + +--- +name: ACC1024_LOAD_minOffset +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0 + ; CHECK-LABEL: name: ACC1024_LOAD_minOffset + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[COPY]], -448 :: (load (<16 x s32>), align 128) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm1:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[COPY]], -384 :: (load (<16 x s32>) from unknown-address + 64) + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:acc1024 = REG_SEQUENCE [[VLDA_dmx_lda_bm_idx_imm]], %subreg.sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm1]], %subreg.sub_512_acc_hi + ; CHECK-NEXT: $cml0 = COPY [[REG_SEQUENCE]] + %1:ptrregbank(p0) = COPY $p0 + %2:modregbank(s20) = G_CONSTANT i20 -448 + %0:accregbank(<32 x s32>) = G_AIE_OFFSET_LOAD %1:ptrregbank(p0), %2:modregbank(s20) :: (load (<32 x s32>)) + $cml0 = COPY %0:accregbank(<32 x s32>) +... 
+ +--- +name: ACC1024_LOAD_overMaxOffset +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0 + ; CHECK-LABEL: name: ACC1024_LOAD_overMaxOffset + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[PADD_imm_pseudo:%[0-9]+]]:ep = PADD_imm_pseudo [[COPY]], 448 + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[PADD_imm_pseudo]], 0 :: (load (<16 x s32>), align 128) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm1:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[PADD_imm_pseudo]], 64 :: (load (<16 x s32>) from unknown-address + 64) + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:acc1024 = REG_SEQUENCE [[VLDA_dmx_lda_bm_idx_imm]], %subreg.sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm1]], %subreg.sub_512_acc_hi + ; CHECK-NEXT: $cml0 = COPY [[REG_SEQUENCE]] + %1:ptrregbank(p0) = COPY $p0 + %2:modregbank(s20) = G_CONSTANT i20 448 + %0:accregbank(<32 x s32>) = G_AIE_OFFSET_LOAD %1:ptrregbank(p0), %2:modregbank(s20) :: (load (<32 x s32>)) + $cml0 = COPY %0:accregbank(<32 x s32>) +... 
+ +--- +name: ACC1024_LOAD_overMinOffset +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0 + ; CHECK-LABEL: name: ACC1024_LOAD_overMinOffset + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo -576 + ; CHECK-NEXT: [[PADD_mod_pseudo:%[0-9]+]]:ep = PADD_mod_pseudo [[COPY]], [[MOV_PD_imm11_pseudo]] + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[PADD_mod_pseudo]], 0 :: (load (<16 x s32>), align 128) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm1:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[PADD_mod_pseudo]], 64 :: (load (<16 x s32>) from unknown-address + 64) + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:acc1024 = REG_SEQUENCE [[VLDA_dmx_lda_bm_idx_imm]], %subreg.sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm1]], %subreg.sub_512_acc_hi + ; CHECK-NEXT: $cml0 = COPY [[REG_SEQUENCE]] + %1:ptrregbank(p0) = COPY $p0 + %2:modregbank(s20) = G_CONSTANT i20 -576 + %0:accregbank(<32 x s32>) = G_AIE_OFFSET_LOAD %1:ptrregbank(p0), %2:modregbank(s20) :: (load (<32 x s32>)) + $cml0 = COPY %0:accregbank(<32 x s32>) +... 
+ +--- +name: ACC1024_STORE_maxOffset +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $cml0 + ; CHECK-LABEL: name: ACC1024_STORE_maxOffset + ; CHECK: liveins: $p0, $cml0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:acc1024 = COPY $cml0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:acc512 = COPY [[COPY]].sub_512_acc_lo + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY2]], [[COPY1]], 384 :: (store (<16 x s32>), align 128) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:acc512 = COPY [[COPY]].sub_512_acc_hi + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY3]], [[COPY1]], 448 :: (store (<16 x s32>) into unknown-address + 64) + %0:accregbank(<32 x s32>) = COPY $cml0 + %1:ptrregbank(p0) = COPY $p0 + %2:modregbank(s20) = G_CONSTANT i20 384 + G_AIE_OFFSET_STORE %0:accregbank(<32 x s32>), %1:ptrregbank(p0), %2:modregbank(s20) :: (store (<32 x s32>)) +... + +--- +name: ACC1024_STORE_minOffset +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $cml0 + ; CHECK-LABEL: name: ACC1024_STORE_minOffset + ; CHECK: liveins: $p0, $cml0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:acc1024 = COPY $cml0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:acc512 = COPY [[COPY]].sub_512_acc_lo + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY2]], [[COPY1]], -512 :: (store (<16 x s32>), align 128) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:acc512 = COPY [[COPY]].sub_512_acc_hi + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY3]], [[COPY1]], -448 :: (store (<16 x s32>) into unknown-address + 64) + %0:accregbank(<32 x s32>) = COPY $cml0 + %1:ptrregbank(p0) = COPY $p0 + %2:modregbank(s20) = G_CONSTANT i20 -512 + G_AIE_OFFSET_STORE %0:accregbank(<32 x s32>), %1:ptrregbank(p0), %2:modregbank(s20) :: (store (<32 x s32>)) +... 
+ +--- +name: ACC1024_STORE_overMaxOffset +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $cml0 + ; CHECK-LABEL: name: ACC1024_STORE_overMaxOffset + ; CHECK: liveins: $p0, $cml0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:acc1024 = COPY $cml0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[PADD_imm_pseudo:%[0-9]+]]:ep = PADD_imm_pseudo [[COPY1]], 448 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:acc512 = COPY [[COPY]].sub_512_acc_lo + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY2]], [[PADD_imm_pseudo]], 0 :: (store (<16 x s32>), align 128) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:acc512 = COPY [[COPY]].sub_512_acc_hi + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY3]], [[PADD_imm_pseudo]], 64 :: (store (<16 x s32>) into unknown-address + 64) + %0:accregbank(<32 x s32>) = COPY $cml0 + %1:ptrregbank(p0) = COPY $p0 + %2:modregbank(s20) = G_CONSTANT i20 448 + G_AIE_OFFSET_STORE %0:accregbank(<32 x s32>), %1:ptrregbank(p0), %2:modregbank(s20) :: (store (<32 x s32>)) +... 
+ +--- +name: ACC1024_STORE_overMinOffset +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $cml0 + ; CHECK-LABEL: name: ACC1024_STORE_overMinOffset + ; CHECK: liveins: $p0, $cml0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:acc1024 = COPY $cml0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo -576 + ; CHECK-NEXT: [[PADD_mod_pseudo:%[0-9]+]]:ep = PADD_mod_pseudo [[COPY1]], [[MOV_PD_imm11_pseudo]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:acc512 = COPY [[COPY]].sub_512_acc_lo + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY2]], [[PADD_mod_pseudo]], 0 :: (store (<16 x s32>), align 128) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:acc512 = COPY [[COPY]].sub_512_acc_hi + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY3]], [[PADD_mod_pseudo]], 64 :: (store (<16 x s32>) into unknown-address + 64) + %0:accregbank(<32 x s32>) = COPY $cml0 + %1:ptrregbank(p0) = COPY $p0 + %2:modregbank(s20) = G_CONSTANT i20 -576 + G_AIE_OFFSET_STORE %0:accregbank(<32 x s32>), %1:ptrregbank(p0), %2:modregbank(s20) :: (store (<32 x s32>)) +... 
+ +--- +name: FIFO1024_LOAD_maxOffset +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0 + ; CHECK-LABEL: name: FIFO1024_LOAD_maxOffset + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_idx_imm:%[0-9]+]]:fifo512 = VLDA_dmx_lda_fifohl_idx_imm [[COPY]], 384 :: (load (<16 x s32>), align 128) + ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_idx_imm1:%[0-9]+]]:fifo512 = VLDA_dmx_lda_fifohl_idx_imm [[COPY]], 448 :: (load (<16 x s32>) from unknown-address + 64) + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:fifo1024 = REG_SEQUENCE [[VLDA_dmx_lda_fifohl_idx_imm]], %subreg.sub_lo_fifo, [[VLDA_dmx_lda_fifohl_idx_imm1]], %subreg.sub_hi_fifo + ; CHECK-NEXT: $lf0 = COPY [[REG_SEQUENCE]] + %1:ptrregbank(p0) = COPY $p0 + %2:modregbank(s20) = G_CONSTANT i20 384 + %0:fiforegbank(<32 x s32>) = G_AIE_OFFSET_LOAD %1:ptrregbank(p0), %2:modregbank(s20) :: (load (<32 x s32>)) + $lf0 = COPY %0:fiforegbank(<32 x s32>) +... 
+ +--- +name: FIFO1024_LOAD_minOffset +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0 + ; CHECK-LABEL: name: FIFO1024_LOAD_minOffset + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_idx_imm:%[0-9]+]]:fifo512 = VLDA_dmx_lda_fifohl_idx_imm [[COPY]], -448 :: (load (<16 x s32>), align 128) + ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_idx_imm1:%[0-9]+]]:fifo512 = VLDA_dmx_lda_fifohl_idx_imm [[COPY]], -384 :: (load (<16 x s32>) from unknown-address + 64) + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:fifo1024 = REG_SEQUENCE [[VLDA_dmx_lda_fifohl_idx_imm]], %subreg.sub_lo_fifo, [[VLDA_dmx_lda_fifohl_idx_imm1]], %subreg.sub_hi_fifo + ; CHECK-NEXT: $lf0 = COPY [[REG_SEQUENCE]] + %1:ptrregbank(p0) = COPY $p0 + %2:modregbank(s20) = G_CONSTANT i20 -448 + %0:fiforegbank(<32 x s32>) = G_AIE_OFFSET_LOAD %1:ptrregbank(p0), %2:modregbank(s20) :: (load (<32 x s32>)) + $lf0 = COPY %0:fiforegbank(<32 x s32>) +... 
+ +--- +name: FIFO1024_LOAD_overMaxOffset +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0 + ; CHECK-LABEL: name: FIFO1024_LOAD_overMaxOffset + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[PADD_imm_pseudo:%[0-9]+]]:ep = PADD_imm_pseudo [[COPY]], 448 + ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_idx_imm:%[0-9]+]]:fifo512 = VLDA_dmx_lda_fifohl_idx_imm [[PADD_imm_pseudo]], 0 :: (load (<16 x s32>), align 128) + ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_idx_imm1:%[0-9]+]]:fifo512 = VLDA_dmx_lda_fifohl_idx_imm [[PADD_imm_pseudo]], 64 :: (load (<16 x s32>) from unknown-address + 64) + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:fifo1024 = REG_SEQUENCE [[VLDA_dmx_lda_fifohl_idx_imm]], %subreg.sub_lo_fifo, [[VLDA_dmx_lda_fifohl_idx_imm1]], %subreg.sub_hi_fifo + ; CHECK-NEXT: $lf0 = COPY [[REG_SEQUENCE]] + %1:ptrregbank(p0) = COPY $p0 + %2:modregbank(s20) = G_CONSTANT i20 448 + %0:fiforegbank(<32 x s32>) = G_AIE_OFFSET_LOAD %1:ptrregbank(p0), %2:modregbank(s20) :: (load (<32 x s32>)) + $lf0 = COPY %0:fiforegbank(<32 x s32>) +... 
+ +--- +name: FIFO1024_LOAD_overMinOffset +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0 + ; CHECK-LABEL: name: FIFO1024_LOAD_overMinOffset + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo -576 + ; CHECK-NEXT: [[PADD_mod_pseudo:%[0-9]+]]:ep = PADD_mod_pseudo [[COPY]], [[MOV_PD_imm11_pseudo]] + ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_idx_imm:%[0-9]+]]:fifo512 = VLDA_dmx_lda_fifohl_idx_imm [[PADD_mod_pseudo]], 0 :: (load (<16 x s32>), align 128) + ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_idx_imm1:%[0-9]+]]:fifo512 = VLDA_dmx_lda_fifohl_idx_imm [[PADD_mod_pseudo]], 64 :: (load (<16 x s32>) from unknown-address + 64) + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:fifo1024 = REG_SEQUENCE [[VLDA_dmx_lda_fifohl_idx_imm]], %subreg.sub_lo_fifo, [[VLDA_dmx_lda_fifohl_idx_imm1]], %subreg.sub_hi_fifo + ; CHECK-NEXT: $lf0 = COPY [[REG_SEQUENCE]] + %1:ptrregbank(p0) = COPY $p0 + %2:modregbank(s20) = G_CONSTANT i20 -576 + %0:fiforegbank(<32 x s32>) = G_AIE_OFFSET_LOAD %1:ptrregbank(p0), %2:modregbank(s20) :: (load (<32 x s32>)) + $lf0 = COPY %0:fiforegbank(<32 x s32>) +... 
+ +--- +name: FIFO1024_STORE_maxOffset +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $lf0 + ; CHECK-LABEL: name: FIFO1024_STORE_maxOffset + ; CHECK: liveins: $p0, $lf0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:fifo1024 = COPY $lf0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fifo512 = COPY [[COPY]].sub_lo_fifo + ; CHECK-NEXT: VST_dmx_sts_fifohl_idx_imm [[COPY2]], [[COPY1]], 384 :: (store (<16 x s32>), align 128) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fifo512 = COPY [[COPY]].sub_hi_fifo + ; CHECK-NEXT: VST_dmx_sts_fifohl_idx_imm [[COPY3]], [[COPY1]], 448 :: (store (<16 x s32>) into unknown-address + 64) + %0:fiforegbank(<32 x s32>) = COPY $lf0 + %1:ptrregbank(p0) = COPY $p0 + %2:modregbank(s20) = G_CONSTANT i20 384 + G_AIE_OFFSET_STORE %0:fiforegbank(<32 x s32>), %1:ptrregbank(p0), %2:modregbank(s20) :: (store (<32 x s32>)) +... + +--- +name: FIFO1024_STORE_minOffset +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $lf0 + ; CHECK-LABEL: name: FIFO1024_STORE_minOffset + ; CHECK: liveins: $p0, $lf0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:fifo1024 = COPY $lf0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fifo512 = COPY [[COPY]].sub_lo_fifo + ; CHECK-NEXT: VST_dmx_sts_fifohl_idx_imm [[COPY2]], [[COPY1]], -512 :: (store (<16 x s32>), align 128) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fifo512 = COPY [[COPY]].sub_hi_fifo + ; CHECK-NEXT: VST_dmx_sts_fifohl_idx_imm [[COPY3]], [[COPY1]], -448 :: (store (<16 x s32>) into unknown-address + 64) + %0:fiforegbank(<32 x s32>) = COPY $lf0 + %1:ptrregbank(p0) = COPY $p0 + %2:modregbank(s20) = G_CONSTANT i20 -512 + G_AIE_OFFSET_STORE %0:fiforegbank(<32 x s32>), %1:ptrregbank(p0), %2:modregbank(s20) :: (store (<32 x s32>)) +... 
+ +--- +name: FIFO1024_STORE_overMaxOffset +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $lf0 + ; CHECK-LABEL: name: FIFO1024_STORE_overMaxOffset + ; CHECK: liveins: $p0, $lf0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:fifo1024 = COPY $lf0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[PADD_imm_pseudo:%[0-9]+]]:ep = PADD_imm_pseudo [[COPY1]], 448 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fifo512 = COPY [[COPY]].sub_lo_fifo + ; CHECK-NEXT: VST_dmx_sts_fifohl_idx_imm [[COPY2]], [[PADD_imm_pseudo]], 0 :: (store (<16 x s32>), align 128) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fifo512 = COPY [[COPY]].sub_hi_fifo + ; CHECK-NEXT: VST_dmx_sts_fifohl_idx_imm [[COPY3]], [[PADD_imm_pseudo]], 64 :: (store (<16 x s32>) into unknown-address + 64) + %0:fiforegbank(<32 x s32>) = COPY $lf0 + %1:ptrregbank(p0) = COPY $p0 + %2:modregbank(s20) = G_CONSTANT i20 448 + G_AIE_OFFSET_STORE %0:fiforegbank(<32 x s32>), %1:ptrregbank(p0), %2:modregbank(s20) :: (store (<32 x s32>)) +... 
+ +--- +name: FIFO1024_STORE_overMinOffset +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0, $lf0 + ; CHECK-LABEL: name: FIFO1024_STORE_overMinOffset + ; CHECK: liveins: $p0, $lf0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:fifo1024 = COPY $lf0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo -576 + ; CHECK-NEXT: [[PADD_mod_pseudo:%[0-9]+]]:ep = PADD_mod_pseudo [[COPY1]], [[MOV_PD_imm11_pseudo]] + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fifo512 = COPY [[COPY]].sub_lo_fifo + ; CHECK-NEXT: VST_dmx_sts_fifohl_idx_imm [[COPY2]], [[PADD_mod_pseudo]], 0 :: (store (<16 x s32>), align 128) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fifo512 = COPY [[COPY]].sub_hi_fifo + ; CHECK-NEXT: VST_dmx_sts_fifohl_idx_imm [[COPY3]], [[PADD_mod_pseudo]], 64 :: (store (<16 x s32>) into unknown-address + 64) + %0:fiforegbank(<32 x s32>) = COPY $lf0 + %1:ptrregbank(p0) = COPY $p0 + %2:modregbank(s20) = G_CONSTANT i20 -576 + G_AIE_OFFSET_STORE %0:fiforegbank(<32 x s32>), %1:ptrregbank(p0), %2:modregbank(s20) :: (store (<32 x s32>)) +... 
+ + +--- +name: ACC2048_LOAD_maxOffset +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0 + ; CHECK-LABEL: name: ACC2048_LOAD_maxOffset + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[COPY]], 256 :: (load (<8 x s64>), align 256) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm1:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[COPY]], 320 :: (load (<8 x s64>) from unknown-address + 64) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm2:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[COPY]], 384 :: (load (<8 x s64>) from unknown-address + 128, align 128) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm3:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[COPY]], 448 :: (load (<8 x s64>) from unknown-address + 192) + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:acc2048 = REG_SEQUENCE [[VLDA_dmx_lda_bm_idx_imm]], %subreg.sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm1]], %subreg.sub_512_acc_hi, [[VLDA_dmx_lda_bm_idx_imm2]], %subreg.sub_1024_acc_hi_then_sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm3]], %subreg.sub_1024_acc_hi_then_sub_512_acc_hi + ; CHECK-NEXT: $dm0 = COPY [[REG_SEQUENCE]] + %1:ptrregbank(p0) = COPY $p0 + %2:modregbank(s20) = G_CONSTANT i20 256 + %0:accregbank(<32 x s64>) = G_AIE_OFFSET_LOAD %1:ptrregbank(p0), %2:modregbank(s20) :: (load (<32 x s64>)) + $dm0 = COPY %0:accregbank(<32 x s64>) +... 
+ +--- +name: ACC2048_LOAD_minOffset +alignment: 16 +legalized: true +regBankSelected: true +body: | + bb.1.entry: + liveins: $p0 + ; CHECK-LABEL: name: ACC2048_LOAD_minOffset + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[COPY]], -320 :: (load (<8 x s64>), align 256) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm1:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[COPY]], -256 :: (load (<8 x s64>) from unknown-address + 64) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm2:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[COPY]], -192 :: (load (<8 x s64>) from unknown-address + 128, align 128) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm3:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[COPY]], -128 :: (load (<8 x s64>) from unknown-address + 192) + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:acc2048 = REG_SEQUENCE [[VLDA_dmx_lda_bm_idx_imm]], %subreg.sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm1]], %subreg.sub_512_acc_hi, [[VLDA_dmx_lda_bm_idx_imm2]], %subreg.sub_1024_acc_hi_then_sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm3]], %subreg.sub_1024_acc_hi_then_sub_512_acc_hi + ; CHECK-NEXT: $dm0 = COPY [[REG_SEQUENCE]] + %1:ptrregbank(p0) = COPY $p0 + %2:modregbank(s20) = G_CONSTANT i20 -320 + %0:accregbank(<32 x s64>) = G_AIE_OFFSET_LOAD %1:ptrregbank(p0), %2:modregbank(s20) :: (load (<32 x s64>)) + $dm0 = COPY %0:accregbank(<32 x s64>) +... 
+# ACC2048 load, constant offset 320: expected to be applied once via PADD_imm_pseudo, with the four 512-bit loads then using immediates 0/64/128/192 off the new pointer.
+---
+name: ACC2048_LOAD_overMaxOffset
+alignment: 16
+legalized: true
+regBankSelected: true
+body: |
+  bb.1.entry:
+    liveins: $p0
+    ; CHECK-LABEL: name: ACC2048_LOAD_overMaxOffset
+    ; CHECK: liveins: $p0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
+    ; CHECK-NEXT: [[PADD_imm_pseudo:%[0-9]+]]:ep = PADD_imm_pseudo [[COPY]], 320
+    ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[PADD_imm_pseudo]], 0 :: (load (<8 x s64>), align 256)
+    ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm1:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[PADD_imm_pseudo]], 64 :: (load (<8 x s64>) from unknown-address + 64)
+    ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm2:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[PADD_imm_pseudo]], 128 :: (load (<8 x s64>) from unknown-address + 128, align 128)
+    ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm3:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[PADD_imm_pseudo]], 192 :: (load (<8 x s64>) from unknown-address + 192)
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:acc2048 = REG_SEQUENCE [[VLDA_dmx_lda_bm_idx_imm]], %subreg.sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm1]], %subreg.sub_512_acc_hi, [[VLDA_dmx_lda_bm_idx_imm2]], %subreg.sub_1024_acc_hi_then_sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm3]], %subreg.sub_1024_acc_hi_then_sub_512_acc_hi
+    ; CHECK-NEXT: $dm0 = COPY [[REG_SEQUENCE]]
+    %1:ptrregbank(p0) = COPY $p0
+    %2:modregbank(s20) = G_CONSTANT i20 320
+    %0:accregbank(<32 x s64>) = G_AIE_OFFSET_LOAD %1:ptrregbank(p0), %2:modregbank(s20) :: (load (<32 x s64>))
+    $dm0 = COPY %0:accregbank(<32 x s64>)
+...
+# ACC2048 load, constant offset -576: expected to be materialized in a modifier register (MOV_PD_imm11_pseudo) and added via PADD_mod_pseudo; the four loads then use immediates 0/64/128/192.
+---
+name: ACC2048_LOAD_overMinOffset
+alignment: 16
+legalized: true
+regBankSelected: true
+body: |
+  bb.1.entry:
+    liveins: $p0
+    ; CHECK-LABEL: name: ACC2048_LOAD_overMinOffset
+    ; CHECK: liveins: $p0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo -576
+    ; CHECK-NEXT: [[PADD_mod_pseudo:%[0-9]+]]:ep = PADD_mod_pseudo [[COPY]], [[MOV_PD_imm11_pseudo]]
+    ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[PADD_mod_pseudo]], 0 :: (load (<8 x s64>), align 256)
+    ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm1:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[PADD_mod_pseudo]], 64 :: (load (<8 x s64>) from unknown-address + 64)
+    ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm2:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[PADD_mod_pseudo]], 128 :: (load (<8 x s64>) from unknown-address + 128, align 128)
+    ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm3:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[PADD_mod_pseudo]], 192 :: (load (<8 x s64>) from unknown-address + 192)
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:acc2048 = REG_SEQUENCE [[VLDA_dmx_lda_bm_idx_imm]], %subreg.sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm1]], %subreg.sub_512_acc_hi, [[VLDA_dmx_lda_bm_idx_imm2]], %subreg.sub_1024_acc_hi_then_sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm3]], %subreg.sub_1024_acc_hi_then_sub_512_acc_hi
+    ; CHECK-NEXT: $dm0 = COPY [[REG_SEQUENCE]]
+    %1:ptrregbank(p0) = COPY $p0
+    %2:modregbank(s20) = G_CONSTANT i20 -576
+    %0:accregbank(<32 x s64>) = G_AIE_OFFSET_LOAD %1:ptrregbank(p0), %2:modregbank(s20) :: (load (<32 x s64>))
+    $dm0 = COPY %0:accregbank(<32 x s64>)
+...
+# ACC2048 store, constant offset 256: each 512-bit sub-register is copied out and stored with immediates 256/320/384/448 off the original pointer, with no pointer add.
+---
+name: ACC2048_STORE_maxOffset
+alignment: 16
+legalized: true
+regBankSelected: true
+body: |
+  bb.1.entry:
+    liveins: $p0, $dm0
+    ; CHECK-LABEL: name: ACC2048_STORE_maxOffset
+    ; CHECK: liveins: $p0, $dm0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:acc2048 = COPY $dm0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:acc512 = COPY [[COPY]].sub_512_acc_lo
+    ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY2]], [[COPY1]], 256 :: (store (<8 x s64>), align 256)
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:acc512 = COPY [[COPY]].sub_512_acc_hi
+    ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY3]], [[COPY1]], 320 :: (store (<8 x s64>) into unknown-address + 64)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:acc512 = COPY [[COPY]].sub_1024_acc_hi_then_sub_512_acc_lo
+    ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY4]], [[COPY1]], 384 :: (store (<8 x s64>) into unknown-address + 128, align 128)
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:acc512 = COPY [[COPY]].sub_1024_acc_hi_then_sub_512_acc_hi
+    ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY5]], [[COPY1]], 448 :: (store (<8 x s64>) into unknown-address + 192)
+    %0:accregbank(<32 x s64>) = COPY $dm0
+    %1:ptrregbank(p0) = COPY $p0
+    %2:modregbank(s20) = G_CONSTANT i20 256
+    G_AIE_OFFSET_STORE %0:accregbank(<32 x s64>), %1:ptrregbank(p0), %2:modregbank(s20) :: (store (<32 x s64>))
+...
+# ACC2048 store, constant offset -512: expected to fold into the four 512-bit stores as immediates -512/-448/-384/-320, with no pointer add.
+---
+name: ACC2048_STORE_minOffset
+alignment: 16
+legalized: true
+regBankSelected: true
+body: |
+  bb.1.entry:
+    liveins: $p0, $dm0
+    ; CHECK-LABEL: name: ACC2048_STORE_minOffset
+    ; CHECK: liveins: $p0, $dm0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:acc2048 = COPY $dm0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:acc512 = COPY [[COPY]].sub_512_acc_lo
+    ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY2]], [[COPY1]], -512 :: (store (<8 x s64>), align 256)
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:acc512 = COPY [[COPY]].sub_512_acc_hi
+    ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY3]], [[COPY1]], -448 :: (store (<8 x s64>) into unknown-address + 64)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:acc512 = COPY [[COPY]].sub_1024_acc_hi_then_sub_512_acc_lo
+    ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY4]], [[COPY1]], -384 :: (store (<8 x s64>) into unknown-address + 128, align 128)
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:acc512 = COPY [[COPY]].sub_1024_acc_hi_then_sub_512_acc_hi
+    ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY5]], [[COPY1]], -320 :: (store (<8 x s64>) into unknown-address + 192)
+    %0:accregbank(<32 x s64>) = COPY $dm0
+    %1:ptrregbank(p0) = COPY $p0
+    %2:modregbank(s20) = G_CONSTANT i20 -512
+    G_AIE_OFFSET_STORE %0:accregbank(<32 x s64>), %1:ptrregbank(p0), %2:modregbank(s20) :: (store (<32 x s64>))
+...
+# ACC2048 store, constant offset 320: expected to be applied once via PADD_imm_pseudo, with the four 512-bit stores then using immediates 0/64/128/192 off the new pointer.
+---
+name: ACC2048_STORE_overMaxOffset
+alignment: 16
+legalized: true
+regBankSelected: true
+body: |
+  bb.1.entry:
+    liveins: $p0, $dm0
+    ; CHECK-LABEL: name: ACC2048_STORE_overMaxOffset
+    ; CHECK: liveins: $p0, $dm0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:acc2048 = COPY $dm0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0
+    ; CHECK-NEXT: [[PADD_imm_pseudo:%[0-9]+]]:ep = PADD_imm_pseudo [[COPY1]], 320
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:acc512 = COPY [[COPY]].sub_512_acc_lo
+    ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY2]], [[PADD_imm_pseudo]], 0 :: (store (<8 x s64>), align 256)
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:acc512 = COPY [[COPY]].sub_512_acc_hi
+    ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY3]], [[PADD_imm_pseudo]], 64 :: (store (<8 x s64>) into unknown-address + 64)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:acc512 = COPY [[COPY]].sub_1024_acc_hi_then_sub_512_acc_lo
+    ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY4]], [[PADD_imm_pseudo]], 128 :: (store (<8 x s64>) into unknown-address + 128, align 128)
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:acc512 = COPY [[COPY]].sub_1024_acc_hi_then_sub_512_acc_hi
+    ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY5]], [[PADD_imm_pseudo]], 192 :: (store (<8 x s64>) into unknown-address + 192)
+    %0:accregbank(<32 x s64>) = COPY $dm0
+    %1:ptrregbank(p0) = COPY $p0
+    %2:modregbank(s20) = G_CONSTANT i20 320
+    G_AIE_OFFSET_STORE %0:accregbank(<32 x s64>), %1:ptrregbank(p0), %2:modregbank(s20) :: (store (<32 x s64>))
+...
+# ACC2048 store, constant offset -576: expected to be materialized in a modifier register (MOV_PD_imm11_pseudo) and added via PADD_mod_pseudo; the four stores then use immediates 0/64/128/192.
+---
+name: ACC2048_STORE_overMinOffset
+alignment: 16
+legalized: true
+regBankSelected: true
+body: |
+  bb.1.entry:
+    liveins: $p0, $dm0
+    ; CHECK-LABEL: name: ACC2048_STORE_overMinOffset
+    ; CHECK: liveins: $p0, $dm0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:acc2048 = COPY $dm0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:ep = COPY $p0
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo -576
+    ; CHECK-NEXT: [[PADD_mod_pseudo:%[0-9]+]]:ep = PADD_mod_pseudo [[COPY1]], [[MOV_PD_imm11_pseudo]]
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:acc512 = COPY [[COPY]].sub_512_acc_lo
+    ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY2]], [[PADD_mod_pseudo]], 0 :: (store (<8 x s64>), align 256)
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:acc512 = COPY [[COPY]].sub_512_acc_hi
+    ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY3]], [[PADD_mod_pseudo]], 64 :: (store (<8 x s64>) into unknown-address + 64)
+    ; CHECK-NEXT: [[COPY4:%[0-9]+]]:acc512 = COPY [[COPY]].sub_1024_acc_hi_then_sub_512_acc_lo
+    ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY4]], [[PADD_mod_pseudo]], 128 :: (store (<8 x s64>) into unknown-address + 128, align 128)
+    ; CHECK-NEXT: [[COPY5:%[0-9]+]]:acc512 = COPY [[COPY]].sub_1024_acc_hi_then_sub_512_acc_hi
+    ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY5]], [[PADD_mod_pseudo]], 192 :: (store (<8 x s64>) into unknown-address + 192)
+    %0:accregbank(<32 x s64>) = COPY $dm0
+    %1:ptrregbank(p0) = COPY $p0
+    %2:modregbank(s20) = G_CONSTANT i20 -576
+    G_AIE_OFFSET_STORE %0:accregbank(<32 x s64>), %1:ptrregbank(p0), %2:modregbank(s20) :: (store (<32 x s64>))
+...
diff --git a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-vector-pre-post-increment.mir b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-vector-pre-post-increment.mir index 80c5eee723f9..ed3f5a0769e1 100644 --- a/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-vector-pre-post-increment.mir +++ b/llvm/test/CodeGen/AIE/aie2p/GlobalIsel/inst-select-vector-pre-post-increment.mir @@ -4,7 +4,7 @@ # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # -# (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates +# (c) Copyright 2024-2025 Advanced Micro Devices, Inc. or its affiliates # RUN: llc -mtriple aie2p -run-pass=instruction-select -o - -verify-machineinstrs %s | FileCheck %s --- @@ -187,14 +187,14 @@ body: | ... --- -name: post-inc-vector-load-am-512 +name: post-inc-vector-load-bm-512 legalized: true regBankSelected: true tracksRegLiveness: true body: | bb.0: liveins: $p0, $r0, $r1 - ; CHECK-LABEL: name: post-inc-vector-load-am-512 + ; CHECK-LABEL: name: post-inc-vector-load-bm-512 ; CHECK: liveins: $p0, $r0, $r1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 @@ -228,14 +228,14 @@ body: | --- -name: post-inc-2d-vector-load-am-512 +name: post-inc-2d-vector-load-bm-512 legalized: true regBankSelected: true tracksRegLiveness: true body: | bb.0: liveins: $p0 - ; CHECK-LABEL: name: post-inc-2d-vector-load-am-512 + ; CHECK-LABEL: name: post-inc-2d-vector-load-bm-512 ; CHECK: liveins: $p0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 @@ -329,14 +329,14 @@ body: | ... 
--- -name: post-inc-vector-store-am-512 +name: post-inc-vector-store-bm-512 legalized: true regBankSelected: true tracksRegLiveness: true body: | bb.0: liveins: $p0, $r0, $r1, $bmll0 - ; CHECK-LABEL: name: post-inc-vector-store-am-512 + ; CHECK-LABEL: name: post-inc-vector-store-bm-512 ; CHECK: liveins: $p0, $r0, $r1, $bmll0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 @@ -372,14 +372,14 @@ body: | --- -name: post-inc-2d-vector-store-am-512 +name: post-inc-2d-vector-store-bm-512 legalized: true regBankSelected: true tracksRegLiveness: true body: | bb.0: liveins: $p0, $bmll0 - ; CHECK-LABEL: name: post-inc-2d-vector-store-am-512 + ; CHECK-LABEL: name: post-inc-2d-vector-store-bm-512 ; CHECK: liveins: $p0, $bmll0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 @@ -472,14 +472,14 @@ body: | --- -name: post-inc-3d-vector-load-am-512 +name: post-inc-3d-vector-load-bm-512 legalized: true regBankSelected: true tracksRegLiveness: true body: | bb.0: liveins: $p0 - ; CHECK-LABEL: name: post-inc-3d-vector-load-am-512 + ; CHECK-LABEL: name: post-inc-3d-vector-load-bm-512 ; CHECK: liveins: $p0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 @@ -579,14 +579,14 @@ body: | --- -name: post-inc-3d-vector-store-am-512 +name: post-inc-3d-vector-store-bm-512 legalized: true regBankSelected: true tracksRegLiveness: true body: | bb.0: liveins: $p0, $bmll0 - ; CHECK-LABEL: name: post-inc-3d-vector-store-am-512 + ; CHECK-LABEL: name: post-inc-3d-vector-store-bm-512 ; CHECK: liveins: $p0, $bmll0 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 @@ -614,5 +614,1256 @@ body: | PseudoRET implicit $lr ... 
+---
+name: post-inc-vector-load-fifo-512 # Six chained 512-bit fifo post-inc loads; increments 448, -512 and 0 are expected as immediates (_pstm_nrm_imm), while the register value, 32 and 512 use the modifier-register form (_pstm_nrm).
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $p0, $r0, $r1
+    ; CHECK-LABEL: name: post-inc-vector-load-fifo-512
+    ; CHECK: liveins: $p0, $r0, $r1
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r0
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:em = COPY [[COPY1]]
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 32
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:em = MOV_PD_imm11_pseudo 512
+    ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_pstm_nrm:%[0-9]+]]:fifo512, [[VLDA_dmx_lda_fifohl_pstm_nrm1:%[0-9]+]]:ep = VLDA_dmx_lda_fifohl_pstm_nrm [[COPY]], [[COPY2]] :: (load (<16 x s32>) from stack - 64)
+    ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_pstm_nrm_imm:%[0-9]+]]:fifo512, [[VLDA_dmx_lda_fifohl_pstm_nrm_imm1:%[0-9]+]]:ep = VLDA_dmx_lda_fifohl_pstm_nrm_imm [[VLDA_dmx_lda_fifohl_pstm_nrm1]], 448 :: (load (<16 x s32>) from stack - 64)
+    ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_pstm_nrm_imm2:%[0-9]+]]:fifo512, [[VLDA_dmx_lda_fifohl_pstm_nrm_imm3:%[0-9]+]]:ep = VLDA_dmx_lda_fifohl_pstm_nrm_imm [[VLDA_dmx_lda_fifohl_pstm_nrm_imm1]], -512 :: (load (<16 x s32>) from stack - 64)
+    ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_pstm_nrm_imm4:%[0-9]+]]:fifo512, [[VLDA_dmx_lda_fifohl_pstm_nrm_imm5:%[0-9]+]]:ep = VLDA_dmx_lda_fifohl_pstm_nrm_imm [[VLDA_dmx_lda_fifohl_pstm_nrm_imm3]], 0 :: (load (<16 x s32>) from stack - 64)
+    ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_pstm_nrm2:%[0-9]+]]:fifo512, [[VLDA_dmx_lda_fifohl_pstm_nrm3:%[0-9]+]]:ep = VLDA_dmx_lda_fifohl_pstm_nrm [[VLDA_dmx_lda_fifohl_pstm_nrm_imm5]], [[MOV_PD_imm11_pseudo]] :: (load (<16 x s32>) from stack - 64)
+    ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_pstm_nrm4:%[0-9]+]]:fifo512, [[VLDA_dmx_lda_fifohl_pstm_nrm5:%[0-9]+]]:ep = VLDA_dmx_lda_fifohl_pstm_nrm [[VLDA_dmx_lda_fifohl_pstm_nrm3]], [[MOV_PD_imm11_pseudo1]] :: (load (<16 x s32>) from stack - 64)
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VLDA_dmx_lda_fifohl_pstm_nrm4]]
+    %0:ptrregbank(p0) = COPY $p0
+    %1:gprregbank(s32) = COPY $r0
+    %7:modregbank(s20) = G_TRUNC %1
+    %8:modregbank(s20) = G_CONSTANT i20 448
+    %9:modregbank(s20) = G_CONSTANT i20 -512
+    %10:modregbank(s20) = G_CONSTANT i20 0
+    %11:modregbank(s20) = G_CONSTANT i20 32
+    %12:modregbank(s20) = G_CONSTANT i20 512
+    %13:fiforegbank(<16 x s32>), %19:ptrregbank(p0) = G_AIE_POSTINC_LOAD %0, %7 :: (load (<16 x s32>) from stack - 64)
+    %14:fiforegbank(<16 x s32>), %20:ptrregbank(p0) = G_AIE_POSTINC_LOAD %19, %8 :: (load (<16 x s32>) from stack - 64)
+    %15:fiforegbank(<16 x s32>), %21:ptrregbank(p0) = G_AIE_POSTINC_LOAD %20, %9 :: (load (<16 x s32>) from stack - 64)
+    %16:fiforegbank(<16 x s32>), %22:ptrregbank(p0) = G_AIE_POSTINC_LOAD %21, %10 :: (load (<16 x s32>) from stack - 64)
+    %17:fiforegbank(<16 x s32>), %23:ptrregbank(p0) = G_AIE_POSTINC_LOAD %22, %11 :: (load (<16 x s32>) from stack - 64)
+    %18:fiforegbank(<16 x s32>), %24:ptrregbank(p0) = G_AIE_POSTINC_LOAD %23, %12 :: (load (<16 x s32>) from stack - 64)
+    PseudoRET implicit $lr, implicit %18
+...
+# 512-bit fifo 2D post-inc load: the four constant operands are expected to be packed into one `ed` tuple via REG_SEQUENCE and consumed by VLDA_2D_dmx_lda_fifohl.
+---
+name: post-inc-2d-vector-load-fifo-512
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $p0
+    ; CHECK-LABEL: name: post-inc-2d-vector-load-fifo-512
+    ; CHECK: liveins: $p0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 3
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 4
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ed = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo2]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_count
+    ; CHECK-NEXT: [[VLDA_2D_dmx_lda_fifohl:%[0-9]+]]:fifo512, [[VLDA_2D_dmx_lda_fifohl1:%[0-9]+]]:ep, [[VLDA_2D_dmx_lda_fifohl2:%[0-9]+]]:edc = VLDA_2D_dmx_lda_fifohl [[COPY]], [[REG_SEQUENCE]] :: (load (<16 x s32>))
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VLDA_2D_dmx_lda_fifohl]]
+    %0:ptrregbank(p0) = COPY $p0
+    %1:em(s20) = G_CONSTANT i20 1
+    %2:edj(s20) = G_CONSTANT i20 2
+    %3:edn(s20) = G_CONSTANT i20 3
+    %4:edc(s20) = G_CONSTANT i20 4
+    %5:fiforegbank(<16 x s32>), %6:ptrregbank(p0), %7:modregbank(s20) = G_AIE_POSTINC_2D_LOAD %0, %1, %2, %3, %4 :: (load (<16 x s32>))
+    PseudoRET implicit $lr, implicit %5
+...
+# 512-bit fifo 3D post-inc load: the seven constant operands are expected to be packed into one `eds` tuple via REG_SEQUENCE and consumed by VLDA_3D_dmx_lda_fifohl.
+---
+name: post-inc-3d-vector-load-fifo-512
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $p0
+    ; CHECK-LABEL: name: post-inc-3d-vector-load-fifo-512
+    ; CHECK: liveins: $p0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 3
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 4
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo4:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 5
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo5:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 6
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo6:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 7
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:eds = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo5]], %subreg.sub_dim_count, [[MOV_PD_imm11_pseudo4]], %subreg.sub_hi_dim_then_sub_dim_size, [[MOV_PD_imm11_pseudo2]], %subreg.sub_hi_dim_then_sub_dim_stride, [[MOV_PD_imm11_pseudo6]], %subreg.sub_hi_dim_then_sub_dim_count
+    ; CHECK-NEXT: [[VLDA_3D_dmx_lda_fifohl:%[0-9]+]]:fifo512, [[VLDA_3D_dmx_lda_fifohl1:%[0-9]+]]:ep, [[VLDA_3D_dmx_lda_fifohl2:%[0-9]+]]:edcl, [[VLDA_3D_dmx_lda_fifohl3:%[0-9]+]]:edch = VLDA_3D_dmx_lda_fifohl [[COPY]], [[REG_SEQUENCE]] :: (load (<8 x s64>))
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VLDA_3D_dmx_lda_fifohl]]
+    %0:ptrregbank(p0) = COPY $p0
+    %1:em(s20) = G_CONSTANT i20 1
+    %2:edj(s20) = G_CONSTANT i20 2
+    %3:edj(s20) = G_CONSTANT i20 3
+    %4:edn(s20) = G_CONSTANT i20 4
+    %5:edn(s20) = G_CONSTANT i20 5
+    %6:edc(s20) = G_CONSTANT i20 6
+    %7:edc(s20) = G_CONSTANT i20 7
+    %8:fiforegbank(<8 x s64>), %9:ptrregbank(p0), %10:modregbank(s20), %11:modregbank(s20) = G_AIE_POSTINC_3D_LOAD %0, %1, %2, %3, %4, %6, %5, %7 :: (load (<8 x s64>))
+    PseudoRET implicit $lr, implicit %8
+...
+# Six chained 512-bit fifo post-inc stores; increments 448, -512 and 0 are expected as immediates (_pstm_nrm_imm), while the register value, 32 and 512 use the modifier-register form (_pstm_nrm).
+---
+name: post-inc-vector-store-fifo-512
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $p0, $r0, $r1, $lfl0
+    ; CHECK-LABEL: name: post-inc-vector-store-fifo-512
+    ; CHECK: liveins: $p0, $r0, $r1, $lfl0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r0
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:em = COPY [[COPY1]]
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 32
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:em = MOV_PD_imm11_pseudo 512
+    ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fifo512 = COPY $lfl0
+    ; CHECK-NEXT: [[VST_dmx_sts_fifohl_pstm_nrm:%[0-9]+]]:ep = VST_dmx_sts_fifohl_pstm_nrm [[COPY3]], [[COPY]], [[COPY2]] :: (store (<16 x s32>) into stack - 64)
+    ; CHECK-NEXT: [[VST_dmx_sts_fifohl_pstm_nrm_imm:%[0-9]+]]:ep = VST_dmx_sts_fifohl_pstm_nrm_imm [[COPY3]], [[VST_dmx_sts_fifohl_pstm_nrm]], 448 :: (store (<16 x s32>) into stack - 64)
+    ; CHECK-NEXT: [[VST_dmx_sts_fifohl_pstm_nrm_imm1:%[0-9]+]]:ep = VST_dmx_sts_fifohl_pstm_nrm_imm [[COPY3]], [[VST_dmx_sts_fifohl_pstm_nrm_imm]], -512 :: (store (<16 x s32>) into stack - 64)
+    ; CHECK-NEXT: [[VST_dmx_sts_fifohl_pstm_nrm_imm2:%[0-9]+]]:ep = VST_dmx_sts_fifohl_pstm_nrm_imm [[COPY3]], [[VST_dmx_sts_fifohl_pstm_nrm_imm1]], 0 :: (store (<16 x s32>) into stack - 64)
+    ; CHECK-NEXT: [[VST_dmx_sts_fifohl_pstm_nrm1:%[0-9]+]]:ep = VST_dmx_sts_fifohl_pstm_nrm [[COPY3]], [[VST_dmx_sts_fifohl_pstm_nrm_imm2]], [[MOV_PD_imm11_pseudo]] :: (store (<16 x s32>) into stack - 64)
+    ; CHECK-NEXT: [[VST_dmx_sts_fifohl_pstm_nrm2:%[0-9]+]]:ep = VST_dmx_sts_fifohl_pstm_nrm [[COPY3]], [[VST_dmx_sts_fifohl_pstm_nrm1]], [[MOV_PD_imm11_pseudo1]] :: (store (<16 x s32>) into stack - 64)
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VST_dmx_sts_fifohl_pstm_nrm2]]
+    %0:ptrregbank(p0) = COPY $p0
+    %1:gprregbank(s32) = COPY $r0
+    %7:modregbank(s20) = G_TRUNC %1
+    %8:modregbank(s20) = G_CONSTANT i20 448
+    %9:modregbank(s20) = G_CONSTANT i20 -512
+    %10:modregbank(s20) = G_CONSTANT i20 0
+    %11:modregbank(s20) = G_CONSTANT i20 32
+    %12:modregbank(s20) = G_CONSTANT i20 512
+    %13:fiforegbank(<16 x s32>) = COPY $lfl0
+    %19:ptrregbank(p0) = G_AIE_POSTINC_STORE %13, %0, %7 :: (store (<16 x s32>) into stack - 64)
+    %20:ptrregbank(p0) = G_AIE_POSTINC_STORE %13, %19, %8 :: (store (<16 x s32>) into stack - 64)
+    %21:ptrregbank(p0) = G_AIE_POSTINC_STORE %13, %20, %9 :: (store (<16 x s32>) into stack - 64)
+    %22:ptrregbank(p0) = G_AIE_POSTINC_STORE %13, %21, %10 :: (store (<16 x s32>) into stack - 64)
+    %23:ptrregbank(p0) = G_AIE_POSTINC_STORE %13, %22, %11 :: (store (<16 x s32>) into stack - 64)
+    %24:ptrregbank(p0) = G_AIE_POSTINC_STORE %13, %23, %12 :: (store (<16 x s32>) into stack - 64)
+    PseudoRET implicit $lr, implicit %24
+...
+# 512-bit fifo 2D post-inc store: the expected output copies the fifo512 value into an `mxs` register and selects VST_2D_dmx_sts_x rather than a fifo-specific store opcode.
+---
+name: post-inc-2d-vector-store-fifo-512
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $p0, $lfl0
+    ; CHECK-LABEL: name: post-inc-2d-vector-store-fifo-512
+    ; CHECK: liveins: $p0, $lfl0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 3
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 4
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fifo512 = COPY $lfl0
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ed = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo2]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_count
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:mxs = COPY [[COPY1]]
+    ; CHECK-NEXT: [[VST_2D_dmx_sts_x:%[0-9]+]]:ep, [[VST_2D_dmx_sts_x1:%[0-9]+]]:edc = VST_2D_dmx_sts_x [[COPY2]], [[COPY]], [[REG_SEQUENCE]] :: (store (<16 x s32>))
+    ; CHECK-NEXT: PseudoRET implicit $lr
+    %0:ptrregbank(p0) = COPY $p0
+    %1:em(s20) = G_CONSTANT i20 1
+    %2:edj(s20) = G_CONSTANT i20 2
+    %3:edn(s20) = G_CONSTANT i20 3
+    %4:edc(s20) = G_CONSTANT i20 4
+    %5:fiforegbank(<16 x s32>) = COPY $lfl0
+    %6:ptrregbank(p0), %7:modregbank(s20) = G_AIE_POSTINC_2D_STORE %5, %0, %1, %2, %3, %4 :: (store (<16 x s32>))
+    PseudoRET implicit $lr
+...
+# 512-bit fifo 3D post-inc store: the expected output copies the fifo512 value into an `mxs` register and selects VST_3D_dmx_sts_x with the seven constants packed into an `eds` tuple.
+---
+name: post-inc-3d-vector-store-fifo-512
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $p0, $lfl0
+    ; CHECK-LABEL: name: post-inc-3d-vector-store-fifo-512
+    ; CHECK: liveins: $p0, $lfl0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 3
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 4
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo4:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 5
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo5:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 6
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo6:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 7
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fifo512 = COPY $lfl0
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:eds = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo5]], %subreg.sub_dim_count, [[MOV_PD_imm11_pseudo4]], %subreg.sub_hi_dim_then_sub_dim_size, [[MOV_PD_imm11_pseudo2]], %subreg.sub_hi_dim_then_sub_dim_stride, [[MOV_PD_imm11_pseudo6]], %subreg.sub_hi_dim_then_sub_dim_count
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:mxs = COPY [[COPY1]]
+    ; CHECK-NEXT: [[VST_3D_dmx_sts_x:%[0-9]+]]:ep, [[VST_3D_dmx_sts_x1:%[0-9]+]]:edcl, [[VST_3D_dmx_sts_x2:%[0-9]+]]:edch = VST_3D_dmx_sts_x [[COPY2]], [[COPY]], [[REG_SEQUENCE]] :: (store (<16 x s32>))
+    ; CHECK-NEXT: PseudoRET implicit $lr
+    %0:ptrregbank(p0) = COPY $p0
+    %1:em(s20) = G_CONSTANT i20 1
+    %2:edj(s20) = G_CONSTANT i20 2
+    %3:edj(s20) = G_CONSTANT i20 3
+    %4:edn(s20) = G_CONSTANT i20 4
+    %5:edn(s20) = G_CONSTANT i20 5
+    %6:edc(s20) = G_CONSTANT i20 6
+    %7:edc(s20) = G_CONSTANT i20 7
+    %8:fiforegbank(<16 x s32>) = COPY $lfl0
+    %9:ptrregbank(p0), %10:modregbank(s20), %11:modregbank(s20) = G_AIE_POSTINC_3D_STORE %8, %0, %1, %2, %3, %4, %6, %5, %7 :: (store (<16 x s32>))
+    PseudoRET implicit $lr
+...
+# Six chained 1024-bit fifo post-inc loads: each is expected to split into a 512-bit post-inc load (low half) plus a 512-bit load at immediate 64 off the pre-increment pointer (high half), joined by REG_SEQUENCE into fifo1024.
+---
+name: post-inc-vector-load-fifo-1024
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $p0, $r0, $r1
+    ; CHECK-LABEL: name: post-inc-vector-load-fifo-1024
+    ; CHECK: liveins: $p0, $r0, $r1
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r0
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:em = COPY [[COPY1]]
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 32
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:em = MOV_PD_imm11_pseudo 512
+    ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_pstm_nrm:%[0-9]+]]:fifo512, [[VLDA_dmx_lda_fifohl_pstm_nrm1:%[0-9]+]]:ep = VLDA_dmx_lda_fifohl_pstm_nrm [[COPY]], [[COPY2]] :: (load (<16 x s32>) from stack - 64, basealign 128)
+    ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_idx_imm:%[0-9]+]]:fifo512 = VLDA_dmx_lda_fifohl_idx_imm [[COPY]], 64 :: (load (<16 x s32>) from stack, align 128)
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:fifo1024 = REG_SEQUENCE [[VLDA_dmx_lda_fifohl_pstm_nrm]], %subreg.sub_lo_fifo, [[VLDA_dmx_lda_fifohl_idx_imm]], %subreg.sub_hi_fifo
+    ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_pstm_nrm_imm:%[0-9]+]]:fifo512, [[VLDA_dmx_lda_fifohl_pstm_nrm_imm1:%[0-9]+]]:ep = VLDA_dmx_lda_fifohl_pstm_nrm_imm [[VLDA_dmx_lda_fifohl_pstm_nrm1]], 448 :: (load (<16 x s32>) from stack - 64, basealign 128)
+    ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_idx_imm1:%[0-9]+]]:fifo512 = VLDA_dmx_lda_fifohl_idx_imm [[VLDA_dmx_lda_fifohl_pstm_nrm1]], 64 :: (load (<16 x s32>) from stack, align 128)
+    ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:fifo1024 = REG_SEQUENCE [[VLDA_dmx_lda_fifohl_pstm_nrm_imm]], %subreg.sub_lo_fifo, [[VLDA_dmx_lda_fifohl_idx_imm1]], %subreg.sub_hi_fifo
+    ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_pstm_nrm_imm2:%[0-9]+]]:fifo512, [[VLDA_dmx_lda_fifohl_pstm_nrm_imm3:%[0-9]+]]:ep = VLDA_dmx_lda_fifohl_pstm_nrm_imm [[VLDA_dmx_lda_fifohl_pstm_nrm_imm1]], -512 :: (load (<16 x s32>) from stack - 64, basealign 128)
+    ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_idx_imm2:%[0-9]+]]:fifo512 = VLDA_dmx_lda_fifohl_idx_imm [[VLDA_dmx_lda_fifohl_pstm_nrm_imm1]], 64 :: (load (<16 x s32>) from stack, align 128)
+    ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:fifo1024 = REG_SEQUENCE [[VLDA_dmx_lda_fifohl_pstm_nrm_imm2]], %subreg.sub_lo_fifo, [[VLDA_dmx_lda_fifohl_idx_imm2]], %subreg.sub_hi_fifo
+    ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_pstm_nrm_imm4:%[0-9]+]]:fifo512, [[VLDA_dmx_lda_fifohl_pstm_nrm_imm5:%[0-9]+]]:ep = VLDA_dmx_lda_fifohl_pstm_nrm_imm [[VLDA_dmx_lda_fifohl_pstm_nrm_imm3]], 0 :: (load (<16 x s32>) from stack - 64, basealign 128)
+    ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_idx_imm3:%[0-9]+]]:fifo512 = VLDA_dmx_lda_fifohl_idx_imm [[VLDA_dmx_lda_fifohl_pstm_nrm_imm3]], 64 :: (load (<16 x s32>) from stack, align 128)
+    ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:fifo1024 = REG_SEQUENCE [[VLDA_dmx_lda_fifohl_pstm_nrm_imm4]], %subreg.sub_lo_fifo, [[VLDA_dmx_lda_fifohl_idx_imm3]], %subreg.sub_hi_fifo
+    ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_pstm_nrm2:%[0-9]+]]:fifo512, [[VLDA_dmx_lda_fifohl_pstm_nrm3:%[0-9]+]]:ep = VLDA_dmx_lda_fifohl_pstm_nrm [[VLDA_dmx_lda_fifohl_pstm_nrm_imm5]], [[MOV_PD_imm11_pseudo]] :: (load (<16 x s32>) from stack - 64, basealign 128)
+    ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_idx_imm4:%[0-9]+]]:fifo512 = VLDA_dmx_lda_fifohl_idx_imm [[VLDA_dmx_lda_fifohl_pstm_nrm_imm5]], 64 :: (load (<16 x s32>) from stack, align 128)
+    ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:fifo1024 = REG_SEQUENCE [[VLDA_dmx_lda_fifohl_pstm_nrm2]], %subreg.sub_lo_fifo, [[VLDA_dmx_lda_fifohl_idx_imm4]], %subreg.sub_hi_fifo
+    ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_pstm_nrm4:%[0-9]+]]:fifo512, [[VLDA_dmx_lda_fifohl_pstm_nrm5:%[0-9]+]]:ep = VLDA_dmx_lda_fifohl_pstm_nrm [[VLDA_dmx_lda_fifohl_pstm_nrm3]], [[MOV_PD_imm11_pseudo1]] :: (load (<16 x s32>) from stack - 64, basealign 128)
+    ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_idx_imm5:%[0-9]+]]:fifo512 = VLDA_dmx_lda_fifohl_idx_imm [[VLDA_dmx_lda_fifohl_pstm_nrm3]], 64 :: (load (<16 x s32>) from stack, align 128)
+    ; CHECK-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:fifo1024 = REG_SEQUENCE [[VLDA_dmx_lda_fifohl_pstm_nrm4]], %subreg.sub_lo_fifo, [[VLDA_dmx_lda_fifohl_idx_imm5]], %subreg.sub_hi_fifo
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[REG_SEQUENCE5]]
+    %0:ptrregbank(p0) = COPY $p0
+    %1:gprregbank(s32) = COPY $r0
+    %7:modregbank(s20) = G_TRUNC %1
+    %8:modregbank(s20) = G_CONSTANT i20 448
+    %9:modregbank(s20) = G_CONSTANT i20 -512
+    %10:modregbank(s20) = G_CONSTANT i20 0
+    %11:modregbank(s20) = G_CONSTANT i20 32
+    %12:modregbank(s20) = G_CONSTANT i20 512
+    %13:fiforegbank(<32 x s32>), %19:ptrregbank(p0) = G_AIE_POSTINC_LOAD %0, %7 :: (load (<32 x s32>) from stack - 64)
+    %14:fiforegbank(<32 x s32>), %20:ptrregbank(p0) = G_AIE_POSTINC_LOAD %19, %8 :: (load (<32 x s32>) from stack - 64)
+    %15:fiforegbank(<32 x s32>), %21:ptrregbank(p0) = G_AIE_POSTINC_LOAD %20, %9 :: (load (<32 x s32>) from stack - 64)
+    %16:fiforegbank(<32 x s32>), %22:ptrregbank(p0) = G_AIE_POSTINC_LOAD %21, %10 :: (load (<32 x s32>) from stack - 64)
+    %17:fiforegbank(<32 x s32>), %23:ptrregbank(p0) = G_AIE_POSTINC_LOAD %22, %11 :: (load (<32 x s32>) from stack - 64)
+    %18:fiforegbank(<32 x s32>), %24:ptrregbank(p0) = G_AIE_POSTINC_LOAD %23, %12 :: (load (<32 x s32>) from stack - 64)
+    PseudoRET implicit $lr, implicit %18
+...
+# 1024-bit fifo 2D post-inc load: expected to split into VLDA_2D_dmx_lda_fifohl (low half) plus a 512-bit load at immediate 64 off the original pointer (high half), joined by REG_SEQUENCE into fifo1024.
+---
+name: post-inc-2d-vector-load-fifo-1024
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $p0
+    ; CHECK-LABEL: name: post-inc-2d-vector-load-fifo-1024
+    ; CHECK: liveins: $p0
+    ; CHECK-NEXT: {{ $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 3
+    ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 4
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ed = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo2]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_count
+    ; CHECK-NEXT: [[VLDA_2D_dmx_lda_fifohl:%[0-9]+]]:fifo512, [[VLDA_2D_dmx_lda_fifohl1:%[0-9]+]]:ep, [[VLDA_2D_dmx_lda_fifohl2:%[0-9]+]]:edc = VLDA_2D_dmx_lda_fifohl [[COPY]], [[REG_SEQUENCE]] :: (load (<16 x s32>), align 128)
+    ; CHECK-NEXT: [[VLDA_dmx_lda_fifohl_idx_imm:%[0-9]+]]:fifo512 = VLDA_dmx_lda_fifohl_idx_imm [[COPY]], 64 :: (load (<16 x s32>) from unknown-address + 64)
+    ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:fifo1024 = REG_SEQUENCE [[VLDA_2D_dmx_lda_fifohl]], %subreg.sub_lo_fifo, [[VLDA_dmx_lda_fifohl_idx_imm]], %subreg.sub_hi_fifo
+    ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[REG_SEQUENCE1]]
+    %0:ptrregbank(p0) = COPY $p0
+    %1:em(s20) = G_CONSTANT i20 1
+    %2:edj(s20) = G_CONSTANT i20 2
+    %3:edn(s20) = G_CONSTANT i20 3
+    %4:edc(s20) = G_CONSTANT i20 4
+    %5:fiforegbank(<32 x s32>), %6:ptrregbank(p0), %7:modregbank(s20) = G_AIE_POSTINC_2D_LOAD %0, %1, %2, %3, %4 :: (load (<32 x s32>))
+    PseudoRET implicit $lr, implicit %5
+...
+ +--- +name: post-inc-3d-vector-load-fifo-1024 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + ; CHECK-LABEL: name: post-inc-3d-vector-load-fifo-1024 + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo4:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 5 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo5:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 6 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo6:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 7 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:eds = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo5]], %subreg.sub_dim_count, [[MOV_PD_imm11_pseudo4]], %subreg.sub_hi_dim_then_sub_dim_size, [[MOV_PD_imm11_pseudo2]], %subreg.sub_hi_dim_then_sub_dim_stride, [[MOV_PD_imm11_pseudo6]], %subreg.sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: [[VLDA_3D_dmx_lda_fifohl:%[0-9]+]]:fifo512, [[VLDA_3D_dmx_lda_fifohl1:%[0-9]+]]:ep, [[VLDA_3D_dmx_lda_fifohl2:%[0-9]+]]:edcl, [[VLDA_3D_dmx_lda_fifohl3:%[0-9]+]]:edch = VLDA_3D_dmx_lda_fifohl [[COPY]], [[REG_SEQUENCE]] :: (load (<8 x s64>)) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VLDA_3D_dmx_lda_fifohl]] + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edj(s20) = G_CONSTANT i20 3 + %4:edn(s20) = G_CONSTANT i20 4 + %5:edn(s20) = G_CONSTANT i20 5 + %6:edc(s20) = G_CONSTANT i20 6 + %7:edc(s20) = G_CONSTANT i20 7 + %8:fiforegbank(<8 x s64>), %9:ptrregbank(p0), %10:modregbank(s20), %11:modregbank(s20) = G_AIE_POSTINC_3D_LOAD %0, %1, %2, %3, 
%4, %6, %5, %7 :: (load (<8 x s64>)) + PseudoRET implicit $lr, implicit %8 +... + +--- +name: post-inc-vector-store-fifo-1024 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0, $r0, $r1, $lf0 + ; CHECK-LABEL: name: post-inc-vector-store-fifo-1024 + ; CHECK: liveins: $p0, $r0, $r1, $lf0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:em = COPY [[COPY1]] + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 32 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:em = MOV_PD_imm11_pseudo 512 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fifo1024 = COPY $lf0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:fifo512 = COPY [[COPY3]].sub_lo_fifo + ; CHECK-NEXT: [[VST_dmx_sts_fifohl_pstm_nrm:%[0-9]+]]:ep = VST_dmx_sts_fifohl_pstm_nrm [[COPY4]], [[COPY]], [[COPY2]] :: (store (<16 x s32>) into stack - 64, basealign 128) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:fifo512 = COPY [[COPY3]].sub_hi_fifo + ; CHECK-NEXT: VST_dmx_sts_fifohl_idx_imm [[COPY5]], [[COPY]], 64 :: (store (<16 x s32>) into stack, align 128) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:fifo512 = COPY [[COPY3]].sub_lo_fifo + ; CHECK-NEXT: [[VST_dmx_sts_fifohl_pstm_nrm_imm:%[0-9]+]]:ep = VST_dmx_sts_fifohl_pstm_nrm_imm [[COPY6]], [[VST_dmx_sts_fifohl_pstm_nrm]], 448 :: (store (<16 x s32>) into stack - 64, basealign 128) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:fifo512 = COPY [[COPY3]].sub_hi_fifo + ; CHECK-NEXT: VST_dmx_sts_fifohl_idx_imm [[COPY7]], [[VST_dmx_sts_fifohl_pstm_nrm]], 64 :: (store (<16 x s32>) into stack, align 128) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:fifo512 = COPY [[COPY3]].sub_lo_fifo + ; CHECK-NEXT: [[VST_dmx_sts_fifohl_pstm_nrm_imm1:%[0-9]+]]:ep = VST_dmx_sts_fifohl_pstm_nrm_imm [[COPY8]], [[VST_dmx_sts_fifohl_pstm_nrm_imm]], -512 :: (store (<16 x s32>) into stack - 64, basealign 128) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:fifo512 = COPY [[COPY3]].sub_hi_fifo + ; CHECK-NEXT: 
VST_dmx_sts_fifohl_idx_imm [[COPY9]], [[VST_dmx_sts_fifohl_pstm_nrm_imm]], 64 :: (store (<16 x s32>) into stack, align 128) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:fifo512 = COPY [[COPY3]].sub_lo_fifo + ; CHECK-NEXT: [[VST_dmx_sts_fifohl_pstm_nrm_imm2:%[0-9]+]]:ep = VST_dmx_sts_fifohl_pstm_nrm_imm [[COPY10]], [[VST_dmx_sts_fifohl_pstm_nrm_imm1]], 0 :: (store (<16 x s32>) into stack - 64, basealign 128) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:fifo512 = COPY [[COPY3]].sub_hi_fifo + ; CHECK-NEXT: VST_dmx_sts_fifohl_idx_imm [[COPY11]], [[VST_dmx_sts_fifohl_pstm_nrm_imm1]], 64 :: (store (<16 x s32>) into stack, align 128) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:fifo512 = COPY [[COPY3]].sub_lo_fifo + ; CHECK-NEXT: [[VST_dmx_sts_fifohl_pstm_nrm1:%[0-9]+]]:ep = VST_dmx_sts_fifohl_pstm_nrm [[COPY12]], [[VST_dmx_sts_fifohl_pstm_nrm_imm2]], [[MOV_PD_imm11_pseudo]] :: (store (<16 x s32>) into stack - 64, basealign 128) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:fifo512 = COPY [[COPY3]].sub_hi_fifo + ; CHECK-NEXT: VST_dmx_sts_fifohl_idx_imm [[COPY13]], [[VST_dmx_sts_fifohl_pstm_nrm_imm2]], 64 :: (store (<16 x s32>) into stack, align 128) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:fifo512 = COPY [[COPY3]].sub_lo_fifo + ; CHECK-NEXT: [[VST_dmx_sts_fifohl_pstm_nrm2:%[0-9]+]]:ep = VST_dmx_sts_fifohl_pstm_nrm [[COPY14]], [[VST_dmx_sts_fifohl_pstm_nrm1]], [[MOV_PD_imm11_pseudo1]] :: (store (<16 x s32>) into stack - 64, basealign 128) + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:fifo512 = COPY [[COPY3]].sub_hi_fifo + ; CHECK-NEXT: VST_dmx_sts_fifohl_idx_imm [[COPY15]], [[VST_dmx_sts_fifohl_pstm_nrm1]], 64 :: (store (<16 x s32>) into stack, align 128) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VST_dmx_sts_fifohl_pstm_nrm2]] + %0:ptrregbank(p0) = COPY $p0 + %1:gprregbank(s32) = COPY $r0 + %7:modregbank(s20) = G_TRUNC %1 + %8:modregbank(s20) = G_CONSTANT i20 448 + %9:modregbank(s20) = G_CONSTANT i20 -512 + %10:modregbank(s20) = G_CONSTANT i20 0 + %11:modregbank(s20) = G_CONSTANT i20 32 + %12:modregbank(s20) = G_CONSTANT 
i20 512 + %13:fiforegbank(<32 x s32>) = COPY $lf0 + %19:ptrregbank(p0) = G_AIE_POSTINC_STORE %13, %0, %7 :: (store (<32 x s32>) into stack - 64) + %20:ptrregbank(p0) = G_AIE_POSTINC_STORE %13, %19, %8 :: (store (<32 x s32>) into stack - 64) + %21:ptrregbank(p0) = G_AIE_POSTINC_STORE %13, %20, %9 :: (store (<32 x s32>) into stack - 64) + %22:ptrregbank(p0) = G_AIE_POSTINC_STORE %13, %21, %10 :: (store (<32 x s32>) into stack - 64) + %23:ptrregbank(p0) = G_AIE_POSTINC_STORE %13, %22, %11 :: (store (<32 x s32>) into stack - 64) + %24:ptrregbank(p0) = G_AIE_POSTINC_STORE %13, %23, %12 :: (store (<32 x s32>) into stack - 64) + PseudoRET implicit $lr, implicit %24 +... + +--- +name: post-inc-2d-vector-store-fifo-1024 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0, $lf0 + ; CHECK-LABEL: name: post-inc-2d-vector-store-fifo-1024 + ; CHECK: liveins: $p0, $lf0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fifo1024 = COPY $lf0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ed = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo2]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_count + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fifo512 = COPY [[COPY1]].sub_lo_fifo + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:mxs = COPY [[COPY2]] + ; CHECK-NEXT: [[VST_2D_dmx_sts_x:%[0-9]+]]:ep, [[VST_2D_dmx_sts_x1:%[0-9]+]]:edc = VST_2D_dmx_sts_x [[COPY3]], [[COPY]], [[REG_SEQUENCE]] :: (store (<16 x s32>), align 128) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:fifo512 = COPY [[COPY1]].sub_hi_fifo + ; CHECK-NEXT: 
VST_dmx_sts_fifohl_idx_imm [[COPY4]], [[COPY]], 64 :: (store (<16 x s32>) into unknown-address + 64) + ; CHECK-NEXT: PseudoRET implicit $lr + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edn(s20) = G_CONSTANT i20 3 + %4:edc(s20) = G_CONSTANT i20 4 + %5:fiforegbank(<32 x s32>) = COPY $lf0 + %6:ptrregbank(p0), %7:modregbank(s20) = G_AIE_POSTINC_2D_STORE %5, %0, %1, %2, %3, %4 :: (store (<32 x s32>)) + PseudoRET implicit $lr +... + +--- +name: post-inc-3d-vector-store-fifo-1024 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0, $lf0 + ; CHECK-LABEL: name: post-inc-3d-vector-store-fifo-1024 + ; CHECK: liveins: $p0, $lf0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo4:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 5 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo5:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 6 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo6:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 7 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fifo1024 = COPY $lf0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:eds = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo5]], %subreg.sub_dim_count, [[MOV_PD_imm11_pseudo4]], %subreg.sub_hi_dim_then_sub_dim_size, [[MOV_PD_imm11_pseudo2]], %subreg.sub_hi_dim_then_sub_dim_stride, [[MOV_PD_imm11_pseudo6]], %subreg.sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fifo512 = COPY [[COPY1]].sub_lo_fifo + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:mxs = COPY [[COPY2]] + ; CHECK-NEXT: 
[[VST_3D_dmx_sts_x:%[0-9]+]]:ep, [[VST_3D_dmx_sts_x1:%[0-9]+]]:edcl, [[VST_3D_dmx_sts_x2:%[0-9]+]]:edch = VST_3D_dmx_sts_x [[COPY3]], [[COPY]], [[REG_SEQUENCE]] :: (store (<16 x s32>), align 128) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:fifo512 = COPY [[COPY1]].sub_hi_fifo + ; CHECK-NEXT: VST_dmx_sts_fifohl_idx_imm [[COPY4]], [[COPY]], 64 :: (store (<16 x s32>) into unknown-address + 64) + ; CHECK-NEXT: PseudoRET implicit $lr + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edj(s20) = G_CONSTANT i20 3 + %4:edn(s20) = G_CONSTANT i20 4 + %5:edn(s20) = G_CONSTANT i20 5 + %6:edc(s20) = G_CONSTANT i20 6 + %7:edc(s20) = G_CONSTANT i20 7 + %8:fiforegbank(<32 x s32>) = COPY $lf0 + %9:ptrregbank(p0), %10:modregbank(s20), %11:modregbank(s20) = G_AIE_POSTINC_3D_STORE %8, %0, %1, %2, %3, %4, %6, %5, %7 :: (store (<32 x s32>)) + PseudoRET implicit $lr +... + +--- +name: post-inc-vector-load-vreg-1024 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0, $r0, $r1 + ; CHECK-LABEL: name: post-inc-vector-load-vreg-1024 + ; CHECK: liveins: $p0, $r0, $r1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:em = COPY [[COPY1]] + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 32 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:em = MOV_PD_imm11_pseudo 512 + ; CHECK-NEXT: [[VLDA_dmx_lda_x_pstm_nrm:%[0-9]+]]:vec512, [[VLDA_dmx_lda_x_pstm_nrm1:%[0-9]+]]:ep = VLDA_dmx_lda_x_pstm_nrm [[COPY]], [[COPY2]] :: (load (<16 x s32>) from stack - 64, basealign 128) + ; CHECK-NEXT: [[VLDA_dmx_lda_x_idx_imm:%[0-9]+]]:vec512 = VLDA_dmx_lda_x_idx_imm [[COPY]], 64 :: (load (<16 x s32>) from stack, align 128) + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vec1024 = REG_SEQUENCE [[VLDA_dmx_lda_x_pstm_nrm]], %subreg.sub_512_lo, [[VLDA_dmx_lda_x_idx_imm]], %subreg.sub_512_hi + ; CHECK-NEXT: 
[[VLDA_dmx_lda_x_pstm_nrm_imm:%[0-9]+]]:vec512, [[VLDA_dmx_lda_x_pstm_nrm_imm1:%[0-9]+]]:ep = VLDA_dmx_lda_x_pstm_nrm_imm [[VLDA_dmx_lda_x_pstm_nrm1]], 448 :: (load (<16 x s32>) from stack - 64, basealign 128) + ; CHECK-NEXT: [[VLDA_dmx_lda_x_idx_imm1:%[0-9]+]]:vec512 = VLDA_dmx_lda_x_idx_imm [[VLDA_dmx_lda_x_pstm_nrm1]], 64 :: (load (<16 x s32>) from stack, align 128) + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vec1024 = REG_SEQUENCE [[VLDA_dmx_lda_x_pstm_nrm_imm]], %subreg.sub_512_lo, [[VLDA_dmx_lda_x_idx_imm1]], %subreg.sub_512_hi + ; CHECK-NEXT: [[VLDA_dmx_lda_x_pstm_nrm_imm2:%[0-9]+]]:vec512, [[VLDA_dmx_lda_x_pstm_nrm_imm3:%[0-9]+]]:ep = VLDA_dmx_lda_x_pstm_nrm_imm [[VLDA_dmx_lda_x_pstm_nrm_imm1]], -512 :: (load (<16 x s32>) from stack - 64, basealign 128) + ; CHECK-NEXT: [[VLDA_dmx_lda_x_idx_imm2:%[0-9]+]]:vec512 = VLDA_dmx_lda_x_idx_imm [[VLDA_dmx_lda_x_pstm_nrm_imm1]], 64 :: (load (<16 x s32>) from stack, align 128) + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vec1024 = REG_SEQUENCE [[VLDA_dmx_lda_x_pstm_nrm_imm2]], %subreg.sub_512_lo, [[VLDA_dmx_lda_x_idx_imm2]], %subreg.sub_512_hi + ; CHECK-NEXT: [[VLDA_dmx_lda_x_pstm_nrm_imm4:%[0-9]+]]:vec512, [[VLDA_dmx_lda_x_pstm_nrm_imm5:%[0-9]+]]:ep = VLDA_dmx_lda_x_pstm_nrm_imm [[VLDA_dmx_lda_x_pstm_nrm_imm3]], 0 :: (load (<16 x s32>) from stack - 64, basealign 128) + ; CHECK-NEXT: [[VLDA_dmx_lda_x_idx_imm3:%[0-9]+]]:vec512 = VLDA_dmx_lda_x_idx_imm [[VLDA_dmx_lda_x_pstm_nrm_imm3]], 64 :: (load (<16 x s32>) from stack, align 128) + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:vec1024 = REG_SEQUENCE [[VLDA_dmx_lda_x_pstm_nrm_imm4]], %subreg.sub_512_lo, [[VLDA_dmx_lda_x_idx_imm3]], %subreg.sub_512_hi + ; CHECK-NEXT: [[VLDA_dmx_lda_x_pstm_nrm2:%[0-9]+]]:vec512, [[VLDA_dmx_lda_x_pstm_nrm3:%[0-9]+]]:ep = VLDA_dmx_lda_x_pstm_nrm [[VLDA_dmx_lda_x_pstm_nrm_imm5]], [[MOV_PD_imm11_pseudo]] :: (load (<16 x s32>) from stack - 64, basealign 128) + ; CHECK-NEXT: [[VLDA_dmx_lda_x_idx_imm4:%[0-9]+]]:vec512 = VLDA_dmx_lda_x_idx_imm 
[[VLDA_dmx_lda_x_pstm_nrm_imm5]], 64 :: (load (<16 x s32>) from stack, align 128) + ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:vec1024 = REG_SEQUENCE [[VLDA_dmx_lda_x_pstm_nrm2]], %subreg.sub_512_lo, [[VLDA_dmx_lda_x_idx_imm4]], %subreg.sub_512_hi + ; CHECK-NEXT: [[VLDA_dmx_lda_x_pstm_nrm4:%[0-9]+]]:vec512, [[VLDA_dmx_lda_x_pstm_nrm5:%[0-9]+]]:ep = VLDA_dmx_lda_x_pstm_nrm [[VLDA_dmx_lda_x_pstm_nrm3]], [[MOV_PD_imm11_pseudo1]] :: (load (<16 x s32>) from stack - 64, basealign 128) + ; CHECK-NEXT: [[VLDA_dmx_lda_x_idx_imm5:%[0-9]+]]:vec512 = VLDA_dmx_lda_x_idx_imm [[VLDA_dmx_lda_x_pstm_nrm3]], 64 :: (load (<16 x s32>) from stack, align 128) + ; CHECK-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:vec1024 = REG_SEQUENCE [[VLDA_dmx_lda_x_pstm_nrm4]], %subreg.sub_512_lo, [[VLDA_dmx_lda_x_idx_imm5]], %subreg.sub_512_hi + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[REG_SEQUENCE5]] + %0:ptrregbank(p0) = COPY $p0 + %1:gprregbank(s32) = COPY $r0 + %7:modregbank(s20) = G_TRUNC %1 + %8:modregbank(s20) = G_CONSTANT i20 448 + %9:modregbank(s20) = G_CONSTANT i20 -512 + %10:modregbank(s20) = G_CONSTANT i20 0 + %11:modregbank(s20) = G_CONSTANT i20 32 + %12:modregbank(s20) = G_CONSTANT i20 512 + %13:vregbank(<32 x s32>), %19:ptrregbank(p0) = G_AIE_POSTINC_LOAD %0, %7 :: (load (<32 x s32>) from stack - 64) + %14:vregbank(<32 x s32>), %20:ptrregbank(p0) = G_AIE_POSTINC_LOAD %19, %8 :: (load (<32 x s32>) from stack - 64) + %15:vregbank(<32 x s32>), %21:ptrregbank(p0) = G_AIE_POSTINC_LOAD %20, %9 :: (load (<32 x s32>) from stack - 64) + %16:vregbank(<32 x s32>), %22:ptrregbank(p0) = G_AIE_POSTINC_LOAD %21, %10 :: (load (<32 x s32>) from stack - 64) + %17:vregbank(<32 x s32>), %23:ptrregbank(p0) = G_AIE_POSTINC_LOAD %22, %11 :: (load (<32 x s32>) from stack - 64) + %18:vregbank(<32 x s32>), %24:ptrregbank(p0) = G_AIE_POSTINC_LOAD %23, %12 :: (load (<32 x s32>) from stack - 64) + PseudoRET implicit $lr, implicit %18 +... 
+ +--- +name: post-inc-2d-vector-load-vreg-1024 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + ; CHECK-LABEL: name: post-inc-2d-vector-load-vreg-1024 + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ed = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo2]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_count + ; CHECK-NEXT: [[VLDA_2D_dmx_lda_x:%[0-9]+]]:vec512, [[VLDA_2D_dmx_lda_x1:%[0-9]+]]:ep, [[VLDA_2D_dmx_lda_x2:%[0-9]+]]:edc = VLDA_2D_dmx_lda_x [[COPY]], [[REG_SEQUENCE]] :: (load (<16 x s32>), align 128) + ; CHECK-NEXT: [[VLDA_dmw_lda_w_idx_imm:%[0-9]+]]:mwa = VLDA_dmw_lda_w_idx_imm [[COPY]], 64 :: (load (<16 x s32>) from unknown-address + 64) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec512 = COPY [[VLDA_dmw_lda_w_idx_imm]] + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vec1024 = REG_SEQUENCE [[VLDA_2D_dmx_lda_x]], %subreg.sub_512_lo, [[COPY1]], %subreg.sub_512_hi + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[REG_SEQUENCE1]] + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edn(s20) = G_CONSTANT i20 3 + %4:edc(s20) = G_CONSTANT i20 4 + %5:vregbank(<32 x s32>), %6:ptrregbank(p0), %7:modregbank(s20) = G_AIE_POSTINC_2D_LOAD %0, %1, %2, %3, %4 :: (load (<32 x s32>)) + PseudoRET implicit $lr, implicit %5 +... 
+ +--- +name: post-inc-3d-vector-load-vreg-1024 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + ; CHECK-LABEL: name: post-inc-3d-vector-load-vreg-1024 + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo4:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 5 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo5:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 6 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo6:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 7 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:eds = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo5]], %subreg.sub_dim_count, [[MOV_PD_imm11_pseudo4]], %subreg.sub_hi_dim_then_sub_dim_size, [[MOV_PD_imm11_pseudo2]], %subreg.sub_hi_dim_then_sub_dim_stride, [[MOV_PD_imm11_pseudo6]], %subreg.sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: [[VLDA_3D_dmx_lda_x:%[0-9]+]]:vec512, [[VLDA_3D_dmx_lda_x1:%[0-9]+]]:ep, [[VLDA_3D_dmx_lda_x2:%[0-9]+]]:edcl, [[VLDA_3D_dmx_lda_x3:%[0-9]+]]:edch = VLDA_3D_dmx_lda_x [[COPY]], [[REG_SEQUENCE]] :: (load (<8 x s64>)) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VLDA_3D_dmx_lda_x]] + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edj(s20) = G_CONSTANT i20 3 + %4:edn(s20) = G_CONSTANT i20 4 + %5:edn(s20) = G_CONSTANT i20 5 + %6:edc(s20) = G_CONSTANT i20 6 + %7:edc(s20) = G_CONSTANT i20 7 + %8:vregbank(<8 x s64>), %9:ptrregbank(p0), %10:modregbank(s20), %11:modregbank(s20) = G_AIE_POSTINC_3D_LOAD %0, %1, %2, %3, %4, %6, %5, %7 :: (load (<8 x 
s64>)) + PseudoRET implicit $lr, implicit %8 +... + +--- +name: post-inc-vector-store-vreg-1024 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0, $r0, $r1, $y0 + ; CHECK-LABEL: name: post-inc-vector-store-vreg-1024 + ; CHECK: liveins: $p0, $r0, $r1, $y0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:em = COPY [[COPY1]] + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 32 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:em = MOV_PD_imm11_pseudo 512 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vec1024 = COPY $y0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vec512 = COPY [[COPY3]].sub_512_lo + ; CHECK-NEXT: [[VST_dmx_sts_x_pstm_nrm:%[0-9]+]]:ep = VST_dmx_sts_x_pstm_nrm [[COPY4]], [[COPY]], [[COPY2]] :: (store (<16 x s32>) into stack - 64, basealign 128) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vec512 = COPY [[COPY3]].sub_512_hi + ; CHECK-NEXT: VST_dmx_sts_x_idx_imm [[COPY5]], [[COPY]], 64 :: (store (<16 x s32>) into stack, align 128) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:vec512 = COPY [[COPY3]].sub_512_lo + ; CHECK-NEXT: [[VST_dmx_sts_x_pstm_nrm_imm:%[0-9]+]]:ep = VST_dmx_sts_x_pstm_nrm_imm [[COPY6]], [[VST_dmx_sts_x_pstm_nrm]], 448 :: (store (<16 x s32>) into stack - 64, basealign 128) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:vec512 = COPY [[COPY3]].sub_512_hi + ; CHECK-NEXT: VST_dmx_sts_x_idx_imm [[COPY7]], [[VST_dmx_sts_x_pstm_nrm]], 64 :: (store (<16 x s32>) into stack, align 128) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:vec512 = COPY [[COPY3]].sub_512_lo + ; CHECK-NEXT: [[VST_dmx_sts_x_pstm_nrm_imm1:%[0-9]+]]:ep = VST_dmx_sts_x_pstm_nrm_imm [[COPY8]], [[VST_dmx_sts_x_pstm_nrm_imm]], -512 :: (store (<16 x s32>) into stack - 64, basealign 128) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:vec512 = COPY [[COPY3]].sub_512_hi + ; CHECK-NEXT: VST_dmx_sts_x_idx_imm [[COPY9]], [[VST_dmx_sts_x_pstm_nrm_imm]], 64 :: (store (<16 x s32>) into stack, align 128) 
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:vec512 = COPY [[COPY3]].sub_512_lo + ; CHECK-NEXT: [[VST_dmx_sts_x_pstm_nrm_imm2:%[0-9]+]]:ep = VST_dmx_sts_x_pstm_nrm_imm [[COPY10]], [[VST_dmx_sts_x_pstm_nrm_imm1]], 0 :: (store (<16 x s32>) into stack - 64, basealign 128) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:vec512 = COPY [[COPY3]].sub_512_hi + ; CHECK-NEXT: VST_dmx_sts_x_idx_imm [[COPY11]], [[VST_dmx_sts_x_pstm_nrm_imm1]], 64 :: (store (<16 x s32>) into stack, align 128) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:vec512 = COPY [[COPY3]].sub_512_lo + ; CHECK-NEXT: [[VST_dmx_sts_x_pstm_nrm1:%[0-9]+]]:ep = VST_dmx_sts_x_pstm_nrm [[COPY12]], [[VST_dmx_sts_x_pstm_nrm_imm2]], [[MOV_PD_imm11_pseudo]] :: (store (<16 x s32>) into stack - 64, basealign 128) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:vec512 = COPY [[COPY3]].sub_512_hi + ; CHECK-NEXT: VST_dmx_sts_x_idx_imm [[COPY13]], [[VST_dmx_sts_x_pstm_nrm_imm2]], 64 :: (store (<16 x s32>) into stack, align 128) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:vec512 = COPY [[COPY3]].sub_512_lo + ; CHECK-NEXT: [[VST_dmx_sts_x_pstm_nrm2:%[0-9]+]]:ep = VST_dmx_sts_x_pstm_nrm [[COPY14]], [[VST_dmx_sts_x_pstm_nrm1]], [[MOV_PD_imm11_pseudo1]] :: (store (<16 x s32>) into stack - 64, basealign 128) + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:vec512 = COPY [[COPY3]].sub_512_hi + ; CHECK-NEXT: VST_dmx_sts_x_idx_imm [[COPY15]], [[VST_dmx_sts_x_pstm_nrm1]], 64 :: (store (<16 x s32>) into stack, align 128) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VST_dmx_sts_x_pstm_nrm2]] + %0:ptrregbank(p0) = COPY $p0 + %1:gprregbank(s32) = COPY $r0 + %7:modregbank(s20) = G_TRUNC %1 + %8:modregbank(s20) = G_CONSTANT i20 448 + %9:modregbank(s20) = G_CONSTANT i20 -512 + %10:modregbank(s20) = G_CONSTANT i20 0 + %11:modregbank(s20) = G_CONSTANT i20 32 + %12:modregbank(s20) = G_CONSTANT i20 512 + %13:vregbank(<32 x s32>) = COPY $y0 + %19:ptrregbank(p0) = G_AIE_POSTINC_STORE %13, %0, %7 :: (store (<32 x s32>) into stack - 64) + %20:ptrregbank(p0) = G_AIE_POSTINC_STORE %13, %19, %8 :: (store (<32 x 
s32>) into stack - 64) + %21:ptrregbank(p0) = G_AIE_POSTINC_STORE %13, %20, %9 :: (store (<32 x s32>) into stack - 64) + %22:ptrregbank(p0) = G_AIE_POSTINC_STORE %13, %21, %10 :: (store (<32 x s32>) into stack - 64) + %23:ptrregbank(p0) = G_AIE_POSTINC_STORE %13, %22, %11 :: (store (<32 x s32>) into stack - 64) + %24:ptrregbank(p0) = G_AIE_POSTINC_STORE %13, %23, %12 :: (store (<32 x s32>) into stack - 64) + PseudoRET implicit $lr, implicit %24 +... + +--- +name: post-inc-2d-vector-store-vreg-1024 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0, $y0 + ; CHECK-LABEL: name: post-inc-2d-vector-store-vreg-1024 + ; CHECK: liveins: $p0, $y0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec1024 = COPY $y0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ed = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo2]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_count + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vec512 = COPY [[COPY1]].sub_512_lo + ; CHECK-NEXT: [[VST_2D_dmx_sts_x:%[0-9]+]]:ep, [[VST_2D_dmx_sts_x1:%[0-9]+]]:edc = VST_2D_dmx_sts_x [[COPY2]], [[COPY]], [[REG_SEQUENCE]] :: (store (<16 x s32>), align 128) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vec512 = COPY [[COPY1]].sub_512_hi + ; CHECK-NEXT: VST_dmx_sts_x_idx_imm [[COPY3]], [[COPY]], 64 :: (store (<16 x s32>) into unknown-address + 64) + ; CHECK-NEXT: PseudoRET implicit $lr + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edn(s20) = G_CONSTANT i20 3 + %4:edc(s20) = G_CONSTANT i20 4 + 
%5:vregbank(<32 x s32>) = COPY $y0 + %6:ptrregbank(p0), %7:modregbank(s20) = G_AIE_POSTINC_2D_STORE %5, %0, %1, %2, %3, %4 :: (store (<32 x s32>)) + PseudoRET implicit $lr +... + +--- +name: post-inc-3d-vector-store-vreg-1024 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0, $y0 + ; CHECK-LABEL: name: post-inc-3d-vector-store-vreg-1024 + ; CHECK: liveins: $p0, $y0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo4:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 5 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo5:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 6 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo6:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 7 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vec1024 = COPY $y0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:eds = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo5]], %subreg.sub_dim_count, [[MOV_PD_imm11_pseudo4]], %subreg.sub_hi_dim_then_sub_dim_size, [[MOV_PD_imm11_pseudo2]], %subreg.sub_hi_dim_then_sub_dim_stride, [[MOV_PD_imm11_pseudo6]], %subreg.sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vec512 = COPY [[COPY1]].sub_512_lo + ; CHECK-NEXT: [[VST_3D_dmx_sts_x:%[0-9]+]]:ep, [[VST_3D_dmx_sts_x1:%[0-9]+]]:edcl, [[VST_3D_dmx_sts_x2:%[0-9]+]]:edch = VST_3D_dmx_sts_x [[COPY2]], [[COPY]], [[REG_SEQUENCE]] :: (store (<16 x s32>), align 128) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vec512 = COPY [[COPY1]].sub_512_hi + ; CHECK-NEXT: VST_dmx_sts_x_idx_imm [[COPY3]], [[COPY]], 64 :: (store (<16 x s32>) into unknown-address + 64) + ; CHECK-NEXT: 
PseudoRET implicit $lr + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edj(s20) = G_CONSTANT i20 3 + %4:edn(s20) = G_CONSTANT i20 4 + %5:edn(s20) = G_CONSTANT i20 5 + %6:edc(s20) = G_CONSTANT i20 6 + %7:edc(s20) = G_CONSTANT i20 7 + %8:vregbank(<32 x s32>) = COPY $y0 + %9:ptrregbank(p0), %10:modregbank(s20), %11:modregbank(s20) = G_AIE_POSTINC_3D_STORE %8, %0, %1, %2, %3, %4, %6, %5, %7 :: (store (<32 x s32>)) + PseudoRET implicit $lr +... + +--- +name: post-inc-vector-load-acc-1024 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0, $r0, $r1 + ; CHECK-LABEL: name: post-inc-vector-load-acc-1024 + ; CHECK: liveins: $p0, $r0, $r1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:em = COPY [[COPY1]] + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 32 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:em = MOV_PD_imm11_pseudo 512 + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_pstm_nrm:%[0-9]+]]:acc512, [[VLDA_dmx_lda_bm_pstm_nrm1:%[0-9]+]]:ep = VLDA_dmx_lda_bm_pstm_nrm [[COPY]], [[COPY2]] :: (load (<16 x s32>) from stack - 64, basealign 128) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[COPY]], 64 :: (load (<16 x s32>) from stack, align 128) + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:acc1024 = REG_SEQUENCE [[VLDA_dmx_lda_bm_pstm_nrm]], %subreg.sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm]], %subreg.sub_512_acc_hi + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_pstm_nrm_imm:%[0-9]+]]:acc512, [[VLDA_dmx_lda_bm_pstm_nrm_imm1:%[0-9]+]]:ep = VLDA_dmx_lda_bm_pstm_nrm_imm [[VLDA_dmx_lda_bm_pstm_nrm1]], 448 :: (load (<16 x s32>) from stack - 64, basealign 128) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm1:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[VLDA_dmx_lda_bm_pstm_nrm1]], 64 :: (load (<16 x s32>) from stack, align 128) + ; CHECK-NEXT: 
[[REG_SEQUENCE1:%[0-9]+]]:acc1024 = REG_SEQUENCE [[VLDA_dmx_lda_bm_pstm_nrm_imm]], %subreg.sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm1]], %subreg.sub_512_acc_hi + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_pstm_nrm_imm2:%[0-9]+]]:acc512, [[VLDA_dmx_lda_bm_pstm_nrm_imm3:%[0-9]+]]:ep = VLDA_dmx_lda_bm_pstm_nrm_imm [[VLDA_dmx_lda_bm_pstm_nrm_imm1]], -512 :: (load (<16 x s32>) from stack - 64, basealign 128) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm2:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[VLDA_dmx_lda_bm_pstm_nrm_imm1]], 64 :: (load (<16 x s32>) from stack, align 128) + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:acc1024 = REG_SEQUENCE [[VLDA_dmx_lda_bm_pstm_nrm_imm2]], %subreg.sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm2]], %subreg.sub_512_acc_hi + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_pstm_nrm_imm4:%[0-9]+]]:acc512, [[VLDA_dmx_lda_bm_pstm_nrm_imm5:%[0-9]+]]:ep = VLDA_dmx_lda_bm_pstm_nrm_imm [[VLDA_dmx_lda_bm_pstm_nrm_imm3]], 0 :: (load (<16 x s32>) from stack - 64, basealign 128) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm3:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[VLDA_dmx_lda_bm_pstm_nrm_imm3]], 64 :: (load (<16 x s32>) from stack, align 128) + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:acc1024 = REG_SEQUENCE [[VLDA_dmx_lda_bm_pstm_nrm_imm4]], %subreg.sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm3]], %subreg.sub_512_acc_hi + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_pstm_nrm2:%[0-9]+]]:acc512, [[VLDA_dmx_lda_bm_pstm_nrm3:%[0-9]+]]:ep = VLDA_dmx_lda_bm_pstm_nrm [[VLDA_dmx_lda_bm_pstm_nrm_imm5]], [[MOV_PD_imm11_pseudo]] :: (load (<16 x s32>) from stack - 64, basealign 128) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm4:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[VLDA_dmx_lda_bm_pstm_nrm_imm5]], 64 :: (load (<16 x s32>) from stack, align 128) + ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:acc1024 = REG_SEQUENCE [[VLDA_dmx_lda_bm_pstm_nrm2]], %subreg.sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm4]], %subreg.sub_512_acc_hi + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_pstm_nrm4:%[0-9]+]]:acc512, 
[[VLDA_dmx_lda_bm_pstm_nrm5:%[0-9]+]]:ep = VLDA_dmx_lda_bm_pstm_nrm [[VLDA_dmx_lda_bm_pstm_nrm3]], [[MOV_PD_imm11_pseudo1]] :: (load (<16 x s32>) from stack - 64, basealign 128) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm5:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[VLDA_dmx_lda_bm_pstm_nrm3]], 64 :: (load (<16 x s32>) from stack, align 128) + ; CHECK-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:acc1024 = REG_SEQUENCE [[VLDA_dmx_lda_bm_pstm_nrm4]], %subreg.sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm5]], %subreg.sub_512_acc_hi + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[REG_SEQUENCE5]] + %0:ptrregbank(p0) = COPY $p0 + %1:gprregbank(s32) = COPY $r0 + %7:modregbank(s20) = G_TRUNC %1 + %8:modregbank(s20) = G_CONSTANT i20 448 + %9:modregbank(s20) = G_CONSTANT i20 -512 + %10:modregbank(s20) = G_CONSTANT i20 0 + %11:modregbank(s20) = G_CONSTANT i20 32 + %12:modregbank(s20) = G_CONSTANT i20 512 + %13:accregbank(<32 x s32>), %19:ptrregbank(p0) = G_AIE_POSTINC_LOAD %0, %7 :: (load (<32 x s32>) from stack - 64) + %14:accregbank(<32 x s32>), %20:ptrregbank(p0) = G_AIE_POSTINC_LOAD %19, %8 :: (load (<32 x s32>) from stack - 64) + %15:accregbank(<32 x s32>), %21:ptrregbank(p0) = G_AIE_POSTINC_LOAD %20, %9 :: (load (<32 x s32>) from stack - 64) + %16:accregbank(<32 x s32>), %22:ptrregbank(p0) = G_AIE_POSTINC_LOAD %21, %10 :: (load (<32 x s32>) from stack - 64) + %17:accregbank(<32 x s32>), %23:ptrregbank(p0) = G_AIE_POSTINC_LOAD %22, %11 :: (load (<32 x s32>) from stack - 64) + %18:accregbank(<32 x s32>), %24:ptrregbank(p0) = G_AIE_POSTINC_LOAD %23, %12 :: (load (<32 x s32>) from stack - 64) + PseudoRET implicit $lr, implicit %18 +... 
+ +--- +name: post-inc-2d-vector-load-acc-1024 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + ; CHECK-LABEL: name: post-inc-2d-vector-load-acc-1024 + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ed = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo2]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_count + ; CHECK-NEXT: [[VLDA_2D_dmx_lda_bm:%[0-9]+]]:acc512, [[VLDA_2D_dmx_lda_bm1:%[0-9]+]]:ep, [[VLDA_2D_dmx_lda_bm2:%[0-9]+]]:edc = VLDA_2D_dmx_lda_bm [[COPY]], [[REG_SEQUENCE]] :: (load (<16 x s32>), align 128) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[COPY]], 64 :: (load (<16 x s32>) from unknown-address + 64) + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:acc1024 = REG_SEQUENCE [[VLDA_2D_dmx_lda_bm]], %subreg.sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm]], %subreg.sub_512_acc_hi + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[REG_SEQUENCE1]] + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edn(s20) = G_CONSTANT i20 3 + %4:edc(s20) = G_CONSTANT i20 4 + %5:accregbank(<32 x s32>), %6:ptrregbank(p0), %7:modregbank(s20) = G_AIE_POSTINC_2D_LOAD %0, %1, %2, %3, %4 :: (load (<32 x s32>)) + PseudoRET implicit $lr, implicit %5 +... 
+ +--- +name: post-inc-3d-vector-load-acc-1024 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + ; CHECK-LABEL: name: post-inc-3d-vector-load-acc-1024 + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo4:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 5 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo5:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 6 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo6:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 7 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:eds = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo5]], %subreg.sub_dim_count, [[MOV_PD_imm11_pseudo4]], %subreg.sub_hi_dim_then_sub_dim_size, [[MOV_PD_imm11_pseudo2]], %subreg.sub_hi_dim_then_sub_dim_stride, [[MOV_PD_imm11_pseudo6]], %subreg.sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: [[VLDA_3D_dmx_lda_bm:%[0-9]+]]:acc512, [[VLDA_3D_dmx_lda_bm1:%[0-9]+]]:ep, [[VLDA_3D_dmx_lda_bm2:%[0-9]+]]:edcl, [[VLDA_3D_dmx_lda_bm3:%[0-9]+]]:edch = VLDA_3D_dmx_lda_bm [[COPY]], [[REG_SEQUENCE]] :: (load (<8 x s64>)) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VLDA_3D_dmx_lda_bm]] + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edj(s20) = G_CONSTANT i20 3 + %4:edn(s20) = G_CONSTANT i20 4 + %5:edn(s20) = G_CONSTANT i20 5 + %6:edc(s20) = G_CONSTANT i20 6 + %7:edc(s20) = G_CONSTANT i20 7 + %8:accregbank(<8 x s64>), %9:ptrregbank(p0), %10:modregbank(s20), %11:modregbank(s20) = G_AIE_POSTINC_3D_LOAD %0, %1, %2, %3, %4, %6, %5, %7 :: (load (<8 
x s64>)) + PseudoRET implicit $lr, implicit %8 +... + +--- +name: post-inc-vector-store-acc-1024 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0, $r0, $r1, $cml0 + ; CHECK-LABEL: name: post-inc-vector-store-acc-1024 + ; CHECK: liveins: $p0, $r0, $r1, $cml0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:em = COPY [[COPY1]] + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 32 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:em = MOV_PD_imm11_pseudo 512 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:acc1024 = COPY $cml0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:acc512 = COPY [[COPY3]].sub_512_acc_lo + ; CHECK-NEXT: [[VST_dmx_sts_bm_pstm_nrm:%[0-9]+]]:ep = VST_dmx_sts_bm_pstm_nrm [[COPY4]], [[COPY]], [[COPY2]] :: (store (<16 x s32>) into stack - 64, basealign 128) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:acc512 = COPY [[COPY3]].sub_512_acc_hi + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY5]], [[COPY]], 64 :: (store (<16 x s32>) into stack, align 128) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:acc512 = COPY [[COPY3]].sub_512_acc_lo + ; CHECK-NEXT: [[VST_dmx_sts_bm_pstm_nrm_imm:%[0-9]+]]:ep = VST_dmx_sts_bm_pstm_nrm_imm [[COPY6]], [[VST_dmx_sts_bm_pstm_nrm]], 448 :: (store (<16 x s32>) into stack - 64, basealign 128) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:acc512 = COPY [[COPY3]].sub_512_acc_hi + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY7]], [[VST_dmx_sts_bm_pstm_nrm]], 64 :: (store (<16 x s32>) into stack, align 128) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:acc512 = COPY [[COPY3]].sub_512_acc_lo + ; CHECK-NEXT: [[VST_dmx_sts_bm_pstm_nrm_imm1:%[0-9]+]]:ep = VST_dmx_sts_bm_pstm_nrm_imm [[COPY8]], [[VST_dmx_sts_bm_pstm_nrm_imm]], -512 :: (store (<16 x s32>) into stack - 64, basealign 128) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:acc512 = COPY [[COPY3]].sub_512_acc_hi + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY9]], [[VST_dmx_sts_bm_pstm_nrm_imm]], 64 :: 
(store (<16 x s32>) into stack, align 128) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:acc512 = COPY [[COPY3]].sub_512_acc_lo + ; CHECK-NEXT: [[VST_dmx_sts_bm_pstm_nrm_imm2:%[0-9]+]]:ep = VST_dmx_sts_bm_pstm_nrm_imm [[COPY10]], [[VST_dmx_sts_bm_pstm_nrm_imm1]], 0 :: (store (<16 x s32>) into stack - 64, basealign 128) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:acc512 = COPY [[COPY3]].sub_512_acc_hi + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY11]], [[VST_dmx_sts_bm_pstm_nrm_imm1]], 64 :: (store (<16 x s32>) into stack, align 128) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:acc512 = COPY [[COPY3]].sub_512_acc_lo + ; CHECK-NEXT: [[VST_dmx_sts_bm_pstm_nrm1:%[0-9]+]]:ep = VST_dmx_sts_bm_pstm_nrm [[COPY12]], [[VST_dmx_sts_bm_pstm_nrm_imm2]], [[MOV_PD_imm11_pseudo]] :: (store (<16 x s32>) into stack - 64, basealign 128) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:acc512 = COPY [[COPY3]].sub_512_acc_hi + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY13]], [[VST_dmx_sts_bm_pstm_nrm_imm2]], 64 :: (store (<16 x s32>) into stack, align 128) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:acc512 = COPY [[COPY3]].sub_512_acc_lo + ; CHECK-NEXT: [[VST_dmx_sts_bm_pstm_nrm2:%[0-9]+]]:ep = VST_dmx_sts_bm_pstm_nrm [[COPY14]], [[VST_dmx_sts_bm_pstm_nrm1]], [[MOV_PD_imm11_pseudo1]] :: (store (<16 x s32>) into stack - 64, basealign 128) + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:acc512 = COPY [[COPY3]].sub_512_acc_hi + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY15]], [[VST_dmx_sts_bm_pstm_nrm1]], 64 :: (store (<16 x s32>) into stack, align 128) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VST_dmx_sts_bm_pstm_nrm2]] + %0:ptrregbank(p0) = COPY $p0 + %1:gprregbank(s32) = COPY $r0 + %7:modregbank(s20) = G_TRUNC %1 + %8:modregbank(s20) = G_CONSTANT i20 448 + %9:modregbank(s20) = G_CONSTANT i20 -512 + %10:modregbank(s20) = G_CONSTANT i20 0 + %11:modregbank(s20) = G_CONSTANT i20 32 + %12:modregbank(s20) = G_CONSTANT i20 512 + %13:accregbank(<32 x s32>) = COPY $cml0 + %19:ptrregbank(p0) = G_AIE_POSTINC_STORE %13, %0, %7 :: (store (<32 x s32>) into 
stack - 64) + %20:ptrregbank(p0) = G_AIE_POSTINC_STORE %13, %19, %8 :: (store (<32 x s32>) into stack - 64) + %21:ptrregbank(p0) = G_AIE_POSTINC_STORE %13, %20, %9 :: (store (<32 x s32>) into stack - 64) + %22:ptrregbank(p0) = G_AIE_POSTINC_STORE %13, %21, %10 :: (store (<32 x s32>) into stack - 64) + %23:ptrregbank(p0) = G_AIE_POSTINC_STORE %13, %22, %11 :: (store (<32 x s32>) into stack - 64) + %24:ptrregbank(p0) = G_AIE_POSTINC_STORE %13, %23, %12 :: (store (<32 x s32>) into stack - 64) + PseudoRET implicit $lr, implicit %24 +... + +--- +name: post-inc-2d-vector-store-acc-1024 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0, $cml0 + ; CHECK-LABEL: name: post-inc-2d-vector-store-acc-1024 + ; CHECK: liveins: $p0, $cml0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:acc1024 = COPY $cml0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ed = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo2]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_count + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:acc512 = COPY [[COPY1]].sub_512_acc_lo + ; CHECK-NEXT: [[VST_2D_dmx_sts_bm:%[0-9]+]]:ep, [[VST_2D_dmx_sts_bm1:%[0-9]+]]:edc = VST_2D_dmx_sts_bm [[COPY2]], [[COPY]], [[REG_SEQUENCE]] :: (store (<16 x s32>), align 128) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:acc512 = COPY [[COPY1]].sub_512_acc_hi + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY3]], [[COPY]], 64 :: (store (<16 x s32>) into unknown-address + 64) + ; CHECK-NEXT: PseudoRET implicit $lr + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + 
%2:edj(s20) = G_CONSTANT i20 2 + %3:edn(s20) = G_CONSTANT i20 3 + %4:edc(s20) = G_CONSTANT i20 4 + %5:accregbank(<32 x s32>) = COPY $cml0 + %6:ptrregbank(p0), %7:modregbank(s20) = G_AIE_POSTINC_2D_STORE %5, %0, %1, %2, %3, %4 :: (store (<32 x s32>)) + PseudoRET implicit $lr +... + +--- +name: post-inc-3d-vector-store-acc-1024 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0, $cml0 + ; CHECK-LABEL: name: post-inc-3d-vector-store-acc-1024 + ; CHECK: liveins: $p0, $cml0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo4:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 5 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo5:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 6 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo6:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 7 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:acc1024 = COPY $cml0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:eds = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo5]], %subreg.sub_dim_count, [[MOV_PD_imm11_pseudo4]], %subreg.sub_hi_dim_then_sub_dim_size, [[MOV_PD_imm11_pseudo2]], %subreg.sub_hi_dim_then_sub_dim_stride, [[MOV_PD_imm11_pseudo6]], %subreg.sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:acc512 = COPY [[COPY1]].sub_512_acc_lo + ; CHECK-NEXT: [[VST_3D_dmx_sts_bm:%[0-9]+]]:ep, [[VST_3D_dmx_sts_bm1:%[0-9]+]]:edcl, [[VST_3D_dmx_sts_bm2:%[0-9]+]]:edch = VST_3D_dmx_sts_bm [[COPY2]], [[COPY]], [[REG_SEQUENCE]] :: (store (<16 x s32>), align 128) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:acc512 = COPY [[COPY1]].sub_512_acc_hi + ; 
CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY3]], [[COPY]], 64 :: (store (<16 x s32>) into unknown-address + 64) + ; CHECK-NEXT: PseudoRET implicit $lr + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edj(s20) = G_CONSTANT i20 3 + %4:edn(s20) = G_CONSTANT i20 4 + %5:edn(s20) = G_CONSTANT i20 5 + %6:edc(s20) = G_CONSTANT i20 6 + %7:edc(s20) = G_CONSTANT i20 7 + %8:accregbank(<32 x s32>) = COPY $cml0 + %9:ptrregbank(p0), %10:modregbank(s20), %11:modregbank(s20) = G_AIE_POSTINC_3D_STORE %8, %0, %1, %2, %3, %4, %6, %5, %7 :: (store (<32 x s32>)) + PseudoRET implicit $lr +... + +--- +name: post-inc-vector-load-acc-2048 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0, $r0, $r1 + ; CHECK-LABEL: name: post-inc-vector-load-acc-2048 + ; CHECK: liveins: $p0, $r0, $r1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:em = COPY [[COPY1]] + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 32 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:em = MOV_PD_imm11_pseudo 512 + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_pstm_nrm:%[0-9]+]]:acc512, [[VLDA_dmx_lda_bm_pstm_nrm1:%[0-9]+]]:ep = VLDA_dmx_lda_bm_pstm_nrm [[COPY]], [[COPY2]] :: (load (<16 x s32>) from stack - 64, basealign 256) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[COPY]], 64 :: (load (<16 x s32>) from stack, align 256) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm1:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[COPY]], 128 :: (load (<16 x s32>) from stack + 64, basealign 256) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm2:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[COPY]], 192 :: (load (<16 x s32>) from stack + 128, align 128, basealign 256) + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:acc2048 = REG_SEQUENCE [[VLDA_dmx_lda_bm_pstm_nrm]], %subreg.sub_512_acc_lo, 
[[VLDA_dmx_lda_bm_idx_imm]], %subreg.sub_512_acc_hi, [[VLDA_dmx_lda_bm_idx_imm1]], %subreg.sub_1024_acc_hi_then_sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm2]], %subreg.sub_1024_acc_hi_then_sub_512_acc_hi + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_pstm_nrm_imm:%[0-9]+]]:acc512, [[VLDA_dmx_lda_bm_pstm_nrm_imm1:%[0-9]+]]:ep = VLDA_dmx_lda_bm_pstm_nrm_imm [[VLDA_dmx_lda_bm_pstm_nrm1]], 448 :: (load (<16 x s32>) from stack - 64, basealign 256) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm3:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[VLDA_dmx_lda_bm_pstm_nrm1]], 64 :: (load (<16 x s32>) from stack, align 256) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm4:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[VLDA_dmx_lda_bm_pstm_nrm1]], 128 :: (load (<16 x s32>) from stack + 64, basealign 256) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm5:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[VLDA_dmx_lda_bm_pstm_nrm1]], 192 :: (load (<16 x s32>) from stack + 128, align 128, basealign 256) + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:acc2048 = REG_SEQUENCE [[VLDA_dmx_lda_bm_pstm_nrm_imm]], %subreg.sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm3]], %subreg.sub_512_acc_hi, [[VLDA_dmx_lda_bm_idx_imm4]], %subreg.sub_1024_acc_hi_then_sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm5]], %subreg.sub_1024_acc_hi_then_sub_512_acc_hi + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_pstm_nrm_imm2:%[0-9]+]]:acc512, [[VLDA_dmx_lda_bm_pstm_nrm_imm3:%[0-9]+]]:ep = VLDA_dmx_lda_bm_pstm_nrm_imm [[VLDA_dmx_lda_bm_pstm_nrm_imm1]], -512 :: (load (<16 x s32>) from stack - 64, basealign 256) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm6:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[VLDA_dmx_lda_bm_pstm_nrm_imm1]], 64 :: (load (<16 x s32>) from stack, align 256) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm7:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[VLDA_dmx_lda_bm_pstm_nrm_imm1]], 128 :: (load (<16 x s32>) from stack + 64, basealign 256) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm8:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[VLDA_dmx_lda_bm_pstm_nrm_imm1]], 192 :: 
(load (<16 x s32>) from stack + 128, align 128, basealign 256) + ; CHECK-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:acc2048 = REG_SEQUENCE [[VLDA_dmx_lda_bm_pstm_nrm_imm2]], %subreg.sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm6]], %subreg.sub_512_acc_hi, [[VLDA_dmx_lda_bm_idx_imm7]], %subreg.sub_1024_acc_hi_then_sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm8]], %subreg.sub_1024_acc_hi_then_sub_512_acc_hi + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_pstm_nrm_imm4:%[0-9]+]]:acc512, [[VLDA_dmx_lda_bm_pstm_nrm_imm5:%[0-9]+]]:ep = VLDA_dmx_lda_bm_pstm_nrm_imm [[VLDA_dmx_lda_bm_pstm_nrm_imm3]], 0 :: (load (<16 x s32>) from stack - 64, basealign 256) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm9:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[VLDA_dmx_lda_bm_pstm_nrm_imm3]], 64 :: (load (<16 x s32>) from stack, align 256) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm10:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[VLDA_dmx_lda_bm_pstm_nrm_imm3]], 128 :: (load (<16 x s32>) from stack + 64, basealign 256) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm11:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[VLDA_dmx_lda_bm_pstm_nrm_imm3]], 192 :: (load (<16 x s32>) from stack + 128, align 128, basealign 256) + ; CHECK-NEXT: [[REG_SEQUENCE3:%[0-9]+]]:acc2048 = REG_SEQUENCE [[VLDA_dmx_lda_bm_pstm_nrm_imm4]], %subreg.sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm9]], %subreg.sub_512_acc_hi, [[VLDA_dmx_lda_bm_idx_imm10]], %subreg.sub_1024_acc_hi_then_sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm11]], %subreg.sub_1024_acc_hi_then_sub_512_acc_hi + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_pstm_nrm2:%[0-9]+]]:acc512, [[VLDA_dmx_lda_bm_pstm_nrm3:%[0-9]+]]:ep = VLDA_dmx_lda_bm_pstm_nrm [[VLDA_dmx_lda_bm_pstm_nrm_imm5]], [[MOV_PD_imm11_pseudo]] :: (load (<16 x s32>) from stack - 64, basealign 256) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm12:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[VLDA_dmx_lda_bm_pstm_nrm_imm5]], 64 :: (load (<16 x s32>) from stack, align 256) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm13:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm 
[[VLDA_dmx_lda_bm_pstm_nrm_imm5]], 128 :: (load (<16 x s32>) from stack + 64, basealign 256) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm14:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[VLDA_dmx_lda_bm_pstm_nrm_imm5]], 192 :: (load (<16 x s32>) from stack + 128, align 128, basealign 256) + ; CHECK-NEXT: [[REG_SEQUENCE4:%[0-9]+]]:acc2048 = REG_SEQUENCE [[VLDA_dmx_lda_bm_pstm_nrm2]], %subreg.sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm12]], %subreg.sub_512_acc_hi, [[VLDA_dmx_lda_bm_idx_imm13]], %subreg.sub_1024_acc_hi_then_sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm14]], %subreg.sub_1024_acc_hi_then_sub_512_acc_hi + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_pstm_nrm4:%[0-9]+]]:acc512, [[VLDA_dmx_lda_bm_pstm_nrm5:%[0-9]+]]:ep = VLDA_dmx_lda_bm_pstm_nrm [[VLDA_dmx_lda_bm_pstm_nrm3]], [[MOV_PD_imm11_pseudo1]] :: (load (<16 x s32>) from stack - 64, basealign 256) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm15:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[VLDA_dmx_lda_bm_pstm_nrm3]], 64 :: (load (<16 x s32>) from stack, align 256) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm16:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[VLDA_dmx_lda_bm_pstm_nrm3]], 128 :: (load (<16 x s32>) from stack + 64, basealign 256) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm17:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[VLDA_dmx_lda_bm_pstm_nrm3]], 192 :: (load (<16 x s32>) from stack + 128, align 128, basealign 256) + ; CHECK-NEXT: [[REG_SEQUENCE5:%[0-9]+]]:acc2048 = REG_SEQUENCE [[VLDA_dmx_lda_bm_pstm_nrm4]], %subreg.sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm15]], %subreg.sub_512_acc_hi, [[VLDA_dmx_lda_bm_idx_imm16]], %subreg.sub_1024_acc_hi_then_sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm17]], %subreg.sub_1024_acc_hi_then_sub_512_acc_hi + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[REG_SEQUENCE5]] + %0:ptrregbank(p0) = COPY $p0 + %1:gprregbank(s32) = COPY $r0 + %7:modregbank(s20) = G_TRUNC %1 + %8:modregbank(s20) = G_CONSTANT i20 448 + %9:modregbank(s20) = G_CONSTANT i20 -512 + %10:modregbank(s20) = G_CONSTANT i20 0 + 
%11:modregbank(s20) = G_CONSTANT i20 32 + %12:modregbank(s20) = G_CONSTANT i20 512 + %13:accregbank(<64 x s32>), %19:ptrregbank(p0) = G_AIE_POSTINC_LOAD %0, %7 :: (load (<64 x s32>) from stack - 64) + %14:accregbank(<64 x s32>), %20:ptrregbank(p0) = G_AIE_POSTINC_LOAD %19, %8 :: (load (<64 x s32>) from stack - 64) + %15:accregbank(<64 x s32>), %21:ptrregbank(p0) = G_AIE_POSTINC_LOAD %20, %9 :: (load (<64 x s32>) from stack - 64) + %16:accregbank(<64 x s32>), %22:ptrregbank(p0) = G_AIE_POSTINC_LOAD %21, %10 :: (load (<64 x s32>) from stack - 64) + %17:accregbank(<64 x s32>), %23:ptrregbank(p0) = G_AIE_POSTINC_LOAD %22, %11 :: (load (<64 x s32>) from stack - 64) + %18:accregbank(<64 x s32>), %24:ptrregbank(p0) = G_AIE_POSTINC_LOAD %23, %12 :: (load (<64 x s32>) from stack - 64) + PseudoRET implicit $lr, implicit %18 +... +--- +name: post-inc-2d-vector-load-acc-2048 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + ; CHECK-LABEL: name: post-inc-2d-vector-load-acc-2048 + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ed = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo2]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_count + ; CHECK-NEXT: [[VLDA_2D_dmx_lda_bm:%[0-9]+]]:acc512, [[VLDA_2D_dmx_lda_bm1:%[0-9]+]]:ep, [[VLDA_2D_dmx_lda_bm2:%[0-9]+]]:edc = VLDA_2D_dmx_lda_bm [[COPY]], [[REG_SEQUENCE]] :: (load (<16 x s32>), align 256) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[COPY]], 64 :: (load (<16 x 
s32>) from unknown-address + 64) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm1:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[COPY]], 128 :: (load (<16 x s32>) from unknown-address + 128, align 128) + ; CHECK-NEXT: [[VLDA_dmx_lda_bm_idx_imm2:%[0-9]+]]:acc512 = VLDA_dmx_lda_bm_idx_imm [[COPY]], 192 :: (load (<16 x s32>) from unknown-address + 192) + ; CHECK-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:acc2048 = REG_SEQUENCE [[VLDA_2D_dmx_lda_bm]], %subreg.sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm]], %subreg.sub_512_acc_hi, [[VLDA_dmx_lda_bm_idx_imm1]], %subreg.sub_1024_acc_hi_then_sub_512_acc_lo, [[VLDA_dmx_lda_bm_idx_imm2]], %subreg.sub_1024_acc_hi_then_sub_512_acc_hi + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[REG_SEQUENCE1]] + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edn(s20) = G_CONSTANT i20 3 + %4:edc(s20) = G_CONSTANT i20 4 + %5:accregbank(<64 x s32>), %6:ptrregbank(p0), %7:modregbank(s20) = G_AIE_POSTINC_2D_LOAD %0, %1, %2, %3, %4 :: (load (<64 x s32>)) + PseudoRET implicit $lr, implicit %5 +... 
+ +--- +name: post-inc-3d-vector-load-acc-2048 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0 + ; CHECK-LABEL: name: post-inc-3d-vector-load-acc-2048 + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo4:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 5 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo5:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 6 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo6:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 7 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:eds = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo5]], %subreg.sub_dim_count, [[MOV_PD_imm11_pseudo4]], %subreg.sub_hi_dim_then_sub_dim_size, [[MOV_PD_imm11_pseudo2]], %subreg.sub_hi_dim_then_sub_dim_stride, [[MOV_PD_imm11_pseudo6]], %subreg.sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: [[VLDA_3D_dmx_lda_bm:%[0-9]+]]:acc512, [[VLDA_3D_dmx_lda_bm1:%[0-9]+]]:ep, [[VLDA_3D_dmx_lda_bm2:%[0-9]+]]:edcl, [[VLDA_3D_dmx_lda_bm3:%[0-9]+]]:edch = VLDA_3D_dmx_lda_bm [[COPY]], [[REG_SEQUENCE]] :: (load (<8 x s64>)) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VLDA_3D_dmx_lda_bm]] + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edj(s20) = G_CONSTANT i20 3 + %4:edn(s20) = G_CONSTANT i20 4 + %5:edn(s20) = G_CONSTANT i20 5 + %6:edc(s20) = G_CONSTANT i20 6 + %7:edc(s20) = G_CONSTANT i20 7 + %8:accregbank(<8 x s64>), %9:ptrregbank(p0), %10:modregbank(s20), %11:modregbank(s20) = G_AIE_POSTINC_3D_LOAD %0, %1, %2, %3, %4, %6, %5, %7 :: (load (<8 
x s64>)) + PseudoRET implicit $lr, implicit %8 +... + +--- +name: post-inc-vector-store-acc-2048 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0, $r0, $r1, $dm0 + ; CHECK-LABEL: name: post-inc-vector-store-acc-2048 + ; CHECK: liveins: $p0, $r0, $r1, $dm0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:em = COPY [[COPY1]] + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 32 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:em = MOV_PD_imm11_pseudo 512 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:acc2048 = COPY $dm0 + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:acc512 = COPY [[COPY3]].sub_512_acc_lo + ; CHECK-NEXT: [[VST_dmx_sts_bm_pstm_nrm:%[0-9]+]]:ep = VST_dmx_sts_bm_pstm_nrm [[COPY4]], [[COPY]], [[COPY2]] :: (store (<16 x s32>) into stack - 64, basealign 256) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:acc512 = COPY [[COPY3]].sub_512_acc_hi + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY5]], [[COPY]], 64 :: (store (<16 x s32>) into stack, align 256) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:acc512 = COPY [[COPY3]].sub_1024_acc_hi_then_sub_512_acc_lo + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY6]], [[COPY]], 128 :: (store (<16 x s32>) into stack + 64, basealign 256) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:acc512 = COPY [[COPY3]].sub_1024_acc_hi_then_sub_512_acc_hi + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY7]], [[COPY]], 192 :: (store (<16 x s32>) into stack + 128, align 128, basealign 256) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:acc512 = COPY [[COPY3]].sub_512_acc_lo + ; CHECK-NEXT: [[VST_dmx_sts_bm_pstm_nrm_imm:%[0-9]+]]:ep = VST_dmx_sts_bm_pstm_nrm_imm [[COPY8]], [[VST_dmx_sts_bm_pstm_nrm]], 448 :: (store (<16 x s32>) into stack - 64, basealign 256) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:acc512 = COPY [[COPY3]].sub_512_acc_hi + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY9]], [[VST_dmx_sts_bm_pstm_nrm]], 64 :: (store (<16 x s32>) into stack, 
align 256) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:acc512 = COPY [[COPY3]].sub_1024_acc_hi_then_sub_512_acc_lo + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY10]], [[VST_dmx_sts_bm_pstm_nrm]], 128 :: (store (<16 x s32>) into stack + 64, basealign 256) + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:acc512 = COPY [[COPY3]].sub_1024_acc_hi_then_sub_512_acc_hi + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY11]], [[VST_dmx_sts_bm_pstm_nrm]], 192 :: (store (<16 x s32>) into stack + 128, align 128, basealign 256) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:acc512 = COPY [[COPY3]].sub_512_acc_lo + ; CHECK-NEXT: [[VST_dmx_sts_bm_pstm_nrm_imm1:%[0-9]+]]:ep = VST_dmx_sts_bm_pstm_nrm_imm [[COPY12]], [[VST_dmx_sts_bm_pstm_nrm_imm]], -512 :: (store (<16 x s32>) into stack - 64, basealign 256) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:acc512 = COPY [[COPY3]].sub_512_acc_hi + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY13]], [[VST_dmx_sts_bm_pstm_nrm_imm]], 64 :: (store (<16 x s32>) into stack, align 256) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:acc512 = COPY [[COPY3]].sub_1024_acc_hi_then_sub_512_acc_lo + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY14]], [[VST_dmx_sts_bm_pstm_nrm_imm]], 128 :: (store (<16 x s32>) into stack + 64, basealign 256) + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:acc512 = COPY [[COPY3]].sub_1024_acc_hi_then_sub_512_acc_hi + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY15]], [[VST_dmx_sts_bm_pstm_nrm_imm]], 192 :: (store (<16 x s32>) into stack + 128, align 128, basealign 256) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:acc512 = COPY [[COPY3]].sub_512_acc_lo + ; CHECK-NEXT: [[VST_dmx_sts_bm_pstm_nrm_imm2:%[0-9]+]]:ep = VST_dmx_sts_bm_pstm_nrm_imm [[COPY16]], [[VST_dmx_sts_bm_pstm_nrm_imm1]], 0 :: (store (<16 x s32>) into stack - 64, basealign 256) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:acc512 = COPY [[COPY3]].sub_512_acc_hi + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY17]], [[VST_dmx_sts_bm_pstm_nrm_imm1]], 64 :: (store (<16 x s32>) into stack, align 256) + ; CHECK-NEXT: [[COPY18:%[0-9]+]]:acc512 = COPY 
[[COPY3]].sub_1024_acc_hi_then_sub_512_acc_lo + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY18]], [[VST_dmx_sts_bm_pstm_nrm_imm1]], 128 :: (store (<16 x s32>) into stack + 64, basealign 256) + ; CHECK-NEXT: [[COPY19:%[0-9]+]]:acc512 = COPY [[COPY3]].sub_1024_acc_hi_then_sub_512_acc_hi + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY19]], [[VST_dmx_sts_bm_pstm_nrm_imm1]], 192 :: (store (<16 x s32>) into stack + 128, align 128, basealign 256) + ; CHECK-NEXT: [[COPY20:%[0-9]+]]:acc512 = COPY [[COPY3]].sub_512_acc_lo + ; CHECK-NEXT: [[VST_dmx_sts_bm_pstm_nrm1:%[0-9]+]]:ep = VST_dmx_sts_bm_pstm_nrm [[COPY20]], [[VST_dmx_sts_bm_pstm_nrm_imm2]], [[MOV_PD_imm11_pseudo]] :: (store (<16 x s32>) into stack - 64, basealign 256) + ; CHECK-NEXT: [[COPY21:%[0-9]+]]:acc512 = COPY [[COPY3]].sub_512_acc_hi + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY21]], [[VST_dmx_sts_bm_pstm_nrm_imm2]], 64 :: (store (<16 x s32>) into stack, align 256) + ; CHECK-NEXT: [[COPY22:%[0-9]+]]:acc512 = COPY [[COPY3]].sub_1024_acc_hi_then_sub_512_acc_lo + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY22]], [[VST_dmx_sts_bm_pstm_nrm_imm2]], 128 :: (store (<16 x s32>) into stack + 64, basealign 256) + ; CHECK-NEXT: [[COPY23:%[0-9]+]]:acc512 = COPY [[COPY3]].sub_1024_acc_hi_then_sub_512_acc_hi + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY23]], [[VST_dmx_sts_bm_pstm_nrm_imm2]], 192 :: (store (<16 x s32>) into stack + 128, align 128, basealign 256) + ; CHECK-NEXT: [[COPY24:%[0-9]+]]:acc512 = COPY [[COPY3]].sub_512_acc_lo + ; CHECK-NEXT: [[VST_dmx_sts_bm_pstm_nrm2:%[0-9]+]]:ep = VST_dmx_sts_bm_pstm_nrm [[COPY24]], [[VST_dmx_sts_bm_pstm_nrm1]], [[MOV_PD_imm11_pseudo1]] :: (store (<16 x s32>) into stack - 64, basealign 256) + ; CHECK-NEXT: [[COPY25:%[0-9]+]]:acc512 = COPY [[COPY3]].sub_512_acc_hi + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY25]], [[VST_dmx_sts_bm_pstm_nrm1]], 64 :: (store (<16 x s32>) into stack, align 256) + ; CHECK-NEXT: [[COPY26:%[0-9]+]]:acc512 = COPY [[COPY3]].sub_1024_acc_hi_then_sub_512_acc_lo + 
; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY26]], [[VST_dmx_sts_bm_pstm_nrm1]], 128 :: (store (<16 x s32>) into stack + 64, basealign 256) + ; CHECK-NEXT: [[COPY27:%[0-9]+]]:acc512 = COPY [[COPY3]].sub_1024_acc_hi_then_sub_512_acc_hi + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY27]], [[VST_dmx_sts_bm_pstm_nrm1]], 192 :: (store (<16 x s32>) into stack + 128, align 128, basealign 256) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[VST_dmx_sts_bm_pstm_nrm2]] + %0:ptrregbank(p0) = COPY $p0 + %1:gprregbank(s32) = COPY $r0 + %7:modregbank(s20) = G_TRUNC %1 + %8:modregbank(s20) = G_CONSTANT i20 448 + %9:modregbank(s20) = G_CONSTANT i20 -512 + %10:modregbank(s20) = G_CONSTANT i20 0 + %11:modregbank(s20) = G_CONSTANT i20 32 + %12:modregbank(s20) = G_CONSTANT i20 512 + %13:accregbank(<64 x s32>) = COPY $dm0 + %19:ptrregbank(p0) = G_AIE_POSTINC_STORE %13, %0, %7 :: (store (<64 x s32>) into stack - 64) + %20:ptrregbank(p0) = G_AIE_POSTINC_STORE %13, %19, %8 :: (store (<64 x s32>) into stack - 64) + %21:ptrregbank(p0) = G_AIE_POSTINC_STORE %13, %20, %9 :: (store (<64 x s32>) into stack - 64) + %22:ptrregbank(p0) = G_AIE_POSTINC_STORE %13, %21, %10 :: (store (<64 x s32>) into stack - 64) + %23:ptrregbank(p0) = G_AIE_POSTINC_STORE %13, %22, %11 :: (store (<64 x s32>) into stack - 64) + %24:ptrregbank(p0) = G_AIE_POSTINC_STORE %13, %23, %12 :: (store (<64 x s32>) into stack - 64) + PseudoRET implicit $lr, implicit %24 +... 
+ +--- +name: post-inc-2d-vector-store-acc-2048 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0, $dm0 + ; CHECK-LABEL: name: post-inc-2d-vector-store-acc-2048 + ; CHECK: liveins: $p0, $dm0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:acc2048 = COPY $dm0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:ed = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo2]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_count + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:acc512 = COPY [[COPY1]].sub_512_acc_lo + ; CHECK-NEXT: [[VST_2D_dmx_sts_bm:%[0-9]+]]:ep, [[VST_2D_dmx_sts_bm1:%[0-9]+]]:edc = VST_2D_dmx_sts_bm [[COPY2]], [[COPY]], [[REG_SEQUENCE]] :: (store (<16 x s32>), align 256) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:acc512 = COPY [[COPY1]].sub_512_acc_hi + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY3]], [[COPY]], 64 :: (store (<16 x s32>) into unknown-address + 64) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:acc512 = COPY [[COPY1]].sub_1024_acc_hi_then_sub_512_acc_lo + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY4]], [[COPY]], 128 :: (store (<16 x s32>) into unknown-address + 128, align 128) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:acc512 = COPY [[COPY1]].sub_1024_acc_hi_then_sub_512_acc_hi + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY5]], [[COPY]], 192 :: (store (<16 x s32>) into unknown-address + 192) + ; CHECK-NEXT: PseudoRET implicit $lr + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edn(s20) = G_CONSTANT i20 3 + %4:edc(s20) = G_CONSTANT i20 4 + 
%5:accregbank(<64 x s32>) = COPY $dm0 + %6:ptrregbank(p0), %7:modregbank(s20) = G_AIE_POSTINC_2D_STORE %5, %0, %1, %2, %3, %4 :: (store (<64 x s32>)) + PseudoRET implicit $lr +... + +--- +name: post-inc-3d-vector-store-acc-2048 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $p0, $dm0 + ; CHECK-LABEL: name: post-inc-3d-vector-store-acc-2048 + ; CHECK: liveins: $p0, $dm0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo:%[0-9]+]]:em = MOV_PD_imm11_pseudo 1 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo1:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 2 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo2:%[0-9]+]]:edj = MOV_PD_imm11_pseudo 3 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo3:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 4 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo4:%[0-9]+]]:edn = MOV_PD_imm11_pseudo 5 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo5:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 6 + ; CHECK-NEXT: [[MOV_PD_imm11_pseudo6:%[0-9]+]]:edc = MOV_PD_imm11_pseudo 7 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:acc2048 = COPY $dm0 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:eds = REG_SEQUENCE [[MOV_PD_imm11_pseudo]], %subreg.sub_mod, [[MOV_PD_imm11_pseudo3]], %subreg.sub_dim_size, [[MOV_PD_imm11_pseudo1]], %subreg.sub_dim_stride, [[MOV_PD_imm11_pseudo5]], %subreg.sub_dim_count, [[MOV_PD_imm11_pseudo4]], %subreg.sub_hi_dim_then_sub_dim_size, [[MOV_PD_imm11_pseudo2]], %subreg.sub_hi_dim_then_sub_dim_stride, [[MOV_PD_imm11_pseudo6]], %subreg.sub_hi_dim_then_sub_dim_count + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:acc512 = COPY [[COPY1]].sub_512_acc_lo + ; CHECK-NEXT: [[VST_3D_dmx_sts_bm:%[0-9]+]]:ep, [[VST_3D_dmx_sts_bm1:%[0-9]+]]:edcl, [[VST_3D_dmx_sts_bm2:%[0-9]+]]:edch = VST_3D_dmx_sts_bm [[COPY2]], [[COPY]], [[REG_SEQUENCE]] :: (store (<16 x s32>), align 256) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:acc512 = COPY [[COPY1]].sub_512_acc_hi + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY3]], [[COPY]], 64 :: (store (<16 x s32>) into unknown-address + 64) + 
; CHECK-NEXT: [[COPY4:%[0-9]+]]:acc512 = COPY [[COPY1]].sub_1024_acc_hi_then_sub_512_acc_lo + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY4]], [[COPY]], 128 :: (store (<16 x s32>) into unknown-address + 128, align 128) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:acc512 = COPY [[COPY1]].sub_1024_acc_hi_then_sub_512_acc_hi + ; CHECK-NEXT: VST_dmx_sts_bm_idx_imm [[COPY5]], [[COPY]], 192 :: (store (<16 x s32>) into unknown-address + 192) + ; CHECK-NEXT: PseudoRET implicit $lr + %0:ptrregbank(p0) = COPY $p0 + %1:em(s20) = G_CONSTANT i20 1 + %2:edj(s20) = G_CONSTANT i20 2 + %3:edj(s20) = G_CONSTANT i20 3 + %4:edn(s20) = G_CONSTANT i20 4 + %5:edn(s20) = G_CONSTANT i20 5 + %6:edc(s20) = G_CONSTANT i20 6 + %7:edc(s20) = G_CONSTANT i20 7 + %8:accregbank(<64 x s32>) = COPY $dm0 + %9:ptrregbank(p0), %10:modregbank(s20), %11:modregbank(s20) = G_AIE_POSTINC_3D_STORE %8, %0, %1, %2, %3, %4, %6, %5, %7 :: (store (<64 x s32>)) + PseudoRET implicit $lr +... diff --git a/llvm/test/CodeGen/AIE/aie2p/combine-loads-stores.mir b/llvm/test/CodeGen/AIE/aie2p/combine-loads-stores.mir new file mode 100644 index 000000000000..514ba9c2bdec --- /dev/null +++ b/llvm/test/CodeGen/AIE/aie2p/combine-loads-stores.mir @@ -0,0 +1,1661 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2025 Advanced Micro Devices, Inc. 
or its affiliates +# RUN: llc -mtriple aie2p -run-pass=aie2p-postlegalizer-custom-combiner %s -verify-machineinstrs -o - | FileCheck %s + +--- +name: load_to_preinc +body: | + bb.0: + liveins: $p0 + ; CHECK-LABEL: name: load_to_preinc + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[TRUNC]](s20) + ; CHECK-NEXT: [[AIE_OFFSET_LOAD:%[0-9]+]]:_(s32) = G_AIE_OFFSET_LOAD [[COPY]](p0), [[TRUNC]](s20) :: (load (s32)) + ; CHECK-NEXT: $r0 = COPY [[AIE_OFFSET_LOAD]](s32) + ; CHECK-NEXT: $p0 = COPY [[PTR_ADD]](p0) + %0:_(p0) = COPY $p0 + %1:_(s32) = G_CONSTANT i32 24 + %2:_(s20) = G_TRUNC %1 + %3:_(p0) = G_PTR_ADD %0, %2 + %4:_(s32) = G_LOAD %3 :: (load (s32)) + $r0 = COPY %4 + $p0 = COPY %3 +... + +--- +name: load_to_preinc_with_copies +body: | + bb.0: + liveins: $p0 + ; CHECK-LABEL: name: load_to_preinc_with_copies + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[TRUNC]](s20) + ; CHECK-NEXT: [[AIE_OFFSET_LOAD:%[0-9]+]]:_(s32) = G_AIE_OFFSET_LOAD [[COPY]](p0), [[TRUNC]](s20) :: (load (s32)) + ; CHECK-NEXT: $r0 = COPY [[AIE_OFFSET_LOAD]](s32) + ; CHECK-NEXT: $p0 = COPY [[PTR_ADD]](p0) + %0:_(p0) = COPY $p0 + %1:_(s32) = G_CONSTANT i32 24 + %2:_(s20) = G_TRUNC %1 + %3:_(p0) = G_PTR_ADD %0, %2 + %5:_(p0) = COPY %3 + %4:_(s32) = G_LOAD %5 :: (load (s32)) + $r0 = COPY %4 + $p0 = COPY %3 +... 
+ +--- +name: load_to_preinc_dead_ptr_add +body: | + bb.0: + liveins: $p0 + ; CHECK-LABEL: name: load_to_preinc_dead_ptr_add + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[AIE_OFFSET_LOAD:%[0-9]+]]:_(s32) = G_AIE_OFFSET_LOAD [[COPY]](p0), [[TRUNC]](s20) :: (load (s32)) + ; CHECK-NEXT: $r0 = COPY [[AIE_OFFSET_LOAD]](s32) + %0:_(p0) = COPY $p0 + %1:_(s32) = G_CONSTANT i32 24 + %2:_(s20) = G_TRUNC %1 + %4:_(p0) = G_PTR_ADD %0, %2 + %3:_(s32) = G_LOAD %4 :: (load (s32)) + $r0 = COPY %3 +... + +--- +name: load_to_postinc +body: | + bb.0: + liveins: $p0 + ; CHECK-LABEL: name: load_to_postinc + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[AIE_POSTINC_LOAD:%[0-9]+]]:_(s32), [[AIE_POSTINC_LOAD1:%[0-9]+]]:_(p0) = G_AIE_POSTINC_LOAD [[COPY]], [[TRUNC]](s20) :: (load (s32)) + ; CHECK-NEXT: $r0 = COPY [[AIE_POSTINC_LOAD]](s32) + ; CHECK-NEXT: $p0 = COPY [[AIE_POSTINC_LOAD1]](p0) + %0:_(p0) = COPY $p0 + %1:_(s32) = G_CONSTANT i32 24 + %2:_(s20) = G_TRUNC %1 + %3:_(s32) = G_LOAD %0 :: (load (s32)) + %4:_(p0) = G_PTR_ADD %0, %2 + $r0 = COPY %3 + $p0 = COPY %4 +... 
+ +--- +name: load_to_postinc_ptradd_before_load +body: | + bb.0: + liveins: $p0 + ; CHECK-LABEL: name: load_to_postinc_ptradd_before_load + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[AIE_POSTINC_LOAD:%[0-9]+]]:_(s32), [[AIE_POSTINC_LOAD1:%[0-9]+]]:_(p0) = G_AIE_POSTINC_LOAD [[COPY]], [[TRUNC]](s20) :: (load (s32)) + ; CHECK-NEXT: $r0 = COPY [[AIE_POSTINC_LOAD]](s32) + ; CHECK-NEXT: $p0 = COPY [[AIE_POSTINC_LOAD1]](p0) + %0:_(p0) = COPY $p0 + %1:_(s32) = G_CONSTANT i32 24 + %2:_(s20) = G_TRUNC %1 + %4:_(p0) = G_PTR_ADD %0, %2 + %3:_(s32) = G_LOAD %0 :: (load (s32)) + $r0 = COPY %3 + $p0 = COPY %4 +... + +--- +name: load_to_postinc_ptradd_before_usedinphi +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: load_to_postinc_ptradd_before_usedinphi + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: G_BR %bb.1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:_(p0) = G_PHI [[COPY]](p0), %bb.0, %2(p0), %bb.1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[AIE_POSTINC_LOAD:%[0-9]+]]:_(s32), [[AIE_POSTINC_LOAD1:%[0-9]+]]:_(p0) = G_AIE_POSTINC_LOAD [[PHI]], [[TRUNC]](s20) :: (load (s32)) + ; CHECK-NEXT: $r0 = COPY [[AIE_POSTINC_LOAD]](s32) + ; CHECK-NEXT: G_BR %bb.1 + bb.0: + successors: %bb.1 + liveins: $p0 + %0:_(p0) = COPY $p0 + G_BR %bb.1 + bb.1: + successors: %bb.1 + %1:_(p0) = G_PHI %0(p0), %bb.0, %4(p0), %bb.1 + %2:_(s32) = G_CONSTANT i32 32 + %3:_(s20) = G_TRUNC %2 + %4:_(p0) = G_PTR_ADD %1, %3 + %5:_(s32) = G_LOAD %1 :: (load (s32)) + $r0 = COPY %5 + G_BR %bb.1 +... 
+ + +# Our current combine code is not able to move the memory operation up. In this +# case we cannot just move the pointer add to the load and we therefore don't +# combine. This could be improved. +--- +name: load_not_to_postinc_ptradd_before_load +body: | + bb.0: + liveins: $p0 + ; CHECK-LABEL: name: load_not_to_postinc_ptradd_before_load + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[TRUNC]](s20) + ; CHECK-NEXT: $p0 = COPY [[PTR_ADD]](p0) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) + ; CHECK-NEXT: $r0 = COPY [[LOAD]](s32) + %0:_(p0) = COPY $p0 + %1:_(s32) = G_CONSTANT i32 24 + %2:_(s20) = G_TRUNC %1 + %4:_(p0) = G_PTR_ADD %0, %2 + $p0 = COPY %4 + %3:_(s32) = G_LOAD %0 :: (load (s32)) + $r0 = COPY %3 +... + +--- +name: load_to_postinc_move_offset +body: | + bb.0: + liveins: $p0 + ; CHECK-LABEL: name: load_to_postinc_move_offset + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[AIE_POSTINC_LOAD:%[0-9]+]]:_(s32), [[AIE_POSTINC_LOAD1:%[0-9]+]]:_(p0) = G_AIE_POSTINC_LOAD [[COPY]], [[TRUNC]](s20) :: (load (s32)) + ; CHECK-NEXT: $r0 = COPY [[AIE_POSTINC_LOAD]](s32) + ; CHECK-NEXT: $p0 = COPY [[AIE_POSTINC_LOAD1]](p0) + %0:_(p0) = COPY $p0 + %3:_(s32) = G_LOAD %0 :: (load (s32)) + %1:_(s32) = G_CONSTANT i32 12 + %2:_(s20) = G_TRUNC %1 + %4:_(p0) = G_PTR_ADD %0, %2 + $r0 = COPY %3 + $p0 = COPY %4 +... 
+ +--- +name: load_not_to_postinc_cannot_move_offset +body: | + bb.0: + liveins: $p0 + ; CHECK-LABEL: name: load_not_to_postinc_cannot_move_offset + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) + ; CHECK-NEXT: $r0 = COPY [[LOAD]](s32) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[TRUNC]](s20) + ; CHECK-NEXT: $p0 = COPY [[PTR_ADD]](p0) + %0:_(p0) = COPY $p0 + %3:_(s32) = G_LOAD %0 :: (load (s32)) + $r0 = COPY %3 + %1:_(s32) = G_CONSTANT i32 12 + %2:_(s20) = G_TRUNC %1 + %4:_(p0) = G_PTR_ADD %0, %2 + $p0 = COPY %4 +... + +--- +name: load_to_postinc_arg_offset +body: | + bb.0: + liveins: $p0, $r1 + ; CHECK-LABEL: name: load_to_postinc_arg_offset + ; CHECK: liveins: $p0, $r1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[LSHR]](s32) + ; CHECK-NEXT: [[AIE_POSTINC_LOAD:%[0-9]+]]:_(s32), [[AIE_POSTINC_LOAD1:%[0-9]+]]:_(p0) = G_AIE_POSTINC_LOAD [[COPY]], [[TRUNC]](s20) :: (load (s32)) + ; CHECK-NEXT: $r0 = COPY [[AIE_POSTINC_LOAD]](s32) + ; CHECK-NEXT: $p0 = COPY [[AIE_POSTINC_LOAD1]](p0) + %0:_(p0) = COPY $p0 + %5:_(s32) = COPY $r1 + %3:_(s32) = G_LOAD %0 :: (load (s32)) + %1:_(s32) = G_CONSTANT i32 2 + %4:_(s32) = G_LSHR %5, %1 + %6:_(s20) = G_TRUNC %4 + %2:_(p0) = G_PTR_ADD %0, %6 + $r0 = COPY %3 + $p0 = COPY %2 +... 
+ +--- +name: load_cannot_combine +body: | + bb.0: + liveins: $p0, $r1 + ; CHECK-LABEL: name: load_cannot_combine + ; CHECK: liveins: $p0, $r1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[LOAD]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[TRUNC]](s20) + ; CHECK-NEXT: $r0 = COPY [[LOAD]](s32) + ; CHECK-NEXT: $p0 = COPY [[PTR_ADD]](p0) + %0:_(p0) = COPY $p0 + %3:_(s32) = G_LOAD %0 :: (load (s32)) + %1:_(s20) = G_TRUNC %3 + %2:_(p0) = G_PTR_ADD %0, %1 + $r0 = COPY %3 + $p0 = COPY %2 +... + +--- +name: store_to_preinc +body: | + bb.0: + liveins: $p0, $r0 + ; CHECK-LABEL: name: store_to_preinc + ; CHECK: liveins: $p0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[TRUNC]](s20) + ; CHECK-NEXT: G_AIE_OFFSET_STORE [[COPY1]](s32), [[COPY]](p0), [[TRUNC]](s20) :: (store (s32)) + ; CHECK-NEXT: $p0 = COPY [[PTR_ADD]](p0) + %0:_(p0) = COPY $p0 + %1:_(s32) = COPY $r0 + %2:_(s32) = G_CONSTANT i32 24 + %4:_(s20) = G_TRUNC %2 + %3:_(p0) = G_PTR_ADD %0, %4 + G_STORE %1, %3 :: (store (s32)) + $p0 = COPY %3 +... 
+ +--- +name: store_to_postinc +body: | + bb.0: + liveins: $p0, $r0 + ; CHECK-LABEL: name: store_to_postinc + ; CHECK: liveins: $p0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[AIE_POSTINC_STORE:%[0-9]+]]:_(p0) = G_AIE_POSTINC_STORE [[COPY1]](s32), [[COPY]], [[TRUNC]](s20) :: (store (s32)) + ; CHECK-NEXT: $p0 = COPY [[AIE_POSTINC_STORE]](p0) + %0:_(p0) = COPY $p0 + %1:_(s32) = COPY $r0 + %2:_(s32) = G_CONSTANT i32 24 + %4:_(s20) = G_TRUNC %2 + G_STORE %1, %0 :: (store (s32)) + %3:_(p0) = G_PTR_ADD %0, %4 + $p0 = COPY %3 +... + +--- +name: store_to_postinc_ptr_add_before +body: | + bb.0: + liveins: $p0, $r0 + ; CHECK-LABEL: name: store_to_postinc_ptr_add_before + ; CHECK: liveins: $p0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 24 + ; CHECK-NEXT: [[AIE_POSTINC_STORE:%[0-9]+]]:_(p0) = G_AIE_POSTINC_STORE [[COPY1]](s32), [[COPY]], [[C]](s20) :: (store (s32)) + ; CHECK-NEXT: $p0 = COPY [[AIE_POSTINC_STORE]](p0) + %0:_(p0) = COPY $p0 + %1:_(s32) = COPY $r0 + %2:_(s20) = G_CONSTANT i20 24 + %3:_(p0) = G_PTR_ADD %0, %2 + G_STORE %1, %0 :: (store (s32)) + $p0 = COPY %3 +... + +# Our current combine code is not able to move the memory operation up. In this +# case we cannot just move the pointer add to the store and we therefore don't +# combine. This could be improved. 
+--- +name: store_not_to_postinc_ptr_add_before +body: | + bb.0: + liveins: $p0, $r0 + ; CHECK-LABEL: name: store_not_to_postinc_ptr_add_before + ; CHECK: liveins: $p0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 24 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; CHECK-NEXT: $p0 = COPY [[PTR_ADD]](p0) + ; CHECK-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store (s32)) + %0:_(p0) = COPY $p0 + %1:_(s32) = COPY $r0 + %2:_(s20) = G_CONSTANT i20 24 + %3:_(p0) = G_PTR_ADD %0, %2 + $p0 = COPY %3 + G_STORE %1, %0 :: (store (s32)) +... + +--- +name: store_not_to_postinc_def_use_dependency +body: | + bb.0: + liveins: $p0, $r0 + ; CHECK-LABEL: name: store_not_to_postinc_def_use_dependency + ; CHECK: liveins: $p0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 24 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; CHECK-NEXT: G_STORE [[PTR_ADD]](p0), [[COPY]](p0) :: (store (s20), align 4) + ; CHECK-NEXT: $p0 = COPY [[PTR_ADD]](p0) + %0:_(p0) = COPY $p0 + %1:_(s20) = G_CONSTANT i20 24 + %2:_(p0) = G_PTR_ADD %0, %1 + G_STORE %2, %0 :: (store (s20)) + $p0 = COPY %2 +... 
+ +--- +name: store_to_postinc_move_offset +body: | + bb.0: + liveins: $p0, $r0 + ; CHECK-LABEL: name: store_to_postinc_move_offset + ; CHECK: liveins: $p0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[AIE_POSTINC_STORE:%[0-9]+]]:_(p0) = G_AIE_POSTINC_STORE [[COPY1]](s32), [[COPY]], [[TRUNC]](s20) :: (store (s32)) + ; CHECK-NEXT: $p0 = COPY [[AIE_POSTINC_STORE]](p0) + %0:_(p0) = COPY $p0 + %1:_(s32) = COPY $r0 + G_STORE %1, %0 :: (store (s32)) + %2:_(s32) = G_CONSTANT i32 24 + %4:_(s20) = G_TRUNC %2 + %3:_(p0) = G_PTR_ADD %0, %4 + $p0 = COPY %3 +... + +--- +name: store_not_to_postinc_cannot_move_offset +body: | + bb.0: + liveins: $p0, $r0 + ; CHECK-LABEL: name: store_not_to_postinc_cannot_move_offset + ; CHECK: liveins: $p0, $r0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $r0 + ; CHECK-NEXT: G_STORE [[COPY1]](s32), [[COPY]](p0) :: (store (s32)) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $p1 + ; CHECK-NEXT: G_STORE [[COPY1]](s32), [[COPY2]](p0) :: (store (s32)) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[TRUNC]](s20) + ; CHECK-NEXT: $p0 = COPY [[PTR_ADD]](p0) + %0:_(p0) = COPY $p0 + %1:_(s32) = COPY $r0 + G_STORE %1, %0 :: (store (s32)) + %10:_(p0) = COPY $p1 + G_STORE %1, %10 :: (store (s32)) + %2:_(s32) = G_CONSTANT i32 24 + %4:_(s20) = G_TRUNC %2 + %3:_(p0) = G_PTR_ADD %0, %4 + $p0 = COPY %3 +... 
+ +--- +name: zextload_to_postinc +body: | + bb.0: + liveins: $p0 + ; CHECK-LABEL: name: zextload_to_postinc + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[AIE_POSTINC_ZEXTLOAD:%[0-9]+]]:_(s32), [[AIE_POSTINC_ZEXTLOAD1:%[0-9]+]]:_(p0) = G_AIE_POSTINC_ZEXTLOAD [[COPY]], [[TRUNC]](s20) :: (load (s8)) + ; CHECK-NEXT: $r0 = COPY [[AIE_POSTINC_ZEXTLOAD]](s32) + ; CHECK-NEXT: $p0 = COPY [[AIE_POSTINC_ZEXTLOAD1]](p0) + %0:_(p0) = COPY $p0 + %1:_(s32) = G_CONSTANT i32 24 + %4:_(s20) = G_TRUNC %1 + %3:_(s32) = G_ZEXTLOAD %0 :: (load (s8)) + %2:_(p0) = G_PTR_ADD %0, %4 + $r0 = COPY %3 + $p0 = COPY %2 +... + +--- +name: different_bb_memop_dominating +body: | + ; CHECK-LABEL: name: different_bb_memop_dominating + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[TRUNC]](s20) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) + ; CHECK-NEXT: $p0 = COPY [[PTR_ADD]](p0) + ; CHECK-NEXT: $r0 = COPY [[LOAD]](s32) + bb.0: + liveins: $p0 + %0:_(p0) = COPY $p0 + %1:_(s32) = G_CONSTANT i32 24 + %4:_(s20) = G_TRUNC %1 + %2:_(p0) = G_PTR_ADD %0, %4 + + bb.1: + %3:_(s32) = G_LOAD %0 :: (load (s32)) + $p0 = COPY %2 + $r0 = COPY %3 +... 
+ +--- +name: same_bb_ptradd_uses_different_bb +body: | + ; CHECK-LABEL: name: same_bb_ptradd_uses_different_bb + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[AIE_POSTINC_LOAD:%[0-9]+]]:_(s32), [[AIE_POSTINC_LOAD1:%[0-9]+]]:_(p0) = G_AIE_POSTINC_LOAD [[COPY]], [[TRUNC]](s20) :: (load (s32)) + ; CHECK-NEXT: $r0 = COPY [[AIE_POSTINC_LOAD]](s32) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: $p0 = COPY [[AIE_POSTINC_LOAD1]](p0) + bb.0: + liveins: $p0 + %0:_(p0) = COPY $p0 + %1:_(s32) = G_CONSTANT i32 24 + %4:_(s20) = G_TRUNC %1 + + bb.1: + %2:_(p0) = G_PTR_ADD %0, %4 + %3:_(s32) = G_LOAD %0 :: (load (s32)) + $r0 = COPY %3 + + bb.2: + $p0 = COPY %2 +... 
+ +--- +name: different_bb_memop_dominating_offset_different_bb +body: | + ; CHECK-LABEL: name: different_bb_memop_dominating_offset_different_bb + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[TRUNC]](s20) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) + ; CHECK-NEXT: $p0 = COPY [[PTR_ADD]](p0) + ; CHECK-NEXT: $r0 = COPY [[LOAD]](s32) + bb.0: + %1:_(s32) = G_CONSTANT i32 24 + %4:_(s20) = G_TRUNC %1 + + bb.1: + liveins: $p0 + %0:_(p0) = COPY $p0 + %2:_(p0) = G_PTR_ADD %0, %4 + + bb.2: + %3:_(s32) = G_LOAD %0 :: (load (s32)) + $p0 = COPY %2 + $r0 = COPY %3 +... 
+ +--- +name: different_bb_ptradd_dominating +body: | + ; CHECK-LABEL: name: different_bb_ptradd_dominating + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: G_BR %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.2(0x80000000) + ; CHECK-NEXT: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) + ; CHECK-NEXT: $r0 = COPY [[LOAD]](s32) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[TRUNC]](s20) + ; CHECK-NEXT: $p0 = COPY [[PTR_ADD]](p0) + bb.0: + G_BR %bb.2 + + bb.1: + liveins: $p0 + %0:_(p0) = COPY $p0 + %3:_(s32) = G_LOAD %0 :: (load (s32)) + $r0 = COPY %3 + + bb.2: + %1:_(s32) = G_CONSTANT i32 24 + %4:_(s20) = G_TRUNC %1 + %2:_(p0) = G_PTR_ADD %0, %4 + $p0 = COPY %2 +... + +--- +name: dominatin_ptradd_use +body: | + bb.0: + ; CHECK-LABEL: name: dominatin_ptradd_use + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[TRUNC]](s20) + ; CHECK-NEXT: $p0 = COPY [[PTR_ADD]](p0) + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) + ; CHECK-NEXT: $r0 = COPY [[LOAD]](s32) + %1:_(s32) = G_CONSTANT i32 24 + %4:_(s20) = G_TRUNC %1 + %0:_(p0) = COPY $p0 + %2:_(p0) = G_PTR_ADD %0, %4 + $p0 = COPY %2 + %3:_(s32) = G_LOAD %0 :: (load (s32)) + $r0 = COPY %3 +... 
+ +--- +name: two_ptradds +body: | + bb.0: + ; CHECK-LABEL: name: two_ptradds + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s20) = G_TRUNC [[C]](s32) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[TRUNC]](s20) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s20) = G_TRUNC [[C1]](s32) + ; CHECK-NEXT: [[AIE_POSTINC_LOAD:%[0-9]+]]:_(s32), [[AIE_POSTINC_LOAD1:%[0-9]+]]:_(p0) = G_AIE_POSTINC_LOAD [[COPY]], [[TRUNC1]](s20) :: (load (s32)) + ; CHECK-NEXT: $p0 = COPY [[AIE_POSTINC_LOAD1]](p0) + ; CHECK-NEXT: $p1 = COPY [[PTR_ADD]](p0) + ; CHECK-NEXT: $r0 = COPY [[AIE_POSTINC_LOAD]](s32) + %2:_(p0) = COPY $p0 + %7:_(s32) = G_LOAD %2 :: (load (s32)) + %0:_(s32) = G_CONSTANT i32 24 + %1:_(s20) = G_TRUNC %0 + %3:_(p0) = G_PTR_ADD %2, %1 + %4:_(s32) = G_CONSTANT i32 24 + %5:_(s20) = G_TRUNC %4 + %6:_(p0) = G_PTR_ADD %2, %5 + $p0 = COPY %6 + $p1 = COPY %3 + $r0 = COPY %7 +... + +--- +name: post_inc_ignore_dead_ptr_add +body: | + bb.0: + liveins: $p0 + ; CHECK-LABEL: name: post_inc_ignore_dead_ptr_add + ; CHECK: liveins: $p0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) + ; CHECK-NEXT: $r0 = COPY [[LOAD]](s32) + %0:_(p0) = COPY $p0 + %1:_(s20) = G_CONSTANT i20 24 + %2:_(s32) = G_LOAD %0 :: (load (s32)) + %3:_(p0) = G_PTR_ADD %0, %1 + $r0 = COPY %2 +... 
+ +# Try moving ptr_add up for post-increment combines +--- +name: move_ptr_add_up +body: | + bb.0: + ; CHECK-LABEL: name: move_ptr_add_up + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $p1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[AIE_POSTINC_LOAD:%[0-9]+]]:_(s32), [[AIE_POSTINC_LOAD1:%[0-9]+]]:_(p0) = G_AIE_POSTINC_LOAD [[COPY]], [[C]](s20) :: (load (s32)) + ; CHECK-NEXT: G_STORE [[AIE_POSTINC_LOAD]](s32), [[COPY1]](p0) :: (store (s32)) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $wl0, implicit [[AIE_POSTINC_LOAD1]](p0) + %0:_(p0) = COPY $p0 + %6:_(p0) = COPY $p1 + %1:_(s20) = G_CONSTANT i20 64 + %4:_(s32) = G_LOAD %0(p0) :: (load (s32)) + G_STORE %4, %6 :: (store (s32)) + %3:_(p0) = G_PTR_ADD %0, %1 + %10:_(p0) = COPY %0 + PseudoRET implicit $lr, implicit $wl0, implicit %3 +... + +# When moving PTR_ADDs up we have to be able to move the G_CONSTANT up too +--- +name: move_g_constant +body: | + bb.0: + ; CHECK-LABEL: name: move_g_constant + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $p1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[AIE_POSTINC_LOAD:%[0-9]+]]:_(s32), [[AIE_POSTINC_LOAD1:%[0-9]+]]:_(p0) = G_AIE_POSTINC_LOAD [[COPY]], [[C]](s20) :: (load (s32)) + ; CHECK-NEXT: G_STORE [[AIE_POSTINC_LOAD]](s32), [[COPY1]](p0) :: (store (s32)) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $wl0, implicit [[AIE_POSTINC_LOAD1]](p0) + %0:_(p0) = COPY $p0 + %6:_(p0) = COPY $p1 + %4:_(s32) = G_LOAD %0(p0) :: (load (s32)) + G_STORE %4, %6 :: (store (s32)) + %1:_(s20) = G_CONSTANT i20 64 + %3:_(p0) = G_PTR_ADD %0, %1 + %10:_(p0) = COPY %0 + PseudoRET implicit $lr, implicit $wl0, implicit %3 +... 
+ +# When moving PTR_ADDs up we have to be able to move the G_CONSTANT up too +# The G_CONSTANT that defines the pointer does not interfere with this combining +--- +name: move_g_constant_ptr_g_constant +body: | + bb.0: + ; CHECK-LABEL: name: move_g_constant_ptr_g_constant + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(p0) = G_CONSTANT i20 123 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[AIE_POSTINC_LOAD:%[0-9]+]]:_(s32), [[AIE_POSTINC_LOAD1:%[0-9]+]]:_(p0) = G_AIE_POSTINC_LOAD [[COPY]], [[C1]](s20) :: (load (s32)) + ; CHECK-NEXT: G_STORE [[AIE_POSTINC_LOAD]](s32), [[C]](p0) :: (store (s32)) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $wl0, implicit [[AIE_POSTINC_LOAD1]](p0) + %0:_(p0) = COPY $p0 + %6:_(p0) = G_CONSTANT i20 123 + %4:_(s32) = G_LOAD %0(p0) :: (load (s32)) + G_STORE %4, %6 :: (store (s32)) + %1:_(s20) = G_CONSTANT i20 64 + %3:_(p0) = G_PTR_ADD %0, %1 + %10:_(p0) = COPY %0 + PseudoRET implicit $lr, implicit $wl0, implicit %3 +... 
+ +# Test G_CONSTANT move up with postinc_2d +--- +name: postinc_2d_move_g_constant +body: | + bb.0: + ; CHECK-LABEL: name: postinc_2d_move_g_constant + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s20) = G_CONSTANT i20 128 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s20) = G_CONSTANT i20 0 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s20) = G_CONSTANT i20 32 + ; CHECK-NEXT: [[AIE_POSTINC_2D_LOAD:%[0-9]+]]:_(<32 x s8>), [[AIE_POSTINC_2D_LOAD1:%[0-9]+]]:_(p0), [[AIE_POSTINC_2D_LOAD2:%[0-9]+]]:_(s20) = G_AIE_POSTINC_2D_LOAD [[COPY]], [[C]], [[C1]], [[C2]], [[C3]] :: (load (<32 x s8>)) + ; CHECK-NEXT: $wl0 = COPY [[AIE_POSTINC_2D_LOAD]](<32 x s8>) + ; CHECK-NEXT: $p0 = COPY [[AIE_POSTINC_2D_LOAD1]](p0) + %0:_(p0) = COPY $p0 + %1:_(s20) = G_CONSTANT i20 64 + %6:_(<32 x s8>) = G_LOAD %0(p0) :: (load (<32 x s8>)) + $wl0 = COPY %6(<32 x s8>) + %2:_(s20) = G_CONSTANT i20 128 + %3:_(s20) = G_CONSTANT i20 0 + %4:_(s20) = G_CONSTANT i20 32 + %5:_(p0), %8:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2p.add.2d), %0:_(p0), %1:_(s20), %2:_(s20), %3:_(s20), %4:_(s20) + $p0 = COPY %5 +... 
+ +--- +name: zextload_postinc_2d +body: | + bb.0: + ; CHECK-LABEL: name: zextload_postinc_2d + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s20) = G_CONSTANT i20 128 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s20) = G_CONSTANT i20 0 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s20) = G_CONSTANT i20 32 + ; CHECK-NEXT: [[AIE_POSTINC_2D_ZEXTLOAD:%[0-9]+]]:_(s32), [[AIE_POSTINC_2D_ZEXTLOAD1:%[0-9]+]]:_(p0), [[AIE_POSTINC_2D_ZEXTLOAD2:%[0-9]+]]:_(s20) = G_AIE_POSTINC_2D_ZEXTLOAD [[COPY]], [[C]], [[C1]], [[C2]], [[C3]] :: (load (s8)) + ; CHECK-NEXT: $r0 = COPY [[AIE_POSTINC_2D_ZEXTLOAD]](s32) + ; CHECK-NEXT: $p0 = COPY [[AIE_POSTINC_2D_ZEXTLOAD1]](p0) + %0:_(p0) = COPY $p0 + %1:_(s20) = G_CONSTANT i20 64 + %6:_(s32) = G_ZEXTLOAD %0(p0) :: (load (s8)) + $r0 = COPY %6(s32) + %2:_(s20) = G_CONSTANT i20 128 + %3:_(s20) = G_CONSTANT i20 0 + %4:_(s20) = G_CONSTANT i20 32 + %5:_(p0), %8:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2p.add.2d), %0:_(p0), %1:_(s20), %2:_(s20), %3:_(s20), %4:_(s20) + $p0 = COPY %5 +... 
+ +--- +name: sextload_postinc_2d +body: | + bb.0: + ; CHECK-LABEL: name: sextload_postinc_2d + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s20) = G_CONSTANT i20 128 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s20) = G_CONSTANT i20 0 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s20) = G_CONSTANT i20 32 + ; CHECK-NEXT: [[AIE_POSTINC_2D_SEXTLOAD:%[0-9]+]]:_(s32), [[AIE_POSTINC_2D_SEXTLOAD1:%[0-9]+]]:_(p0), [[AIE_POSTINC_2D_SEXTLOAD2:%[0-9]+]]:_(s20) = G_AIE_POSTINC_2D_SEXTLOAD [[COPY]], [[C]], [[C1]], [[C2]], [[C3]] :: (load (s20), align 4) + ; CHECK-NEXT: $r0 = COPY [[AIE_POSTINC_2D_SEXTLOAD]](s32) + ; CHECK-NEXT: $p0 = COPY [[AIE_POSTINC_2D_SEXTLOAD1]](p0) + %0:_(p0) = COPY $p0 + %1:_(s20) = G_CONSTANT i20 64 + %6:_(s32) = G_SEXTLOAD %0(p0) :: (load (s20)) + $r0 = COPY %6(s32) + %2:_(s20) = G_CONSTANT i20 128 + %3:_(s20) = G_CONSTANT i20 0 + %4:_(s20) = G_CONSTANT i20 32 + %5:_(p0), %8:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2p.add.2d), %0:_(p0), %1:_(s20), %2:_(s20), %3:_(s20), %4:_(s20) + $p0 = COPY %5 +... 
+ +--- +name: sextload_postinc_3d +body: | + bb.0: + ; CHECK-LABEL: name: sextload_postinc_3d + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s20) = G_CONSTANT i20 128 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s20) = G_CONSTANT i20 0 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s20) = G_CONSTANT i20 32 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s20) = G_CONSTANT i20 128 + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s20) = G_CONSTANT i20 0 + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s20) = G_CONSTANT i20 32 + ; CHECK-NEXT: [[AIE_POSTINC_3D_SEXTLOAD:%[0-9]+]]:_(s32), [[AIE_POSTINC_3D_SEXTLOAD1:%[0-9]+]]:_(p0), [[AIE_POSTINC_3D_SEXTLOAD2:%[0-9]+]]:_(s20), [[AIE_POSTINC_3D_SEXTLOAD3:%[0-9]+]]:_ = G_AIE_POSTINC_3D_SEXTLOAD [[COPY]], [[C]], [[C1]], [[C2]], [[C3]], [[C4]], [[C5]], [[C6]] :: (load (s20), align 4) + ; CHECK-NEXT: $r0 = COPY [[AIE_POSTINC_3D_SEXTLOAD]](s32) + ; CHECK-NEXT: $p0 = COPY [[AIE_POSTINC_3D_SEXTLOAD1]](p0) + %0:_(p0) = COPY $p0 + %1:_(s20) = G_CONSTANT i20 64 + %9:_(s32) = G_SEXTLOAD %0(p0) :: (load (s20)) + $r0 = COPY %9(s32) + %2:_(s20) = G_CONSTANT i20 128 + %3:_(s20) = G_CONSTANT i20 0 + %4:_(s20) = G_CONSTANT i20 32 + %5:_(s20) = G_CONSTANT i20 128 + %6:_(s20) = G_CONSTANT i20 0 + %7:_(s20) = G_CONSTANT i20 32 + %8:_(p0), %10:_(s20), %11:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2p.add.3d), %0:_(p0), %1:_(s20), %2:_(s20), %3:_(s20), %4:_(s20), %5:_(s20), %6:_(s20), %7:_(s20) + $p0 = COPY %8 +... 
+ +--- +name: zextload_postinc_3d +body: | + bb.0: + ; CHECK-LABEL: name: zextload_postinc_3d + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s20) = G_CONSTANT i20 128 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s20) = G_CONSTANT i20 0 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s20) = G_CONSTANT i20 32 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s20) = G_CONSTANT i20 128 + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s20) = G_CONSTANT i20 0 + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s20) = G_CONSTANT i20 32 + ; CHECK-NEXT: [[AIE_POSTINC_3D_ZEXTLOAD:%[0-9]+]]:_(s32), [[AIE_POSTINC_3D_ZEXTLOAD1:%[0-9]+]]:_(p0), [[AIE_POSTINC_3D_ZEXTLOAD2:%[0-9]+]]:_(s20), [[AIE_POSTINC_3D_ZEXTLOAD3:%[0-9]+]]:_ = G_AIE_POSTINC_3D_ZEXTLOAD [[COPY]], [[C]], [[C1]], [[C2]], [[C3]], [[C4]], [[C5]], [[C6]] :: (load (s20), align 4) + ; CHECK-NEXT: $r0 = COPY [[AIE_POSTINC_3D_ZEXTLOAD]](s32) + ; CHECK-NEXT: $p0 = COPY [[AIE_POSTINC_3D_ZEXTLOAD1]](p0) + %0:_(p0) = COPY $p0 + %1:_(s20) = G_CONSTANT i20 64 + %9:_(s32) = G_ZEXTLOAD %0(p0) :: (load (s20)) + $r0 = COPY %9(s32) + %2:_(s20) = G_CONSTANT i20 128 + %3:_(s20) = G_CONSTANT i20 0 + %4:_(s20) = G_CONSTANT i20 32 + %5:_(s20) = G_CONSTANT i20 128 + %6:_(s20) = G_CONSTANT i20 0 + %7:_(s20) = G_CONSTANT i20 32 + %8:_(p0), %10:_(s20), %11:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2p.add.3d), %0:_(p0), %1:_(s20), %2:_(s20), %3:_(s20), %4:_(s20), %5:_(s20), %6:_(s20), %7:_(s20) + $p0 = COPY %8 +... 
+ +# Test G_CONSTANT move up with postinc_3d +--- +name: postinc_3d_move_g_constant +body: | + bb.0: + ; CHECK-LABEL: name: postinc_3d_move_g_constant + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s20) = G_CONSTANT i20 128 + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s20) = G_CONSTANT i20 0 + ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s20) = G_CONSTANT i20 32 + ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s20) = G_CONSTANT i20 128 + ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s20) = G_CONSTANT i20 0 + ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s20) = G_CONSTANT i20 32 + ; CHECK-NEXT: [[AIE_POSTINC_3D_LOAD:%[0-9]+]]:_(<32 x s8>), [[AIE_POSTINC_3D_LOAD1:%[0-9]+]]:_(p0), [[AIE_POSTINC_3D_LOAD2:%[0-9]+]]:_(s20), [[AIE_POSTINC_3D_LOAD3:%[0-9]+]]:_ = G_AIE_POSTINC_3D_LOAD [[COPY]], [[C]], [[C1]], [[C2]], [[C3]], [[C4]], [[C5]], [[C6]] :: (load (<32 x s8>)) + ; CHECK-NEXT: $wl0 = COPY [[AIE_POSTINC_3D_LOAD]](<32 x s8>) + ; CHECK-NEXT: $p0 = COPY [[AIE_POSTINC_3D_LOAD1]](p0) + %0:_(p0) = COPY $p0 + %1:_(s20) = G_CONSTANT i20 64 + %9:_(<32 x s8>) = G_LOAD %0(p0) :: (load (<32 x s8>)) + $wl0 = COPY %9(<32 x s8>) + %2:_(s20) = G_CONSTANT i20 128 + %3:_(s20) = G_CONSTANT i20 0 + %4:_(s20) = G_CONSTANT i20 32 + %5:_(s20) = G_CONSTANT i20 128 + %6:_(s20) = G_CONSTANT i20 0 + %7:_(s20) = G_CONSTANT i20 32 + %8:_(p0), %10:_(s20), %11:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2p.add.3d), %0:_(p0), %1:_(s20), %2:_(s20), %3:_(s20), %4:_(s20), %5:_(s20), %6:_(s20), %7:_(s20) + $p0 = COPY %8 +... 
+ +--- +name: offset_combine_vectors +body: | + bb.0: + ; CHECK-LABEL: name: offset_combine_vectors + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[AIE_OFFSET_LOAD:%[0-9]+]]:_(<32 x s8>) = G_AIE_OFFSET_LOAD [[COPY]](p0), [[C]](s20) :: (load (<32 x s8>)) + ; CHECK-NEXT: $wl0 = COPY [[AIE_OFFSET_LOAD]](<32 x s8>) + %0:_(p0) = COPY $p0 + %1:_(s20) = G_CONSTANT i20 64 + %3:_(p0) = G_PTR_ADD %0, %1 + %4:_(<32 x s8>) = G_LOAD %3(p0) :: (load (<32 x s8>)) + $wl0 = COPY %4(<32 x s8>) +... + +--- +name: preinc_combine_vectors_512_bits +body: | + bb.0: + ; CHECK-LABEL: name: preinc_combine_vectors_512_bits + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[AIE_OFFSET_LOAD:%[0-9]+]]:_(<32 x s16>) = G_AIE_OFFSET_LOAD [[COPY]](p0), [[C]](s20) :: (load (<32 x s16>)) + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: G_AIE_OFFSET_STORE [[AIE_OFFSET_LOAD]](<32 x s16>), [[COPY1]](p0), [[C1]](s20) :: (store (<32 x s16>)) + %0:_(p0) = COPY $p0 + %1:_(s20) = G_CONSTANT i20 64 + %3:_(p0) = G_PTR_ADD %0, %1 + %4:_(<32 x s16>) = G_LOAD %3(p0) :: (load (<32 x s16>)) + %5:_(p0) = COPY $p0 + %6:_(s20) = G_CONSTANT i20 64 + %7:_(p0) = G_PTR_ADD %5, %6 + G_STORE %4:_(<32 x s16>), %7(p0) :: (store (<32 x s16>)) +... 
+ +--- +name: preinc_combine_vectors_1024_bits +body: | + bb.0: + ; CHECK-LABEL: name: preinc_combine_vectors_1024_bits + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[AIE_OFFSET_LOAD:%[0-9]+]]:_(<32 x s32>) = G_AIE_OFFSET_LOAD [[COPY]](p0), [[C]](s20) :: (load (<32 x s32>)) + ; CHECK-NEXT: $y2 = COPY [[AIE_OFFSET_LOAD]](<32 x s32>) + %0:_(p0) = COPY $p0 + %1:_(s20) = G_CONSTANT i20 64 + %3:_(p0) = G_PTR_ADD %0, %1 + %4:_(<32 x s32>) = G_LOAD %3(p0) :: (load (<32 x s32>)) + $y2 = COPY %4(<32 x s32>) +... + +--- +name: preinc_combine_vectors_2048_bits +body: | + bb.0: + ; CHECK-LABEL: name: preinc_combine_vectors_2048_bits + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[AIE_OFFSET_LOAD:%[0-9]+]]:_(<32 x s64>) = G_AIE_OFFSET_LOAD [[COPY]](p0), [[C]](s20) :: (load (<32 x s64>)) + ; CHECK-NEXT: $dm0 = COPY [[AIE_OFFSET_LOAD]](<32 x s64>) + %0:_(p0) = COPY $p0 + %1:_(s20) = G_CONSTANT i20 64 + %3:_(p0) = G_PTR_ADD %0, %1 + %4:_(<32 x s64>) = G_LOAD %3(p0) :: (load (<32 x s64>)) + $dm0 = COPY %4(<32 x s64>) +... + +# If the original pointer is used after the ptr_add (in this case the implicit +# %0 in bb.1) then do not combine to a post increment because that would lead to +# an additional COPY to preserve the original pointer. +# Note: If the copy is inevitable (in this case it is) we might as well combine. 
+# Our current implementation does not consider this case +--- +name: not_combine_postinc_later_use +body: | + ; CHECK-LABEL: name: not_combine_postinc_later_use + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s20) + ; CHECK-NEXT: $r0 = COPY [[LOAD]](s32) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[PTR_ADD]](p0), implicit [[PTR_ADD1]](p0), implicit [[COPY]](p0) + bb.0: + %0:_(p0) = COPY $p0 + %1:_(s20) = G_CONSTANT i20 64 + %2:_(s20) = G_CONSTANT i20 64 + %4:_(s32) = G_LOAD %0(p0) :: (load (s32)) + %3:_(p0) = G_PTR_ADD %0, %1 + %5:_(p0) = G_PTR_ADD %0, %2 + $r0 = COPY %4(s32) + + bb.1: + PseudoRET implicit $lr, implicit %3(p0), implicit %5, implicit %0 +... 
+ +# In the following tests we are testing the behaviour of the combiner with uses +# in different basic blocks than the instruction to be combined +--- +name: postinc_bb_0_use_bb_1 +body: | + ; CHECK-LABEL: name: postinc_bb_0_use_bb_1 + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; CHECK-NEXT: $r0 = COPY [[LOAD]](s32) + ; CHECK-NEXT: PseudoJNZ $r1, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $p0 = COPY [[COPY]](p0) + ; CHECK-NEXT: PseudoJ_jump_imm %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: PseudoJ_jump_imm %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[PTR_ADD]](p0) + bb.0: + %0:_(p0) = COPY $p0 + %1:_(s20) = G_CONSTANT i20 64 + %4:_(s32) = G_LOAD %0(p0) :: (load (s32)) + %3:_(p0) = G_PTR_ADD %0, %1 + $r0 = COPY %4(s32) + PseudoJNZ $r1, %bb.2 + + bb.1: + $p0 = COPY %0 + PseudoJ_jump_imm %bb.3 + + bb.2: + PseudoJ_jump_imm %bb.3 + + bb.3: + PseudoRET implicit $lr, implicit %3 +... 
+ +--- +name: postinc_bb_0_use_bb_3 +body: | + ; CHECK-LABEL: name: postinc_bb_0_use_bb_3 + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; CHECK-NEXT: $r0 = COPY [[LOAD]](s32) + ; CHECK-NEXT: PseudoJNZ $r1, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: PseudoJ_jump_imm %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: PseudoJ_jump_imm %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: $p0 = COPY [[COPY]](p0) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[PTR_ADD]](p0) + bb.0: + %0:_(p0) = COPY $p0 + %1:_(s20) = G_CONSTANT i20 64 + %4:_(s32) = G_LOAD %0(p0) :: (load (s32)) + %3:_(p0) = G_PTR_ADD %0, %1 + $r0 = COPY %4(s32) + PseudoJNZ $r1, %bb.2 + + bb.1: + PseudoJ_jump_imm %bb.3 + + bb.2: + PseudoJ_jump_imm %bb.3 + + bb.3: + $p0 = COPY %0 + PseudoRET implicit $lr, implicit %3 +... 
+ +--- +name: postinc_bb_1_use_bb_0 +body: | + ; CHECK-LABEL: name: postinc_bb_1_use_bb_0 + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: $p0 = COPY [[COPY]](p0) + ; CHECK-NEXT: PseudoJNZ $r1, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[AIE_POSTINC_LOAD:%[0-9]+]]:_(s32), [[AIE_POSTINC_LOAD1:%[0-9]+]]:_(p0) = G_AIE_POSTINC_LOAD [[COPY]], [[C]](s20) :: (load (s32)) + ; CHECK-NEXT: $r0 = COPY [[AIE_POSTINC_LOAD]](s32) + ; CHECK-NEXT: PseudoJ_jump_imm %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: PseudoJ_jump_imm %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_POSTINC_LOAD1]](p0) + bb.0: + %0:_(p0) = COPY $p0 + $p0 = COPY %0 + PseudoJNZ $r1, %bb.2 + + bb.1: + %1:_(s20) = G_CONSTANT i20 64 + %4:_(s32) = G_LOAD %0(p0) :: (load (s32)) + %3:_(p0) = G_PTR_ADD %0, %1 + $r0 = COPY %4(s32) + PseudoJ_jump_imm %bb.3 + + bb.2: + PseudoJ_jump_imm %bb.3 + + bb.3: + PseudoRET implicit $lr, implicit %3 +... + +# In this case we would want to combine the postincrement. +# But the current heuristic is too conservative and since the use in bb.2 does +# not dominate the combined instruction in bb.1 it aborts the combining. 
+--- +name: postinc_bb_1_use_bb_2 +body: | + ; CHECK-LABEL: name: postinc_bb_1_use_bb_2 + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: PseudoJNZ $r1, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; CHECK-NEXT: $r0 = COPY [[LOAD]](s32) + ; CHECK-NEXT: PseudoJ_jump_imm %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $p0 = COPY [[COPY]](p0) + ; CHECK-NEXT: PseudoJ_jump_imm %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[PTR_ADD]](p0) + bb.0: + %0:_(p0) = COPY $p0 + PseudoJNZ $r1, %bb.2 + + bb.1: + %1:_(s20) = G_CONSTANT i20 64 + %4:_(s32) = G_LOAD %0(p0) :: (load (s32)) + %3:_(p0) = G_PTR_ADD %0, %1 + $r0 = COPY %4(s32) + PseudoJ_jump_imm %bb.3 + + bb.2: + $p0 = COPY %0 + PseudoJ_jump_imm %bb.3 + + bb.3: + PseudoRET implicit $lr, implicit %3 +... 
+ +--- +name: postinc_bb_1_use_bb_3 +body: | + ; CHECK-LABEL: name: postinc_bb_1_use_bb_3 + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: PseudoJNZ $r1, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; CHECK-NEXT: $r0 = COPY [[LOAD]](s32) + ; CHECK-NEXT: PseudoJ_jump_imm %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: PseudoJ_jump_imm %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: $p0 = COPY [[COPY]](p0) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[PTR_ADD]](p0) + bb.0: + %0:_(p0) = COPY $p0 + PseudoJNZ $r1, %bb.2 + + bb.1: + %1:_(s20) = G_CONSTANT i20 64 + %4:_(s32) = G_LOAD %0(p0) :: (load (s32)) + %3:_(p0) = G_PTR_ADD %0, %1 + $r0 = COPY %4(s32) + PseudoJ_jump_imm %bb.3 + + bb.2: + PseudoJ_jump_imm %bb.3 + + bb.3: + $p0 = COPY %0 + PseudoRET implicit $lr, implicit %3 +... 
+ +--- +name: postinc_bb_3_use_bb_0 +body: | + ; CHECK-LABEL: name: postinc_bb_3_use_bb_0 + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: $p0 = COPY [[COPY]](p0) + ; CHECK-NEXT: PseudoJNZ $r1, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: PseudoJ_jump_imm %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: PseudoJ_jump_imm %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[AIE_POSTINC_LOAD:%[0-9]+]]:_(s32), [[AIE_POSTINC_LOAD1:%[0-9]+]]:_(p0) = G_AIE_POSTINC_LOAD [[COPY]], [[C]](s20) :: (load (s32)) + ; CHECK-NEXT: $r0 = COPY [[AIE_POSTINC_LOAD]](s32) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_POSTINC_LOAD1]](p0) + bb.0: + %0:_(p0) = COPY $p0 + $p0 = COPY %0 + PseudoJNZ $r1, %bb.2 + + bb.1: + PseudoJ_jump_imm %bb.3 + + bb.2: + PseudoJ_jump_imm %bb.3 + + bb.3: + %1:_(s20) = G_CONSTANT i20 64 + %4:_(s32) = G_LOAD %0(p0) :: (load (s32)) + %3:_(p0) = G_PTR_ADD %0, %1 + $r0 = COPY %4(s32) + PseudoRET implicit $lr, implicit %3 +... + +# In this case we would want to combine the postincrement. +# But the current heuristic is too conservative and since the use in bb.1 does +# not dominate the combined instruction in bb.3 it aborts the combining. 
+--- +name: postinc_bb_3_use_bb_1 +body: | + ; CHECK-LABEL: name: postinc_bb_3_use_bb_1 + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: PseudoJNZ $r1, %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: $p0 = COPY [[COPY]](p0) + ; CHECK-NEXT: PseudoJ_jump_imm %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: PseudoJ_jump_imm %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3: + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[COPY]](p0) :: (load (s32)) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; CHECK-NEXT: $r0 = COPY [[LOAD]](s32) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[PTR_ADD]](p0) + bb.0: + %0:_(p0) = COPY $p0 + PseudoJNZ $r1, %bb.2 + + bb.1: + $p0 = COPY %0 + PseudoJ_jump_imm %bb.3 + + bb.2: + PseudoJ_jump_imm %bb.3 + + bb.3: + %1:_(s20) = G_CONSTANT i20 64 + %4:_(s32) = G_LOAD %0(p0) :: (load (s32)) + %3:_(p0) = G_PTR_ADD %0, %1 + $r0 = COPY %4(s32) + PseudoRET implicit $lr, implicit %3 +... 
+ +--- +name: vector_256_combine_postinc +body: | + bb.0: + ; CHECK-LABEL: name: vector_256_combine_postinc + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $p1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[AIE_POSTINC_LOAD:%[0-9]+]]:_(<32 x s8>), [[AIE_POSTINC_LOAD1:%[0-9]+]]:_(p0) = G_AIE_POSTINC_LOAD [[COPY]], [[C]](s20) :: (load (<32 x s8>)) + ; CHECK-NEXT: $p0 = COPY [[AIE_POSTINC_LOAD1]](p0) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s20) + ; CHECK-NEXT: [[AIE_POSTINC_STORE:%[0-9]+]]:_(p0) = G_AIE_POSTINC_STORE [[AIE_POSTINC_LOAD]](<32 x s8>), [[COPY1]], [[C1]](s20) :: (store (<32 x s8>)) + ; CHECK-NEXT: $p1 = COPY [[PTR_ADD]](p0) + ; CHECK-NEXT: $p2 = COPY [[AIE_POSTINC_STORE]](p0) + ; CHECK-NEXT: $wl0 = COPY [[AIE_POSTINC_LOAD]](<32 x s8>) + %0:_(p0) = COPY $p0 + %6:_(p0) = COPY $p1 + %1:_(s20) = G_CONSTANT i20 64 + %2:_(s20) = G_CONSTANT i20 64 + %4:_(<32 x s8>) = G_LOAD %0(p0) :: (load (<32 x s8>)) + G_STORE %4, %6 :: (store (<32 x s8>)) + %3:_(p0) = G_PTR_ADD %0, %1 + $p0 = COPY %3 + %7:_(p0) = G_PTR_ADD %6, %1 + $p1 = COPY %7 + %5:_(p0) = G_PTR_ADD %6, %2 + $p2 = COPY %5 + $wl0 = COPY %4(<32 x s8>) +... 
+ +--- +name: vector_256_combine_postinc_2d +body: | + bb.0: + ; CHECK-LABEL: name: vector_256_combine_postinc_2d + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $p1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[AIE_POSTINC_2D_LOAD:%[0-9]+]]:_(<32 x s8>), [[AIE_POSTINC_2D_LOAD1:%[0-9]+]]:_(p0), [[AIE_POSTINC_2D_LOAD2:%[0-9]+]]:_(s20) = G_AIE_POSTINC_2D_LOAD [[COPY]], [[C]], [[C]], [[C]], [[C]] :: (load (<32 x s8>)) + ; CHECK-NEXT: $p0 = COPY [[AIE_POSTINC_2D_LOAD1]](p0) + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p0), [[INT1:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2p.add.2d), [[COPY1]](p0), [[C]](s20), [[C]](s20), [[C]](s20), [[C]](s20) + ; CHECK-NEXT: [[AIE_POSTINC_2D_STORE:%[0-9]+]]:_(p0), [[AIE_POSTINC_2D_STORE1:%[0-9]+]]:_(s20) = G_AIE_POSTINC_2D_STORE [[AIE_POSTINC_2D_LOAD]](<32 x s8>), [[COPY1]], [[C]], [[C]], [[C]], [[C]] :: (store (<32 x s8>)) + ; CHECK-NEXT: $p1 = COPY [[INT]](p0) + ; CHECK-NEXT: $p2 = COPY [[AIE_POSTINC_2D_STORE]](p0) + ; CHECK-NEXT: $wl0 = COPY [[AIE_POSTINC_2D_LOAD]](<32 x s8>) + %0:_(p0) = COPY $p0 + %6:_(p0) = COPY $p1 + %1:_(s20) = G_CONSTANT i20 64 + %4:_(<32 x s8>) = G_LOAD %0(p0) :: (load (<32 x s8>)) + G_STORE %4, %6 :: (store (<32 x s8>)) + %3:_(p0), %8:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2p.add.2d), %0:_(p0), %1:_(s20), %1:_(s20), %1:_(s20), %1:_(s20) + $p0 = COPY %3 + %7:_(p0), %9:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2p.add.2d), %6:_(p0), %1:_(s20), %1:_(s20), %1:_(s20), %1:_(s20) + $p1 = COPY %7 + %5:_(p0), %10:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2p.add.2d), %6:_(p0), %1:_(s20), %1:_(s20), %1:_(s20), %1:_(s20) + $p2 = COPY %5 + $wl0 = COPY %4(<32 x s8>) +... 
+ +--- +name: vector_256_combine_postinc_3d +body: | + bb.0: + ; CHECK-LABEL: name: vector_256_combine_postinc_3d + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $p1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[AIE_POSTINC_3D_LOAD:%[0-9]+]]:_(<32 x s8>), [[AIE_POSTINC_3D_LOAD1:%[0-9]+]]:_(p0), [[AIE_POSTINC_3D_LOAD2:%[0-9]+]]:_(s20), [[AIE_POSTINC_3D_LOAD3:%[0-9]+]]:_ = G_AIE_POSTINC_3D_LOAD [[COPY]], [[C]], [[C]], [[C]], [[C]], [[C]], [[C]], [[C]] :: (load (<32 x s8>)) + ; CHECK-NEXT: $p0 = COPY [[AIE_POSTINC_3D_LOAD1]](p0) + ; CHECK-NEXT: [[INT:%[0-9]+]]:_(p0), [[INT1:%[0-9]+]]:_(s20), [[INT2:%[0-9]+]]:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2p.add.3d), [[COPY1]](p0), [[C]](s20), [[C]](s20), [[C]](s20), [[C]](s20), [[C]](s20), [[C]](s20), [[C]](s20) + ; CHECK-NEXT: [[AIE_POSTINC_3D_STORE:%[0-9]+]]:_(p0), [[AIE_POSTINC_3D_STORE1:%[0-9]+]]:_(s20), [[AIE_POSTINC_3D_STORE2:%[0-9]+]]:_ = G_AIE_POSTINC_3D_STORE [[AIE_POSTINC_3D_LOAD]](<32 x s8>), [[COPY1]], [[C]], [[C]], [[C]], [[C]], [[C]], [[C]], [[C]] :: (store (<32 x s8>)) + ; CHECK-NEXT: $p1 = COPY [[INT]](p0) + ; CHECK-NEXT: $p2 = COPY [[AIE_POSTINC_3D_STORE]](p0) + ; CHECK-NEXT: $wl0 = COPY [[AIE_POSTINC_3D_LOAD]](<32 x s8>) + %0:_(p0) = COPY $p0 + %6:_(p0) = COPY $p1 + %1:_(s20) = G_CONSTANT i20 64 + %4:_(<32 x s8>) = G_LOAD %0(p0) :: (load (<32 x s8>)) + G_STORE %4, %6 :: (store (<32 x s8>)) + %3:_(p0), %8:_(s20), %9:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2p.add.3d), %0:_(p0), %1:_(s20), %1:_(s20), %1:_(s20), %1:_(s20), %1:_(s20), %1:_(s20), %1:_(s20) + $p0 = COPY %3 + %7:_(p0), %10:_(s20), %11:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2p.add.3d), %6:_(p0), %1:_(s20), %1:_(s20), %1:_(s20), %1:_(s20), %1:_(s20), %1:_(s20), %1:_(s20) + $p1 = COPY %7 + %5:_(p0), %12:_(s20), %13:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2p.add.3d), %6:_(p0), %1:_(s20), %1:_(s20), %1:_(s20), %1:_(s20), %1:_(s20), %1:_(s20), %1:_(s20) + $p2 = COPY %5 + $wl0 = 
COPY %4(<32 x s8>) +... + +--- +name: vector_256_combine_postinc_move_ptr_add +body: | + ; CHECK-LABEL: name: vector_256_combine_postinc_move_ptr_add + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $p1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[AIE_POSTINC_LOAD:%[0-9]+]]:_(<32 x s8>), [[AIE_POSTINC_LOAD1:%[0-9]+]]:_(p0) = G_AIE_POSTINC_LOAD [[COPY]], [[C]](s20) :: (load (<32 x s8>)) + ; CHECK-NEXT: G_STORE [[AIE_POSTINC_LOAD]](<32 x s8>), [[COPY1]](p0) :: (store (<32 x s8>)) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $wl0, implicit [[AIE_POSTINC_LOAD1]](p0), implicit [[AIE_POSTINC_LOAD]](<32 x s8>) + bb.0: + %0:_(p0) = COPY $p0 + %6:_(p0) = COPY $p1 + %1:_(s20) = G_CONSTANT i20 64 + %4:_(<32 x s8>) = G_LOAD %0(p0) :: (load (<32 x s8>)) + G_STORE %4, %6 :: (store (<32 x s8>)) + %3:_(p0) = G_PTR_ADD %0, %1 + + bb.1: + PseudoRET implicit $lr, implicit $wl0, implicit %3, implicit %4 +... 
+ +--- +name: vector_512_combine_postinc +body: | + bb.0: + ; CHECK-LABEL: name: vector_512_combine_postinc + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $p1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[AIE_POSTINC_LOAD:%[0-9]+]]:_(<32 x s16>), [[AIE_POSTINC_LOAD1:%[0-9]+]]:_(p0) = G_AIE_POSTINC_LOAD [[COPY]], [[C]](s20) :: (load (<32 x s16>)) + ; CHECK-NEXT: $p0 = COPY [[AIE_POSTINC_LOAD1]](p0) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s20) + ; CHECK-NEXT: [[AIE_POSTINC_STORE:%[0-9]+]]:_(p0) = G_AIE_POSTINC_STORE [[AIE_POSTINC_LOAD]](<32 x s16>), [[COPY1]], [[C1]](s20) :: (store (<32 x s16>)) + ; CHECK-NEXT: $p1 = COPY [[PTR_ADD]](p0) + ; CHECK-NEXT: $p2 = COPY [[AIE_POSTINC_STORE]](p0) + %0:_(p0) = COPY $p0 + %6:_(p0) = COPY $p1 + %1:_(s20) = G_CONSTANT i20 64 + %2:_(s20) = G_CONSTANT i20 64 + %4:_(<32 x s16>) = G_LOAD %0(p0) :: (load (<32 x s16>)) + G_STORE %4, %6 :: (store (<32 x s16>)) + %3:_(p0) = G_PTR_ADD %0, %1 + $p0 = COPY %3 + %7:_(p0) = G_PTR_ADD %6, %1 + $p1 = COPY %7 + %5:_(p0) = G_PTR_ADD %6, %2 + $p2 = COPY %5 +... 
+ +--- +name: vector_512_combine_postinc_2d +body: | + bb.0: + ; CHECK-LABEL: name: vector_512_combine_postinc_2d + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $p1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[AIE_POSTINC_2D_LOAD:%[0-9]+]]:_(<32 x s16>), [[AIE_POSTINC_2D_LOAD1:%[0-9]+]]:_(p0), [[AIE_POSTINC_2D_LOAD2:%[0-9]+]]:_(s20) = G_AIE_POSTINC_2D_LOAD [[COPY]], [[C]], [[C]], [[C]], [[C]] :: (load (<32 x s16>)) + ; CHECK-NEXT: [[AIE_POSTINC_2D_STORE:%[0-9]+]]:_(p0), [[AIE_POSTINC_2D_STORE1:%[0-9]+]]:_(s20) = G_AIE_POSTINC_2D_STORE [[AIE_POSTINC_2D_LOAD]](<32 x s16>), [[COPY1]], [[C]], [[C]], [[C]], [[C]] :: (store (<32 x s16>)) + ; CHECK-NEXT: $p0 = COPY [[AIE_POSTINC_2D_LOAD1]](p0) + ; CHECK-NEXT: $p1 = COPY [[AIE_POSTINC_2D_STORE]](p0) + %0:_(p0) = COPY $p0 + %6:_(p0) = COPY $p1 + %1:_(s20) = G_CONSTANT i20 64 + %4:_(<32 x s16>) = G_LOAD %0(p0) :: (load (<32 x s16>)) + G_STORE %4, %6 :: (store (<32 x s16>)) + %3:_(p0), %8:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2p.add.2d), %0:_(p0), %1:_(s20), %1:_(s20), %1:_(s20), %1:_(s20) + $p0 = COPY %3 + %7:_(p0), %9:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2p.add.2d), %6:_(p0), %1:_(s20), %1:_(s20), %1:_(s20), %1:_(s20) + $p1 = COPY %7 +... 
+ +--- +name: vector_512_combine_postinc_3d +body: | + bb.0: + ; CHECK-LABEL: name: vector_512_combine_postinc_3d + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $p1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[AIE_POSTINC_3D_LOAD:%[0-9]+]]:_(<32 x s16>), [[AIE_POSTINC_3D_LOAD1:%[0-9]+]]:_(p0), [[AIE_POSTINC_3D_LOAD2:%[0-9]+]]:_(s20), [[AIE_POSTINC_3D_LOAD3:%[0-9]+]]:_ = G_AIE_POSTINC_3D_LOAD [[COPY]], [[C]], [[C]], [[C]], [[C]], [[C]], [[C]], [[C]] :: (load (<32 x s16>)) + ; CHECK-NEXT: [[AIE_POSTINC_3D_STORE:%[0-9]+]]:_(p0), [[AIE_POSTINC_3D_STORE1:%[0-9]+]]:_(s20), [[AIE_POSTINC_3D_STORE2:%[0-9]+]]:_ = G_AIE_POSTINC_3D_STORE [[AIE_POSTINC_3D_LOAD]](<32 x s16>), [[COPY1]], [[C]], [[C]], [[C]], [[C]], [[C]], [[C]], [[C]] :: (store (<32 x s16>)) + ; CHECK-NEXT: $p0 = COPY [[AIE_POSTINC_3D_LOAD1]](p0) + ; CHECK-NEXT: $p1 = COPY [[AIE_POSTINC_3D_STORE]](p0) + %0:_(p0) = COPY $p0 + %6:_(p0) = COPY $p1 + %1:_(s20) = G_CONSTANT i20 64 + %4:_(<32 x s16>) = G_LOAD %0(p0) :: (load (<32 x s16>)) + G_STORE %4, %6 :: (store (<32 x s16>)) + %3:_(p0), %8:_(s20), %9:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2p.add.3d), %0:_(p0), %1:_(s20), %1:_(s20), %1:_(s20), %1:_(s20), %1:_(s20), %1:_(s20), %1:_(s20) + $p0 = COPY %3 + %7:_(p0), %10:_(s20), %11:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2p.add.3d), %6:_(p0), %1:_(s20), %1:_(s20), %1:_(s20), %1:_(s20), %1:_(s20), %1:_(s20), %1:_(s20) + $p1 = COPY %7 +... 
+ +--- +name: vector_1024_combine_postinc +body: | + bb.0: + ; CHECK-LABEL: name: vector_1024_combine_postinc + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $p1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[AIE_POSTINC_LOAD:%[0-9]+]]:_(<32 x s32>), [[AIE_POSTINC_LOAD1:%[0-9]+]]:_(p0) = G_AIE_POSTINC_LOAD [[COPY]], [[C]](s20) :: (load (<32 x s32>)) + ; CHECK-NEXT: $p0 = COPY [[AIE_POSTINC_LOAD1]](p0) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s20) + ; CHECK-NEXT: [[AIE_POSTINC_STORE:%[0-9]+]]:_(p0) = G_AIE_POSTINC_STORE [[AIE_POSTINC_LOAD]](<32 x s32>), [[COPY1]], [[C1]](s20) :: (store (<32 x s32>)) + ; CHECK-NEXT: $p1 = COPY [[PTR_ADD]](p0) + ; CHECK-NEXT: $p2 = COPY [[AIE_POSTINC_STORE]](p0) + %0:_(p0) = COPY $p0 + %6:_(p0) = COPY $p1 + %1:_(s20) = G_CONSTANT i20 64 + %2:_(s20) = G_CONSTANT i20 64 + %4:_(<32 x s32>) = G_LOAD %0(p0) :: (load (<32 x s32>)) + G_STORE %4, %6 :: (store (<32 x s32>)) + %3:_(p0) = G_PTR_ADD %0, %1 + $p0 = COPY %3 + %7:_(p0) = G_PTR_ADD %6, %1 + $p1 = COPY %7 + %5:_(p0) = G_PTR_ADD %6, %2 + $p2 = COPY %5 +... 
+ +--- +name: vector_2048_combine_postinc +body: | + bb.0: + ; CHECK-LABEL: name: vector_2048_combine_postinc + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $p1 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[LOAD:%[0-9]+]]:_(<64 x s32>) = G_LOAD [[COPY]](p0) :: (load (<64 x s32>)) + ; CHECK-NEXT: G_STORE [[LOAD]](<64 x s32>), [[COPY1]](p0) :: (store (<64 x s32>)) + ; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s20) + ; CHECK-NEXT: $p0 = COPY [[PTR_ADD]](p0) + ; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s20) + ; CHECK-NEXT: $p1 = COPY [[PTR_ADD1]](p0) + ; CHECK-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s20) + ; CHECK-NEXT: $p2 = COPY [[PTR_ADD2]](p0) + %0:_(p0) = COPY $p0 + %6:_(p0) = COPY $p1 + %1:_(s20) = G_CONSTANT i20 64 + %2:_(s20) = G_CONSTANT i20 64 + %4:_(<64 x s32>) = G_LOAD %0(p0) :: (load (<64 x s32>)) + G_STORE %4, %6 :: (store (<64 x s32>)) + %3:_(p0) = G_PTR_ADD %0, %1 + $p0 = COPY %3 + %7:_(p0) = G_PTR_ADD %6, %1 + $p1 = COPY %7 + %5:_(p0) = G_PTR_ADD %6, %2 + $p2 = COPY %5 +... + +--- + +name: offset_combine_128bit_load +body: | + bb.0: + ; CHECK-LABEL: name: offset_combine_128bit_load + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[AIE_OFFSET_LOAD:%[0-9]+]]:_(<4 x s32>) = G_AIE_OFFSET_LOAD [[COPY]](p0), [[C]](s20) :: (load (<4 x s32>)) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit [[AIE_OFFSET_LOAD]](<4 x s32>) + %0:_(p0) = COPY $p0 + %1:_(s20) = G_CONSTANT i20 64 + %2:_(p0) = G_PTR_ADD %0, %1 + %3:_(<4 x s32>) = G_LOAD %2(p0) :: (load (<4 x s32>)) + PseudoRET implicit $lr, implicit %3 +... 
+ +--- +name: postinc_combine_128bit_load +body: | + bb.0: + ; CHECK-LABEL: name: postinc_combine_128bit_load + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[AIE_POSTINC_LOAD:%[0-9]+]]:_(<4 x s32>), [[AIE_POSTINC_LOAD1:%[0-9]+]]:_(p0) = G_AIE_POSTINC_LOAD [[COPY]], [[C]](s20) :: (load (<4 x s32>)) + ; CHECK-NEXT: PseudoRET implicit $lr, implicit $wl0, implicit [[AIE_POSTINC_LOAD]](<4 x s32>), implicit [[AIE_POSTINC_LOAD1]](p0) + %0:_(p0) = COPY $p0 + %1:_(s20) = G_CONSTANT i20 64 + %2:_(<4 x s32>) = G_LOAD %0(p0) :: (load (<4 x s32>)) + %3:_(p0) = G_PTR_ADD %0, %1 + PseudoRET implicit $lr, implicit $wl0, implicit %2, implicit %3 +... + +--- +name: postinc_2d_combine_128bit_load +body: | + bb.0: + ; CHECK-LABEL: name: postinc_2d_combine_128bit_load + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[AIE_POSTINC_2D_LOAD:%[0-9]+]]:_(<16 x s8>), [[AIE_POSTINC_2D_LOAD1:%[0-9]+]]:_(p0), [[AIE_POSTINC_2D_LOAD2:%[0-9]+]]:_(s20) = G_AIE_POSTINC_2D_LOAD [[COPY]], [[C]], [[C]], [[C]], [[C]] :: (load (<16 x s8>)) + ; CHECK-NEXT: $p0 = COPY [[AIE_POSTINC_2D_LOAD1]](p0) + ; CHECK-NEXT: $q0 = COPY [[AIE_POSTINC_2D_LOAD]](<16 x s8>) + %0:_(p0) = COPY $p0 + %1:_(s20) = G_CONSTANT i20 64 + %2:_(<16 x s8>) = G_LOAD %0(p0) :: (load (<16 x s8>)) + %3:_(p0), %4:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2p.add.2d), %0:_(p0), %1:_(s20), %1:_(s20), %1:_(s20), %1:_(s20) + $p0 = COPY %3 + $q0 = COPY %2(<16 x s8>) +... 
+ +--- +name: postinc_3d_combine_128bit_load +body: | + bb.0: + ; CHECK-LABEL: name: postinc_3d_combine_128bit_load + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $p0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s20) = G_CONSTANT i20 64 + ; CHECK-NEXT: [[AIE_POSTINC_3D_LOAD:%[0-9]+]]:_(<16 x s8>), [[AIE_POSTINC_3D_LOAD1:%[0-9]+]]:_(p0), [[AIE_POSTINC_3D_LOAD2:%[0-9]+]]:_(s20), [[AIE_POSTINC_3D_LOAD3:%[0-9]+]]:_ = G_AIE_POSTINC_3D_LOAD [[COPY]], [[C]], [[C]], [[C]], [[C]], [[C]], [[C]], [[C]] :: (load (<16 x s8>)) + ; CHECK-NEXT: $p0 = COPY [[AIE_POSTINC_3D_LOAD1]](p0) + ; CHECK-NEXT: $q0 = COPY [[AIE_POSTINC_3D_LOAD]](<16 x s8>) + %0:_(p0) = COPY $p0 + %1:_(s20) = G_CONSTANT i20 64 + %2:_(<16 x s8>) = G_LOAD %0(p0) :: (load (<16 x s8>)) + %3:_(p0), %4:_(s20), %5:_(s20) = G_INTRINSIC intrinsic(@llvm.aie2p.add.3d), %0:_(p0), %1:_(s20), %1:_(s20), %1:_(s20), %1:_(s20), %1:_(s20), %1:_(s20), %1:_(s20) + $p0 = COPY %3 + $q0 = COPY %2(<16 x s8>) +... diff --git a/llvm/test/CodeGen/AIE/aie2p/fifo-loads.ll b/llvm/test/CodeGen/AIE/aie2p/fifo-loads.ll index e7f98db34fbb..a7429fd3c2f1 100644 --- a/llvm/test/CodeGen/AIE/aie2p/fifo-loads.ll +++ b/llvm/test/CodeGen/AIE/aie2p/fifo-loads.ll @@ -15,8 +15,8 @@ define dso_local void @_Z17test_fifo_ld_fillRPDv64_DB8_R12fifo_state_t(ptr nocap ; CHECK-LABEL: _Z17test_fifo_ld_fillRPDv64_DB8_R12fifo_state_t: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: lda p0, [p0, #0]; mov dj0, #128 -; CHECK-NEXT: lda r24, [p1, dj0]; nopx +; CHECK-NEXT: lda p0, [p0, #0]; nopb ; nopx ; mov dj0, #128; nops +; CHECK-NEXT: lda r24, [p1, dj0] ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: nop @@ -28,10 +28,13 @@ define dso_local void @_Z17test_fifo_ld_fillRPDv64_DB8_R12fifo_state_t(ptr nocap ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: nop -; CHECK-NEXT: ret lr -; CHECK-NEXT: st r24, [p1, dj0] // Delay Slot 5 -; CHECK-NEXT: vst lfl0, [p1, #0] // Delay Slot 4 -; CHECK-NEXT: vst lfh0, [p1, #64] // Delay Slot 3 +; CHECK-NEXT: nop +; CHECK-NEXT: nop 
+; CHECK-NEXT: vmov x1, lfh0 +; CHECK-NEXT: ret lr; vmov x0, lfl0 +; CHECK-NEXT: vst x1, [p1, #64] // Delay Slot 5 +; CHECK-NEXT: vst x0, [p1], #128 // Delay Slot 4 +; CHECK-NEXT: st r24, [p1, #0] // Delay Slot 3 ; CHECK-NEXT: st p0, [p2, #0] // Delay Slot 2 ; CHECK-NEXT: nop // Delay Slot 1 entry: @@ -53,8 +56,8 @@ define dso_local noundef <64 x i8> @_Z16test_fifo_ld_popRPDv64_DB8_R12fifo_state ; CHECK-LABEL: _Z16test_fifo_ld_popRPDv64_DB8_R12fifo_state_t: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: lda p0, [p0, #0]; mov dj0, #128 -; CHECK-NEXT: lda r24, [p1, dj0]; nopx +; CHECK-NEXT: lda p0, [p0, #0]; nopb ; nopx ; mov dj0, #128; nops +; CHECK-NEXT: lda r24, [p1, dj0] ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: nop @@ -66,10 +69,13 @@ define dso_local noundef <64 x i8> @_Z16test_fifo_ld_popRPDv64_DB8_R12fifo_state ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: nop -; CHECK-NEXT: ret lr -; CHECK-NEXT: st r24, [p1, dj0] // Delay Slot 5 -; CHECK-NEXT: vst lfl0, [p1, #0] // Delay Slot 4 -; CHECK-NEXT: vst lfh0, [p1, #64] // Delay Slot 3 +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: vmov x3, lfh0 +; CHECK-NEXT: ret lr; vmov x2, lfl0 +; CHECK-NEXT: vst x3, [p1, #64] // Delay Slot 5 +; CHECK-NEXT: vst x2, [p1], #128 // Delay Slot 4 +; CHECK-NEXT: st r24, [p1, #0] // Delay Slot 3 ; CHECK-NEXT: st p0, [p2, #0] // Delay Slot 2 ; CHECK-NEXT: nop // Delay Slot 1 entry: @@ -92,7 +98,7 @@ define dso_local noundef <64 x i8> @_Z24test_fifo_ld_pop_1d_byteRPDv64_DB8_R12fi ; CHECK-LABEL: _Z24test_fifo_ld_pop_1d_byteRPDv64_DB8_R12fifo_state_ti: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: lda p0, [p0, #0]; mov dj0, #128 +; CHECK-NEXT: lda p0, [p0, #0]; nopb ; nopx ; mov dj0, #128 ; CHECK-NEXT: lda r24, [p1, dj0] ; CHECK-NEXT: nop ; CHECK-NEXT: nop @@ -105,10 +111,13 @@ define dso_local noundef <64 x i8> @_Z24test_fifo_ld_pop_1d_byteRPDv64_DB8_R12fi ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: nop -; CHECK-NEXT: 
ret lr -; CHECK-NEXT: st r24, [p1, dj0] // Delay Slot 5 -; CHECK-NEXT: vst lfl0, [p1, #0] // Delay Slot 4 -; CHECK-NEXT: vst lfh0, [p1, #64] // Delay Slot 3 +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: vmov x3, lfh0 +; CHECK-NEXT: ret lr; vmov x2, lfl0 +; CHECK-NEXT: vst x3, [p1, #64] // Delay Slot 5 +; CHECK-NEXT: vst x2, [p1], #128 // Delay Slot 4 +; CHECK-NEXT: st r24, [p1, #0] // Delay Slot 3 ; CHECK-NEXT: st p0, [p2, #0] // Delay Slot 2 ; CHECK-NEXT: nop // Delay Slot 1 entry: @@ -132,9 +141,9 @@ define dso_local noundef <64 x i8> @_Z24test_fifo_ld_pop_2d_byteRPDv64_DB8_R12fi ; CHECK-LABEL: _Z24test_fifo_ld_pop_2d_byteRPDv64_DB8_R12fifo_state_tiiRii: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: lda dc0, [p2, #0]; nopxm -; CHECK-NEXT: lda p0, [p0, #0]; mov dj1, #128 -; CHECK-NEXT: lda r24, [p1, dj1] +; CHECK-NEXT: lda dc0, [p2, #0]; nopb ; nopxm ; nops +; CHECK-NEXT: lda p0, [p0, #0]; mov dj0, #128 +; CHECK-NEXT: lda r24, [p1, dj0] ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: mov m0, r0 @@ -146,10 +155,13 @@ define dso_local noundef <64 x i8> @_Z24test_fifo_ld_pop_2d_byteRPDv64_DB8_R12fi ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: nop -; CHECK-NEXT: st dc0, [p2, #0]; ret lr -; CHECK-NEXT: st r24, [p1, dj1] // Delay Slot 5 -; CHECK-NEXT: vst lfl0, [p1, #0] // Delay Slot 4 -; CHECK-NEXT: vst lfh0, [p1, #64] // Delay Slot 3 +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: vmov x3, lfh0 +; CHECK-NEXT: st dc0, [p2, #0]; ret lr; vmov x2, lfl0 +; CHECK-NEXT: vst x3, [p1, #64] // Delay Slot 5 +; CHECK-NEXT: vst x2, [p1], #128 // Delay Slot 4 +; CHECK-NEXT: st r24, [p1, #0] // Delay Slot 3 ; CHECK-NEXT: st p0, [p3, #0] // Delay Slot 2 ; CHECK-NEXT: nop // Delay Slot 1 entry: @@ -181,24 +193,27 @@ define dso_local noundef <64 x i8> @_Z24test_fifo_ld_pop_3d_byteRPDv64_DB8_R12fi ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: lda dc0, [p2, #0]; nopb ; nops ; nopxm ; nopv -; CHECK-NEXT: lda dc4, [p3, #0] -; 
CHECK-NEXT: lda p0, [p0, #0]; mov dj1, #128 -; CHECK-NEXT: lda r24, [p1, dj1] +; CHECK-NEXT: lda dc4, [p3, #0]; nopx +; CHECK-NEXT: lda p0, [p0, #0]; mov dj0, #128 +; CHECK-NEXT: lda r24, [p1, dj0] ; CHECK-NEXT: mov m0, r0 ; CHECK-NEXT: mov dn0, r1 -; CHECK-NEXT: mov dj0, r2 +; CHECK-NEXT: mov dn4, r3 ; CHECK-NEXT: mov p4, p0 -; CHECK-NEXT: vlda lfl0, [p1, #0]; mov dn4, r3 -; CHECK-NEXT: vlda lfh0, [p1, #64]; mov dj4, r4 +; CHECK-NEXT: vlda lfl0, [p1, #0]; mov dj4, r4 +; CHECK-NEXT: vlda lfh0, [p1, #64]; mov dj0, r2 ; CHECK-NEXT: vldb.pop.512.3d x0, [p0, lf0, r24, d0] ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: nop -; CHECK-NEXT: st dc0, [p2, #0] -; CHECK-NEXT: st dc4, [p3, #0]; ret lr -; CHECK-NEXT: st r24, [p1, dj1] // Delay Slot 5 -; CHECK-NEXT: vst lfl0, [p1, #0] // Delay Slot 4 -; CHECK-NEXT: vst lfh0, [p1, #64] // Delay Slot 3 +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: st dc0, [p2, #0]; vmov x3, lfh0 +; CHECK-NEXT: st dc4, [p3, #0]; ret lr; vmov x2, lfl0 +; CHECK-NEXT: vst x3, [p1, #64] // Delay Slot 5 +; CHECK-NEXT: vst x2, [p1], #128 // Delay Slot 4 +; CHECK-NEXT: st r24, [p1, #0] // Delay Slot 3 ; CHECK-NEXT: st p0, [p4, #0] // Delay Slot 2 ; CHECK-NEXT: nop // Delay Slot 1 entry: @@ -237,8 +252,8 @@ define dso_local %struct.v64bfp16ebs8 @_Z16test_fifo_ld_popRP22v64bfp16ebs8_unal ; CHECK-LABEL: _Z16test_fifo_ld_popRP22v64bfp16ebs8_unalignedR12fifo_state_t: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: lda p0, [p0, #0]; mov dj0, #128 -; CHECK-NEXT: lda r24, [p1, dj0]; nopx +; CHECK-NEXT: lda p0, [p0, #0]; nopb ; nopx ; mov dj0, #128; nops +; CHECK-NEXT: lda r24, [p1, dj0] ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: nop @@ -250,10 +265,13 @@ define dso_local %struct.v64bfp16ebs8 @_Z16test_fifo_ld_popRP22v64bfp16ebs8_unal ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: nop -; CHECK-NEXT: ret lr -; CHECK-NEXT: st r24, [p1, dj0] // Delay Slot 5 -; CHECK-NEXT: vst lfl0, [p1, #0] // Delay Slot 4 -; 
CHECK-NEXT: vst lfh0, [p1, #64] // Delay Slot 3 +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: vmov x3, lfh0 +; CHECK-NEXT: ret lr; vmov x2, lfl0 +; CHECK-NEXT: vst x3, [p1, #64] // Delay Slot 5 +; CHECK-NEXT: vst x2, [p1], #128 // Delay Slot 4 +; CHECK-NEXT: st r24, [p1, #0] // Delay Slot 3 ; CHECK-NEXT: st p0, [p2, #0] // Delay Slot 2 ; CHECK-NEXT: nop // Delay Slot 1 entry: @@ -279,7 +297,7 @@ define dso_local %struct.v64bfp16ebs8 @_Z24test_fifo_ld_pop_1d_byteRP22v64bfp16e ; CHECK-LABEL: _Z24test_fifo_ld_pop_1d_byteRP22v64bfp16ebs8_unalignedR12fifo_state_ti: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: lda p0, [p0, #0]; mov dj0, #128 +; CHECK-NEXT: lda p0, [p0, #0]; nopb ; nopx ; mov dj0, #128 ; CHECK-NEXT: lda r24, [p1, dj0] ; CHECK-NEXT: nop ; CHECK-NEXT: nop @@ -292,10 +310,13 @@ define dso_local %struct.v64bfp16ebs8 @_Z24test_fifo_ld_pop_1d_byteRP22v64bfp16e ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: nop -; CHECK-NEXT: ret lr -; CHECK-NEXT: st r24, [p1, dj0] // Delay Slot 5 -; CHECK-NEXT: vst lfl0, [p1, #0] // Delay Slot 4 -; CHECK-NEXT: vst lfh0, [p1, #64] // Delay Slot 3 +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: vmov x3, lfh0 +; CHECK-NEXT: ret lr; vmov x2, lfl0 +; CHECK-NEXT: vst x3, [p1, #64] // Delay Slot 5 +; CHECK-NEXT: vst x2, [p1], #128 // Delay Slot 4 +; CHECK-NEXT: st r24, [p1, #0] // Delay Slot 3 ; CHECK-NEXT: st p0, [p2, #0] // Delay Slot 2 ; CHECK-NEXT: nop // Delay Slot 1 entry: @@ -322,9 +343,9 @@ define dso_local %struct.v64bfp16ebs8 @_Z24test_fifo_ld_pop_2d_byteRP22v64bfp16e ; CHECK-LABEL: _Z24test_fifo_ld_pop_2d_byteRP22v64bfp16ebs8_unalignedR12fifo_state_tiiRii: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: lda dc0, [p2, #0]; nopxm -; CHECK-NEXT: lda p0, [p0, #0]; mov dj1, #128 -; CHECK-NEXT: lda r24, [p1, dj1] +; CHECK-NEXT: lda dc0, [p2, #0]; nopb ; nopxm ; nops +; CHECK-NEXT: lda p0, [p0, #0]; mov dj0, #128 +; CHECK-NEXT: lda r24, [p1, dj0] ; CHECK-NEXT: nop ; 
CHECK-NEXT: nop ; CHECK-NEXT: mov m0, r0 @@ -336,10 +357,13 @@ define dso_local %struct.v64bfp16ebs8 @_Z24test_fifo_ld_pop_2d_byteRP22v64bfp16e ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: nop -; CHECK-NEXT: st dc0, [p2, #0]; ret lr -; CHECK-NEXT: st r24, [p1, dj1] // Delay Slot 5 -; CHECK-NEXT: vst lfl0, [p1, #0] // Delay Slot 4 -; CHECK-NEXT: vst lfh0, [p1, #64] // Delay Slot 3 +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: vmov x3, lfh0 +; CHECK-NEXT: st dc0, [p2, #0]; ret lr; vmov x2, lfl0 +; CHECK-NEXT: vst x3, [p1, #64] // Delay Slot 5 +; CHECK-NEXT: vst x2, [p1], #128 // Delay Slot 4 +; CHECK-NEXT: st r24, [p1, #0] // Delay Slot 3 ; CHECK-NEXT: st p0, [p3, #0] // Delay Slot 2 ; CHECK-NEXT: nop // Delay Slot 1 entry: @@ -374,24 +398,27 @@ define dso_local %struct.v64bfp16ebs8 @_Z24test_fifo_ld_pop_3d_byteRP22v64bfp16e ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: lda dc0, [p2, #0]; nopb ; nops ; nopxm ; nopv -; CHECK-NEXT: lda dc4, [p3, #0] -; CHECK-NEXT: lda p0, [p0, #0]; mov dj1, #128 -; CHECK-NEXT: lda r24, [p1, dj1] +; CHECK-NEXT: lda dc4, [p3, #0]; nopx +; CHECK-NEXT: lda p0, [p0, #0]; mov dj0, #128 +; CHECK-NEXT: lda r24, [p1, dj0] ; CHECK-NEXT: mov m0, r0 ; CHECK-NEXT: mov dn0, r1 -; CHECK-NEXT: mov dj0, r2 +; CHECK-NEXT: mov dn4, r3 ; CHECK-NEXT: mov p4, p0 -; CHECK-NEXT: vlda lfl0, [p1, #0]; mov dn4, r3 -; CHECK-NEXT: vlda lfh0, [p1, #64]; mov dj4, r4 +; CHECK-NEXT: vlda lfl0, [p1, #0]; mov dj4, r4 +; CHECK-NEXT: vlda lfh0, [p1, #64]; mov dj0, r2 ; CHECK-NEXT: vldb.pop.576.3d ex0, [p0, lf0, r24, d0] ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: nop -; CHECK-NEXT: st dc0, [p2, #0] -; CHECK-NEXT: st dc4, [p3, #0]; ret lr -; CHECK-NEXT: st r24, [p1, dj1] // Delay Slot 5 -; CHECK-NEXT: vst lfl0, [p1, #0] // Delay Slot 4 -; CHECK-NEXT: vst lfh0, [p1, #64] // Delay Slot 3 +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: st dc0, [p2, #0]; vmov x3, lfh0 +; CHECK-NEXT: st dc4, [p3, #0]; ret lr; vmov 
x2, lfl0 +; CHECK-NEXT: vst x3, [p1, #64] // Delay Slot 5 +; CHECK-NEXT: vst x2, [p1], #128 // Delay Slot 4 +; CHECK-NEXT: st r24, [p1, #0] // Delay Slot 3 ; CHECK-NEXT: st p0, [p4, #0] // Delay Slot 2 ; CHECK-NEXT: nop // Delay Slot 1 entry: @@ -433,8 +460,8 @@ define dso_local %struct.v64bfp16ebs16 @_Z16test_fifo_ld_popRP23v64bfp16ebs16_un ; CHECK-LABEL: _Z16test_fifo_ld_popRP23v64bfp16ebs16_unalignedR12fifo_state_t: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: lda p0, [p0, #0]; mov dj0, #128 -; CHECK-NEXT: lda r24, [p1, dj0]; nopx +; CHECK-NEXT: lda p0, [p0, #0]; nopb ; nopx ; mov dj0, #128; nops +; CHECK-NEXT: lda r24, [p1, dj0] ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: nop @@ -446,10 +473,13 @@ define dso_local %struct.v64bfp16ebs16 @_Z16test_fifo_ld_popRP23v64bfp16ebs16_un ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: nop -; CHECK-NEXT: ret lr -; CHECK-NEXT: st r24, [p1, dj0] // Delay Slot 5 -; CHECK-NEXT: vst lfl0, [p1, #0] // Delay Slot 4 -; CHECK-NEXT: vst lfh0, [p1, #64] // Delay Slot 3 +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: vmov x3, lfh0 +; CHECK-NEXT: ret lr; vmov x2, lfl0 +; CHECK-NEXT: vst x3, [p1, #64] // Delay Slot 5 +; CHECK-NEXT: vst x2, [p1], #128 // Delay Slot 4 +; CHECK-NEXT: st r24, [p1, #0] // Delay Slot 3 ; CHECK-NEXT: st p0, [p2, #0] // Delay Slot 2 ; CHECK-NEXT: nop // Delay Slot 1 entry: @@ -475,7 +505,7 @@ define dso_local %struct.v64bfp16ebs16 @_Z24test_fifo_ld_pop_1d_byteRP23v64bfp16 ; CHECK-LABEL: _Z24test_fifo_ld_pop_1d_byteRP23v64bfp16ebs16_unalignedR12fifo_state_ti: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: lda p0, [p0, #0]; mov dj0, #128 +; CHECK-NEXT: lda p0, [p0, #0]; nopb ; nopx ; mov dj0, #128 ; CHECK-NEXT: lda r24, [p1, dj0] ; CHECK-NEXT: nop ; CHECK-NEXT: nop @@ -488,10 +518,13 @@ define dso_local %struct.v64bfp16ebs16 @_Z24test_fifo_ld_pop_1d_byteRP23v64bfp16 ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: nop -; CHECK-NEXT: ret lr -; CHECK-NEXT: 
st r24, [p1, dj0] // Delay Slot 5 -; CHECK-NEXT: vst lfl0, [p1, #0] // Delay Slot 4 -; CHECK-NEXT: vst lfh0, [p1, #64] // Delay Slot 3 +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: vmov x3, lfh0 +; CHECK-NEXT: ret lr; vmov x2, lfl0 +; CHECK-NEXT: vst x3, [p1, #64] // Delay Slot 5 +; CHECK-NEXT: vst x2, [p1], #128 // Delay Slot 4 +; CHECK-NEXT: st r24, [p1, #0] // Delay Slot 3 ; CHECK-NEXT: st p0, [p2, #0] // Delay Slot 2 ; CHECK-NEXT: nop // Delay Slot 1 entry: @@ -518,9 +551,9 @@ define dso_local %struct.v64bfp16ebs16 @_Z24test_fifo_ld_pop_2d_byteRP23v64bfp16 ; CHECK-LABEL: _Z24test_fifo_ld_pop_2d_byteRP23v64bfp16ebs16_unalignedR12fifo_state_tiiRii: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: lda dc0, [p2, #0]; nopxm -; CHECK-NEXT: lda p0, [p0, #0]; mov dj1, #128 -; CHECK-NEXT: lda r24, [p1, dj1] +; CHECK-NEXT: lda dc0, [p2, #0]; nopb ; nopxm ; nops +; CHECK-NEXT: lda p0, [p0, #0]; mov dj0, #128 +; CHECK-NEXT: lda r24, [p1, dj0] ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: mov m0, r0 @@ -532,10 +565,13 @@ define dso_local %struct.v64bfp16ebs16 @_Z24test_fifo_ld_pop_2d_byteRP23v64bfp16 ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: nop -; CHECK-NEXT: st dc0, [p2, #0]; ret lr -; CHECK-NEXT: st r24, [p1, dj1] // Delay Slot 5 -; CHECK-NEXT: vst lfl0, [p1, #0] // Delay Slot 4 -; CHECK-NEXT: vst lfh0, [p1, #64] // Delay Slot 3 +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: vmov x3, lfh0 +; CHECK-NEXT: st dc0, [p2, #0]; ret lr; vmov x2, lfl0 +; CHECK-NEXT: vst x3, [p1, #64] // Delay Slot 5 +; CHECK-NEXT: vst x2, [p1], #128 // Delay Slot 4 +; CHECK-NEXT: st r24, [p1, #0] // Delay Slot 3 ; CHECK-NEXT: st p0, [p3, #0] // Delay Slot 2 ; CHECK-NEXT: nop // Delay Slot 1 entry: @@ -570,24 +606,27 @@ define dso_local %struct.v64bfp16ebs16 @_Z24test_fifo_ld_pop_3d_byteRP23v64bfp16 ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry ; CHECK-NEXT: lda dc0, [p2, #0]; nopb ; nops ; nopxm ; nopv -; CHECK-NEXT: lda dc4, [p3, #0] -; 
CHECK-NEXT: lda p0, [p0, #0]; mov dj1, #128 -; CHECK-NEXT: lda r24, [p1, dj1] +; CHECK-NEXT: lda dc4, [p3, #0]; nopx +; CHECK-NEXT: lda p0, [p0, #0]; mov dj0, #128 +; CHECK-NEXT: lda r24, [p1, dj0] ; CHECK-NEXT: mov m0, r0 ; CHECK-NEXT: mov dn0, r1 -; CHECK-NEXT: mov dj0, r2 +; CHECK-NEXT: mov dn4, r3 ; CHECK-NEXT: mov p4, p0 -; CHECK-NEXT: vlda lfl0, [p1, #0]; mov dn4, r3 -; CHECK-NEXT: vlda lfh0, [p1, #64]; mov dj4, r4 +; CHECK-NEXT: vlda lfl0, [p1, #0]; mov dj4, r4 +; CHECK-NEXT: vlda lfh0, [p1, #64]; mov dj0, r2 ; CHECK-NEXT: vldb.pop.544.3d ex0, [p0, lf0, r24, d0] ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: nop -; CHECK-NEXT: st dc0, [p2, #0] -; CHECK-NEXT: st dc4, [p3, #0]; ret lr -; CHECK-NEXT: st r24, [p1, dj1] // Delay Slot 5 -; CHECK-NEXT: vst lfl0, [p1, #0] // Delay Slot 4 -; CHECK-NEXT: vst lfh0, [p1, #64] // Delay Slot 3 +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: nop +; CHECK-NEXT: st dc0, [p2, #0]; vmov x3, lfh0 +; CHECK-NEXT: st dc4, [p3, #0]; ret lr; vmov x2, lfl0 +; CHECK-NEXT: vst x3, [p1, #64] // Delay Slot 5 +; CHECK-NEXT: vst x2, [p1], #128 // Delay Slot 4 +; CHECK-NEXT: st r24, [p1, #0] // Delay Slot 3 ; CHECK-NEXT: st p0, [p4, #0] // Delay Slot 2 ; CHECK-NEXT: nop // Delay Slot 1 entry: diff --git a/llvm/test/CodeGen/AIE/aie2p/ldst-fifo-stores.ll b/llvm/test/CodeGen/AIE/aie2p/ldst-fifo-stores.ll index a717eeb05134..1866580f47d8 100644 --- a/llvm/test/CodeGen/AIE/aie2p/ldst-fifo-stores.ll +++ b/llvm/test/CodeGen/AIE/aie2p/ldst-fifo-stores.ll @@ -46,7 +46,7 @@ define dso_local void @_Z17test_fifo_st_pushRPDv64_DB8_S0_R12fifo_state_t(ptr no ; CHECK-LABEL: _Z17test_fifo_st_pushRPDv64_DB8_S0_R12fifo_state_t: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: vlda sfl, [p1, #0]; nopb ; nopxm ; nops +; CHECK-NEXT: vlda sfl, [p1, #0]; nopx ; CHECK-NEXT: lda p2, [p0, #0]; mov dj0, #128 ; CHECK-NEXT: lda r26, [p1, dj0] ; CHECK-NEXT: vlda sfh, [p1, #64] @@ -56,10 +56,12 @@ define dso_local void 
@_Z17test_fifo_st_pushRPDv64_DB8_S0_R12fifo_state_t(ptr no ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: vst.push.512 x0, [p2, sf, r26] -; CHECK-NEXT: ret lr -; CHECK-NEXT: st r26, [p1, dj0] // Delay Slot 5 -; CHECK-NEXT: vst sfl, [p1, #0] // Delay Slot 4 -; CHECK-NEXT: vst sfh, [p1, #64] // Delay Slot 3 +; CHECK-NEXT: nop +; CHECK-NEXT: vmov x1, sfh +; CHECK-NEXT: ret lr; vmov x0, sfl +; CHECK-NEXT: vst x1, [p1, #64] // Delay Slot 5 +; CHECK-NEXT: vst x0, [p1], #128 // Delay Slot 4 +; CHECK-NEXT: st r26, [p1, #0] // Delay Slot 3 ; CHECK-NEXT: st p2, [p0, #0] // Delay Slot 2 ; CHECK-NEXT: nop // Delay Slot 1 entry: @@ -83,7 +85,7 @@ define dso_local void @_Z18test_fifo_st_flushRPDv64_DB8_R12fifo_state_t(ptr noca ; CHECK-LABEL: _Z18test_fifo_st_flushRPDv64_DB8_R12fifo_state_t: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: vlda sfl, [p1, #0]; nopb ; nopxm ; nops +; CHECK-NEXT: vlda sfl, [p1, #0]; nopx ; CHECK-NEXT: lda p2, [p0, #0]; mov dj0, #128 ; CHECK-NEXT: lda r26, [p1, dj0] ; CHECK-NEXT: vlda sfh, [p1, #64] @@ -93,10 +95,12 @@ define dso_local void @_Z18test_fifo_st_flushRPDv64_DB8_R12fifo_state_t(ptr noca ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: vst.flush.512 [p2, sf, r26] -; CHECK-NEXT: ret lr -; CHECK-NEXT: st r26, [p1, dj0] // Delay Slot 5 -; CHECK-NEXT: vst sfl, [p1, #0] // Delay Slot 4 -; CHECK-NEXT: vst sfh, [p1, #64] // Delay Slot 3 +; CHECK-NEXT: nop +; CHECK-NEXT: vmov x1, sfh +; CHECK-NEXT: ret lr; vmov x0, sfl +; CHECK-NEXT: vst x1, [p1, #64] // Delay Slot 5 +; CHECK-NEXT: vst x0, [p1], #128 // Delay Slot 4 +; CHECK-NEXT: st r26, [p1, #0] // Delay Slot 3 ; CHECK-NEXT: st p2, [p0, #0] // Delay Slot 2 ; CHECK-NEXT: nop // Delay Slot 1 entry: @@ -119,7 +123,7 @@ define dso_local void @_Z26test_fifo_st_flush_1d_byteRPDv64_DB8_R12fifo_state_ti ; CHECK-LABEL: _Z26test_fifo_st_flush_1d_byteRPDv64_DB8_R12fifo_state_ti: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: vlda sfl, [p1, #0]; nopb ; nopxm +; 
CHECK-NEXT: vlda sfl, [p1, #0] ; CHECK-NEXT: lda p2, [p0, #0]; mov dj0, #128 ; CHECK-NEXT: lda r26, [p1, dj0] ; CHECK-NEXT: vlda sfh, [p1, #64] @@ -129,10 +133,12 @@ define dso_local void @_Z26test_fifo_st_flush_1d_byteRPDv64_DB8_R12fifo_state_ti ; CHECK-NEXT: nop ; CHECK-NEXT: mov m0, r0 ; CHECK-NEXT: vst.flush.512 [p2, sf, r26, m0] -; CHECK-NEXT: ret lr -; CHECK-NEXT: st r26, [p1, dj0] // Delay Slot 5 -; CHECK-NEXT: vst sfl, [p1, #0] // Delay Slot 4 -; CHECK-NEXT: vst sfh, [p1, #64] // Delay Slot 3 +; CHECK-NEXT: nop +; CHECK-NEXT: vmov x1, sfh +; CHECK-NEXT: ret lr; vmov x0, sfl +; CHECK-NEXT: vst x1, [p1, #64] // Delay Slot 5 +; CHECK-NEXT: vst x0, [p1], #128 // Delay Slot 4 +; CHECK-NEXT: st r26, [p1, #0] // Delay Slot 3 ; CHECK-NEXT: st p2, [p0, #0] // Delay Slot 2 ; CHECK-NEXT: nop // Delay Slot 1 entry: @@ -156,9 +162,9 @@ define dso_local void @_Z26test_fifo_st_flush_2d_byteRPDv64_DB8_R12fifo_state_ti ; CHECK-LABEL: _Z26test_fifo_st_flush_2d_byteRPDv64_DB8_R12fifo_state_tiiRii: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: lda dc0, [p2, #0]; nopb ; nopxm -; CHECK-NEXT: lda p2, [p0, #0]; mov dj1, #128 -; CHECK-NEXT: lda r26, [p1, dj1] +; CHECK-NEXT: lda dc0, [p2, #0] +; CHECK-NEXT: lda p2, [p0, #0]; mov dj0, #128 +; CHECK-NEXT: lda r26, [p1, dj0] ; CHECK-NEXT: nop ; CHECK-NEXT: vlda sfl, [p1, #0] ; CHECK-NEXT: vlda sfh, [p1, #64]; mov m0, r0 @@ -169,10 +175,11 @@ define dso_local void @_Z26test_fifo_st_flush_2d_byteRPDv64_DB8_R12fifo_state_ti ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: nop -; CHECK-NEXT: st dc0, [p3, #0]; ret lr -; CHECK-NEXT: st r26, [p1, dj1] // Delay Slot 5 -; CHECK-NEXT: vst sfl, [p1, #0] // Delay Slot 4 -; CHECK-NEXT: vst sfh, [p1, #64] // Delay Slot 3 +; CHECK-NEXT: vmov x1, sfh +; CHECK-NEXT: st dc0, [p3, #0]; ret lr; vmov x0, sfl +; CHECK-NEXT: vst x1, [p1, #64] // Delay Slot 5 +; CHECK-NEXT: vst x0, [p1], #128 // Delay Slot 4 +; CHECK-NEXT: st r26, [p1, #0] // Delay Slot 3 ; CHECK-NEXT: st p2, [p0, #0] 
// Delay Slot 2 ; CHECK-NEXT: nop // Delay Slot 1 entry: @@ -203,24 +210,24 @@ define dso_local void @_Z26test_fifo_st_flush_3d_byteRPDv64_DB8_R12fifo_state_ti ; CHECK-LABEL: _Z26test_fifo_st_flush_3d_byteRPDv64_DB8_R12fifo_state_tiiRiiiS5_i: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: vlda sfl, [p1, #0]; nopb ; nops ; nopxm ; nopv -; CHECK-NEXT: lda dc0, [p2, #0]; nopx +; CHECK-NEXT: vlda sfl, [p1, #0]; nopxm +; CHECK-NEXT: lda dc0, [p2, #0] ; CHECK-NEXT: lda dc4, [p3, #0] -; CHECK-NEXT: lda p2, [p0, #0]; mov dj1, #128 -; CHECK-NEXT: lda r26, [p1, dj1] +; CHECK-NEXT: lda p2, [p0, #0]; mov dj0, #128 +; CHECK-NEXT: lda r26, [p1, dj0] ; CHECK-NEXT: vlda sfh, [p1, #64]; mov m0, r0 ; CHECK-NEXT: mov dn0, r1 -; CHECK-NEXT: mov dj0, r2 -; CHECK-NEXT: mov p4, p2 ; CHECK-NEXT: mov dn4, r3 +; CHECK-NEXT: mov p4, p2 ; CHECK-NEXT: mov dj4, r4 +; CHECK-NEXT: mov dj0, r2 ; CHECK-NEXT: vst.flush.512.3d [p2, sf, r26, d0] ; CHECK-NEXT: nop -; CHECK-NEXT: st dc0, [p4, #0] -; CHECK-NEXT: st dc4, [p3, #0]; ret lr -; CHECK-NEXT: st r26, [p1, dj1] // Delay Slot 5 -; CHECK-NEXT: vst sfl, [p1, #0] // Delay Slot 4 -; CHECK-NEXT: vst sfh, [p1, #64] // Delay Slot 3 +; CHECK-NEXT: st dc0, [p4, #0]; vmov x1, sfh +; CHECK-NEXT: st dc4, [p3, #0]; ret lr; vmov x0, sfl +; CHECK-NEXT: vst x1, [p1, #64] // Delay Slot 5 +; CHECK-NEXT: vst x0, [p1], #128 // Delay Slot 4 +; CHECK-NEXT: st r26, [p1, #0] // Delay Slot 3 ; CHECK-NEXT: st p2, [p0, #0] // Delay Slot 2 ; CHECK-NEXT: nop // Delay Slot 1 entry: @@ -258,7 +265,7 @@ define dso_local void @_Z23test_fifo_st_flush_convRPDv64_DB8_R12fifo_state_t(ptr ; CHECK-LABEL: _Z23test_fifo_st_flush_convRPDv64_DB8_R12fifo_state_t: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: lda p2, [p0, #0]; nopb ; nopx ; mov dj0, #128; nops +; CHECK-NEXT: lda p2, [p0, #0]; mov dj0, #128 ; CHECK-NEXT: lda r26, [p1, dj0] ; CHECK-NEXT: nop ; CHECK-NEXT: vlda sfl, [p1, #0] @@ -269,10 +276,12 @@ define dso_local void 
@_Z23test_fifo_st_flush_convRPDv64_DB8_R12fifo_state_t(ptr ; CHECK-NEXT: vst.flush.512.conv [p2, sf, r26] ; CHECK-NEXT: nop ; CHECK-NEXT: nop -; CHECK-NEXT: ret lr -; CHECK-NEXT: st r26, [p1, dj0] // Delay Slot 5 -; CHECK-NEXT: vst sfl, [p1, #0] // Delay Slot 4 -; CHECK-NEXT: vst sfh, [p1, #64] // Delay Slot 3 +; CHECK-NEXT: nop +; CHECK-NEXT: vmov x1, sfh +; CHECK-NEXT: ret lr; vmov x0, sfl +; CHECK-NEXT: vst x1, [p1, #64] // Delay Slot 5 +; CHECK-NEXT: vst x0, [p1], #128 // Delay Slot 4 +; CHECK-NEXT: st r26, [p1, #0] // Delay Slot 3 ; CHECK-NEXT: st p2, [p0, #0] // Delay Slot 2 ; CHECK-NEXT: nop // Delay Slot 1 entry: @@ -295,8 +304,8 @@ define dso_local void @_Z31test_fifo_st_flush_conv_1d_byteRPDv64_DB8_R12fifo_sta ; CHECK-LABEL: _Z31test_fifo_st_flush_conv_1d_byteRPDv64_DB8_R12fifo_state_ti: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: lda p2, [p0, #0]; nopb ; nopx ; mov dj0, #128 -; CHECK-NEXT: lda r26, [p1, dj0] +; CHECK-NEXT: lda p2, [p0, #0]; nopb ; nops ; nopx ; mov dj0, #128; nopv +; CHECK-NEXT: lda r26, [p1, dj0]; nopb ; nopx ; CHECK-NEXT: nop ; CHECK-NEXT: vlda sfl, [p1, #0] ; CHECK-NEXT: vlda sfh, [p1, #64] @@ -306,10 +315,12 @@ define dso_local void @_Z31test_fifo_st_flush_conv_1d_byteRPDv64_DB8_R12fifo_sta ; CHECK-NEXT: vst.flush.512.conv [p2, sf, r26, m0] ; CHECK-NEXT: nop ; CHECK-NEXT: nop -; CHECK-NEXT: ret lr -; CHECK-NEXT: st r26, [p1, dj0] // Delay Slot 5 -; CHECK-NEXT: vst sfl, [p1, #0] // Delay Slot 4 -; CHECK-NEXT: vst sfh, [p1, #64] // Delay Slot 3 +; CHECK-NEXT: nop +; CHECK-NEXT: vmov x1, sfh +; CHECK-NEXT: ret lr; vmov x0, sfl +; CHECK-NEXT: vst x1, [p1, #64] // Delay Slot 5 +; CHECK-NEXT: vst x0, [p1], #128 // Delay Slot 4 +; CHECK-NEXT: st r26, [p1, #0] // Delay Slot 3 ; CHECK-NEXT: st p2, [p0, #0] // Delay Slot 2 ; CHECK-NEXT: nop // Delay Slot 1 entry: @@ -333,10 +344,10 @@ define dso_local void @_Z31test_fifo_st_flush_conv_2d_byteRPDv64_DB8_R12fifo_sta ; CHECK-LABEL: 
_Z31test_fifo_st_flush_conv_2d_byteRPDv64_DB8_R12fifo_state_tiiRii: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: vlda sfl, [p1, #0]; nopb ; nopxm ; nops +; CHECK-NEXT: vlda sfl, [p1, #0]; nopx ; CHECK-NEXT: lda dc0, [p2, #0] -; CHECK-NEXT: lda p2, [p0, #0]; mov dj1, #128 -; CHECK-NEXT: lda r26, [p1, dj1] +; CHECK-NEXT: lda p2, [p0, #0]; mov dj0, #128 +; CHECK-NEXT: lda r26, [p1, dj0] ; CHECK-NEXT: vlda sfh, [p1, #64] ; CHECK-NEXT: nop ; CHECK-NEXT: mov m0, r0 @@ -345,10 +356,11 @@ define dso_local void @_Z31test_fifo_st_flush_conv_2d_byteRPDv64_DB8_R12fifo_sta ; CHECK-NEXT: mov dj0, r2 ; CHECK-NEXT: vst.flush.512.2d [p2, sf, r26, d0] ; CHECK-NEXT: nop -; CHECK-NEXT: st dc0, [p3, #0]; ret lr -; CHECK-NEXT: st r26, [p1, dj1] // Delay Slot 5 -; CHECK-NEXT: vst sfl, [p1, #0] // Delay Slot 4 -; CHECK-NEXT: vst sfh, [p1, #64] // Delay Slot 3 +; CHECK-NEXT: vmov x1, sfh +; CHECK-NEXT: st dc0, [p3, #0]; ret lr; vmov x0, sfl +; CHECK-NEXT: vst x1, [p1, #64] // Delay Slot 5 +; CHECK-NEXT: vst x0, [p1], #128 // Delay Slot 4 +; CHECK-NEXT: st r26, [p1, #0] // Delay Slot 3 ; CHECK-NEXT: st p2, [p0, #0] // Delay Slot 2 ; CHECK-NEXT: nop // Delay Slot 1 entry: @@ -379,25 +391,25 @@ define dso_local void @_Z31test_fifo_st_flush_conv_3d_byteRPDv64_DB8_R12fifo_sta ; CHECK-LABEL: _Z31test_fifo_st_flush_conv_3d_byteRPDv64_DB8_R12fifo_state_tiiRiiiS5_i: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: lda dc0, [p2, #0]; nopb ; nops ; nopxm ; nopv +; CHECK-NEXT: lda dc0, [p2, #0]; nopb ; nopx ; CHECK-NEXT: lda dc4, [p3, #0] -; CHECK-NEXT: lda p2, [p0, #0]; mov dj1, #128 -; CHECK-NEXT: lda r26, [p1, dj1] +; CHECK-NEXT: lda p2, [p0, #0]; mov dj0, #128 +; CHECK-NEXT: lda r26, [p1, dj0] ; CHECK-NEXT: mov m0, r0 ; CHECK-NEXT: vlda sfl, [p1, #0]; mov dn0, r1 -; CHECK-NEXT: vlda sfh, [p1, #64]; mov dj0, r2 +; CHECK-NEXT: vlda sfh, [p1, #64]; mov dn4, r3 ; CHECK-NEXT: mov p4, p2 -; CHECK-NEXT: mov dn4, r3 ; CHECK-NEXT: mov dj4, r4 +; CHECK-NEXT: mov 
dj0, r2 ; CHECK-NEXT: vst.flush.512.conv.3d [p2, sf, r26, d0] ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: nop -; CHECK-NEXT: st dc0, [p4, #0] -; CHECK-NEXT: st dc4, [p3, #0]; ret lr -; CHECK-NEXT: st r26, [p1, dj1] // Delay Slot 5 -; CHECK-NEXT: vst sfl, [p1, #0] // Delay Slot 4 -; CHECK-NEXT: vst sfh, [p1, #64] // Delay Slot 3 +; CHECK-NEXT: st dc0, [p4, #0]; vmov x1, sfh +; CHECK-NEXT: st dc4, [p3, #0]; ret lr; vmov x0, sfl +; CHECK-NEXT: vst x1, [p1, #64] // Delay Slot 5 +; CHECK-NEXT: vst x0, [p1], #128 // Delay Slot 4 +; CHECK-NEXT: st r26, [p1, #0] // Delay Slot 3 ; CHECK-NEXT: st p2, [p0, #0] // Delay Slot 2 ; CHECK-NEXT: nop // Delay Slot 1 entry: @@ -468,7 +480,7 @@ define dso_local void @test_fifo_st_push_v64bfp16ebs16(ptr nocapture nonnull ali ; CHECK-LABEL: test_fifo_st_push_v64bfp16ebs16: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: vlda sfl, [p1, #0]; nopb ; nopxm ; nops +; CHECK-NEXT: vlda sfl, [p1, #0]; nopx ; CHECK-NEXT: lda p2, [p0, #0]; mov dj0, #128 ; CHECK-NEXT: lda r26, [p1, dj0] ; CHECK-NEXT: vlda sfh, [p1, #64] @@ -478,10 +490,12 @@ define dso_local void @test_fifo_st_push_v64bfp16ebs16(ptr nocapture nonnull ali ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: vst.push.544 ex0, [p2, sf, r26] -; CHECK-NEXT: ret lr -; CHECK-NEXT: st r26, [p1, dj0] // Delay Slot 5 -; CHECK-NEXT: vst sfl, [p1, #0] // Delay Slot 4 -; CHECK-NEXT: vst sfh, [p1, #64] // Delay Slot 3 +; CHECK-NEXT: nop +; CHECK-NEXT: vmov x1, sfh +; CHECK-NEXT: ret lr; vmov x0, sfl +; CHECK-NEXT: vst x1, [p1, #64] // Delay Slot 5 +; CHECK-NEXT: vst x0, [p1], #128 // Delay Slot 4 +; CHECK-NEXT: st r26, [p1, #0] // Delay Slot 3 ; CHECK-NEXT: st p2, [p0, #0] // Delay Slot 2 ; CHECK-NEXT: nop // Delay Slot 1 entry: @@ -538,7 +552,7 @@ define dso_local void @test_fifo_st_push_v64bfp16ebs8(ptr nocapture nonnull alig ; CHECK-LABEL: test_fifo_st_push_v64bfp16ebs8: ; CHECK: .p2align 4 ; CHECK-NEXT: // %bb.0: // %entry -; CHECK-NEXT: vlda sfl, [p1, #0]; nopb ; 
nopxm ; nops +; CHECK-NEXT: vlda sfl, [p1, #0]; nopx ; CHECK-NEXT: lda p2, [p0, #0]; mov dj0, #128 ; CHECK-NEXT: lda r26, [p1, dj0] ; CHECK-NEXT: vlda sfh, [p1, #64] @@ -548,10 +562,12 @@ define dso_local void @test_fifo_st_push_v64bfp16ebs8(ptr nocapture nonnull alig ; CHECK-NEXT: nop ; CHECK-NEXT: nop ; CHECK-NEXT: vst.push.576 ex0, [p2, sf, r26] -; CHECK-NEXT: ret lr -; CHECK-NEXT: st r26, [p1, dj0] // Delay Slot 5 -; CHECK-NEXT: vst sfl, [p1, #0] // Delay Slot 4 -; CHECK-NEXT: vst sfh, [p1, #64] // Delay Slot 3 +; CHECK-NEXT: nop +; CHECK-NEXT: vmov x1, sfh +; CHECK-NEXT: ret lr; vmov x0, sfl +; CHECK-NEXT: vst x1, [p1, #64] // Delay Slot 5 +; CHECK-NEXT: vst x0, [p1], #128 // Delay Slot 4 +; CHECK-NEXT: st r26, [p1, #0] // Delay Slot 3 ; CHECK-NEXT: st p2, [p0, #0] // Delay Slot 2 ; CHECK-NEXT: nop // Delay Slot 1 entry: