[AMDGPU] Move into SIProgramInfo and cache getFunctionCodeSize. NFCI. #127111

rampitec · 2025-02-13T19:19:35Z

This moves the function as is; improvements to the estimate go into
a subsequent patch.

This moves the function as is; improvements to the estimate go into a subsequent patch.

rampitec · 2025-02-13T19:19:55Z

[AMDGPU] Move into SIProgramInfo and cache getFunctionCodeSize. NFCI. #127111 : 3 dependent PRs (#126981 , #127129 , #127246 ) 👈 (View in Graphite)
main

This stack of pull requests is managed by Graphite. Learn more about stacking.

llvmbot · 2025-02-13T19:21:28Z

@llvm/pr-subscribers-backend-amdgpu

Author: Stanislav Mekhanoshin (rampitec)

Changes

This moves the function as is; improvements to the estimate go into
a subsequent patch.

Full diff: https://github.com/llvm/llvm-project/pull/127111.diff

4 Files Affected:

(modified) llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (+3-23)
(modified) llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h (-2)
(modified) llvm/lib/Target/AMDGPU/SIProgramInfo.cpp (+27)
(modified) llvm/lib/Target/AMDGPU/SIProgramInfo.h (+6)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 031d8f0560ff2..a8d0bb746d2ef 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -748,7 +748,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
           RI.getSymbol(CurrentFnSym->getName(), RIK::RIK_PrivateSegSize,
                        OutContext, IsLocal)
               ->getVariableValue(),
-          getFunctionCodeSize(MF), MFI);
+          CurrentProgramInfo.getFunctionCodeSize(MF), MFI);
       return false;
     }
 
@@ -757,7 +757,8 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
         CurrentProgramInfo.NumArchVGPR,
         STM.hasMAIInsts() ? CurrentProgramInfo.NumAccVGPR : nullptr,
         CurrentProgramInfo.NumVGPR, CurrentProgramInfo.NumSGPR,
-        CurrentProgramInfo.ScratchSize, getFunctionCodeSize(MF), MFI);
+        CurrentProgramInfo.ScratchSize,
+        CurrentProgramInfo.getFunctionCodeSize(MF), MFI);
 
     OutStreamer->emitRawComment(
       " FloatMode: " + Twine(CurrentProgramInfo.FloatMode), false);
@@ -893,27 +894,6 @@ void AMDGPUAsmPrinter::initializeTargetID(const Module &M) {
   }
 }
 
-uint64_t AMDGPUAsmPrinter::getFunctionCodeSize(const MachineFunction &MF) const {
-  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
-  const SIInstrInfo *TII = STM.getInstrInfo();
-
-  uint64_t CodeSize = 0;
-
-  for (const MachineBasicBlock &MBB : MF) {
-    for (const MachineInstr &MI : MBB) {
-      // TODO: CodeSize should account for multiple functions.
-
-      // TODO: Should we count size of debug info?
-      if (MI.isDebugInstr())
-        continue;
-
-      CodeSize += TII->getInstSizeInBytes(MI);
-    }
-  }
-
-  return CodeSize;
-}
-
 // AccumOffset computed for the MCExpr equivalent of:
 // alignTo(std::max(1, NumVGPR), 4) / 4 - 1;
 static const MCExpr *computeAccumOffset(const MCExpr *NumVGPR, MCContext &Ctx) {
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index cc8c4411805e2..2c959d7dbbd07 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -50,8 +50,6 @@ class AMDGPUAsmPrinter final : public AsmPrinter {
 
   MCCodeEmitter *DumpCodeInstEmitter = nullptr;
 
-  uint64_t getFunctionCodeSize(const MachineFunction &MF) const;
-
   void getSIProgramInfo(SIProgramInfo &Out, const MachineFunction &MF);
   void getAmdKernelCode(AMDGPU::AMDGPUMCKernelCodeT &Out,
                         const SIProgramInfo &KernelInfo,
diff --git a/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp b/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp
index 212edff097837..5179288084010 100644
--- a/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIProgramInfo.cpp
@@ -27,6 +27,8 @@ void SIProgramInfo::reset(const MachineFunction &MF) {
 
   const MCExpr *ZeroExpr = MCConstantExpr::create(0, Ctx);
 
+  CodeSizeInBytes.reset();
+
   VGPRBlocks = ZeroExpr;
   SGPRBlocks = ZeroExpr;
   Priority = 0;
@@ -199,3 +201,28 @@ const MCExpr *SIProgramInfo::getPGMRSrc2(CallingConv::ID CC,
 
   return MCConstantExpr::create(0, Ctx);
 }
+
+uint64_t SIProgramInfo::getFunctionCodeSize(const MachineFunction &MF) {
+  if (CodeSizeInBytes.has_value())
+    return *CodeSizeInBytes;
+
+  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
+  const SIInstrInfo *TII = STM.getInstrInfo();
+
+  uint64_t CodeSize = 0;
+
+  for (const MachineBasicBlock &MBB : MF) {
+    for (const MachineInstr &MI : MBB) {
+      // TODO: CodeSize should account for multiple functions.
+
+      // TODO: Should we count size of debug info?
+      if (MI.isDebugInstr())
+        continue;
+
+      CodeSize += TII->getInstSizeInBytes(MI);
+    }
+  }
+
+  CodeSizeInBytes = CodeSize;
+  return CodeSize;
+}
diff --git a/llvm/lib/Target/AMDGPU/SIProgramInfo.h b/llvm/lib/Target/AMDGPU/SIProgramInfo.h
index 37c03d9b637f0..d7087436ae758 100644
--- a/llvm/lib/Target/AMDGPU/SIProgramInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIProgramInfo.h
@@ -19,6 +19,7 @@
 #include "llvm/IR/CallingConv.h"
 #include "llvm/Support/Compiler.h"
 #include <cstdint>
+#include <optional>
 
 namespace llvm {
 
@@ -29,6 +30,8 @@ class MachineFunction;
 
 /// Track resource usage for kernels / entry functions.
 struct LLVM_EXTERNAL_VISIBILITY SIProgramInfo {
+  std::optional<uint64_t> CodeSizeInBytes;
+
   // Fields set in PGM_RSRC1 pm4 packet.
   const MCExpr *VGPRBlocks = nullptr;
   const MCExpr *SGPRBlocks = nullptr;
@@ -97,6 +100,9 @@ struct LLVM_EXTERNAL_VISIBILITY SIProgramInfo {
   // non-MCExpr members.
   void reset(const MachineFunction &MF);
 
+  // Get function code size and cache the value.
+  uint64_t getFunctionCodeSize(const MachineFunction &MF);
+
   /// Compute the value of the ComputePGMRsrc1 register.
   const MCExpr *getComputePGMRSrc1(const GCNSubtarget &ST,
                                    MCContext &Ctx) const;

[AMDGPU] Move into SIProgramInfo and cache getFunctionCodeSize. NFCI.

eb3ece0

This moves the function as is; improvements to the estimate go into a subsequent patch.

rampitec mentioned this pull request Feb 13, 2025

[AMDGPU] Set inst_pref_size to maximum #126981

Open

rampitec requested a review from arsenm February 13, 2025 19:20

rampitec marked this pull request as ready for review February 13, 2025 19:20

llvmbot added the backend:AMDGPU label Feb 13, 2025

This was referenced Feb 13, 2025

[AMDGPU] Early bail in getFunctionCodeSize for meta inst. NFC. #127129

Open

[AMDGPU] Respect MBB alignment in the getFunctionCodeSize() #127142

Open

[AMDGPU] Switch to MF.estimateFunctionSizeInBytes() #127246

Open

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[AMDGPU] Move into SIProgramInfo and cache getFunctionCodeSize. NFCI. #127111

[AMDGPU] Move into SIProgramInfo and cache getFunctionCodeSize. NFCI. #127111

rampitec commented Feb 13, 2025

rampitec commented Feb 13, 2025 •

edited

Loading

llvmbot commented Feb 13, 2025

[AMDGPU] Move into SIProgramInfo and cache getFunctionCodeSize. NFCI. #127111

Are you sure you want to change the base?

[AMDGPU] Move into SIProgramInfo and cache getFunctionCodeSize. NFCI. #127111

Conversation

rampitec commented Feb 13, 2025

rampitec commented Feb 13, 2025 • edited Loading

llvmbot commented Feb 13, 2025

rampitec commented Feb 13, 2025 •

edited

Loading