Skip to content

Commit

Permalink
New intrinsic GenISA_URBReadOutput added, required by a new vulkan fe…
Browse files Browse the repository at this point in the history
…ature.

Intended for urb output data loads, using urb write handle provided in R0 header

Change-Id: I2a0cb3cacb1e331d6aa078b81bf3e236a76912db
  • Loading branch information
MaciejKalinski authored and gfxbot committed May 22, 2019
1 parent 83c14ed commit ebfc688
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 77 deletions.
53 changes: 51 additions & 2 deletions IGC/Compiler/CISACodeGen/EmitVISAPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5873,6 +5873,46 @@ void EmitPass::emitURBRead(llvm::GenIntrinsicInst* inst)
m_currShader->isInputsPulled = true;
}

/// Emits a URB SIMD8 read that is addressed through the current shader's URB output handle
/// and places the response in pDest.
///
/// @param globalOffset       Offset handed to UrbMessage() when the message descriptor is built.
/// @param pPerSlotOffsetVar  Optional per-slot offsets. When nullptr the payload carries only
///                           the URB handles; otherwise the offsets are appended to the payload
///                           and the descriptor is built with per-slot offsets enabled.
/// @param pDest              Destination variable of the send. Its element count divided by the
///                           lane count of the current SIMD mode is used as the response length.
void EmitPass::emitURBReadOutput(QuadEltUnit globalOffset, CVariable* pPerSlotOffsetVar, CVariable* pDest)
{
    const bool usePerSlotOffsets = (nullptr != pPerSlotOffsetVar);

    // One GRF for the URB handles, plus a second one when per-slot offsets are supplied.
    const Unit<Element> payloadGrfCount(usePerSlotOffsets ? 2 : 1);

    CVariable* const pMsgPayload = m_currShader->GetNewVariable(
        payloadGrfCount.Count() * numLanes(m_SimdMode),
        ISA_TYPE_UD,
        EALIGN_GRF);

    // The first part of the payload always holds the URB handles.
    CVariable* const pUrbHandles = m_currShader->GetURBOutputHandle();
    m_encoder->Copy(pMsgPayload, pUrbHandles);
    m_encoder->Push();

    // Runtime per-slot offsets, when present, go into sub-variable 1 of the payload.
    if (usePerSlotOffsets)
    {
        m_encoder->SetDstSubVar(1);
        m_encoder->Copy(pMsgPayload, pPerSlotOffsetVar);
        m_encoder->Push();
    }

    // This read never sets the EOT flag.
    constexpr bool isEot = false;

    const Unit<Element> msgLength = payloadGrfCount;
    const Unit<Element> rspLength(pDest->GetNumberElement() / numLanes(m_SimdMode));

    const uint descriptor = UrbMessage(
        msgLength.Count(),
        rspLength.Count(),
        isEot,
        usePerSlotOffsets,
        false,
        globalOffset.Count(),
        EU_GEN8_URB_OPCODE_SIMD8_READ);

    // Extended descriptor: URB message target, with bit 5 set only for an EOT message.
    uint extDescriptor = EU_MESSAGE_TARGET_URB;
    if (isEot)
    {
        extDescriptor |= 1 << 5;
    }

    CVariable* const pDescriptorVar = m_currShader->ImmToVariable(descriptor, ISA_TYPE_UD);

    m_encoder->Send(pDest, pMsgPayload, extDescriptor, pDescriptorVar);
    m_encoder->Push();
}

void EmitPass::emitURBWrite(llvm::GenIntrinsicInst* inst)
{
// input: GenISA_URBWrite(%offset, %mask, %data0, ..., %data7)
Expand Down Expand Up @@ -7142,6 +7182,9 @@ void EmitPass::EmitGenIntrinsicMessage(llvm::GenIntrinsicInst* inst)
case GenISAIntrinsic::GenISA_URBRead:
emitURBRead(inst);
break;
case GenISAIntrinsic::GenISA_URBReadOutput:
emitURBReadOutput(QuadEltUnit(0), GetSymbol(inst->getOperand(0)), m_destination);
break;
case GenISAIntrinsic::GenISA_cycleCounter:
emitcycleCounter(inst);
break;
Expand Down Expand Up @@ -12162,14 +12205,20 @@ void EmitPass::emitHSPatchConstantInput(llvm::Instruction* pInst)
{
assert(m_currShader->GetShaderType() == ShaderType::HULL_SHADER);
CHullShader* hsProgram = static_cast<CHullShader*>(m_currShader);
hsProgram->EmitPatchConstantInput(pInst, m_destination);
QuadEltUnit attributeOffset(0);
CVariable* pPerSlotOffsetVar = nullptr;
hsProgram->EmitPatchConstantInput(pInst, attributeOffset, pPerSlotOffsetVar);
emitURBReadOutput(attributeOffset, pPerSlotOffsetVar, m_destination);
}

void EmitPass::emitHSOutputControlPtInput(llvm::Instruction* pInst)
{
assert(m_currShader->GetShaderType() == ShaderType::HULL_SHADER);
CHullShader* hsProgram = static_cast<CHullShader*>(m_currShader);
hsProgram->EmitOutputControlPointInput(pInst, m_destination);
QuadEltUnit attributeOffset(0);
CVariable* pPerSlotOffsetVar = nullptr;
hsProgram->EmitOutputControlPointInput(pInst, attributeOffset, pPerSlotOffsetVar);
emitURBReadOutput(attributeOffset, pPerSlotOffsetVar, m_destination);
}

void EmitPass::emitHSTessFactors(llvm::Instruction* pInst)
Expand Down
1 change: 1 addition & 0 deletions IGC/Compiler/CISACodeGen/EmitVISAPass.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ class EmitPass : public llvm::FunctionPass
void emitMediaBlockRectangleRead(llvm::Instruction* inst);
void emitURBWrite(llvm::GenIntrinsicInst* inst);
void emitURBRead(llvm::GenIntrinsicInst* inst);
void emitURBReadOutput(QuadEltUnit globalOffset, CVariable* pPerSlotOffsetVar, CVariable* pDest);
void emitSampleInstruction(llvm::SampleIntrinsic* inst);
void emitLdInstruction(llvm::Instruction* inst);
void emitInfoInstruction(llvm::InfoIntrinsic* inst);
Expand Down
85 changes: 20 additions & 65 deletions IGC/Compiler/CISACodeGen/HullShaderCodeGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ void CHullShader::AllocateEightPatchPayload()

assert(offset % getGRFSize() == 0);
ProgramOutput()->m_startReg = offset / getGRFSize();

// allocate space for NOS constants and pushed constants
AllocateConstants3DShader(offset);;

Expand All @@ -214,7 +214,7 @@ void CHullShader::AllocateSinglePatchPayload()
uint offset = 0;

//R0 is always allocated as a predefined variable. Increase offset for R0
assert(m_R0);
assert(m_R0);
offset += getGRFSize();

// if m_pURBReadHandlesReg != nullptr, then we need to allocate ( (m_pOutputControlPointCount - 1)/8 + 1 ) registers for input handles
Expand Down Expand Up @@ -303,7 +303,7 @@ CVariable* CHullShader::GetURBReadHandlesReg()
{
m_pURBReadHandlesReg = GetNewVariable(
numLanes(m_SIMDSize) * ( m_pNumURBReadHandleGRF ),
ISA_TYPE_UD,
ISA_TYPE_UD,
EALIGN_GRF);
}
return m_pURBReadHandlesReg;
Expand Down Expand Up @@ -362,18 +362,18 @@ CVariable* CHullShader::GetURBInputHandle(CVariable* pVertexIndex)
}
}

QuadEltUnit CHullShader::GetFinalGlobalOffet(QuadEltUnit globalOffset)
{
QuadEltUnit CHullShader::GetFinalGlobalOffet(QuadEltUnit globalOffset)
{
return globalOffset;
}

uint32_t CHullShader::GetMaxNumOfPushedInputs() const
{
{
uint numberOfPatches = (m_properties.m_pShaderDispatchMode == EIGHT_PATCH_DISPATCH_MODE) ? 8 : 1;

// Determine how many of input attributes per InputControlPoint (Vertex) can be POTENTIALLY pushed
// Determine how many of input attributes per InputControlPoint (Vertex) can be POTENTIALLY pushed
// in current dispatch mode for current topology ( InputPatch size ).
uint32_t maxNumOfPushedInputAttributesPerICP =
uint32_t maxNumOfPushedInputAttributesPerICP =
m_pMaxNumOfPushedInputs / (m_properties.m_pInputControlPointCount*numberOfPatches);

// Input attributes can be pushed only in pairs, so we need to round down the limit.
Expand All @@ -383,28 +383,28 @@ uint32_t CHullShader::GetMaxNumOfPushedInputs() const
// They can be pushed only in pairs.
uint32_t reqNumOfInputAttributesPerICP = iSTD::Align(m_properties.m_pMaxInputSignatureCount, 2);

// TODO: reqNumOfInputAttributesPerICP will have to be incremented by size of Vertex Header
// TODO: reqNumOfInputAttributesPerICP will have to be incremented by size of Vertex Header
// in case of SGV inputs have to be taken into consideration (will be done in next step).
// reqNumOfInputAttributes += HeaderSize().Count();

// Determine ACTUAL number of attributes that can be pushed.
// If the required number of input attributes is less than the maximum potential number,
// then all of them will be pushed.
uint32_t actualNumOfPushedInputAttributesPerICP =
uint32_t actualNumOfPushedInputAttributesPerICP =
iSTD::Min(reqNumOfInputAttributesPerICP, maxNumOfPushedInputAttributesPerICP);

return actualNumOfPushedInputAttributesPerICP;
}

void CHullShader::EmitPatchConstantInput(llvm::Instruction* pInst, CVariable* pDest)
void CHullShader::EmitPatchConstantInput(llvm::Instruction* pInst, QuadEltUnit& attributeOffset, CVariable*& pPerSlotOffsetVar)
{
bool readHeader = ((dyn_cast<GenIntrinsicInst>(pInst))->getIntrinsicID() == GenISAIntrinsic::GenISA_HSURBPatchHeaderRead);

// patch constant input read
llvm::Value* pIndirectVertexIdx = pInst->getOperand(0);

CVariable* pPerSlotOffsetVar = nullptr;
QuadEltUnit attributeOffset(0);
pPerSlotOffsetVar = nullptr;
attributeOffset = QuadEltUnit(0);

// {BDW - WA, HS} Do not set pPerSlotOffset or change globalOffset to read TessFactors from URB.
if (!readHeader)
Expand All @@ -422,25 +422,23 @@ void CHullShader::EmitPatchConstantInput(llvm::Instruction* pInst, CVariable* pD

attributeOffset = attributeOffset + GetURBHeaderSize();
}

URBReadPatchConstOrOutputCntrlPtInput(pPerSlotOffsetVar, attributeOffset, false, pDest);
}

void CHullShader::EmitOutputControlPointInput(llvm::Instruction* pInst, CVariable* pDest)
void CHullShader::EmitOutputControlPointInput(llvm::Instruction* pInst, QuadEltUnit& attributeOffset, CVariable*& pPerSlotOffsetVar)
{
// output control point input read
llvm::Value* pIndirectVertexIdx = pInst->getOperand(0);
llvm::Value* pAttribIdx = pInst->getOperand(1);

CVariable* pPerSlotOffsetVar = nullptr;
QuadEltUnit attributeOffset(GetPatchConstantOutputSize());
pPerSlotOffsetVar = nullptr;
attributeOffset = GetPatchConstantOutputSize();

// Compute offset from vertex index
if (llvm::ConstantInt* pConstVertexIdx = llvm::dyn_cast<llvm::ConstantInt>(pIndirectVertexIdx))
{
// vertex index is a constant, we can compute the URB read offset directly
attributeOffset =
attributeOffset +
attributeOffset =
attributeOffset +
QuadEltUnit(int_cast<unsigned int>(pConstVertexIdx->getZExtValue())) * m_properties.m_pMaxOutputSignatureCount;
}
else
Expand All @@ -459,7 +457,7 @@ void CHullShader::EmitOutputControlPointInput(llvm::Instruction* pInst, CVariabl
}
}

// Compute additionall offset coming from atribute index
// Compute additional offset coming from attribute index
if (llvm::ConstantInt* pConstAttribIdx = llvm::dyn_cast<llvm::ConstantInt>(pAttribIdx))
{
// attribute offset is a constant, we can compute the URB read offset directly
Expand All @@ -482,53 +480,10 @@ void CHullShader::EmitOutputControlPointInput(llvm::Instruction* pInst, CVariabl
pPerSlotOffsetVar = GetSymbol(pAttribIdx);
}
}

URBReadPatchConstOrOutputCntrlPtInput(pPerSlotOffsetVar, attributeOffset, false, pDest);
}

void CHullShader::URBReadPatchConstOrOutputCntrlPtInput(
CVariable* pPerSlotOffsetVar,
QuadEltUnit globalOffset,
bool EOT,
CVariable* pDest )
{
CEncoder& encoder = GetEncoder();

const bool hasPerSlotOffsets = pPerSlotOffsetVar != nullptr;
// Payload size is just URB handles (1 GRF) or URB handles and per-slot offsets (2 GRFs).
const Unit<Element> payloadSize(hasPerSlotOffsets ? 2 : 1);
CVariable* pPayload =
GetNewVariable(payloadSize.Count() * numLanes(m_SIMDSize), ISA_TYPE_UD, EALIGN_GRF);

// get the register with URBHandles
CopyVariable(pPayload, m_pURBWriteHandleReg);

// If we have runtime value in per-slot offsets, we need to copy per-slot offsets to payload
if (hasPerSlotOffsets)
{
CopyVariable(pPayload, pPerSlotOffsetVar, 1);
}

const Unit<Element> messageLength = payloadSize;
const Unit<Element> responseLength(pDest->GetNumberElement()/numLanes(m_SIMDSize));
const uint desc = UrbMessage(
messageLength.Count(),
responseLength.Count(),
EOT,
hasPerSlotOffsets,
false,
globalOffset.Count(),
EU_GEN8_URB_OPCODE_SIMD8_READ);

const uint exDesc = EU_MESSAGE_TARGET_URB | (EOT ? 1 << 5 : 0);
CVariable* pMessDesc = ImmToVariable(desc, ISA_TYPE_UD);

encoder.Send(pDest, pPayload, exDesc, pMessDesc);
encoder.Push();
}

/// Returns the size of the output vertex.
/// Unit: 16B = 4 DWORDs
/// Unit: 16B = 4 DWORDs
/// Note: The PatchConstantOutput size must be 32B-aligned when rendering is enabled
/// Therefore, the PatchConstantOutput size is also rounded up to a multiple of 2.
QuadEltUnit CHullShader::GetPatchConstantOutputSize() const
Expand Down
15 changes: 5 additions & 10 deletions IGC/Compiler/CISACodeGen/HullShaderCodeGen.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ class CHullShader : public CShader
/// Fills in the kernel program structure with data determined during compilation.
void FillProgram(SHullShaderKernelProgram* pKernelProgram);
void PreCompile();
void EmitPatchConstantInput(llvm::Instruction* pInst, CVariable* pDest);
void EmitOutputControlPointInput(llvm::Instruction* pInst, CVariable* pDest);
void EmitPatchConstantInput(llvm::Instruction* pInst, QuadEltUnit& attributeOffset, CVariable*& pPerSlotOffsetVar);
void EmitOutputControlPointInput(llvm::Instruction* pInst, QuadEltUnit& attributeOffset, CVariable*& pPerSlotOffsetVar);
void ParseShaderSpecificOpcode(llvm::Instruction* inst);
void AddPrologue();

Expand All @@ -51,18 +51,13 @@ class CHullShader : public CShader
void AllocateEightPatchPayload();

void SetShaderSpecificHelper(EmitPass* emitPass);

CVariable* GetURBReadHandlesReg();
CVariable* GetR1();
CVariable* GetR2();
virtual CVariable* GetURBInputHandle(CVariable* pVertexIndex);
virtual QuadEltUnit GetFinalGlobalOffet(QuadEltUnit globalOffset);
virtual uint32_t GetMaxNumOfPushedInputs() const;
void URBReadPatchConstOrOutputCntrlPtInput(
CVariable* pPerSlotOffset,
QuadEltUnit globalOffset,
bool EOT,
CVariable* pDest);

void EmitPatchConstantHeader(
CVariable* var[],
Expand All @@ -78,7 +73,7 @@ class CHullShader : public CShader
OctEltUnit GetVertexURBEntryReadLength() const;

/// Returns a variable that stores URB write handle register
virtual CVariable* GetURBOutputHandle();
virtual CVariable* GetURBOutputHandle();

CVariable* GetPrimitiveID();

Expand Down Expand Up @@ -108,7 +103,7 @@ class CHullShader : public CShader
CVariable* m_pURBWriteHandleReg;
CVariable* m_pURBReadHandlesReg; // used for vertex data pulled from URB

static const uint32_t m_pMaxNumOfPushedInputs; // holds max number of inputs that can be pushed for this shader unit
static const uint32_t m_pMaxNumOfPushedInputs; // holds max number of inputs that can be pushed for this shader unit
CVariable* m_IncludeVertexHandles;
bool m_HasPrimitiveIDInstruction;
uint32_t m_pNumURBReadHandleGRF;
Expand Down
4 changes: 4 additions & 0 deletions IGC/GenISAIntrinsics/Intrinsic_definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,8 +257,12 @@
"int","bool","bool","bool","bool","int"],"None"],
"GenISA_RTWrite": ["void",["anyfloat","float","bool",0,0,0,0,"float","float","int","int","bool",
"bool","bool","bool","int"],"None"],
# (owordOffset, mask, x1, y1, z1, w1, x2, y2, z2, w2)
"GenISA_URBWrite": ["void",["int","int","float","float","float","float","float","float","float","float"],"None"],
# (index, owordOffset)->float8
"GenISA_URBRead": ["float8",["int","int"],"NoMem"],
# In-place data read using URB Write Handle. (owordOffset)->float8
"GenISA_URBReadOutput": ["float8",["int"],"NoMem"],
"GenISA_SetDebugReg": ["int",["int"],"None"],
"GenISA_add_pair": [["int","int"],["int","int","int","int"],"NoMem"],
"GenISA_sub_pair": [["int","int"],["int","int","int","int"],"NoMem"],
Expand Down

0 comments on commit ebfc688

Please sign in to comment.