Skip to content

Commit

Permalink
D3D12 fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
godlikepanos committed Sep 30, 2024
1 parent 3905c88 commit bc8ef45
Show file tree
Hide file tree
Showing 6 changed files with 90 additions and 52 deletions.
3 changes: 3 additions & 0 deletions AnKi/Gr/Common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,9 @@ Error ShaderReflection::linkShaderReflection(const ShaderReflection& a, const Sh
c.m_descriptor.m_bindings[space][c.m_descriptor.m_bindingCounts[space]++] = bbinding;
}
}

// Sort again
std::sort(c.m_descriptor.m_bindings[space].getBegin(), c.m_descriptor.m_bindings[space].getBegin() + c.m_descriptor.m_bindingCounts[space]);
}

if(a.m_descriptor.m_fastConstantsSize != 0 && b.m_descriptor.m_fastConstantsSize != 0
Expand Down
12 changes: 6 additions & 6 deletions AnKi/Gr/Common.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,14 @@ inline BoolCVar g_workGraphcsCVar("Gr", "WorkGraphs", false, "Enable or not Work
inline NumericCVar<U32> g_maxBindlessSampledTextureCountCVar("Gr", "MaxBindlessSampledTextureCountCVar", 512, 16, kMaxU16);

#if ANKI_GR_BACKEND_DIRECT3D
inline NumericCVar<U16> g_maxRtvDescriptorsCVar("Gr", "MaxRvtDescriptors", 128, 8, kMaxU16, "Max number of RTVs");
inline NumericCVar<U16> g_maxDsvDescriptorsCVar("Gr", "MaxDsvDescriptors", 128, 8, kMaxU16, "Max number of DSVs");
inline NumericCVar<U16> g_maxCpuCbvSrvUavDescriptorsCVar("Gr", "MaxCpuCbvSrvUavDescriptors", 1024, 8, kMaxU16,
inline NumericCVar<U16> g_maxRtvDescriptorsCVar("Gr", "MaxRvtDescriptors", 1024, 8, kMaxU16, "Max number of RTVs");
inline NumericCVar<U16> g_maxDsvDescriptorsCVar("Gr", "MaxDsvDescriptors", 512, 8, kMaxU16, "Max number of DSVs");
inline NumericCVar<U16> g_maxCpuCbvSrvUavDescriptorsCVar("Gr", "MaxCpuCbvSrvUavDescriptors", 16 * 1024, 8, kMaxU16,
"Max number of CBV/SRV/UAV descriptors");
inline NumericCVar<U16> g_maxCpuSamplerDescriptorsCVar("Gr", "MaxCpuSamplerDescriptors", 64, 8, kMaxU16, "Max number of sampler descriptors");
inline NumericCVar<U16> g_maxGpuCbvSrvUavDescriptorsCVar("Gr", "MaxGpuCbvSrvUavDescriptors", 2 * 1024, 8, kMaxU16,
inline NumericCVar<U16> g_maxCpuSamplerDescriptorsCVar("Gr", "MaxCpuSamplerDescriptors", 512, 8, kMaxU16, "Max number of sampler descriptors");
inline NumericCVar<U16> g_maxGpuCbvSrvUavDescriptorsCVar("Gr", "MaxGpuCbvSrvUavDescriptors", 16 * 1024, 8, kMaxU16,
"Max number of CBV/SRV/UAV descriptors");
inline NumericCVar<U16> g_maxGpuSamplerDescriptorsCVar("Gr", "MaxGpuSamplerDescriptors", 128, 8, kMaxU16, "Max number of sampler descriptors");
inline NumericCVar<U16> g_maxGpuSamplerDescriptorsCVar("Gr", "MaxGpuSamplerDescriptors", 2 * 1024, 8, kMaxU16, "Max number of sampler descriptors");

inline BoolCVar g_dredCVar("Gr", "Dred", false, "Enable DRED");
#else
Expand Down
104 changes: 61 additions & 43 deletions AnKi/Gr/D3D/D3DDescriptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

namespace anki {

static Error createDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE type, D3D12_DESCRIPTOR_HEAP_FLAGS flags, U32 descriptorCount,
static Error createDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE type, D3D12_DESCRIPTOR_HEAP_FLAGS flags, U32 descriptorCount, CString name,
ID3D12DescriptorHeap*& heap, D3D12_CPU_DESCRIPTOR_HANDLE& cpuHeapStart, D3D12_GPU_DESCRIPTOR_HANDLE& gpuHeapStart,
U32& descriptorSize)
{
Expand All @@ -19,6 +19,12 @@ static Error createDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE type, D3D12_DESCRIP
heapDesc.Flags = flags;
ANKI_D3D_CHECK(getDevice().CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(&heap)));

ANKI_ASSERT(name.getLength() > 0);
GrDynamicArray<WChar> wstr;
wstr.resize(name.getLength() + 1);
name.toWideChars(wstr.getBegin(), wstr.getSize());
heap->SetName(wstr.getBegin());

cpuHeapStart = heap->GetCPUDescriptorHandleForHeapStart();
if(flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE)
{
Expand Down Expand Up @@ -107,7 +113,7 @@ void PersistentDescriptorAllocator::free(DescriptorHeapHandle& handle)
}

void RingDescriptorAllocator::init(D3D12_CPU_DESCRIPTOR_HANDLE cpuHeapStart, D3D12_GPU_DESCRIPTOR_HANDLE gpuHeapStart, U32 descriptorSize,
U32 descriptorCount)
U32 descriptorCount, CString name)
{
ANKI_ASSERT(descriptorSize > 0);
ANKI_ASSERT(descriptorCount > 0);
Expand All @@ -117,21 +123,32 @@ void RingDescriptorAllocator::init(D3D12_CPU_DESCRIPTOR_HANDLE cpuHeapStart, D3D
m_gpuHeapStart = gpuHeapStart;
m_descriptorSize = descriptorSize;
m_descriptorCount = descriptorCount;
m_name = name;
}

DescriptorHeapHandle RingDescriptorAllocator::allocate(U32 descriptorCount)
{
ANKI_ASSERT(descriptorCount > 0);
ANKI_ASSERT(m_descriptorSize > 0);

U32 firstDescriptor;
Bool allocationPassesEnd = false;
U64 increment;
do
{
firstDescriptor = m_increment.fetchAdd(descriptorCount) % m_descriptorCount;
increment = m_increment.fetchAdd(descriptorCount);
firstDescriptor = increment % m_descriptorCount;

allocationPassesEnd = firstDescriptor + descriptorCount > m_descriptorCount;
} while(allocationPassesEnd);

const U64 frameIncrementEnd = m_incrementAtFrameStart.load() + m_descriptorCount / (kMaxFramesInFlight + 1);

if(increment >= frameIncrementEnd)
{
ANKI_D3D_LOGW("Allocated too many descriptors from the ring buffer %s. Need to increase the limits", m_name.cstr());
}

DescriptorHeapHandle out;
out.m_cpuHandle.ptr = (m_cpuHeapStart.ptr) ? (m_cpuHeapStart.ptr + firstDescriptor * m_descriptorSize) : 0;
out.m_gpuHandle.ptr = (m_gpuHeapStart.ptr) ? (m_gpuHeapStart.ptr + firstDescriptor * m_descriptorSize) : 0;
Expand All @@ -146,17 +163,7 @@ DescriptorHeapHandle RingDescriptorAllocator::allocate(U32 descriptorCount)

void RingDescriptorAllocator::endFrame()
{
const U64 crntIncrement = m_increment.load();

const U32 descriptorsAllocatedThisFrame = U32(crntIncrement - m_incrementAtFrameStart);

const U32 maxFramesInFlight = kMaxFramesInFlight + 1; // Be very conservative
if(descriptorsAllocatedThisFrame > m_descriptorCount / maxFramesInFlight)
{
ANKI_D3D_LOGW("Allocated too many descriptors this frame");
}

m_incrementAtFrameStart = crntIncrement;
m_incrementAtFrameStart.store(m_increment.load());
}

DescriptorFactory::~DescriptorFactory()
Expand All @@ -170,47 +177,47 @@ DescriptorFactory::~DescriptorFactory()
Error DescriptorFactory::init()
{
// Init CPU descriptors first
auto createHeapAndAllocator = [this](D3D12_DESCRIPTOR_HEAP_TYPE type, D3D12_DESCRIPTOR_HEAP_FLAGS flags, U16 descriptorCount,
auto createHeapAndAllocator = [this](D3D12_DESCRIPTOR_HEAP_TYPE type, D3D12_DESCRIPTOR_HEAP_FLAGS flags, U16 descriptorCount, CString name,
PersistentDescriptorAllocator& alloc) -> Error {
ID3D12DescriptorHeap* heap;
D3D12_CPU_DESCRIPTOR_HANDLE cpuHeapStart;
D3D12_GPU_DESCRIPTOR_HANDLE gpuHeapStart;
U32 descriptorSize;
ANKI_CHECK(createDescriptorHeap(type, flags, descriptorCount, heap, cpuHeapStart, gpuHeapStart, descriptorSize));
ANKI_CHECK(createDescriptorHeap(type, flags, descriptorCount, name, heap, cpuHeapStart, gpuHeapStart, descriptorSize));
alloc.init(cpuHeapStart, gpuHeapStart, descriptorSize, descriptorCount);
m_descriptorHeaps.emplaceBack(heap);
return Error::kNone;
};

ANKI_CHECK(createHeapAndAllocator(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, D3D12_DESCRIPTOR_HEAP_FLAG_NONE, g_maxCpuCbvSrvUavDescriptorsCVar,
m_cpuPersistent.m_cbvSrvUav));
"CPU CBV/SRV/UAV", m_cpuPersistent.m_cbvSrvUav));
ANKI_CHECK(createHeapAndAllocator(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, D3D12_DESCRIPTOR_HEAP_FLAG_NONE, g_maxCpuSamplerDescriptorsCVar,
m_cpuPersistent.m_sampler));
ANKI_CHECK(
createHeapAndAllocator(D3D12_DESCRIPTOR_HEAP_TYPE_RTV, D3D12_DESCRIPTOR_HEAP_FLAG_NONE, g_maxRtvDescriptorsCVar, m_cpuPersistent.m_rtv));
ANKI_CHECK(
createHeapAndAllocator(D3D12_DESCRIPTOR_HEAP_TYPE_DSV, D3D12_DESCRIPTOR_HEAP_FLAG_NONE, g_maxDsvDescriptorsCVar, m_cpuPersistent.m_dsv));
"CPU samplers", m_cpuPersistent.m_sampler));
ANKI_CHECK(createHeapAndAllocator(D3D12_DESCRIPTOR_HEAP_TYPE_RTV, D3D12_DESCRIPTOR_HEAP_FLAG_NONE, g_maxRtvDescriptorsCVar, "CPU RTV",
m_cpuPersistent.m_rtv));
ANKI_CHECK(createHeapAndAllocator(D3D12_DESCRIPTOR_HEAP_TYPE_DSV, D3D12_DESCRIPTOR_HEAP_FLAG_NONE, g_maxDsvDescriptorsCVar, "CPU DSV",
m_cpuPersistent.m_dsv));

// Init GPU visible heaps
ID3D12DescriptorHeap* heap;
D3D12_CPU_DESCRIPTOR_HANDLE cpuHeapStart;
D3D12_GPU_DESCRIPTOR_HANDLE gpuHeapStart;
U32 descriptorSize;
ANKI_CHECK(createDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE,
g_maxGpuCbvSrvUavDescriptorsCVar + g_maxBindlessSampledTextureCountCVar, heap, cpuHeapStart, gpuHeapStart,
descriptorSize));
g_maxGpuCbvSrvUavDescriptorsCVar + g_maxBindlessSampledTextureCountCVar, "GPU CBV/SRV/UAV", heap, cpuHeapStart,
gpuHeapStart, descriptorSize));
m_descriptorHeaps.emplaceBack(heap);

m_gpuPersistent.m_cbvSrvUav.init(cpuHeapStart, gpuHeapStart, descriptorSize, U16(g_maxBindlessSampledTextureCountCVar));

cpuHeapStart.ptr += descriptorSize * g_maxBindlessSampledTextureCountCVar;
gpuHeapStart.ptr += descriptorSize * g_maxBindlessSampledTextureCountCVar;
m_gpuRing.m_cbvSrvUav.init(cpuHeapStart, gpuHeapStart, descriptorSize, g_maxGpuCbvSrvUavDescriptorsCVar);
m_gpuRing.m_cbvSrvUav.init(cpuHeapStart, gpuHeapStart, descriptorSize, g_maxGpuCbvSrvUavDescriptorsCVar, "CBV/SRV/UAV");

ANKI_CHECK(createDescriptorHeap(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE, g_maxGpuSamplerDescriptorsCVar,
heap, cpuHeapStart, gpuHeapStart, descriptorSize));
"GPU samplers", heap, cpuHeapStart, gpuHeapStart, descriptorSize));
m_descriptorHeaps.emplaceBack(heap);
m_gpuRing.m_sampler.init(cpuHeapStart, gpuHeapStart, descriptorSize, g_maxGpuSamplerDescriptorsCVar);
m_gpuRing.m_sampler.init(cpuHeapStart, gpuHeapStart, descriptorSize, g_maxGpuSamplerDescriptorsCVar, "Samplers");

// Misc
for(D3D12_DESCRIPTOR_HEAP_TYPE type = D3D12_DESCRIPTOR_HEAP_TYPE(0); type < D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES;
Expand Down Expand Up @@ -452,7 +459,7 @@ void DescriptorState::bindRootSignature(const RootSignature* rootSignature, Bool
{
ANKI_ASSERT(rootSignature);

if(rootSignature == m_rootSignature)
if(m_rootSignature && rootSignature->m_hash == m_rootSignature->m_hash)
{
ANKI_ASSERT(m_rootSignatureNeedsRebinding == false);
return;
Expand Down Expand Up @@ -501,26 +508,37 @@ void DescriptorState::flush(ID3D12GraphicsCommandList& cmdList)
}

Space& stateSpace = m_spaces[spaceIdx];
Bool skip = true;

// Allocate descriptor memory (doesn't include holes)
if(stateSpace.m_cbvSrvUavDirty && rootSignatureSpace.m_cbvSrvUavCount)
{
stateSpace.m_cbvSrvUavHeapHandle = DescriptorFactory::getSingleton().allocateTransient(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
rootSignatureSpace.m_cbvSrvUavCount, true);
skip = false;
}
Bool skip = true;

if(stateSpace.m_samplersDirty && rootSignatureSpace.m_samplerCount)
{
stateSpace.m_samplerHeapHandle =
DescriptorFactory::getSingleton().allocateTransient(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, rootSignatureSpace.m_samplerCount, true);
skip = false;
}
if(rootSignatureSpace.m_cbvSrvUavCount && (stateSpace.m_cbvSrvUavDirty || rootSignatureNeedsRebinding))
{
skip = false;
}

if(skip)
{
continue;
if(rootSignatureSpace.m_samplerCount && (stateSpace.m_samplersDirty || rootSignatureNeedsRebinding))
{
skip = false;
}

if(skip)
{
continue;
}

if(rootSignatureSpace.m_cbvSrvUavCount)
{
stateSpace.m_cbvSrvUavHeapHandle = DescriptorFactory::getSingleton().allocateTransient(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV,
rootSignatureSpace.m_cbvSrvUavCount, true);
}

if(rootSignatureSpace.m_samplerCount)
{
stateSpace.m_samplerHeapHandle =
DescriptorFactory::getSingleton().allocateTransient(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, rootSignatureSpace.m_samplerCount, true);
}
}

// Populate descriptors
Expand Down
14 changes: 12 additions & 2 deletions AnKi/Gr/D3D/D3DDescriptor.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,8 @@ class PersistentDescriptorAllocator
class RingDescriptorAllocator
{
public:
void init(D3D12_CPU_DESCRIPTOR_HANDLE cpuHeapStart, D3D12_GPU_DESCRIPTOR_HANDLE gpuHeapStart, U32 descriptorSize, U32 descriptorCount);
void init(D3D12_CPU_DESCRIPTOR_HANDLE cpuHeapStart, D3D12_GPU_DESCRIPTOR_HANDLE gpuHeapStart, U32 descriptorSize, U32 descriptorCount,
CString name);

/// Allocate for this frame. Memory will be reclaimed a few frames in the future.
/// @note Thread-safe.
Expand All @@ -124,7 +125,9 @@ class RingDescriptorAllocator

Atomic<U64> m_increment = 0;

U64 m_incrementAtFrameStart = 0;
Atomic<U64> m_incrementAtFrameStart = 0;

GrString m_name;
};

/// A container of all descriptor heaps.
Expand Down Expand Up @@ -200,6 +203,13 @@ class DescriptorFactory : public MakeSingleton<DescriptorFactory>
return U32(idx);
}

/// Signal the end of the current frame to both transient ring allocators (CBV/SRV/UAV first, then samplers).
/// @note Thread-safe.
void endFrame()
{
	// Both ring allocators live in the same aggregate; alias it once and forward the call in the original order.
	auto& rings = m_gpuRing;
	rings.m_cbvSrvUav.endFrame();
	rings.m_sampler.endFrame();
}

private:
class
{
Expand Down
7 changes: 7 additions & 0 deletions AnKi/Gr/D3D/D3DGrManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ namespace anki {
static void NTAPI d3dDebugMessageCallback([[maybe_unused]] D3D12_MESSAGE_CATEGORY category, D3D12_MESSAGE_SEVERITY severity,
[[maybe_unused]] D3D12_MESSAGE_ID id, LPCSTR pDescription, [[maybe_unused]] void* pContext)
{
if(id == D3D12_MESSAGE_ID_INVALID_BARRIER_ACCESS)
{
// Skip these for now
return;
}

if(!Logger::isAllocated())
{
printf("d3dDebugMessageCallback : %s", pDescription);
Expand Down Expand Up @@ -138,6 +144,7 @@ void GrManager::swapBuffers()
self.m_crntFrame = (self.m_crntFrame + 1) % self.m_frames.getSize();

FrameGarbageCollector::getSingleton().endFrame(presentFence.get());
DescriptorFactory::getSingleton().endFrame();
}

void GrManager::finish()
Expand Down
2 changes: 1 addition & 1 deletion AnKi/Shaders/Bloom.ankiprog
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ Texture2D<RVec4> g_inTex : register(t0);

struct Consts
{
RF32 m_threshold;
F32 m_threshold;
F32 m_scale;
F32 m_padding0;
F32 m_padding1;
Expand Down

0 comments on commit bc8ef45

Please sign in to comment.