Skip to content

Commit

Permalink
More refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
godlikepanos committed Aug 18, 2024
1 parent 1e9f47b commit 2d5ecdb
Show file tree
Hide file tree
Showing 31 changed files with 236 additions and 261 deletions.
9 changes: 6 additions & 3 deletions AnKi/Core/GpuMemory/GpuReadbackMemoryPool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,20 @@ GpuReadbackMemoryPool::GpuReadbackMemoryPool()

m_pool.init(buffUsage, classes, classes.getBack(), "GpuReadback", false, mapAccess);

m_alignment = GrManager::getSingleton().getDeviceCapabilities().m_storageBufferBindOffsetAlignment;
if(!GrManager::getSingleton().getDeviceCapabilities().m_structuredBufferNaturalAlignment)
{
m_structuredBufferAlignment = GrManager::getSingleton().getDeviceCapabilities().m_structuredBufferBindOffsetAlignment;
}
}

/// Destructor. The body is empty; no explicit cleanup is performed here.
GpuReadbackMemoryPool ::~GpuReadbackMemoryPool()
{
}

GpuReadbackMemoryAllocation GpuReadbackMemoryPool::allocate(PtrSize size)
GpuReadbackMemoryAllocation GpuReadbackMemoryPool::allocate(PtrSize size, U32 alignment)
{
GpuReadbackMemoryAllocation out;
m_pool.allocate(size, m_alignment, out.m_token);
m_pool.allocate(size, alignment, out.m_token);
out.m_buffer = &m_pool.getGpuBuffer();
out.m_mappedMemory = static_cast<U8*>(m_pool.getGpuBufferMappedMemory()) + out.m_token.m_offset;
return out;
Expand Down
11 changes: 9 additions & 2 deletions AnKi/Core/GpuMemory/GpuReadbackMemoryPool.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,15 +84,22 @@ class GpuReadbackMemoryPool : public MakeSingleton<GpuReadbackMemoryPool>
friend class MakeSingleton;

public:
GpuReadbackMemoryAllocation allocate(PtrSize size);
GpuReadbackMemoryAllocation allocate(PtrSize size, U32 alignment);

/// Typed convenience wrapper over allocate(): requests room for @a count elements of type T.
/// The alignment passed on is sizeof(T) while m_structuredBufferAlignment still equals kMaxU32, otherwise the stored
/// m_structuredBufferAlignment is used.
template<typename T>
GpuReadbackMemoryAllocation allocateStructuredBuffer(U32 count)
{
	U32 offsetAlignment = m_structuredBufferAlignment;
	if(offsetAlignment == kMaxU32)
	{
		// No explicit alignment was stored, fall back to the element size
		offsetAlignment = sizeof(T);
	}

	const PtrSize byteCount = sizeof(T) * count;
	return allocate(byteCount, offsetAlignment);
}

void deferredFree(GpuReadbackMemoryAllocation& allocation);

void endFrame();

private:
SegregatedListsGpuMemoryPool m_pool;
U32 m_alignment = 0;
U32 m_structuredBufferAlignment = kMaxU32;

GpuReadbackMemoryPool();

Expand Down
15 changes: 8 additions & 7 deletions AnKi/Core/GpuMemory/GpuSceneBuffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,15 +130,16 @@ void GpuSceneMicroPatcher::patchGpuScene(CommandBuffer& cmdb)
ANKI_TRACE_INC_COUNTER(GpuSceneMicroPatches, m_crntFramePatchHeaders.getSize());
ANKI_TRACE_INC_COUNTER(GpuSceneMicroPatchUploadData, m_crntFramePatchData.getSizeInBytes());

void* mapped;
const RebarAllocation headersToken = RebarTransientMemoryPool::getSingleton().allocateFrame(m_crntFramePatchHeaders.getSizeInBytes(), mapped);
memcpy(mapped, &m_crntFramePatchHeaders[0], m_crntFramePatchHeaders.getSizeInBytes());
WeakArray<PatchHeader> mapped;
const BufferView headersBuff = RebarTransientMemoryPool::getSingleton().allocateStructuredBuffer(m_crntFramePatchHeaders.getSize(), mapped);
memcpy(mapped.getBegin(), &m_crntFramePatchHeaders[0], m_crntFramePatchHeaders.getSizeInBytes());

const RebarAllocation dataToken = RebarTransientMemoryPool::getSingleton().allocateFrame(m_crntFramePatchData.getSizeInBytes(), mapped);
memcpy(mapped, &m_crntFramePatchData[0], m_crntFramePatchData.getSizeInBytes());
WeakArray<U32> mapped2;
const BufferView dataBuff = RebarTransientMemoryPool::getSingleton().allocateStructuredBuffer(m_crntFramePatchData.getSize(), mapped2);
memcpy(mapped2.getBegin(), &m_crntFramePatchData[0], m_crntFramePatchData.getSizeInBytes());

cmdb.bindSrv(0, 0, headersToken);
cmdb.bindSrv(1, 0, dataToken);
cmdb.bindSrv(0, 0, headersBuff);
cmdb.bindSrv(1, 0, dataBuff);
cmdb.bindUav(0, 0, BufferView(&GpuSceneBuffer::getSingleton().getBuffer()));

cmdb.bindShaderProgram(m_grProgram.get());
Expand Down
11 changes: 11 additions & 0 deletions AnKi/Core/GpuMemory/GpuSceneBuffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,17 @@ class GpuSceneBuffer : public MakeSingleton<GpuSceneBuffer>
return alloc;
}

/// Allocate room in the pool for @a count elements of type T.
/// The alignment is sizeof(T) when the device capabilities report m_structuredBufferNaturalAlignment, otherwise it is
/// the device's m_structuredBufferBindOffsetAlignment.
template<typename T>
GpuSceneBufferAllocation allocateStructuredBuffer(U32 count)
{
	const auto& caps = GrManager::getSingleton().getDeviceCapabilities();

	U32 alignment;
	if(caps.m_structuredBufferNaturalAlignment)
	{
		alignment = sizeof(T);
	}
	else
	{
		alignment = caps.m_structuredBufferBindOffsetAlignment;
	}

	GpuSceneBufferAllocation result;
	m_pool.allocate(sizeof(T) * count, alignment, result.m_token);
	return result;
}

void deferredFree(GpuSceneBufferAllocation& alloc)
{
m_pool.deferredFree(alloc.m_token);
Expand Down
18 changes: 6 additions & 12 deletions AnKi/Core/GpuMemory/GpuVisibleTransientMemoryPool.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,8 @@ class GpuVisibleTransientMemoryPool : public MakeSingleton<GpuVisibleTransientMe
friend class MakeSingleton;

public:
BufferView allocate(PtrSize size, PtrSize alignment = 0)
BufferView allocate(PtrSize size, PtrSize alignment)
{
alignment = (alignment == 0) ? m_alignment : alignment;
PtrSize offset;
Buffer* buffer;
m_pool.allocate(size, alignment, offset, buffer);
Expand All @@ -46,20 +45,15 @@ class GpuVisibleTransientMemoryPool : public MakeSingleton<GpuVisibleTransientMe

private:
StackGpuMemoryPool m_pool;
U32 m_alignment = 0;
U32 m_frame = 0;
U32 m_structuredBufferAlignment = 0;
U32 m_structuredBufferAlignment = kMaxU32;

GpuVisibleTransientMemoryPool()
{
m_structuredBufferAlignment = (GrManager::getSingleton().getDeviceCapabilities().m_structuredBufferNaturalAlignment)
? kMaxU32
: GrManager::getSingleton().getDeviceCapabilities().m_storageBufferBindOffsetAlignment;

m_alignment = GrManager::getSingleton().getDeviceCapabilities().m_uniformBufferBindOffsetAlignment;
m_alignment = max(m_alignment, GrManager::getSingleton().getDeviceCapabilities().m_storageBufferBindOffsetAlignment);
m_alignment = max(m_alignment, GrManager::getSingleton().getDeviceCapabilities().m_sbtRecordAlignment);
m_alignment = max(m_alignment, GrManager::getSingleton().getDeviceCapabilities().m_accelerationStructureBuildScratchOffsetAlignment);
if(!GrManager::getSingleton().getDeviceCapabilities().m_structuredBufferNaturalAlignment)
{
m_structuredBufferAlignment = GrManager::getSingleton().getDeviceCapabilities().m_structuredBufferBindOffsetAlignment;
}

BufferUsageBit buffUsage = BufferUsageBit::kAllConstant | BufferUsageBit::kAllUav | BufferUsageBit::kAllSrv | BufferUsageBit::kIndirectDraw
| BufferUsageBit::kIndirectCompute | BufferUsageBit::kVertex | BufferUsageBit::kAllCopy;
Expand Down
32 changes: 11 additions & 21 deletions AnKi/Core/GpuMemory/RebarTransientMemoryPool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,46 +36,36 @@ void RebarTransientMemoryPool::init()

m_bufferSize = buffInit.m_size;

m_alignment = GrManager::getSingleton().getDeviceCapabilities().m_uniformBufferBindOffsetAlignment;
m_alignment = max(m_alignment, GrManager::getSingleton().getDeviceCapabilities().m_storageBufferBindOffsetAlignment);
m_alignment = max(m_alignment, GrManager::getSingleton().getDeviceCapabilities().m_sbtRecordAlignment);

m_mappedMem = static_cast<U8*>(m_buffer->map(0, kMaxPtrSize, BufferMapAccessBit::kWrite));
}

RebarAllocation RebarTransientMemoryPool::allocateFrame(PtrSize size, void*& mappedMem)
{
RebarAllocation out = tryAllocateFrame(size, mappedMem);
if(!out.isValid()) [[unlikely]]
if(!GrManager::getSingleton().getDeviceCapabilities().m_structuredBufferNaturalAlignment)
{
ANKI_CORE_LOGF("Out of ReBAR GPU memory");
m_structuredBufferAlignment = GrManager::getSingleton().getDeviceCapabilities().m_structuredBufferBindOffsetAlignment;
}

return out;
m_mappedMem = static_cast<U8*>(m_buffer->map(0, kMaxPtrSize, BufferMapAccessBit::kWrite));
}

RebarAllocation RebarTransientMemoryPool::tryAllocateFrame(PtrSize origSize, void*& mappedMem)
BufferView RebarTransientMemoryPool::allocateInternal(PtrSize origSize, U32 alignment, void*& mappedMem)
{
ANKI_ASSERT(origSize > 0);
const PtrSize size = getAlignedRoundUp(m_alignment, origSize);
ANKI_ASSERT(alignment > 0);
const PtrSize size = origSize + alignment;

// Try in a loop because we may end up with an allocation whose offset crosses the buffer's end
PtrSize offset;
Bool done = false;
do
{
offset = m_offset.fetchAdd(size) % m_bufferSize;
const PtrSize end = (offset + origSize) % (m_bufferSize + 1);
const PtrSize end = (offset + size) % (m_bufferSize + 1);

done = offset < end;
} while(!done);

mappedMem = m_mappedMem + offset;
RebarAllocation out;
out.m_offset = offset;
out.m_range = origSize;
const PtrSize alignedOffset = getAlignedRoundUp(alignment, offset);
ANKI_ASSERT(alignedOffset + origSize <= offset + size);

return out;
mappedMem = m_mappedMem + alignedOffset;
return BufferView(m_buffer.get(), alignedOffset, origSize);
}

void RebarTransientMemoryPool::endFrame()
Expand Down
91 changes: 28 additions & 63 deletions AnKi/Core/GpuMemory/RebarTransientMemoryPool.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,47 +13,6 @@ namespace anki {
/// @addtogroup core
/// @{

/// Token that gets returned when requesting for memory to write to a resource.
/// Token returned by RebarTransientMemoryPool when memory is requested for writing to a resource.
/// It stores an offset and a range. A range of zero marks the token as invalid, which is the default state.
class RebarAllocation
{
	friend class RebarTransientMemoryPool;

public:
	RebarAllocation() = default;

	~RebarAllocation() = default;

	/// Two tokens are equal when both their offsets and their ranges match.
	Bool operator==(const RebarAllocation& b) const
	{
		const Bool sameOffset = m_offset == b.m_offset;
		const Bool sameRange = m_range == b.m_range;
		return sameOffset && sameRange;
	}

	/// @return True when the stored range is non-zero.
	Bool isValid() const
	{
		return !(m_range == 0);
	}

	/// @return The stored offset. Asserts that the token is valid.
	PtrSize getOffset() const
	{
		ANKI_ASSERT(isValid());
		return m_offset;
	}

	/// @return The stored range. Asserts that the token is valid.
	PtrSize getRange() const
	{
		ANKI_ASSERT(isValid());
		return m_range;
	}

	/// Declared here only, the definition is not part of the class body.
	Buffer& getBuffer() const;

	/// Conversion to BufferView. Declared here only, the definition is not part of the class body.
	operator BufferView() const;

private:
	PtrSize m_offset = kMaxPtrSize; ///< Defaults to kMaxPtrSize.
	PtrSize m_range = 0; ///< Zero means the token is invalid.
};

/// Manages staging GPU memory.
class RebarTransientMemoryPool : public MakeSingleton<RebarTransientMemoryPool>
{
Expand All @@ -70,28 +29,43 @@ class RebarTransientMemoryPool : public MakeSingleton<RebarTransientMemoryPool>
void endFrame();

/// Allocate staging memory for various operations. The memory will be reclaimed at the beginning of the N-(kMaxFramesInFlight-1) frame.
RebarAllocation allocateFrame(PtrSize size, void*& mappedMem);

template<typename T>
RebarAllocation allocateFrame(U32 count, T*& mappedMem)
BufferView allocate(PtrSize size, U32 alignment, T*& mappedMem)
{
void* mem;
const RebarAllocation out = allocateFrame(count * sizeof(T), mem);
const BufferView out = allocateInternal(size, alignment, mem);
mappedMem = static_cast<T*>(mem);
return out;
}

/// @copydoc allocate
template<typename T>
RebarAllocation allocateFrame(U32 count, WeakArray<T>& arr)
BufferView allocateConstantBuffer(T*& mappedMem)
{
void* mem;
const RebarAllocation out = allocateFrame(count * sizeof(T), mem);
arr = {static_cast<T*>(mem), count};
return allocate(sizeof(T), GrManager::getSingleton().getDeviceCapabilities().m_constantBufferBindOffsetAlignment, mappedMem);
}

/// @copydoc allocate
/// Typed variant: requests room for @a count elements of T and exposes the mapped memory through @a arr.
/// The alignment is sizeof(T) while m_structuredBufferAlignment equals kMaxU32, otherwise the stored value is used.
template<typename T>
BufferView allocateStructuredBuffer(U32 count, WeakArray<T>& arr)
{
	U32 offsetAlignment = m_structuredBufferAlignment;
	if(offsetAlignment == kMaxU32)
	{
		// No explicit alignment was stored, fall back to the element size
		offsetAlignment = sizeof(T);
	}

	T* mappedElements;
	const BufferView view = allocate(sizeof(T) * count, offsetAlignment, mappedElements);
	arr = {mappedElements, count};
	return view;
}

/// Allocate staging memory for various operations. The memory will be reclaimed at the beginning of the N-(kMaxFramesInFlight-1) frame.
RebarAllocation tryAllocateFrame(PtrSize size, void*& mappedMem);
/// @copydoc allocate
/// Typed variant for @a count elements of T. The mapped memory is exposed through @a arr and the alignment passed to
/// allocate() is always sizeof(U32).
template<typename T>
BufferView allocateCopyBuffer(U32 count, WeakArray<T>& arr)
{
	constexpr U32 kCopyAlignment = sizeof(U32);
	const PtrSize byteCount = sizeof(T) * count;

	T* mappedElements;
	const BufferView view = allocate(byteCount, kCopyAlignment, mappedElements);
	arr = {mappedElements, count};
	return view;
}

ANKI_PURE Buffer& getBuffer() const
{
Expand All @@ -109,23 +83,14 @@ class RebarTransientMemoryPool : public MakeSingleton<RebarTransientMemoryPool>
PtrSize m_bufferSize = 0; ///< Cache it.
Atomic<PtrSize> m_offset = {0};
PtrSize m_previousFrameEndOffset = 0;
U32 m_alignment = 0;
U32 m_structuredBufferAlignment = kMaxU32;

RebarTransientMemoryPool() = default;

~RebarTransientMemoryPool();
};

/// Forwards to RebarTransientMemoryPool::getSingleton().getBuffer(). The result does not depend on this token's
/// offset or range.
inline Buffer& RebarAllocation::getBuffer() const
{
return RebarTransientMemoryPool::getSingleton().getBuffer();
}

/// Converts the token to a BufferView built from the RebarTransientMemoryPool singleton's buffer, m_offset and m_range.
/// Asserts that the token is valid.
inline RebarAllocation::operator BufferView() const
{
	ANKI_ASSERT(isValid());
	Buffer* const poolBuffer = &RebarTransientMemoryPool::getSingleton().getBuffer();
	return {poolBuffer, m_offset, m_range};
}
BufferView allocateInternal(PtrSize size, U32 alignment, void*& mappedMem);
};
/// @}

} // end namespace anki
22 changes: 6 additions & 16 deletions AnKi/Gr/Common.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ constexpr U32 kMaxBindingsPerRegisterSpace = 32;
constexpr U32 kMaxFramesInFlight = 3; ///< Triple buffering.
constexpr U32 kMaxGrObjectNameLength = 61;
constexpr U32 kMaxBindlessTextures = 512;
constexpr U32 kMaxFastConstantsSize = 128; ///< Thanks AMD!!
constexpr U32 kMaxFastConstantsSize = 128; ///< Push/root constants size. Thanks AMD!!

/// The number of commands in a command buffer that make it a small batch command buffer.
constexpr U32 kCommandBufferSmallBatchMaxCommands = 100;
Expand All @@ -90,7 +90,6 @@ ANKI_GR_CLASS(Texture)
ANKI_GR_CLASS(Sampler)
ANKI_GR_CLASS(CommandBuffer)
ANKI_GR_CLASS(Shader)
ANKI_GR_CLASS(Framebuffer)
ANKI_GR_CLASS(OcclusionQuery)
ANKI_GR_CLASS(TimestampQuery)
ANKI_GR_CLASS(PipelineQuery)
Expand Down Expand Up @@ -144,23 +143,14 @@ class GpuDeviceCapabilities
{
public:
/// The alignment of offsets when binding constant buffers.
U32 m_uniformBufferBindOffsetAlignment = kMaxU32;
U32 m_constantBufferBindOffsetAlignment = kMaxU32;

/// The max visible range of constant buffers inside the shaders.
PtrSize m_uniformBufferMaxRange = 0;

/// The alignment of offsets when binding storage buffers.
U32 m_storageBufferBindOffsetAlignment = kMaxU32;

/// The max visible range of storage buffers inside the shaders.
PtrSize m_storageBufferMaxRange = 0;
/// The alignment of offsets when binding structured buffers.
U32 m_structuredBufferBindOffsetAlignment = kMaxU32;

/// The alignment of offsets when binding texture buffers.
U32 m_texelBufferBindOffsetAlignment = kMaxU32;

/// The max visible range of texture buffers inside the shaders.
PtrSize m_textureBufferMaxRange = 0;

/// Max push/root constant size.
PtrSize m_fastConstantsSize = 128;

Expand All @@ -177,10 +167,10 @@ class GpuDeviceCapabilities
U32 m_shaderGroupHandleSize = 0;

/// Min subgroup size of the GPU.
U32 m_minSubgroupSize = 0;
U32 m_minWaveSize = 0;

/// Max subgroup size of the GPU.
U32 m_maxSubgroupSize = 0;
U32 m_maxWaveSize = 0;

/// Min size of a texel in the shading rate image.
U32 m_minShadingRateImageTexelSize = 0;
Expand Down
Loading

0 comments on commit 2d5ecdb

Please sign in to comment.