From 152ec27e9e7b8a843ae658dfd3d365f7211630dc Mon Sep 17 00:00:00 2001 From: Zhou Zhenglong Date: Thu, 5 Dec 2024 16:26:35 +0800 Subject: [PATCH] fix staging buffer --- native/cocos/renderer/gfx-metal/MTLDevice.mm | 1 + .../cocos/renderer/gfx-metal/MTLGPUObjects.h | 14 ++++++++--- native/cocos/renderer/gfx-vulkan/VKDevice.cpp | 20 ++++++++++------ .../cocos/renderer/gfx-vulkan/VKGPUObjects.h | 24 +++++++++++++++---- 4 files changed, 45 insertions(+), 14 deletions(-) diff --git a/native/cocos/renderer/gfx-metal/MTLDevice.mm b/native/cocos/renderer/gfx-metal/MTLDevice.mm index 3f8dacf8ae1..de5d32f7baf 100644 --- a/native/cocos/renderer/gfx-metal/MTLDevice.mm +++ b/native/cocos/renderer/gfx-metal/MTLDevice.mm @@ -256,6 +256,7 @@ of this software and associated engine source code (the "Software"), a limited, CCMTLGPUStagingBufferPool *bufferPool = _gpuStagingBufferPools[index]; if (bufferPool) { bufferPool->reset(); + bufferPool->shrinkSize(); CCMTLGPUGarbageCollectionPool::getInstance()->clear(index); static_cast(_cmdBuff)->signalFence(); } diff --git a/native/cocos/renderer/gfx-metal/MTLGPUObjects.h b/native/cocos/renderer/gfx-metal/MTLGPUObjects.h index 1a431ede3e4..6901d85af3f 100644 --- a/native/cocos/renderer/gfx-metal/MTLGPUObjects.h +++ b/native/cocos/renderer/gfx-metal/MTLGPUObjects.h @@ -178,6 +178,8 @@ struct CCMTLGPUDescriptorSet { class CCMTLGPUStagingBufferPool final { public: + static constexpr uint32_t CHUNK_SIZE = 16 * 1024 * 1024; // 16M per block by default + CCMTLGPUStagingBufferPool(id device) : _device(device) {} @@ -225,8 +227,14 @@ class CCMTLGPUStagingBufferPool final { } } - void shrinkSize() { + void shrinkSize(size_t minimalSize = CHUNK_SIZE) { + size_t reservedSize = 0; for (auto iter = _pool.begin(); iter != _pool.end() && _pool.size() > 1;) { + if (reservedSize < minimalSize) { + reservedSize += [iter->mtlBuffer length]; + ++iter; + continue; + } if (iter->curOffset == 0) { [iter->mtlBuffer release]; iter = _pool.erase(iter); @@ -256,7 +264,7 @@ struct CCMTLGPUBufferImageCopy { MTLOrigin destinationOrigin = {0, 0, 0}; }; -//destroy GPU resource only, delete the owner object mannually. +// destroy GPU resource only, delete the owner object mannually. class CCMTLGPUGarbageCollectionPool final { using GCFunc = std::function; @@ -298,7 +306,7 @@ class CCMTLGPUGarbageCollectionPool final { } protected: - //avoid cross-reference with CCMTLDevice + // avoid cross-reference with CCMTLDevice std::function _getFrameIndex; ccstd::queue _releaseQueue[MAX_FRAMES_IN_FLIGHT]; }; diff --git a/native/cocos/renderer/gfx-vulkan/VKDevice.cpp b/native/cocos/renderer/gfx-vulkan/VKDevice.cpp index ce3e89b09dc..b86fac5f032 100644 --- a/native/cocos/renderer/gfx-vulkan/VKDevice.cpp +++ b/native/cocos/renderer/gfx-vulkan/VKDevice.cpp @@ -62,7 +62,7 @@ CC_DISABLE_WARNINGS() #include "vk_mem_alloc.h" #define THSVS_ERROR_CHECK_MIXED_IMAGE_LAYOUT // remote potential hazard because of programmable blend -//#define THSVS_ERROR_CHECK_POTENTIAL_HAZARD +// #define THSVS_ERROR_CHECK_POTENTIAL_HAZARD #define THSVS_SIMPLER_VULKAN_SYNCHRONIZATION_IMPLEMENTATION #include "thsvs_simpler_vulkan_synchronization.h" CC_ENABLE_WARNINGS() @@ -642,6 +642,10 @@ void CCVKDevice::acquire(Swapchain *const *swapchains, uint32_t count) { _gpuDescriptorSetHub->flush(); _gpuSemaphorePool->reset(); + auto *bufferPool = gpuStagingBufferPool(); + bufferPool->reset(); + bufferPool->shrinkSize(); + for (uint32_t i = 0; i < vkSwapchains.size(); ++i) { VkSemaphore acquireSemaphore = _gpuSemaphorePool->alloc(); VkResult res = vkAcquireNextImageKHR(_gpuDevice->vkDevice, vkSwapchains[i], ~0ULL, @@ -773,12 +777,14 @@ void CCVKDevice::initDeviceFeature() { _features[toNumber(Feature::RASTERIZATION_ORDER_NOCOHERENT)] = true; _features[toNumber(Feature::MULTI_SAMPLE_RESOLVE_DEPTH_STENCIL)] = checkExtension("VK_KHR_depth_stencil_resolve"); - _gpuContext->debugReport = _gpuContext->checkExtension(VK_EXT_DEBUG_REPORT_EXTENSION_NAME) && + _gpuContext->debugReport = + _gpuContext->checkExtension(VK_EXT_DEBUG_REPORT_EXTENSION_NAME) && checkExtension(VK_EXT_DEBUG_MARKER_EXTENSION_NAME) && (vkCmdDebugMarkerBeginEXT != nullptr) && (vkCmdDebugMarkerInsertEXT != nullptr) && (vkCmdDebugMarkerEndEXT != nullptr); - _gpuContext->debugUtils = _gpuContext->checkExtension(VK_EXT_DEBUG_UTILS_EXTENSION_NAME) && + _gpuContext->debugUtils = + _gpuContext->checkExtension(VK_EXT_DEBUG_UTILS_EXTENSION_NAME) && (vkCmdBeginDebugUtilsLabelEXT != nullptr) && (vkCmdInsertDebugUtilsLabelEXT != nullptr) && (vkCmdEndDebugUtilsLabelEXT != nullptr); @@ -1104,14 +1110,14 @@ SampleCount CCVKDevice::getMaxSampleCount(Format format, TextureUsage usage, Tex VkImageFormatProperties imageFormatProperties = {}; vkGetPhysicalDeviceImageFormatProperties(_gpuContext->physicalDevice, vkFormat, VK_IMAGE_TYPE_2D, - VK_IMAGE_TILING_OPTIMAL, usages, 0, &imageFormatProperties); + VK_IMAGE_TILING_OPTIMAL, usages, 0, &imageFormatProperties); if (imageFormatProperties.sampleCounts & VK_SAMPLE_COUNT_64_BIT) return SampleCount::X64; if (imageFormatProperties.sampleCounts & VK_SAMPLE_COUNT_32_BIT) return SampleCount::X32; if (imageFormatProperties.sampleCounts & VK_SAMPLE_COUNT_16_BIT) return SampleCount::X16; - if (imageFormatProperties.sampleCounts & VK_SAMPLE_COUNT_8_BIT) return SampleCount::X8; - if (imageFormatProperties.sampleCounts & VK_SAMPLE_COUNT_4_BIT) return SampleCount::X4; - if (imageFormatProperties.sampleCounts & VK_SAMPLE_COUNT_2_BIT) return SampleCount::X2; + if (imageFormatProperties.sampleCounts & VK_SAMPLE_COUNT_8_BIT) return SampleCount::X8; + if (imageFormatProperties.sampleCounts & VK_SAMPLE_COUNT_4_BIT) return SampleCount::X4; + if (imageFormatProperties.sampleCounts & VK_SAMPLE_COUNT_2_BIT) return SampleCount::X2; return SampleCount::X1; } diff --git a/native/cocos/renderer/gfx-vulkan/VKGPUObjects.h b/native/cocos/renderer/gfx-vulkan/VKGPUObjects.h index a353d1ca3f1..8c8f3c278b9 100644 --- a/native/cocos/renderer/gfx-vulkan/VKGPUObjects.h +++ b/native/cocos/renderer/gfx-vulkan/VKGPUObjects.h @@ -103,7 +103,7 @@ class CCVKGPUDeviceObject : public GFXDeviceObject { CCVKGPUDeviceObject() = default; ~CCVKGPUDeviceObject() = default; - virtual void shutdown(){}; + virtual void shutdown() {}; }; template @@ -128,7 +128,7 @@ class CCVKGPURenderPass final : public CCVKGPUDeviceObject { // helper storage ccstd::vector clearValues; ccstd::vector sampleCounts; // per subpass - ccstd::vector hasSelfDependency; // per subpass + ccstd::vector hasSelfDependency; // per subpass const CCVKGPUGeneralBarrier *getBarrier(size_t index, CCVKGPUDevice *gpuDevice) const; bool hasShadingAttachment(uint32_t subPassId) const; @@ -697,8 +697,8 @@ class CCVKGPUCommandBufferPool final { vkDestroyCommandPool(_device->vkDevice, pool.vkCommandPool, nullptr); pool.vkCommandPool = VK_NULL_HANDLE; } - for (auto &item: pool.usedCommandBuffers)item.clear(); - for (auto &item: pool.commandBuffers)item.clear(); + for (auto &item : pool.usedCommandBuffers) item.clear(); + for (auto &item : pool.commandBuffers) item.clear(); } _pools.clear(); } @@ -839,6 +839,22 @@ class CCVKGPUStagingBufferPool final { } } + void shrinkSize(size_t minimalSize = CHUNK_SIZE) { + size_t reservedSize = 0; + for (auto iter = _pool.begin(); iter != _pool.end() && _pool.size() > 1;) { + if (reservedSize < minimalSize) { + reservedSize += iter->gpuBuffer->size; + ++iter; + continue; + } + if (iter->curOffset == 0) { + iter = _pool.erase(iter); + } else { + ++iter; + } + } + } + private: struct Buffer { IntrusivePtr gpuBuffer;