From 9a418164142854329e0db9e5e5f20260d46f8552 Mon Sep 17 00:00:00 2001 From: stanleyljl <798080536@qq.com> Date: Fri, 18 Aug 2023 16:12:39 +0800 Subject: [PATCH 01/16] occlusion culling test --- .../pipeline/gpu-driven/gpu-culling.effect | 74 ++----------------- native/cocos/renderer/gfx-base/SPIRVUtils.cpp | 1 + .../pipeline/custom/NativeExecutor.cpp | 8 +- .../pipeline/custom/NativePipeline.cpp | 35 +++++++-- .../pipeline/custom/NativePipelineTypes.h | 6 +- .../pipeline/custom/NativeSceneCulling.cpp | 30 ++++++-- 6 files changed, 68 insertions(+), 86 deletions(-) diff --git a/editor/assets/effects/pipeline/gpu-driven/gpu-culling.effect b/editor/assets/effects/pipeline/gpu-driven/gpu-culling.effect index 5ab3a3dbae4..2efbb68b94a 100644 --- a/editor/assets/effects/pipeline/gpu-driven/gpu-culling.effect +++ b/editor/assets/effects/pipeline/gpu-driven/gpu-culling.effect @@ -93,22 +93,6 @@ CCProgram culling-main %{ #if CC_USE_OCCLUSION_CULLING // 2D Polyhedral Bounds of a Clipped, Perspective-Projected 3D Sphere. Michael Mara, Morgan McGuire. 2013 // Consider: minClipZ & projectionSignY & orientation in projection, so we can not use p00 & p11 directly. 
- void projectAABBPerspective(mat4 proj, uint orientation, float minx, float miny, float maxx, float maxy, out vec4 aabb) { - // project to ndc space - if (orientation == 0) { - aabb = vec4(minx * proj[0][0], miny * proj[1][1], maxx * proj[0][0], maxy * proj[1][1]); - } - else if (orientation == 1) { - aabb = vec4(maxy * proj[1][0], minx * proj[0][1], miny * proj[1][0], maxx * proj[0][1]); - } - else if (orientation == 2) { - aabb = vec4(maxx * proj[0][0], maxy * proj[1][1], minx * proj[0][0], miny * proj[1][1]); - } - else { - aabb = vec4(miny * proj[1][0], maxx * proj[0][1], maxy * proj[1][0], minx * proj[0][1]); - } - } - bool projectSpherePerspective(vec3 c, float r, float znear, mat4 proj, uint orientation, out vec4 aabb) { if (-c.z < r + znear) return false; @@ -124,42 +108,7 @@ CCProgram culling-main %{ float miny = (vy * c.y + cr.z) / -(vy * c.z - cr.y); float maxy = (vy * c.y - cr.z) / -(vy * c.z + cr.y); - projectAABBPerspective(proj, orientation, minx, miny, maxx, maxy, aabb); - // ndc space -> uv space - aabb = aabb * vec4(0.5f) + vec4(0.5f); - - return true; - } - - void projectAABBOrtho(mat4 proj, uint orientation, float minx, float miny, float maxx, float maxy, out vec4 aabb) { - // project to ndc space - if (orientation == 0) { aabb = vec4(minx * proj[0][0], miny * proj[1][1], maxx * proj[0][0], maxy * proj[1][1]); - } - else if (orientation == 1) { - aabb = vec4(maxy * proj[1][0], minx * proj[0][1], miny * proj[1][0], maxx * proj[0][1]); - } - else if (orientation == 2) { - aabb = vec4(maxx * proj[0][0], maxy * proj[1][1], minx * proj[0][0], miny * proj[1][1]); - } - else { - aabb = vec4(miny * proj[1][0], maxx * proj[0][1], maxy * proj[1][0], minx * proj[0][1]); - } - - aabb = aabb + vec4(proj[3][0], proj[3][1], proj[3][0], proj[3][1]); - } - - bool projectSphereOrtho(vec3 c, float r, float znear, mat4 proj, uint orientation, out vec4 aabb) - { - if (-c.z < r + znear) return false; - - float minx = c.x - r; - float maxx = c.x + r; - - float miny = 
c.y - r; - float maxy = c.y + r; - - projectAABBOrtho(proj, orientation, minx, miny, maxx, maxy, aabb); // ndc space -> uv space aabb = aabb * vec4(0.5) + vec4(0.5); @@ -172,28 +121,15 @@ CCProgram culling-main %{ vec3 center = (cc_view * vec4(sphere.xyz, 1.0)).xyz; float radius = sphere.w; - if (cc_isPerspective != 0) { - if (!projectSpherePerspective(center, radius, cc_znear, cc_proj, cc_orientation, aabb)) { - return false; - } - } else { - if (!projectSphereOrtho(center, radius, cc_znear, cc_proj, cc_orientation, aabb)) { - return false; - } + if (!projectSpherePerspective(center, radius, cc_znear, cc_proj, cc_orientation, aabb)) { + return false; } - float width = (aabb.z - aabb.x) * cc_depthWidth; - float height = (aabb.w - aabb.y) * cc_depthHeight; + float width = abs(aabb.z - aabb.x) * cc_depthWidth; + float height = abs(aabb.w - aabb.y) * cc_depthHeight; float level = floor(log2(max(width, height))); float depth = textureLod(CCDepthMap, (aabb.xy + aabb.zw) * vec2(0.5), level).x; - float nearest = 0; - - if (cc_isPerspective != 0) { - nearest = -(cc_proj[2][2] + cc_proj[3][2] / (center.z + radius)); - } else { - nearest = (cc_proj[2][2] * (center.z + radius) + cc_proj[3][2]); - } - + float nearest = -(cc_proj[2][2] + cc_proj[3][2] / (center.z + radius)); return nearest > depth; } #endif diff --git a/native/cocos/renderer/gfx-base/SPIRVUtils.cpp b/native/cocos/renderer/gfx-base/SPIRVUtils.cpp index f47730a28fb..f0c9cf159c8 100644 --- a/native/cocos/renderer/gfx-base/SPIRVUtils.cpp +++ b/native/cocos/renderer/gfx-base/SPIRVUtils.cpp @@ -146,6 +146,7 @@ void SPIRVUtils::compileGLSL(ShaderStageFlagBit type, const ccstd::string &sourc spvOptions.disableOptimizer = false; // Do not disable optimizer in debug mode. It will cause the shader to fail to compile. 
spvOptions.optimizeSize = true; + spvOptions.stripDebugInfo = false; #if CC_DEBUG > 0 // spvOptions.validate = true; #else diff --git a/native/cocos/renderer/pipeline/custom/NativeExecutor.cpp b/native/cocos/renderer/pipeline/custom/NativeExecutor.cpp index 7401bca285e..666418fb90f 100644 --- a/native/cocos/renderer/pipeline/custom/NativeExecutor.cpp +++ b/native/cocos/renderer/pipeline/custom/NativeExecutor.cpp @@ -1379,12 +1379,16 @@ struct RenderGraphVisitor : boost::dfs_visitor<> { const auto* scene = camera->getScene(); const auto& queueDesc = ctx.context.sceneCulling.sceneQueryIndex.at(sceneID); const auto& queue = ctx.context.sceneCulling.renderQueues[queueDesc.renderQueueTarget]; + if (any(sceneData.flags & SceneFlags::GPU_DRIVEN)) { + queue.opaqueBatchingQueue.recordCommandBuffer( + ctx.resourceGraph, ctx.device, camera, + ctx.currentPass, ctx.cmdBuff, queue.sceneFlags, sceneData.cullingID); + return; + } queue.opaqueQueue.recordCommandBuffer( ctx.device, camera, ctx.currentPass, ctx.cmdBuff, 0); queue.opaqueInstancingQueue.recordCommandBuffer( ctx.currentPass, ctx.cmdBuff); - queue.opaqueBatchingQueue.recordCommandBuffer(ctx.resourceGraph, ctx.device, camera, - ctx.currentPass, ctx.cmdBuff, queue.sceneFlags, sceneData.cullingID); queue.transparentQueue.recordCommandBuffer( ctx.device, camera, ctx.currentPass, ctx.cmdBuff, 0); queue.transparentInstancingQueue.recordCommandBuffer( diff --git a/native/cocos/renderer/pipeline/custom/NativePipeline.cpp b/native/cocos/renderer/pipeline/custom/NativePipeline.cpp index 74921235375..885f89e4a80 100644 --- a/native/cocos/renderer/pipeline/custom/NativePipeline.cpp +++ b/native/cocos/renderer/pipeline/custom/NativePipeline.cpp @@ -1020,8 +1020,10 @@ void setupGpuDrivenResources( ppl.updateStorageBuffer(std::string(name), bufferSize, gfx::Format::UNKNOWN); } } + if (!hzbName.empty()) { name = hzbName; + name.append(std::to_string(cullingID)); const auto width = utils::previousPOT(camera->getWidth()); const auto 
height = utils::previousPOT(camera->getHeight()); const auto mipLevels = getMipLevels(width, height); @@ -1039,8 +1041,29 @@ void setupGpuDrivenResources( } // namespace -void NativePipeline::addBuiltinGpuCullingPass(uint32_t cullingID, - const scene::Camera *camera, const std::string &hzbName, const scene::Light *light, bool bMainPass) { +bool projectSpherePerspective(Vec3 c, float r, float znear, Mat4 proj) { + if (-c.z < r + znear) return false; + + Vec3 cr = c * r; + float czr2 = c.z * c.z - r * r; + + float vx = std::sqrtf(c.x * c.x + czr2); + float minx = (vx * c.x + cr.z) / -(vx * c.z - cr.x); + float maxx = (vx * c.x - cr.z) / -(vx * c.z + cr.x); + + float vy = std::sqrtf(c.y * c.y + czr2); + float miny = (vy * c.y + cr.z) / -(vy * c.z - cr.y); + float maxy = (vy * c.y - cr.z) / -(vy * c.z + cr.y); + + Vec4 aabb = Vec4(minx * proj.m[0], miny * proj.m[5], maxx * proj.m[0], maxy * proj.m[5]); + // ndc space -> uv space + aabb = aabb * 0.5f + Vec4(0.5f, 0.5f, 0.5f, 0.5f); + + return true; +} + +void NativePipeline::addBuiltinGpuCullingPass(uint32_t cullingID, + const scene::Camera *camera, const std::string &hzbName, const scene::Light *light, bool bMainPass) { auto *scene = camera->getScene(); if (!scene) { return; @@ -1051,6 +1074,8 @@ void NativePipeline::addBuiltinGpuCullingPass(uint32_t cullingID, return; } + //bool result = projectSpherePerspective(Vec3(0, 0, -10), 3, 1, camera->getMatProj()); + auto &sceneCulling = nativeContext.sceneCulling; auto iter = sceneCulling.sceneIDs.find(scene); if (iter == sceneCulling.sceneIDs.cend()) { @@ -1107,7 +1132,7 @@ void NativePipeline::addBuiltinGpuCullingPass(uint32_t cullingID, auto *sampler = device->getSampler({gfx::Filter::POINT, gfx::Filter::POINT, gfx::Filter::NONE, gfx::Address::CLAMP, gfx::Address::CLAMP, gfx::Address::CLAMP}); - gpuCullPass->addTexture(hzbName, "CCDepthMap", sampler, 0); + gpuCullPass->addTexture(hzbName + std::to_string(cullingID), "CCDepthMap", sampler, 0); } const auto materialIndex = 
hzbName.empty() ? 2 : (bMainPass ? 0 : 1); @@ -1209,8 +1234,8 @@ void NativePipeline::addBuiltinHzbGenerationPass( gfx::Sampler *sampler = nullptr; if (device->getCapabilities().supportFilterMinMax) { sampler = device->getSampler({ - gfx::Filter::POINT, - gfx::Filter::POINT, + gfx::Filter::LINEAR, + gfx::Filter::LINEAR, gfx::Filter::NONE, gfx::Address::CLAMP, gfx::Address::CLAMP, diff --git a/native/cocos/renderer/pipeline/custom/NativePipelineTypes.h b/native/cocos/renderer/pipeline/custom/NativePipelineTypes.h index dc137fa1057..05e86e738ea 100644 --- a/native/cocos/renderer/pipeline/custom/NativePipelineTypes.h +++ b/native/cocos/renderer/pipeline/custom/NativePipelineTypes.h @@ -1201,12 +1201,13 @@ struct CullingKey { const scene::Camera* camera{nullptr}; const scene::Light* light{nullptr}; bool castShadow{false}; + bool gpuDriven{false}; uint32_t lightLevel{0xFFFFFFFF}; }; inline bool operator==(const CullingKey& lhs, const CullingKey& rhs) noexcept { - return std::forward_as_tuple(lhs.camera, lhs.light, lhs.castShadow, lhs.lightLevel) == - std::forward_as_tuple(rhs.camera, rhs.light, rhs.castShadow, rhs.lightLevel); + return std::forward_as_tuple(lhs.camera, lhs.light, lhs.castShadow, lhs.gpuDriven, lhs.lightLevel) == + std::forward_as_tuple(rhs.camera, rhs.light, rhs.castShadow, rhs.gpuDriven, rhs.lightLevel); } inline bool operator!=(const CullingKey& lhs, const CullingKey& rhs) noexcept { @@ -1519,6 +1520,7 @@ inline hash_t hash::operator()(const cc::render::Culling hash_combine(seed, val.camera); hash_combine(seed, val.light); hash_combine(seed, val.castShadow); + hash_combine(seed, val.gpuDriven); hash_combine(seed, val.lightLevel); return seed; } diff --git a/native/cocos/renderer/pipeline/custom/NativeSceneCulling.cpp b/native/cocos/renderer/pipeline/custom/NativeSceneCulling.cpp index 522a8480a38..db794800a8a 100644 --- a/native/cocos/renderer/pipeline/custom/NativeSceneCulling.cpp +++ b/native/cocos/renderer/pipeline/custom/NativeSceneCulling.cpp @@ 
-41,19 +41,23 @@ uint32_t SceneCulling::getOrCreateSceneCullingQuery(const SceneData& sceneData) sceneData.camera, sceneData.light.light, bCastShadow, + any(sceneData.flags & SceneFlags::GPU_DRIVEN), sceneData.light.level, }; // find query source auto iter = queries.culledResultIndex.find(key); if (iter == queries.culledResultIndex.end()) { - // create query source - // make query source id - const auto sourceID = numCullingQueries++; - if (numCullingQueries > culledResults.size()) { - // space is not enough, create query source - CC_EXPECTS(numCullingQueries == culledResults.size() + 1); - culledResults.emplace_back(); + uint32_t sourceID = 0xFFFFFFFF; + if (!any(sceneData.flags & SceneFlags::GPU_DRIVEN)) { + // create query source + // make query source id + sourceID = numCullingQueries++; + if (numCullingQueries > culledResults.size()) { + // space is not enough, create query source + CC_EXPECTS(numCullingQueries == culledResults.size() + 1); + culledResults.emplace_back(); + } } // add query source to query index bool added = false; @@ -236,6 +240,9 @@ void SceneCulling::batchCulling(const pipeline::PipelineSceneData& pplSceneData) for (const auto& [scene, queries] : sceneQueries) { CC_ENSURES(scene); for (const auto& [key, sourceID] : queries.culledResultIndex) { + if (sourceID == 0xFFFFFFFF) { + continue; + } CC_EXPECTS(key.camera); CC_EXPECTS(key.camera->getScene() == scene); const auto& camera = *key.camera; @@ -385,10 +392,17 @@ void SceneCulling::fillRenderQueues( const auto& sceneData = get(SceneTag{}, sceneID, rg); // check scene flags + const bool bDrawGPUDriven = any(sceneData.flags & SceneFlags::GPU_DRIVEN); const bool bDrawBlend = any(sceneData.flags & SceneFlags::TRANSPARENT_OBJECT); const bool bDrawOpaqueOrMask = any(sceneData.flags & (SceneFlags::OPAQUE_OBJECT | SceneFlags::CUTOUT_OBJECT)); const bool bDrawShadowCaster = any(sceneData.flags & SceneFlags::SHADOW_CASTER); + if (bDrawGPUDriven) { + CC_EXPECTS(sourceID == 0xFFFFFFFF); + continue; + } 
+ CC_EXPECTS(sourceID != 0xFFFFFFFF); + if (!bDrawShadowCaster && !bDrawBlend && !bDrawOpaqueOrMask) { // nothing to draw continue; @@ -413,7 +427,7 @@ void SceneCulling::fillRenderQueues( // skybox const auto* camera = sceneData.camera; CC_EXPECTS(camera); - if (!any(sceneData.flags & SceneFlags::SHADOW_CASTER) && + if (!any(sceneData.flags & (SceneFlags::SHADOW_CASTER | SceneFlags::TRANSPARENT_OBJECT)) && skybox && skybox->isEnabled() && (static_cast(camera->getClearFlag()) & scene::Camera::SKYBOX_FLAG)) { CC_EXPECTS(skybox->getModel()); From 8faf21ca4bcf078182a54fb3c7314a1f9f162e58 Mon Sep 17 00:00:00 2001 From: stanleyljl <798080536@qq.com> Date: Tue, 22 Aug 2023 10:43:19 +0800 Subject: [PATCH 02/16] update shader --- .../effects/pipeline/gpu-driven/gpu-culling.effect | 9 +++++---- .../effects/pipeline/gpu-driven/hierarchical-z.effect | 11 +++++++---- .../cocos/renderer/pipeline/custom/NativePipeline.cpp | 2 +- native/cocos/scene/gpu-scene/GPUBatchPool.cpp | 2 +- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/editor/assets/effects/pipeline/gpu-driven/gpu-culling.effect b/editor/assets/effects/pipeline/gpu-driven/gpu-culling.effect index 2efbb68b94a..819c8dfe9ff 100644 --- a/editor/assets/effects/pipeline/gpu-driven/gpu-culling.effect +++ b/editor/assets/effects/pipeline/gpu-driven/gpu-culling.effect @@ -72,6 +72,7 @@ CCProgram culling-main %{ #if CC_USE_OCCLUSION_CULLING #pragma rate CCDepthMap pass + #pragma glBinding(5) uniform sampler2D CCDepthMap; #endif @@ -145,7 +146,7 @@ CCProgram culling-main %{ #if CC_GPU_CULLING_MAIN_PASS #if CC_USE_OCCLUSION_CULLING - cc_visibilities[id] = 1; + cc_visibilities[id] = 1U; #endif uint objectId = cc_instances[id].objectId; @@ -157,12 +158,12 @@ CCProgram culling-main %{ #if CC_USE_OCCLUSION_CULLING if (isOcclusionCulled(objectId)) { - cc_visibilities[id] = 0; + cc_visibilities[id] = 0U; return; } #endif #else - if (cc_visibilities[id] == 1) { + if (cc_visibilities[id] == 1U) { return; } @@ -173,7 +174,7 
@@ CCProgram culling-main %{ #endif uint batchId = cc_instances[id].batchId; - uint index = atomicAdd(cc_indirectCmds[batchId].instanceCount, 1); + uint index = atomicAdd(cc_indirectCmds[batchId].instanceCount, 1U); uint instanceId = cc_indirectCmds[batchId].firstInstance + index; cc_drawInstances[instanceId] = objectId; diff --git a/editor/assets/effects/pipeline/gpu-driven/hierarchical-z.effect b/editor/assets/effects/pipeline/gpu-driven/hierarchical-z.effect index 609ed8dc589..4cd243265df 100644 --- a/editor/assets/effects/pipeline/gpu-driven/hierarchical-z.effect +++ b/editor/assets/effects/pipeline/gpu-driven/hierarchical-z.effect @@ -11,12 +11,15 @@ CCProgram hiz-main %{ precision highp float; #pragma rate CCImageData pass - layout(std140) uniform CCImageData { vec2 imageSize; }; + #pragma glBinding(0) + layout(std140) uniform CCImageData { vec2 cc_imageSize; }; #pragma rate targetImage pass - layout(r32f) writeonly uniform image2D targetImage; + #pragma glBinding(1) + layout(r32f) writeonly uniform highp image2D targetImage; #pragma rate sourceImage pass + #pragma glBinding(2) uniform sampler2D sourceImage; layout(local_size_x = 32, local_size_y = 32, local_size_z = 1) in; @@ -26,9 +29,9 @@ CCProgram hiz-main %{ uvec2 uv = gl_GlobalInvocationID.xy; #if CC_USE_SAMPLER_FILTER_MIN_MAX - float depth = texture(sourceImage, (vec2(uv) + vec2(0.5)) / imageSize).x; + float depth = texture(sourceImage, (vec2(uv) + vec2(0.5)) / cc_imageSize).x; #else - vec4 texels = textureGather(sourceImage, (vec2(uv) + vec2(0.5)) / imageSize, 0); + vec4 texels = textureGather(sourceImage, (vec2(uv) + vec2(0.5)) / cc_imageSize, 0); float depth = max(max(texels.x, texels.y), max(texels.z, texels.w)); #endif diff --git a/native/cocos/renderer/pipeline/custom/NativePipeline.cpp b/native/cocos/renderer/pipeline/custom/NativePipeline.cpp index 885f89e4a80..1b1a1ad9259 100644 --- a/native/cocos/renderer/pipeline/custom/NativePipeline.cpp +++ 
b/native/cocos/renderer/pipeline/custom/NativePipeline.cpp @@ -1321,7 +1321,7 @@ void NativePipeline::addBuiltinHzbGenerationPass( // uniforms auto &data = get(RenderGraph::DataTag{}, renderGraph, passID); setVec2Impl( - data, lg, "imageSize", + data, lg, "cc_imageSize", Vec2{ static_cast(width), static_cast(height), diff --git a/native/cocos/scene/gpu-scene/GPUBatchPool.cpp b/native/cocos/scene/gpu-scene/GPUBatchPool.cpp index a459ce601ab..3cc037d94dd 100644 --- a/native/cocos/scene/gpu-scene/GPUBatchPool.cpp +++ b/native/cocos/scene/gpu-scene/GPUBatchPool.cpp @@ -308,7 +308,7 @@ void GPUBatchPool::createBuffers() { const auto indirectStride = getIndirectStride(); const auto indirectSize = indirectStride * _indirectCapacity; - _indirectBuffer = device->createBuffer({gfx::BufferUsageBit::TRANSFER_SRC, + _indirectBuffer = device->createBuffer({gfx::BufferUsageBit::TRANSFER_SRC | gfx::BufferUsageBit::STORAGE, gfx::MemoryUsageBit::DEVICE, indirectSize, indirectStride}); From 285489dfd65d1227b3d39cf4c29a304114fd92eb Mon Sep 17 00:00:00 2001 From: Zeqiang Li Date: Thu, 24 Aug 2023 15:04:33 +0800 Subject: [PATCH 03/16] V3.8.1 gpudriven (#15) --- native/cocos/renderer/gfx-base/GFXDef.cpp | 19 +++ native/cocos/renderer/gfx-base/GFXDef.h | 1 + .../renderer/gfx-vulkan/VKCommandBuffer.cpp | 15 ++- .../cocos/renderer/gfx-vulkan/VKGPUObjects.h | 4 +- .../pipeline/custom/FrameGraphDispatcher.cpp | 117 +++++++++++++++--- .../pipeline/custom/NativeExecutor.cpp | 19 ++- .../pipeline/custom/NativePipeline.cpp | 41 +++--- .../pipeline/custom/NativeRenderQueue.cpp | 4 +- .../pipeline/custom/NativeResourceGraph.cpp | 9 +- 9 files changed, 184 insertions(+), 45 deletions(-) diff --git a/native/cocos/renderer/gfx-base/GFXDef.cpp b/native/cocos/renderer/gfx-base/GFXDef.cpp index ef5664c9923..4a2aac6a036 100644 --- a/native/cocos/renderer/gfx-base/GFXDef.cpp +++ b/native/cocos/renderer/gfx-base/GFXDef.cpp @@ -230,6 +230,25 @@ bool operator==(const GeneralBarrierInfo &lhs, const 
GeneralBarrierInfo &rhs) { return !memcmp(&lhs, &rhs, sizeof(GeneralBarrierInfo)); } +template <> +ccstd::hash_t Hasher::operator()(const ResourceRange &info) const { + ccstd::hash_t seed = sizeof(info); + ccstd::hash_combine(seed, info.width); + ccstd::hash_combine(seed, info.height); + ccstd::hash_combine(seed, info.depthOrArraySize); + ccstd::hash_combine(seed, info.firstSlice); + ccstd::hash_combine(seed, info.numSlices); + ccstd::hash_combine(seed, info.mipLevel); + ccstd::hash_combine(seed, info.levelCount); + ccstd::hash_combine(seed, info.basePlane); + ccstd::hash_combine(seed, info.planeCount); + return seed; +} + +bool operator==(const ResourceRange &lhs, const ResourceRange &rhs) { + return !memcmp(&lhs, &rhs, sizeof(ResourceRange)); +} + template <> ccstd::hash_t Hasher::operator()(const TextureBarrierInfo &info) const { return quickHashTrivialStruct(&info); diff --git a/native/cocos/renderer/gfx-base/GFXDef.h b/native/cocos/renderer/gfx-base/GFXDef.h index 9e0cf77ad8a..06a53df35e5 100644 --- a/native/cocos/renderer/gfx-base/GFXDef.h +++ b/native/cocos/renderer/gfx-base/GFXDef.h @@ -68,6 +68,7 @@ DEFINE_CMP_OP(SamplerInfo) DEFINE_CMP_OP(GeneralBarrierInfo) DEFINE_CMP_OP(TextureBarrierInfo) DEFINE_CMP_OP(BufferBarrierInfo) +DEFINE_CMP_OP(ResourceRange) #undef DEFINE_CMP_OP diff --git a/native/cocos/renderer/gfx-vulkan/VKCommandBuffer.cpp b/native/cocos/renderer/gfx-vulkan/VKCommandBuffer.cpp index f4d7b72c3ed..d3e567c01f3 100644 --- a/native/cocos/renderer/gfx-vulkan/VKCommandBuffer.cpp +++ b/native/cocos/renderer/gfx-vulkan/VKCommandBuffer.cpp @@ -867,6 +867,7 @@ void CCVKCommandBuffer::pipelineBarrier(const GeneralBarrier *barrier, const Buf _barrierEvents.insert({obj, event}); }; + static constexpr bool ENABLE_SPLIT_BARRIER{false}; if (textureBarrierCount > 0) { for (uint32_t i = 0U; i < textureBarrierCount; ++i) { const auto *ccBarrier = static_cast(textureBarriers[i]); @@ -875,11 +876,13 @@ void CCVKCommandBuffer::pipelineBarrier(const 
GeneralBarrier *barrier, const Buf auto *gpuTexture = ccTexture->gpuTexture(); if (ccBarrier->getInfo().type == BarrierType::SPLIT_BEGIN) { - signalEvent(ccTexture, gpuBarrier->srcStageMask); + if (ENABLE_SPLIT_BARRIER) { + signalEvent(ccTexture, gpuBarrier->srcStageMask); + } } else { bool fullBarrier = ccBarrier->getInfo().type == BarrierType::FULL; bool missed = _barrierEvents.find(ccTexture) == _barrierEvents.end(); - if (!fullBarrier && !missed) { + if (!fullBarrier && !missed && ENABLE_SPLIT_BARRIER) { //CC_ASSERT(_barrierEvents.find(ccTexture) != _barrierEvents.end()); VkEvent event = _barrierEvents.at(ccTexture); scheduledEvents.push_back(event); @@ -898,7 +901,6 @@ void CCVKCommandBuffer::pipelineBarrier(const GeneralBarrier *barrier, const Buf } else { gpuTexture->currentAccessTypes.assign(gpuBarrier->barrier.pNextAccesses, gpuBarrier->barrier.pNextAccesses + gpuBarrier->barrier.nextAccessCount); fullImageBarriers.push_back(gpuBarrier->vkBarrier); - fullImageBarriers.back().srcAccessMask = missed ? 
VK_IMAGE_LAYOUT_UNDEFINED : fullImageBarriers.back().srcAccessMask; fullImageBarriers.back().subresourceRange.aspectMask = gpuTexture->aspectMask; if (gpuTexture->swapchain) { fullImageBarriers.back().image = gpuTexture->swapchainVkImages[gpuTexture->swapchain->curImageIndex]; @@ -920,11 +922,13 @@ void CCVKCommandBuffer::pipelineBarrier(const GeneralBarrier *barrier, const Buf auto *gpuBuffer = ccBuffer->gpuBuffer(); if (ccBarrier->getInfo().type == BarrierType::SPLIT_BEGIN) { - signalEvent(ccBuffer, gpuBarrier->srcStageMask); + if (ENABLE_SPLIT_BARRIER) { + signalEvent(ccBuffer, gpuBarrier->srcStageMask); + } } else { bool fullBarrier = ccBarrier->getInfo().type == BarrierType::FULL; bool missed = _barrierEvents.find(ccBuffer) != _barrierEvents.end(); - if (!fullBarrier && !missed) { + if (!fullBarrier && !missed && ENABLE_SPLIT_BARRIER) { CC_ASSERT(_barrierEvents.find(ccBuffer) != _barrierEvents.end()); VkEvent event = _barrierEvents.at(ccBuffer); scheduledEvents.push_back(event); @@ -937,7 +941,6 @@ void CCVKCommandBuffer::pipelineBarrier(const GeneralBarrier *barrier, const Buf } else { gpuBuffer->currentAccessTypes.assign(gpuBarrier->barrier.pNextAccesses, gpuBarrier->barrier.pNextAccesses + gpuBarrier->barrier.nextAccessCount); fullBufferBarriers.push_back(gpuBarrier->vkBarrier); - fullBufferBarriers.back().srcAccessMask = missed ? 
VK_IMAGE_LAYOUT_UNDEFINED : fullBufferBarriers.back().srcAccessMask; fullBufferBarriers.back().buffer = gpuBuffer->vkBuffer; fullSrcStageMask |= gpuBarrier->srcStageMask; fullDstStageMask |= gpuBarrier->dstStageMask; diff --git a/native/cocos/renderer/gfx-vulkan/VKGPUObjects.h b/native/cocos/renderer/gfx-vulkan/VKGPUObjects.h index e0bf8b615cc..8803d7dd624 100644 --- a/native/cocos/renderer/gfx-vulkan/VKGPUObjects.h +++ b/native/cocos/renderer/gfx-vulkan/VKGPUObjects.h @@ -1063,8 +1063,10 @@ class CCVKGPUDescriptorHub final { descriptor->imageLayout = VK_IMAGE_LAYOUT_GENERAL; } else if (hasFlag(texture->gpuTexture->usage, TextureUsage::DEPTH_STENCIL_ATTACHMENT)) { descriptor->imageLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL; - } else { + } else if (flags == AccessFlags::NONE) { descriptor->imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + } else { + descriptor->imageLayout = VK_IMAGE_LAYOUT_GENERAL; } } } diff --git a/native/cocos/renderer/pipeline/custom/FrameGraphDispatcher.cpp b/native/cocos/renderer/pipeline/custom/FrameGraphDispatcher.cpp index b92b33917af..52e6fdf64b5 100644 --- a/native/cocos/renderer/pipeline/custom/FrameGraphDispatcher.cpp +++ b/native/cocos/renderer/pipeline/custom/FrameGraphDispatcher.cpp @@ -545,9 +545,13 @@ auto dependencyCheck(ResourceAccessGraph &rag, ResourceAccessGraph::vertex_descr const auto &states = get(ResourceGraph::StatesTag{}, resourceGraph, resourceID); const auto &desc = get(ResourceGraph::DescTag{}, resourceGraph, resourceID); - auto range = originRange; - if (rag.movedSourceStatus.find(name) != rag.movedSourceStatus.end()) { - range = rag.movedSourceStatus.at(name).range; + const auto& range = originRange; + if (rag.movedTargetStatus.find(name) != rag.movedTargetStatus.end()) { + CC_ASSERT(rag.movedTargetStatus.at(name).full); + for (const auto &[src, ignored] : rag.movedTarget.at(name)) { + const auto &srcRange = rag.movedSourceStatus.at(src).range; + rag.resourceAccess.at(src).emplace(curVertID, 
AccessStatus{accessFlag, srcRange}); + } } bool isExternalPass = get(get(ResourceGraph::TraitsTag{}, resourceGraph), resourceID).hasSideEffects(); @@ -1560,12 +1564,19 @@ void startMovePass(const Graphs &graphs, uint32_t passID, const MovePass &pass) auto lastStatusIter = resourceAccessGraph.resourceAccess.at(pair.source).rbegin(); resourceAccessGraph.movedSourceStatus.emplace(pair.source, AccessStatus{lastStatusIter->second.accessFlag, srcResourceRange}); - resourceAccessGraph.movedTarget[pair.target].emplace(getSubresourceNameByRange(srcResourceRange, resourceAccessGraph.resource()), pair.source); + resourceAccessGraph.movedTarget[pair.target].emplace(pair.source, getSubresourceNameByRange(srcResourceRange, resourceAccessGraph.resource())); - const auto &srcAccess = resourceAccessGraph.resourceAccess.at(pair.source); + auto &srcAccess = resourceAccessGraph.resourceAccess.at(pair.source); auto &targetAccess = resourceAccessGraph.resourceAccess[pair.target]; - targetAccess.clear(); - targetAccess.emplace(*srcAccess.rbegin()); + + if (!targetAccess.empty()) { + // second move + srcAccess.emplace(targetAccess.rbegin()->first, AccessStatus{targetAccess.rbegin()->second.accessFlag, srcResourceRange}); + } + + if (resourceAccessGraph.movedTarget.find(pair.target) != resourceAccessGraph.movedTarget.end()) { + srcAccess.erase(0); + } auto targetResID = findVertex(pair.target, resourceGraph); resourceAccessGraph.resourceIndex[pair.target] = targetResID; @@ -1577,7 +1588,7 @@ void startMovePass(const Graphs &graphs, uint32_t passID, const MovePass &pass) rag.resourceIndex[source] = v; if (rag.movedTarget.find(source) != rag.movedTarget.end()) { - for (const auto &[rangeStr, prt] : rag.movedTarget[source]) { + for (const auto &[prt, rangeStr] : rag.movedTarget[source]) { feedBack(prt, v); } } @@ -1648,14 +1659,14 @@ void subresourceAnalysis(ResourceAccessGraph &rag, ResourceGraph &resg) { using RecursiveFuncType = std::function &, const ccstd::pmr::string &)>; 
RecursiveFuncType addSubres = [&](const PmrFlatMap &subreses, const ccstd::pmr::string &resName) { if (subreses.size() == 1) { - const auto &src = subreses.begin()->second; + const auto &src = subreses.begin()->first; rag.resourceIndex[src] = rag.resourceIndex.at(resName); if (rag.movedTarget.find(src) != rag.movedTarget.end()) { addSubres(rag.movedTarget.at(src), src); } } else { - for (const auto &[rangeStr, subres] : subreses) { + for (const auto &[subres, rangeStr] : subreses) { auto descResViewID = findVertex(subres, resg); auto targetResID = rag.resourceIndex.at(resName); @@ -1664,7 +1675,7 @@ void subresourceAnalysis(ResourceAccessGraph &rag, ResourceGraph &resg) { const auto &targetDesc = get(ResourceGraph::DescTag{}, resg, targetResID); const auto &srcResourceRange = rag.movedSourceStatus.at(subres).range; const auto &targetTraits = get(ResourceGraph::TraitsTag{}, resg, targetResID); - const auto &indexName = concatResName(targetName, subres, rag.resource()); + const auto &indexName = concatResName(targetName, rangeStr, rag.resource()); auto subresID = findVertex(indexName, resg); if (subresID == ResourceGraph::null_vertex()) { const auto &subView = makeSubresourceView(srcDesc, targetDesc, srcResourceRange); @@ -1681,6 +1692,7 @@ void subresourceAnalysis(ResourceAccessGraph &rag, ResourceGraph &resg) { targetResID); } rag.resourceIndex[subres] = subresID; + rag.resourceIndex[indexName] = subresID; if (rag.movedTarget.find(subres) != rag.movedTarget.end()) { addSubres(rag.movedTarget.at(subres), subres); @@ -1875,6 +1887,13 @@ void buildBarriers(FrameGraphDispatcher &fgDispatcher) { return gfxBarrier; }; + struct AccessWeight{ + ccstd::pmr::string name; + gfx::AccessFlags access{gfx::AccessFlags::NONE}; + ResourceAccessGraph::vertex_descriptor vertID{0}; + }; + ccstd::unordered_map>> rangeLastAccess; + // found pass id in this map ? barriers you should commit when run into this pass // : or no extra barrier needed. 
for (auto &accessPair : rag.resourceAccess) { @@ -1895,6 +1914,27 @@ void buildBarriers(FrameGraphDispatcher &fgDispatcher) { auto srcPassID = get(ResourceAccessGraph::PassIDTag{}, rag, srcRagVertID); auto dstPassID = get(ResourceAccessGraph::PassIDTag{}, rag, dstRagVertID); + std::function interAccessCheck = + [&](const ccstd::pmr::string &sName, ResourceAccessGraph::vertex_descriptor startVert, ResourceAccessGraph::vertex_descriptor endVert) { + bool interAccessed = false; + if (rag.movedTarget.find(sName) != rag.movedTarget.end()) { + for (const auto &[src, ignored] : rag.movedTarget.at(sName)) { + const auto srcStart = rag.resourceAccess.at(src).begin()->first; + const auto srcEnd = rag.resourceAccess.at(src).rbegin()->first; + interAccessed |= !((srcStart > endVert) || (srcEnd < startVert)); + interAccessed |= interAccessCheck(src, startVert, endVert); + if (interAccessed) { + return true; + } + } + } + return interAccessed; + }; + + if (interAccessCheck(resName, srcRagVertID, dstRagVertID)) { + continue; + } + if (holds(dstPassID, renderGraph) || holds(dstPassID, renderGraph)) { const auto &fgRenderPassInfo = get(ResourceAccessGraph::RenderPassInfoTag{}, rag, dstRagVertID); if (fgRenderPassInfo.viewIndex.find(resName) != fgRenderPassInfo.viewIndex.end() || @@ -1938,8 +1978,14 @@ void buildBarriers(FrameGraphDispatcher &fgDispatcher) { continue; } + auto sRange = iter->second.range; if (rag.movedSourceStatus.find(resName) != rag.movedSourceStatus.end()) { + // resource id realResourceID = rag.resourceIndex.at(resName); + const auto &tName = get(ResourceGraph::NameTag{}, resourceGraph, realResourceID); + // parent id + realResourceID = realID(tName, resourceGraph); + sRange = rag.movedSourceStatus.at(resName).range; } // undefined access @@ -1950,8 +1996,8 @@ void buildBarriers(FrameGraphDispatcher &fgDispatcher) { firstMeetBarrier.type = gfx::BarrierType::FULL; firstMeetBarrier.beginVert = dstPassID; firstMeetBarrier.endVert = dstPassID; - 
firstMeetBarrier.beginStatus = iter->second; - firstMeetBarrier.endStatus = nextIter->second; + firstMeetBarrier.beginStatus = {iter->second.accessFlag, sRange}; + firstMeetBarrier.endStatus = {nextIter->second.accessFlag, sRange}; firstMeetBarrier.barrier = getGFXBarrier(firstMeetBarrier); } else if (accessDependent(iter->second.accessFlag, nextIter->second.accessFlag, isBuffer)) { auto &srcBarrierNode = get(ResourceAccessGraph::BarrierTag{}, rag, srcRagVertID); @@ -1959,8 +2005,8 @@ void buildBarriers(FrameGraphDispatcher &fgDispatcher) { beginBarrier.resourceID = realResourceID; beginBarrier.beginVert = srcPassID; beginBarrier.endVert = dstPassID; - beginBarrier.beginStatus = iter->second; - beginBarrier.endStatus = nextIter->second; + beginBarrier.beginStatus = {iter->second.accessFlag, sRange}; + beginBarrier.endStatus = {nextIter->second.accessFlag, sRange}; if (isPassExecAdjecent(iter->first, nextIter->first)) { beginBarrier.type = gfx::BarrierType::FULL; } else { @@ -1972,8 +2018,8 @@ void buildBarriers(FrameGraphDispatcher &fgDispatcher) { endBarrier.type = gfx::BarrierType::SPLIT_END; endBarrier.beginVert = srcPassID; endBarrier.endVert = dstPassID; - endBarrier.beginStatus = iter->second; - endBarrier.endStatus = nextIter->second; + endBarrier.beginStatus = {iter->second.accessFlag, sRange}; + endBarrier.endStatus = {nextIter->second.accessFlag, sRange}; endBarrier.barrier = getGFXBarrier(endBarrier); } beginBarrier.barrier = getGFXBarrier(beginBarrier); @@ -1982,6 +2028,15 @@ void buildBarriers(FrameGraphDispatcher &fgDispatcher) { const auto &traits = get(ResourceGraph::TraitsTag{}, resourceGraph, realResourceID); auto &states = get(ResourceGraph::StatesTag{}, resourceGraph, realResourceID); if (traits.hasSideEffects()) { + if (out_degree(realResourceID, resourceGraph) > 1) { + auto sRange = iter->second.range; + if (rag.movedSourceStatus.find(resName) != rag.movedSourceStatus.end()) { + sRange = rag.movedSourceStatus.at(resName).range; + } + if 
(rangeLastAccess[realResourceID][sRange].vertID < (accessRecord.rbegin()->first)) { + rangeLastAccess[realResourceID][sRange] = AccessWeight{resName, iter->second.accessFlag, accessRecord.rbegin()->first}; + } + } states.states = iter->second.accessFlag; if (traits.residency == ResourceResidency::BACKBUFFER) { auto lastAccessPassID = get(ResourceAccessGraph::PassIDTag{}, rag, iter->first); @@ -2000,6 +2055,34 @@ void buildBarriers(FrameGraphDispatcher &fgDispatcher) { } } + for (const auto &[resID, rangeWeight] : rangeLastAccess) { + ccstd::unordered_map rangeMap; + for (const auto& [range, weight] : rangeWeight) { + rangeMap[weight.access] += 1; + } + auto iter = std::max_element(rangeMap.begin(), rangeMap.end(), [](const auto &lhs, const auto &rhs) { + return lhs.second < rhs.second; + }); + auto mostCommonAccesss = iter->first; + + for (const auto &[range, weight] : rangeWeight) { + if (weight.access != mostCommonAccesss) { + auto &barrierNode = get(ResourceAccessGraph::BarrierTag{}, rag, weight.vertID); + auto& barrier = barrierNode.rearBarriers.emplace_back(); + barrier.resourceID = resID; + barrier.beginVert = weight.vertID; + barrier.endVert = weight.vertID; + barrier.type = gfx::BarrierType::FULL; + barrier.beginStatus = {weight.access, range}; + barrier.endStatus = {mostCommonAccesss, range}; + barrier.barrier = getGFXBarrier(barrier); + } + } + auto &states = get(ResourceGraph::StatesTag{}, resourceGraph, resID); + states.states = mostCommonAccesss; + } + + { for (auto &fgRenderpassInfo : rag.rpInfo) { auto &colorAttachments = fgRenderpassInfo.rpInfo.colorAttachments; diff --git a/native/cocos/renderer/pipeline/custom/NativeExecutor.cpp b/native/cocos/renderer/pipeline/custom/NativeExecutor.cpp index 666418fb90f..db1eb79e012 100644 --- a/native/cocos/renderer/pipeline/custom/NativeExecutor.cpp +++ b/native/cocos/renderer/pipeline/custom/NativeExecutor.cpp @@ -541,7 +541,21 @@ gfx::DescriptorSet* initDescriptorSet( // render graph textures auto* texture = 
resg.getTexture(iter->second); CC_ENSURES(texture); - newSet->bindTexture(bindID, texture); + gfx::AccessFlags access = gfx::AccessFlagBit::NONE; + if (accessNode != nullptr) { + auto resID = iter->second; + // whole access only now. + auto parentID = parent(resID, resg); + parentID = parentID == ResourceGraph::null_vertex() ? resID : parentID; + const auto& resName = get(ResourceGraph::NameTag{}, resg, parentID); + + auto iter2 = std::find_if(accessNode->resourceStatus.begin(), accessNode->resourceStatus.end(), [&resName, &resg](const auto& pair) { + return strstr(resName.c_str(), pair.first.c_str()) || strstr(pair.first.c_str(), resName.c_str()); + }); + + access = iter2->second.accessFlag; + } + newSet->bindTexture(bindID, texture, 0, access); } bindID += d.count; } @@ -925,11 +939,12 @@ struct RenderGraphUploadVisitor : boost::dfs_visitor<> { auto& set = iter->second; const auto& user = get(RenderGraph::DataTag{}, ctx.g, vertID); auto& node = ctx.context.layoutGraphResources.at(layoutID); + const auto& accessNode = ctx.fgd.getAccessNode(vertID); auto* perPassSet = initDescriptorSet( ctx.resourceGraph, ctx.device, ctx.cmdBuff, *ctx.context.defaultResource, ctx.lg, - resourceIndex, set, user, node); + resourceIndex, set, user, node, &accessNode); CC_ENSURES(perPassSet); ctx.renderGraphDescriptorSet[vertID] = perPassSet; } else if (holds(vertID, ctx.g)) { diff --git a/native/cocos/renderer/pipeline/custom/NativePipeline.cpp b/native/cocos/renderer/pipeline/custom/NativePipeline.cpp index 1b1a1ad9259..d8896be1bef 100644 --- a/native/cocos/renderer/pipeline/custom/NativePipeline.cpp +++ b/native/cocos/renderer/pipeline/custom/NativePipeline.cpp @@ -1031,9 +1031,9 @@ void setupGpuDrivenResources( auto resID = findVertex(name, resg); if (resID == ResourceGraph::null_vertex()) { ppl.addResource(std::string(name), ResourceDimension::TEXTURE2D, gfx::Format::R32F, width, height, 1, 1, - mipLevels, gfx::SampleCount::X1, ResourceFlags::SAMPLED | ResourceFlags::STORAGE, 
ResourceResidency::MANAGED); + mipLevels, gfx::SampleCount::X1, ResourceFlags::SAMPLED | ResourceFlags::STORAGE, ResourceResidency::PERSISTENT); } else { - CC_EXPECTS(holds(resID, resg)); + CC_EXPECTS(holds(resID, resg)); ppl.updateResource(std::string(name), gfx::Format::R32F, width, height, 1, 1, mipLevels, gfx::SampleCount::X1); } } @@ -1181,6 +1181,7 @@ void NativePipeline::addBuiltinHzbGenerationPass( currMipName.reserve(targetHzbName.size() + 6); currMipName.append(targetHzbName); + const std::string hzbSubreIDStr = std::to_string(renderGraph.computePasses.size()); MovePass move(renderGraph.get_allocator()); move.movePairs.reserve(targetDesc.mipLevels); // register all mips @@ -1194,6 +1195,7 @@ void NativePipeline::addBuiltinHzbGenerationPass( for (uint32_t k = 0; k != targetDesc.mipLevels; ++k) { currMipName.resize(targetHzbName.size()); CC_ENSURES(currMipName == std::string_view{targetHzbName}); + currMipName.append(hzbSubreIDStr); currMipName.append("_Mip"); currMipName.append(std::to_string(k)); @@ -1208,26 +1210,35 @@ void NativePipeline::addBuiltinHzbGenerationPass( std::forward_as_tuple(), std::forward_as_tuple(), resourceGraph); - - MovePair pair(move.get_allocator()); - pair.source = currMipName; - pair.target = targetHzbName; - pair.mipLevels = 1; - pair.numSlices = 1; - pair.targetMostDetailedMip = k; - move.movePairs.emplace_back(std::move(pair)); } else { CC_EXPECTS(holds(resID, resourceGraph)); + const auto &localDesc = get(ResourceGraph::DescTag{}, resourceGraph, resID); updateResourceImpl( resourceGraph, currMipName, desc.format, - desc.width, - desc.height, + localDesc.width, + localDesc.height, 1, 1, 1, gfx::SampleCount::X1); } - desc.width = getHalfSize(desc.width); - desc.height = getHalfSize(desc.height); + + bool moved = std::any_of(renderGraph.movePasses.begin(), renderGraph.movePasses.end(), [&currMipName](const MovePass &movePass) { + return std::any_of(movePass.movePairs.begin(), movePass.movePairs.end(), [&currMipName](const 
MovePair &pair) { + return pair.source == currMipName; + }); + }); + if (!moved) { + MovePair pair(move.get_allocator()); + pair.source = currMipName; + pair.target = targetHzbName; + pair.mipLevels = 1; + pair.numSlices = 1; + pair.targetMostDetailedMip = k; + move.movePairs.emplace_back(std::move(pair)); + + desc.width = getHalfSize(desc.width); + desc.height =getHalfSize(desc.height); + } } } @@ -1278,6 +1289,7 @@ void NativePipeline::addBuiltinHzbGenerationPass( } else { prevMipName.resize(targetHzbName.size()); CC_ENSURES(prevMipName == std::string_view{targetHzbName}); + prevMipName.append(hzbSubreIDStr); prevMipName.append("_Mip"); CC_EXPECTS(k > 0); prevMipName.append(std::to_string(k - 1)); // previous mip, k - 1 @@ -1305,6 +1317,7 @@ void NativePipeline::addBuiltinHzbGenerationPass( // target currMipName.resize(targetHzbName.size()); CC_ENSURES(currMipName == std::string_view{targetHzbName}); + currMipName.append(hzbSubreIDStr); currMipName.append("_Mip"); currMipName.append(std::to_string(k)); auto res = pass.computeViews.emplace( diff --git a/native/cocos/renderer/pipeline/custom/NativeRenderQueue.cpp b/native/cocos/renderer/pipeline/custom/NativeRenderQueue.cpp index 144c5054eee..19f31d45ce3 100644 --- a/native/cocos/renderer/pipeline/custom/NativeRenderQueue.cpp +++ b/native/cocos/renderer/pipeline/custom/NativeRenderQueue.cpp @@ -205,8 +205,8 @@ void RenderBatchingQueue::recordCommandBuffer( 0, indirectBuffer->getSize() }; - auto *bufferBarrier = gfx::Device::getInstance()->getBufferBarrier(barrierInfo); - cmdBuffer->pipelineBarrier(nullptr, {bufferBarrier}, {indirectBuffer}, {}, {}); + //auto *bufferBarrier = gfx::Device::getInstance()->getBufferBarrier(barrierInfo); + //cmdBuffer->pipelineBarrier(nullptr, {bufferBarrier}, {indirectBuffer}, {}, {}); const auto supportFirstInstance = device->getCapabilities().supportFirstInstance; auto *batchPool = gpuScene->getBatchPool(); diff --git a/native/cocos/renderer/pipeline/custom/NativeResourceGraph.cpp 
b/native/cocos/renderer/pipeline/custom/NativeResourceGraph.cpp index 57e26188b6c..edea0639c7a 100644 --- a/native/cocos/renderer/pipeline/custom/NativeResourceGraph.cpp +++ b/native/cocos/renderer/pipeline/custom/NativeResourceGraph.cpp @@ -215,9 +215,12 @@ void ResourceGraph::mount(gfx::Device* device, vertex_descriptor vertID) { } CC_ENSURES(buffer); }, - [&](const IntrusivePtr& texture) { - CC_EXPECTS(texture); - std::ignore = texture; + [&](IntrusivePtr& texture) { + if (!texture) { + auto info = getTextureInfo(desc); + texture = device->createTexture(info); + } + CC_ENSURES(texture); }, [&](const IntrusivePtr& fb) { CC_EXPECTS(fb); From 428dc06e6dd9c6b1942200a3a8fd1ae0d655f558 Mon Sep 17 00:00:00 2001 From: Zeqiang Li Date: Thu, 24 Aug 2023 18:20:20 +0800 Subject: [PATCH 04/16] indirect barrier (#16) * indirect barrier --- .../pipeline/custom/FGDispatcherTypes.h | 7 ++- .../pipeline/custom/FrameGraphDispatcher.cpp | 53 ++++++++++++++++++- .../pipeline/custom/NativeExecutor.cpp | 2 +- 3 files changed, 58 insertions(+), 4 deletions(-) diff --git a/native/cocos/renderer/pipeline/custom/FGDispatcherTypes.h b/native/cocos/renderer/pipeline/custom/FGDispatcherTypes.h index 4f6b586f6f4..7e4ef7588ec 100644 --- a/native/cocos/renderer/pipeline/custom/FGDispatcherTypes.h +++ b/native/cocos/renderer/pipeline/custom/FGDispatcherTypes.h @@ -298,7 +298,8 @@ struct ResourceAccessGraph { PmrTransparentMap> resourceAccess; PmrFlatMap> movedTarget; PmrFlatMap movedSourceStatus; - PmrFlatMap movedTargetStatus; + PmrFlatMap movedTargetStatus; + PmrFlatMap> externalAccess; }; struct RelationGraph { @@ -484,7 +485,9 @@ struct FrameGraphDispatcher { PmrFlatMap buildDescriptorIndex( const PmrTransparentMap>&computeViews, - boost::container::pmr::memory_resource* scratch) const; + boost::container::pmr::memory_resource* scratch) const; + + void registerResourceAccess(RenderGraph::vertex_descriptor v, const ccstd::pmr::string & name, gfx::AccessFlags access); ResourceAccessGraph 
resourceAccessGraph; ResourceGraph& resourceGraph; diff --git a/native/cocos/renderer/pipeline/custom/FrameGraphDispatcher.cpp b/native/cocos/renderer/pipeline/custom/FrameGraphDispatcher.cpp index 52e6fdf64b5..52f7f4860a1 100644 --- a/native/cocos/renderer/pipeline/custom/FrameGraphDispatcher.cpp +++ b/native/cocos/renderer/pipeline/custom/FrameGraphDispatcher.cpp @@ -230,6 +230,10 @@ PmrFlatMap FrameGraphDispatcher:: return resourceIndex; } +void FrameGraphDispatcher::registerResourceAccess(RenderGraph::vertex_descriptor v, const ccstd::pmr::string &name, gfx::AccessFlags access) { + resourceAccessGraph.externalAccess[v] = std::make_pair(name, access); +} + PmrFlatMap FrameGraphDispatcher::buildDescriptorIndex( const PmrTransparentMap> &computeViews, const PmrTransparentMap &rasterViews, @@ -922,6 +926,29 @@ void extractNames(const ccstd::pmr::string &resName, } } +auto checkExternalAccess(const Graphs &graphs, + ResourceAccessGraph::vertex_descriptor ragVertID) { + const auto &[renderGraph, layoutGraphData, resourceGraph, resourceAccessGraph, relationGraph] = graphs; + if (!resourceAccessGraph.externalAccess.empty()) { + auto iter = resourceAccessGraph.externalAccess.begin(); + auto targetPassID = iter->first; + while (parent(targetPassID, renderGraph) != RenderGraph::null_vertex()) { + targetPassID = parent(targetPassID, renderGraph); + } + auto rvID = resourceAccessGraph.passIndex.at(targetPassID); + while (iter != resourceAccessGraph.externalAccess.end() && rvID <= ragVertID) { + const auto &range = getResourceRange(vertex(iter->second.first, resourceGraph), resourceGraph); + ViewStatus view{iter->second.first, AccessType::READ_WRITE /*no use*/, gfx::ShaderStageFlagBit::NONE, iter->second.second, range}; + const auto&[lastVertId, nearestAccess] = dependencyCheck(resourceAccessGraph, rvID, resourceGraph, view); + tryAddEdge(lastVertId, rvID, resourceAccessGraph); + tryAddEdge(lastVertId, rvID, relationGraph); + //dependent |= (lastVertId != EXPECT_START_ID); + 
iter = resourceAccessGraph.externalAccess.erase(iter); + rvID = resourceAccessGraph.passIndex.at(targetPassID); + } + } +} + auto checkRasterViews(const Graphs &graphs, ResourceAccessGraph::vertex_descriptor ragVertID, ResourceAccessNode &node, @@ -1609,6 +1636,24 @@ void startMovePass(const Graphs &graphs, uint32_t passID, const MovePass &pass) } } +void startScene(const Graphs &graphs, RenderGraph::vertex_descriptor sceneID, const SceneData &sceneData) { // NOLINT(readability-convert-member-functions-to-static) + const auto &[renderGraph, layoutGraphData, resourceGraph, resourceAccessGraph, relationGraph] = graphs; + const auto *const camera = sceneData.camera; + CC_EXPECTS(camera); + + if (any(sceneData.flags & SceneFlags::GPU_DRIVEN)) { + //auto prtID = sceneID; + //while (parent(prtID, renderGraph) != RenderGraph::null_vertex()) { + // prtID = parent(prtID, renderGraph); + //} + ccstd::pmr::string name("CCDrawIndirectBuffer", resourceAccessGraph.get_allocator()); + name.append(std::to_string(sceneData.cullingID)); + // auto resID = findVertex(name, resourceGraph); + // const auto &indirectBuffer = get(ManagedBufferTag{}, resID, resg).buffer.get(); + resourceAccessGraph.externalAccess[sceneID] = std::make_pair(name, gfx::AccessFlags::INDIRECT_BUFFER); + } +} + struct DependencyVisitor : boost::dfs_visitor<> { void discover_vertex(RenderGraph::vertex_descriptor passID, const AddressableView &gv) const { @@ -1635,9 +1680,14 @@ struct DependencyVisitor : boost::dfs_visitor<> { [&](const RaytracePass &pass) { startRaytracePass(graphs, passID, pass); }, + [&](const SceneData &scene) { + startScene(graphs, passID, scene); + }, [&](const auto & /*pass*/) { // do nothing }); + const auto curRagVert = graphs.resourceAccessGraph.passIndex.size() - 2; + checkExternalAccess(graphs, curRagVert); } void finish_vertex(RenderGraph::vertex_descriptor passID, @@ -1945,7 +1995,8 @@ void buildBarriers(FrameGraphDispatcher &fgDispatcher) { } // subpass layout transition - if 
((srcRagVertID != 0) && (holds(srcPassID, renderGraph) || holds(srcPassID, renderGraph))) { + bool dstRenderPass = (srcRagVertID != 0) && (holds(srcPassID, renderGraph) || holds(srcPassID, renderGraph)); + if (dstRenderPass && !isBuffer) { auto ragVertID = srcRagVertID; if (holds(srcPassID, renderGraph)) { auto parentID = parent(srcPassID, renderGraph); diff --git a/native/cocos/renderer/pipeline/custom/NativeExecutor.cpp b/native/cocos/renderer/pipeline/custom/NativeExecutor.cpp index db1eb79e012..38c90e68e56 100644 --- a/native/cocos/renderer/pipeline/custom/NativeExecutor.cpp +++ b/native/cocos/renderer/pipeline/custom/NativeExecutor.cpp @@ -78,7 +78,7 @@ struct RenderGraphVisitorContext { LayoutGraphData& lg; const RenderGraph& g; ResourceGraph& resourceGraph; - const FrameGraphDispatcher& fgd; + FrameGraphDispatcher& fgd; const ccstd::pmr::vector& validPasses; gfx::Device* device = nullptr; gfx::CommandBuffer* cmdBuff = nullptr; From ff47d90e3750167929323d69a81646db8bc6547b Mon Sep 17 00:00:00 2001 From: Zeqiang Li Date: Mon, 28 Aug 2023 17:31:26 +0800 Subject: [PATCH 05/16] handle first meet layout issue (#17) * fg move * remove dbg code * indirect barrier * remove dbg code * first meet logic * fix depth resource residency * clear first hzb --- .../pipeline/custom/FrameGraphDispatcher.cpp | 20 +++---- .../pipeline/custom/NativePipeline.cpp | 54 ++++++++++++++----- 2 files changed, 51 insertions(+), 23 deletions(-) diff --git a/native/cocos/renderer/pipeline/custom/FrameGraphDispatcher.cpp b/native/cocos/renderer/pipeline/custom/FrameGraphDispatcher.cpp index 52f7f4860a1..f477a18ad8d 100644 --- a/native/cocos/renderer/pipeline/custom/FrameGraphDispatcher.cpp +++ b/native/cocos/renderer/pipeline/custom/FrameGraphDispatcher.cpp @@ -736,7 +736,11 @@ auto getTextureStatus(std::string_view name, AccessType access, gfx::ShaderStage } if (access != AccessType::READ) { - texUsage |= (mapTextureFlags(desc.flags) & (gfx::TextureUsage::COLOR_ATTACHMENT | 
gfx::TextureUsage::DEPTH_STENCIL_ATTACHMENT | gfx::TextureUsage::STORAGE)); + if (rasterized) { + texUsage |= (mapTextureFlags(desc.flags) & (gfx::TextureUsage::COLOR_ATTACHMENT | gfx::TextureUsage::DEPTH_STENCIL_ATTACHMENT)); + } else { + texUsage |= (mapTextureFlags(desc.flags) & (gfx::TextureUsage::STORAGE)); + } } accesFlag = gfx::getAccessFlags(texUsage, toGfxAccess(access), vis); @@ -1593,19 +1597,15 @@ void startMovePass(const Graphs &graphs, uint32_t passID, const MovePass &pass) resourceAccessGraph.movedSourceStatus.emplace(pair.source, AccessStatus{lastStatusIter->second.accessFlag, srcResourceRange}); resourceAccessGraph.movedTarget[pair.target].emplace(pair.source, getSubresourceNameByRange(srcResourceRange, resourceAccessGraph.resource())); + auto targetResID = findVertex(pair.target, resourceGraph); auto &srcAccess = resourceAccessGraph.resourceAccess.at(pair.source); - auto &targetAccess = resourceAccessGraph.resourceAccess[pair.target]; + auto targetAccessIter = resourceAccessGraph.resourceAccess.find(pair.target); - if (!targetAccess.empty()) { - // second move - srcAccess.emplace(targetAccess.rbegin()->first, AccessStatus{targetAccess.rbegin()->second.accessFlag, srcResourceRange}); + if (targetAccessIter != resourceAccessGraph.resourceAccess.end() && !targetAccessIter->second.empty()) { + auto lastAccessIter = targetAccessIter->second.rbegin(); + srcAccess.emplace(lastAccessIter->first, AccessStatus{lastAccessIter->second.accessFlag, srcResourceRange}); } - if (resourceAccessGraph.movedTarget.find(pair.target) != resourceAccessGraph.movedTarget.end()) { - srcAccess.erase(0); - } - - auto targetResID = findVertex(pair.target, resourceGraph); resourceAccessGraph.resourceIndex[pair.target] = targetResID; auto &rag = resourceAccessGraph; diff --git a/native/cocos/renderer/pipeline/custom/NativePipeline.cpp b/native/cocos/renderer/pipeline/custom/NativePipeline.cpp index d8896be1bef..38b9a5eb640 100644 --- 
a/native/cocos/renderer/pipeline/custom/NativePipeline.cpp +++ b/native/cocos/renderer/pipeline/custom/NativePipeline.cpp @@ -297,15 +297,28 @@ uint32_t NativePipeline::addDepthStencil(const ccstd::string &name, gfx::Format samplerInfo.minFilter = gfx::Filter::POINT; samplerInfo.mipFilter = gfx::Filter::NONE; - auto resID = addVertex( - ManagedTextureTag{}, - std::forward_as_tuple(name.c_str()), - std::forward_as_tuple(desc), - std::forward_as_tuple(ResourceTraits{residency}), - std::forward_as_tuple(), - std::forward_as_tuple(samplerInfo), - std::forward_as_tuple(), - resourceGraph); + ResourceGraph::vertex_descriptor resID = ResourceGraph::null_vertex(); + if (residency == ResourceResidency::PERSISTENT) { + resID = addVertex( + PersistentTextureTag{}, + std::forward_as_tuple(name.c_str()), + std::forward_as_tuple(desc), + std::forward_as_tuple(ResourceTraits{residency}), + std::forward_as_tuple(), + std::forward_as_tuple(samplerInfo), + std::forward_as_tuple(), + resourceGraph); + } else { + resID = addVertex( + ManagedTextureTag{}, + std::forward_as_tuple(name.c_str()), + std::forward_as_tuple(desc), + std::forward_as_tuple(ResourceTraits{residency}), + std::forward_as_tuple(), + std::forward_as_tuple(samplerInfo), + std::forward_as_tuple(), + resourceGraph); + } addSubresourceNode(resID, name, resourceGraph); return resID; @@ -886,7 +899,7 @@ constexpr uint32_t getMipLevels(uint32_t width, uint32_t height) noexcept { return result; } -void setupGpuDrivenResources( +bool setupGpuDrivenResources( NativePipeline &ppl, const scene::Camera *camera, scene::GPUScene *gpuScene, uint32_t sceneID, uint32_t cullingID, ResourceGraph &resg, const std::string &hzbName) { ccstd::pmr::string name(resg.get_allocator()); @@ -1021,6 +1034,7 @@ void setupGpuDrivenResources( } } + bool firstMeet = false; if (!hzbName.empty()) { name = hzbName; name.append(std::to_string(cullingID)); @@ -1031,12 +1045,14 @@ void setupGpuDrivenResources( auto resID = findVertex(name, resg); if (resID 
== ResourceGraph::null_vertex()) { ppl.addResource(std::string(name), ResourceDimension::TEXTURE2D, gfx::Format::R32F, width, height, 1, 1, - mipLevels, gfx::SampleCount::X1, ResourceFlags::SAMPLED | ResourceFlags::STORAGE, ResourceResidency::PERSISTENT); + mipLevels, gfx::SampleCount::X1, ResourceFlags::COLOR_ATTACHMENT | ResourceFlags::SAMPLED | ResourceFlags::STORAGE, ResourceResidency::PERSISTENT); + firstMeet = true; } else { CC_EXPECTS(holds(resID, resg)); ppl.updateResource(std::string(name), gfx::Format::R32F, width, height, 1, 1, mipLevels, gfx::SampleCount::X1); } } + return firstMeet; } } // namespace @@ -1062,7 +1078,7 @@ bool projectSpherePerspective(Vec3 c, float r, float znear, Mat4 proj) { return true; } -void NativePipeline::addBuiltinGpuCullingPass(uint32_t cullingID, +void NativePipeline::addBuiltinGpuCullingPass(uint32_t cullingID, const scene::Camera *camera, const std::string &hzbName, const scene::Light *light, bool bMainPass) { auto *scene = camera->getScene(); if (!scene) { @@ -1083,7 +1099,7 @@ void NativePipeline::addBuiltinGpuCullingPass(uint32_t cullingID, } const uint32_t sceneID = iter->second; - setupGpuDrivenResources(*this, camera, gpuScene, sceneID, cullingID, resourceGraph, hzbName); + bool firstPass = setupGpuDrivenResources(*this, camera, gpuScene, sceneID, cullingID, resourceGraph, hzbName); if (light) { // build light culling pass @@ -1097,6 +1113,18 @@ void NativePipeline::addBuiltinGpuCullingPass(uint32_t cullingID, const std::string drawInstanceBuffer = "CCDrawInstanceBuffer" + std::to_string(cullingID); const std::string visibilityBuffer = "CCVisibilityBuffer" + std::to_string(cullingID); + // first pass hzb clear + { + if (firstPass) { + const auto width = utils::previousPOT(camera->getWidth()); + const auto height = utils::previousPOT(camera->getHeight()); + std::unique_ptr clearPass(addRenderPass(width, height, "default")); + clearPass->addRenderTarget(hzbName + std::to_string(cullingID), gfx::LoadOp::CLEAR, 
gfx::StoreOp::STORE, gfx::Color{1.0, 0.0, 0.0, 0.0}); + std::unique_ptr clearQueue(clearPass->addQueue()); + clearQueue->addScene(camera, SceneFlags::OPAQUE); + } + } + // init indirect buffers { CopyPass copyPass{renderGraph.get_allocator()}; From 6eba597414dac7e641956fb7e30634bea5677383 Mon Sep 17 00:00:00 2001 From: Zeqiang Li Date: Mon, 28 Aug 2023 19:03:58 +0800 Subject: [PATCH 06/16] fix undefined (#18) * fix undefined --- .../renderer/pipeline/custom/FrameGraphDispatcher.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/native/cocos/renderer/pipeline/custom/FrameGraphDispatcher.cpp b/native/cocos/renderer/pipeline/custom/FrameGraphDispatcher.cpp index f477a18ad8d..8eb795caf24 100644 --- a/native/cocos/renderer/pipeline/custom/FrameGraphDispatcher.cpp +++ b/native/cocos/renderer/pipeline/custom/FrameGraphDispatcher.cpp @@ -1601,9 +1601,13 @@ void startMovePass(const Graphs &graphs, uint32_t passID, const MovePass &pass) auto &srcAccess = resourceAccessGraph.resourceAccess.at(pair.source); auto targetAccessIter = resourceAccessGraph.resourceAccess.find(pair.target); + srcAccess.erase(0); if (targetAccessIter != resourceAccessGraph.resourceAccess.end() && !targetAccessIter->second.empty()) { auto lastAccessIter = targetAccessIter->second.rbegin(); srcAccess.emplace(lastAccessIter->first, AccessStatus{lastAccessIter->second.accessFlag, srcResourceRange}); + } else { + const auto &lastStates = get(ResourceGraph::StatesTag{}, resourceGraph, targetResID); + srcAccess.emplace(0, AccessStatus{lastStates.states, srcResourceRange}); } resourceAccessGraph.resourceIndex[pair.target] = targetResID; @@ -1651,7 +1655,7 @@ void startScene(const Graphs &graphs, RenderGraph::vertex_descriptor sceneID, co // auto resID = findVertex(name, resourceGraph); // const auto &indirectBuffer = get(ManagedBufferTag{}, resID, resg).buffer.get(); resourceAccessGraph.externalAccess[sceneID] = std::make_pair(name, gfx::AccessFlags::INDIRECT_BUFFER); - } + } } 
struct DependencyVisitor : boost::dfs_visitor<> { @@ -1971,7 +1975,7 @@ void buildBarriers(FrameGraphDispatcher &fgDispatcher) { for (const auto &[src, ignored] : rag.movedTarget.at(sName)) { const auto srcStart = rag.resourceAccess.at(src).begin()->first; const auto srcEnd = rag.resourceAccess.at(src).rbegin()->first; - interAccessed |= !((srcStart > endVert) || (srcEnd < startVert)); + interAccessed |= !((srcStart >= endVert) || (srcEnd <= startVert)); interAccessed |= interAccessCheck(src, startVert, endVert); if (interAccessed) { return true; From e0c959730a3e8c9d247f4d4a14cb788ff905b49c Mon Sep 17 00:00:00 2001 From: stanleyljl <798080536@qq.com> Date: Wed, 30 Aug 2023 18:15:39 +0800 Subject: [PATCH 07/16] two pass occlusion culling --- cocos/rendering/custom/pipeline.ts | 1 + .../pipeline/gpu-driven/gpu-culling.effect | 73 +++++++++++++++++-- .../pipeline/custom/NativeExecutor.cpp | 10 ++- .../pipeline/custom/NativePipeline.cpp | 40 +++------- .../pipeline/custom/NativePipelineFwd.h | 2 +- .../pipeline/custom/NativePipelineTypes.cpp | 12 +-- .../pipeline/custom/NativePipelineTypes.h | 22 +++--- .../pipeline/custom/NativeRenderQueue.cpp | 45 ++++++++---- .../pipeline/custom/RenderInterfaceTypes.h | 15 ++-- native/cocos/scene/Pass.cpp | 10 +++ native/cocos/scene/Pass.h | 1 + native/cocos/scene/gpu-scene/GPUBatchPool.cpp | 18 ++--- native/cocos/scene/gpu-scene/GPUBatchPool.h | 1 + 13 files changed, 158 insertions(+), 92 deletions(-) diff --git a/cocos/rendering/custom/pipeline.ts b/cocos/rendering/custom/pipeline.ts index 4ab7c5f34e6..c19a4153f36 100644 --- a/cocos/rendering/custom/pipeline.ts +++ b/cocos/rendering/custom/pipeline.ts @@ -1404,6 +1404,7 @@ export interface Pipeline extends BasicPipeline { addBuiltinGpuCullingPass ( cullingID: number, camera: Camera, + layoutPath?: string, hzbName?: string, light?: Light | null, bMainPass?: boolean): void; diff --git a/editor/assets/effects/pipeline/gpu-driven/gpu-culling.effect 
b/editor/assets/effects/pipeline/gpu-driven/gpu-culling.effect index 819c8dfe9ff..629bb61c2c2 100644 --- a/editor/assets/effects/pipeline/gpu-driven/gpu-culling.effect +++ b/editor/assets/effects/pipeline/gpu-driven/gpu-culling.effect @@ -21,6 +21,7 @@ CCProgram culling-main %{ struct InstanceData { uint objectId; + uint phaseId; uint batchId; }; @@ -47,7 +48,7 @@ CCProgram culling-main %{ uint cc_isPerspective; uint cc_orientation; uint cc_instanceCount; - uint cc_sceneFlags; + uint cc_phaseId; }; #pragma rate CCObjectBuffer pass @@ -94,8 +95,7 @@ CCProgram culling-main %{ #if CC_USE_OCCLUSION_CULLING // 2D Polyhedral Bounds of a Clipped, Perspective-Projected 3D Sphere. Michael Mara, Morgan McGuire. 2013 // Consider: minClipZ & projectionSignY & orientation in projection, so we can not use p00 & p11 directly. - bool projectSpherePerspective(vec3 c, float r, float znear, mat4 proj, uint orientation, out vec4 aabb) - { + bool projectSpherePerspective(vec3 c, float r, float znear, mat4 proj, uint orientation, out vec4 aabb, out float nearest) { if (-c.z < r + znear) return false; vec3 cr = c * r; @@ -109,28 +109,81 @@ CCProgram culling-main %{ float miny = (vy * c.y + cr.z) / -(vy * c.z - cr.y); float maxy = (vy * c.y - cr.z) / -(vy * c.z + cr.y); - aabb = vec4(minx * proj[0][0], miny * proj[1][1], maxx * proj[0][0], maxy * proj[1][1]); + // project to ndc space + if (orientation == 0) { + aabb = vec4(minx * proj[0][0], miny * proj[1][1], maxx * proj[0][0], maxy * proj[1][1]); + } + else if (orientation == 1) { + aabb = vec4(maxy * proj[1][0], minx * proj[0][1], miny * proj[1][0], maxx * proj[0][1]); + } + else if (orientation == 2) { + aabb = vec4(maxx * proj[0][0], maxy * proj[1][1], minx * proj[0][0], miny * proj[1][1]); + } + else { + aabb = vec4(miny * proj[1][0], maxx * proj[0][1], maxy * proj[1][0], minx * proj[0][1]); + } + + // ndc space -> uv space + aabb = aabb * vec4(0.5f) + vec4(0.5f); + + nearest = -(proj[2][2] + proj[3][2] / (c.z + r)); + + return 
true; + } + + bool projectSphereOrtho(vec3 c, float r, float znear, mat4 proj, uint orientation, out vec4 aabb, out float nearest) { + if (-c.z < r + znear) return false; + + float minx = c.x - r; + float maxx = c.x + r; + + float miny = c.y - r; + float maxy = c.y + r; + + // project to ndc space + if (orientation == 0) { + aabb = vec4(minx * proj[0][0], miny * proj[1][1], maxx * proj[0][0], maxy * proj[1][1]); + } + else if (orientation == 1) { + aabb = vec4(maxy * proj[1][0], minx * proj[0][1], miny * proj[1][0], maxx * proj[0][1]); + } + else if (orientation == 2) { + aabb = vec4(maxx * proj[0][0], maxy * proj[1][1], minx * proj[0][0], miny * proj[1][1]); + } + else { + aabb = vec4(miny * proj[1][0], maxx * proj[0][1], maxy * proj[1][0], minx * proj[0][1]); + } + aabb = aabb + vec4(proj[3][0], proj[3][1], proj[3][0], proj[3][1]); + // ndc space -> uv space aabb = aabb * vec4(0.5) + vec4(0.5); + nearest = (proj[2][2] * (c.z + r) + proj[3][2]); + return true; } bool isOcclusionCulled(uint objectId) { vec4 aabb; + float nearest = 0; vec4 sphere = cc_objects[objectId].sphere; vec3 center = (cc_view * vec4(sphere.xyz, 1.0)).xyz; float radius = sphere.w; - if (!projectSpherePerspective(center, radius, cc_znear, cc_proj, cc_orientation, aabb)) { - return false; + if (cc_isPerspective != 0) { + if (!projectSpherePerspective(center, radius, cc_znear, cc_proj, cc_orientation, aabb, nearest)) { + return false; + } + } else { + if (!projectSphereOrtho(center, radius, cc_znear, cc_proj, cc_orientation, aabb, nearest)) { + return false; + } } float width = abs(aabb.z - aabb.x) * cc_depthWidth; float height = abs(aabb.w - aabb.y) * cc_depthHeight; - float level = floor(log2(max(width, height))); + float level = max(0.0, floor(log2(max(width, height)))); float depth = textureLod(CCDepthMap, (aabb.xy + aabb.zw) * vec2(0.5), level).x; - float nearest = -(cc_proj[2][2] + cc_proj[3][2] / (center.z + radius)); return nearest > depth; } #endif @@ -144,6 +197,10 @@ CCProgram 
culling-main %{ return; } + if (cc_phaseId != cc_instances[id].phaseId) { + return; + } + #if CC_GPU_CULLING_MAIN_PASS #if CC_USE_OCCLUSION_CULLING cc_visibilities[id] = 1U; diff --git a/native/cocos/renderer/pipeline/custom/NativeExecutor.cpp b/native/cocos/renderer/pipeline/custom/NativeExecutor.cpp index 38c90e68e56..a547d88b6af 100644 --- a/native/cocos/renderer/pipeline/custom/NativeExecutor.cpp +++ b/native/cocos/renderer/pipeline/custom/NativeExecutor.cpp @@ -1395,9 +1395,15 @@ struct RenderGraphVisitor : boost::dfs_visitor<> { const auto& queueDesc = ctx.context.sceneCulling.sceneQueryIndex.at(sceneID); const auto& queue = ctx.context.sceneCulling.renderQueues[queueDesc.renderQueueTarget]; if (any(sceneData.flags & SceneFlags::GPU_DRIVEN)) { - queue.opaqueBatchingQueue.recordCommandBuffer( + const auto renderQueueID = parent(sceneID, ctx.g); + CC_EXPECTS(holds(renderQueueID, ctx.g)); + const auto& renderQueue = get(QueueTag{}, renderQueueID, ctx.g); + const auto phaseLayoutID = renderQueue.phaseID; + CC_EXPECTS(phaseLayoutID != LayoutGraphData::null_vertex()); + + queue.gpuDrivenQueue.recordCommandBuffer( ctx.resourceGraph, ctx.device, camera, - ctx.currentPass, ctx.cmdBuff, queue.sceneFlags, sceneData.cullingID); + ctx.currentPass, ctx.cmdBuff, phaseLayoutID, queue.sceneFlags, sceneData.cullingID); return; } queue.opaqueQueue.recordCommandBuffer( diff --git a/native/cocos/renderer/pipeline/custom/NativePipeline.cpp b/native/cocos/renderer/pipeline/custom/NativePipeline.cpp index 38b9a5eb640..461aec45d27 100644 --- a/native/cocos/renderer/pipeline/custom/NativePipeline.cpp +++ b/native/cocos/renderer/pipeline/custom/NativePipeline.cpp @@ -1057,29 +1057,9 @@ bool setupGpuDrivenResources( } // namespace -bool projectSpherePerspective(Vec3 c, float r, float znear, Mat4 proj) { - if (-c.z < r + znear) return false; - - Vec3 cr = c * r; - float czr2 = c.z * c.z - r * r; - - float vx = std::sqrtf(c.x * c.x + czr2); - float minx = (vx * c.x + cr.z) / -(vx * c.z - 
cr.x); - float maxx = (vx * c.x - cr.z) / -(vx * c.z + cr.x); - - float vy = std::sqrtf(c.y * c.y + czr2); - float miny = (vy * c.y + cr.z) / -(vy * c.z - cr.y); - float maxy = (vy * c.y - cr.z) / -(vy * c.z + cr.y); - - Vec4 aabb = Vec4(minx * proj.m[0], miny * proj.m[5], maxx * proj.m[0], maxy * proj.m[5]); - // ndc space -> uv space - aabb = aabb * 0.5f + Vec4(0.5f, 0.5f, 0.5f, 0.5f); - - return true; -} - void NativePipeline::addBuiltinGpuCullingPass(uint32_t cullingID, - const scene::Camera *camera, const std::string &hzbName, const scene::Light *light, bool bMainPass) { + const scene::Camera *camera, const std::string &layoutPath, + const std::string &hzbName, const scene::Light *light, bool bMainPass) { auto *scene = camera->getScene(); if (!scene) { return; @@ -1090,8 +1070,6 @@ void NativePipeline::addBuiltinGpuCullingPass(uint32_t cullingID, return; } - //bool result = projectSpherePerspective(Vec3(0, 0, -10), 3, 1, camera->getMatProj()); - auto &sceneCulling = nativeContext.sceneCulling; auto iter = sceneCulling.sceneIDs.find(scene); if (iter == sceneCulling.sceneIDs.cend()) { @@ -1166,15 +1144,19 @@ void NativePipeline::addBuiltinGpuCullingPass(uint32_t cullingID, const auto materialIndex = hzbName.empty() ? 2 : (bMainPass ? 
0 : 1); const auto instanceCount = gpuScene->getInstanceCount(); const auto groupCount = getGroupCount(instanceCount, scene::CS_GPU_CULLING_LOCAL_SIZE); - std::unique_ptr gpuCullQueue(gpuCullPass->addQueue()); + const auto phaseID = locate(LayoutGraph::null_vertex(), layoutPath, programLibrary->layoutGraph); + std::unique_ptr gpuCullQueue(dynamic_cast(gpuCullPass->addQueue())); gpuCullQueue->addDispatch(groupCount, 1, 1, pipelineSceneData->getGPUCullingMaterial(materialIndex), 0); - ccstd::vector planes; + ccstd::vector planes; const auto &frustum = camera->getFrustum(); for (auto *plane : frustum.planes) { - planes.emplace_back(Vec4{plane->n.x, plane->n.y, plane->n.z, plane->d}); + planes.push_back(plane->n.x); + planes.push_back(plane->n.y); + planes.push_back(plane->n.z); + planes.push_back(plane->d); } - ArrayBuffer planesBuffer(reinterpret_cast(&planes[0]), sizeof(Vec4) * 6); + ArrayBuffer planesBuffer(reinterpret_cast(&planes[0]), sizeof(float) * planes.size()); gpuCullPass->setMat4("cc_view", camera->getMatView()); gpuCullPass->setMat4("cc_proj", camera->getMatProj()); gpuCullPass->setArrayBuffer("cc_planes", &planesBuffer); @@ -1185,7 +1167,7 @@ void NativePipeline::addBuiltinGpuCullingPass(uint32_t cullingID, gpuCullPass->setUint("cc_isPerspective", static_cast(camera->getProjectionType())); gpuCullPass->setUint("cc_orientation", static_cast(camera->getSurfaceTransform())); gpuCullPass->setUint("cc_instanceCount", instanceCount); - gpuCullPass->setUint("cc_sceneFlags", 0); // Stanley TODO + gpuCullPass->setUint("cc_phaseId", phaseID); } } diff --git a/native/cocos/renderer/pipeline/custom/NativePipelineFwd.h b/native/cocos/renderer/pipeline/custom/NativePipelineFwd.h index b059e3fd307..bd756768588 100644 --- a/native/cocos/renderer/pipeline/custom/NativePipelineFwd.h +++ b/native/cocos/renderer/pipeline/custom/NativePipelineFwd.h @@ -50,7 +50,7 @@ class NativeMultisampleRenderPassBuilder; class NativeComputeQueueBuilder; class NativeComputePassBuilder; 
struct RenderInstancingQueue; -struct RenderBatchingQueue; +struct GPUDrivenQueue; struct DrawInstance; struct RenderDrawQueue; struct NativeRenderQueue; diff --git a/native/cocos/renderer/pipeline/custom/NativePipelineTypes.cpp b/native/cocos/renderer/pipeline/custom/NativePipelineTypes.cpp index 210b40f741e..9b40fada33c 100644 --- a/native/cocos/renderer/pipeline/custom/NativePipelineTypes.cpp +++ b/native/cocos/renderer/pipeline/custom/NativePipelineTypes.cpp @@ -49,13 +49,13 @@ RenderInstancingQueue::RenderInstancingQueue(RenderInstancingQueue const& rhs, c passInstances(rhs.passInstances, alloc), instanceBuffers(rhs.instanceBuffers, alloc) {} -RenderBatchingQueue::RenderBatchingQueue(const allocator_type& alloc) noexcept +GPUDrivenQueue::GPUDrivenQueue(const allocator_type& alloc) noexcept : batches(alloc) {} -RenderBatchingQueue::RenderBatchingQueue(RenderBatchingQueue&& rhs, const allocator_type& alloc) +GPUDrivenQueue::GPUDrivenQueue(GPUDrivenQueue&& rhs, const allocator_type& alloc) : batches(std::move(rhs.batches), alloc) {} -RenderBatchingQueue::RenderBatchingQueue(RenderBatchingQueue const& rhs, const allocator_type& alloc) +GPUDrivenQueue::GPUDrivenQueue(GPUDrivenQueue const& rhs, const allocator_type& alloc) : batches(rhs.batches, alloc) {} RenderDrawQueue::RenderDrawQueue(const allocator_type& alloc) noexcept @@ -72,14 +72,14 @@ NativeRenderQueue::NativeRenderQueue(const allocator_type& alloc) noexcept transparentQueue(alloc), opaqueInstancingQueue(alloc), transparentInstancingQueue(alloc), - opaqueBatchingQueue(alloc) {} + gpuDrivenQueue(alloc) {} NativeRenderQueue::NativeRenderQueue(SceneFlags sceneFlagsIn, uint32_t subpassOrPassLayoutIDIn, const allocator_type& alloc) noexcept : opaqueQueue(alloc), transparentQueue(alloc), opaqueInstancingQueue(alloc), transparentInstancingQueue(alloc), - opaqueBatchingQueue(alloc), + gpuDrivenQueue(alloc), sceneFlags(sceneFlagsIn), subpassOrPassLayoutID(subpassOrPassLayoutIDIn) {} @@ -88,7 +88,7 @@ 
NativeRenderQueue::NativeRenderQueue(NativeRenderQueue&& rhs, const allocator_ty transparentQueue(std::move(rhs.transparentQueue), alloc), opaqueInstancingQueue(std::move(rhs.opaqueInstancingQueue), alloc), transparentInstancingQueue(std::move(rhs.transparentInstancingQueue), alloc), - opaqueBatchingQueue(std::move(rhs.opaqueBatchingQueue), alloc), + gpuDrivenQueue(std::move(rhs.gpuDrivenQueue), alloc), sceneFlags(rhs.sceneFlags), subpassOrPassLayoutID(rhs.subpassOrPassLayoutID) {} diff --git a/native/cocos/renderer/pipeline/custom/NativePipelineTypes.h b/native/cocos/renderer/pipeline/custom/NativePipelineTypes.h index 05e86e738ea..f0d66d8e4db 100644 --- a/native/cocos/renderer/pipeline/custom/NativePipelineTypes.h +++ b/native/cocos/renderer/pipeline/custom/NativePipelineTypes.h @@ -950,23 +950,23 @@ struct RenderInstancingQueue { ccstd::pmr::vector> instanceBuffers; }; -struct RenderBatchingQueue { +struct GPUDrivenQueue { using allocator_type = boost::container::pmr::polymorphic_allocator; allocator_type get_allocator() const noexcept { // NOLINT return {batches.get_allocator().resource()}; } - RenderBatchingQueue(const allocator_type& alloc) noexcept; // NOLINT - RenderBatchingQueue(RenderBatchingQueue&& rhs, const allocator_type& alloc); - RenderBatchingQueue(RenderBatchingQueue const& rhs, const allocator_type& alloc); + GPUDrivenQueue(const allocator_type& alloc) noexcept; // NOLINT + GPUDrivenQueue(GPUDrivenQueue&& rhs, const allocator_type& alloc); + GPUDrivenQueue(GPUDrivenQueue const& rhs, const allocator_type& alloc); - RenderBatchingQueue(RenderBatchingQueue&& rhs) noexcept = default; - RenderBatchingQueue(RenderBatchingQueue const& rhs) = delete; - RenderBatchingQueue& operator=(RenderBatchingQueue&& rhs) = default; - RenderBatchingQueue& operator=(RenderBatchingQueue const& rhs) = default; + GPUDrivenQueue(GPUDrivenQueue&& rhs) noexcept = default; + GPUDrivenQueue(GPUDrivenQueue const& rhs) = delete; + GPUDrivenQueue& operator=(GPUDrivenQueue&& rhs) 
= default; + GPUDrivenQueue& operator=(GPUDrivenQueue const& rhs) = default; void recordCommandBuffer(const ResourceGraph& resg, gfx::Device *device, const scene::Camera *camera, - gfx::RenderPass *renderPass, gfx::CommandBuffer *cmdBuffer, SceneFlags sceneFlags, uint32_t cullingID) const; + gfx::RenderPass *renderPass, gfx::CommandBuffer *cmdBuffer, uint32_t phaseLayoutID, SceneFlags sceneFlags, uint32_t cullingID) const; ccstd::pmr::vector batches; }; @@ -1028,7 +1028,7 @@ struct NativeRenderQueue { RenderDrawQueue transparentQueue; RenderInstancingQueue opaqueInstancingQueue; RenderInstancingQueue transparentInstancingQueue; - RenderBatchingQueue opaqueBatchingQueue; + GPUDrivenQueue gpuDrivenQueue; SceneFlags sceneFlags{SceneFlags::NONE}; uint32_t subpassOrPassLayoutID{0xFFFFFFFF}; }; @@ -1441,7 +1441,7 @@ class NativePipeline final : public Pipeline { ComputePassBuilder *addComputePass(const ccstd::string &passName) override; void addUploadPass(ccstd::vector &uploadPairs) override; void addMovePass(const ccstd::vector &movePairs) override; - void addBuiltinGpuCullingPass(uint32_t cullingID, const scene::Camera *camera, const std::string &hzbName, const scene::Light *light, bool bMainPass) override; + void addBuiltinGpuCullingPass(uint32_t cullingID, const scene::Camera *camera, const std::string &layoutPath, const std::string &hzbName, const scene::Light *light, bool bMainPass) override; void addBuiltinHzbGenerationPass(const std::string &sourceDepthStencilName, const std::string &targetHzbName) override; uint32_t addCustomBuffer(const ccstd::string &name, const gfx::BufferInfo &info, const std::string &type) override; uint32_t addCustomTexture(const ccstd::string &name, const gfx::TextureInfo &info, const std::string &type) override; diff --git a/native/cocos/renderer/pipeline/custom/NativeRenderQueue.cpp b/native/cocos/renderer/pipeline/custom/NativeRenderQueue.cpp index 19f31d45ce3..c37761be9f6 100644 --- 
a/native/cocos/renderer/pipeline/custom/NativeRenderQueue.cpp +++ b/native/cocos/renderer/pipeline/custom/NativeRenderQueue.cpp @@ -178,10 +178,10 @@ void RenderInstancingQueue::recordCommandBuffer( } } -void RenderBatchingQueue::recordCommandBuffer( +void GPUDrivenQueue::recordCommandBuffer( const ResourceGraph& resg, gfx::Device *device, const scene::Camera *camera, - gfx::RenderPass *renderPass, gfx::CommandBuffer *cmdBuffer, SceneFlags sceneFlags, uint32_t cullingID) const { + gfx::RenderPass *renderPass, gfx::CommandBuffer *cmdBuffer, uint32_t phaseLayoutID, SceneFlags sceneFlags, uint32_t cullingID) const { if (!any(sceneFlags & SceneFlags::GPU_DRIVEN)) { return; } @@ -191,23 +191,21 @@ void RenderBatchingQueue::recordCommandBuffer( return; } + const bool bDrawBlend = any(sceneFlags & SceneFlags::TRANSPARENT_OBJECT); + const bool bDrawOpaqueOrMask = any(sceneFlags & (SceneFlags::OPAQUE_OBJECT | SceneFlags::CUTOUT_OBJECT)); + CC_EXPECTS(cullingID != 0xFFFFFFFF); - ccstd::pmr::string name("CCDrawIndirectBuffer", get_allocator()); - name.append(std::to_string(cullingID)); - auto resID = findVertex(name, resg); - const auto &indirectBuffer = get(ManagedBufferTag{}, resID, resg).buffer.get(); + ccstd::pmr::string indirectName("CCDrawIndirectBuffer", get_allocator()); + indirectName.append(std::to_string(cullingID)); + auto indirectResID = findVertex(indirectName, resg); + const auto &indirectBuffer = get(ManagedBufferTag{}, indirectResID, resg).buffer.get(); - // Draw visible instances - gfx::BufferBarrierInfo barrierInfo{ - gfx::AccessFlagBit::COMPUTE_SHADER_WRITE, - gfx::AccessFlagBit::INDIRECT_BUFFER, - gfx::BarrierType::FULL, - 0, - indirectBuffer->getSize() - }; - //auto *bufferBarrier = gfx::Device::getInstance()->getBufferBarrier(barrierInfo); - //cmdBuffer->pipelineBarrier(nullptr, {bufferBarrier}, {indirectBuffer}, {}, {}); + ccstd::pmr::string instanceName("CCDrawInstanceBuffer", get_allocator()); + instanceName.append(std::to_string(cullingID)); + auto 
instanceResID = findVertex(instanceName, resg); + const auto &instanceBuffer = get(ManagedBufferTag{}, instanceResID, resg).buffer.get(); + // Draw visible instances const auto supportFirstInstance = device->getCapabilities().supportFirstInstance; auto *batchPool = gpuScene->getBatchPool(); gfx::PipelineState *lastPSO = nullptr; @@ -224,6 +222,21 @@ void RenderBatchingQueue::recordCommandBuffer( } const auto *drawPass = batch->getPass(); + if (phaseLayoutID != drawPass->getPhaseID()) { + continue; + } + + const bool bBlend = drawPass->isBlend(); + const bool bOpaqueOrMask = !bBlend; + if (!bDrawBlend && bBlend) { + // skip transparent object + continue; + } + if (!bDrawOpaqueOrMask && bOpaqueOrMask) { + // skip opaque object + continue; + } + cmdBuffer->bindDescriptorSet(pipeline::materialSet, drawPass->getDescriptorSet()); const auto &items = batch->getItems(); diff --git a/native/cocos/renderer/pipeline/custom/RenderInterfaceTypes.h b/native/cocos/renderer/pipeline/custom/RenderInterfaceTypes.h index 80d08269620..feabf492685 100644 --- a/native/cocos/renderer/pipeline/custom/RenderInterfaceTypes.h +++ b/native/cocos/renderer/pipeline/custom/RenderInterfaceTypes.h @@ -1457,7 +1457,7 @@ class Pipeline : public BasicPipeline { * @param hzbName @en name of hierarchical z buffer @zh 层次深度缓存的名字 * @param light @en light of the culling pass @zh 剔除通道的灯光 */ - virtual void addBuiltinGpuCullingPass(uint32_t cullingID, const scene::Camera *camera, const std::string &hzbName, const scene::Light *light, bool bMainPass) = 0; + virtual void addBuiltinGpuCullingPass(uint32_t cullingID, const scene::Camera *camera, const std::string &layoutPath, const std::string &hzbName, const scene::Light *light, bool bMainPass) = 0; /** * @en Add hierarchical z buffer generation pass * @zh 添加层次化深度缓存生成通道 @@ -1489,13 +1489,16 @@ class Pipeline : public BasicPipeline { updateStorageTexture(name, width, height, gfx::Format::UNKNOWN); } void addBuiltinGpuCullingPass(uint32_t cullingID, const 
scene::Camera *camera) { - addBuiltinGpuCullingPass(cullingID, camera, "", nullptr, true); + addBuiltinGpuCullingPass(cullingID, camera, "", "", nullptr, true); } - void addBuiltinGpuCullingPass(uint32_t cullingID, const scene::Camera *camera, const std::string &hzbName) { - addBuiltinGpuCullingPass(cullingID, camera, hzbName, nullptr, true); + void addBuiltinGpuCullingPass(uint32_t cullingID, const scene::Camera *camera, const std::string &layoutPath) { + addBuiltinGpuCullingPass(cullingID, camera, layoutPath, "", nullptr, true); } - void addBuiltinGpuCullingPass(uint32_t cullingID, const scene::Camera *camera, const std::string &hzbName, const scene::Light *light) { - addBuiltinGpuCullingPass(cullingID, camera, hzbName, light, true); + void addBuiltinGpuCullingPass(uint32_t cullingID, const scene::Camera *camera, const std::string &layoutPath, const std::string &hzbName) { + addBuiltinGpuCullingPass(cullingID, camera, layoutPath, hzbName, nullptr, true); + } + void addBuiltinGpuCullingPass(uint32_t cullingID, const scene::Camera *camera, const std::string &layoutPath, const std::string &hzbName, const scene::Light *light) { + addBuiltinGpuCullingPass(cullingID, camera, layoutPath, hzbName, light, true); } }; diff --git a/native/cocos/scene/Pass.cpp b/native/cocos/scene/Pass.cpp index cc57e887ed1..a8661691fe1 100644 --- a/native/cocos/scene/Pass.cpp +++ b/native/cocos/scene/Pass.cpp @@ -829,6 +829,16 @@ void Pass::syncBatchingScheme() { } } +bool Pass::isBlend() const { + bool bBlend = false; + for (const auto &target : getBlendState()->targets) { + if (target.blend) { + bBlend = true; + } + } + return bBlend; +} + void Pass::initPassFromTarget(Pass *target, const gfx::DepthStencilState &dss, ccstd::hash_t hashFactor) { _priority = target->_priority; _stage = target->_stage; diff --git a/native/cocos/scene/Pass.h b/native/cocos/scene/Pass.h index e714e900ad3..99b1e4c6c8d 100644 --- a/native/cocos/scene/Pass.h +++ b/native/cocos/scene/Pass.h @@ -291,6 +291,7 @@ 
class Pass : public RefCounted { inline gfx::DescriptorSet *getDescriptorSet() const { return _descriptorSet; } inline ccstd::hash_t getHash() const { return _hash; } inline gfx::PipelineLayout *getPipelineLayout() const { return _pipelineLayout; } + bool isBlend() const; // Only for UI void initPassFromTarget(Pass *target, const gfx::DepthStencilState &dss, ccstd::hash_t hashFactor); diff --git a/native/cocos/scene/gpu-scene/GPUBatchPool.cpp b/native/cocos/scene/gpu-scene/GPUBatchPool.cpp index 3cc037d94dd..9299ab969e5 100644 --- a/native/cocos/scene/gpu-scene/GPUBatchPool.cpp +++ b/native/cocos/scene/gpu-scene/GPUBatchPool.cpp @@ -167,6 +167,8 @@ void GPUBatchPool::update(uint32_t stamp) { for (auto &lightmapBatch : _batches) { for (auto &passBatch : lightmapBatch.second) { + auto *pass = passBatch.first; + const auto phaseId = pass->getPhaseID(); auto &items = passBatch.second->getItems(); for (auto &item : items) { @@ -187,7 +189,7 @@ void GPUBatchPool::update(uint32_t stamp) { _indirectCmds.push_back({indexCount, 0, firstIndex, firstVertex, firstInstance}); for (const auto &objectIdx : iter.second) { - _instances.push_back({objectIdx, batchId}); + _instances.push_back({objectIdx, phaseId, batchId}); } batchId++; @@ -208,17 +210,12 @@ void GPUBatchPool::addModel(const Model* model) { const auto passCount = passes.size(); for (auto passIdx = 0; passIdx < passCount; passIdx++) { - const auto &pass = passes[passIdx]; - // Stanley TODO: only support base pass now. 
- if (pass->getPhaseID() != 1) { - continue; - } - auto lightmapIter = _batches.find(lightmap); if (lightmapIter == _batches.cend()) { lightmapIter = _batches.insert({lightmap, PassBatchMap()}).first; } + const auto &pass = passes[passIdx]; auto passIter = lightmapIter->second.find(pass); if (passIter == lightmapIter->second.cend()) { passIter = lightmapIter->second.insert({pass, ccnew GPUBatch(_gpuScene, pass)}).first; @@ -240,17 +237,12 @@ void GPUBatchPool::removeModel(const Model* model) { const auto passCount = passes.size(); for (auto passIdx = 0; passIdx < passCount; passIdx++) { - const auto &pass = passes[passIdx]; - // Stanley TODO: only support base pass now. - if (pass->getPhaseID() != 1) { - continue; - } - auto lightmapIter = _batches.find(lightmap); if (lightmapIter == _batches.cend()) { continue; } + const auto &pass = passes[passIdx]; auto passIter = lightmapIter->second.find(pass); if (passIter == lightmapIter->second.cend()) { continue; diff --git a/native/cocos/scene/gpu-scene/GPUBatchPool.h b/native/cocos/scene/gpu-scene/GPUBatchPool.h index 27e2cb84a0b..01ec90e7ac7 100644 --- a/native/cocos/scene/gpu-scene/GPUBatchPool.h +++ b/native/cocos/scene/gpu-scene/GPUBatchPool.h @@ -51,6 +51,7 @@ class GPUScene; struct InstanceData { uint32_t objectId{UINT_MAX}; + uint32_t phaseId{UINT_MAX}; uint32_t batchId{UINT_MAX}; }; From 1618f0f98cc41512c4264bc385e81cb3bca6c8e9 Mon Sep 17 00:00:00 2001 From: Zeqiang Li Date: Fri, 1 Sep 2023 16:31:28 +0800 Subject: [PATCH 08/16] access for move-source resource & explicit recording usage for indirect buffer (#19) * fg move * remove dbg code * indirect barrier * remove dbg code * first meet logic * fix depth resource residency * clear first hzb * fix undefined * explicit move access * rg use access * update rendergraph interface * fix macro * revert useResource interface * compile issue & lint --- cocos/rendering/custom/types.ts | 5 +- .../pipeline/custom/FGDispatcherTypes.h | 14 +- 
.../pipeline/custom/FrameGraphDispatcher.cpp | 198 +++++++++--------- .../pipeline/custom/NativePipeline.cpp | 30 ++- .../pipeline/custom/NativePipelineTypes.h | 1 + .../pipeline/custom/NativeRenderGraph.cpp | 3 + .../pipeline/custom/RenderCommonJsb.cpp | 7 + .../custom/RenderCommonSerialization.h | 2 + .../pipeline/custom/RenderCommonTypes.cpp | 6 +- .../pipeline/custom/RenderCommonTypes.h | 3 + .../pipeline/custom/RenderGraphTypes.cpp | 3 + .../pipeline/custom/RenderGraphTypes.h | 6 +- 12 files changed, 154 insertions(+), 124 deletions(-) diff --git a/cocos/rendering/custom/types.ts b/cocos/rendering/custom/types.ts index 4848104455a..b8104d8d037 100644 --- a/cocos/rendering/custom/types.ts +++ b/cocos/rendering/custom/types.ts @@ -28,7 +28,7 @@ * ========================= !DO NOT CHANGE THE FOLLOWING SECTION MANUALLY! ========================= */ /* eslint-disable max-len */ -import { ResolveMode, ShaderStageFlagBit, Type, UniformBlock } from '../../gfx'; +import { ResolveMode, ShaderStageFlagBit, Type, UniformBlock, AccessFlags, AccessFlagBit } from '../../gfx'; import { Light } from '../../render-scene/scene'; import { OutputArchive, InputArchive } from './archive'; import { saveUniformBlock, loadUniformBlock } from './serialization'; @@ -475,6 +475,7 @@ export class MovePair { targetMostDetailedMip = 0, targetFirstSlice = 0, targetPlaneSlice = 0, + possibleUsage = AccessFlagBit.NONE, ) { this.source = source; this.target = target; @@ -483,6 +484,7 @@ export class MovePair { this.targetMostDetailedMip = targetMostDetailedMip; this.targetFirstSlice = targetFirstSlice; this.targetPlaneSlice = targetPlaneSlice; + this.possibleUsage = possibleUsage; } source: string; target: string; @@ -491,6 +493,7 @@ export class MovePair { targetMostDetailedMip: number; targetFirstSlice: number; targetPlaneSlice: number; + possibleUsage: AccessFlagBit; } export class PipelineStatistics { diff --git a/native/cocos/renderer/pipeline/custom/FGDispatcherTypes.h 
b/native/cocos/renderer/pipeline/custom/FGDispatcherTypes.h index 7e4ef7588ec..2303e0cf110 100644 --- a/native/cocos/renderer/pipeline/custom/FGDispatcherTypes.h +++ b/native/cocos/renderer/pipeline/custom/FGDispatcherTypes.h @@ -163,6 +163,11 @@ struct ResourceNode { ccstd::vector planes; }; +struct MoveStatus { + bool finalAccess{true}; + AccessStatus status; +}; + struct ResourceAccessGraph { using allocator_type = boost::container::pmr::polymorphic_allocator; allocator_type get_allocator() const noexcept { // NOLINT @@ -297,9 +302,8 @@ struct ResourceAccessGraph { ccstd::pmr::vector topologicalOrder; PmrTransparentMap> resourceAccess; PmrFlatMap> movedTarget; - PmrFlatMap movedSourceStatus; - PmrFlatMap movedTargetStatus; - PmrFlatMap> externalAccess; + PmrFlatMap movedSourceStatus; + PmrFlatMap movedTargetStatus; }; struct RelationGraph { @@ -485,9 +489,7 @@ struct FrameGraphDispatcher { PmrFlatMap buildDescriptorIndex( const PmrTransparentMap>&computeViews, - boost::container::pmr::memory_resource* scratch) const; - - void registerResourceAccess(RenderGraph::vertex_descriptor v, const ccstd::pmr::string & name, gfx::AccessFlags access); + boost::container::pmr::memory_resource* scratch) const; ResourceAccessGraph resourceAccessGraph; ResourceGraph& resourceGraph; diff --git a/native/cocos/renderer/pipeline/custom/FrameGraphDispatcher.cpp b/native/cocos/renderer/pipeline/custom/FrameGraphDispatcher.cpp index 8eb795caf24..801a22cc179 100644 --- a/native/cocos/renderer/pipeline/custom/FrameGraphDispatcher.cpp +++ b/native/cocos/renderer/pipeline/custom/FrameGraphDispatcher.cpp @@ -65,7 +65,7 @@ #define ENABLE_FGD_WARNNING 1 #ifdef ENABLE_FGD_WARNNING - #define FGD_WARNING(...) printf(##__VA_ARGS__) + #define FGD_WARNING(...) printf(__VA_ARGS__) #else #define FGD_WARNING(...) 
#endif @@ -230,10 +230,6 @@ PmrFlatMap FrameGraphDispatcher:: return resourceIndex; } -void FrameGraphDispatcher::registerResourceAccess(RenderGraph::vertex_descriptor v, const ccstd::pmr::string &name, gfx::AccessFlags access) { - resourceAccessGraph.externalAccess[v] = std::make_pair(name, access); -} - PmrFlatMap FrameGraphDispatcher::buildDescriptorIndex( const PmrTransparentMap> &computeViews, const PmrTransparentMap &rasterViews, @@ -550,12 +546,15 @@ auto dependencyCheck(ResourceAccessGraph &rag, ResourceAccessGraph::vertex_descr const auto &desc = get(ResourceGraph::DescTag{}, resourceGraph, resourceID); const auto& range = originRange; - if (rag.movedTargetStatus.find(name) != rag.movedTargetStatus.end()) { + if (rag.movedTargetStatus.find(name) != rag.movedTargetStatus.end() && !rag.movedTarget.at(name).empty()) { CC_ASSERT(rag.movedTargetStatus.at(name).full); for (const auto &[src, ignored] : rag.movedTarget.at(name)) { - const auto &srcRange = rag.movedSourceStatus.at(src).range; - rag.resourceAccess.at(src).emplace(curVertID, AccessStatus{accessFlag, srcRange}); + auto &srcStatus = rag.movedSourceStatus.at(src); + srcStatus.finalAccess = false; + rag.resourceAccess.at(src).emplace(curVertID, AccessStatus{accessFlag, srcStatus.status.range}); } + // invalidate last move source + //rag.movedTarget.at(name).clear(); } bool isExternalPass = get(get(ResourceGraph::TraitsTag{}, resourceGraph), resourceID).hasSideEffects(); @@ -589,7 +588,7 @@ auto dependencyCheck(ResourceAccessGraph &rag, ResourceAccessGraph::vertex_descr auto &transMap = iter->second; CC_ASSERT(!transMap.empty()); - const auto lastRecordIter = (--transMap.end()); + auto lastRecordIter = transMap.rbegin(); const auto &lastStatus = lastRecordIter->second; lastAccess = lastStatus.accessFlag; bool isBuffer = desc.dimension == ResourceDimension::BUFFER; @@ -930,27 +929,40 @@ void extractNames(const ccstd::pmr::string &resName, } } -auto checkExternalAccess(const Graphs &graphs, - 
ResourceAccessGraph::vertex_descriptor ragVertID) { +gfx::AccessFlags mapAccess(ResourceFlags flags){ + gfx::AccessFlags ret{gfx::AccessFlags::NONE}; + if (flags == ResourceFlags::INDIRECT) { + ret = gfx::AccessFlags::INDIRECT_BUFFER; + } else if (flags == ResourceFlags::VERTEX) { + ret = gfx::AccessFlags::VERTEX_BUFFER; + } else if (flags == ResourceFlags::INDEX) { + ret = gfx::AccessFlags::INDEX_BUFFER; + } + return ret; +} + +auto checkManuallyTrackResources(const Graphs &graphs, + ResourceAccessGraph::vertex_descriptor ragVertID, + ResourceAccessNode &node, + const PmrTransparentMap& resources) { const auto &[renderGraph, layoutGraphData, resourceGraph, resourceAccessGraph, relationGraph] = graphs; - if (!resourceAccessGraph.externalAccess.empty()) { - auto iter = resourceAccessGraph.externalAccess.begin(); - auto targetPassID = iter->first; - while (parent(targetPassID, renderGraph) != RenderGraph::null_vertex()) { - targetPassID = parent(targetPassID, renderGraph); - } - auto rvID = resourceAccessGraph.passIndex.at(targetPassID); - while (iter != resourceAccessGraph.externalAccess.end() && rvID <= ragVertID) { - const auto &range = getResourceRange(vertex(iter->second.first, resourceGraph), resourceGraph); - ViewStatus view{iter->second.first, AccessType::READ_WRITE /*no use*/, gfx::ShaderStageFlagBit::NONE, iter->second.second, range}; - const auto&[lastVertId, nearestAccess] = dependencyCheck(resourceAccessGraph, rvID, resourceGraph, view); - tryAddEdge(lastVertId, rvID, resourceAccessGraph); - tryAddEdge(lastVertId, rvID, relationGraph); - //dependent |= (lastVertId != EXPECT_START_ID); - iter = resourceAccessGraph.externalAccess.erase(iter); - rvID = resourceAccessGraph.passIndex.at(targetPassID); + + bool dependent = false; + for (const auto &[name, usage] : resources) { + auto manuallyAccess = mapAccess(usage); + if (manuallyAccess != gfx::AccessFlags::NONE) { + const auto &range = getResourceRange(vertex(name, resourceGraph), resourceGraph); + 
ViewStatus view{name, AccessType::READ_WRITE /*no use*/, gfx::ShaderStageFlagBit::NONE, manuallyAccess, range}; + addAccessStatus(resourceAccessGraph, resourceGraph, node, view); + const auto &[lastVertId, nearestAccess] = dependencyCheck(resourceAccessGraph, ragVertID, resourceGraph, view); + tryAddEdge(lastVertId, ragVertID, resourceAccessGraph); + tryAddEdge(lastVertId, ragVertID, relationGraph); + dependent |= (lastVertId != EXPECT_START_ID); + } else { + FGD_WARNING("Trying to manually add an undefined access at pass %d on resource %s, ignnored. Have you set the resource flags?\n", get(ResourceAccessGraph::PassIDTag{}, resourceAccessGraph, ragVertID), name.c_str()); } } + return dependent; } auto checkRasterViews(const Graphs &graphs, @@ -1169,6 +1181,7 @@ void startRenderPass(const Graphs &graphs, uint32_t passID, const RasterPass &pa auto &accessNode = get(ResourceAccessGraph::PassNodeTag{}, resourceAccessGraph, vertID); std::ignore = checkRasterViews(graphs, rlgVertID, accessNode, pass.rasterViews, colorMap); std::ignore = checkComputeViews(graphs, rlgVertID, accessNode, pass.computeViews); + std::ignore = checkManuallyTrackResources(graphs, rlgVertID, accessNode, pass.resources); fillRenderPassInfo(colorMap, fgRenderPassInfo, resourceGraph); } else { const auto &subpasses = pass.subpassGraph.subpasses; @@ -1582,22 +1595,45 @@ SubresourceView makeSubresourceView(const ResourceDesc &srcDesc, const ResourceD return view; } +constexpr gfx::AccessFlags allStageReadAccess(const ResourceDesc& desc) { + auto flags = desc.flags; + auto isBuffer = desc.dimension == ResourceDimension::BUFFER; + gfx::AccessFlags ret{gfx::AccessFlags::NONE}; + if ((flags & ResourceFlags::STORAGE) != ResourceFlags::NONE) { + ret = gfx::AccessFlags::COMPUTE_SHADER_READ_OTHER | gfx::AccessFlags::FRAGMENT_SHADER_READ_OTHER | gfx::AccessFlags::VERTEX_SHADER_READ_OTHER; + } else if (isBuffer) { // NOLINT(misc-redundant-expression) + ret = gfx::AccessFlags::COMPUTE_SHADER_READ_UNIFORM_BUFFER 
| gfx::AccessFlags::COMPUTE_SHADER_READ_UNIFORM_BUFFER | gfx::AccessFlags::VERTEX_SHADER_READ_UNIFORM_BUFFER; + } else { + ret = gfx::AccessFlags::COMPUTE_SHADER_READ_TEXTURE | gfx::AccessFlags::FRAGMENT_SHADER_READ_TEXTURE | gfx::AccessFlags::VERTEX_SHADER_READ_TEXTURE; + } + return ret; +} + void startMovePass(const Graphs &graphs, uint32_t passID, const MovePass &pass) { const auto &[renderGraph, layoutGraphData, resourceGraph, resourceAccessGraph, relationGraph] = graphs; if (moveValidation(pass, resourceAccessGraph, resourceGraph)) { for (const auto &pair : pass.movePairs) { - auto toleranceRange = getResourceRange(vertex(pair.target, resourceGraph), resourceGraph); - auto srcResourceRange = getResourceRange(vertex(pair.source, resourceGraph), resourceGraph); + auto targetResID = findVertex(pair.target, resourceGraph); + auto sourceResID = findVertex(pair.source, resourceGraph); + auto toleranceRange = getResourceRange(targetResID, resourceGraph); + auto srcResourceRange = getResourceRange(sourceResID, resourceGraph); srcResourceRange.firstSlice = pair.targetFirstSlice; srcResourceRange.mipLevel = pair.targetMostDetailedMip; srcResourceRange.basePlane = pair.targetPlaneSlice; auto lastStatusIter = resourceAccessGraph.resourceAccess.at(pair.source).rbegin(); - resourceAccessGraph.movedSourceStatus.emplace(pair.source, AccessStatus{lastStatusIter->second.accessFlag, srcResourceRange}); + + auto nextPossibleAccess = pair.possibleUsage; + if (nextPossibleAccess == gfx::AccessFlags::NONE) { + const auto &desc = get(ResourceGraph::DescTag{}, resourceGraph, sourceResID); + nextPossibleAccess = allStageReadAccess(desc); + } + resourceAccessGraph.movedSourceStatus.emplace(std::piecewise_construct, + std::forward_as_tuple(pair.source), + std::forward_as_tuple(MoveStatus{true, nextPossibleAccess, srcResourceRange})); resourceAccessGraph.movedTarget[pair.target].emplace(pair.source, getSubresourceNameByRange(srcResourceRange, resourceAccessGraph.resource())); - auto 
targetResID = findVertex(pair.target, resourceGraph); auto &srcAccess = resourceAccessGraph.resourceAccess.at(pair.source); auto targetAccessIter = resourceAccessGraph.resourceAccess.find(pair.target); @@ -1640,22 +1676,10 @@ void startMovePass(const Graphs &graphs, uint32_t passID, const MovePass &pass) } } -void startScene(const Graphs &graphs, RenderGraph::vertex_descriptor sceneID, const SceneData &sceneData) { // NOLINT(readability-convert-member-functions-to-static) +void startScene(const Graphs &graphs, RenderGraph::vertex_descriptor /*sceneID*/, const SceneData &sceneData) { // NOLINT(readability-convert-member-functions-to-static) const auto &[renderGraph, layoutGraphData, resourceGraph, resourceAccessGraph, relationGraph] = graphs; const auto *const camera = sceneData.camera; CC_EXPECTS(camera); - - if (any(sceneData.flags & SceneFlags::GPU_DRIVEN)) { - //auto prtID = sceneID; - //while (parent(prtID, renderGraph) != RenderGraph::null_vertex()) { - // prtID = parent(prtID, renderGraph); - //} - ccstd::pmr::string name("CCDrawIndirectBuffer", resourceAccessGraph.get_allocator()); - name.append(std::to_string(sceneData.cullingID)); - // auto resID = findVertex(name, resourceGraph); - // const auto &indirectBuffer = get(ManagedBufferTag{}, resID, resg).buffer.get(); - resourceAccessGraph.externalAccess[sceneID] = std::make_pair(name, gfx::AccessFlags::INDIRECT_BUFFER); - } } struct DependencyVisitor : boost::dfs_visitor<> { @@ -1690,8 +1714,6 @@ struct DependencyVisitor : boost::dfs_visitor<> { [&](const auto & /*pass*/) { // do nothing }); - const auto curRagVert = graphs.resourceAccessGraph.passIndex.size() - 2; - checkExternalAccess(graphs, curRagVert); } void finish_vertex(RenderGraph::vertex_descriptor passID, @@ -1727,7 +1749,7 @@ void subresourceAnalysis(ResourceAccessGraph &rag, ResourceGraph &resg) { const auto &srcDesc = get(ResourceGraph::DescTag{}, resg, descResViewID); const auto &targetName = get(ResourceGraph::NameTag{}, resg, targetResID); 
const auto &targetDesc = get(ResourceGraph::DescTag{}, resg, targetResID); - const auto &srcResourceRange = rag.movedSourceStatus.at(subres).range; + const auto &srcResourceRange = rag.movedSourceStatus.at(subres).status.range; const auto &targetTraits = get(ResourceGraph::TraitsTag{}, resg, targetResID); const auto &indexName = concatResName(targetName, rangeStr, rag.resource()); auto subresID = findVertex(indexName, resg); @@ -1962,6 +1984,17 @@ void buildBarriers(FrameGraphDispatcher &fgDispatcher) { auto iter = accessRecord.begin(); auto nextIter = iter; std::advance(nextIter, 1); + + auto sRange = iter->second.range; + if (rag.movedSourceStatus.find(resName) != rag.movedSourceStatus.end()) { + // resource id + realResourceID = rag.resourceIndex.at(resName); + const auto &tName = get(ResourceGraph::NameTag{}, resourceGraph, realResourceID); + // parent id + realResourceID = realID(tName, resourceGraph); + sRange = rag.movedSourceStatus.at(resName).status.range; + } + for (; nextIter != accessRecord.end(); ++iter, ++nextIter) { auto srcRagVertID = iter->first; auto dstRagVertID = nextIter->first; @@ -2033,16 +2066,6 @@ void buildBarriers(FrameGraphDispatcher &fgDispatcher) { continue; } - auto sRange = iter->second.range; - if (rag.movedSourceStatus.find(resName) != rag.movedSourceStatus.end()) { - // resource id - realResourceID = rag.resourceIndex.at(resName); - const auto &tName = get(ResourceGraph::NameTag{}, resourceGraph, realResourceID); - // parent id - realResourceID = realID(tName, resourceGraph); - sRange = rag.movedSourceStatus.at(resName).range; - } - // undefined access if (srcRagVertID == 0) { auto &dstBarrierNode = get(ResourceAccessGraph::BarrierTag{}, rag, dstRagVertID); @@ -2080,18 +2103,31 @@ void buildBarriers(FrameGraphDispatcher &fgDispatcher) { beginBarrier.barrier = getGFXBarrier(beginBarrier); } } + + // - final access check: what if moved but no later access in this frame, while subresource access of parent resource varies? 
+ // - : if there is a hint, submit a transition from last layout/access to suggested, otherwise transition to all-stage-read access. + if (rag.movedSourceStatus.find(resName) != rag.movedSourceStatus.end() && rag.movedSourceStatus.at(resName).finalAccess) { + auto possibleNextAccess = rag.movedSourceStatus.at(resName).status.accessFlag; + if (accessDependent(iter->second.accessFlag, possibleNextAccess, isBuffer)) { + auto &srcBarrierNode = get(ResourceAccessGraph::BarrierTag{}, rag, iter->first); + auto &predictBarrier = srcBarrierNode.rearBarriers.emplace_back(); + auto srcPassID = get(ResourceAccessGraph::PassIDTag{}, rag, iter->first); + predictBarrier.type = gfx::BarrierType::FULL; + predictBarrier.resourceID = realResourceID; + predictBarrier.beginVert = srcPassID; + predictBarrier.endVert = srcPassID; + predictBarrier.beginStatus = {iter->second.accessFlag, sRange}; + predictBarrier.endStatus = {rag.movedSourceStatus.at(resName).status.accessFlag, sRange}; + predictBarrier.barrier = getGFXBarrier(predictBarrier); + } + } + + // external res const auto &traits = get(ResourceGraph::TraitsTag{}, resourceGraph, realResourceID); auto &states = get(ResourceGraph::StatesTag{}, resourceGraph, realResourceID); - if (traits.hasSideEffects()) { - if (out_degree(realResourceID, resourceGraph) > 1) { - auto sRange = iter->second.range; - if (rag.movedSourceStatus.find(resName) != rag.movedSourceStatus.end()) { - sRange = rag.movedSourceStatus.at(resName).range; - } - if (rangeLastAccess[realResourceID][sRange].vertID < (accessRecord.rbegin()->first)) { - rangeLastAccess[realResourceID][sRange] = AccessWeight{resName, iter->second.accessFlag, accessRecord.rbegin()->first}; - } - } + // whole access or last subres access + bool lastAccess = (realResourceID == resID) || rag.movedSourceStatus.at(resName).finalAccess; + if (traits.hasSideEffects() && lastAccess) { states.states = iter->second.accessFlag; if (traits.residency == ResourceResidency::BACKBUFFER) { auto
lastAccessPassID = get(ResourceAccessGraph::PassIDTag{}, rag, iter->first); @@ -2110,34 +2146,6 @@ void buildBarriers(FrameGraphDispatcher &fgDispatcher) { } } - for (const auto &[resID, rangeWeight] : rangeLastAccess) { - ccstd::unordered_map rangeMap; - for (const auto& [range, weight] : rangeWeight) { - rangeMap[weight.access] += 1; - } - auto iter = std::max_element(rangeMap.begin(), rangeMap.end(), [](const auto &lhs, const auto &rhs) { - return lhs.second < rhs.second; - }); - auto mostCommonAccesss = iter->first; - - for (const auto &[range, weight] : rangeWeight) { - if (weight.access != mostCommonAccesss) { - auto &barrierNode = get(ResourceAccessGraph::BarrierTag{}, rag, weight.vertID); - auto& barrier = barrierNode.rearBarriers.emplace_back(); - barrier.resourceID = resID; - barrier.beginVert = weight.vertID; - barrier.endVert = weight.vertID; - barrier.type = gfx::BarrierType::FULL; - barrier.beginStatus = {weight.access, range}; - barrier.endStatus = {mostCommonAccesss, range}; - barrier.barrier = getGFXBarrier(barrier); - } - } - auto &states = get(ResourceGraph::StatesTag{}, resourceGraph, resID); - states.states = mostCommonAccesss; - } - - { for (auto &fgRenderpassInfo : rag.rpInfo) { auto &colorAttachments = fgRenderpassInfo.rpInfo.colorAttachments; diff --git a/native/cocos/renderer/pipeline/custom/NativePipeline.cpp b/native/cocos/renderer/pipeline/custom/NativePipeline.cpp index 461aec45d27..616db2fac66 100644 --- a/native/cocos/renderer/pipeline/custom/NativePipeline.cpp +++ b/native/cocos/renderer/pipeline/custom/NativePipeline.cpp @@ -1058,7 +1058,7 @@ bool setupGpuDrivenResources( } // namespace void NativePipeline::addBuiltinGpuCullingPass(uint32_t cullingID, - const scene::Camera *camera, const std::string &layoutPath, + const scene::Camera *camera, const std::string &layoutPath, const std::string &hzbName, const scene::Light *light, bool bMainPass) { auto *scene = camera->getScene(); if (!scene) { @@ -1156,7 +1156,7 @@ void 
NativePipeline::addBuiltinGpuCullingPass(uint32_t cullingID, planes.push_back(plane->n.z); planes.push_back(plane->d); } - ArrayBuffer planesBuffer(reinterpret_cast(&planes[0]), sizeof(float) * planes.size()); + ArrayBuffer planesBuffer(reinterpret_cast(&planes[0]), sizeof(float) * static_cast(planes.size())); gpuCullPass->setMat4("cc_view", camera->getMatView()); gpuCullPass->setMat4("cc_proj", camera->getMatProj()); gpuCullPass->setArrayBuffer("cc_planes", &planesBuffer); @@ -1232,23 +1232,17 @@ void NativePipeline::addBuiltinHzbGenerationPass( 1, 1, 1, gfx::SampleCount::X1); } - bool moved = std::any_of(renderGraph.movePasses.begin(), renderGraph.movePasses.end(), [&currMipName](const MovePass &movePass) { - return std::any_of(movePass.movePairs.begin(), movePass.movePairs.end(), [&currMipName](const MovePair &pair) { - return pair.source == currMipName; - }); - }); - if (!moved) { - MovePair pair(move.get_allocator()); - pair.source = currMipName; - pair.target = targetHzbName; - pair.mipLevels = 1; - pair.numSlices = 1; - pair.targetMostDetailedMip = k; - move.movePairs.emplace_back(std::move(pair)); + MovePair pair(move.get_allocator()); + pair.source = currMipName; + pair.target = targetHzbName; + pair.mipLevels = 1; + pair.numSlices = 1; + pair.targetMostDetailedMip = k; + pair.possibleUsage = gfx::AccessFlagBit::COMPUTE_SHADER_READ_OTHER; + move.movePairs.emplace_back(std::move(pair)); - desc.width = getHalfSize(desc.width); - desc.height =getHalfSize(desc.height); - } + desc.width = getHalfSize(desc.width); + desc.height =getHalfSize(desc.height); } } diff --git a/native/cocos/renderer/pipeline/custom/NativePipelineTypes.h b/native/cocos/renderer/pipeline/custom/NativePipelineTypes.h index f0d66d8e4db..a86aa1fdf0d 100644 --- a/native/cocos/renderer/pipeline/custom/NativePipelineTypes.h +++ b/native/cocos/renderer/pipeline/custom/NativePipelineTypes.h @@ -640,6 +640,7 @@ class NativeRenderPassBuilder final : public RenderPassBuilder, public NativeSet 
MultisampleRenderSubpassBuilder *addMultisampleRenderSubpass(uint32_t count, uint32_t quality, const ccstd::string &subpassName) override; ComputeSubpassBuilder *addComputeSubpass(const ccstd::string &subpassName) override; void setCustomShaderStages(const ccstd::string &name, gfx::ShaderStageFlagBit stageFlags) override; + }; class NativeMultisampleRenderPassBuilder final : public MultisampleRenderPassBuilder, public NativeSetter { diff --git a/native/cocos/renderer/pipeline/custom/NativeRenderGraph.cpp b/native/cocos/renderer/pipeline/custom/NativeRenderGraph.cpp index ab9f8eea6ac..cc6e2c863f1 100644 --- a/native/cocos/renderer/pipeline/custom/NativeRenderGraph.cpp +++ b/native/cocos/renderer/pipeline/custom/NativeRenderGraph.cpp @@ -795,6 +795,9 @@ void NativeRenderQueueBuilder::addGpuDrivenResource(const scene::Camera *camera, view.accessType = AccessType::READ; view.shaderStageFlags = gfx::ShaderStageFlagBit::VERTEX | gfx::ShaderStageFlagBit::FRAGMENT; } + ccstd::pmr::string indirectBuffer("CCDrawIndirectBuffer"); + indirectBuffer.append(std::to_string(cullingID)); + rasterPass.resources.emplace(indirectBuffer, ResourceFlags::INDIRECT); } } } diff --git a/native/cocos/renderer/pipeline/custom/RenderCommonJsb.cpp b/native/cocos/renderer/pipeline/custom/RenderCommonJsb.cpp index e4068fee44d..d16b205bd32 100644 --- a/native/cocos/renderer/pipeline/custom/RenderCommonJsb.cpp +++ b/native/cocos/renderer/pipeline/custom/RenderCommonJsb.cpp @@ -235,6 +235,9 @@ bool nativevalue_to_se(const cc::render::MovePair &from, se::Value &to, se::Obje nativevalue_to_se(from.targetPlaneSlice, tmp, ctx); obj->setProperty("targetPlaneSlice", tmp); + nativevalue_to_se(from.possibleUsage, tmp, ctx); + obj->setProperty("possibleUsage", tmp); + to.setObject(obj); return true; } @@ -504,6 +507,10 @@ bool sevalue_to_native(const se::Value &from, cc::render:: if(!field.isNullOrUndefined()) { ok &= sevalue_to_native(field, &(to->targetPlaneSlice), ctx); } + 
obj->getProperty("possibleUsage", &field, true); + if(!field.isNullOrUndefined()) { + ok &= sevalue_to_native(field, &(to->possibleUsage), ctx); + } return ok; } diff --git a/native/cocos/renderer/pipeline/custom/RenderCommonSerialization.h b/native/cocos/renderer/pipeline/custom/RenderCommonSerialization.h index 9c6059e6875..8c999c752c8 100644 --- a/native/cocos/renderer/pipeline/custom/RenderCommonSerialization.h +++ b/native/cocos/renderer/pipeline/custom/RenderCommonSerialization.h @@ -161,6 +161,7 @@ inline void save(OutputArchive& ar, const MovePair& v) { save(ar, v.targetMostDetailedMip); save(ar, v.targetFirstSlice); save(ar, v.targetPlaneSlice); + save(ar, v.possibleUsage); } inline void load(InputArchive& ar, MovePair& v) { @@ -171,6 +172,7 @@ inline void load(InputArchive& ar, MovePair& v) { load(ar, v.targetMostDetailedMip); load(ar, v.targetFirstSlice); load(ar, v.targetPlaneSlice); + load(ar, v.possibleUsage); } inline void save(OutputArchive& ar, const PipelineStatistics& v) { diff --git a/native/cocos/renderer/pipeline/custom/RenderCommonTypes.cpp b/native/cocos/renderer/pipeline/custom/RenderCommonTypes.cpp index ad6ef4ed792..a85ea5fa3ed 100644 --- a/native/cocos/renderer/pipeline/custom/RenderCommonTypes.cpp +++ b/native/cocos/renderer/pipeline/custom/RenderCommonTypes.cpp @@ -153,7 +153,8 @@ MovePair::MovePair(MovePair&& rhs, const allocator_type& alloc) numSlices(rhs.numSlices), targetMostDetailedMip(rhs.targetMostDetailedMip), targetFirstSlice(rhs.targetFirstSlice), - targetPlaneSlice(rhs.targetPlaneSlice) {} + targetPlaneSlice(rhs.targetPlaneSlice), + possibleUsage(rhs.possibleUsage) {} MovePair::MovePair(MovePair const& rhs, const allocator_type& alloc) : source(rhs.source, alloc), @@ -162,7 +163,8 @@ MovePair::MovePair(MovePair const& rhs, const allocator_type& alloc) numSlices(rhs.numSlices), targetMostDetailedMip(rhs.targetMostDetailedMip), targetFirstSlice(rhs.targetFirstSlice), - targetPlaneSlice(rhs.targetPlaneSlice) {} + 
targetPlaneSlice(rhs.targetPlaneSlice), + possibleUsage(rhs.possibleUsage) {} } // namespace render diff --git a/native/cocos/renderer/pipeline/custom/RenderCommonTypes.h b/native/cocos/renderer/pipeline/custom/RenderCommonTypes.h index 786deb24e5e..5160b25c570 100644 --- a/native/cocos/renderer/pipeline/custom/RenderCommonTypes.h +++ b/native/cocos/renderer/pipeline/custom/RenderCommonTypes.h @@ -104,6 +104,8 @@ enum class ResourceFlags : uint32_t { SHADING_RATE = 0x80, TRANSFER_SRC = 0x100, TRANSFER_DST = 0x200, + VERTEX = 0x400, + INDEX = 0x800, }; constexpr ResourceFlags operator|(const ResourceFlags lhs, const ResourceFlags rhs) noexcept { @@ -422,6 +424,7 @@ struct MovePair { uint32_t targetMostDetailedMip{0}; uint32_t targetFirstSlice{0}; uint32_t targetPlaneSlice{0}; + gfx::AccessFlagBit possibleUsage{gfx::AccessFlagBit::NONE}; }; struct PipelineStatistics { diff --git a/native/cocos/renderer/pipeline/custom/RenderGraphTypes.cpp b/native/cocos/renderer/pipeline/custom/RenderGraphTypes.cpp index bcb977b9e35..6627c262ebc 100644 --- a/native/cocos/renderer/pipeline/custom/RenderGraphTypes.cpp +++ b/native/cocos/renderer/pipeline/custom/RenderGraphTypes.cpp @@ -219,6 +219,7 @@ RasterPass::RasterPass(const allocator_type& alloc) noexcept computeViews(alloc), attachmentIndexMap(alloc), textures(alloc), + resources(alloc), subpassGraph(alloc), versionName(alloc) {} @@ -227,6 +228,7 @@ RasterPass::RasterPass(RasterPass&& rhs, const allocator_type& alloc) computeViews(std::move(rhs.computeViews), alloc), attachmentIndexMap(std::move(rhs.attachmentIndexMap), alloc), textures(std::move(rhs.textures), alloc), + resources(std::move(rhs.resources), alloc), subpassGraph(std::move(rhs.subpassGraph), alloc), width(rhs.width), height(rhs.height), @@ -243,6 +245,7 @@ RasterPass::RasterPass(RasterPass const& rhs, const allocator_type& alloc) computeViews(rhs.computeViews, alloc), attachmentIndexMap(rhs.attachmentIndexMap, alloc), textures(rhs.textures, alloc), + 
resources(rhs.resources, alloc), subpassGraph(rhs.subpassGraph, alloc), width(rhs.width), height(rhs.height), diff --git a/native/cocos/renderer/pipeline/custom/RenderGraphTypes.h b/native/cocos/renderer/pipeline/custom/RenderGraphTypes.h index 8c87780778f..1cccc271dbf 100644 --- a/native/cocos/renderer/pipeline/custom/RenderGraphTypes.h +++ b/native/cocos/renderer/pipeline/custom/RenderGraphTypes.h @@ -448,6 +448,7 @@ struct RasterPass { PmrTransparentMap> computeViews; PmrTransparentMap attachmentIndexMap; PmrTransparentMap textures; + PmrTransparentMap resources; SubpassGraph subpassGraph; uint32_t width{0}; uint32_t height{0}; @@ -461,8 +462,8 @@ struct RasterPass { }; inline bool operator==(const RasterPass& lhs, const RasterPass& rhs) noexcept { - return std::forward_as_tuple(lhs.rasterViews, lhs.computeViews, lhs.textures, lhs.subpassGraph, lhs.width, lhs.height, lhs.count, lhs.quality) == - std::forward_as_tuple(rhs.rasterViews, rhs.computeViews, rhs.textures, rhs.subpassGraph, rhs.width, rhs.height, rhs.count, rhs.quality); + return std::forward_as_tuple(lhs.rasterViews, lhs.computeViews, lhs.textures, lhs.resources, lhs.subpassGraph, lhs.width, lhs.height, lhs.count, lhs.quality) == + std::forward_as_tuple(rhs.rasterViews, rhs.computeViews, rhs.textures, rhs.resources, rhs.subpassGraph, rhs.width, rhs.height, rhs.count, rhs.quality); } inline bool operator!=(const RasterPass& lhs, const RasterPass& rhs) noexcept { @@ -1190,6 +1191,7 @@ inline hash_t hash::operator()(const cc::render::RasterP hash_combine(seed, val.rasterViews); hash_combine(seed, val.computeViews); hash_combine(seed, val.textures); + hash_combine(seed, val.resources); hash_combine(seed, val.subpassGraph); hash_combine(seed, val.width); hash_combine(seed, val.height); From a44d9eaf70e4ea053b1d3e8804ba4760bacbea0b Mon Sep 17 00:00:00 2001 From: stanleyljl <798080536@qq.com> Date: Wed, 13 Sep 2023 18:35:16 +0800 Subject: [PATCH 09/16] add GPUDrivenEnabled for MeshRenderer & Fix occlusion 
culling bug --- cocos/3d/framework/mesh-renderer.ts | 41 +++++++++++-- cocos/asset/assets/simple-texture.ts | 4 +- cocos/game/director.ts | 4 +- cocos/render-scene/core/render-scene.ts | 12 +--- cocos/render-scene/scene/model.ts | 18 ++++++ .../pipeline/gpu-driven/gpu-culling.effect | 60 +++++++++---------- editor/i18n/en/modules/rendering.js | 4 ++ editor/i18n/zh/modules/rendering.js | 4 ++ native/cocos/3d/assets/Mesh.cpp | 29 ++++++++- native/cocos/core/assets/RenderingSubMesh.cpp | 18 ++++++ native/cocos/core/assets/RenderingSubMesh.h | 8 +++ native/cocos/core/assets/SimpleTexture.cpp | 4 +- .../cocos/renderer/gfx-base/GFXDef-common.h | 5 ++ .../renderer/gfx-base/GFXInputAssembler.cpp | 10 ++-- .../gfx-gles3/GLES3PrimaryCommandBuffer.cpp | 10 ++++ .../renderer/gfx-metal/MTLCommandBuffer.mm | 2 + .../renderer/gfx-vulkan/VKCommandBuffer.cpp | 4 ++ .../renderer/pipeline/PipelineSceneData.cpp | 1 + .../pipeline/custom/NativeExecutor.cpp | 4 ++ .../pipeline/custom/NativePipeline.cpp | 11 +++- .../cocos/renderer/pipeline/helper/Utils.cpp | 2 +- native/cocos/scene/Model.cpp | 8 ++- native/cocos/scene/Model.h | 5 +- native/cocos/scene/RenderScene.cpp | 6 -- native/cocos/scene/RenderScene.h | 1 - native/cocos/scene/SubModel.cpp | 14 ++++- native/cocos/scene/SubModel.h | 1 + native/cocos/scene/gpu-scene/GPUMeshPool.cpp | 46 ++++++++++++-- native/cocos/scene/gpu-scene/GPUMeshPool.h | 7 ++- native/cocos/scene/gpu-scene/GPUScene.cpp | 5 -- native/cocos/scene/gpu-scene/GPUScene.h | 1 - native/tools/swig-config/scene.i | 1 + 32 files changed, 263 insertions(+), 87 deletions(-) diff --git a/cocos/3d/framework/mesh-renderer.ts b/cocos/3d/framework/mesh-renderer.ts index 142fd459264..ff28d56222d 100644 --- a/cocos/3d/framework/mesh-renderer.ts +++ b/cocos/3d/framework/mesh-renderer.ts @@ -312,6 +312,9 @@ export class MeshRenderer extends ModelRenderer { @serializable protected _mesh: Mesh | null = null; + @serializable + protected _gpuDrivenEnabled: boolean = true; + 
@serializable protected _shadowCastingMode = ModelShadowCastingMode.OFF; @@ -456,7 +459,7 @@ export class MeshRenderer extends ModelRenderer { * 注意,设置时,所有形变目标的权重都将归零。 */ @type(Mesh) - @displayOrder(1) + @displayOrder(0) get mesh (): Mesh | null { return this._mesh; } @@ -479,6 +482,21 @@ export class MeshRenderer extends ModelRenderer { this._updateReceiveDirLight(); } + /** + * @en Whether to enable GPU Driven. + * @zh 是否开启 GPU Driven 。 + */ + @editable + @displayOrder(1) + get gpuDrivenEnabled (): boolean { + return this._gpuDrivenEnabled; + } + + set gpuDrivenEnabled (val) { + this._gpuDrivenEnabled = val; + this._updateGPUDrivenEnabled(); + } + /** * @en Gets the model in [[RenderScene]]. * @zh 获取渲染场景 [[RenderScene]] 中对应的模型。 @@ -567,6 +585,7 @@ export class MeshRenderer extends ModelRenderer { this._updateUseReflectionProbe(); this._updateReceiveDirLight(); this._updateStandardSkin(); + this._updateGPUDrivenEnabled(); } // Redo, Undo, Prefab restore, etc. @@ -584,6 +603,7 @@ export class MeshRenderer extends ModelRenderer { this._updateUseReflectionProbe(); this._updateReceiveDirLight(); this._updateStandardSkin(); + this._updateGPUDrivenEnabled(); } public onEnable (): void { @@ -610,6 +630,7 @@ export class MeshRenderer extends ModelRenderer { this._onUpdateReflectionProbeDataMap(); this._onUpdateLocalReflectionProbeData(); this._updateStandardSkin(); + this._updateGPUDrivenEnabled(); this._attachToScene(); } @@ -916,6 +937,7 @@ export class MeshRenderer extends ModelRenderer { this._updateReceiveDirLight(); this._onUpdateReflectionProbeDataMap(); this._onUpdateLocalReflectionProbeData(); + this._updateGPUDrivenEnabled(); } } @@ -974,9 +996,7 @@ export class MeshRenderer extends ModelRenderer { this._detachFromScene(); } - if (this.supportGPUScene()) { - if (this.mesh) renderScene.addGPUMesh(this.mesh); - + if (this.supportGPUDriven() && this.mesh!.isInGPUScene()) { renderScene.addGPUModel(this._model); } else { renderScene.addModel(this._model); @@ -988,7 
+1008,7 @@ export class MeshRenderer extends ModelRenderer { */ public _detachFromScene (): void { if (this._model && this._model.scene) { - if (this.supportGPUScene()) { + if (this.supportGPUDriven() && this.mesh!.isInGPUScene()) { this._model.scene.removeGPUModel(this._model); } else { this._model.scene.removeModel(this._model); @@ -999,12 +1019,16 @@ export class MeshRenderer extends ModelRenderer { /** * @engineInternal */ - public supportGPUScene (): boolean { + public supportGPUDriven (): boolean { const sceneData = cclegacy.director.root.pipeline.pipelineSceneData; if (!sceneData || !sceneData.isGPUDrivenEnabled()) { return false; } + if (!this._gpuDrivenEnabled) { + return false; + } + if (!this._mesh || !this.node) { return false; } @@ -1168,6 +1192,11 @@ export class MeshRenderer extends ModelRenderer { } } + protected _updateGPUDrivenEnabled (): void { + if (!this._model) { return; } + this._model.gpuDrivenEnabled = this._gpuDrivenEnabled; + } + protected onMobilityChanged (): void { this._updateUseLightProbe(); this._updateReceiveDirLight(); diff --git a/cocos/asset/assets/simple-texture.ts b/cocos/asset/assets/simple-texture.ts index a93516c20c7..a0a49cd2725 100644 --- a/cocos/asset/assets/simple-texture.ts +++ b/cocos/asset/assets/simple-texture.ts @@ -322,7 +322,9 @@ export class SimpleTexture extends TextureBase { if (this._width === 0 || this._height === 0) { return; } let flags = TextureFlagBit.NONE; if (this._mipFilter !== Filter.NONE && canGenerateMipmap(device, this._width, this._height)) { - this._mipmapLevel = getMipLevel(this._width, this._height); + if (!this.isCompressed) { + this._mipmapLevel = getMipLevel(this._width, this._height); + } if (!this.isUsingOfflineMipmaps() && !this.isCompressed) { flags = TextureFlagBit.GEN_MIPMAP; } diff --git a/cocos/game/director.ts b/cocos/game/director.ts index 5f43630cfd0..4d741a6a267 100644 --- a/cocos/game/director.ts +++ b/cocos/game/director.ts @@ -535,8 +535,8 @@ export class Director extends 
EventTarget { for (let i = 0; i < renderers.length; i++) { const renderer = renderers[i]; const mesh = renderer.mesh; - if (renderer.supportGPUScene()) { - meshes.push(mesh!); + if (mesh && mesh.supportGPUScene()) { + meshes.push(mesh); } } diff --git a/cocos/render-scene/core/render-scene.ts b/cocos/render-scene/core/render-scene.ts index 60208d8aa35..d8c5d957d1e 100644 --- a/cocos/render-scene/core/render-scene.ts +++ b/cocos/render-scene/core/render-scene.ts @@ -203,13 +203,14 @@ export class RenderScene { /** * @engineInternal */ - public activate () { + public activate (): void { + // Do nothing } /** * @engineInternal */ - public buildGPUScene (meshes: Mesh[]) { + public buildGPUScene (meshes: Mesh[]): void { // Only support in native. } @@ -557,13 +558,6 @@ export class RenderScene { this._models.length = 0; } - /** - * Add a mesh to GPUScene. - * Only support in native. - * @internal - */ - public addGPUMesh (m: Mesh): void {} - /** * @en Add a GPU Driven model, all models attached to the render scene will be submitted for rendering. * @zh 增加一个 GPU Driven 模型,渲染场景上挂载的所有模型都会被提交渲染。 diff --git a/cocos/render-scene/scene/model.ts b/cocos/render-scene/scene/model.ts index 4bd1ce433b8..23859d2889f 100644 --- a/cocos/render-scene/scene/model.ts +++ b/cocos/render-scene/scene/model.ts @@ -384,6 +384,18 @@ export class Model { this._reflectionProbeBlendWeight = val; } + /** + * @en Whether to enable GPU Driven. + * @zh 是否开启 GPU Driven 。 + */ + get gpuDrivenEnabled (): boolean { + return this._gpuDrivenEnabled; + } + + set gpuDrivenEnabled (val) { + this._gpuDrivenEnabled = val; + } + /** * @en The type of the model * @zh 模型类型 @@ -577,6 +589,12 @@ export class Model { */ protected _reflectionProbeType = ReflectionProbeType.NONE; + /** + * @en Whether to enable GPU Driven. 
+ * @zh 是否开启 GPU Driven 。 + */ + protected _gpuDrivenEnabled = true; + /** * @internal * @en native object diff --git a/editor/assets/effects/pipeline/gpu-driven/gpu-culling.effect b/editor/assets/effects/pipeline/gpu-driven/gpu-culling.effect index 629bb61c2c2..51cc75956a1 100644 --- a/editor/assets/effects/pipeline/gpu-driven/gpu-culling.effect +++ b/editor/assets/effects/pipeline/gpu-driven/gpu-culling.effect @@ -95,7 +95,21 @@ CCProgram culling-main %{ #if CC_USE_OCCLUSION_CULLING // 2D Polyhedral Bounds of a Clipped, Perspective-Projected 3D Sphere. Michael Mara, Morgan McGuire. 2013 // Consider: minClipZ & projectionSignY & orientation in projection, so we can not use p00 & p11 directly. - bool projectSpherePerspective(vec3 c, float r, float znear, mat4 proj, uint orientation, out vec4 aabb, out float nearest) { + /** + if (orientation == 0) { + aabb = vec4(minx * proj[0][0], miny * proj[1][1], maxx * proj[0][0], maxy * proj[1][1]); + } + else if (orientation == 1) { + aabb = vec4(maxy * proj[1][0], minx * proj[0][1], miny * proj[1][0], maxx * proj[0][1]); + } + else if (orientation == 2) { + aabb = vec4(maxx * proj[0][0], maxy * proj[1][1], minx * proj[0][0], miny * proj[1][1]); + } + else { + aabb = vec4(miny * proj[1][0], maxx * proj[0][1], maxy * proj[1][0], minx * proj[0][1]); + } + */ + bool projectSpherePerspective(vec3 c, float r, float znear, mat4 proj, out vec4 aabb, out float nearest) { if (-c.z < r + znear) return false; vec3 cr = c * r; @@ -110,18 +124,7 @@ CCProgram culling-main %{ float maxy = (vy * c.y - cr.z) / -(vy * c.z + cr.y); // project to ndc space - if (orientation == 0) { - aabb = vec4(minx * proj[0][0], miny * proj[1][1], maxx * proj[0][0], maxy * proj[1][1]); - } - else if (orientation == 1) { - aabb = vec4(maxy * proj[1][0], minx * proj[0][1], miny * proj[1][0], maxx * proj[0][1]); - } - else if (orientation == 2) { - aabb = vec4(maxx * proj[0][0], maxy * proj[1][1], minx * proj[0][0], miny * proj[1][1]); - } - else { - aabb = 
vec4(miny * proj[1][0], maxx * proj[0][1], maxy * proj[1][0], minx * proj[0][1]); - } + aabb = vec4(minx * proj[0][0], miny * proj[1][1], maxx * proj[0][0], maxy * proj[1][1]); // ndc space -> uv space aabb = aabb * vec4(0.5f) + vec4(0.5f); @@ -131,7 +134,7 @@ CCProgram culling-main %{ return true; } - bool projectSphereOrtho(vec3 c, float r, float znear, mat4 proj, uint orientation, out vec4 aabb, out float nearest) { + bool projectSphereOrtho(vec3 c, float r, float znear, mat4 proj, out vec4 aabb, out float nearest) { if (-c.z < r + znear) return false; float minx = c.x - r; @@ -141,18 +144,7 @@ CCProgram culling-main %{ float maxy = c.y + r; // project to ndc space - if (orientation == 0) { - aabb = vec4(minx * proj[0][0], miny * proj[1][1], maxx * proj[0][0], maxy * proj[1][1]); - } - else if (orientation == 1) { - aabb = vec4(maxy * proj[1][0], minx * proj[0][1], miny * proj[1][0], maxx * proj[0][1]); - } - else if (orientation == 2) { - aabb = vec4(maxx * proj[0][0], maxy * proj[1][1], minx * proj[0][0], miny * proj[1][1]); - } - else { - aabb = vec4(miny * proj[1][0], maxx * proj[0][1], maxy * proj[1][0], minx * proj[0][1]); - } + aabb = vec4(minx * proj[0][0], miny * proj[1][1], maxx * proj[0][0], maxy * proj[1][1]); aabb = aabb + vec4(proj[3][0], proj[3][1], proj[3][0], proj[3][1]); // ndc space -> uv space @@ -171,19 +163,27 @@ CCProgram culling-main %{ float radius = sphere.w; if (cc_isPerspective != 0) { - if (!projectSpherePerspective(center, radius, cc_znear, cc_proj, cc_orientation, aabb, nearest)) { + if (!projectSpherePerspective(center, radius, cc_znear, cc_proj, aabb, nearest)) { return false; } } else { - if (!projectSphereOrtho(center, radius, cc_znear, cc_proj, cc_orientation, aabb, nearest)) { + if (!projectSphereOrtho(center, radius, cc_znear, cc_proj, aabb, nearest)) { return false; } } float width = abs(aabb.z - aabb.x) * cc_depthWidth; float height = abs(aabb.w - aabb.y) * cc_depthHeight; - float level = max(0.0, floor(log2(max(width, 
height)))); - float depth = textureLod(CCDepthMap, (aabb.xy + aabb.zw) * vec2(0.5), level).x; + float level = max(0.0, ceil(log2(max(width, height)))); + #if CC_USE_SAMPLER_FILTER_MIN_MAX + float depth = textureLod(CCDepthMap, (aabb.xy + aabb.zw) * vec2(0.5), level).x; + #else + float a = textureLod(CCDepthMap, aabb.xy, level).x; + float b = textureLod(CCDepthMap, aabb.xw, level).x; + float c = textureLod(CCDepthMap, aabb.zy, level).x; + float d = textureLod(CCDepthMap, aabb.zw, level).x; + float depth = max(max(a, b), max(c, d)); + #endif return nearest > depth; } #endif diff --git a/editor/i18n/en/modules/rendering.js b/editor/i18n/en/modules/rendering.js index 3806a1f47a6..674e43e5c8b 100644 --- a/editor/i18n/en/modules/rendering.js +++ b/editor/i18n/en/modules/rendering.js @@ -57,6 +57,10 @@ module.exports = { displayName: 'Materials', tooltip: 'Material array. Each item in turn specifies material of sub mesh.', }, + 'gpuDrivenEnabled': { + displayName: 'Enable GPU Driven', + tooltip: 'Whether to enable GPU Driven.', + }, 'shadowCastingModeForInspector': { displayName: 'Cast Shadows', tooltip: 'Whether if this mesh casts shadows.', diff --git a/editor/i18n/zh/modules/rendering.js b/editor/i18n/zh/modules/rendering.js index c202e2eda20..dae14f2f74b 100644 --- a/editor/i18n/zh/modules/rendering.js +++ b/editor/i18n/zh/modules/rendering.js @@ -57,6 +57,10 @@ module.exports = { displayName: '材质', tooltip: '材质资源数组。每一项依次指定了子网格的材质。', }, + 'gpuDrivenEnabled': { + displayName: '启用 GPU Driven', + tooltip: '此物体是否启用 GPU Driven。', + }, 'shadowCastingModeForInspector': { displayName: '投射阴影', tooltip: '此网格是否投射阴影。', diff --git a/native/cocos/3d/assets/Mesh.cpp b/native/cocos/3d/assets/Mesh.cpp index 00bc466d063..f01d66e9234 100644 --- a/native/cocos/3d/assets/Mesh.cpp +++ b/native/cocos/3d/assets/Mesh.cpp @@ -449,7 +449,7 @@ void Mesh::initialize() { } _initialized = true; - _supportGPUScene = (!_struct.supportGPUScene.has_value() || _struct.supportGPUScene.value()) && 
isGPUMeshFormat(); + _supportGPUScene = (_struct.supportGPUScene.has_value() ? _struct.supportGPUScene.value() : true) && isGPUMeshFormat(); if (_struct.compressed) { // decompress @@ -463,7 +463,30 @@ void Mesh::initialize() { MeshUtils::dequantizeMesh(_struct, _data); } - if (_struct.dynamic.has_value()) { + const auto *pipeline = Root::getInstance()->getPipeline(); + const auto *sceneData = pipeline ? pipeline->getPipelineSceneData() : nullptr; + const auto gpuDrivenEnabled = sceneData && sceneData->isGPUDrivenEnabled(); + if (gpuDrivenEnabled && _supportGPUScene) { + for (auto i = 0U; i < _struct.primitives.size(); i++) { + const auto &primitive = _struct.primitives[i]; + gfx::BufferList subVBs = {}; + gfx::AttributeList attributes; + + for (const auto idx : primitive.vertexBundelIndices) { + const auto &vertexBundle = _struct.vertexBundles[idx]; + for (const auto &attr : vertexBundle.attributes) { + attributes.emplace_back(attr); + } + } + + // Update buffers later in GPUMeshPool + auto *subMesh = ccnew RenderingSubMesh(subVBs, attributes, primitive.primitiveMode, nullptr); + subMesh->setMesh(this); + subMesh->setSubMeshIdx(static_cast(i)); + + _renderingSubMeshes.emplace_back(subMesh); + } + } else if (_struct.dynamic.has_value()) { auto *device = gfx::Device::getInstance(); gfx::BufferList vertexBuffers; @@ -610,7 +633,7 @@ void Mesh::initialize() { _isMeshDataUploaded = true; #if !CC_EDITOR - if (!_allowDataAccess && !supportGPUScene()) { + if (!_allowDataAccess) { releaseData(); } #endif diff --git a/native/cocos/core/assets/RenderingSubMesh.cpp b/native/cocos/core/assets/RenderingSubMesh.cpp index 97ed95e5c00..bb6ba40598a 100644 --- a/native/cocos/core/assets/RenderingSubMesh.cpp +++ b/native/cocos/core/assets/RenderingSubMesh.cpp @@ -381,4 +381,22 @@ gfx::Buffer *RenderingSubMesh::allocVertexIdBuffer(gfx::Device *device) { return vertexIdBuffer; } +void RenderingSubMesh::resetBuffers(const gfx::BufferList &vertexBuffers, + gfx::Buffer* indexBuffer, + 
uint32_t vertexCount, + uint32_t firstVertex, + uint32_t indexCount, + uint32_t firstIndex, + int32_t vertexOffset) { + _vertexBuffers = vertexBuffers; + _indexBuffer = indexBuffer; + _iaInfo.vertexBuffers = vertexBuffers; + _iaInfo.indexBuffer = indexBuffer; + _iaInfo.vertexCount = vertexCount; + _iaInfo.firstVertex = firstVertex; + _iaInfo.indexCount = indexCount; + _iaInfo.firstIndex = firstIndex; + _iaInfo.vertexOffset = vertexOffset; +} + } // namespace cc diff --git a/native/cocos/core/assets/RenderingSubMesh.h b/native/cocos/core/assets/RenderingSubMesh.h index f270d713b83..40b3923468c 100644 --- a/native/cocos/core/assets/RenderingSubMesh.h +++ b/native/cocos/core/assets/RenderingSubMesh.h @@ -194,6 +194,14 @@ class RenderingSubMesh : public RefCounted { inline void setMeshPoolIndex(uint32_t index) { _meshPoolIndex = index; } inline uint32_t getMeshPoolIndex() const { return _meshPoolIndex; } + void resetBuffers(const gfx::BufferList &vertexBuffers, + gfx::Buffer *indexBuffer, + uint32_t vertexCount, + uint32_t firstVertex, + uint32_t indexCount, + uint32_t firstIndex, + int32_t vertexOffset); + private: gfx::Buffer *allocVertexIdBuffer(gfx::Device *device); diff --git a/native/cocos/core/assets/SimpleTexture.cpp b/native/cocos/core/assets/SimpleTexture.cpp index b24457e5318..86fb47d5440 100644 --- a/native/cocos/core/assets/SimpleTexture.cpp +++ b/native/cocos/core/assets/SimpleTexture.cpp @@ -134,7 +134,9 @@ void SimpleTexture::createTexture(gfx::Device *device) { auto flags = gfx::TextureFlagBit::NONE; auto usage = gfx::TextureUsageBit::SAMPLED | gfx::TextureUsageBit::TRANSFER_DST; if (_mipFilter != Filter::NONE && canGenerateMipmap(_width, _height)) { - _mipmapLevel = getMipLevel(_width, _height); + if (!isCompressed()) { + _mipmapLevel = getMipLevel(_width, _height); + } if (!isUsingOfflineMipmaps() && !isCompressed()) { flags = gfx::TextureFlagBit::GEN_MIPMAP; } diff --git a/native/cocos/renderer/gfx-base/GFXDef-common.h 
b/native/cocos/renderer/gfx-base/GFXDef-common.h index bf0f329685a..1e8a4e6ada2 100644 --- a/native/cocos/renderer/gfx-base/GFXDef-common.h +++ b/native/cocos/renderer/gfx-base/GFXDef-common.h @@ -1314,6 +1314,11 @@ struct InputAssemblerInfo { AttributeList attributes; BufferList vertexBuffers; Buffer *indexBuffer{nullptr}; // @ts-nullable + uint32_t vertexCount{0}; + uint32_t firstVertex{0}; + uint32_t indexCount{0}; + uint32_t firstIndex{0}; + int32_t vertexOffset{0}; EXPOSE_COPY_FN(InputAssemblerInfo) }; diff --git a/native/cocos/renderer/gfx-base/GFXInputAssembler.cpp b/native/cocos/renderer/gfx-base/GFXInputAssembler.cpp index 616b6851e7c..4b5b702b0ee 100644 --- a/native/cocos/renderer/gfx-base/GFXInputAssembler.cpp +++ b/native/cocos/renderer/gfx-base/GFXInputAssembler.cpp @@ -43,12 +43,12 @@ void InputAssembler::initialize(const InputAssemblerInfo &info) { _attributesHash = computeAttributesHash(_attributes); if (_indexBuffer) { - _drawInfo.indexCount = _indexBuffer->getCount(); - _drawInfo.firstIndex = 0; + _drawInfo.indexCount = info.indexCount > 0 ? info.indexCount : _indexBuffer->getCount(); + _drawInfo.firstIndex = info.firstIndex; + _drawInfo.vertexOffset = info.vertexOffset; } else if (!_vertexBuffers.empty()) { - _drawInfo.vertexCount = _vertexBuffers[0]->getCount(); - _drawInfo.firstVertex = 0; - _drawInfo.vertexOffset = 0; + _drawInfo.vertexCount = info.vertexCount > 0 ? 
info.vertexCount : _vertexBuffers[0]->getCount(); + _drawInfo.firstVertex = info.firstVertex; } doInit(info); diff --git a/native/cocos/renderer/gfx-gles3/GLES3PrimaryCommandBuffer.cpp b/native/cocos/renderer/gfx-gles3/GLES3PrimaryCommandBuffer.cpp index a942f163c18..b0a1eb6eb9c 100644 --- a/native/cocos/renderer/gfx-gles3/GLES3PrimaryCommandBuffer.cpp +++ b/native/cocos/renderer/gfx-gles3/GLES3PrimaryCommandBuffer.cpp @@ -92,6 +92,11 @@ void GLES3PrimaryCommandBuffer::drawIndirect(Buffer *buffer, uint32_t offset, ui auto *glesBuffer = static_cast(buffer); auto *gpuBuffer = glesBuffer->gpuBuffer(); cmdFuncGLES3DrawIndirect(GLES3Device::getInstance(), gpuBuffer, offset, count, stride, false); + if (GLES3Device::getInstance()->constantRegistry()->multiDrawIndirect) { + ++_numDrawCalls; + } else { + _numDrawCalls += count; + } } void GLES3PrimaryCommandBuffer::drawIndexedIndirect(Buffer *buffer, uint32_t offset, uint32_t count, uint32_t stride) { @@ -102,6 +107,11 @@ void GLES3PrimaryCommandBuffer::drawIndexedIndirect(Buffer *buffer, uint32_t off auto *glesBuffer = static_cast(buffer); auto *gpuBuffer = glesBuffer->gpuBuffer(); cmdFuncGLES3DrawIndirect(GLES3Device::getInstance(), gpuBuffer, offset, count, stride, true); + if (GLES3Device::getInstance()->constantRegistry()->multiDrawIndirect) { + ++_numDrawCalls; + } else { + _numDrawCalls += count; + } } void GLES3PrimaryCommandBuffer::beginMarker(const MarkerInfo &marker) { diff --git a/native/cocos/renderer/gfx-metal/MTLCommandBuffer.mm b/native/cocos/renderer/gfx-metal/MTLCommandBuffer.mm index 23db390c4b8..c73e0743760 100644 --- a/native/cocos/renderer/gfx-metal/MTLCommandBuffer.mm +++ b/native/cocos/renderer/gfx-metal/MTLCommandBuffer.mm @@ -595,6 +595,7 @@ of this software and associated engine source code (the "Software"), a limited, indirectBuffer: mtlBuffer->mtlBuffer() indirectBufferOffset: off]; } + _numDrawCalls += count; } void CCMTLCommandBuffer::drawIndexedIndirect(Buffer *buffer, uint32_t offset, 
uint32_t count, uint32_t stride) { @@ -613,6 +614,7 @@ of this software and associated engine source code (the "Software"), a limited, indirectBuffer: mtlBuffer->mtlBuffer() indirectBufferOffset: off]; } + _numDrawCalls += count; } void CCMTLCommandBuffer::draw(const DrawInfo &info) { diff --git a/native/cocos/renderer/gfx-vulkan/VKCommandBuffer.cpp b/native/cocos/renderer/gfx-vulkan/VKCommandBuffer.cpp index d3e567c01f3..4e3ca777a98 100644 --- a/native/cocos/renderer/gfx-vulkan/VKCommandBuffer.cpp +++ b/native/cocos/renderer/gfx-vulkan/VKCommandBuffer.cpp @@ -472,6 +472,7 @@ void CCVKCommandBuffer::drawIndirect(Buffer *buffer, uint32_t offset, uint32_t c bufferOffset + offset, count, stride); + ++_numDrawCalls; } else { for (uint32_t i = 0U; i < count; ++i) { uint32_t currentOffset = bufferOffset + offset + i * stride; @@ -481,6 +482,7 @@ void CCVKCommandBuffer::drawIndirect(Buffer *buffer, uint32_t offset, uint32_t c 1, stride); } + _numDrawCalls += count; } } @@ -501,6 +503,7 @@ void CCVKCommandBuffer::drawIndexedIndirect(Buffer *buffer, uint32_t offset, uin bufferOffset + offset, count, stride); + ++_numDrawCalls; } else { for (uint32_t i = 0U; i < count; ++i) { uint32_t currentOffset = bufferOffset + offset + i * stride; @@ -510,6 +513,7 @@ void CCVKCommandBuffer::drawIndexedIndirect(Buffer *buffer, uint32_t offset, uin 1, stride); } + _numDrawCalls += count; } } diff --git a/native/cocos/renderer/pipeline/PipelineSceneData.cpp b/native/cocos/renderer/pipeline/PipelineSceneData.cpp index b750d63a595..90fb081330b 100644 --- a/native/cocos/renderer/pipeline/PipelineSceneData.cpp +++ b/native/cocos/renderer/pipeline/PipelineSceneData.cpp @@ -180,6 +180,7 @@ void PipelineSceneData::initGPUDrivenMaterial() { IMaterialInfo info; MacroRecord macros{ + {"CC_USE_SAMPLER_FILTER_MIN_MAX", filterMinMax}, {"CC_SUPPORT_FIRST_INSTANCE", firstInstance}, {"CC_USE_FRUSTUM_CULLING", frustumCulling}, {"CC_USE_OCCLUSION_CULLING", defines[i].useOcclusion}, diff --git 
a/native/cocos/renderer/pipeline/custom/NativeExecutor.cpp b/native/cocos/renderer/pipeline/custom/NativeExecutor.cpp index a547d88b6af..5b2cfc766ce 100644 --- a/native/cocos/renderer/pipeline/custom/NativeExecutor.cpp +++ b/native/cocos/renderer/pipeline/custom/NativeExecutor.cpp @@ -43,6 +43,7 @@ #include "cocos/renderer/pipeline/Define.h" #include "cocos/renderer/pipeline/InstancedBuffer.h" #include "cocos/renderer/pipeline/PipelineStateManager.h" +#include "cocos/renderer/pipeline/helper/Utils.h" #include "cocos/scene/Model.h" #include "cocos/scene/Octree.h" #include "cocos/scene/Pass.h" @@ -1499,6 +1500,9 @@ struct RenderGraphVisitor : boost::dfs_visitor<> { if (pass.showStatistics) { submitProfilerCommands(ctx, vertID, pass); +#if CC_USE_DEBUG_RENDERER + renderDebugRenderer(ctx.currentPass, ctx.cmdBuff, ctx.ppl->pipelineSceneData, nullptr); +#endif } ctx.cmdBuff->endRenderPass(); ctx.currentPass = nullptr; diff --git a/native/cocos/renderer/pipeline/custom/NativePipeline.cpp b/native/cocos/renderer/pipeline/custom/NativePipeline.cpp index 616db2fac66..5a908546de9 100644 --- a/native/cocos/renderer/pipeline/custom/NativePipeline.cpp +++ b/native/cocos/renderer/pipeline/custom/NativePipeline.cpp @@ -1135,8 +1135,15 @@ void NativePipeline::addBuiltinGpuCullingPass(uint32_t cullingID, gpuCullPass->addStorageBuffer(drawInstanceBuffer, AccessType::WRITE, "CCDrawInstanceBuffer"); gpuCullPass->addStorageBuffer(visibilityBuffer, AccessType::READ_WRITE, "CCVisibilityBuffer"); if (!hzbName.empty()) { - auto *sampler = device->getSampler({gfx::Filter::POINT, gfx::Filter::POINT, gfx::Filter::NONE, - gfx::Address::CLAMP, gfx::Address::CLAMP, gfx::Address::CLAMP}); + gfx::Sampler *sampler = nullptr; + if (device->getCapabilities().supportFilterMinMax) { + sampler = device->getSampler({gfx::Filter::LINEAR, gfx::Filter::LINEAR, gfx::Filter::NONE, + gfx::Address::CLAMP, gfx::Address::CLAMP, gfx::Address::CLAMP, + 0, gfx::ComparisonFunc::ALWAYS, gfx::Reduction::MAX}); + } else 
{ + sampler = device->getSampler({gfx::Filter::POINT, gfx::Filter::POINT, gfx::Filter::NONE, + gfx::Address::CLAMP, gfx::Address::CLAMP, gfx::Address::CLAMP}); + } gpuCullPass->addTexture(hzbName + std::to_string(cullingID), "CCDepthMap", sampler, 0); } diff --git a/native/cocos/renderer/pipeline/helper/Utils.cpp b/native/cocos/renderer/pipeline/helper/Utils.cpp index 3c224906b3a..6d5d98ec718 100644 --- a/native/cocos/renderer/pipeline/helper/Utils.cpp +++ b/native/cocos/renderer/pipeline/helper/Utils.cpp @@ -86,7 +86,7 @@ void renderProfiler(gfx::RenderPass *renderPass, gfx::CommandBuffer *cmdBuff, sc #if CC_USE_DEBUG_RENDERER void renderDebugRenderer(gfx::RenderPass *renderPass, gfx::CommandBuffer *cmdBuff, PipelineSceneData *sceneData, const scene::Camera *camera) { - if (camera != profilerCamera) { + if (camera && camera != profilerCamera) { return; } diff --git a/native/cocos/scene/Model.cpp b/native/cocos/scene/Model.cpp index b21ed880264..d2effbf8164 100644 --- a/native/cocos/scene/Model.cpp +++ b/native/cocos/scene/Model.cpp @@ -466,13 +466,17 @@ bool Model::isInGPUScene(index_t subModelIndex) const { return mesh->isInGPUScene(); } -bool Model::supportGPUScene(index_t subModelIndex) const { +bool Model::supportGPUDriven(index_t subModelIndex) const { const auto *pipeline = Root::getInstance()->getPipeline(); const auto *sceneData = pipeline->getPipelineSceneData(); if (!sceneData || !sceneData->isGPUDrivenEnabled()) { return false; } + if (!_gpuDrivenEnabled) { + return false; + } + // skip light probe object if (_useLightProbe) { return false; @@ -552,7 +556,7 @@ ccstd::vector Model::getMacroPatches(index_t subModelIndex) { } } patches.emplace_back(IMacroPatch{CC_DISABLE_DIRECTIONAL_LIGHT, !_receiveDirLight}); - patches.emplace_back(IMacroPatch{CC_USE_GPU_DRIVEN, supportGPUScene(subModelIndex) && isInGPUScene(subModelIndex)}); + patches.emplace_back(IMacroPatch{CC_USE_GPU_DRIVEN, supportGPUDriven(subModelIndex) && isInGPUScene(subModelIndex)}); return 
patches; } diff --git a/native/cocos/scene/Model.h b/native/cocos/scene/Model.h index f1cf7df8256..ff501da94a8 100644 --- a/native/cocos/scene/Model.h +++ b/native/cocos/scene/Model.h @@ -253,6 +253,8 @@ class Model : public RefCounted { inline void setModelBounds(geometry::AABB *bounds) { _modelBounds = bounds; } inline bool isModelImplementedInJS() const { return (_type != Type::DEFAULT && _type != Type::SKINNING && _type != Type::BAKED_SKINNING); }; inline const gfx::Texture *getLightmap() const { return _lightmap ? _lightmap->getGFXTexture() : nullptr; } + inline void setGPUDrivenEnabled(bool b) { _gpuDrivenEnabled = b; } + inline bool isGPUDrivenEnabled() const { return _gpuDrivenEnabled; } protected: static SubModel *createSubModel(); @@ -260,7 +262,7 @@ class Model : public RefCounted { void updateAttributesAndBinding(index_t subModelIndex); bool isLightProbeAvailable() const; void updateSHBuffer(); - bool supportGPUScene(index_t subModelIndex) const; + bool supportGPUDriven(index_t subModelIndex) const; bool isInGPUScene(index_t subModelIndex) const; // Please declare variables in descending order of memory size occupied by variables. 
@@ -275,6 +277,7 @@ class Model : public RefCounted { int32_t _reflectionProbeId{-1}; int32_t _reflectionProbeBlendId{-1}; float _reflectionProbeBlendWeight{0.F}; + bool _gpuDrivenEnabled{true}; OctreeNode *_octreeNode{nullptr}; RenderScene *_scene{nullptr}; diff --git a/native/cocos/scene/RenderScene.cpp b/native/cocos/scene/RenderScene.cpp index 1ce2a7e8a6b..0cff55af33f 100644 --- a/native/cocos/scene/RenderScene.cpp +++ b/native/cocos/scene/RenderScene.cpp @@ -392,12 +392,6 @@ void RenderScene::removeModels() { _models.clear(); } -void RenderScene::addGPUMesh(Mesh *mesh) { - if (_gpuScene) { - _gpuScene->addMesh(mesh); - } -} - void RenderScene::addGPUModel(Model *model) { model->attachToScene(this); _gpuModels.emplace_back(model); diff --git a/native/cocos/scene/RenderScene.h b/native/cocos/scene/RenderScene.h index 60bf3b2e216..7af396ce477 100644 --- a/native/cocos/scene/RenderScene.h +++ b/native/cocos/scene/RenderScene.h @@ -106,7 +106,6 @@ class RenderScene : public RefCounted { void removeModel(Model *model); void removeModels(); - void addGPUMesh(Mesh *mesh); void addGPUModel(Model *model); void removeGPUModel(Model *model); void removeGPUModels(); diff --git a/native/cocos/scene/SubModel.cpp b/native/cocos/scene/SubModel.cpp index 92be251cc8f..db0c77ca17b 100644 --- a/native/cocos/scene/SubModel.cpp +++ b/native/cocos/scene/SubModel.cpp @@ -133,9 +133,6 @@ void SubModel::initialize(RenderingSubMesh *subMesh, const SharedPassArray &pPas CC_ASSERT(!pPasses->empty()); gfx::DescriptorSetInfo dsInfo; dsInfo.layout = (*pPasses)[0]->getLocalSetLayout(); - if (!subMesh->getIaInfo().vertexBuffers.empty()) { - _inputAssembler = _device->createInputAssembler(subMesh->getIaInfo()); - } _descriptorSet = _device->createDescriptorSet(dsInfo); const auto *pipeline = Root::getInstance()->getPipeline(); @@ -147,6 +144,7 @@ void SubModel::initialize(RenderingSubMesh *subMesh, const SharedPassArray &pPas } _subMesh = subMesh; + initInputAssembler(); ccstd::vector tmp = 
patches; std::sort(tmp.begin(), tmp.end(), IMacroPatch::compare); _patches = tmp; @@ -193,6 +191,16 @@ void SubModel::initialize(RenderingSubMesh *subMesh, const SharedPassArray &pPas } } +void SubModel::initInputAssembler() { + if (_subMesh->getVertexBuffers().empty()) { + return; + } + + if (!_inputAssembler) { + _inputAssembler = _device->createInputAssembler(_subMesh->getIaInfo()); + } +} + void SubModel::destroy() { CC_SAFE_DESTROY_NULL(_descriptorSet); CC_SAFE_DESTROY_NULL(_inputAssembler); diff --git a/native/cocos/scene/SubModel.h b/native/cocos/scene/SubModel.h index 7ce204b5297..aa6e7564afd 100644 --- a/native/cocos/scene/SubModel.h +++ b/native/cocos/scene/SubModel.h @@ -86,6 +86,7 @@ class SubModel : public RefCounted { int32_t getInstancedAttributeIndex(const ccstd::string &name) const; void initialize(RenderingSubMesh *subMesh, const SharedPassArray &passes, const ccstd::vector &patches); + void initInputAssembler(); void destroy(); void onPipelineStateChanged(); void onMacroPatchesStateChanged(const ccstd::vector &patches); diff --git a/native/cocos/scene/gpu-scene/GPUMeshPool.cpp b/native/cocos/scene/gpu-scene/GPUMeshPool.cpp index 0e56e7c9b7b..119d92af0b1 100644 --- a/native/cocos/scene/gpu-scene/GPUMeshPool.cpp +++ b/native/cocos/scene/gpu-scene/GPUMeshPool.cpp @@ -31,6 +31,7 @@ #include "core/assets/RenderingSubMesh.h" #include "3d/assets/Mesh.h" #include "base/memory/Memory.h" +#include "base/std/container/unordered_set.h" namespace cc { namespace scene { @@ -69,11 +70,21 @@ void GPUMeshPool::destroy() { } void GPUMeshPool::build(const ccstd::vector& meshes) { - for (const auto& mesh : meshes) { + ccstd::unordered_set meshSet; + for (auto* mesh : meshes) { + meshSet.insert(mesh); + } + + for (const auto& mesh : meshSet) { addMesh(mesh); } updateBuffers(); + + // Note: must redirect mesh after updateBuffers + for (const auto& mesh : meshSet) { + redirectMesh(mesh); + } } void GPUMeshPool::addMesh(Mesh* mesh) { @@ -119,28 +130,43 @@ void 
GPUMeshPool::addMesh(Mesh* mesh) { vb->second.push(buffer->getData() + vertexBundle.view.offset, vertexBundle.view.length, vbCount); ib->second.push(buffer->getData() + indexView.offset, indexView.length, ibCount); - - // Destroy subMesh's private buffers - subMesh->destroy(); } mesh->setInGPUScene(true); // Release CPU side data if necessary +#if !CC_EDITOR if (!mesh->isAllowDataAccess()) { mesh->releaseData(); } +#endif _dirty = true; } +void GPUMeshPool::redirectMesh(Mesh* mesh) { + const auto& subMeshes = mesh->getRenderingSubMeshes(); + + for (const auto& subMesh : subMeshes) { + const auto meshIdx = subMesh->getMeshPoolIndex(); + const auto& meshData = _meshes[meshIdx]; + + auto* vb = getVertexBuffer(meshData.attributesHash); + auto* ib = getIndexBuffer(meshData.indexStride); + gfx::BufferList vbs = {vb}; + + // Reset subMesh's buffers to GPUMeshPool. + subMesh->resetBuffers(vbs, ib, meshData.vertexCount, meshData.firstVertex, meshData.indexCount, meshData.firstIndex, meshData.firstVertex); + } +} + void GPUMeshPool::updateBuffers() { if (!_dirty) { return; } auto* device = gfx::Device::getInstance(); - for (const auto& iter : _vbs) { + for (auto& iter : _vbs) { const auto size = iter.second.stride * iter.second.count; auto vb = _vertexBuffers.find(iter.first); if (vb == _vertexBuffers.cend()) { @@ -158,9 +184,13 @@ void GPUMeshPool::updateBuffers() { buffer->update(iter.second.buffer.data(), size); } } + + // Release CPU side buffer + iter.second.buffer.clear(); + iter.second.buffer.shrink_to_fit(); } - for (const auto& iter : _ibs) { + for (auto& iter : _ibs) { const auto size = iter.second.stride * iter.second.count; auto ib = _indexBuffers.find(iter.first); if (ib == _indexBuffers.cend()) { @@ -178,6 +208,10 @@ void GPUMeshPool::updateBuffers() { buffer->update(iter.second.buffer.data(), size); } } + + // Release CPU side buffer + iter.second.buffer.clear(); + iter.second.buffer.shrink_to_fit(); } _dirty = false; diff --git 
a/native/cocos/scene/gpu-scene/GPUMeshPool.h b/native/cocos/scene/gpu-scene/GPUMeshPool.h index 747d1685afe..598fd048a9a 100644 --- a/native/cocos/scene/gpu-scene/GPUMeshPool.h +++ b/native/cocos/scene/gpu-scene/GPUMeshPool.h @@ -32,11 +32,13 @@ #include "renderer/gfx-base/GFXBuffer.h" #include "scene/gpu-scene/Const.h" + namespace cc { class Mesh; namespace scene { class GPUScene; +class Model; struct SubMeshData { ccstd::hash_t attributesHash{0U}; @@ -68,13 +70,14 @@ class CC_DLL GPUMeshPool final : public RefCounted { void destroy(); void build(const ccstd::vector& meshes); - void addMesh(Mesh* mesh); - + inline const SubMeshData& getSubMeshData(uint32_t index) const { return _meshes[index]; } inline gfx::Buffer* getVertexBuffer(ccstd::hash_t key) { return _vertexBuffers[key].get(); } inline gfx::Buffer* getIndexBuffer(uint32_t key) { return _indexBuffers[key].get(); } private: + void addMesh(Mesh* mesh); + void redirectMesh(Mesh* mesh); void updateBuffers(); GPUScene* _gpuScene{nullptr}; diff --git a/native/cocos/scene/gpu-scene/GPUScene.cpp b/native/cocos/scene/gpu-scene/GPUScene.cpp index daee19afa09..3fc477e4357 100644 --- a/native/cocos/scene/gpu-scene/GPUScene.cpp +++ b/native/cocos/scene/gpu-scene/GPUScene.cpp @@ -59,11 +59,6 @@ void GPUScene::build(const ccstd::vector& meshes) { _meshPool->build(meshes); } -void GPUScene::addMesh(Mesh* mesh) { - _meshPool->addMesh(mesh); - _meshPool->updateBuffers(); -} - void GPUScene::addModel(const Model* model) { _objectPool->addModel(model); _batchPool->addModel(model); diff --git a/native/cocos/scene/gpu-scene/GPUScene.h b/native/cocos/scene/gpu-scene/GPUScene.h index 9029a0d809a..04bb55bef07 100644 --- a/native/cocos/scene/gpu-scene/GPUScene.h +++ b/native/cocos/scene/gpu-scene/GPUScene.h @@ -49,7 +49,6 @@ class CC_DLL GPUScene final : public RefCounted { void build(const ccstd::vector& meshes); - void addMesh(Mesh* mesh); void addModel(const Model* model); void removeModel(const Model* model); void 
removeAllModels(); diff --git a/native/tools/swig-config/scene.i b/native/tools/swig-config/scene.i index e84367d3111..6b5868393df 100644 --- a/native/tools/swig-config/scene.i +++ b/native/tools/swig-config/scene.i @@ -518,6 +518,7 @@ using namespace cc; %attribute(cc::scene::Model, int32_t, reflectionProbeId, getReflectionProbeId, setReflectionProbeId); %attribute(cc::scene::Model, int32_t, reflectionProbeBlendId, getReflectionProbeBlendId, setReflectionProbeBlendId); %attribute(cc::scene::Model, float, reflectionProbeBlendWeight, getReflectionProbeBlendWeight, setReflectionProbeBlendWeight); +%attribute(cc::scene::Model, bool, gpuDrivenEnabled, isGPUDrivenEnabled, setGPUDrivenEnabled); %attribute(cc::scene::SubModel, cc::scene::SharedPassArray &, passes, getPasses, setPasses); %attribute(cc::scene::SubModel, ccstd::vector> &, shaders, getShaders, setShaders); From c266fba36d3ab56d5292458d4119a44a391c1087 Mon Sep 17 00:00:00 2001 From: stanleyljl <798080536@qq.com> Date: Sat, 7 Oct 2023 15:08:01 +0800 Subject: [PATCH 10/16] refactor lightmap & add shadow for gpu driven --- cocos/game/director.ts | 6 +- cocos/rendering/custom/pipeline.ts | 1 + cocos/root.ts | 6 -- cocos/scene-graph/scene.jsb.ts | 3 - cocos/scene-graph/scene.ts | 3 - .../builtin/uniforms/cc-light-map.chunk | 10 +-- .../gfx-gles3/GLES3PrimaryCommandBuffer.cpp | 2 + .../renderer/gfx-metal/MTLCommandBuffer.mm | 2 + .../renderer/gfx-vulkan/VKCommandBuffer.cpp | 2 + .../pipeline/custom/NativeExecutor.cpp | 2 +- .../pipeline/custom/NativePipeline.cpp | 78 +++++++++++------ .../pipeline/custom/NativePipelineTypes.h | 2 +- .../pipeline/custom/NativeRenderQueue.cpp | 83 +++++++++--------- .../pipeline/custom/RenderInterfaceTypes.h | 13 +-- native/cocos/scene/Model.cpp | 12 ++- native/cocos/scene/Model.h | 1 - native/cocos/scene/gpu-scene/GPUBatchPool.cpp | 85 +++++++++---------- native/cocos/scene/gpu-scene/GPUBatchPool.h | 10 +-- 18 files changed, 170 insertions(+), 151 deletions(-) diff --git 
a/cocos/game/director.ts b/cocos/game/director.ts index 4d741a6a267..5140451d23c 100644 --- a/cocos/game/director.ts +++ b/cocos/game/director.ts @@ -387,9 +387,11 @@ export class Director extends EventTarget { onBeforeLoadScene(); } - if (scene.renderScene) { - scene.renderScene.activate(); + if (scene) { + scene.renderScene?.activate(); + scene.globals.activate(scene); } + this.emit(Director.EVENT_BEFORE_SCENE_LAUNCH, scene); // Run an Entity Scene diff --git a/cocos/rendering/custom/pipeline.ts b/cocos/rendering/custom/pipeline.ts index c19a4153f36..f360eea4d80 100644 --- a/cocos/rendering/custom/pipeline.ts +++ b/cocos/rendering/custom/pipeline.ts @@ -1407,6 +1407,7 @@ export interface Pipeline extends BasicPipeline { layoutPath?: string, hzbName?: string, light?: Light | null, + level?: number, bMainPass?: boolean): void; /** * @en Add hierarchical z buffer generation pass diff --git a/cocos/root.ts b/cocos/root.ts index eecacd79cc3..87f5fd6c51e 100644 --- a/cocos/root.ts +++ b/cocos/root.ts @@ -428,12 +428,6 @@ export class Root { //----------------------------------------------- // pipeline initialization completed //----------------------------------------------- - const scene = director.getScene(); - if (scene) { - scene.globals.activate(); - } - - this.onGlobalPipelineStateChanged(); if (!this._batcher && internal.Batcher2D) { this._batcher = new internal.Batcher2D(this); if (!this._batcher!.initialize()) { diff --git a/cocos/scene-graph/scene.jsb.ts b/cocos/scene-graph/scene.jsb.ts index 9a6936da06e..af423168394 100644 --- a/cocos/scene-graph/scene.jsb.ts +++ b/cocos/scene-graph/scene.jsb.ts @@ -130,9 +130,6 @@ sceneProto._activate = function (active: boolean) { } cclegacy.director._nodeActivator.activateNode(this, active); // The test environment does not currently support the renderer - if (!TEST || EDITOR) { - this._globals.activate(this); - } }; sceneProto._instantiate = function(): void {}; diff --git a/cocos/scene-graph/scene.ts 
b/cocos/scene-graph/scene.ts index 363021e9dee..ce5b1a2f261 100644 --- a/cocos/scene-graph/scene.ts +++ b/cocos/scene-graph/scene.ts @@ -191,9 +191,6 @@ export class Scene extends Node { } cclegacy.director._nodeActivator.activateNode(this, active); // The test environment does not currently support the renderer - if (!TEST) { - this._globals.activate(this); - } } } diff --git a/editor/assets/chunks/builtin/uniforms/cc-light-map.chunk b/editor/assets/chunks/builtin/uniforms/cc-light-map.chunk index ca7f64f61d6..4df72e67441 100644 --- a/editor/assets/chunks/builtin/uniforms/cc-light-map.chunk +++ b/editor/assets/chunks/builtin/uniforms/cc-light-map.chunk @@ -1,8 +1,2 @@ -#if CC_USE_GPU_DRIVEN - #pragma rate cc_lightingMap pass - #pragma glBinding(2) - uniform sampler2D cc_lightingMap; -#else - #pragma builtin(local) - layout(set = 2, binding = 11) uniform sampler2D cc_lightingMap; -#endif +#pragma builtin(local) +layout(set = 2, binding = 11) uniform sampler2D cc_lightingMap; diff --git a/native/cocos/renderer/gfx-gles3/GLES3PrimaryCommandBuffer.cpp b/native/cocos/renderer/gfx-gles3/GLES3PrimaryCommandBuffer.cpp index b0a1eb6eb9c..ff6ee21a97c 100644 --- a/native/cocos/renderer/gfx-gles3/GLES3PrimaryCommandBuffer.cpp +++ b/native/cocos/renderer/gfx-gles3/GLES3PrimaryCommandBuffer.cpp @@ -85,6 +85,7 @@ void GLES3PrimaryCommandBuffer::insertMarker(const MarkerInfo &marker) { } void GLES3PrimaryCommandBuffer::drawIndirect(Buffer *buffer, uint32_t offset, uint32_t count, uint32_t stride) { + CC_PROFILE(GLES3PrimaryCmdBufDrawIndirect); if (_isStateInvalid) { bindStates(); } @@ -100,6 +101,7 @@ void GLES3PrimaryCommandBuffer::drawIndirect(Buffer *buffer, uint32_t offset, ui } void GLES3PrimaryCommandBuffer::drawIndexedIndirect(Buffer *buffer, uint32_t offset, uint32_t count, uint32_t stride) { + CC_PROFILE(GLES3PrimaryCmdBufDrawIndirect); if (_isStateInvalid) { bindStates(); } diff --git a/native/cocos/renderer/gfx-metal/MTLCommandBuffer.mm 
b/native/cocos/renderer/gfx-metal/MTLCommandBuffer.mm index c73e0743760..a6174f3c66d 100644 --- a/native/cocos/renderer/gfx-metal/MTLCommandBuffer.mm +++ b/native/cocos/renderer/gfx-metal/MTLCommandBuffer.mm @@ -586,6 +586,7 @@ of this software and associated engine source code (the "Software"), a limited, } void CCMTLCommandBuffer::drawIndirect(Buffer *buffer, uint32_t offset, uint32_t count, uint32_t stride) { + CC_PROFILE(CCMTLCommandBufferDrawIndirect); prepareForDraw(); auto mtlEncoder = _renderEncoder.getMTLEncoder(); auto *mtlBuffer = static_cast(buffer); @@ -599,6 +600,7 @@ of this software and associated engine source code (the "Software"), a limited, } void CCMTLCommandBuffer::drawIndexedIndirect(Buffer *buffer, uint32_t offset, uint32_t count, uint32_t stride) { + CC_PROFILE(CCMTLCommandBufferDrawIndirect); prepareForDraw(); CCMTLInputAssembler *inputAssembler = _gpuCommandBufferObj->inputAssembler; const auto *indexBuffer = static_cast(inputAssembler->getIndexBuffer()); diff --git a/native/cocos/renderer/gfx-vulkan/VKCommandBuffer.cpp b/native/cocos/renderer/gfx-vulkan/VKCommandBuffer.cpp index 4e3ca777a98..8df7167c438 100644 --- a/native/cocos/renderer/gfx-vulkan/VKCommandBuffer.cpp +++ b/native/cocos/renderer/gfx-vulkan/VKCommandBuffer.cpp @@ -456,6 +456,7 @@ void CCVKCommandBuffer::nextSubpass() { } void CCVKCommandBuffer::drawIndirect(Buffer *buffer, uint32_t offset, uint32_t count, uint32_t stride) { + CC_PROFILE(CCVKCmdBufDrawIndirect); if (_firstDirtyDescriptorSet < _curGPUDescriptorSets.size()) { bindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS); } @@ -487,6 +488,7 @@ void CCVKCommandBuffer::drawIndirect(Buffer *buffer, uint32_t offset, uint32_t c } void CCVKCommandBuffer::drawIndexedIndirect(Buffer *buffer, uint32_t offset, uint32_t count, uint32_t stride) { + CC_PROFILE(CCVKCmdBufDrawIndirect); if (_firstDirtyDescriptorSet < _curGPUDescriptorSets.size()) { bindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS); } diff --git 
a/native/cocos/renderer/pipeline/custom/NativeExecutor.cpp b/native/cocos/renderer/pipeline/custom/NativeExecutor.cpp index 5b2cfc766ce..ecc60b35b70 100644 --- a/native/cocos/renderer/pipeline/custom/NativeExecutor.cpp +++ b/native/cocos/renderer/pipeline/custom/NativeExecutor.cpp @@ -1499,10 +1499,10 @@ struct RenderGraphVisitor : boost::dfs_visitor<> { } if (pass.showStatistics) { - submitProfilerCommands(ctx, vertID, pass); #if CC_USE_DEBUG_RENDERER renderDebugRenderer(ctx.currentPass, ctx.cmdBuff, ctx.ppl->pipelineSceneData, nullptr); #endif + submitProfilerCommands(ctx, vertID, pass); } ctx.cmdBuff->endRenderPass(); ctx.currentPass = nullptr; diff --git a/native/cocos/renderer/pipeline/custom/NativePipeline.cpp b/native/cocos/renderer/pipeline/custom/NativePipeline.cpp index 5a908546de9..c55389851e9 100644 --- a/native/cocos/renderer/pipeline/custom/NativePipeline.cpp +++ b/native/cocos/renderer/pipeline/custom/NativePipeline.cpp @@ -1024,7 +1024,7 @@ bool setupGpuDrivenResources( { name = "CCVisibilityBuffer"; name.append(std::to_string(cullingID)); - const auto bufferSize = gpuScene->getInstanceCount() * static_cast(sizeof(uint32_t)); + const auto bufferSize = std::max(gpuScene->getInstanceCount(), 1U) * static_cast(sizeof(uint32_t)); auto resID = findVertex(name, resg); if (resID == ResourceGraph::null_vertex()) { ppl.addStorageBuffer(std::string(name), gfx::Format::UNKNOWN, bufferSize, ResourceResidency::MANAGED); @@ -1059,7 +1059,7 @@ bool setupGpuDrivenResources( void NativePipeline::addBuiltinGpuCullingPass(uint32_t cullingID, const scene::Camera *camera, const std::string &layoutPath, - const std::string &hzbName, const scene::Light *light, bool bMainPass) { + const std::string &hzbName, const scene::Light *light, uint32_t level, bool bMainPass) { auto *scene = camera->getScene(); if (!scene) { return; @@ -1079,11 +1079,6 @@ void NativePipeline::addBuiltinGpuCullingPass(uint32_t cullingID, const uint32_t sceneID = iter->second; bool firstPass = 
setupGpuDrivenResources(*this, camera, gpuScene, sceneID, cullingID, resourceGraph, hzbName); - if (light) { - // build light culling pass - return; - } - const std::string objectBuffer = "CCObjectBuffer" + std::to_string(sceneID); const std::string instanceBuffer = "CCInstanceBuffer" + std::to_string(sceneID); const std::string indirectBuffer = "CCIndirectBuffer" + std::to_string(sceneID); @@ -1155,26 +1150,57 @@ void NativePipeline::addBuiltinGpuCullingPass(uint32_t cullingID, std::unique_ptr gpuCullQueue(dynamic_cast(gpuCullPass->addQueue())); gpuCullQueue->addDispatch(groupCount, 1, 1, pipelineSceneData->getGPUCullingMaterial(materialIndex), 0); - ccstd::vector planes; - const auto &frustum = camera->getFrustum(); - for (auto *plane : frustum.planes) { - planes.push_back(plane->n.x); - planes.push_back(plane->n.y); - planes.push_back(plane->n.z); - planes.push_back(plane->d); + if (light) { + auto *layers = pipelineSceneData->getCSMLayers(); + layers->update(pipelineSceneData, camera); + + auto *layer = layers->getLayers()[level]; + const auto size = pipelineSceneData->getShadows()->getSize(); + const auto width = static_cast(size.x) / 2; + const auto height = static_cast(size.y) / 2; + + ccstd::vector planes; + const auto &frustum = layer->getValidFrustum(); + for (auto *plane : frustum.planes) { + planes.push_back(plane->n.x); + planes.push_back(plane->n.y); + planes.push_back(plane->n.z); + planes.push_back(plane->d); + } + ArrayBuffer planesBuffer(reinterpret_cast(&planes[0]), sizeof(float) * static_cast(planes.size())); + gpuCullPass->setMat4("cc_view", layer->getMatShadowView()); + gpuCullPass->setMat4("cc_proj", layer->getMatShadowProj()); + gpuCullPass->setArrayBuffer("cc_planes", &planesBuffer); + gpuCullPass->setFloat("cc_znear", layer->getSplitCameraNear()); + gpuCullPass->setFloat("cc_zfar", layer->getSplitCameraFar()); + gpuCullPass->setFloat("cc_depthWidth", static_cast(utils::previousPOT(width))); + gpuCullPass->setFloat("cc_depthHeight", 
static_cast(utils::previousPOT(height))); + gpuCullPass->setUint("cc_isPerspective", light->getType() == scene::LightType::DIRECTIONAL ? 0 : 1); + gpuCullPass->setUint("cc_orientation", 0); + gpuCullPass->setUint("cc_instanceCount", instanceCount); + gpuCullPass->setUint("cc_phaseId", phaseID); + } else { + ccstd::vector planes; + const auto &frustum = camera->getFrustum(); + for (auto *plane : frustum.planes) { + planes.push_back(plane->n.x); + planes.push_back(plane->n.y); + planes.push_back(plane->n.z); + planes.push_back(plane->d); + } + ArrayBuffer planesBuffer(reinterpret_cast(&planes[0]), sizeof(float) * static_cast(planes.size())); + gpuCullPass->setMat4("cc_view", camera->getMatView()); + gpuCullPass->setMat4("cc_proj", camera->getMatProj()); + gpuCullPass->setArrayBuffer("cc_planes", &planesBuffer); + gpuCullPass->setFloat("cc_znear", camera->getNearClip()); + gpuCullPass->setFloat("cc_zfar", camera->getFarClip()); + gpuCullPass->setFloat("cc_depthWidth", static_cast(utils::previousPOT(camera->getWidth()))); + gpuCullPass->setFloat("cc_depthHeight", static_cast(utils::previousPOT(camera->getHeight()))); + gpuCullPass->setUint("cc_isPerspective", static_cast(camera->getProjectionType())); + gpuCullPass->setUint("cc_orientation", static_cast(camera->getSurfaceTransform())); + gpuCullPass->setUint("cc_instanceCount", instanceCount); + gpuCullPass->setUint("cc_phaseId", phaseID); } - ArrayBuffer planesBuffer(reinterpret_cast(&planes[0]), sizeof(float) * static_cast(planes.size())); - gpuCullPass->setMat4("cc_view", camera->getMatView()); - gpuCullPass->setMat4("cc_proj", camera->getMatProj()); - gpuCullPass->setArrayBuffer("cc_planes", &planesBuffer); - gpuCullPass->setFloat("cc_znear", camera->getNearClip()); - gpuCullPass->setFloat("cc_zfar", camera->getFarClip()); - gpuCullPass->setFloat("cc_depthWidth", static_cast(utils::previousPOT(camera->getWidth()))); - gpuCullPass->setFloat("cc_depthHeight", static_cast(utils::previousPOT(camera->getHeight()))); - 
gpuCullPass->setUint("cc_isPerspective", static_cast(camera->getProjectionType())); - gpuCullPass->setUint("cc_orientation", static_cast(camera->getSurfaceTransform())); - gpuCullPass->setUint("cc_instanceCount", instanceCount); - gpuCullPass->setUint("cc_phaseId", phaseID); } } diff --git a/native/cocos/renderer/pipeline/custom/NativePipelineTypes.h b/native/cocos/renderer/pipeline/custom/NativePipelineTypes.h index a86aa1fdf0d..8fb6b511b7e 100644 --- a/native/cocos/renderer/pipeline/custom/NativePipelineTypes.h +++ b/native/cocos/renderer/pipeline/custom/NativePipelineTypes.h @@ -1442,7 +1442,7 @@ class NativePipeline final : public Pipeline { ComputePassBuilder *addComputePass(const ccstd::string &passName) override; void addUploadPass(ccstd::vector &uploadPairs) override; void addMovePass(const ccstd::vector &movePairs) override; - void addBuiltinGpuCullingPass(uint32_t cullingID, const scene::Camera *camera, const std::string &layoutPath, const std::string &hzbName, const scene::Light *light, bool bMainPass) override; + void addBuiltinGpuCullingPass(uint32_t cullingID, const scene::Camera *camera, const std::string &layoutPath, const std::string &hzbName, const scene::Light *light, uint32_t level, bool bMainPass) override; void addBuiltinHzbGenerationPass(const std::string &sourceDepthStencilName, const std::string &targetHzbName) override; uint32_t addCustomBuffer(const ccstd::string &name, const gfx::BufferInfo &info, const std::string &type) override; uint32_t addCustomTexture(const ccstd::string &name, const gfx::TextureInfo &info, const std::string &type) override; diff --git a/native/cocos/renderer/pipeline/custom/NativeRenderQueue.cpp b/native/cocos/renderer/pipeline/custom/NativeRenderQueue.cpp index c37761be9f6..b7520244b09 100644 --- a/native/cocos/renderer/pipeline/custom/NativeRenderQueue.cpp +++ b/native/cocos/renderer/pipeline/custom/NativeRenderQueue.cpp @@ -193,6 +193,10 @@ void GPUDrivenQueue::recordCommandBuffer( const bool bDrawBlend = 
any(sceneFlags & SceneFlags::TRANSPARENT_OBJECT); const bool bDrawOpaqueOrMask = any(sceneFlags & (SceneFlags::OPAQUE_OBJECT | SceneFlags::CUTOUT_OBJECT)); + const bool bDrawShadowCaster = any(sceneFlags & SceneFlags::SHADOW_CASTER); + if (!bDrawShadowCaster && !bDrawBlend && !bDrawOpaqueOrMask) { + return; // nothing to draw + } CC_EXPECTS(cullingID != 0xFFFFFFFF); ccstd::pmr::string indirectName("CCDrawIndirectBuffer", get_allocator()); @@ -211,56 +215,53 @@ void GPUDrivenQueue::recordCommandBuffer( gfx::PipelineState *lastPSO = nullptr; const auto indirectStride = scene::GPUBatchPool::getIndirectStride(); - for (const auto &lightmapIter : batchPool->getBatches()) { - const auto *lightmap = lightmapIter.first; - // Stanley TODO: bindDescriptorSet for lightmap & sampler here, lightmap can be nullptr. + for (const auto &iter : batchPool->getBatches()) { + const auto *batch = iter.second; + if (batch->empty()) { + continue; + } - for (const auto &passIter : lightmapIter.second) { - const auto *batch = passIter.second; - if (batch->empty()) { - continue; - } + const auto *drawPass = batch->getPass(); + if (phaseLayoutID != drawPass->getPhaseID()) { + continue; + } - const auto *drawPass = batch->getPass(); - if (phaseLayoutID != drawPass->getPhaseID()) { - continue; - } + const bool bBlend = drawPass->isBlend(); + const bool bOpaqueOrMask = !bBlend; + if (!bDrawBlend && bBlend) { + // skip transparent object + continue; + } + if (!bDrawOpaqueOrMask && bOpaqueOrMask) { + // skip opaque object + continue; + } - const bool bBlend = drawPass->isBlend(); - const bool bOpaqueOrMask = !bBlend; - if (!bDrawBlend && bBlend) { - // skip transparent object - continue; - } - if (!bDrawOpaqueOrMask && bOpaqueOrMask) { - // skip opaque object + cmdBuffer->bindDescriptorSet(pipeline::materialSet, drawPass->getDescriptorSet()); + + const auto &items = batch->getItems(); + for (const auto &item : items) { + if (!item.count) { continue; } - 
cmdBuffer->bindDescriptorSet(pipeline::materialSet, drawPass->getDescriptorSet()); - - const auto &items = batch->getItems(); - for (const auto &item : items) { - if (!item.count) { - continue; - } + auto *pso = pipeline::PipelineStateManager::getOrCreatePipelineState( + drawPass, item.shader, item.inputAssembler, renderPass); - auto *pso = pipeline::PipelineStateManager::getOrCreatePipelineState( - drawPass, item.shader, item.inputAssembler, renderPass); + if (lastPSO != pso) { + cmdBuffer->bindPipelineState(pso); + lastPSO = pso; + } - if (lastPSO != pso) { - cmdBuffer->bindPipelineState(pso); - lastPSO = pso; - } + cmdBuffer->bindInputAssembler(item.inputAssembler); + cmdBuffer->bindDescriptorSet(pipeline::localSet, item.descriptorSet); - cmdBuffer->bindInputAssembler(item.inputAssembler); - if (supportFirstInstance) { - cmdBuffer->drawIndexedIndirect(indirectBuffer, item.first * indirectStride, item.count, indirectStride); - } else { - for (auto i = 0; i < item.count; i++) { - // Stanley TODO: bindDescriptorSet for cc_drawInstances with dynamicOffsets here. - cmdBuffer->drawIndexedIndirect(indirectBuffer, (item.first + i) * indirectStride, 1, indirectStride); - } + if (supportFirstInstance) { + cmdBuffer->drawIndexedIndirect(indirectBuffer, item.first * indirectStride, item.count, indirectStride); + } else { + for (auto i = 0; i < item.count; i++) { + // Stanley TODO: bindDescriptorSet for cc_drawInstances with dynamicOffsets here. 
+ cmdBuffer->drawIndexedIndirect(indirectBuffer, (item.first + i) * indirectStride, 1, indirectStride); } } } diff --git a/native/cocos/renderer/pipeline/custom/RenderInterfaceTypes.h b/native/cocos/renderer/pipeline/custom/RenderInterfaceTypes.h index feabf492685..c1a2af6613a 100644 --- a/native/cocos/renderer/pipeline/custom/RenderInterfaceTypes.h +++ b/native/cocos/renderer/pipeline/custom/RenderInterfaceTypes.h @@ -1457,7 +1457,7 @@ class Pipeline : public BasicPipeline { * @param hzbName @en name of hierarchical z buffer @zh 层次深度缓存的名字 * @param light @en light of the culling pass @zh 剔除通道的灯光 */ - virtual void addBuiltinGpuCullingPass(uint32_t cullingID, const scene::Camera *camera, const std::string &layoutPath, const std::string &hzbName, const scene::Light *light, bool bMainPass) = 0; + virtual void addBuiltinGpuCullingPass(uint32_t cullingID, const scene::Camera *camera, const std::string &layoutPath, const std::string &hzbName, const scene::Light *light, uint32_t level, bool bMainPass) = 0; /** * @en Add hierarchical z buffer generation pass * @zh 添加层次化深度缓存生成通道 @@ -1489,16 +1489,19 @@ class Pipeline : public BasicPipeline { updateStorageTexture(name, width, height, gfx::Format::UNKNOWN); } void addBuiltinGpuCullingPass(uint32_t cullingID, const scene::Camera *camera) { - addBuiltinGpuCullingPass(cullingID, camera, "", "", nullptr, true); + addBuiltinGpuCullingPass(cullingID, camera, "", "", nullptr, 0, true); } void addBuiltinGpuCullingPass(uint32_t cullingID, const scene::Camera *camera, const std::string &layoutPath) { - addBuiltinGpuCullingPass(cullingID, camera, layoutPath, "", nullptr, true); + addBuiltinGpuCullingPass(cullingID, camera, layoutPath, "", nullptr, 0, true); } void addBuiltinGpuCullingPass(uint32_t cullingID, const scene::Camera *camera, const std::string &layoutPath, const std::string &hzbName) { - addBuiltinGpuCullingPass(cullingID, camera, layoutPath, hzbName, nullptr, true); + addBuiltinGpuCullingPass(cullingID, camera, layoutPath, 
hzbName, nullptr, 0, true); } void addBuiltinGpuCullingPass(uint32_t cullingID, const scene::Camera *camera, const std::string &layoutPath, const std::string &hzbName, const scene::Light *light) { - addBuiltinGpuCullingPass(cullingID, camera, layoutPath, hzbName, light, true); + addBuiltinGpuCullingPass(cullingID, camera, layoutPath, hzbName, light, 0, true); + } + void addBuiltinGpuCullingPass(uint32_t cullingID, const scene::Camera *camera, const std::string &layoutPath, const std::string &hzbName, const scene::Light *light, uint32_t level) { + addBuiltinGpuCullingPass(cullingID, camera, layoutPath, hzbName, light, level, true); } }; diff --git a/native/cocos/scene/Model.cpp b/native/cocos/scene/Model.cpp index d2effbf8164..570325eeeff 100644 --- a/native/cocos/scene/Model.cpp +++ b/native/cocos/scene/Model.cpp @@ -601,7 +601,11 @@ void Model::updateInstancedAttributes(const ccstd::vector &attri _localDataUpdated = true; } -void Model::initLocalDescriptors(index_t /*subModelIndex*/) { +void Model::initLocalDescriptors(index_t subModelIndex) { + if (supportGPUDriven(subModelIndex)) { + return; + } + if (!_localBuffer) { _localBuffer = _device->createBuffer({gfx::BufferUsageBit::UNIFORM | gfx::BufferUsageBit::TRANSFER_DST, gfx::MemoryUsageBit::DEVICE, @@ -632,7 +636,11 @@ void Model::initLocalSHDescriptors(index_t /*subModelIndex*/) { } } -void Model::initWorldBoundDescriptors(index_t /*subModelIndex*/) { +void Model::initWorldBoundDescriptors(index_t subModelIndex) { + if (supportGPUDriven(subModelIndex)) { + return; + } + if (!_worldBoundBuffer) { _worldBoundBuffer = _device->createBuffer({gfx::BufferUsageBit::UNIFORM | gfx::BufferUsageBit::TRANSFER_DST, gfx::MemoryUsageBit::DEVICE, diff --git a/native/cocos/scene/Model.h b/native/cocos/scene/Model.h index ff501da94a8..7333d729d76 100644 --- a/native/cocos/scene/Model.h +++ b/native/cocos/scene/Model.h @@ -252,7 +252,6 @@ class Model : public RefCounted { } inline void setModelBounds(geometry::AABB *bounds) { 
_modelBounds = bounds; } inline bool isModelImplementedInJS() const { return (_type != Type::DEFAULT && _type != Type::SKINNING && _type != Type::BAKED_SKINNING); }; - inline const gfx::Texture *getLightmap() const { return _lightmap ? _lightmap->getGFXTexture() : nullptr; } inline void setGPUDrivenEnabled(bool b) { _gpuDrivenEnabled = b; } inline bool isGPUDrivenEnabled() const { return _gpuDrivenEnabled; } diff --git a/native/cocos/scene/gpu-scene/GPUBatchPool.cpp b/native/cocos/scene/gpu-scene/GPUBatchPool.cpp index 9299ab969e5..991f12911b1 100644 --- a/native/cocos/scene/gpu-scene/GPUBatchPool.cpp +++ b/native/cocos/scene/gpu-scene/GPUBatchPool.cpp @@ -54,11 +54,18 @@ void GPUBatch::addSubModel(const SubModel* subModel, uint32_t passIdx) { const auto meshIdx = subMesh->getMeshPoolIndex(); const auto objectIdx = subModel->getObjectPoolIndex(); auto *shader = subModel->getShader(passIdx); + auto *descriptorSet = subModel->getDescriptorSet(); + auto *lightingMap = descriptorSet->getTexture(pipeline::LIGHTMAPTEXTURE::BINDING); auto *meshPool = _gpuScene->getMeshPool(); const auto &meshData = meshPool->getSubMeshData(meshIdx); for (auto &item : _items) { + // whether to use the same lightmap + if (item.lightingMap != lightingMap) { + continue; + } + // whether to use the same shader if (item.shader != shader) { continue; @@ -91,7 +98,7 @@ void GPUBatch::addSubModel(const SubModel* subModel, uint32_t passIdx) { const gfx::InputAssemblerInfo info = {subMesh->getAttributes(), vbs, ib}; auto *inputAssembler = device->createInputAssembler(info); - BatchItem item{0, 0, shader, inputAssembler, meshData.indexStride, {}}; + BatchItem item{0, 0, shader, inputAssembler, descriptorSet, lightingMap, meshData.indexStride, {}}; item.mesh2objects.insert({meshIdx, {objectIdx}}); _items.push_back(item); @@ -165,35 +172,33 @@ void GPUBatchPool::update(uint32_t stamp) { uint32_t batchId = 0U; const auto &meshPool = _gpuScene->getMeshPool(); - for (auto &lightmapBatch : _batches) { - 
for (auto &passBatch : lightmapBatch.second) { - auto *pass = passBatch.first; - const auto phaseId = pass->getPhaseID(); - auto &items = passBatch.second->getItems(); + for (auto &batch : _batches) { + auto *pass = batch.second->getPass(); + const auto phaseId = pass->getPhaseID(); + auto &items = batch.second->getItems(); - for (auto &item : items) { - item.first = first; - item.count = static_cast(item.mesh2objects.size()); - first += item.count; + for (auto &item : items) { + item.first = first; + item.count = static_cast(item.mesh2objects.size()); + first += item.count; - for (const auto &iter : item.mesh2objects) { - const auto meshIdx = iter.first; - const auto &meshData = meshPool->getSubMeshData(meshIdx); + for (const auto &iter : item.mesh2objects) { + const auto meshIdx = iter.first; + const auto &meshData = meshPool->getSubMeshData(meshIdx); - const auto indexCount = meshData.indexCount; - const auto firstIndex = meshData.firstIndex; - const auto firstVertex = static_cast(meshData.firstVertex); - const auto firstInstance = static_cast(_instances.size()); - // const auto instanceCount = static_cast(iter.second.size()); + const auto indexCount = meshData.indexCount; + const auto firstIndex = meshData.firstIndex; + const auto firstVertex = static_cast(meshData.firstVertex); + const auto firstInstance = static_cast(_instances.size()); + // const auto instanceCount = static_cast(iter.second.size()); - _indirectCmds.push_back({indexCount, 0, firstIndex, firstVertex, firstInstance}); + _indirectCmds.push_back({indexCount, 0, firstIndex, firstVertex, firstInstance}); - for (const auto &objectIdx : iter.second) { - _instances.push_back({objectIdx, phaseId, batchId}); - } - - batchId++; + for (const auto &objectIdx : iter.second) { + _instances.push_back({objectIdx, phaseId, batchId}); } + + batchId++; } } } @@ -202,7 +207,6 @@ void GPUBatchPool::update(uint32_t stamp) { } void GPUBatchPool::addModel(const Model* model) { - const auto *lightmap = 
model->getLightmap(); const auto &subModels = model->getSubModels(); for (const auto &subModel : subModels) { @@ -210,18 +214,13 @@ void GPUBatchPool::addModel(const Model* model) { const auto passCount = passes.size(); for (auto passIdx = 0; passIdx < passCount; passIdx++) { - auto lightmapIter = _batches.find(lightmap); - if (lightmapIter == _batches.cend()) { - lightmapIter = _batches.insert({lightmap, PassBatchMap()}).first; - } - const auto &pass = passes[passIdx]; - auto passIter = lightmapIter->second.find(pass); - if (passIter == lightmapIter->second.cend()) { - passIter = lightmapIter->second.insert({pass, ccnew GPUBatch(_gpuScene, pass)}).first; + auto iter = _batches.find(pass); + if (iter == _batches.cend()) { + iter = _batches.insert({pass, ccnew GPUBatch(_gpuScene, pass)}).first; } - passIter->second->addSubModel(subModel, passIdx); + iter->second->addSubModel(subModel, passIdx); } } @@ -229,7 +228,6 @@ void GPUBatchPool::addModel(const Model* model) { } void GPUBatchPool::removeModel(const Model* model) { - const auto *lightmap = model->getLightmap(); const auto &subModels = model->getSubModels(); for (const auto &subModel : subModels) { @@ -237,18 +235,13 @@ void GPUBatchPool::removeModel(const Model* model) { const auto passCount = passes.size(); for (auto passIdx = 0; passIdx < passCount; passIdx++) { - auto lightmapIter = _batches.find(lightmap); - if (lightmapIter == _batches.cend()) { - continue; - } - const auto &pass = passes[passIdx]; - auto passIter = lightmapIter->second.find(pass); - if (passIter == lightmapIter->second.cend()) { + auto iter = _batches.find(pass); + if (iter == _batches.cend()) { continue; } - passIter->second->removeSubModel(subModel, passIdx); + iter->second->removeSubModel(subModel, passIdx); } } @@ -256,10 +249,8 @@ void GPUBatchPool::removeModel(const Model* model) { } void GPUBatchPool::removeAllModels() { - for (auto &lightMapBatch : _batches) { - for (auto &passBatch : lightMapBatch.second) { - 
CC_SAFE_DESTROY_AND_DELETE(passBatch.second); - } + for (auto &batch : _batches) { + CC_SAFE_DESTROY_AND_DELETE(batch.second); } _batches.clear(); diff --git a/native/cocos/scene/gpu-scene/GPUBatchPool.h b/native/cocos/scene/gpu-scene/GPUBatchPool.h index 01ec90e7ac7..d4dd1122008 100644 --- a/native/cocos/scene/gpu-scene/GPUBatchPool.h +++ b/native/cocos/scene/gpu-scene/GPUBatchPool.h @@ -40,6 +40,7 @@ namespace gfx { class Shader; class Buffer; class InputAssembler; +class DescriptorSet; class Texture; } @@ -67,6 +68,8 @@ struct CC_DLL BatchItem { gfx::Shader *shader{nullptr}; gfx::InputAssembler *inputAssembler{nullptr}; + gfx::DescriptorSet *descriptorSet{nullptr}; + gfx::Texture *lightingMap{nullptr}; uint32_t indexStride{0U}; /** @@ -99,9 +102,6 @@ class CC_DLL GPUBatch final { BatchItemList _items; }; -using PassBatchMap = ccstd::unordered_map; -using LightMapBatchMap = ccstd::unordered_map; - class CC_DLL GPUBatchPool final : public RefCounted { public: GPUBatchPool() = default; @@ -115,7 +115,7 @@ class CC_DLL GPUBatchPool final : public RefCounted { void removeModel(const Model *model); void removeAllModels(); - inline LightMapBatchMap &getBatches() { return _batches; } + inline ccstd::unordered_map &getBatches() { return _batches; } inline uint32_t getIndirectCount() const { return static_cast(_indirectCmds.size()); } inline uint32_t getInstanceCount() const { return static_cast(_instances.size()); } static uint32_t getIndirectStride(); @@ -128,7 +128,7 @@ class CC_DLL GPUBatchPool final : public RefCounted { void updateBuffers(); GPUScene *_gpuScene{nullptr}; - LightMapBatchMap _batches; + ccstd::unordered_map _batches; ccstd::vector _instances; ccstd::vector _indirectCmds; From afbf795aa06cf2b04a9bd7863aafb1d1f785603f Mon Sep 17 00:00:00 2001 From: stanleyljl <798080536@qq.com> Date: Sun, 8 Oct 2023 17:04:08 +0800 Subject: [PATCH 11/16] fix gpu driven shadow bug --- .../renderer/pipeline/custom/NativeExecutor.cpp | 13 +++++++++++-- 
.../renderer/pipeline/custom/NativeRenderGraph.cpp | 7 +++++++ .../renderer/pipeline/custom/NativeRenderQueue.cpp | 5 ----- .../renderer/pipeline/custom/RenderGraphTypes.cpp | 2 ++ .../renderer/pipeline/custom/RenderGraphTypes.h | 1 + 5 files changed, 21 insertions(+), 7 deletions(-) diff --git a/native/cocos/renderer/pipeline/custom/NativeExecutor.cpp b/native/cocos/renderer/pipeline/custom/NativeExecutor.cpp index ecc60b35b70..ddd087f4211 100644 --- a/native/cocos/renderer/pipeline/custom/NativeExecutor.cpp +++ b/native/cocos/renderer/pipeline/custom/NativeExecutor.cpp @@ -593,6 +593,7 @@ gfx::DescriptorSet* initDescriptorSet( gfx::DescriptorSet* updatePerPassDescriptorSet( gfx::CommandBuffer* cmdBuff, + ResourceGraph& resg, const LayoutGraphData& lg, const DescriptorSetData& set, const RenderData& user, @@ -677,7 +678,15 @@ gfx::DescriptorSet* updatePerPassDescriptorSet( for (const auto& d : block.descriptors) { bool found = false; CC_EXPECTS(d.count == 1); - if (auto iter = user.buffers.find(d.descriptorID.value); + if (auto iter = user.bufferNames.find(d.descriptorID.value); + iter != user.bufferNames.end()) { + const auto resID = findVertex(iter->second, resg); + CC_ENSURES(resID != ResourceGraph::null_vertex()); + auto* buffer = resg.getBuffer(resID); + CC_ENSURES(buffer); + newSet->bindBuffer(bindID, buffer); + found = true; + } else if (auto iter = user.buffers.find(d.descriptorID.value); iter != user.buffers.end()) { newSet->bindBuffer(bindID, iter->second.get()); found = true; @@ -732,7 +741,7 @@ gfx::DescriptorSet* updateCameraUniformBufferAndDescriptorSet( auto& set = iter->second; auto& node = ctx.context.layoutGraphResources.at(passLayoutID); const auto& user = get(RenderGraph::DataTag{}, ctx.g, sceneID); // notice: sceneID - perPassSet = updatePerPassDescriptorSet(ctx.cmdBuff, ctx.lg, set, user, node); + perPassSet = updatePerPassDescriptorSet(ctx.cmdBuff, ctx.resourceGraph, ctx.lg, set, user, node); } return perPassSet; } diff --git 
a/native/cocos/renderer/pipeline/custom/NativeRenderGraph.cpp b/native/cocos/renderer/pipeline/custom/NativeRenderGraph.cpp index cc6e2c863f1..c05f83c0258 100644 --- a/native/cocos/renderer/pipeline/custom/NativeRenderGraph.cpp +++ b/native/cocos/renderer/pipeline/custom/NativeRenderGraph.cpp @@ -760,6 +760,7 @@ void NativeRenderQueueBuilder::addGpuDrivenResource(const scene::Camera *camera, } if (any(sceneFlags & SceneFlags::GPU_DRIVEN)) { + auto &data = get(RenderGraph::DataTag{}, *renderGraph, rgSceneID); const auto passID = renderGraph->getPassID(nodeID); const auto &sceneCulling = dynamic_cast(pipelineRuntime)->nativeContext.sceneCulling; const auto sceneID = sceneCulling.sceneIDs.at(scene); @@ -772,6 +773,12 @@ void NativeRenderQueueBuilder::addGpuDrivenResource(const scene::Camera *camera, ccstd::pmr::string drawInstanceBuffer("CCDrawInstanceBuffer"); drawInstanceBuffer.append(std::to_string(cullingID)); + const auto objectNameID = layoutGraph->attributeIndex.at("CCObjectBuffer"); + data.bufferNames[objectNameID.value] = objectBuffer; + + const auto instanceNameID = layoutGraph->attributeIndex.at("CCDrawInstanceBuffer"); + data.bufferNames[instanceNameID.value] = drawInstanceBuffer; + auto &rasterPass = get(RasterPassTag{}, passID, *renderGraph); if (rasterPass.computeViews.find(objectBuffer) == rasterPass.computeViews.end()) { auto res = rasterPass.computeViews.emplace( diff --git a/native/cocos/renderer/pipeline/custom/NativeRenderQueue.cpp b/native/cocos/renderer/pipeline/custom/NativeRenderQueue.cpp index b7520244b09..d90e4b48e0c 100644 --- a/native/cocos/renderer/pipeline/custom/NativeRenderQueue.cpp +++ b/native/cocos/renderer/pipeline/custom/NativeRenderQueue.cpp @@ -204,11 +204,6 @@ void GPUDrivenQueue::recordCommandBuffer( auto indirectResID = findVertex(indirectName, resg); const auto &indirectBuffer = get(ManagedBufferTag{}, indirectResID, resg).buffer.get(); - ccstd::pmr::string instanceName("CCDrawInstanceBuffer", get_allocator()); - 
instanceName.append(std::to_string(cullingID)); - auto instanceResID = findVertex(instanceName, resg); - const auto &instanceBuffer = get(ManagedBufferTag{}, instanceResID, resg).buffer.get(); - // Draw visible instances const auto supportFirstInstance = device->getCapabilities().supportFirstInstance; auto *batchPool = gpuScene->getBatchPool(); diff --git a/native/cocos/renderer/pipeline/custom/RenderGraphTypes.cpp b/native/cocos/renderer/pipeline/custom/RenderGraphTypes.cpp index 6627c262ebc..3ca5123978e 100644 --- a/native/cocos/renderer/pipeline/custom/RenderGraphTypes.cpp +++ b/native/cocos/renderer/pipeline/custom/RenderGraphTypes.cpp @@ -392,6 +392,7 @@ RenderData::RenderData(const allocator_type& alloc) noexcept buffers(alloc), textures(alloc), samplers(alloc), + bufferNames(alloc), custom(alloc) {} RenderData::RenderData(RenderData&& rhs, const allocator_type& alloc) @@ -399,6 +400,7 @@ RenderData::RenderData(RenderData&& rhs, const allocator_type& alloc) buffers(std::move(rhs.buffers), alloc), textures(std::move(rhs.textures), alloc), samplers(std::move(rhs.samplers), alloc), + bufferNames(std::move(rhs.bufferNames), alloc), custom(std::move(rhs.custom), alloc) {} RenderGraph::RenderGraph(const allocator_type& alloc) noexcept diff --git a/native/cocos/renderer/pipeline/custom/RenderGraphTypes.h b/native/cocos/renderer/pipeline/custom/RenderGraphTypes.h index 1cccc271dbf..0cbe0752bc2 100644 --- a/native/cocos/renderer/pipeline/custom/RenderGraphTypes.h +++ b/native/cocos/renderer/pipeline/custom/RenderGraphTypes.h @@ -911,6 +911,7 @@ struct RenderData { PmrUnorderedMap> buffers; PmrUnorderedMap> textures; PmrUnorderedMap samplers; + PmrUnorderedMap bufferNames; ccstd::pmr::string custom; }; From 35db44c2a7e1e11446b6c8a16a8436af7ac2edee Mon Sep 17 00:00:00 2001 From: stanleyljl <798080536@qq.com> Date: Sun, 8 Oct 2023 18:17:00 +0800 Subject: [PATCH 12/16] fix eslint warnning --- cocos/scene-graph/scene.ts | 5 ++++- 
native/cocos/renderer/pipeline/custom/NativeRenderGraph.cpp | 3 --- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cocos/scene-graph/scene.ts b/cocos/scene-graph/scene.ts index ce5b1a2f261..615944075d8 100644 --- a/cocos/scene-graph/scene.ts +++ b/cocos/scene-graph/scene.ts @@ -129,11 +129,13 @@ export class Scene extends Node { /** * @deprecated since v3.5.0, this is an engine private interface that will be removed in the future. */ + // eslint-disable-next-line @typescript-eslint/no-empty-function public _onHierarchyChanged (): void { } /** * @deprecated since v3.5.0, this is an engine private interface that will be removed in the future. */ + // eslint-disable-next-line @typescript-eslint/no-empty-function public _onPostActivated (active: boolean): void { } @@ -155,10 +157,11 @@ export class Scene extends Node { * @zh * 参考 [[Node.updateWorldTransform]] */ + // eslint-disable-next-line @typescript-eslint/no-empty-function public updateWorldTransform (): void {} // life-cycle call backs - + // eslint-disable-next-line @typescript-eslint/no-empty-function protected _instantiate (): void { } /** diff --git a/native/cocos/renderer/pipeline/custom/NativeRenderGraph.cpp b/native/cocos/renderer/pipeline/custom/NativeRenderGraph.cpp index c05f83c0258..a0c7f6422f3 100644 --- a/native/cocos/renderer/pipeline/custom/NativeRenderGraph.cpp +++ b/native/cocos/renderer/pipeline/custom/NativeRenderGraph.cpp @@ -773,9 +773,6 @@ void NativeRenderQueueBuilder::addGpuDrivenResource(const scene::Camera *camera, ccstd::pmr::string drawInstanceBuffer("CCDrawInstanceBuffer"); drawInstanceBuffer.append(std::to_string(cullingID)); - const auto objectNameID = layoutGraph->attributeIndex.at("CCObjectBuffer"); - data.bufferNames[objectNameID.value] = objectBuffer; - const auto instanceNameID = layoutGraph->attributeIndex.at("CCDrawInstanceBuffer"); data.bufferNames[instanceNameID.value] = drawInstanceBuffer; From 353314753619d7d8b43b66223591bb56b7b32d72 Mon Sep 17 00:00:00 
2001 From: stanleyljl <798080536@qq.com> Date: Mon, 9 Oct 2023 10:56:51 +0800 Subject: [PATCH 13/16] fix clang tidy error --- native/cocos/renderer/pipeline/custom/FrameGraphDispatcher.cpp | 2 +- native/cocos/scene/gpu-scene/GPUBatchPool.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/native/cocos/renderer/pipeline/custom/FrameGraphDispatcher.cpp b/native/cocos/renderer/pipeline/custom/FrameGraphDispatcher.cpp index 801a22cc179..14af4621447 100644 --- a/native/cocos/renderer/pipeline/custom/FrameGraphDispatcher.cpp +++ b/native/cocos/renderer/pipeline/custom/FrameGraphDispatcher.cpp @@ -1602,7 +1602,7 @@ constexpr gfx::AccessFlags allStageReadAccess(const ResourceDesc& desc) { if ((flags & ResourceFlags::STORAGE) != ResourceFlags::NONE) { ret = gfx::AccessFlags::COMPUTE_SHADER_READ_OTHER | gfx::AccessFlags::FRAGMENT_SHADER_READ_OTHER | gfx::AccessFlags::VERTEX_SHADER_READ_OTHER; } else if (isBuffer) { // NOLINT(misc-redundant-expression) - ret = gfx::AccessFlags::COMPUTE_SHADER_READ_UNIFORM_BUFFER | gfx::AccessFlags::COMPUTE_SHADER_READ_UNIFORM_BUFFER | gfx::AccessFlags::VERTEX_SHADER_READ_UNIFORM_BUFFER; + ret = gfx::AccessFlags::COMPUTE_SHADER_READ_UNIFORM_BUFFER | gfx::AccessFlags::FRAGMENT_SHADER_READ_UNIFORM_BUFFER | gfx::AccessFlags::VERTEX_SHADER_READ_UNIFORM_BUFFER; } else { ret = gfx::AccessFlags::COMPUTE_SHADER_READ_TEXTURE | gfx::AccessFlags::FRAGMENT_SHADER_READ_TEXTURE | gfx::AccessFlags::VERTEX_SHADER_READ_TEXTURE; } diff --git a/native/cocos/scene/gpu-scene/GPUBatchPool.cpp b/native/cocos/scene/gpu-scene/GPUBatchPool.cpp index 991f12911b1..e161f0346e9 100644 --- a/native/cocos/scene/gpu-scene/GPUBatchPool.cpp +++ b/native/cocos/scene/gpu-scene/GPUBatchPool.cpp @@ -173,7 +173,7 @@ void GPUBatchPool::update(uint32_t stamp) { const auto &meshPool = _gpuScene->getMeshPool(); for (auto &batch : _batches) { - auto *pass = batch.second->getPass(); + const auto *pass = batch.second->getPass(); const auto phaseId = pass->getPhaseID(); 
auto &items = batch.second->getItems(); From b1d162f9865155ac684f241940515413bb0f1f32 Mon Sep 17 00:00:00 2001 From: stanleyljl <798080536@qq.com> Date: Mon, 9 Oct 2023 11:18:50 +0800 Subject: [PATCH 14/16] fix compress texture --- cocos/asset/assets/simple-texture.ts | 4 +--- native/cocos/core/assets/SimpleTexture.cpp | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/cocos/asset/assets/simple-texture.ts b/cocos/asset/assets/simple-texture.ts index a0a49cd2725..6abb7040ab6 100644 --- a/cocos/asset/assets/simple-texture.ts +++ b/cocos/asset/assets/simple-texture.ts @@ -322,11 +322,9 @@ export class SimpleTexture extends TextureBase { if (this._width === 0 || this._height === 0) { return; } let flags = TextureFlagBit.NONE; if (this._mipFilter !== Filter.NONE && canGenerateMipmap(device, this._width, this._height)) { - if (!this.isCompressed) { - this._mipmapLevel = getMipLevel(this._width, this._height); - } if (!this.isUsingOfflineMipmaps() && !this.isCompressed) { flags = TextureFlagBit.GEN_MIPMAP; + this._mipmapLevel = getMipLevel(this._width, this._height); } } const textureCreateInfo = this._getGfxTextureCreateInfo({ diff --git a/native/cocos/core/assets/SimpleTexture.cpp b/native/cocos/core/assets/SimpleTexture.cpp index 86fb47d5440..3c80a33aeba 100644 --- a/native/cocos/core/assets/SimpleTexture.cpp +++ b/native/cocos/core/assets/SimpleTexture.cpp @@ -134,11 +134,9 @@ void SimpleTexture::createTexture(gfx::Device *device) { auto flags = gfx::TextureFlagBit::NONE; auto usage = gfx::TextureUsageBit::SAMPLED | gfx::TextureUsageBit::TRANSFER_DST; if (_mipFilter != Filter::NONE && canGenerateMipmap(_width, _height)) { - if (!isCompressed()) { - _mipmapLevel = getMipLevel(_width, _height); - } if (!isUsingOfflineMipmaps() && !isCompressed()) { flags = gfx::TextureFlagBit::GEN_MIPMAP; + _mipmapLevel = getMipLevel(_width, _height); } } From 63099e53b0815cc8ca7c9594ba0067753d5ac05c Mon Sep 17 00:00:00 2001 From: stanleyljl <798080536@qq.com> Date: 
Tue, 10 Oct 2023 17:06:11 +0800 Subject: [PATCH 15/16] support shadow map of spot light --- .../pipeline/custom/NativePipeline.cpp | 54 +++++++++++++------ 1 file changed, 39 insertions(+), 15 deletions(-) diff --git a/native/cocos/renderer/pipeline/custom/NativePipeline.cpp b/native/cocos/renderer/pipeline/custom/NativePipeline.cpp index c55389851e9..efb666c8289 100644 --- a/native/cocos/renderer/pipeline/custom/NativePipeline.cpp +++ b/native/cocos/renderer/pipeline/custom/NativePipeline.cpp @@ -35,7 +35,8 @@ #include "cocos/renderer/pipeline/custom/RenderingModule.h" #include "cocos/renderer/pipeline/custom/details/GslUtils.h" #include "cocos/scene/RenderScene.h" -#include "cocos/scene/RenderWindow.h" +#include "cocos/scene/RenderWindow.h" +#include "cocos/scene/SpotLight.h" #include "cocos/scene/gpu-scene/GPUScene.h" #if CC_USE_DEBUG_RENDERER #include "profiler/DebugRenderer.h" @@ -1150,32 +1151,55 @@ void NativePipeline::addBuiltinGpuCullingPass(uint32_t cullingID, std::unique_ptr gpuCullQueue(dynamic_cast(gpuCullPass->addQueue())); gpuCullQueue->addDispatch(groupCount, 1, 1, pipelineSceneData->getGPUCullingMaterial(materialIndex), 0); - if (light) { - auto *layers = pipelineSceneData->getCSMLayers(); - layers->update(pipelineSceneData, camera); + if (light) { + const auto cap = device->getCapabilities(); + const auto size = pipelineSceneData->getShadows()->getSize(); + const auto width = static_cast(size.x) / 2; + const auto height = static_cast(size.y) / 2; + const geometry::Frustum *frustum = nullptr; + Mat4 viewMat, projMat; + uint32_t perspective = 1; + float nearPlane = 0.1F; + float farPlane = 1000.0F; - auto *layer = layers->getLayers()[level]; - const auto size = pipelineSceneData->getShadows()->getSize(); - const auto width = static_cast(size.x) / 2; - const auto height = static_cast(size.y) / 2; + if (light->getType() == scene::LightType::SPOT) { + const auto *spotLight = dynamic_cast(light); + frustum = &spotLight->getFrustum(); + nearPlane = 
0.001F; + farPlane = spotLight->getRange(); + viewMat = spotLight->getNode()->getWorldMatrix().getInversed(); + Mat4::createPerspective(spotLight->getAngle(), 1.0F, nearPlane, farPlane, true, cap.clipSpaceMinZ, cap.clipSpaceSignY, 0, &projMat); + perspective = 1; + } else if (light->getType() == scene::LightType::DIRECTIONAL) { + auto *layers = pipelineSceneData->getCSMLayers(); + layers->update(pipelineSceneData, camera); + auto *layer = layers->getLayers()[level]; + frustum = &layer->getValidFrustum(); + nearPlane = layer->getSplitCameraNear(); + farPlane = layer->getSplitCameraFar(); + viewMat = layer->getMatShadowView(); + projMat = layer->getMatShadowProj(); + perspective = 0; + } else { + CC_EXPECTS(false); + } ccstd::vector planes; - const auto &frustum = layer->getValidFrustum(); - for (auto *plane : frustum.planes) { + for (auto *plane : frustum->planes) { planes.push_back(plane->n.x); planes.push_back(plane->n.y); planes.push_back(plane->n.z); planes.push_back(plane->d); } ArrayBuffer planesBuffer(reinterpret_cast(&planes[0]), sizeof(float) * static_cast(planes.size())); - gpuCullPass->setMat4("cc_view", layer->getMatShadowView()); - gpuCullPass->setMat4("cc_proj", layer->getMatShadowProj()); + gpuCullPass->setMat4("cc_view", viewMat); + gpuCullPass->setMat4("cc_proj", projMat); gpuCullPass->setArrayBuffer("cc_planes", &planesBuffer); - gpuCullPass->setFloat("cc_znear", layer->getSplitCameraNear()); - gpuCullPass->setFloat("cc_zfar", layer->getSplitCameraFar()); + gpuCullPass->setFloat("cc_znear", nearPlane); + gpuCullPass->setFloat("cc_zfar", farPlane); gpuCullPass->setFloat("cc_depthWidth", static_cast(utils::previousPOT(width))); gpuCullPass->setFloat("cc_depthHeight", static_cast(utils::previousPOT(height))); - gpuCullPass->setUint("cc_isPerspective", light->getType() == scene::LightType::DIRECTIONAL ? 
0 : 1); + gpuCullPass->setUint("cc_isPerspective", perspective); gpuCullPass->setUint("cc_orientation", 0); gpuCullPass->setUint("cc_instanceCount", instanceCount); gpuCullPass->setUint("cc_phaseId", phaseID); From 98eb869a6d14efc4917d1b650b0a121da4bcee9e Mon Sep 17 00:00:00 2001 From: stanleyljl <798080536@qq.com> Date: Tue, 10 Oct 2023 17:44:55 +0800 Subject: [PATCH 16/16] fix clang tidy --- native/cocos/renderer/pipeline/custom/NativePipeline.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/native/cocos/renderer/pipeline/custom/NativePipeline.cpp b/native/cocos/renderer/pipeline/custom/NativePipeline.cpp index efb666c8289..25da497d7a9 100644 --- a/native/cocos/renderer/pipeline/custom/NativePipeline.cpp +++ b/native/cocos/renderer/pipeline/custom/NativePipeline.cpp @@ -1157,7 +1157,8 @@ void NativePipeline::addBuiltinGpuCullingPass(uint32_t cullingID, const auto width = static_cast(size.x) / 2; const auto height = static_cast(size.y) / 2; const geometry::Frustum *frustum = nullptr; - Mat4 viewMat, projMat; + Mat4 viewMat; + Mat4 projMat; uint32_t perspective = 1; float nearPlane = 0.1F; float farPlane = 1000.0F;