From a6c5f93fb4b70683cd129c856f43279e93d5b093 Mon Sep 17 00:00:00 2001 From: Jozufozu Date: Sun, 3 Nov 2024 16:32:29 -0800 Subject: [PATCH] Spherical instances in a vacuum - Write out instance bounding spheres in pass one - Read them back in pass two so we don't have to read in the entire instance twice - Cull pass 2 no longer needs to be parameterized by instance type, so less program binds are needed - Fix page indexing logic - Fix visibility sizing logic --- .../backend/compile/IndirectPrograms.java | 25 +++++++++--- .../engine/indirect/BufferBindings.java | 19 +++++----- .../engine/indirect/IndirectBuffers.java | 26 ++++++++----- .../engine/indirect/IndirectCullingGroup.java | 7 ---- .../engine/indirect/IndirectDrawManager.java | 3 ++ .../internal/indirect/buffer_bindings.glsl | 19 +++++----- .../internal/indirect/early_cull.glsl | 30 +++++++-------- .../flywheel/internal/indirect/late_cull.glsl | 38 +++++-------------- 8 files changed, 82 insertions(+), 85 deletions(-) diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java index 2e8d5d5ae..dadf7181e 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/compile/IndirectPrograms.java @@ -23,6 +23,7 @@ import dev.engine_room.flywheel.backend.util.AtomicReferenceCounted; import dev.engine_room.flywheel.lib.util.ResourceUtil; import net.minecraft.resources.ResourceLocation; +import net.minecraft.util.Unit; public class IndirectPrograms extends AtomicReferenceCounted { private static final ResourceLocation CULL_SHADER_API_IMPL = Flywheel.rl("internal/indirect/cull_api_impl.glsl"); @@ -37,6 +38,7 @@ public class IndirectPrograms extends AtomicReferenceCounted { private static final Compile> CULL = new Compile<>(); private static final Compile UTIL = new Compile<>(); + private static final Compile UNIT = new Compile<>(); private static final List EXTENSIONS = getExtensions(GlCompat.MAX_GLSL_VERSION); private static final List COMPUTE_EXTENSIONS = getComputeExtensions(GlCompat.MAX_GLSL_VERSION); @@ -46,10 +48,10 @@ public class IndirectPrograms extends AtomicReferenceCounted { private final PipelineCompiler pipeline; private final CompilationHarness> culling; - private final CompilationHarness> cullPassTwo; + private final CompilationHarness cullPassTwo; private final CompilationHarness utils; - private IndirectPrograms(PipelineCompiler pipeline, CompilationHarness> culling, CompilationHarness> cullPassTwo, CompilationHarness utils) { + private IndirectPrograms(PipelineCompiler pipeline, CompilationHarness> culling, CompilationHarness cullPassTwo, CompilationHarness utils) { this.pipeline = pipeline; this.culling = culling; this.cullPassTwo = cullPassTwo; @@ -91,7 +93,7 @@ static void reload(ShaderSources sources, List vertexComponents var pipelineCompiler = PipelineCompiler.create(sources, Pipelines.INDIRECT, vertexComponents, fragmentComponents, EXTENSIONS); var pass1Compiler = createCullingCompiler(sources, CULL_SHADER_MAIN, "early_cull"); - var pass2Compiler = createCullingCompiler(sources, PASS2_SHADER_MAIN, "late_cull"); + var pass2Compiler = createPassTwoCompiler(sources, PASS2_SHADER_MAIN, "late_cull"); var utilCompiler = createUtilCompiler(sources); IndirectPrograms newInstance = new IndirectPrograms(pipelineCompiler, pass1Compiler, pass2Compiler, utilCompiler); @@ -119,6 +121,19 @@ private static CompilationHarness> createCullingCompiler(ShaderS .harness(name, sources); } + private static CompilationHarness createPassTwoCompiler(ShaderSources sources, ResourceLocation main, String name) { + return UNIT.program() + .link(UNIT.shader(GlCompat.MAX_GLSL_VERSION, ShaderType.COMPUTE) + .nameMapper(instanceType -> name) + .requireExtensions(COMPUTE_EXTENSIONS) + .define("_FLW_SUBGROUP_SIZE", GlCompat.SUBGROUP_SIZE) + .enableExtension("GL_KHR_shader_subgroup_basic") + .enableExtension("GL_KHR_shader_subgroup_ballot") + .withResource(main)) + .postLink((key, program) -> Uniforms.setUniformBlockBindings(program)) + .harness(name, sources); + } + /** * A compiler for utility shaders, directly compiles the shader at the resource location specified by the parameter. */ @@ -163,8 +178,8 @@ public GlProgram getCullingProgram(InstanceType instanceType) { return culling.get(instanceType); } - public GlProgram getCullPassTwoProgram(InstanceType instanceType) { - return cullPassTwo.get(instanceType); + public GlProgram getCullPassTwoProgram() { + return cullPassTwo.get(Unit.INSTANCE); } public GlProgram getApplyProgram() { diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/BufferBindings.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/BufferBindings.java index 322cfe531..455856bc5 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/BufferBindings.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/BufferBindings.java @@ -1,16 +1,17 @@ package dev.engine_room.flywheel.backend.engine.indirect; public final class BufferBindings { - public static final int LAST_FRAME_VISIBILITY = 0; - public static final int PAGE_FRAME_DESCRIPTOR = 1; - public static final int INSTANCE = 2; - public static final int DRAW_INSTANCE_INDEX = 3; - public static final int MODEL = 4; - public static final int DRAW = 5; + public static final int BOUNDING_SPHERES = 0; + public static final int LAST_FRAME_VISIBILITY = 1; + public static final int PAGE_FRAME_DESCRIPTOR = 2; + public static final int INSTANCE = 3; + public static final int DRAW_INSTANCE_INDEX = 4; + public static final int MODEL = 5; + public static final int DRAW = 6; - public static final int LIGHT_LUT = 6; - public static final int LIGHT_SECTION = 7; - public static final int MATRICES = 8; + public static final int LIGHT_LUT = 7; + public static final int LIGHT_SECTION = 8; + public static final int MATRICES = 9; private BufferBindings() { } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java index 62f1453fd..6922dda75 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectBuffers.java @@ -7,12 +7,11 @@ import org.lwjgl.system.Pointer; import dev.engine_room.flywheel.backend.gl.buffer.GlBufferType; -import dev.engine_room.flywheel.lib.math.MoreMath; import dev.engine_room.flywheel.lib.memory.MemoryBlock; public class IndirectBuffers { // Number of vbos created. - public static final int BUFFER_COUNT = 6; + public static final int BUFFER_COUNT = 7; public static final long INT_SIZE = Integer.BYTES; public static final long PTR_SIZE = Pointer.POINTER_SIZE; @@ -31,6 +30,7 @@ public class IndirectBuffers { private static final long BUFFERS_SIZE_BYTES = SIZE_OFFSET + BUFFER_COUNT * PTR_SIZE; // Offsets to the vbos + private static final long BOUNDING_SPHERES_HANDLE_OFFSET = HANDLE_OFFSET + BufferBindings.BOUNDING_SPHERES * INT_SIZE; private static final long LAST_FRAME_VISIBILITY_HANDLE_OFFSET = HANDLE_OFFSET + BufferBindings.LAST_FRAME_VISIBILITY * INT_SIZE; private static final long PAGE_FRAME_DESCRIPTOR_HANDLE_OFFSET = HANDLE_OFFSET + BufferBindings.PAGE_FRAME_DESCRIPTOR * INT_SIZE; private static final long INSTANCE_HANDLE_OFFSET = HANDLE_OFFSET + BufferBindings.INSTANCE * INT_SIZE; @@ -39,6 +39,7 @@ public class IndirectBuffers { private static final long DRAW_HANDLE_OFFSET = HANDLE_OFFSET + BufferBindings.DRAW * INT_SIZE; // Offsets to the sizes + private static final long BOUNDING_SPHERES_SIZE_OFFSET = SIZE_OFFSET + BufferBindings.BOUNDING_SPHERES * PTR_SIZE; private static final long LAST_FRAME_VISIBILITY_SIZE_OFFSET = SIZE_OFFSET + BufferBindings.LAST_FRAME_VISIBILITY * PTR_SIZE; private static final long PAGE_FRAME_DESCRIPTOR_SIZE_OFFSET = SIZE_OFFSET + BufferBindings.PAGE_FRAME_DESCRIPTOR * PTR_SIZE; private static final long INSTANCE_SIZE_OFFSET = SIZE_OFFSET + BufferBindings.INSTANCE * PTR_SIZE; @@ -65,6 +66,7 @@ public class IndirectBuffers { */ private final MemoryBlock multiBindBlock; + public final ResizableStorageArray boundingSpheres; public final ResizableStorageArray lastFrameVisibility; public final ObjectStorage objectStorage; public final ResizableStorageArray drawInstanceIndex; @@ -74,6 +76,7 @@ public class IndirectBuffers { IndirectBuffers(long instanceStride) { this.multiBindBlock = MemoryBlock.calloc(BUFFERS_SIZE_BYTES, 1); + boundingSpheres = new ResizableStorageArray(16); lastFrameVisibility = new ResizableStorageArray(INT_SIZE, INSTANCE_GROWTH_FACTOR); objectStorage = new ObjectStorage(instanceStride); drawInstanceIndex = new ResizableStorageArray(INT_SIZE, INSTANCE_GROWTH_FACTOR); @@ -83,12 +86,14 @@ public class IndirectBuffers { void updateCounts(int instanceCount, int modelCount, int drawCount) { drawInstanceIndex.ensureCapacity(instanceCount); - lastFrameVisibility.ensureCapacity(MoreMath.ceilingDiv(instanceCount, 32)); + lastFrameVisibility.ensureCapacity(objectStorage.capacity()); + boundingSpheres.ensureCapacity(objectStorage.capacity() * 32L); model.ensureCapacity(modelCount); draw.ensureCapacity(drawCount); final long ptr = multiBindBlock.ptr(); + MemoryUtil.memPutInt(ptr + BOUNDING_SPHERES_HANDLE_OFFSET, boundingSpheres.handle()); MemoryUtil.memPutInt(ptr + LAST_FRAME_VISIBILITY_HANDLE_OFFSET, lastFrameVisibility.handle()); MemoryUtil.memPutInt(ptr + PAGE_FRAME_DESCRIPTOR_HANDLE_OFFSET, objectStorage.frameDescriptorBuffer.handle()); MemoryUtil.memPutInt(ptr + INSTANCE_HANDLE_OFFSET, objectStorage.objectBuffer.handle()); @@ -96,7 +101,8 @@ void updateCounts(int instanceCount, int modelCount, int drawCount) { MemoryUtil.memPutInt(ptr + MODEL_HANDLE_OFFSET, model.handle()); MemoryUtil.memPutInt(ptr + DRAW_HANDLE_OFFSET, draw.handle()); - MemoryUtil.memPutAddress(ptr + LAST_FRAME_VISIBILITY_SIZE_OFFSET, INT_SIZE * MoreMath.ceilingDiv(instanceCount, 32)); + MemoryUtil.memPutAddress(ptr + BOUNDING_SPHERES_SIZE_OFFSET, 16L * objectStorage.capacity() * 32); + MemoryUtil.memPutAddress(ptr + LAST_FRAME_VISIBILITY_SIZE_OFFSET, INT_SIZE * objectStorage.capacity()); MemoryUtil.memPutAddress(ptr + PAGE_FRAME_DESCRIPTOR_SIZE_OFFSET, objectStorage.frameDescriptorBuffer.capacity()); MemoryUtil.memPutAddress(ptr + INSTANCE_SIZE_OFFSET, objectStorage.objectBuffer.capacity()); MemoryUtil.memPutAddress(ptr + DRAW_INSTANCE_INDEX_SIZE_OFFSET, INT_SIZE * instanceCount); @@ -105,23 +111,23 @@ void updateCounts(int instanceCount, int modelCount, int drawCount) { } public void bindForCullPassOne() { - multiBind(0, 5); + multiBind(0, 6); } public void bindForCullPassTwo() { - multiBind(0, 5); + multiBind(0, 6); } public void bindForApply() { - multiBind(4, 2); + multiBind(5, 2); } public void bindForModelReset() { - multiBind(4, 1); + multiBind(5, 1); } public void bindForDraw() { - multiBind(2, 4); + multiBind(3, 4); GlBufferType.DRAW_INDIRECT_BUFFER.bind(draw.handle()); } @@ -129,7 +135,7 @@ public void bindForDraw() { * Bind all buffers except the draw command buffer. */ public void bindForCrumbling() { - multiBind(3, 3); + multiBind(4, 3); } private void multiBind(int base, int count) { diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java index 4500534c0..91c0bb141 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectCullingGroup.java @@ -23,7 +23,6 @@ import dev.engine_room.flywheel.backend.engine.InstancerKey; import dev.engine_room.flywheel.backend.engine.MaterialRenderState; import dev.engine_room.flywheel.backend.engine.MeshPool; -import dev.engine_room.flywheel.backend.engine.uniform.Uniforms; import dev.engine_room.flywheel.backend.gl.GlCompat; import dev.engine_room.flywheel.backend.gl.shader.GlProgram; import dev.engine_room.flywheel.lib.math.MoreMath; @@ -44,7 +43,6 @@ public class IndirectCullingGroup { private final IndirectPrograms programs; private final GlProgram earlyCull; - private final GlProgram lateCull; private boolean needsDrawBarrier; private boolean needsDrawSort; @@ -58,7 +56,6 @@ public class IndirectCullingGroup { this.programs = programs; earlyCull = programs.getCullingProgram(instanceType); - lateCull = programs.getCullPassTwoProgram(instanceType); } public void flushInstancers() { @@ -113,7 +110,6 @@ public void dispatchCull() { return; } - Uniforms.bindAll(); earlyCull.bind(); buffers.bindForCullPassOne(); @@ -125,9 +121,6 @@ public void dispatchCullPassTwo() { return; } - Uniforms.bindAll(); - lateCull.bind(); - buffers.bindForCullPassTwo(); glDispatchCompute(buffers.objectStorage.capacity(), 1, 1); } diff --git a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java index d59cbe381..fcfd35a09 100644 --- a/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java +++ b/common/src/backend/java/dev/engine_room/flywheel/backend/engine/indirect/IndirectDrawManager.java @@ -132,6 +132,9 @@ public void render(VisualType visualType) { GlTextureUnit.T0.makeActive(); GlStateManager._bindTexture(depthPyramid.pyramidTextureId); + programs.getCullPassTwoProgram() + .bind(); + for (var group1 : cullingGroups.values()) { group1.dispatchCullPassTwo(); } diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/buffer_bindings.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/buffer_bindings.glsl index a035454a5..ec04b636f 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/buffer_bindings.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/buffer_bindings.glsl @@ -2,16 +2,17 @@ // A few of these could be combined. // Per culling group -#define _FLW_LAST_FRAME_VISIBILITY_BUFFER_BINDING 0// cull1, cull2 -#define _FLW_PAGE_FRAME_DESCRIPTOR_BUFFER_BINDING 1// cull1, cull2 -#define _FLW_INSTANCE_BUFFER_BINDING 2// cull1, cull2, draw -#define _FLW_DRAW_INSTANCE_INDEX_BUFFER_BINDING 3// cull1, cull2, draw -#define _FLW_MODEL_BUFFER_BINDING 4// cull1, cull2, apply -#define _FLW_DRAW_BUFFER_BINDING 5// apply, draw +#define _FLW_BOUNDING_SPHERE_BINDING 0// cull1, cull2 +#define _FLW_LAST_FRAME_VISIBILITY_BUFFER_BINDING 1// cull1, cull2 +#define _FLW_PAGE_FRAME_DESCRIPTOR_BUFFER_BINDING 2// cull1, cull2 +#define _FLW_INSTANCE_BUFFER_BINDING 3// cull1, cull2, draw +#define _FLW_DRAW_INSTANCE_INDEX_BUFFER_BINDING 4// cull1, cull2, draw +#define _FLW_MODEL_BUFFER_BINDING 5// cull1, cull2, apply +#define _FLW_DRAW_BUFFER_BINDING 6// apply, draw // Global to the engine -#define _FLW_LIGHT_LUT_BUFFER_BINDING 6 -#define _FLW_LIGHT_SECTIONS_BUFFER_BINDING 7 +#define _FLW_LIGHT_LUT_BUFFER_BINDING 7 +#define _FLW_LIGHT_SECTIONS_BUFFER_BINDING 8 -#define _FLW_MATRIX_BUFFER_BINDING 8 +#define _FLW_MATRIX_BUFFER_BINDING 9 diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/early_cull.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/early_cull.glsl index d06e3ef51..022f20efe 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/early_cull.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/early_cull.glsl @@ -7,15 +7,14 @@ layout(local_size_x = 32) in; +layout(std430, binding = _FLW_BOUNDING_SPHERE_BINDING) restrict writeonly buffer BoundingSphereBuffer { + vec4 _flw_boundingSpheres[]; +}; + layout(std430, binding = _FLW_DRAW_INSTANCE_INDEX_BUFFER_BINDING) restrict writeonly buffer DrawIndexBuffer { uint _flw_drawIndices[]; }; -// High 6 bits for the number of instances in the page. -const uint _FLW_PAGE_COUNT_OFFSET = 26u; -// Bottom 26 bits for the model index. -const uint _FLW_MODEL_INDEX_MASK = 0x3FFFFFF; - layout(std430, binding = _FLW_PAGE_FRAME_DESCRIPTOR_BUFFER_BINDING) restrict readonly buffer PageFrameDescriptorBuffer { uint _flw_pageFrameDescriptors[]; }; @@ -61,39 +60,36 @@ bool _flw_isVisible(uint instanceIndex, uint modelIndex) { transformBoundingSphere(_flw_matrices[matrixIndex].pose, center, radius); } + _flw_boundingSpheres[instanceIndex] = vec4(center, radius); + return _flw_testSphere(center, radius); } -// TODO: There's an opportunity here to write out the transformed bounding spheres to a buffer and use them in pass 2, -// instead of pulling the entire instance again. It would save a lot of memory bandwidth and matrix multiplications in -// pass 2, but it would also be a good bit of writes in pass 1. It's worth investigating, but it would be nice to have -// nsight trace working to be more sure. void main() { - uint pageIndex = gl_WorkGroupID.x; + uint pageIndex = gl_WorkGroupID.x << 1u; if (pageIndex >= _flw_pageFrameDescriptors.length()) { return; } - uint packedModelIndexAndCount = _flw_pageFrameDescriptors[pageIndex]; + uint modelIndex = _flw_pageFrameDescriptors[pageIndex]; - uint pageInstanceCount = packedModelIndexAndCount >> _FLW_PAGE_COUNT_OFFSET; + uint pageValidity = _flw_pageFrameDescriptors[pageIndex + 1]; - if (gl_LocalInvocationID.x >= pageInstanceCount) { + if (((1u << gl_LocalInvocationID.x) & pageValidity) == 0) { return; } uint instanceIndex = gl_GlobalInvocationID.x; - uint modelIndex = packedModelIndexAndCount & _FLW_MODEL_INDEX_MASK; - if (!_flw_isVisible(instanceIndex, modelIndex)) { return; } - uint pageVisibility = _flw_visibility[pageIndex]; + uint pageVisibility = _flw_visibility[gl_WorkGroupID.x]; + bool visibleLastFrame = (_flw_visibility[gl_WorkGroupID.x] & (1u << gl_LocalInvocationID.x)) != 0u; - if ((pageVisibility & (1u << gl_LocalInvocationID.x)) != 0u) { + if (visibleLastFrame) { // This instance was visibile last frame, it should be rendered early. uint localIndex = atomicAdd(_flw_models[modelIndex].instanceCount, 1); uint targetIndex = _flw_models[modelIndex].baseInstance + localIndex; diff --git a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/late_cull.glsl b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/late_cull.glsl index 66b0f5cc3..aaf803c66 100644 --- a/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/late_cull.glsl +++ b/common/src/backend/resources/assets/flywheel/flywheel/internal/indirect/late_cull.glsl @@ -2,7 +2,6 @@ #include "flywheel:internal/indirect/model_descriptor.glsl" #include "flywheel:internal/uniforms/uniforms.glsl" #include "flywheel:util/matrix.glsl" -#include "flywheel:internal/indirect/matrices.glsl" #include "flywheel:internal/indirect/dispatch.glsl" layout(local_size_x = 32) in; @@ -11,12 +10,9 @@ layout(std430, binding = _FLW_DRAW_INSTANCE_INDEX_BUFFER_BINDING) restrict write uint _flw_drawIndices[]; }; - -// High 6 bits for the number of instances in the page. -const uint _FLW_PAGE_COUNT_OFFSET = 26u; -// Bottom 26 bits for the model index. -const uint _FLW_MODEL_INDEX_MASK = 0x3FFFFFF; - +layout(std430, binding = _FLW_BOUNDING_SPHERE_BINDING) restrict readonly buffer BoundingSphereBuffer { + vec4 _flw_boundingSpheres[]; +}; layout(std430, binding = _FLW_PAGE_FRAME_DESCRIPTOR_BUFFER_BINDING) restrict readonly buffer PageFrameDescriptorBuffer { uint _flw_pageFrameDescriptors[]; @@ -30,10 +26,6 @@ layout(std430, binding = _FLW_MODEL_BUFFER_BINDING) restrict buffer ModelBuffer ModelDescriptor _flw_models[]; }; -layout(std430, binding = _FLW_MATRIX_BUFFER_BINDING) restrict readonly buffer MatrixBuffer { - Matrices _flw_matrices[]; -}; - layout(binding = 0) uniform sampler2D _flw_depthPyramid; bool projectSphere(vec3 c, float r, float znear, float P00, float P11, out vec4 aabb) { @@ -113,21 +105,11 @@ bool _flw_hizTest(vec3 center, float radius) { return true; } -bool _flw_isVisible(uint instanceIndex, uint modelIndex) { - uint matrixIndex = _flw_models[modelIndex].matrixIndex; - BoundingSphere sphere = _flw_models[modelIndex].boundingSphere; +bool _flw_isVisible(uint instanceIndex) { + vec4 boundingSphere = _flw_boundingSpheres[instanceIndex]; - vec3 center; - float radius; - _flw_unpackBoundingSphere(sphere, center, radius); - - FlwInstance instance = _flw_unpackInstance(instanceIndex); - - flw_transformBoundingSphere(instance, center, radius); - - if (matrixIndex > 0) { - transformBoundingSphere(_flw_matrices[matrixIndex].pose, center, radius); - } + vec3 center = boundingSphere.xyz; + float radius = boundingSphere.w; bool visible = _flw_testSphere(center, radius); @@ -155,8 +137,8 @@ void main() { uint instanceIndex = gl_GlobalInvocationID.x; - bool visible = _flw_isVisible(instanceIndex, modelIndex); - bool visibleLastFrame = (_flw_visibility[pageIndex] & (1u << gl_LocalInvocationID.x)) != 0u; + bool visible = _flw_isVisible(instanceIndex); + bool visibleLastFrame = (_flw_visibility[gl_WorkGroupID.x] & (1u << gl_LocalInvocationID.x)) != 0u; if (visible && !visibleLastFrame) { uint localIndex = atomicAdd(_flw_models[modelIndex].instanceCount, 1); @@ -168,6 +150,6 @@ void main() { uvec4 visibility = subgroupBallot(visible); if (subgroupElect()) { - _flw_visibility[pageIndex] = visibility.x; + _flw_visibility[gl_WorkGroupID.x] = visibility.x; } }