diff --git a/backends/vulkan/runtime/graph/ops/DispatchNode.h b/backends/vulkan/runtime/graph/ops/DispatchNode.h index 958637218e..7d04f7714e 100644 --- a/backends/vulkan/runtime/graph/ops/DispatchNode.h +++ b/backends/vulkan/runtime/graph/ops/DispatchNode.h @@ -46,12 +46,15 @@ class PushConstantDataInfo { payload_.attr = attr; } - explicit PushConstantDataInfo(const void* data, uint32_t dataLen) + explicit PushConstantDataInfo( + const void* data, + uint32_t dataLen, + uint32_t pushConstantLen = 0) : tensorUniformData(nullptr) { VK_CHECK_COND( dataLen <= 16, "Single push constant data size must be <= 16 bytes"); - payload_.dataSize = dataLen; - memcpy(payload_.data, data, payload_.dataSize); + payload_.dataSize = pushConstantLen ? pushConstantLen : dataLen; + memcpy(payload_.data, data, dataLen); } /* diff --git a/backends/vulkan/runtime/graph/ops/glsl/binary_op.glsl b/backends/vulkan/runtime/graph/ops/glsl/binary_op.glsl index be0e1bfa20..62aa2f810d 100644 --- a/backends/vulkan/runtime/graph/ops/glsl/binary_op.glsl +++ b/backends/vulkan/runtime/graph/ops/glsl/binary_op.glsl @@ -19,11 +19,6 @@ layout(std430) buffer; ${layout_declare_tensor(B, "w", "t_out", DTYPE, STORAGE)} ${layout_declare_tensor(B, "r", "t_in", DTYPE, STORAGE)} ${layout_declare_tensor(B, "r", "t_other", DTYPE, STORAGE)} -${layout_declare_ubo(B, "ivec4", "out_sizes")} -${layout_declare_ubo(B, "ivec4", "in_sizes")} -${layout_declare_ubo(B, "ivec4", "other_sizes")} -${layout_declare_ubo(B, "ivec2", "broadcast_params")} -${layout_declare_ubo(B, "float", "alpha")} #include "broadcasting_utils.h" #include "indexing_utils.h" @@ -40,6 +35,14 @@ const lowp ivec4 in_axis_map = unhash_axis_map(in_layout); ${layout_declare_spec_const(C, "int", "other_layout", "DEFAULT_LAYOUT")} const lowp ivec4 other_axis_map = unhash_axis_map(other_layout); +layout(push_constant) uniform restrict Block { + ivec4 out_sizes; + ivec4 in_sizes; + ivec4 other_sizes; + ivec2 broadcast_params; + float alpha; +}; + void main() { 
const ivec3 lpos = ivec3(gl_GlobalInvocationID); const ivec4 tidx = lpos_to_tidx(lpos, out_sizes, out_axis_map.w, packed_dim); diff --git a/backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp b/backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp index 33f73cd6da..7e88982aae 100644 --- a/backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp +++ b/backends/vulkan/runtime/graph/ops/impl/BinaryOp.cpp @@ -67,7 +67,10 @@ void add_binary_op_node( alpha_val = graph.extract_scalar(alpha); } - const utils::ivec2 broadcast_params = create_broadcast_params(*t_in1, *t_in2); + const struct BinaryOpsParams { + const utils::ivec2 broadcast_params; + const float alpha_val; + } binary_ops_params{create_broadcast_params(*t_in1, *t_in2), alpha_val}; std::string kernel_name("binary_"); kernel_name.reserve(kShaderNameReserve); @@ -83,16 +86,16 @@ void add_binary_op_node( {{out, vkapi::MemoryAccessType::WRITE}, {{arg1, arg2}, vkapi::MemoryAccessType::READ}}, // Shader params buffers - {t_out->sizes_ubo(), - t_in1->sizes_ubo(), - t_in2->sizes_ubo(), - graph.create_params_buffer(broadcast_params), - graph.create_params_buffer(alpha_val)}, + {}, // Specialization Constants {t_out->hashed_layout(), t_in1->hashed_layout(), t_in2->hashed_layout()}, // Resizing Logic resize_binary_op_node, - {})); + {}, + {{graph.sizes_pc_of(out), + graph.sizes_pc_of(arg1), + graph.sizes_pc_of(arg2), + PushConstantDataInfo(&binary_ops_params, sizeof(binary_ops_params))}})); } #define DEFINE_BINARY_OP_WITH_ALPHA_FN(op_name) \ diff --git a/backends/vulkan/test/vulkan_compute_api_test.cpp b/backends/vulkan/test/vulkan_compute_api_test.cpp index 77a0458d90..604ad26588 100644 --- a/backends/vulkan/test/vulkan_compute_api_test.cpp +++ b/backends/vulkan/test/vulkan_compute_api_test.cpp @@ -1601,9 +1601,7 @@ TEST(VulkanComputeGraphTest, test_simple_shared_objects_with_resize) { auto addFn = VK_GET_OP_FN("aten.add.Tensor"); addFn(graph, {a.value, b.value, kDummyValueRef, c}); - // +2: alpha UBO, broadcast UBO for 
arithmetic shader - // +1: t.sizes_ubo() for arithmetic shader output c - expected_vma_allocation_count += 3; + // no new allocations if binary op uses push constants EXPECT_EQ(get_vma_allocation_count(), expected_vma_allocation_count); IOValueRef d = graph.add_input_tensor( @@ -1624,17 +1622,16 @@ TEST(VulkanComputeGraphTest, test_simple_shared_objects_with_resize) { auto mulFn = VK_GET_OP_FN("aten.mul.Tensor"); mulFn(graph, {c, d.value, e}); - // +2: alpha UBO, broadcast UBO for arithmetic shader - // +1: t.sizes_ubo() for arithmetic shader output e - expected_vma_allocation_count += 3; + // no new allocations if binary op uses push constants EXPECT_EQ(get_vma_allocation_count(), expected_vma_allocation_count); IOValueRef out = {}; out.value = e; out.staging = graph.set_output_tensor(out.value); + // +1: staging buffer for the input tensor // +1: staging buffer for the output tensor - expected_vma_allocation_count += 1; + expected_vma_allocation_count += 2; EXPECT_EQ(get_vma_allocation_count(), expected_vma_allocation_count); graph.prepare();