From 73beb8ddabdb37f522be27aa8429f4fe7478fbc8 Mon Sep 17 00:00:00 2001 From: Markus Tavenrath Date: Wed, 21 Aug 2024 14:31:59 +0200 Subject: [PATCH 1/7] Overlap cmdbuffer creation and cmdbuffer execution in Vulkan backend by submitting smaller cmdbuffers early. --- ggml/src/ggml-vulkan.cpp | 105 +++++++++++++++++++++++---------------- 1 file changed, 61 insertions(+), 44 deletions(-) diff --git a/ggml/src/ggml-vulkan.cpp b/ggml/src/ggml-vulkan.cpp index 32fda32a879ba..3dd242df18a55 100644 --- a/ggml/src/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan.cpp @@ -785,6 +785,9 @@ static vk_submission ggml_vk_create_submission(vk_device& device, vk_queue& q, s static void ggml_vk_submit(vk_context& ctx, vk::Fence fence) { if (ctx->seqs.empty()) { + if (fence) { + ctx->q->queue.submit({}, fence); + } return; } VK_LOG_DEBUG("ggml_vk_submit(" << ctx << ", " << fence << ")"); @@ -5614,11 +5617,15 @@ static void ggml_vk_preallocate_buffers(ggml_backend_vk_context * ctx) { } } -static void ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * node, int node_idx, bool last_node, bool dryrun){ +bool ggml_vk_compute_forward(ggml_backend_vk_context* ctx, ggml_tensor* tensor, int tensor_idx, bool use_fence); + +// Returns true if node has enqueued work into the queue, false otherwise +// If submit is true the current all operations queued so far are being submitted to Vulkan to overlap cmdlist creation and GPU execution. +static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * node, int node_idx, ggml_tensor *node_begin, int node_idx_begin, bool dryrun, bool submit){ ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) node->extra; if (ggml_is_empty(node) || extra == nullptr) { - return; + return false; } VK_LOG_DEBUG("ggml_vk_build_graph(" << node << ", " << ggml_op_name(node->op) << ")"); @@ -5635,7 +5642,7 @@ static void ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * nod case GGML_OP_PERMUTE: case GGML_OP_TRANSPOSE: case GGML_OP_NONE: - return; + return false; case GGML_OP_UNARY: switch (ggml_get_unary_op(node)) { case GGML_UNARY_OP_SILU: @@ -5645,7 +5652,7 @@ static void ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * nod case GGML_UNARY_OP_TANH: break; default: - return; + return false; } break; case GGML_OP_REPEAT: @@ -5680,7 +5687,7 @@ static void ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * nod default: std::cerr << "ggml_vulkan: Error: Missing op: " << ggml_op_name(node->op) << std::endl; GGML_ABORT("fatal error"); - return; + return false; } vk_context compute_ctx; @@ -5772,7 +5779,7 @@ static void ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * nod ggml_vk_unary(ctx, compute_ctx, src0, node, dryrun); break; default: - return; + return false; } break; case GGML_OP_DIAG_MASK_INF: @@ -5816,11 +5823,11 @@ static void ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * nod break; default: - return; + return false; } if (dryrun) { - return; + return false; } ctx->tensor_ctxs[node_idx] = compute_ctx; @@ -5831,14 +5838,26 @@ static void ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * nod last_node = true; #endif - if (last_node) { + if (submit) { ggml_vk_ctx_end(compute_ctx); compute_ctx->exit_tensor_idx = node_idx; ctx->compute_ctx.reset(); + + bool ok = ggml_vk_compute_forward(ctx, node_begin, node_idx_begin, false); + if (!ok) { + if (node->op == GGML_OP_UNARY) { + std::cerr << __func__ << ": error: op not supported UNARY " << node->name << " (" << 
ggml_unary_op_name(static_cast(node->op_params[0])) << ")" << std::endl; + } + else { + std::cerr << __func__ << ": error: op not supported " << node->name << " (" << ggml_op_name(node->op) << ")" << std::endl; + } + } + } + return true; } -static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_tensor * tensor, int tensor_idx){ +static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_tensor * tensor, int tensor_idx, bool use_fence = true){ ggml_tensor_extra_gpu * extra = nullptr; switch (tensor->op) { @@ -5910,9 +5929,7 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_tensor * vk_context subctx = ctx->tensor_ctxs[tensor_idx].lock(); -#ifdef GGML_VULKAN_PERF - std::chrono::steady_clock::time_point start; -#endif // GGML_VULKAN_PERF + VkFence fence = use_fence ? ctx->fence : VkFence{}; // Only run if ctx hasn't been submitted yet if (!subctx->seqs.empty()) { @@ -5921,20 +5938,13 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_tensor * memcpy(cpy.dst, cpy.src, cpy.n); } -#ifdef GGML_VULKAN_PERF - start = std::chrono::steady_clock::now(); -#endif // GGML_VULKAN_PERF - - ggml_vk_submit(subctx, ctx->fence); + ggml_vk_submit(subctx, fence); } - if (tensor_idx == subctx->exit_tensor_idx) { - VK_CHECK(ctx->device->device.waitForFences({ ctx->fence }, true, UINT64_MAX), "ggml_vk_compute_forward waitForFences"); + if (tensor_idx != 0 && tensor_idx == subctx->exit_tensor_idx) { + ggml_vk_submit(subctx, fence); -#ifdef GGML_VULKAN_PERF - auto duration = std::chrono::duration_cast(std::chrono::steady_clock::now() - start); - ctx->device->perf_logger->log_timing(tensor, duration.count()); -#endif // GGML_VULKAN_PERF + VK_CHECK(ctx->device->device.waitForFences({ ctx->fence }, true, UINT64_MAX), "ggml_vk_compute_forward waitForFences"); ctx->device->device.resetFences({ ctx->fence }); @@ -6426,7 +6436,7 @@ GGML_CALL static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backen ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context; for (int i = 0; i < cgraph->n_nodes; i++) { - ggml_vk_build_graph(ctx, cgraph->nodes[i], i, 0, true); + ggml_vk_build_graph(ctx, cgraph->nodes[i], i, nullptr, 0, true, false); } ggml_vk_preallocate_buffers(ctx); ggml_pipeline_allocate_descriptor_sets(ctx->device); @@ -6441,33 +6451,40 @@ GGML_CALL static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backen // Reserve tensor context space for all nodes ctx->tensor_ctxs.resize(cgraph->n_nodes); - for (int i = 0; i < cgraph->n_nodes; i++) { - ggml_vk_build_graph(ctx, cgraph->nodes[i], i, i == last_node, false); - } + bool first_node_in_batch = true; // true if next node will be first node in a batch + int submit_node_idx = 0; // index to first node in a batch + // submit work every submit_count node to overlap CPU cmdbuffer generation with GPU execution + constexpr int submit_count = 50; for (int i = 0; i < cgraph->n_nodes; i++) { - ggml_tensor * node = cgraph->nodes[i]; - - if (ggml_vk_is_empty(node)) { - continue; + if (first_node_in_batch) { + submit_node_idx = i; } + + bool submit = ((i % submit_count) == 0) || (i == last_node); + bool enqueued = ggml_vk_build_graph(ctx, cgraph->nodes[i], i, cgraph->nodes[submit_node_idx], submit_node_idx, false, submit); - bool ok = ggml_vk_compute_forward(ctx, node, i); - if (!ok) { - if (node->op == GGML_OP_UNARY) { - std::cerr << __func__ << ": error: op not supported UNARY " << node->name << " (" << ggml_unary_op_name(static_cast(node->op_params[0])) << ")" << 
std::endl; - } else { - std::cerr << __func__ << ": error: op not supported " << node->name << " (" << ggml_op_name(node->op) << ")" << std::endl; - } + if (first_node_in_batch && enqueued) { + first_node_in_batch = false; } -#ifdef GGML_VULKAN_CHECK_RESULTS - else { - ggml_vk_check_results_1(node); + if (submit) { + first_node_in_batch = true; } -#endif - GGML_ASSERT(ok); } + // wait for work on the GPU to complete work + bool ok = ggml_vk_compute_forward(ctx, cgraph->nodes[cgraph->n_nodes-1], cgraph->n_nodes - 1, true); + + if (!ok) { + std::cerr << __func__ << ": error: failed to enqueue cmdbuffer" << std::endl; + } +#ifdef GGML_VULKAN_CHECK_RESULTS + else { + ggml_vk_check_results_1(node); + } +#endif + GGML_ASSERT(ok); + #ifdef GGML_VULKAN_PERF ctx->device->perf_logger->print_timings(); #endif From fb7befd045146e3f859a711cffc841e19167dc41 Mon Sep 17 00:00:00 2001 From: Markus Tavenrath Date: Wed, 21 Aug 2024 19:08:01 +0200 Subject: [PATCH 2/7] fix compile issues --- ggml/src/ggml-vulkan.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml-vulkan.cpp b/ggml/src/ggml-vulkan.cpp index 3dd242df18a55..916f05c0b09eb 100644 --- a/ggml/src/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan.cpp @@ -5617,7 +5617,7 @@ static void ggml_vk_preallocate_buffers(ggml_backend_vk_context * ctx) { } } -bool ggml_vk_compute_forward(ggml_backend_vk_context* ctx, ggml_tensor* tensor, int tensor_idx, bool use_fence); +static bool ggml_vk_compute_forward(ggml_backend_vk_context* ctx, ggml_tensor* tensor, int tensor_idx, bool use_fence); // Returns true if node has enqueued work into the queue, false otherwise // If submit is true the current all operations queued so far are being submitted to Vulkan to overlap cmdlist creation and GPU execution. @@ -5929,7 +5929,7 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_tensor * vk_context subctx = ctx->tensor_ctxs[tensor_idx].lock(); - VkFence fence = use_fence ? ctx->fence : VkFence{}; + vk::Fence fence = use_fence ? ctx->fence : vk::Fence{}; // Only run if ctx hasn't been submitted yet if (!subctx->seqs.empty()) { From 5e2ce2471004132245d39a0c6e1e759a482c5f94 Mon Sep 17 00:00:00 2001 From: Markus Tavenrath Date: Thu, 22 Aug 2024 15:48:24 +0200 Subject: [PATCH 3/7] Fix issues where the last submit wasn't executed or handled properly. 
--- ggml/src/ggml-vulkan.cpp | 49 ++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/ggml/src/ggml-vulkan.cpp b/ggml/src/ggml-vulkan.cpp index 916f05c0b09eb..ccf73cae1d37a 100644 --- a/ggml/src/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan.cpp @@ -2479,7 +2479,7 @@ static void ggml_vk_dispatch_pipeline(ggml_backend_vk_context* ctx, vk_context& const uint32_t wg2 = CEIL_DIV(elements[2], pipeline->wg_denoms[2]); VK_LOG_DEBUG("ggml_vk_dispatch_pipeline(" << pipeline->name << ", {"; for (auto& buffer : descriptor_buffer_infos) { - std::cerr << "(" << buffer << ", " << buffer.offset << ", " << buffer.size << "), "; + std::cerr << "(" << buffer.buffer << ", " << buffer.offset << ", " << buffer.range << "), "; } std::cerr << "}, (" << wg0 << "," << wg1 << "," << wg2 << "))"); GGML_ASSERT(pipeline->descriptor_set_idx < pipeline->descriptor_sets.size()); @@ -5621,7 +5621,7 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context* ctx, ggml_tensor* t // Returns true if node has enqueued work into the queue, false otherwise // If submit is true the current all operations queued so far are being submitted to Vulkan to overlap cmdlist creation and GPU execution. -static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * node, int node_idx, ggml_tensor *node_begin, int node_idx_begin, bool dryrun, bool submit){ +static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * node, int node_idx, ggml_tensor *node_begin, int node_idx_begin, bool dryrun, bool last_node, bool submit){ ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) node->extra; if (ggml_is_empty(node) || extra == nullptr) { @@ -5838,9 +5838,17 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * nod last_node = true; #endif - if (submit) { + if (submit || last_node) { ggml_vk_ctx_end(compute_ctx); - compute_ctx->exit_tensor_idx = node_idx; + + // TODO probably it'd be better to pass a exit_node flag to ggml_vk_compute_forward + if (last_node) { + compute_ctx->exit_tensor_idx = node_idx_begin; + } + else { + compute_ctx->exit_tensor_idx = -1; + } + ctx->compute_ctx.reset(); bool ok = ggml_vk_compute_forward(ctx, node_begin, node_idx_begin, false); @@ -5929,6 +5937,11 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_tensor * vk_context subctx = ctx->tensor_ctxs[tensor_idx].lock(); + // always wait for the GPU work to be done for the last submit + if (tensor_idx == subctx->exit_tensor_idx) { + use_fence = true; + } + vk::Fence fence = use_fence ? 
ctx->fence : vk::Fence{}; // Only run if ctx hasn't been submitted yet @@ -5941,13 +5954,13 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_tensor * ggml_vk_submit(subctx, fence); } - if (tensor_idx != 0 && tensor_idx == subctx->exit_tensor_idx) { - ggml_vk_submit(subctx, fence); - + if (use_fence) { VK_CHECK(ctx->device->device.waitForFences({ ctx->fence }, true, UINT64_MAX), "ggml_vk_compute_forward waitForFences"); ctx->device->device.resetFences({ ctx->fence }); + } + if (tensor_idx == subctx->exit_tensor_idx) { // Do staging buffer copies for (auto& cpy : subctx->out_memcpys) { memcpy(cpy.dst, cpy.src, cpy.n); @@ -6436,7 +6449,7 @@ GGML_CALL static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backen ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context; for (int i = 0; i < cgraph->n_nodes; i++) { - ggml_vk_build_graph(ctx, cgraph->nodes[i], i, nullptr, 0, true, false); + ggml_vk_build_graph(ctx, cgraph->nodes[i], i, nullptr, 0, true, false, false); } ggml_vk_preallocate_buffers(ctx); ggml_pipeline_allocate_descriptor_sets(ctx->device); @@ -6460,9 +6473,10 @@ GGML_CALL static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backen if (first_node_in_batch) { submit_node_idx = i; } - - bool submit = ((i % submit_count) == 0) || (i == last_node); - bool enqueued = ggml_vk_build_graph(ctx, cgraph->nodes[i], i, cgraph->nodes[submit_node_idx], submit_node_idx, false, submit); + + // TODO probably it's better to move the submit conter to ggml_vk_build_graph since a lot of nodes might not contain actual vulkan work + bool submit = i && ((i % submit_count) == 0); + bool enqueued = ggml_vk_build_graph(ctx, cgraph->nodes[i], i, cgraph->nodes[submit_node_idx], submit_node_idx, false, i == last_node, submit); if (first_node_in_batch && enqueued) { first_node_in_batch = false; @@ -6472,19 +6486,6 @@ GGML_CALL static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backen } } - // wait for work on the GPU to complete work - bool ok = ggml_vk_compute_forward(ctx, cgraph->nodes[cgraph->n_nodes-1], cgraph->n_nodes - 1, true); - - if (!ok) { - std::cerr << __func__ << ": error: failed to enqueue cmdbuffer" << std::endl; - } -#ifdef GGML_VULKAN_CHECK_RESULTS - else { - ggml_vk_check_results_1(node); - } -#endif - GGML_ASSERT(ok); - #ifdef GGML_VULKAN_PERF ctx->device->perf_logger->print_timings(); #endif From 2812b35ce42992aa9e6693f01a8c157dfdad4400 Mon Sep 17 00:00:00 2001 From: Markus Tavenrath Date: Fri, 23 Aug 2024 09:01:46 +0200 Subject: [PATCH 4/7] remove trailing whitespace --- ggml/src/ggml-vulkan.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml-vulkan.cpp b/ggml/src/ggml-vulkan.cpp index ccf73cae1d37a..84b591cc90737 100644 --- a/ggml/src/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan.cpp @@ -5843,7 +5843,7 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * nod // TODO probably it'd be better to pass a exit_node flag to ggml_vk_compute_forward if (last_node) { - compute_ctx->exit_tensor_idx = node_idx_begin; + compute_ctx->exit_tensor_idx = node_idx_begin; } else { compute_ctx->exit_tensor_idx = -1; From 2b2bc1ff8b167de8047be9d8d72eebf1d4e7721d Mon Sep 17 00:00:00 2001 From: Markus Tavenrath Date: Tue, 27 Aug 2024 13:47:33 +0200 Subject: [PATCH 5/7] Repair GGML_VULKAN_CHECK_RESULTS --- ggml/src/ggml-vulkan.cpp | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/ggml/src/ggml-vulkan.cpp b/ggml/src/ggml-vulkan.cpp index 
84b591cc90737..5f5fb8a6bfcab 100644 --- a/ggml/src/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan.cpp @@ -5931,10 +5931,6 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_tensor * VK_LOG_DEBUG("ggml_vk_compute_forward(" << tensor << ", name=" << tensor->name << ", op=" << ggml_op_name(tensor->op) << ", type=" << tensor->type << ", ne0=" << tensor->ne[0] << ", ne1=" << tensor->ne[1] << ", ne2=" << tensor->ne[2] << ", ne3=" << tensor->ne[3] << ", nb0=" << tensor->nb[0] << ", nb1=" << tensor->nb[1] << ", nb2=" << tensor->nb[2] << ", nb3=" << tensor->nb[3] << ", view_src=" << tensor->view_src << ", view_offs=" << tensor->view_offs << ")"); -#ifdef GGML_VULKAN_CHECK_RESULTS - ggml_vk_check_results_0(tensor); -#endif - vk_context subctx = ctx->tensor_ctxs[tensor_idx].lock(); // always wait for the GPU work to be done for the last submit @@ -5942,22 +5938,28 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_tensor * use_fence = true; } - vk::Fence fence = use_fence ? ctx->fence : vk::Fence{}; - // Only run if ctx hasn't been submitted yet if (!subctx->seqs.empty()) { +#ifdef GGML_VULKAN_CHECK_RESULTS + ggml_vk_check_results_0(tensor); + use_fence = true; +#endif + // Do staging buffer copies for (auto& cpy : subctx->in_memcpys) { memcpy(cpy.dst, cpy.src, cpy.n); } - ggml_vk_submit(subctx, fence); - } + ggml_vk_submit(subctx, use_fence ? ctx->fence : vk::Fence{}); - if (use_fence) { - VK_CHECK(ctx->device->device.waitForFences({ ctx->fence }, true, UINT64_MAX), "ggml_vk_compute_forward waitForFences"); + if (use_fence) { + VK_CHECK(ctx->device->device.waitForFences({ ctx->fence }, true, UINT64_MAX), "ggml_vk_compute_forward waitForFences"); - ctx->device->device.resetFences({ ctx->fence }); + ctx->device->device.resetFences({ ctx->fence }); + } +#ifdef GGML_VULKAN_CHECK_RESULTS + ggml_vk_check_results_1(tensor); +#endif } if (tensor_idx == subctx->exit_tensor_idx) { From 28506e51d6cc62d0f132f0f37dc7bfaf1ba51930 Mon Sep 17 00:00:00 2001 From: Markus Tavenrath Date: Tue, 27 Aug 2024 13:56:13 +0200 Subject: [PATCH 6/7] Increase submit counter only if actual work has been submitted and increase submit count to 100. 
--- ggml/src/ggml-vulkan.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/ggml/src/ggml-vulkan.cpp b/ggml/src/ggml-vulkan.cpp index 5f5fb8a6bfcab..411a13c8a5eea 100644 --- a/ggml/src/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan.cpp @@ -6470,21 +6470,27 @@ GGML_CALL static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backen int submit_node_idx = 0; // index to first node in a batch // submit work every submit_count node to overlap CPU cmdbuffer generation with GPU execution - constexpr int submit_count = 50; + constexpr int submit_count = 100; + int submitted_nodes = 0; for (int i = 0; i < cgraph->n_nodes; i++) { if (first_node_in_batch) { submit_node_idx = i; } - // TODO probably it's better to move the submit conter to ggml_vk_build_graph since a lot of nodes might not contain actual vulkan work - bool submit = i && ((i % submit_count) == 0); + bool submit = (submitted_nodes >= submit_count) || (i == last_node); bool enqueued = ggml_vk_build_graph(ctx, cgraph->nodes[i], i, cgraph->nodes[submit_node_idx], submit_node_idx, false, i == last_node, submit); - if (first_node_in_batch && enqueued) { - first_node_in_batch = false; + if (enqueued) { + ++submitted_nodes; + + if (first_node_in_batch) { + first_node_in_batch = false; + } } + if (submit) { first_node_in_batch = true; + submitted_nodes = 0; } } From 9dc02233909a978109da2a474db265fe264c5613 Mon Sep 17 00:00:00 2001 From: Markus Tavenrath Date: Sun, 8 Sep 2024 11:19:34 +0200 Subject: [PATCH 7/7] Fix some nodes are not checked with GGML_VULKAN_CHECK_RESULTS enabled. --- ggml/src/ggml-vulkan.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ggml/src/ggml-vulkan.cpp b/ggml/src/ggml-vulkan.cpp index 411a13c8a5eea..e76d58a630870 100644 --- a/ggml/src/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan.cpp @@ -6478,14 +6478,18 @@ GGML_CALL static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backen } bool submit = (submitted_nodes >= submit_count) || (i == last_node); + + bool enqueued = ggml_vk_build_graph(ctx, cgraph->nodes[i], i, cgraph->nodes[submit_node_idx], submit_node_idx, false, i == last_node, submit); if (enqueued) { ++submitted_nodes; +#ifndef GGML_VULKAN_CHECK_RESULTS if (first_node_in_batch) { first_node_in_batch = false; } +#endif } if (submit) {
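
The pattern this series converges on can be summarized independently of the ggml plumbing: record the graph's nodes into command buffers in batches of roughly submit_count nodes, submit each batch to the queue as soon as it is recorded (without a fence) so that recording of the next batch overlaps with GPU execution of the previous one, and attach a fence only to the submit containing the last node so the CPU waits exactly once per graph. The sketch below illustrates that pattern in isolation; it is not the ggml code itself — run_graph_batched(), record_node_commands() and node_count are hypothetical stand-ins for what ggml_vk_build_graph() and ggml_vk_compute_forward() do, and resource recycling is deliberately simplified.

#include <vulkan/vulkan.hpp>
#include <cstdint>

// Sketch: batch graph nodes into command buffers and submit each batch early,
// fencing only the last one. record_node_commands() is a hypothetical helper.
void run_graph_batched(vk::Device device, vk::Queue queue, vk::CommandPool pool,
                       int node_count, int submit_count = 100) {
    vk::Fence fence = device.createFence({});
    vk::CommandBuffer cmd;
    bool recording = false;
    int nodes_in_batch = 0;

    for (int i = 0; i < node_count; ++i) {
        if (!recording) {
            // Allocate and begin a fresh command buffer for this batch.
            cmd = device.allocateCommandBuffers(
                      { pool, vk::CommandBufferLevel::ePrimary, 1 })[0];
            cmd.begin({ vk::CommandBufferUsageFlagBits::eOneTimeSubmit });
            recording = true;
        }

        // record_node_commands(cmd, i);  // hypothetical: record work for node i
        ++nodes_in_batch;

        const bool last_node = (i == node_count - 1);
        if (nodes_in_batch >= submit_count || last_node) {
            cmd.end();
            vk::SubmitInfo si{};
            si.commandBufferCount = 1;
            si.pCommandBuffers    = &cmd;
            // Earlier batches are submitted without a fence so recording of the
            // next batch overlaps with GPU execution; only the final batch
            // signals the fence.
            queue.submit(si, last_node ? fence : vk::Fence{});
            nodes_in_batch = 0;
            recording = false;
            // NOTE: per-batch command buffers are intentionally not freed here;
            // a real implementation recycles them once the fence has signalled.
        }
    }

    // Wait once, for the last submit only.
    (void) device.waitForFences(fence, VK_TRUE, UINT64_MAX);
    device.resetFences(fence);
    device.destroyFence(fence);
}

Only the fence of the final submit is waited on in this sketch; when an intermediate batch must be observed on the CPU as well (as with GGML_VULKAN_CHECK_RESULTS), the same fence can be attached to that submit and waited on immediately, which is what patch 5 arranges inside ggml_vk_compute_forward().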