From 446d57d7cdf20d51b32b7fe2373bd7d2460225da Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Mon, 2 Sep 2024 22:31:23 +0200 Subject: [PATCH] add SLOT_STATE_DONE_PROMPT --- examples/server/server.cpp | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 9518829a0f714..99e5996b5a585 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -53,6 +53,7 @@ enum stop_type { enum slot_state { SLOT_STATE_IDLE, SLOT_STATE_PROCESSING_PROMPT, + SLOT_STATE_DONE_PROMPT, SLOT_STATE_GENERATING, }; @@ -2235,9 +2236,9 @@ struct server_context { {"progress", (float) slot.n_prompt_tokens_processed / slot.n_prompt_tokens}, }); - // entire prompt has been processed - start decoding new tokens + // entire prompt has been processed if (slot.n_past == slot.n_prompt_tokens) { - slot.state = SLOT_STATE_GENERATING; + slot.state = SLOT_STATE_DONE_PROMPT; GGML_ASSERT(batch.n_tokens > 0); @@ -2349,15 +2350,22 @@ struct server_context { } for (auto & slot : slots) { - if (slot.state != SLOT_STATE_GENERATING || slot.i_batch < (int) i || slot.i_batch >= (int) (i + n_tokens)) { + if (slot.i_batch < (int) i || slot.i_batch >= (int) (i + n_tokens)) { continue; // continue loop of slots } - // prompt evaluated for embedding - if (slot.cmpl_type == SERVER_TASK_CMPL_TYPE_EMBEDDING) { - send_embedding(slot, batch_view); - slot.release(); - slot.i_batch = -1; + if (slot.state == SLOT_STATE_DONE_PROMPT) { + if (slot.cmpl_type == SERVER_TASK_CMPL_TYPE_EMBEDDING) { + // prompt evaluated for embedding + send_embedding(slot, batch_view); + slot.release(); + slot.i_batch = -1; + continue; // continue loop of slots + } else { + // prompt evaluated for next-token prediction + slot.state = SLOT_STATE_GENERATING; + } + } else if (slot.state != SLOT_STATE_GENERATING) { continue; // continue loop of slots }