Skip to content

Commit

Permalink
add SLOT_STATE_DONE_PROMPT
Browse files: browse the repository at this point in the history
  • Loading branch information
ngxson committed Sep 2, 2024
1 parent 2c81cde commit 446d57d
Showing 1 changed file with 16 additions and 8 deletions.
24 changes: 16 additions & 8 deletions examples/server/server.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -53,6 +53,7 @@ enum stop_type {
// States a server slot moves through; stored in slot.state.
enum slot_state {
    // NOTE(review): presumably the slot has no work assigned - not shown in this excerpt, confirm
    SLOT_STATE_IDLE = 0,

    // the slot's prompt has not been fully processed yet
    SLOT_STATE_PROCESSING_PROMPT,

    // set once slot.n_past == slot.n_prompt_tokens; after the batch is decoded,
    // an embedding slot sends its embedding and is released, while any other
    // slot switches to SLOT_STATE_GENERATING
    SLOT_STATE_DONE_PROMPT,

    // prompt evaluated for next-token prediction; new tokens are being decoded
    SLOT_STATE_GENERATING,
};

Expand Down Expand Up @@ -2235,9 +2236,9 @@ struct server_context {
{"progress", (float) slot.n_prompt_tokens_processed / slot.n_prompt_tokens},
});

// entire prompt has been processed - start decoding new tokens
// entire prompt has been processed
if (slot.n_past == slot.n_prompt_tokens) {
slot.state = SLOT_STATE_GENERATING;
slot.state = SLOT_STATE_DONE_PROMPT;

GGML_ASSERT(batch.n_tokens > 0);

Expand Down Expand Up @@ -2349,15 +2350,22 @@ struct server_context {
}

for (auto & slot : slots) {
if (slot.state != SLOT_STATE_GENERATING || slot.i_batch < (int) i || slot.i_batch >= (int) (i + n_tokens)) {
if (slot.i_batch < (int) i || slot.i_batch >= (int) (i + n_tokens)) {
continue; // continue loop of slots
}

// prompt evaluated for embedding
if (slot.cmpl_type == SERVER_TASK_CMPL_TYPE_EMBEDDING) {
send_embedding(slot, batch_view);
slot.release();
slot.i_batch = -1;
if (slot.state == SLOT_STATE_DONE_PROMPT) {
if (slot.cmpl_type == SERVER_TASK_CMPL_TYPE_EMBEDDING) {
// prompt evaluated for embedding
send_embedding(slot, batch_view);
slot.release();
slot.i_batch = -1;
continue; // continue loop of slots
} else {
// prompt evaluated for next-token prediction
slot.state = SLOT_STATE_GENERATING;
}
} else if (slot.state != SLOT_STATE_GENERATING) {
continue; // continue loop of slots
}

Expand Down

0 comments on commit 446d57d

Please sign in to comment.