Skip to content

Commit

Permalink
llama : logits_all has priority over batch->logits
Browse files Browse the repository at this point in the history
Otherwise, the server embeddings tests failed.
This was likely a pre-existing problem, but it was only detected here
because of an additional assertion.
  • Loading branch information
compilade committed Jul 17, 2024
1 parent 2e4adb4 commit 7b7db0b
Showing 1 changed file with 7 additions and 7 deletions.
14 changes: 7 additions & 7 deletions src/llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2898,7 +2898,12 @@ struct llama_sbatch {
}
}
}
if (batch->logits) {
if (logits_all) {
for (size_t i = 0; i < length; ++i) {
ubatch.output[ubatch.n_tokens + i] = 1;
out_ids.push_back(ids[seq.offset + i]);
}
} else if (batch->logits) {
if (ubatch.equal_seqs) {
for (size_t i = 0; i < length; ++i) {
size_t id = ids[seq.offset + i];
Expand All @@ -2913,11 +2918,6 @@ struct llama_sbatch {
if (ubatch.output[i] != 0) { out_ids.push_back(seq.offset + i); }
}
}
} else if (logits_all) {
for (size_t i = 0; i < length; ++i) {
ubatch.output[ubatch.n_tokens + i] = 1;
out_ids.push_back(ids[seq.offset + i]);
}
} else {
// only get last output
for (size_t i = 0; i < length; ++i) {
Expand Down Expand Up @@ -15088,7 +15088,7 @@ static int llama_decode_internal(
};

while (lctx.sbatch.n_tokens > 0) {
// For now, only use equal splits for recurrent or hybrid model architectures
// For now, only use equal splits for recurrent model architectures
llama_ubatch u_batch = kv_self.recurrent ? lctx.sbatch.split_equal(n_ubatch) : lctx.sbatch.split_simple(n_ubatch);
const uint32_t n_tokens = u_batch.n_tokens;

Expand Down

0 comments on commit 7b7db0b

Please sign in to comment.