context : simplify output counting logic during decode (#14142)

* batch : remove logits_all flag

ggml-ci

* context : simplify output counting logic during decode

ggml-ci

* cont : fix comments
This commit is contained in:
Georgi Gerganov
2025-06-12 11:50:01 +03:00
committed by GitHub
parent c3ee46fab4
commit f6e1a7aa87
3 changed files with 28 additions and 23 deletions

View File

@ -306,9 +306,10 @@ llama_batch_allocr::llama_batch_allocr(struct llama_batch in_batch, llama_pos p0
batch.seq_id = seq_id.data();
}
if (!batch.logits) {
logits.resize(batch.n_tokens);
logits[logits.size() - 1] = true;
batch.logits = logits.data();
// by default return the output only for the last token
output.resize(batch.n_tokens);
output[output.size() - 1] = true;
batch.logits = output.data();
}
}