mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-27 12:05:03 +00:00
context : simplify output counting logic during decode (#14142)
* batch : remove logits_all flag
  ggml-ci
* context : simplify output counting logic during decode
  ggml-ci
* cont : fix comments
This commit is contained in:
@ -306,9 +306,10 @@ llama_batch_allocr::llama_batch_allocr(struct llama_batch in_batch, llama_pos p0
|
||||
batch.seq_id = seq_id.data();
|
||||
}
|
||||
if (!batch.logits) {
|
||||
logits.resize(batch.n_tokens);
|
||||
logits[logits.size() - 1] = true;
|
||||
batch.logits = logits.data();
|
||||
// by default return the output only for the last token
|
||||
output.resize(batch.n_tokens);
|
||||
output[output.size() - 1] = true;
|
||||
batch.logits = output.data();
|
||||
}
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user