context : simplify output counting logic during decode (#14142)

* batch : remove logits_all flag

ggml-ci

* context : simplify output counting logic during decode

ggml-ci

* cont : fix comments
2025-06-27 12:05:03 +00:00 · 2025-06-12 11:50:01 +03:00
parent c3ee46fab4
commit f6e1a7aa87
3 changed files with 28 additions and 23 deletions
--- a/src/llama-batch.cpp
+++ b/src/llama-batch.cpp
@@ -306,9 +306,10 @@ llama_batch_allocr::llama_batch_allocr(struct llama_batch in_batch, llama_pos p0
        batch.seq_id = seq_id.data();
    }
    if (!batch.logits) {
-        logits.resize(batch.n_tokens);
-        logits[logits.size() - 1] = true;
-        batch.logits = logits.data();
+        // by default return the output only for the last token
+        output.resize(batch.n_tokens);
+        output[output.size() - 1] = true;
+        batch.logits = output.data();
    }
 }