context : remove logits_all flag
ggml-ci
@@ -116,8 +116,6 @@ llama_context::llama_context(
             __func__, n_ctx_per_seq, hparams.n_ctx_train);
     }
 
-    logits_all = params.logits_all;
-
     if (!hparams.vocab_only) {
         // GPU backends
         for (auto * dev : model.devices) {
@@ -890,7 +888,7 @@ int llama_context::decode(llama_batch & inp_batch) {
         for (uint32_t i = 0; i < n_tokens_all; ++i) {
             n_outputs_all += batch.logits[i] != 0;
         }
-    } else if (logits_all || embd_pooled) {
+    } else if (embd_pooled) {
         n_outputs_all = n_tokens_all;
     } else {
         // keep last output only
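Note: with the context-level logits_all flag gone, the per-token logits array on llama_batch (together with pooled-embedding mode, per the second hunk) is what determines how many outputs decode() produces. The following is a minimal sketch, not part of this commit, of how a caller selects outputs through the public llama.cpp batch API; decode_prompt is a hypothetical helper, and the setup of ctx/prompt is assumed to exist elsewhere.

#include <llama.h>

#include <cstddef>
#include <vector>

void decode_prompt(llama_context * ctx, const std::vector<llama_token> & prompt) {
    // one sequence, token batch (embd == 0)
    llama_batch batch = llama_batch_init((int32_t) prompt.size(), /*embd*/ 0, /*n_seq_max*/ 1);

    for (size_t i = 0; i < prompt.size(); ++i) {
        batch.token   [batch.n_tokens]    = prompt[i];
        batch.pos     [batch.n_tokens]    = (llama_pos) i;
        batch.n_seq_id[batch.n_tokens]    = 1;
        batch.seq_id  [batch.n_tokens][0] = 0;
        // request logits only for the last token; setting every entry to 1
        // would reproduce what the removed logits_all flag used to do
        batch.logits  [batch.n_tokens]    = (i == prompt.size() - 1);
        batch.n_tokens++;
    }

    if (llama_decode(ctx, batch) != 0) {
        // handle decode failure
    }

    // logits for the last token, the only output requested above
    const float * logits = llama_get_logits_ith(ctx, batch.n_tokens - 1);
    (void) logits;

    llama_batch_free(batch);
}

Since callers already express per-token output needs this way, the separate context flag was a redundant second path for the same request, which is presumably why this commit drops it.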