mirror of https://github.com/ggml-org/llama.cpp.git
context : remove logits_all flag (#13284)
* context : remove logits_all flag

  ggml-ci

* llama : remove logits_all flag + reorder llama_context_params

  ggml-ci
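With the flag gone, callers that relied on logits_all to get logits for every token should instead request an output per token through llama_batch::logits, which is what the decode hunk below now checks. A minimal migration sketch against the public llama.h API (decode_all_outputs is a hypothetical helper, not code from this commit; the batch setup is illustrative):

    #include "llama.h"

    // Migration sketch: previously, setting llama_context_params::logits_all
    // made llama_decode() compute logits for every token; now each token's
    // output is requested explicitly via the batch.
    static int decode_all_outputs(llama_context * ctx, const llama_token * tokens, int32_t n_tokens) {
        llama_batch batch = llama_batch_init(n_tokens, /*embd =*/ 0, /*n_seq_max =*/ 1);

        batch.n_tokens = n_tokens;
        for (int32_t i = 0; i < n_tokens; ++i) {
            batch.token   [i]    = tokens[i];
            batch.pos     [i]    = i;
            batch.n_seq_id[i]    = 1;
            batch.seq_id  [i][0] = 0;
            batch.logits  [i]    = true; // was: implicit with params.logits_all
        }

        // on success, llama_get_logits_ith(ctx, i) is valid for every i
        const int ret = llama_decode(ctx, batch);

        llama_batch_free(batch);
        return ret;
    }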
@@ -116,8 +116,6 @@ llama_context::llama_context(
                 __func__, n_ctx_per_seq, hparams.n_ctx_train);
     }
 
-    logits_all = params.logits_all;
-
     if (!hparams.vocab_only) {
         // GPU backends
         for (auto * dev : model.devices) {
@@ -890,7 +888,7 @@ int llama_context::decode(llama_batch & inp_batch) {
         for (uint32_t i = 0; i < n_tokens_all; ++i) {
             n_outputs_all += batch.logits[i] != 0;
         }
-    } else if (logits_all || embd_pooled) {
+    } else if (embd_pooled) {
         n_outputs_all = n_tokens_all;
     } else {
         // keep last output only
@@ -1853,13 +1851,12 @@ llama_context_params llama_context_default_params() {
         /*.cb_eval_user_data   =*/ nullptr,
         /*.type_k              =*/ GGML_TYPE_F16,
         /*.type_v              =*/ GGML_TYPE_F16,
-        /*.logits_all          =*/ false,
+        /*.abort_callback      =*/ nullptr,
+        /*.abort_callback_data =*/ nullptr,
         /*.embeddings          =*/ false,
         /*.offload_kqv         =*/ true,
         /*.flash_attn          =*/ false,
         /*.no_perf             =*/ true,
-        /*.abort_callback      =*/ nullptr,
-        /*.abort_callback_data =*/ nullptr,
     };
 
     return result;
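On the parameter side, the only caller-visible change is that llama_context_params no longer carries a logits_all field (the abort callback members merely move so the booleans sit at the end of the struct). Context creation is otherwise unchanged; a sketch under that assumption (make_ctx is a hypothetical helper; llama_init_from_model is the constructor in llama.h of this period):

    #include "llama.h"

    // Sketch of context setup after this change. There is no logits_all
    // field to set anymore; per-token outputs are requested through
    // llama_batch::logits at decode time instead (see the sketch above).
    static llama_context * make_ctx(llama_model * model) {
        llama_context_params cparams = llama_context_default_params();
        cparams.n_ctx = 4096; // example value; other fields keep their defaults

        return llama_init_from_model(model, cparams);
    }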