batch : auto-gen positions + verify multi-sequence input (#14177)

* batch : verify multi-sequence input batches

ggml-ci

* cont : auto-gen positions + verify multi-seq input

ggml-ci

* cont : first print debug info, then perform validation

ggml-ci

* cont : fix position auto-gen + add comments

ggml-ci
Author:    Georgi Gerganov
Date:      2025-06-15 09:18:37 +03:00
Committer: GitHub
Parent:    00ba772610
Commit:    b9912ac570

5 changed files with 155 additions and 26 deletions

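The two hunks below change the third argument of llama_batch_allocr::init from an explicit starting position to a pointer to the memory (KV cache) state, moving position auto-generation into the batch allocator. Inferred from the call sites, the signature change looks roughly like this (the parameter names and types are assumptions reconstructed from the diff, not copied from the commit):

// before: the caller computed a starting position itself
// (-1 signaled that positions are already provided in the batch)
bool init(const llama_batch & batch_inp, const llama_vocab & vocab, llama_pos p0);

// after: the caller passes the memory state (or nullptr) and init()
// derives per-sequence starting positions when batch.pos is null
bool init(const llama_batch & batch_inp, const llama_vocab & vocab, const llama_memory_i * memory);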

@@ -727,9 +727,8 @@ int llama_context::encode(const llama_batch & batch_inp) {
         return -1;
     }
 
     // temporary allocate memory for the input batch if needed
-    // note: during encode, we always pass the full sequence starting from pos = 0
-    if (!batch_allocr->init(batch_inp, model.vocab, batch_inp.pos ? -1 : 0)) {
+    if (!batch_allocr->init(batch_inp, model.vocab, nullptr)) {
         LLAMA_LOG_ERROR("%s: failed to initialize batch\n", __func__);
         return -1;
     }
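In encode(), the memory argument is now nullptr, so auto-generated positions start from pos = 0, matching the behavior that the removed note comment described. A minimal sketch of how the auto-generation inside init() could work, assuming a pos vector member and the LLAMA_MAX_SEQ / seq_pos_max names from llama.cpp (the exact body is an assumption based on this diff):

// sketch: fill in positions when the caller did not provide any
if (!batch.pos) {
    pos.resize(batch.n_tokens);

    // next position per sequence: 0 during encode (memory == nullptr),
    // one past the last position stored in memory during decode
    // (seq_pos_max returns -1 for an empty sequence, so +1 yields 0)
    llama_pos p0[LLAMA_MAX_SEQ];
    for (int32_t s = 0; s < LLAMA_MAX_SEQ; ++s) {
        p0[s] = memory ? memory->seq_pos_max(s) + 1 : 0;
    }

    for (int32_t i = 0; i < batch.n_tokens; ++i) {
        const llama_seq_id seq_id = batch.seq_id ? batch.seq_id[i][0] : 0;

        pos[i] = p0[seq_id]++;
    }

    batch.pos = pos.data();
}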
@@ -895,8 +894,7 @@ int llama_context::decode(const llama_batch & batch_inp) {
         return -1;
     }
 
     // temporary allocate memory for the input batch if needed
-    if (!batch_allocr->init(batch_inp, model.vocab, batch_inp.pos ? -1 : memory->seq_pos_max(0) + 1)) {
+    if (!batch_allocr->init(batch_inp, model.vocab, memory.get())) {
         LLAMA_LOG_ERROR("%s: failed to initialize batch\n", __func__);
         return -1;
     }
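In decode(), passing memory.get() lets init() continue every sequence from memory->seq_pos_max(seq_id) + 1, whereas the old expression only consulted sequence 0. The commit message also mentions verifying multi-sequence input; a hedged sketch of what such a per-sequence consistency check might look like (the error message and exact structure are assumptions, not the commit's code):

// sketch: verify explicit positions are contiguous within each sequence
for (int32_t s = 0; s < LLAMA_MAX_SEQ; ++s) {
    llama_pos pos_prev = memory ? memory->seq_pos_max(s) : -1;

    for (int32_t i = 0; i < batch.n_tokens; ++i) {
        const llama_seq_id seq_id = batch.seq_id ? batch.seq_id[i][0] : 0;
        if (seq_id != s) {
            continue;
        }

        if (batch.pos[i] != pos_prev + 1) {
            LLAMA_LOG_ERROR("%s: sequence %d has inconsistent positions\n", __func__, s);
            return false;
        }

        pos_prev = batch.pos[i];
    }
}

Per the "first print debug info, then perform validation" sub-commit, the batch contents are dumped before a check like this runs, which makes failed validations easier to diagnose.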