batch : fix check for empty sequences in memory (#14364)

* batch : fix check for empty sequences in memory

ggml-ci

* cont : reuse the var

ggml-ci
commit 62af464227 (parent c148cf1946)
Author: Georgi Gerganov
Date:   2025-06-24 18:26:30 +03:00
committed by GitHub


@@ -244,11 +244,13 @@ bool llama_batch_allocr::init(
             continue;
         }
 
-        if (memory) {
+        const llama_pos p0 = memory ? memory->seq_pos_max(s) : -1;
+
+        if (p0 >= 0) {
             bool ok = true;
 
             if (batch.token) {
-                if (seq_pos_min(s) != memory->seq_pos_max(s) + 1) {
+                if (seq_pos_min(s) != p0 + 1) {
                     ok = false;
                 }
             } else {
@@ -256,7 +258,7 @@ bool llama_batch_allocr::init(
                 // for embeddings (typically used as vision input), we allow them to have repeating positions
                 // ref: https://github.com/ggml-org/llama.cpp/issues/13694#issuecomment-2983871762
 
-                if (seq_pos_min(s) != memory->seq_pos_max(s) && seq_pos_min(s) != memory->seq_pos_max(s) + 1) {
+                if (seq_pos_min(s) != p0 && seq_pos_min(s) != p0 + 1) {
                     ok = false;
                 }
             }
@@ -267,7 +269,7 @@ bool llama_batch_allocr::init(
                         " - the last position stored in the memory module of the context (i.e. the KV cache) for sequence %d is X = %d\n"
                         " - the tokens for sequence %d in the input batch have a starting position of Y = %d\n"
                         " it is required that the sequence positions remain consecutive: Y = X + 1\n",
-                        __func__, s, s, memory->seq_pos_max(s), s, seq_pos_min(s));
+                        __func__, s, s, p0, s, seq_pos_min(s));
 
                 return false;
             }
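
For reference, below is a minimal, standalone sketch of the check this change ends up with (a sketch only, not the llama.cpp sources; the function name and parameters are hypothetical). Here p0 stands for memory ? memory->seq_pos_max(s) : -1; a negative value is taken to mean the sequence has no positions stored in memory, in which case the consecutiveness check is skipped:

// Standalone sketch (hypothetical helper, not the actual llama.cpp code).
// p0 mirrors `memory ? memory->seq_pos_max(s) : -1`: a negative value means the
// sequence has nothing stored in memory, so there is nothing to validate against.
#include <cstdint>
#include <cstdio>

using llama_pos = int32_t;

static bool seq_positions_ok(llama_pos p0, llama_pos batch_pos_min, bool is_token_batch) {
    if (p0 < 0) {
        // sequence is empty in memory - skip the check (this is what the fix adds)
        return true;
    }

    if (is_token_batch) {
        // tokens must continue right after the last stored position: Y = X + 1
        return batch_pos_min == p0 + 1;
    }

    // embeddings (e.g. vision input) may repeat the last position or continue after it
    return batch_pos_min == p0 || batch_pos_min == p0 + 1;
}

int main() {
    printf("%d\n", seq_positions_ok(-1, 7, true)); // empty sequence -> 1 (check skipped)
    printf("%d\n", seq_positions_ok( 4, 5, true)); // consecutive    -> 1 (ok)
    printf("%d\n", seq_positions_ok( 4, 7, true)); // gap            -> 0 (rejected)
    return 0;
}

The p0 >= 0 guard is the core of the fix: previously the comparison ran for any non-null memory, even when the sequence had nothing stored in it, and the second commit simply reuses the p0 variable in the remaining checks and in the error message.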