memory : fix broken batch splits for recurrent cache (#14575)

Splits producing more than one ubatch per batch for recurrent models
were broken with #14512.

This fixes it by moving the completeness check after the ubatch split loop.
This commit is contained in:
compilade
2025-07-08 11:37:47 -04:00
committed by GitHub
parent b8eeb8741d
commit bb4f7a9e4e

View File

@ -377,14 +377,18 @@ llama_memory_context_ptr llama_memory_recurrent::init_batch(llama_batch_allocr &
ubatch = balloc.split_equal(n_ubatch, false);
}
if (balloc.get_n_used() < balloc.get_n_tokens()) {
// failed to find a suitable split
if (ubatch.n_tokens == 0) {
break;
}
ubatches.push_back(std::move(ubatch)); // NOLINT
}
if (balloc.get_n_used() < balloc.get_n_tokens()) {
// failed to find a suitable split
break;
}
if (!prepare(ubatches)) {
break;
}