Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-07-17 08:14:50 +00:00)
memory : fix broken batch splits for recurrent cache (#14575)
Splits producing more than one ubatch per batch for recurrent models were broken by #14512. This commit fixes it by moving the completeness check after the ubatch split loop.
@@ -377,14 +377,18 @@ llama_memory_context_ptr llama_memory_recurrent::init_batch(llama_batch_allocr &
                 ubatch = balloc.split_equal(n_ubatch, false);
             }
 
-            if (balloc.get_n_used() < balloc.get_n_tokens()) {
-                // failed to find a suitable split
+            if (ubatch.n_tokens == 0) {
                 break;
             }
 
             ubatches.push_back(std::move(ubatch)); // NOLINT
         }
 
+        if (balloc.get_n_used() < balloc.get_n_tokens()) {
+            // failed to find a suitable split
+            break;
+        }
+
         if (!prepare(ubatches)) {
             break;
         }