memory : correctly handle failure in apply() (#14438)

ggml-ci

Author:    Georgi Gerganov
Date:      2025-06-30 18:03:03 +03:00
Committed: GitHub
Parent:    5dd942de59
Commit:    745f11fed0

6 changed files with 32 additions and 4 deletions
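
Summary of the change: the apply() implementations used to assert status == LLAMA_MEMORY_STATUS_SUCCESS, which also fires for the perfectly valid LLAMA_MEMORY_STATUS_NO_UPDATE (an apply() call that simply has no work to do, e.g. an update pass with no ubatches). The fix asserts only that the status is not a failure. A minimal standalone sketch of the distinction; the four status values come from llama-memory.h, everything else here is illustrative:

#include <cassert>

// status values mirrored from llama-memory.h, for a self-contained example
enum llama_memory_status {
    LLAMA_MEMORY_STATUS_SUCCESS = 0,
    LLAMA_MEMORY_STATUS_NO_UPDATE,
    LLAMA_MEMORY_STATUS_FAILED_PREPARE,
    LLAMA_MEMORY_STATUS_FAILED_COMPUTE,
};

// same semantics as the helper introduced in this commit:
// only the two FAILED_* values count as failure
static bool llama_memory_status_is_fail(llama_memory_status status) {
    return status == LLAMA_MEMORY_STATUS_FAILED_PREPARE ||
           status == LLAMA_MEMORY_STATUS_FAILED_COMPUTE;
}

int main() {
    // the old check (status == LLAMA_MEMORY_STATUS_SUCCESS) would
    // reject NO_UPDATE here, even though it is not an error
    assert(!llama_memory_status_is_fail(LLAMA_MEMORY_STATUS_NO_UPDATE));

    // the actual failure states are still caught
    assert(llama_memory_status_is_fail(LLAMA_MEMORY_STATUS_FAILED_COMPUTE));

    return 0;
}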

src/llama-kv-cache-unified-iswa.cpp

@@ -246,7 +246,7 @@ bool llama_kv_cache_unified_iswa_context::next() {
 }
 
 bool llama_kv_cache_unified_iswa_context::apply() {
-    assert(status == LLAMA_MEMORY_STATUS_SUCCESS);
+    assert(!llama_memory_status_is_fail(status));
 
     bool res = true;

src/llama-kv-cache-unified.cpp

@@ -1776,7 +1776,7 @@ bool llama_kv_cache_unified_context::next() {
 }
 
 bool llama_kv_cache_unified_context::apply() {
-    assert(status == LLAMA_MEMORY_STATUS_SUCCESS);
+    assert(!llama_memory_status_is_fail(status));
 
     // no ubatches -> this is a KV cache update
     if (ubatches.empty()) {

src/llama-memory-hybrid.cpp

@@ -218,7 +218,7 @@ bool llama_memory_hybrid_context::next() {
 }
 
 bool llama_memory_hybrid_context::apply() {
-    assert(status == LLAMA_MEMORY_STATUS_SUCCESS);
+    assert(!llama_memory_status_is_fail(status));
 
     bool res = true;

src/llama-memory-recurrent.cpp

@@ -1071,7 +1071,15 @@ bool llama_memory_recurrent_context::next() {
 }
 
 bool llama_memory_recurrent_context::apply() {
-    assert(status == LLAMA_MEMORY_STATUS_SUCCESS);
+    assert(!llama_memory_status_is_fail(status));
 
+    // no ubatches -> this is an update
+    if (ubatches.empty()) {
+        // recurrent cache never performs updates
+        assert(status == LLAMA_MEMORY_STATUS_NO_UPDATE);
+
+        return true;
+    }
+
     mem->find_slot(ubatches[i_next]);
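
The recurrent branch above encodes a caller contract worth spelling out: a context can reach apply() with NO_UPDATE (nothing to do), and apply() must treat that as a successful no-op rather than an error. A sketch of the kind of driver loop involved, assuming the llama_memory_context_i interface referenced in llama-memory.h below (get_status(), apply(), next()); the loop is a simplification of the real decode/update call sites, not a copy of them:

#include "llama-memory.h" // assumed include path: provides the interface + helper

// simplified driver loop, for illustration only
bool drive(llama_memory_context_i & mctx) {
    // a context whose preparation or compute failed must be discarded by
    // the caller; the tightened asserts in apply() now enforce this
    if (llama_memory_status_is_fail(mctx.get_status())) {
        return false;
    }

    // both SUCCESS and NO_UPDATE contexts get applied; for the recurrent
    // cache, NO_UPDATE is exactly the empty-ubatches no-op shown above
    do {
        if (!mctx.apply()) {
            return false;
        }
    } while (mctx.next());

    return true;
}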

src/llama-memory.cpp

@@ -40,3 +40,20 @@ llama_memory_status llama_memory_status_combine(llama_memory_status s0, llama_memory_status s1) {
     // if either status has an update, then the combined status has an update
     return has_update ? LLAMA_MEMORY_STATUS_SUCCESS : LLAMA_MEMORY_STATUS_NO_UPDATE;
 }
+
+bool llama_memory_status_is_fail(llama_memory_status status) {
+    switch (status) {
+        case LLAMA_MEMORY_STATUS_SUCCESS:
+        case LLAMA_MEMORY_STATUS_NO_UPDATE:
+            {
+                return false;
+            }
+        case LLAMA_MEMORY_STATUS_FAILED_PREPARE:
+        case LLAMA_MEMORY_STATUS_FAILED_COMPUTE:
+            {
+                return true;
+            }
+    }
+
+    return false;
+}
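
For reference, the full truth table of the new helper, written as a small test. It assumes the enum and both helpers from src/llama-memory.h are in scope, and the last assert assumes that llama_memory_status_combine() propagates a failure status from either input, which is what the apply() asserts rely on for hybrid/iSWA contexts:

#include <cassert>

#include "llama-memory.h" // assumed include path within src/

int main() {
    assert(!llama_memory_status_is_fail(LLAMA_MEMORY_STATUS_SUCCESS));
    assert(!llama_memory_status_is_fail(LLAMA_MEMORY_STATUS_NO_UPDATE));
    assert( llama_memory_status_is_fail(LLAMA_MEMORY_STATUS_FAILED_PREPARE));
    assert( llama_memory_status_is_fail(LLAMA_MEMORY_STATUS_FAILED_COMPUTE));

    // a failure is assumed to survive status combination, so contexts that
    // combine child statuses are still caught by the new asserts
    assert(llama_memory_status_is_fail(
            llama_memory_status_combine(LLAMA_MEMORY_STATUS_SUCCESS,
                                        LLAMA_MEMORY_STATUS_FAILED_COMPUTE)));

    return 0;
}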

src/llama-memory.h

@@ -31,6 +31,9 @@ enum llama_memory_status {
 // useful for implementing hybrid memory types (e.g. iSWA)
 llama_memory_status llama_memory_status_combine(llama_memory_status s0, llama_memory_status s1);
 
+// helper function for checking if a memory status indicates a failure
+bool llama_memory_status_is_fail(llama_memory_status status);
+
 // the interface for managing the memory context during batch processing
 // this interface is implemented per memory type. see:
 //  - llama_kv_cache_unified_context