From 31feaee04dea7cc2ba7694d0b186906c58863eb2 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Fri, 4 Jul 2025 19:08:31 +0300
Subject: [PATCH] kv-cache : restore find_slot impl

ggml-ci
---
 src/llama-kv-cache-unified.cpp | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/src/llama-kv-cache-unified.cpp b/src/llama-kv-cache-unified.cpp
index ae544cc4b..eb6527b75 100644
--- a/src/llama-kv-cache-unified.cpp
+++ b/src/llama-kv-cache-unified.cpp
@@ -789,7 +789,7 @@ llama_kv_cache_unified::slot_info llama_kv_cache_unified::find_slot(const llama_
         res.s1 = std::max(res.s1, seq_to_stream[seq_id]);
 
         res.strm[s] = seq_to_stream[seq_id];
-        res.idxs[s].resize(n_tokens);
+        res.idxs[s].reserve(n_tokens);
 
         const auto & cells = v_cells[seq_to_stream[seq_id]];
 
@@ -806,7 +806,6 @@ llama_kv_cache_unified::slot_info llama_kv_cache_unified::find_slot(const llama_
             return { };
         }
 
-        uint32_t n_found = 0;
         uint32_t n_tested = 0;
 
         // for continuous slots, we test that all tokens in the ubatch fit, starting from the current head
@@ -857,9 +856,7 @@ llama_kv_cache_unified::slot_info llama_kv_cache_unified::find_slot(const llama_
                 }
 
                 if (can_use) {
-                    res.idxs[s][n_found] = idx;
-
-                    n_found++;
+                    res.idxs[s].push_back(idx);
                 } else {
                     if (cont) {
                         break;
@@ -867,12 +864,12 @@ llama_kv_cache_unified::slot_info llama_kv_cache_unified::find_slot(const llama_
                 }
             }
 
-            if (n_found == n_tokens) {
+            if (res.idxs[s].size() == n_tokens) {
                 break;
             }
 
             if (cont) {
-                n_found = 0;
+                res.idxs[s].clear();
             }
 
             if (n_tested >= cells.size()) {
@@ -882,7 +879,7 @@ llama_kv_cache_unified::slot_info llama_kv_cache_unified::find_slot(const llama_
         }
 
         // we didn't find a suitable slot - return empty result
-        if (n_found < n_tokens) {
+        if (res.idxs[s].size() < n_tokens) {
             return { };
         }
     }