From e0e806f52ebcd0ee285c994fe8fd8b8787d2cb0a Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 4 Jun 2025 09:50:32 +0300 Subject: [PATCH] kv-cache : fix unified::seq_rm to work with seq_id < 0 (#13985) ggml-ci --- src/llama-kv-cache-unified.cpp | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/src/llama-kv-cache-unified.cpp b/src/llama-kv-cache-unified.cpp index a81715476..4007f202e 100644 --- a/src/llama-kv-cache-unified.cpp +++ b/src/llama-kv-cache-unified.cpp @@ -149,12 +149,27 @@ bool llama_kv_cache_unified::seq_rm(llama_seq_id seq_id, llama_pos p0, llama_pos p1 = std::numeric_limits::max(); } - for (uint32_t i = 0; i < cells.size(); ++i) { - if (!cells.pos_in(i, p0, p1)) { - continue; - } + if (seq_id >= 0) { + for (uint32_t i = 0; i < cells.size(); ++i) { + if (!cells.pos_in(i, p0, p1)) { + continue; + } + + if (cells.seq_has(i, seq_id) && cells.seq_rm(i, seq_id)) { + if (new_head == cells.size()) { + new_head = i; + } + } + } + } else { + // match any sequence + for (uint32_t i = 0; i < cells.size(); ++i) { + if (!cells.pos_in(i, p0, p1)) { + continue; + } + + cells.rm(i); - if (cells.seq_has(i, seq_id) && cells.seq_rm(i, seq_id)) { if (new_head == cells.size()) { new_head = i; }