kv-cache : fix unified::seq_rm to work with seq_id < 0 (#13985)

ggml-ci
2025-06-29 04:35:05 +00:00 · 2025-06-04 09:50:32 +03:00
parent 7e00e60ef8
commit e0e806f52e
1 changed file with 20 additions and 5 deletions
--- a/src/llama-kv-cache-unified.cpp
+++ b/src/llama-kv-cache-unified.cpp
@@ -149,6 +149,7 @@ bool llama_kv_cache_unified::seq_rm(llama_seq_id seq_id, llama_pos p0, llama_pos
        p1 = std::numeric_limits<llama_pos>::max();
    }

+    if (seq_id >= 0) {
        for (uint32_t i = 0; i < cells.size(); ++i) {
            if (!cells.pos_in(i, p0, p1)) {
                continue;
@@ -160,6 +161,20 @@ bool llama_kv_cache_unified::seq_rm(llama_seq_id seq_id, llama_pos p0, llama_pos
                }
            }
        }
+    } else {
+        // match any sequence
+        for (uint32_t i = 0; i < cells.size(); ++i) {
+            if (!cells.pos_in(i, p0, p1)) {
+                continue;
+            }
+
+            cells.rm(i);
+
+            if (new_head == cells.size()) {
+                new_head = i;
+            }
+        }
+    }

    // If we freed up a slot, set head to it so searching can start there.
    if (new_head != cells.size() && new_head < head) {