kv-cache : fix shift

ggml-ci
2025-07-28 21:23:55 -04:00 · 2025-06-09 10:53:26 +03:00
parent 91a8ee6a6f
commit eee8d481d9
2 changed files with 4 additions and 5 deletions
--- a/src/llama-kv-cache-unified.cpp
+++ b/src/llama-kv-cache-unified.cpp
@@ -944,11 +944,9 @@ llm_graph_result_ptr llama_kv_cache_unified::build_graph_shift(
    const auto & n_embd_head_k = hparams.n_embd_head_k;
  //const auto & n_embd_head_v = hparams.n_embd_head_v;

-    //GGML_ASSERT(kv_self->size == n_ctx);
-
    auto inp = std::make_unique<llm_graph_input_k_shift>(this);

-    inp->k_shift = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, cparams.n_ctx);
+    inp->k_shift = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, cells.size());
    ggml_set_input(inp->k_shift);

    for (const auto & layer : layers) {
--- a/src/llama-kv-cells.h
+++ b/src/llama-kv-cells.h
@@ -317,8 +317,6 @@ public:
        pos[i]   += d;
        shift[i] += d;

-        seq_pos_add(i);
-
        has_shift = true;

        if (pos[i] < 0) {
@@ -326,12 +324,15 @@ public:

            seq[i].reset();
            pos[i] = -1;
+            shift[i] = 0;

            used.erase(i);

            return true;
        }

+        seq_pos_add(i);
+
        return false;
    }