graph : simplify attn input build for unified KV cache (#12381)

ggml-ci
2025-06-28 20:25:20 +00:00 · 2025-03-14 10:47:44 +02:00
parent 081bee8c64
commit c522ce4143
3 changed files with 53 additions and 58 deletions
--- a/src/llama-graph.h
+++ b/src/llama-graph.h
@@ -509,9 +509,7 @@ struct llm_graph_context {
                  float   kq_scale,
                    int   il) const;

-    llm_graph_input_attn_kv_unified * build_attn_inp_kv_unified(
-            bool causal,
-            bool swa) const;
+    llm_graph_input_attn_kv_unified * build_attn_inp_kv_unified() const;

    ggml_tensor * build_attn(
            llm_graph_input_attn_kv_unified * inp,