graph : simplify attn input build for unified KV cache (#12381)

ggml-ci
This commit is contained in:
Georgi Gerganov
2025-03-14 10:47:44 +02:00
committed by GitHub
parent 081bee8c64
commit c522ce4143
3 changed files with 53 additions and 58 deletions

View File

@@ -509,9 +509,7 @@ struct llm_graph_context {
float kq_scale,
int il) const;
-llm_graph_input_attn_kv_unified * build_attn_inp_kv_unified(
-bool causal,
-bool swa) const;
+llm_graph_input_attn_kv_unified * build_attn_inp_kv_unified() const;
ggml_tensor * build_attn(
llm_graph_input_attn_kv_unified * inp,