Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-06-29 12:35:16 +00:00)
graph : normalize Q, K, V shapes + sync cross attention (#12449)
* graph : normalize Q, K, V shapes and add comments

ggml-ci

* context : synchronize before getting cross attention data

* model : fix command-r attention norm check
@@ -1143,6 +1143,8 @@ int llama_context::encode(llama_batch & inp_batch) {
     if (model.arch == LLM_ARCH_T5 && t_embd) {
         //cross.t_embd = t_embd;
 
+        synchronize();
+
         cross.n_embd = t_embd->ne[0];
         cross.n_enc  = t_embd->ne[1];
         cross.v_embd.resize(cross.n_embd*cross.n_enc);
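The hunk above inserts a synchronize() call before the encoder output tensor t_embd is read, so that any asynchronously scheduled backend work has finished producing the cross-attention data. Below is a minimal sketch of the same "synchronize before reading results" pattern through the public llama.cpp API; llama_synchronize() and llama_get_embeddings() are real public calls, while read_encoder_output() is a hypothetical helper used only for illustration and is not part of this commit.

```cpp
#include "llama.h"

// Sketch: wait for pending backend compute before reading back results.
float * read_encoder_output(llama_context * ctx) {
    // Work queued by llama_encode()/llama_decode() may still be in flight;
    // block until it has completed.
    llama_synchronize(ctx);

    // Now it is safe to read the output embeddings.
    return llama_get_embeddings(ctx);
}
```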