mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-28 20:25:20 +00:00
graph : normalize Q, K, V shapes + sync cross attention (#12449)
* graph : normalize Q, K, V shapes and add comments ggml-ci * context : synchronize before getting cross attention data * model : fix command-r attention norm check
This commit is contained in:
@ -1378,7 +1378,7 @@ ggml_tensor * llm_graph_context::build_attn(
|
||||
// note: storing RoPE-ed version of K in the KV cache
|
||||
ggml_build_forward_expand(gf, ggml_cpy(ctx0, k_cur, k_cache_view));
|
||||
|
||||
assert(v_cur->ne[0] == n_embd_v_gqa && v_cur->ne[1] == n_tokens);
|
||||
v_cur = ggml_reshape_2d(ctx0, v_cur, n_embd_v_gqa, n_tokens);
|
||||
|
||||
ggml_tensor * v_cache_view = nullptr;
|
||||
|
||||
|
Reference in New Issue
Block a user