CUDA: fix q_nope_absorbed prec for DS 2 Lite f16 (#13137)

This commit is contained in:
Johannes Gäßler
2025-04-28 09:29:26 +02:00
committed by GitHub
parent 85f36e5e71
commit 69699be48a
3 changed files with 5 additions and 4 deletions

View File

@@ -10149,6 +10149,7 @@ struct llm_build_deepseek2 : public llm_graph_context {
// {n_embd_head_qk_nope, kv_lora_rank, n_head} x {n_embd_head_qk_nope, n_tokens, n_head}
ggml_tensor * q_nope_absorbed = ggml_mul_mat(ctx0, model.layers[il].wk_b, q_nope);
ggml_mul_mat_set_prec(q_nope_absorbed, GGML_PREC_F32);
cb(q_nope_absorbed, "q_nope_absorbed", il);
// {kv_lora_rank, n_head, n_tokens}