Merge branch 'master' into gg/llama-kv-cache

ggml-ci
2025-07-16 15:47:35 +00:00 · 2025-02-27 15:10:18 +02:00
parent 952feedfca b95c8af37c
commit 82675a0180
100 changed files with 4248 additions and 1355 deletions
--- a/include/llama.h
+++ b/include/llama.h
@@ -479,6 +479,7 @@ extern "C" {
    LLAMA_API int32_t llama_model_n_embd     (const struct llama_model * model);
    LLAMA_API int32_t llama_model_n_layer    (const struct llama_model * model);
    LLAMA_API int32_t llama_model_n_head     (const struct llama_model * model);
+    LLAMA_API int32_t llama_model_n_head_kv  (const struct llama_model * model);

    // Get the model's RoPE frequency scaling factor
    LLAMA_API float llama_model_rope_freq_scale_train(const struct llama_model * model);