From 4762ad7316dcdec20016ab5985fb46a27902204d Mon Sep 17 00:00:00 2001
From: Gabriel Larson <55459720+gabriellarson@users.noreply.github.com>
Date: Sun, 27 Jul 2025 03:18:37 -0500
Subject: [PATCH] model : make rope_yarn_log_mul optional for deepseek2
 (#14896)

* make rope_yarn_log_mul optional for deepseek2

* default rope_yarn_log_mul = 0.0f
---
 src/llama-hparams.h | 2 +-
 src/llama-model.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/llama-hparams.h b/src/llama-hparams.h
index c422cd7be..ec7fd6a42 100644
--- a/src/llama-hparams.h
+++ b/src/llama-hparams.h
@@ -98,7 +98,7 @@ struct llama_hparams {
     float    rope_freq_scale_train;
     float    rope_freq_scale_train_swa;
     uint32_t n_ctx_orig_yarn;
-    float    rope_yarn_log_mul;
+    float    rope_yarn_log_mul = 0.0f;

     std::array<int, 4> rope_sections;

diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index f16789c2a..71f89e190 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -1369,7 +1369,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                     // that have no expert_gating_func model parameter set
                     hparams.expert_gating_func = LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX;
                 }
-                ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul);
+                ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul, false);

                 switch (hparams.n_layer) {
                     case 27: type = LLM_TYPE_16B; break;
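
Note (editor's addendum, not part of the patch): the third argument to ml.get_key(...) is the
"required" flag, so passing false lets a GGUF that lacks the YaRN log-multiplier key load with the
new in-struct default of 0.0f instead of aborting. The C++ sketch below is a toy illustration of
that optional-key pattern under stated assumptions: toy_loader, its get_key signature, and the
literal key string are illustrative stand-ins, not the real llama_model_loader API or GGUF key name.

// Minimal sketch of the optional-key pattern (toy code, not llama.cpp internals).
#include <cstdio>
#include <map>
#include <stdexcept>
#include <string>

struct toy_loader {
    std::map<std::string, float> kv; // stands in for the model's key/value metadata

    // Returns true if the key was found; throws only when the key is required.
    bool get_key(const std::string & key, float & out, bool required = true) const {
        auto it = kv.find(key);
        if (it == kv.end()) {
            if (required) {
                throw std::runtime_error("key not found in model: " + key);
            }
            return false; // `out` keeps its previous (default) value
        }
        out = it->second;
        return true;
    }
};

int main() {
    toy_loader ml; // metadata without a yarn log-multiplier entry

    float rope_yarn_log_mul = 0.0f; // mirrors the new default in llama_hparams
    ml.get_key("example.rope.scaling.yarn_log_multiplier", rope_yarn_log_mul, /*required =*/ false);

    printf("rope_yarn_log_mul = %f\n", rope_yarn_log_mul); // stays 0.0, no error raised
    return 0;
}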