From 965ad1c08a5683b9c33f65b827ffcb32740b48d6 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Wed, 19 Feb 2025 08:20:10 +0200
Subject: [PATCH] speculative : update default params

---
 common/common.h      | 4 ++--
 common/speculative.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/common/common.h b/common/common.h
index 10bcc10d5..efe8e7f79 100644
--- a/common/common.h
+++ b/common/common.h
@@ -178,10 +178,10 @@ struct common_params_speculative {
 
     int32_t n_ctx        =     0; // draft context size
     int32_t n_max        =    16; // maximum number of tokens to draft during speculative decoding
-    int32_t n_min        =     5; // minimum number of draft tokens to use for speculative decoding
+    int32_t n_min        =     0; // minimum number of draft tokens to use for speculative decoding
     int32_t n_gpu_layers =    -1; // number of layers to store in VRAM for the draft model (-1 - use default)
     float   p_split      =  0.1f; // speculative decoding split probability
-    float   p_min        =  0.9f; // minimum speculative decoding probability (greedy)
+    float   p_min        = 0.75f; // minimum speculative decoding probability (greedy)
 
     struct cpu_params cpuparams;
     struct cpu_params cpuparams_batch;
diff --git a/common/speculative.h b/common/speculative.h
index 2baf99fc7..2b51a70ca 100644
--- a/common/speculative.h
+++ b/common/speculative.h
@@ -9,7 +9,7 @@ struct common_speculative_params {
     int n_draft = 16;  // max drafted tokens
     int n_reuse = 256;
 
-    float p_min = 0.9f; // min probability required to accept a token in the draft
+    float p_min = 0.75f; // min probability required to accept a token in the draft
 };
 
 struct common_speculative * common_speculative_init(struct llama_context * ctx_dft);