From 965ad1c08a5683b9c33f65b827ffcb32740b48d6 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 19 Feb 2025 08:20:10 +0200 Subject: [PATCH] speculative : update default params --- common/common.h | 4 ++-- common/speculative.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/common/common.h b/common/common.h index 10bcc10d5..efe8e7f79 100644 --- a/common/common.h +++ b/common/common.h @@ -178,10 +178,10 @@ struct common_params_speculative { int32_t n_ctx = 0; // draft context size int32_t n_max = 16; // maximum number of tokens to draft during speculative decoding - int32_t n_min = 5; // minimum number of draft tokens to use for speculative decoding + int32_t n_min = 0; // minimum number of draft tokens to use for speculative decoding int32_t n_gpu_layers = -1; // number of layers to store in VRAM for the draft model (-1 - use default) float p_split = 0.1f; // speculative decoding split probability - float p_min = 0.9f; // minimum speculative decoding probability (greedy) + float p_min = 0.75f; // minimum speculative decoding probability (greedy) struct cpu_params cpuparams; struct cpu_params cpuparams_batch; diff --git a/common/speculative.h b/common/speculative.h index 2baf99fc7..2b51a70ca 100644 --- a/common/speculative.h +++ b/common/speculative.h @@ -9,7 +9,7 @@ struct common_speculative_params { int n_draft = 16; // max drafted tokens int n_reuse = 256; - float p_min = 0.9f; // min probability required to accept a token in the draft + float p_min = 0.75f; // min probability required to accept a token in the draft }; struct common_speculative * common_speculative_init(struct llama_context * ctx_dft);