speculative : change default p_accept to 0.5 + CLI args (#3919)

ggml-ci
2025-08-13 03:47:46 -04:00 · 2023-11-03 09:41:17 +02:00
parent 05816027d6
commit 8f961abdc4
3 changed files with 25 additions and 5 deletions
--- a/examples/speculative/speculative.cpp
+++ b/examples/speculative/speculative.cpp
@@ -37,9 +37,11 @@ int main(int argc, char ** argv) {
    // max number of parallel drafting sequences (i.e. tree branches)
    const int n_seq_dft = params.n_parallel;

-    // TODO: make this configurable
-    const float p_accept = 0.80f;
-    const float p_split  = 0.10f;
+    // probability threshold for accepting a token from the draft model
+    const float p_accept = params.p_accept;
+
+    // probability threshold for splitting a draft branch (only for n_seq_dft > 1)
+    const float p_split  = params.p_split;

 #ifndef LOG_DISABLE_LOGS
    log_set_target(log_filename_generator("speculative", "log"));