sampling : avoid expensive softmax during greedy sampling (#9605)

* sampling : avoid expensive softmax during greedy sampling

  ggml-ci

* speculative : fix default RNG seed + set sparams.n_probs

* Update tests/test-sampling.cpp

  Co-authored-by: slaren <slarengh@gmail.com>

* sampling : add clarifying comment [no ci]

---------

Co-authored-by: slaren <slarengh@gmail.com>
2025-08-13 03:47:46 -04:00 · 2024-09-24 09:03:17 +03:00
parent c087b6f11d
commit b0f27361f3
5 changed files with 59 additions and 6 deletions
--- a/src/llama-sampling.cpp
+++ b/src/llama-sampling.cpp
@@ -3,13 +3,14 @@
 #include "llama-vocab.h"
 #include "llama-grammar.h"

-#include <cassert>
 #include <algorithm>
-#include <cstring>
-#include <ctime>
+#include <cassert>
 #include <cfloat>
 #include <chrono>
 #include <cmath>
+#include <cstdlib>
+#include <cstring>
+#include <ctime>
 #include <numeric>
 #include <random>
 #include <unordered_map>