server: fix incorrectly reported token probabilities (#7125)

* server: normalize token probabilities
* fix temperature == 0.0f
2025-08-20 06:36:48 -04:00 · 2024-05-07 23:07:58 +02:00
parent b6aa670203
commit af0a5b6163
4 changed files with 31 additions and 11 deletions
--- a/common/sampling.h
+++ b/common/sampling.h
@@ -81,6 +81,7 @@ struct llama_sampling_context {
    // TODO: replace with ring-buffer
    std::vector<llama_token>      prev;
    std::vector<llama_token_data> cur;
+    size_t n_considered;

    std::mt19937 rng;
 };