Server: fix seed for multiple slots (#6835)

* Server: add tests for consistent results
* sampling: separate rng per sampling context
2025-07-01 21:15:06 +00:00 · 2024-04-24 11:08:36 +02:00
parent c0d1b3e03e
commit 28103f4832
11 changed files with 145 additions and 30 deletions
--- a/examples/lookup/lookup-stats.cpp
+++ b/examples/lookup/lookup-stats.cpp
@@ -30,7 +30,6 @@ int main(int argc, char ** argv){

    // load the model
    std::tie(model, ctx) = llama_init_from_gpt_params(params);
-    llama_set_rng_seed(ctx, params.seed);
    GGML_ASSERT(llama_n_vocab(model) < (1 << 16));

    // tokenize the prompt