diff --git a/tools/server/server.cpp b/tools/server/server.cpp index 3b5e03528..1b1cf439b 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -233,6 +233,7 @@ struct server_task { slot_params defaults; defaults.sampling = params_base.sampling; defaults.speculative = params_base.speculative; + defaults.n_keep = params_base.n_keep; // enabling this will output extra debug information in the HTTP responses from the server params.verbose = params_base.verbosity > 9; @@ -2060,6 +2061,7 @@ struct server_context { SLT_INF(slot, "new slot n_ctx_slot = %d\n", slot.n_ctx); slot.params.sampling = params_base.sampling; + slot.params.n_keep = params_base.n_keep; slot.callback_on_release = [this](int) { queue_tasks.pop_deferred_task();