mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-26 19:55:04 +00:00
server : pass default --keep argument (#14120)
This commit is contained in:
@ -233,6 +233,7 @@ struct server_task {
|
||||
slot_params defaults;
|
||||
defaults.sampling = params_base.sampling;
|
||||
defaults.speculative = params_base.speculative;
|
||||
defaults.n_keep = params_base.n_keep;
|
||||
|
||||
// enabling this will output extra debug information in the HTTP responses from the server
|
||||
params.verbose = params_base.verbosity > 9;
|
||||
@ -2060,6 +2061,7 @@ struct server_context {
|
||||
SLT_INF(slot, "new slot n_ctx_slot = %d\n", slot.n_ctx);
|
||||
|
||||
slot.params.sampling = params_base.sampling;
|
||||
slot.params.n_keep = params_base.n_keep;
|
||||
|
||||
slot.callback_on_release = [this](int) {
|
||||
queue_tasks.pop_deferred_task();
|
||||
|
Reference in New Issue
Block a user