server : fix draft context not being released (#11354)

2025-06-26 19:55:04 +00:00 · 2025-01-22 17:44:40 +01:00
parent c64d2becb1
commit 12c2bdf2de
1 changed file with 3 additions and 0 deletions
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1772,6 +1772,9 @@ struct server_context {
            // force F16 KV cache for the draft model for extra performance
            cparams_dft.type_k = GGML_TYPE_F16;
            cparams_dft.type_v = GGML_TYPE_F16;
+
+            // the context is not needed - we will create one for each slot
+            llama_init_dft.context.reset();
        }

        chat_templates = common_chat_templates_from_model(model, params_base.chat_template);