Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-06-27 03:55:20 +00:00)
@@ -1880,6 +1880,7 @@ struct server_context {
        if (slot.state == SLOT_STATE_STARTED) {
            slot.t_start_process_prompt = ggml_time_us();
            slot.t_start_generation = 0;

            slot.n_past = 0;
            slot.n_prompt_tokens = prompt_tokens.size();
            slot.state = SLOT_STATE_PROCESSING_PROMPT;
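Below is a minimal, self-contained C++ sketch of the same step, useful for seeing the slot bookkeeping in isolation. The demo_slot type, the local_time_us() helper, and the main() driver are illustrative stand-ins rather than llama.cpp code; in the real server the timestamp comes from ggml_time_us() and these fields live on the server slot object inside server_context.

// Sketch only: mirrors the fields touched by this hunk to show how a slot is
// reset when it moves from SLOT_STATE_STARTED to SLOT_STATE_PROCESSING_PROMPT.
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <vector>

enum slot_state {
    SLOT_STATE_IDLE,
    SLOT_STATE_STARTED,
    SLOT_STATE_PROCESSING_PROMPT,
    SLOT_STATE_GENERATING,
};

// stand-in for ggml_time_us(): wall-clock time in microseconds
static int64_t local_time_us() {
    using namespace std::chrono;
    return duration_cast<microseconds>(steady_clock::now().time_since_epoch()).count();
}

struct demo_slot {
    slot_state state                  = SLOT_STATE_IDLE;
    int64_t    t_start_process_prompt = 0;
    int64_t    t_start_generation     = 0;
    int32_t    n_past                 = 0;
    int32_t    n_prompt_tokens        = 0;
};

// same bookkeeping as the hunk: prepare a freshly started slot for prompt processing
static void begin_prompt_processing(demo_slot & slot, const std::vector<int32_t> & prompt_tokens) {
    if (slot.state == SLOT_STATE_STARTED) {
        slot.t_start_process_prompt = local_time_us();
        slot.t_start_generation     = 0;

        slot.n_past          = 0;
        slot.n_prompt_tokens = (int32_t) prompt_tokens.size();
        slot.state           = SLOT_STATE_PROCESSING_PROMPT;
    }
}

int main() {
    demo_slot slot;
    slot.state = SLOT_STATE_STARTED;

    const std::vector<int32_t> prompt_tokens = {1, 15043, 3186}; // dummy token ids
    begin_prompt_processing(slot, prompt_tokens);

    std::printf("state=%d n_prompt_tokens=%d n_past=%d\n",
                (int) slot.state, slot.n_prompt_tokens, slot.n_past);
    return 0;
}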