From 5fbfe384d4659f81c47a477eb8ee97692c7ffef9 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 21 May 2025 19:46:56 +0300 Subject: [PATCH] server : improve error reporting (#13680) --- tools/server/server.cpp | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/tools/server/server.cpp b/tools/server/server.cpp index 087665e41..7424da523 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -3366,14 +3366,29 @@ struct server_context { metrics.on_decoded(slots); if (ret != 0) { - if (n_batch == 1 || ret < 0) { - // if you get here, it means the KV cache is full - try increasing it via the context size - SRV_ERR("failed to decode the batch: KV cache is full - try increasing it via the context size, i = %d, n_batch = %d, ret = %d\n", i, n_batch, ret); - for (auto & slot : slots) { - slot.release(); - send_error(slot, "Input prompt is too big compared to KV size. Please try increasing KV size."); + { + std::string err; + + if (n_batch == 1 && ret == 1) { + err = "Context size has been exceeded."; + } + + if (ret == -1) { + err = "Invalid input batch."; + } + + if (ret < -1) { + err = "Compute error."; + } + + if (!err.empty()) { + SRV_ERR("%s, i = %d, n_batch = %d, ret = %d\n", err.c_str(), i, n_batch, ret); + for (auto & slot : slots) { + slot.release(); + send_error(slot, err); + } + break; } - break; // break loop of n_batch } // retry with half the batch size to try to find a free slot in the KV cache