mirror of https://github.com/ggml-org/llama.cpp.git
server : fix EOS token detection with disabled cache (#5938)
@@ -1123,7 +1123,7 @@ struct server_context {
         });
     }
 
-    if (!slot.cache_tokens.empty() && result.tok == llama_token_eos(model)) {
+    if (result.tok == llama_token_eos(model)) {
        slot.stopped_eos = true;
        slot.has_next_token = false;
 
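The one-line change removes the !slot.cache_tokens.empty() guard. When the server runs with the prompt cache disabled, slot.cache_tokens stays empty, so the old condition short-circuited to false and a sampled EOS token was never recognized, leaving generation to run on until the token limit instead of stopping. Below is a minimal standalone sketch of the two conditions, using a hypothetical EOS token id and the variable names from the diff; it is an illustration, not the server's actual code.

// Minimal sketch of the old vs. fixed EOS check.
// The token id and variables here are hypothetical stand-ins.
#include <cstdio>
#include <vector>

int main() {
    const int eos_token = 2;           // hypothetical EOS token id
    std::vector<int> cache_tokens;     // stays empty when the cache is disabled
    const int result_tok = eos_token;  // the model just sampled EOS

    // Old condition: short-circuits to false whenever cache_tokens is empty,
    // so EOS is silently ignored when the cache is disabled.
    const bool old_stop = !cache_tokens.empty() && result_tok == eos_token;

    // Fixed condition: depends only on the sampled token.
    const bool new_stop = result_tok == eos_token;

    std::printf("old condition stops generation: %s\n", old_stop ? "yes" : "no"); // no
    std::printf("new condition stops generation: %s\n", new_stop ? "yes" : "no"); // yes
    return 0;
}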