mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-29 12:35:16 +00:00
server : fix token duplication when streaming with stop strings (#10997)
This commit is contained in:
@@ -1856,6 +1856,8 @@ struct server_context {
|
|||||||
result.text_to_send = slot.generated_text.substr(pos, std::string::npos);
|
result.text_to_send = slot.generated_text.substr(pos, std::string::npos);
|
||||||
slot.n_sent_text += result.text_to_send.size();
|
slot.n_sent_text += result.text_to_send.size();
|
||||||
// add the token to slot queue and cache
|
// add the token to slot queue and cache
|
||||||
|
} else {
|
||||||
|
result.text_to_send = "";
|
||||||
}
|
}
|
||||||
|
|
||||||
slot.add_token(result);
|
slot.add_token(result);
|
||||||
|
Reference in New Issue
Block a user