mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-26 19:55:04 +00:00
server: fix regression on streamed non-chat completion w/ stops (#13785)
* More forgiving message diffs: partial stop words aren't erased, but full stop words are
* Add (slow) server test for completion + stream + stop
This commit is contained in:
@ -31,6 +31,11 @@ static std::string string_diff(const std::string & last, const std::string & cur
|
||||
return current;
|
||||
}
|
||||
if (!string_starts_with(current, last)) {
|
||||
if (string_starts_with(last, current)) {
|
||||
// This happens if the last generation ended on a partial stop word (not erased),
|
||||
// and the current ended on a stop word (erased).
|
||||
return "";
|
||||
}
|
||||
throw std::runtime_error("Invalid diff: '" + last + "' not found at start of '" + current + "'");
|
||||
}
|
||||
return current.substr(last.size());
|
||||
|
Reference in New Issue
Block a user