mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-27 03:55:20 +00:00
server: fix regression on streamed non-chat completion w/ stops (#13785)
* more forgiving message diffs: partial stop words aren't erased, full stops are * Add (slow) server test for completion + stream + stop
This commit is contained in:
@ -31,6 +31,11 @@ static std::string string_diff(const std::string & last, const std::string & cur
|
|||||||
return current;
|
return current;
|
||||||
}
|
}
|
||||||
if (!string_starts_with(current, last)) {
|
if (!string_starts_with(current, last)) {
|
||||||
|
if (string_starts_with(last, current)) {
|
||||||
|
// This happens if the last generation ended on a partial stop word (not erased),
|
||||||
|
// and the current ended on a stop word (erased).
|
||||||
|
return "";
|
||||||
|
}
|
||||||
throw std::runtime_error("Invalid diff: '" + last + "' not found at start of '" + current + "'");
|
throw std::runtime_error("Invalid diff: '" + last + "' not found at start of '" + current + "'");
|
||||||
}
|
}
|
||||||
return current.substr(last.size());
|
return current.substr(last.size());
|
||||||
|
@ -121,6 +121,30 @@ def test_completion_stream_with_openai_library():
|
|||||||
assert match_regex("(going|bed)+", output_text)
|
assert match_regex("(going|bed)+", output_text)
|
||||||
|
|
||||||
|
|
||||||
|
# Test case from https://github.com/ggml-org/llama.cpp/issues/13780
@pytest.mark.slow
def test_completion_stream_with_openai_library_stops():
    """Stream a non-chat completion with stop sequences and validate the text.

    Regression check: when streaming, chunks that end on a *partial* stop
    word must not be dropped from the accumulated output.
    """
    global server
    # Pull a small instruct model from the HF hub for this slow test.
    server.model_hf_repo = "bartowski/Phi-3.5-mini-instruct-GGUF:Q4_K_M"
    server.model_hf_file = None
    server.start()
    client = OpenAI(api_key="dummy", base_url=f"http://{server.server_host}:{server.server_port}/v1")
    res = client.completions.create(
        model="davinci-002",
        prompt="System: You are helpfull assistant.\nAssistant:\nHey! How could I help?\nUser:\nTell me a joke.\nAssistant:\n",
        stop=["User:\n", "Assistant:\n"],
        max_tokens=200,
        stream=True,
    )
    # Accumulate streamed text pieces; join once at the end.
    pieces = []
    for chunk in res:
        first_choice = chunk.choices[0]
        # The final chunk carries a finish_reason and no new text.
        if first_choice.finish_reason is not None:
            continue
        assert first_choice.text is not None
        pieces.append(first_choice.text)
    output_text = ''.join(pieces)
    assert match_regex("Sure, here's one for[\\s\\S]*", output_text), f'Unexpected output: {output_text}'
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("n_slots", [1, 2])
|
@pytest.mark.parametrize("n_slots", [1, 2])
|
||||||
def test_consistent_result_same_seed(n_slots: int):
|
def test_consistent_result_same_seed(n_slots: int):
|
||||||
global server
|
global server
|
||||||
|
Reference in New Issue
Block a user