mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-26 11:45:21 +00:00
server: fix streaming crashes (#13786)
* add preludes to content on partial regex match
* allow all parsers to parse non-tool-call content
* tweak order of <|python_tag|> vs <function= parsing for functionary v3.1 format; still not ideal, but hopefully less prone to crashing
This commit is contained in:
@ -401,9 +401,12 @@ static common_chat_msg simple_assist_msg(const std::string & content, const std:
|
||||
}
|
||||
return msg;
|
||||
}
|
||||
const common_chat_msg message_assist = simple_assist_msg("Hello, world!\nWhat's up?");
|
||||
const common_chat_msg message_assist_empty = simple_assist_msg("");
|
||||
const common_chat_msg message_assist_thoughts_unparsed_deepseek = simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?");
|
||||
const common_chat_msg message_assist = simple_assist_msg("Hello, world!\nWhat's up?");
|
||||
const common_chat_msg message_assist_empty = simple_assist_msg("");
|
||||
const common_chat_msg message_assist_thoughts_unparsed_deepseek = simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?");
|
||||
const common_chat_msg message_assist_thoughts_unparsed_md = simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}```");
|
||||
const common_chat_msg message_assist_thoughts_unparsed_md_partial = simple_assist_msg("<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}");
|
||||
|
||||
const common_chat_msg message_assist_thoughts_unparsed_r7b = simple_assist_msg("<|START_THINKING|>I'm\nthinking<|END_THINKING|>Hello, world!\nWhat's up?");
|
||||
const common_chat_msg message_assist_thoughts = simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking");
|
||||
const common_chat_msg message_assist_thoughts_unopened_unparsed = simple_assist_msg("I'm\nthinking</think>Hello, world!\nWhat's up?");
|
||||
@ -591,8 +594,6 @@ static void test_template_output_parsers() {
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
|
||||
/* .reasoning_in_content = */ false,
|
||||
/* .thinking_forced_open = */ false,
|
||||
}));
|
||||
assert_msg_equals(message_assist_thoughts_unparsed_deepseek,
|
||||
common_chat_parse(
|
||||
@ -619,8 +620,6 @@ static void test_template_output_parsers() {
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
|
||||
/* .reasoning_in_content = */ false,
|
||||
/* .thinking_forced_open = */ false,
|
||||
}));
|
||||
assert_msg_equals(message_assist_thoughts_call_idx,
|
||||
common_chat_parse(
|
||||
@ -632,8 +631,6 @@ static void test_template_output_parsers() {
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
|
||||
/* .reasoning_in_content = */ false,
|
||||
/* .thinking_forced_open = */ false,
|
||||
}));
|
||||
assert_msg_equals(message_assist_thoughts_no_content,
|
||||
common_chat_parse(
|
||||
@ -644,8 +641,6 @@ static void test_template_output_parsers() {
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_COMMAND_R7B,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
|
||||
/* .reasoning_in_content = */ false,
|
||||
/* .thinking_forced_open = */ false,
|
||||
}));
|
||||
|
||||
test_templates(tmpls.get(), end_tokens, message_assist_call_idx, tools,
|
||||
@ -675,6 +670,18 @@ static void test_template_output_parsers() {
|
||||
|
||||
// Generic tool calls don't generate / parse content-only messages symmetrically.
|
||||
|
||||
assert_equals(
|
||||
simple_assist_msg("{ \"tool_call\" : { \"name\" : \"t"),
|
||||
common_chat_parse(
|
||||
"{ \"tool_call\" : { \"name\" : \"t",
|
||||
/* is_partial= */ true,
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_GENERIC,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
|
||||
/* .reasoning_in_content = */ false,
|
||||
/* .thinking_forced_open = */ true,
|
||||
/* .parse_tool_calls = */ false,
|
||||
}));
|
||||
assert_equals(
|
||||
message_assist_empty,
|
||||
common_chat_parse(
|
||||
@ -776,11 +783,9 @@ static void test_template_output_parsers() {
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
|
||||
/* .reasoning_in_content = */ false,
|
||||
/* .thinking_forced_open = */ false,
|
||||
}));
|
||||
assert_msg_equals(
|
||||
simple_assist_msg(""),
|
||||
simple_assist_msg("Let's call something\n"),
|
||||
common_chat_parse(
|
||||
"Let's call something\n"
|
||||
"<tool_call>{\"name",
|
||||
@ -788,8 +793,6 @@ static void test_template_output_parsers() {
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
|
||||
/* .reasoning_in_content = */ false,
|
||||
/* .thinking_forced_open = */ false,
|
||||
}));
|
||||
assert_msg_equals(message_assist_call_thoughts,
|
||||
common_chat_parse(
|
||||
@ -979,7 +982,34 @@ static void test_template_output_parsers() {
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
|
||||
/* .reasoning_in_content = */ false,
|
||||
}));
|
||||
assert_msg_equals(message_assist_thoughts,
|
||||
common_chat_parse(
|
||||
"<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
|
||||
/* is_partial= */ true,
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
|
||||
}));
|
||||
assert_msg_equals(message_assist_thoughts_unparsed_md,
|
||||
common_chat_parse(
|
||||
"<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}```",
|
||||
/* is_partial= */ false,
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
|
||||
/* .reasoning_in_content = */ true,
|
||||
/* .thinking_forced_open = */ false,
|
||||
/* .parse_tool_calls = */ false,
|
||||
}));
|
||||
assert_msg_equals(message_assist_thoughts_unparsed_md_partial,
|
||||
common_chat_parse(
|
||||
"<think>I'm\nthinking</think>Hello, world!\nWhat's up?\n```json\n{}```",
|
||||
/* is_partial= */ true,
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
|
||||
/* .reasoning_in_content = */ true,
|
||||
/* .thinking_forced_open = */ false,
|
||||
}));
|
||||
assert_msg_equals(message_assist_thoughts_unopened_unparsed,
|
||||
@ -989,8 +1019,6 @@ static void test_template_output_parsers() {
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_HERMES_2_PRO,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
|
||||
/* .reasoning_in_content = */ false,
|
||||
/* .thinking_forced_open = */ false,
|
||||
}));
|
||||
assert_msg_equals(message_assist_thoughts,
|
||||
common_chat_parse(
|
||||
@ -1073,6 +1101,13 @@ static void test_template_output_parsers() {
|
||||
{COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1}));
|
||||
}
|
||||
|
||||
assert_equals(
|
||||
message_assist_call,
|
||||
common_chat_parse(
|
||||
"<function=special_function>{\"arg1\": 1}<",
|
||||
/* is_partial= */ true,
|
||||
{COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1}));
|
||||
|
||||
test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
|
||||
test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
|
||||
"<function=special_function>{\"arg1\": 1}</function>");
|
||||
@ -1187,8 +1222,6 @@ static void test_template_output_parsers() {
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
|
||||
/* .reasoning_in_content = */ false,
|
||||
/* .thinking_forced_open = */ false,
|
||||
}));
|
||||
assert_msg_equals(message_assist_thoughts_unopened_unparsed,
|
||||
common_chat_parse(
|
||||
@ -1197,8 +1230,6 @@ static void test_template_output_parsers() {
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
|
||||
/* .reasoning_in_content = */ false,
|
||||
/* .thinking_forced_open = */ false,
|
||||
}));
|
||||
assert_msg_equals(message_assist_thoughts,
|
||||
common_chat_parse(
|
||||
@ -1252,8 +1283,6 @@ static void test_template_output_parsers() {
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
|
||||
/* .reasoning_in_content = */ false,
|
||||
/* .thinking_forced_open = */ false,
|
||||
}));
|
||||
assert_msg_equals(message_assist_thoughts,
|
||||
common_chat_parse(
|
||||
@ -1295,8 +1324,6 @@ static void test_template_output_parsers() {
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_R1,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
|
||||
/* .reasoning_in_content = */ false,
|
||||
/* .thinking_forced_open = */ false,
|
||||
}));
|
||||
test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
|
||||
"<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>special_function\n"
|
||||
|
Reference in New Issue
Block a user