mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-28 20:25:20 +00:00
server
: extract <think> tags from qwq outputs (#12297)
* extract <think> tags from qwq outputs * const for all static regexes in chat.cpp
This commit is contained in:
@ -445,6 +445,7 @@ std::string common_chat_format_name(common_chat_format format) {
|
|||||||
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return "Functionary v3.2";
|
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return "Functionary v3.2";
|
||||||
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return "Functionary v3.1 Llama 3.1";
|
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return "Functionary v3.1 Llama 3.1";
|
||||||
case COMMON_CHAT_FORMAT_HERMES_2_PRO: return "Hermes 2 Pro";
|
case COMMON_CHAT_FORMAT_HERMES_2_PRO: return "Hermes 2 Pro";
|
||||||
|
case COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING: return "Hermes 2 Pro (extract reasoning)";
|
||||||
case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B";
|
case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B";
|
||||||
case COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING: return "Command R7B (extract reasoning)";
|
case COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING: return "Command R7B (extract reasoning)";
|
||||||
default:
|
default:
|
||||||
@ -878,9 +879,9 @@ static common_chat_params common_chat_params_init_command_r7b(const common_chat_
|
|||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
static common_chat_msg common_chat_parse_command_r7b(const std::string & input, bool extract_reasoning) {
|
static common_chat_msg common_chat_parse_command_r7b(const std::string & input, bool extract_reasoning) {
|
||||||
static std::regex thought_regex("(<\\|START_THINKING\\|>([\\s\\S]*?)<\\|END_THINKING\\|>)([\\s\\S]*)");
|
static const std::regex thought_regex("(<\\|START_THINKING\\|>([\\s\\S]*?)<\\|END_THINKING\\|>)([\\s\\S]*)");
|
||||||
static std::regex action_regex("<\\|START_ACTION\\|>([\\s\\S]*?)<\\|END_ACTION\\|>");
|
static const std::regex action_regex("<\\|START_ACTION\\|>([\\s\\S]*?)<\\|END_ACTION\\|>");
|
||||||
static std::regex response_regex("(?:<\\|START_RESPONSE\\|>)?([\\s\\S]*?)<\\|END_RESPONSE\\|>");
|
static const std::regex response_regex("(?:<\\|START_RESPONSE\\|>)?([\\s\\S]*?)<\\|END_RESPONSE\\|>");
|
||||||
|
|
||||||
std::smatch match;
|
std::smatch match;
|
||||||
|
|
||||||
@ -1012,10 +1013,10 @@ static common_chat_params common_chat_params_init_llama_3_1_tool_calls(const com
|
|||||||
}
|
}
|
||||||
static common_chat_msg common_chat_parse_llama_3_1(const std::string & input, bool with_builtin_tools = false) {
|
static common_chat_msg common_chat_parse_llama_3_1(const std::string & input, bool with_builtin_tools = false) {
|
||||||
// TODO: tighten & simplify the parser, don't accept leading text context.
|
// TODO: tighten & simplify the parser, don't accept leading text context.
|
||||||
static std::regex function_regex(
|
static const std::regex function_regex(
|
||||||
"\\s*\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\"([^\"]+)\"\\s*,\\s*\"parameters\"\\s*: ");
|
"\\s*\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\"([^\"]+)\"\\s*,\\s*\"parameters\"\\s*: ");
|
||||||
static std::regex close_regex("\\}\\s*");
|
static const std::regex close_regex("\\}\\s*");
|
||||||
static std::regex builtin_call_regex("<\\|python_tag\\|>\\s*([^.(]+)\\s*\\.\\s*call\\s*\\(\\s*([\\w]+)\\s*=\\s*([\\s\\S]*?)\\)");
|
static const std::regex builtin_call_regex("<\\|python_tag\\|>\\s*([^.(]+)\\s*\\.\\s*call\\s*\\(\\s*([\\w]+)\\s*=\\s*([\\s\\S]*?)\\)");
|
||||||
|
|
||||||
if (with_builtin_tools) {
|
if (with_builtin_tools) {
|
||||||
std::smatch match;
|
std::smatch match;
|
||||||
@ -1105,34 +1106,42 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
|
|||||||
data.format = inputs.extract_reasoning ? COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING : COMMON_CHAT_FORMAT_DEEPSEEK_R1;
|
data.format = inputs.extract_reasoning ? COMMON_CHAT_FORMAT_DEEPSEEK_R1_EXTRACT_REASONING : COMMON_CHAT_FORMAT_DEEPSEEK_R1;
|
||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
|
static common_chat_msg handle_think_tag_prelude(const std::string & input, bool extract_reasoning, const std::function<common_chat_msg(const std::string &)> & rest_parser) {
|
||||||
|
std::smatch match;
|
||||||
|
static const std::regex reasoning_content_regex("((?:<think>)?([\\s\\S\\r\\n]*?)</think>)?([\\s\\S\\r\\n]*)");
|
||||||
|
if (std::regex_match(input, match, reasoning_content_regex)) {
|
||||||
|
auto rest = match[3].str();
|
||||||
|
auto msg = rest_parser(rest);
|
||||||
|
auto reasoning_content = string_strip(match[2].str());
|
||||||
|
if (extract_reasoning) {
|
||||||
|
msg.reasoning_content = reasoning_content;
|
||||||
|
} else if (!reasoning_content.empty()) {
|
||||||
|
std::ostringstream content;
|
||||||
|
content << "<think>" << reasoning_content << "</think>" << msg.content;
|
||||||
|
msg.content = content.str();
|
||||||
|
}
|
||||||
|
return msg;
|
||||||
|
}
|
||||||
|
return rest_parser(input);
|
||||||
|
}
|
||||||
static common_chat_msg common_chat_parse_deepseek_r1(const std::string & input, bool extract_reasoning) {
|
static common_chat_msg common_chat_parse_deepseek_r1(const std::string & input, bool extract_reasoning) {
|
||||||
static std::regex function_regex("<|tool▁call▁begin|>function<|tool▁sep|>([^\n]+)\n```json\n");
|
return handle_think_tag_prelude(input, extract_reasoning, [](const std::string & input) {
|
||||||
static std::regex close_regex("```[\\s\\r\\n]*<|tool▁call▁end|>");
|
static const std::regex function_regex("<|tool▁call▁begin|>function<|tool▁sep|>([^\n]+)\n```json\n");
|
||||||
static std::regex reasoning_content_regex("((?:<think>)?([\\s\\S\\r\\n]*?)</think>)?([\\s\\S\\r\\n]*)");
|
static const std::regex close_regex("```[\\s\\r\\n]*<|tool▁call▁end|>");
|
||||||
static std::regex tool_calls_regex("[\\s\\r\\n]*(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>)([\\s\\S\\r\\n]*?)<|tool▁calls▁end|>");
|
static const std::regex tool_calls_regex("[\\s\\r\\n]*(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>)([\\s\\S\\r\\n]*?)<|tool▁calls▁end|>");
|
||||||
|
|
||||||
common_chat_msg msg;
|
common_chat_msg msg;
|
||||||
msg.role = "assistant";
|
msg.role = "assistant";
|
||||||
std::smatch match;
|
std::smatch match;
|
||||||
if (std::regex_match(input, match, reasoning_content_regex)) {
|
if (std::regex_search(input, match, tool_calls_regex)) {
|
||||||
std::string rest;
|
|
||||||
if (extract_reasoning) {
|
|
||||||
msg.reasoning_content = string_strip(match[2].str());
|
|
||||||
} else {
|
|
||||||
msg.content = match[1].str();
|
|
||||||
}
|
|
||||||
rest = match[3].str();
|
|
||||||
|
|
||||||
if (std::regex_search(rest, match, tool_calls_regex)) {
|
|
||||||
auto tool_calls = match[1].str();
|
auto tool_calls = match[1].str();
|
||||||
auto msg2 = parse_json_tool_calls(tool_calls, std::nullopt, function_regex, close_regex);
|
auto msg2 = parse_json_tool_calls(tool_calls, std::nullopt, function_regex, close_regex);
|
||||||
msg.tool_calls = std::move(msg2.tool_calls);
|
msg.tool_calls = std::move(msg2.tool_calls);
|
||||||
} else {
|
|
||||||
msg.content += std::string(rest.begin() + rest.find_first_not_of(" \r\n"), rest.end());
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
msg.content = input;
|
msg.content = input;
|
||||||
}
|
}
|
||||||
return msg;
|
return msg;
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
||||||
@ -1237,8 +1246,8 @@ static common_chat_params common_chat_params_init_functionary_v3_2(const common_
|
|||||||
}
|
}
|
||||||
|
|
||||||
static common_chat_msg common_chat_parse_functionary_v3_2(const std::string & input) {
|
static common_chat_msg common_chat_parse_functionary_v3_2(const std::string & input) {
|
||||||
static std::regex function_regex(R"((?:>>>)?(?:assistant<|end_header_id|>\n)?(\w+)\n)");
|
static const std::regex function_regex(R"((?:>>>)?(?:assistant<|end_header_id|>\n)?(\w+)\n)");
|
||||||
static std::regex close_regex(R"($|(?=>>>))");
|
static const std::regex close_regex(R"($|(?=>>>))");
|
||||||
|
|
||||||
std::string content;
|
std::string content;
|
||||||
auto it = input.begin();
|
auto it = input.begin();
|
||||||
@ -1327,7 +1336,7 @@ static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(con
|
|||||||
}
|
}
|
||||||
static common_chat_msg common_chat_parse_functionary_v3_1_llama_3_1(const std::string & input) {
|
static common_chat_msg common_chat_parse_functionary_v3_1_llama_3_1(const std::string & input) {
|
||||||
// This version of Functionary still supports the llama 3.1 tool call format for the python tool.
|
// This version of Functionary still supports the llama 3.1 tool call format for the python tool.
|
||||||
static std::regex python_tag_regex(R"(<\|python_tag\|>([\s\S\n]*)$)");
|
static const std::regex python_tag_regex(R"(<\|python_tag\|>([\s\S\n]*)$)");
|
||||||
std::smatch match;
|
std::smatch match;
|
||||||
if (std::regex_search(input, match, python_tag_regex)) {
|
if (std::regex_search(input, match, python_tag_regex)) {
|
||||||
auto code = match[1].str();
|
auto code = match[1].str();
|
||||||
@ -1341,8 +1350,8 @@ static common_chat_msg common_chat_parse_functionary_v3_1_llama_3_1(const std::s
|
|||||||
});
|
});
|
||||||
return msg;
|
return msg;
|
||||||
}
|
}
|
||||||
static std::regex function_regex(R"(<function=(\w+)>)");
|
static const std::regex function_regex(R"(<function=(\w+)>)");
|
||||||
static std::regex close_regex(R"(</function>)");
|
static const std::regex close_regex(R"(</function>)");
|
||||||
// TODO: tighten & simplify.
|
// TODO: tighten & simplify.
|
||||||
return parse_json_tool_calls(input, std::nullopt, function_regex, close_regex);
|
return parse_json_tool_calls(input, std::nullopt, function_regex, close_regex);
|
||||||
}
|
}
|
||||||
@ -1409,6 +1418,8 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
|
|||||||
"(?:```(?:json|xml)?\n\\s*)?(?:<function_call>|<tools>|<xml><json>|<response>)?\\s*\\{\\s*\"", //name\"\\s*:\\s*\"" + escaped_name + "\"",
|
"(?:```(?:json|xml)?\n\\s*)?(?:<function_call>|<tools>|<xml><json>|<response>)?\\s*\\{\\s*\"", //name\"\\s*:\\s*\"" + escaped_name + "\"",
|
||||||
});
|
});
|
||||||
data.preserved_tokens = {
|
data.preserved_tokens = {
|
||||||
|
"<think>",
|
||||||
|
"</think>",
|
||||||
"<tool_call>",
|
"<tool_call>",
|
||||||
"</tool_call>",
|
"</tool_call>",
|
||||||
"<function",
|
"<function",
|
||||||
@ -1429,11 +1440,12 @@ static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat
|
|||||||
});
|
});
|
||||||
|
|
||||||
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
||||||
data.format = COMMON_CHAT_FORMAT_HERMES_2_PRO;
|
data.format = inputs.extract_reasoning ? COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING : COMMON_CHAT_FORMAT_HERMES_2_PRO;
|
||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
static common_chat_msg common_chat_parse_hermes_2_pro(const std::string& input) {
|
static common_chat_msg common_chat_parse_hermes_2_pro(const std::string& input, bool extract_reasoning) {
|
||||||
const static std::regex open_regex(
|
return handle_think_tag_prelude(input, extract_reasoning, [](const std::string & input) {
|
||||||
|
static const std::regex open_regex(
|
||||||
"(?:"
|
"(?:"
|
||||||
"(```(?:xml|json)?\\n\\s*)?" // match 1 (block_start)
|
"(```(?:xml|json)?\\n\\s*)?" // match 1 (block_start)
|
||||||
"(<tool_call>" // match 2 (open_tag)
|
"(<tool_call>" // match 2 (open_tag)
|
||||||
@ -1454,7 +1466,6 @@ static common_chat_msg common_chat_parse_hermes_2_pro(const std::string& input)
|
|||||||
);
|
);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
|
||||||
common_chat_msg msg;
|
common_chat_msg msg;
|
||||||
msg.role = "assistant";
|
msg.role = "assistant";
|
||||||
|
|
||||||
@ -1545,6 +1556,7 @@ static common_chat_msg common_chat_parse_hermes_2_pro(const std::string& input)
|
|||||||
msg.content = input;
|
msg.content = input;
|
||||||
return msg;
|
return msg;
|
||||||
}
|
}
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
||||||
@ -1609,6 +1621,11 @@ static common_chat_params common_chat_templates_apply_jinja(
|
|||||||
return common_chat_params_init_command_r7b(tmpl, params);
|
return common_chat_params_init_command_r7b(tmpl, params);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
|
||||||
|
if (src.find("<tool_call>") != std::string::npos && params.json_schema.is_null()) {
|
||||||
|
return common_chat_params_init_hermes_2_pro(tmpl, params);
|
||||||
|
}
|
||||||
|
|
||||||
// Use generic handler when mixing tools + JSON schema.
|
// Use generic handler when mixing tools + JSON schema.
|
||||||
// TODO: support that mix in handlers below.
|
// TODO: support that mix in handlers below.
|
||||||
if ((params.tools.is_array() && params.json_schema.is_object())) {
|
if ((params.tools.is_array() && params.json_schema.is_object())) {
|
||||||
@ -1630,11 +1647,6 @@ static common_chat_params common_chat_templates_apply_jinja(
|
|||||||
return common_chat_params_init_without_tools(tmpl, params);
|
return common_chat_params_init_without_tools(tmpl, params);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Hermes 2/3 Pro, Qwen 2.5 Instruct (w/ tools)
|
|
||||||
if (src.find("<tool_call>") != std::string::npos) {
|
|
||||||
return common_chat_params_init_hermes_2_pro(tmpl, params);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Functionary v3.1 (w/ tools)
|
// Functionary v3.1 (w/ tools)
|
||||||
if (src.find("<|start_header_id|>") != std::string::npos
|
if (src.find("<|start_header_id|>") != std::string::npos
|
||||||
&& src.find("<function=") != std::string::npos) {
|
&& src.find("<function=") != std::string::npos) {
|
||||||
@ -1752,7 +1764,9 @@ common_chat_msg common_chat_parse(const std::string & input, common_chat_format
|
|||||||
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1:
|
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1:
|
||||||
return common_chat_parse_functionary_v3_1_llama_3_1(input);
|
return common_chat_parse_functionary_v3_1_llama_3_1(input);
|
||||||
case COMMON_CHAT_FORMAT_HERMES_2_PRO:
|
case COMMON_CHAT_FORMAT_HERMES_2_PRO:
|
||||||
return common_chat_parse_hermes_2_pro(input);
|
return common_chat_parse_hermes_2_pro(input, /* extract_reasoning= */ false);
|
||||||
|
case COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING:
|
||||||
|
return common_chat_parse_hermes_2_pro(input, /* extract_reasoning= */ true);
|
||||||
case COMMON_CHAT_FORMAT_FIREFUNCTION_V2:
|
case COMMON_CHAT_FORMAT_FIREFUNCTION_V2:
|
||||||
return common_chat_parse_firefunction_v2(input);
|
return common_chat_parse_firefunction_v2(input);
|
||||||
case COMMON_CHAT_FORMAT_COMMAND_R7B:
|
case COMMON_CHAT_FORMAT_COMMAND_R7B:
|
||||||
|
@ -53,6 +53,7 @@ enum common_chat_format {
|
|||||||
COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2,
|
COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2,
|
||||||
COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
|
COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
|
||||||
COMMON_CHAT_FORMAT_HERMES_2_PRO,
|
COMMON_CHAT_FORMAT_HERMES_2_PRO,
|
||||||
|
COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING,
|
||||||
COMMON_CHAT_FORMAT_COMMAND_R7B,
|
COMMON_CHAT_FORMAT_COMMAND_R7B,
|
||||||
COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING,
|
COMMON_CHAT_FORMAT_COMMAND_R7B_EXTRACT_REASONING,
|
||||||
|
|
||||||
|
@ -766,6 +766,19 @@ static void test_template_output_parsers() {
|
|||||||
"{\n \"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}",
|
"{\n \"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}",
|
||||||
COMMON_CHAT_FORMAT_HERMES_2_PRO));
|
COMMON_CHAT_FORMAT_HERMES_2_PRO));
|
||||||
|
|
||||||
|
assert_msg_equals(message_assist_thoughts_unparsed_think,
|
||||||
|
common_chat_parse("<think>I'm thinking</think>Hello, world!\nWhat's up?",
|
||||||
|
COMMON_CHAT_FORMAT_HERMES_2_PRO));
|
||||||
|
assert_msg_equals(message_assist_thoughts_unparsed_think,
|
||||||
|
common_chat_parse("I'm thinking</think>Hello, world!\nWhat's up?",
|
||||||
|
COMMON_CHAT_FORMAT_HERMES_2_PRO));
|
||||||
|
assert_msg_equals(message_assist_thoughts,
|
||||||
|
common_chat_parse("<think>I'm thinking</think>Hello, world!\nWhat's up?",
|
||||||
|
COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING));
|
||||||
|
assert_msg_equals(message_assist_thoughts,
|
||||||
|
common_chat_parse("I'm thinking</think>Hello, world!\nWhat's up?",
|
||||||
|
COMMON_CHAT_FORMAT_HERMES_2_PRO_EXTRACT_REASONING));
|
||||||
|
|
||||||
test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
|
test_templates(tmpls.get(), end_tokens, message_assist, tools, "Hello, world!\nWhat's up?", /* expect_grammar_triggered= */ false);
|
||||||
test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
|
test_templates(tmpls.get(), end_tokens, message_assist_call, tools,
|
||||||
"<tool_call>\n"
|
"<tool_call>\n"
|
||||||
|
Reference in New Issue
Block a user