mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-08-30 11:59:59 -04:00
gpt-oss: implement harmony parsing (#15181)
* model : add harmony parser for gpt-oss
* gpt-oss : fix grammar trigger from causing empty stack
* gpt-oss: tweak the grammar trigger again
* gpt-oss : add support for recipient in role header
* gpt-oss : fix ungrouped tool calls in grammar
* gpt-oss : loosen function name matching during parse
* gpt-oss : clean up workarounds
* gpt-oss : add template tests
* gpt-oss : simulate thinking and tool call tags
* gpt-oss : undo think tags when reasoning_format is none
* gpt-oss : set special tokens back to user defined
* gpt-oss : update openai-gpt-oss template
* server : filter out harmony thought messages
* gpt-oss : simplify parsing
This commit is contained in:
@@ -1439,6 +1439,188 @@ static void test_template_output_parsers() {
|
||||
/* expect_grammar_triggered= */ false
|
||||
);
|
||||
}
|
||||
{
|
||||
auto tmpls = read_templates("models/templates/openai-gpt-oss-120b.jinja");
|
||||
std::vector<std::string> end_tokens{ "<|return|>", "<|call|>" };
|
||||
|
||||
assert_equals(COMMON_CHAT_FORMAT_GPT_OSS, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
|
||||
assert_equals(COMMON_CHAT_FORMAT_GPT_OSS, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
|
||||
|
||||
assert_msg_equals(simple_assist_msg("", "I'm\nthink"),
|
||||
common_chat_parse(
|
||||
"<|channel|>analysis<|message|>I'm\nthink",
|
||||
/* is_partial= */ true,
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
|
||||
}));
|
||||
assert_msg_equals(simple_assist_msg("", "I'm\nthinking"),
|
||||
common_chat_parse(
|
||||
"<|channel|>analysis<|message|>I'm\nthinking<|end|>",
|
||||
/* is_partial= */ true,
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
|
||||
}));
|
||||
assert_msg_equals(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"),
|
||||
common_chat_parse(
|
||||
"<|channel|>analysis<|message|>I'm\nthinking<|end|>"
|
||||
"<|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's up?",
|
||||
/* is_partial= */ false,
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
|
||||
}));
|
||||
assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1"),
|
||||
common_chat_parse(
|
||||
"<|channel|>analysis<|message|>I'm\nthinking<|end|>"
|
||||
"<|start|>assistant<|channel|>commentary to=functions.special_function <|constrain|>json<|message|>{\"arg1",
|
||||
/* is_partial= */ true,
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
|
||||
}));
|
||||
assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1"),
|
||||
common_chat_parse(
|
||||
"<|channel|>analysis<|message|>I'm\nthinking<|end|>"
|
||||
"<|start|>assistant<|channel|>commentary to=functions.special_function<|message|>{\"arg1",
|
||||
/* is_partial= */ true,
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
|
||||
}));
|
||||
assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}"),
|
||||
common_chat_parse(
|
||||
"<|channel|>analysis<|message|>I'm\nthinking<|end|>"
|
||||
"<|start|>assistant<|channel|>commentary to=functions.special_function <|constrain|>json<|message|>{\"arg1\": 1}",
|
||||
/* is_partial= */ false,
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
|
||||
}));
|
||||
assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}"),
|
||||
common_chat_parse(
|
||||
"<|channel|>analysis<|message|>I'm\nthinking<|end|>"
|
||||
"<|start|>assistant<|channel|>analysis to=functions.special_function <|constrain|>json<|message|>{\"arg1\": 1}",
|
||||
/* is_partial= */ false,
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
|
||||
}));
|
||||
assert_msg_equals(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"),
|
||||
common_chat_parse(
|
||||
"<|channel|>analysis<|message|>I'm\nthinking<|end|>"
|
||||
"<|start|>assistant<|channel|>commentary<|message|>Hello, world!\nWhat's up?",
|
||||
/* is_partial= */ true,
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
|
||||
}));
|
||||
assert_msg_equals(simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking", "special_function", "{\"arg1\": 1}"),
|
||||
common_chat_parse(
|
||||
"<|channel|>analysis<|message|>I'm\nthinking<|end|>"
|
||||
"<|start|>assistant<|channel|>commentary<|message|>Hello, world!\nWhat's up?<|end|>"
|
||||
"<|start|>assistant<|channel|>commentary to=functions.special_function <|constrain|>json<|message|>{\"arg1\": 1}",
|
||||
/* is_partial= */ true,
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
|
||||
}));
|
||||
|
||||
// Test parse_tool_calls == false
|
||||
assert_msg_equals(
|
||||
simple_assist_msg("Hello, world!\nWhat's up?", "I'm\nthinking"),
|
||||
common_chat_parse(
|
||||
"<|channel|>analysis<|message|>I'm\nthinking<|end|>"
|
||||
"<|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's up?",
|
||||
/* is_partial= */ true,
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
|
||||
/* .reasoning_in_content = */ false,
|
||||
/* .thinking_forced_open = */ false,
|
||||
/* .parse_tool_calls = */ false,
|
||||
}));
|
||||
assert_msg_equals(
|
||||
simple_assist_msg("", "I'm\nthinking"),
|
||||
common_chat_parse(
|
||||
"<|channel|>analysis<|message|>I'm\nthinking<|end|>"
|
||||
"<|start|>assistant<|channel|>commentary to=functions.special_function<|message|>{\"arg1",
|
||||
/* is_partial= */ true,
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
|
||||
/* .reasoning_in_content = */ false,
|
||||
/* .thinking_forced_open = */ false,
|
||||
/* .parse_tool_calls = */ false,
|
||||
}));
|
||||
assert_msg_equals(
|
||||
simple_assist_msg("", "I'm\nthinking"),
|
||||
common_chat_parse(
|
||||
"<|channel|>analysis<|message|>I'm\nthinking<|end|>"
|
||||
"<|start|>assistant<|channel|>commentary to=functions.special_function <|constrain|>json<|message|>{\"arg1\": 1}",
|
||||
/* is_partial= */ false,
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
|
||||
/* .reasoning_in_content = */ false,
|
||||
/* .thinking_forced_open = */ false,
|
||||
/* .parse_tool_calls = */ false,
|
||||
}));
|
||||
|
||||
// Test reasoning formats
|
||||
assert_msg_equals(
|
||||
simple_assist_msg(
|
||||
"<|channel|>analysis<|message|>I'm\nthinking<|end|>Hello, world!\nWhat's up?"),
|
||||
common_chat_parse(
|
||||
"<|channel|>analysis<|message|>I'm\nthinking<|end|>"
|
||||
"<|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's up?",
|
||||
/* is_partial= */ false,
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
|
||||
}));
|
||||
|
||||
assert_msg_equals(
|
||||
simple_assist_msg(
|
||||
"<|channel|>analysis<|message|>I'm\nthinking<|end|>Hello, world!\nWhat's up?"),
|
||||
common_chat_parse(
|
||||
"<|channel|>analysis<|message|>I'm\nthinking<|end|>"
|
||||
"<|start|>assistant<|channel|>final<|message|>Hello, world!\nWhat's up?",
|
||||
/* is_partial= */ false,
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
|
||||
/* .reasoning_in_content = */ true,
|
||||
}));
|
||||
|
||||
// Test tool calling in role header
|
||||
assert_msg_equals(simple_assist_msg("", "", "special_function", "{\"arg1\": 1}"),
|
||||
common_chat_parse(
|
||||
" to=functions.special_function<|channel|>commentary <|constrain|>json<|message|>{\"arg1\": 1}",
|
||||
/* is_partial= */ false,
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
|
||||
}));
|
||||
assert_msg_equals(simple_assist_msg("", "", "special_function", "{\"arg1\": 1}"),
|
||||
common_chat_parse(
|
||||
" to=functions.special_function<|channel|>analysis <|constrain|>json<|message|>{\"arg1\": 1}",
|
||||
/* is_partial= */ false,
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
|
||||
}));
|
||||
assert_msg_equals(simple_assist_msg("", "I'm\nthinking", "special_function", "{\"arg1\": 1}"),
|
||||
common_chat_parse(
|
||||
"<|channel|>analysis<|message|>I'm\nthinking<|end|>"
|
||||
"<|start|>assistant to=functions.special_function<|channel|>analysis <|constrain|>json<|message|>{\"arg1\": 1}",
|
||||
/* is_partial= */ false,
|
||||
{
|
||||
/* .format = */ COMMON_CHAT_FORMAT_GPT_OSS,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_AUTO,
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
static void test_msg_diffs_compute() {
|
||||
|
Reference in New Issue
Block a user