mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-26 11:45:21 +00:00
server
: add --reasoning-budget 0
to disable thinking (incl. qwen3 w/ enable_thinking:false) (#13771)
--------- Co-authored-by: ochafik <ochafik@google.com> Co-authored-by: Xuan-Son Nguyen <thichthat@gmail.com>
This commit is contained in:
@ -737,14 +737,14 @@ static void test_template_output_parsers() {
|
||||
auto tmpls = read_templates("models/templates/Qwen-QwQ-32B.jinja");
|
||||
std::vector<std::string> end_tokens{ "<|im_end|>" };
|
||||
|
||||
assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
|
||||
assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
|
||||
assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
|
||||
}
|
||||
{
|
||||
auto tmpls = read_templates("models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja");
|
||||
std::vector<std::string> end_tokens{ "<|im_end|>" };
|
||||
|
||||
assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
|
||||
assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
|
||||
assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
|
||||
assert_equals(
|
||||
COMMON_CHAT_FORMAT_HERMES_2_PRO,
|
||||
|
Reference in New Issue
Block a user