server: add --reasoning-budget 0 to disable thinking (incl. qwen3 w/ enable_thinking:false) (#13771)

---------

Co-authored-by: ochafik <ochafik@google.com>
Co-authored-by: Xuan-Son Nguyen <thichthat@gmail.com>
This commit is contained in:
Olivier Chafik
2025-05-26 00:30:51 +01:00
committed by GitHub
parent 2f099b510f
commit e121edc432
12 changed files with 277 additions and 107 deletions

View File

@ -737,14 +737,14 @@ static void test_template_output_parsers() {
auto tmpls = read_templates("models/templates/Qwen-QwQ-32B.jinja");
std::vector<std::string> end_tokens{ "<|im_end|>" };
assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
}
{
auto tmpls = read_templates("models/templates/NousResearch-Hermes-2-Pro-Llama-3-8B-tool_use.jinja");
std::vector<std::string> end_tokens{ "<|im_end|>" };
assert_equals(COMMON_CHAT_FORMAT_CONTENT_ONLY, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_no_tools).format);
assert_equals(COMMON_CHAT_FORMAT_HERMES_2_PRO, common_chat_templates_apply(tmpls.get(), inputs_tools).format);
assert_equals(
COMMON_CHAT_FORMAT_HERMES_2_PRO,