diff --git a/common/arg.cpp b/common/arg.cpp
index 0f01bb314..3d18aaa17 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -2949,11 +2949,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         "- deepseek: puts thoughts in `message.reasoning_content` (except in streaming mode, which behaves as `none`)\n"
         "(default: auto)",
         [](common_params & params, const std::string & value) {
-            /**/ if (value == "deepseek") { params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; }
-            else if (value == "deepseek-legacy") { params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY; }
-            else if (value == "none") { params.reasoning_format = COMMON_REASONING_FORMAT_NONE; }
-            else if (value == "auto") { params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; }
-            else { throw std::invalid_argument("invalid value"); }
+            params.reasoning_format = common_reasoning_format_from_name(value);
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN}).set_env("LLAMA_ARG_THINK"));
     add_opt(common_arg(
diff --git a/common/chat.cpp b/common/chat.cpp
index 316bd2417..92fbbbe11 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -625,6 +625,19 @@ const char * common_reasoning_format_name(common_reasoning_format format) {
     }
 }
 
+common_reasoning_format common_reasoning_format_from_name(const std::string & format) {
+    if (format == "none") {
+        return COMMON_REASONING_FORMAT_NONE;
+    } else if (format == "auto") {
+        return COMMON_REASONING_FORMAT_AUTO;
+    } else if (format == "deepseek") {
+        return COMMON_REASONING_FORMAT_DEEPSEEK;
+    } else if (format == "deepseek-legacy") {
+        return COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY;
+    }
+    throw std::runtime_error("Unknown reasoning format: " + format);
+}
+
 static std::string wrap_code_as_arguments(common_chat_msg_parser & builder, const std::string & code) {
     std::string arguments;
     if (builder.is_partial()) {
diff --git a/common/chat.h b/common/chat.h
index eb628d8bc..c4d6b2e85 100644
--- a/common/chat.h
+++ b/common/chat.h
@@ -191,6 +191,7 @@ std::string common_chat_format_example(
 
 const char* common_chat_format_name(common_chat_format format);
 const char* common_reasoning_format_name(common_reasoning_format format);
+common_reasoning_format common_reasoning_format_from_name(const std::string & format);
 common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax);
 
 common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);
diff --git a/tools/server/README.md b/tools/server/README.md
index 87cef7573..af9264ddd 100644
--- a/tools/server/README.md
+++ b/tools/server/README.md
@@ -1132,6 +1132,12 @@ The `response_format` parameter supports both plain JSON output (e.g. `{"type":
 
 `chat_template_kwargs`: Allows sending additional parameters to the json templating system. For example: `{"enable_thinking": false}`
 
+`reasoning_format`: The reasoning format to use when parsing the generated output; overrides the server-wide `--reasoning-format` setting for this request. If set to `none`, the raw generated text is returned unparsed.
+
+`thinking_forced_open`: Force a reasoning model to always output its reasoning. Only works with certain models.
+
+`parse_tool_calls`: Whether to parse tool calls from the generated output.
+
 *Examples:* You can use either Python `openai` library with appropriate checkpoints:
diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz
index 4e25eef17..a472714ef 100644
Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ
diff --git a/tools/server/server.cpp b/tools/server/server.cpp
index a255d481a..45f50e937 100644
--- a/tools/server/server.cpp
+++ b/tools/server/server.cpp
@@ -383,8 +383,12 @@ struct server_task {
         } else {
             params.oaicompat_chat_syntax.format = defaults.oaicompat_chat_syntax.format;
         }
-        params.oaicompat_chat_syntax.reasoning_format = params_base.reasoning_format;
-        params.oaicompat_chat_syntax.reasoning_in_content = params.stream && (params_base.reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY);
+        common_reasoning_format reasoning_format = params_base.reasoning_format;
+        if (data.contains("reasoning_format")) {
+            reasoning_format = common_reasoning_format_from_name(data.at("reasoning_format").get<std::string>());
+        }
+        params.oaicompat_chat_syntax.reasoning_format = reasoning_format;
+        params.oaicompat_chat_syntax.reasoning_in_content = params.stream && (reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY);
         params.oaicompat_chat_syntax.thinking_forced_open = json_value(data, "thinking_forced_open", false);
         params.oaicompat_chat_syntax.parse_tool_calls = json_value(data, "parse_tool_calls", false);
     }
diff --git a/tools/server/webui/src/utils/app.context.tsx b/tools/server/webui/src/utils/app.context.tsx
index 96cffd95a..0b95e88f8 100644
--- a/tools/server/webui/src/utils/app.context.tsx
+++ b/tools/server/webui/src/utils/app.context.tsx
@@ -209,6 +209,7 @@ export const AppContextProvider = ({
       messages,
       stream: true,
       cache_prompt: true,
+      reasoning_format: 'none',
      samplers: config.samplers,
      temperature: config.temperature,
      dynatemp_range: config.dynatemp_range,
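
Usage note: with this patch, a client can override the reasoning format per request instead of relying on the server-wide `--reasoning-format` flag, which is exactly what the webui change does (it sends `reasoning_format: 'none'` so it can render raw thought tags itself). Below is a minimal sketch of the same idea from a standalone client, assuming a local `llama-server` on its default `http://localhost:8080` and the OpenAI-compatible `/v1/chat/completions` endpoint; the helper name `chatRaw` is hypothetical.

```ts
// Minimal sketch, assuming llama-server on its default port and the
// OpenAI-compatible chat endpoint. `chatRaw` is a hypothetical helper.
async function chatRaw(prompt: string): Promise<string> {
  const res = await fetch('http://localhost:8080/v1/chat/completions', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      messages: [{ role: 'user', content: prompt }],
      // Per-request override added by this patch: with 'none' the server
      // skips reasoning parsing and returns the raw generated text instead
      // of splitting thoughts into message.reasoning_content.
      reasoning_format: 'none',
    }),
  });
  if (!res.ok) {
    throw new Error(`llama-server returned HTTP ${res.status}`);
  }
  const data = await res.json();
  return data.choices[0].message.content as string;
}
```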