server: add --reasoning-budget 0 to disable thinking (incl. qwen3 w/ enable_thinking:false) (#13771)

---------

Co-authored-by: ochafik <ochafik@google.com>
Co-authored-by: Xuan-Son Nguyen <thichthat@gmail.com>
This commit is contained in:
Olivier Chafik
2025-05-26 00:30:51 +01:00
committed by GitHub
parent 2f099b510f
commit e121edc432
12 changed files with 277 additions and 107 deletions

View File

@@ -84,7 +84,8 @@ class ServerProcess:
draft_max: int | None = None
no_webui: bool | None = None
jinja: bool | None = None
-reasoning_format: Literal['deepseek', 'none'] | None = None
+reasoning_format: Literal['deepseek', 'none', 'nothink'] | None = None
+reasoning_budget: int | None = None
chat_template: str | None = None
chat_template_file: str | None = None
server_path: str | None = None
@@ -191,6 +192,8 @@ class ServerProcess:
server_args.append("--jinja")
if self.reasoning_format is not None:
server_args.extend(("--reasoning-format", self.reasoning_format))
+if self.reasoning_budget is not None:
+server_args.extend(("--reasoning-budget", self.reasoning_budget))
if self.chat_template:
server_args.extend(["--chat-template", self.chat_template])
if self.chat_template_file: