mirror of https://github.com/ggml-org/llama.cpp.git
server : added --no-prefill-assistant flag (#13608)
* added no-prefill-assistant flag (see the sketch below)
* reworded documentation comment
* updated server README.md
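For context, here is a minimal sketch of the behavior such a flag typically gates; it assumes a hypothetical chat_msg message type and a prefill_assistant boolean, and is not the actual server.cpp implementation. When a request ends with an assistant message and prefill is enabled, the model continues that message; with --no-prefill-assistant the message is treated as an already-finished turn.

#include <string>
#include <vector>

// Hypothetical message type, for illustration only.
struct chat_msg {
    std::string role;
    std::string content;
};

// Sketch: return the text the model should continue from. When prefill is
// enabled and the last message comes from the assistant, generation resumes
// from its content; otherwise the model starts a fresh assistant turn.
static std::string assistant_prefill(const std::vector<chat_msg> & messages,
                                     bool prefill_assistant) {
    if (prefill_assistant && !messages.empty() && messages.back().role == "assistant") {
        return messages.back().content;
    }
    return "";
}

Under this reading, params.prefill_assistant in the hunks below simply forwards the parsed CLI setting into oaicompat_completion_params_parse so the OpenAI-compatible endpoints can apply or skip that continuation.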
@@ -4348,6 +4348,7 @@ int main(int argc, char ** argv) {
         json data = oaicompat_completion_params_parse(
             body,
             params.use_jinja,
+            params.prefill_assistant,
             params.reasoning_format,
             ctx_server.chat_templates.get(),
             ctx_server.mctx,
@@ -4369,6 +4370,7 @@ int main(int argc, char ** argv) {
         json data = oaicompat_completion_params_parse(
             body,
             params.use_jinja,
+            params.prefill_assistant,
             params.reasoning_format,
             ctx_server.chat_templates.get(),
             ctx_server.mctx,