server : added --no-prefill-assistant flag (#13608)
* added no-prefill-assistant flag
* reworded documentation comment
* updated server README.md
@@ -583,6 +583,7 @@ static json oaicompat_completion_params_parse(const json & body) {
 static json oaicompat_completion_params_parse(
     const json & body, /* openai api json semantics */
     bool use_jinja,
+    bool prefill_assistant,
     common_reasoning_format reasoning_format,
     const struct common_chat_templates * tmpls,
     bool allow_non_text,
@@ -732,7 +733,7 @@ static json oaicompat_completion_params_parse(

     // if the assistant message appears at the end of list, we do not add end-of-turn token
     // for ex. this can be useful to modify the reasoning process in reasoning models
-    bool prefill_assistant_message = !inputs.messages.empty() && inputs.messages.back().role == "assistant";
+    bool prefill_assistant_message = !inputs.messages.empty() && inputs.messages.back().role == "assistant" && prefill_assistant;
     common_chat_msg last_message;
     if (prefill_assistant_message) {
         last_message = inputs.messages.back();
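The functional change is the extra `&& prefill_assistant` condition: a trailing assistant message is only left open for continuation when prefill is enabled, which is what lets `--no-prefill-assistant` switch the behavior off. Below is a minimal standalone sketch of that gate, using a simplified `chat_msg` struct as a stand-in for the server's actual `common_chat_msg`/template-input types (the names here are illustrative, not the real API):

    // Sketch only: simplified stand-in types, not llama.cpp's real structs.
    #include <iostream>
    #include <string>
    #include <vector>

    struct chat_msg {
        std::string role;
        std::string content;
    };

    // Mirrors the condition in the diff: the last message must exist, be an
    // assistant message, and prefill must not have been disabled via the
    // --no-prefill-assistant server flag.
    static bool should_prefill(const std::vector<chat_msg> & messages, bool prefill_assistant) {
        return !messages.empty()
            && messages.back().role == "assistant"
            && prefill_assistant;
    }

    int main() {
        std::vector<chat_msg> msgs = {
            {"user",      "Explain quicksort."},
            {"assistant", "Sure, let me think step by step:"}, // partial reply to continue
        };
        std::cout << std::boolalpha
                  << should_prefill(msgs, true)  << "\n"  // true: message left open for continuation
                  << should_prefill(msgs, false) << "\n"; // false: flag disables prefill
    }

When the gate returns false, the trailing assistant message is presumably treated like any completed turn, so the chat template appends its end-of-turn token as usual instead of leaving the message open for the model to continue.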