mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-28 12:25:03 +00:00
server : fix first message identification (#13634)
* server : fix first message identification When using the OpenAI SDK (https://github.com/openai/openai-node/blob/master/src/lib/ChatCompletionStream.ts#L623-L626) we noticed that the expected assistant role is missing in the first streaming message. Fix this by correctly checking for the first message. Co-authored-by: Piotr Stankiewicz <piotr.stankiewicz@docker.com> Signed-off-by: Dorin Geman <dorin.geman@docker.com> * server : Fix checks for first role message for stream=True Co-authored-by: Piotr Stankiewicz <piotr.stankiewicz@docker.com> Signed-off-by: Dorin Geman <dorin.geman@docker.com> --------- Signed-off-by: Dorin Geman <dorin.geman@docker.com> Co-authored-by: Piotr Stankiewicz <piotr.stankiewicz@docker.com>
This commit is contained in:
committed by
GitHub
parent
797f2ac062
commit
42158ae2e8
@ -951,7 +951,7 @@ struct server_task_result_cmpl_partial : server_task_result {
|
||||
}
|
||||
|
||||
json to_json_oaicompat_chat() {
|
||||
bool first = n_decoded == 0;
|
||||
bool first = n_decoded == 1;
|
||||
std::time_t t = std::time(0);
|
||||
json choices;
|
||||
|
||||
@ -962,15 +962,18 @@ struct server_task_result_cmpl_partial : server_task_result {
|
||||
{"delta", json{{"role", "assistant"}}}}});
|
||||
} else {
|
||||
// We have to send this as two updates to conform to openai behavior
|
||||
// initial_ret is the role message for stream=True
|
||||
json initial_ret = json{{"choices", json::array({json{
|
||||
{"finish_reason", nullptr},
|
||||
{"index", 0},
|
||||
{"delta", json{
|
||||
{"role", "assistant"}
|
||||
{"role", "assistant"},
|
||||
{"content", ""}
|
||||
}}}})},
|
||||
{"created", t},
|
||||
{"id", oaicompat_cmpl_id},
|
||||
{"model", oaicompat_model},
|
||||
{"system_fingerprint", build_info},
|
||||
{"object", "chat.completion.chunk"}};
|
||||
|
||||
json second_ret = json{
|
||||
@ -982,8 +985,19 @@ struct server_task_result_cmpl_partial : server_task_result {
|
||||
{"created", t},
|
||||
{"id", oaicompat_cmpl_id},
|
||||
{"model", oaicompat_model},
|
||||
{"system_fingerprint", build_info},
|
||||
{"object", "chat.completion.chunk"}};
|
||||
|
||||
if (prob_output.probs.size() > 0) {
|
||||
second_ret["choices"][0]["logprobs"] = json{
|
||||
{"content", completion_token_output::probs_vector_to_json({prob_output}, post_sampling_probs)},
|
||||
};
|
||||
}
|
||||
|
||||
if (timings.prompt_n >= 0) {
|
||||
second_ret.push_back({"timings", timings.to_json()});
|
||||
}
|
||||
|
||||
return std::vector<json>({initial_ret, second_ret});
|
||||
}
|
||||
} else {
|
||||
|
Reference in New Issue
Block a user