server : send token probs for "stream == false" (#4714)

Georgi Gerganov
2024-01-04 19:56:33 +02:00
committed by GitHub
parent a91928014f
commit 012cf349ae

@@ -1325,7 +1325,7 @@ struct llama_server_context
             {
                 probs = std::vector<completion_token_output>(
                                     slot.generated_token_probs.begin(),
-                                    slot.generated_token_probs.begin() + slot.sent_token_probs_index);
+                                    slot.generated_token_probs.end());
             }
             res.result_json["completion_probabilities"] = probs_vector_to_json(ctx, probs);
         }
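
A rough standalone sketch of why the change matters (not the server code itself: completion_token_output is simplified here, and reading sent_token_probs_index as "how many probabilities were already streamed" is an assumption based on the diff). With stream == false nothing has been streamed yet, so slicing up to the sent index yields an empty vector, while slicing to end() keeps every generated token's probabilities:

#include <cstdio>
#include <string>
#include <vector>

// Simplified stand-in for the server's completion_token_output struct.
struct completion_token_output {
    std::string text;
    float prob;
};

int main() {
    std::vector<completion_token_output> generated_token_probs = {
        {"Hello", 0.91f}, {",", 0.87f}, {" world", 0.95f}};
    size_t sent_token_probs_index = 0; // non-streaming: nothing sent incrementally yet

    // Old behaviour: slice up to the "sent" index, which is empty when nothing was streamed.
    std::vector<completion_token_output> old_probs(
        generated_token_probs.begin(),
        generated_token_probs.begin() + sent_token_probs_index);

    // Fixed behaviour: copy everything that was generated.
    std::vector<completion_token_output> new_probs(
        generated_token_probs.begin(),
        generated_token_probs.end());

    std::printf("old: %zu entries, fixed: %zu entries\n",
                old_probs.size(), new_probs.size());
}

In the streaming case the truncation to the sent index remains appropriate, since those probabilities have already been delivered with earlier chunks; the fix only changes what the final non-streaming response carries.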