mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-07-28 03:55:06 -04:00
Adding logprobs to /v1/completions (#11344)
Signed-off-by: Jiri Podivin <jpodivin@redhat.com>
This commit is contained in:
@ -267,6 +267,11 @@ struct server_task {
|
|||||||
params.speculative.n_min = std::max(params.speculative.n_min, 2);
|
params.speculative.n_min = std::max(params.speculative.n_min, 2);
|
||||||
params.speculative.n_max = std::max(params.speculative.n_max, 0);
|
params.speculative.n_max = std::max(params.speculative.n_max, 0);
|
||||||
|
|
||||||
|
// Fall back to the OpenAI-style "logprobs" field only when n_probs still equals its default (i.e. was not explicitly set to another value)
|
||||||
|
if (data.contains("logprobs") && params.sampling.n_probs == defaults.sampling.n_probs){
|
||||||
|
params.sampling.n_probs = json_value(data, "logprobs", defaults.sampling.n_probs);
|
||||||
|
}
|
||||||
|
|
||||||
if (data.contains("lora")) {
|
if (data.contains("lora")) {
|
||||||
if (data.at("lora").is_array()) {
|
if (data.at("lora").is_array()) {
|
||||||
params.lora = parse_lora_request(params_base.lora_adapters, data.at("lora"));
|
params.lora = parse_lora_request(params_base.lora_adapters, data.at("lora"));
|
||||||
|
Reference in New Issue
Block a user