Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-06-27 03:55:20 +00:00)
server : passthrough the /models endpoint during loading (#13535)
* server : passthrough the /models endpoint during loading
* server : update readme + return json for "meta" field
@@ -1040,7 +1040,7 @@ To know the `id` of the adapter, use GET `/lora-adapters`
 
 Returns information about the loaded model. See [OpenAI Models API documentation](https://platform.openai.com/docs/api-reference/models).
 
-The returned list always has one single element.
+The returned list always has one single element. The `meta` field can be `null` (for example, while the model is still loading).
 
 By default, model `id` field is the path to model file, specified via `-m`. You can set a custom value for model `id` field via `--alias` argument. For example, `--alias gpt-4o-mini`.
 
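For orientation, a response of the shape the updated handler constructs while the model is still loading would look roughly like this (the `id` value and `created` timestamp below are placeholders, not from the commit):

{
  "object": "list",
  "data": [
    {
      "id": "models/my-model.gguf",
      "object": "model",
      "created": 1747305600,
      "owned_by": "llamacpp",
      "meta": null
    }
  ]
}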
@@ -3707,6 +3707,9 @@ int main(int argc, char ** argv) {
             if (req.path == "/" || tmp.back() == "html") {
                 res.set_content(reinterpret_cast<const char*>(loading_html), loading_html_len, "text/html; charset=utf-8");
                 res.status = 503;
+            } else if (req.path == "/models" || req.path == "/v1/models") {
+                // allow the models endpoint to be accessed during loading
+                return true;
             } else {
                 res_error(res, format_error_response("Loading model", ERROR_TYPE_UNAVAILABLE));
             }
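Returning true from this pre-routing check lets the request fall through to the registered /models handler instead of failing with a 503 while the model loads. A client can exploit that as a readiness probe; below is a minimal sketch built on the same libraries the server itself uses (cpp-httplib and nlohmann::json). The host, port, header paths, and polling interval are assumptions, not part of the commit:

// Hypothetical client-side readiness probe; not part of the commit.
#include <chrono>
#include <cstdio>
#include <thread>
#include "httplib.h"   // assumption: vendored cpp-httplib header
#include "json.hpp"    // assumption: vendored nlohmann::json header

using json = nlohmann::json;

int main() {
    httplib::Client cli("localhost", 8080);  // assumed server address
    for (;;) {
        auto res = cli.Get("/v1/models");    // served even while loading
        if (res && res->status == 200) {
            // parse without exceptions; returns a discarded value on error
            json body = json::parse(res->body, nullptr, false);
            // "meta" stays null until the server reaches SERVER_STATE_READY
            if (!body.is_discarded() && !body["data"][0]["meta"].is_null()) {
                std::printf("model ready: %s\n",
                            body["data"][0]["id"].get<std::string>().c_str());
                break;
            }
        }
        std::this_thread::sleep_for(std::chrono::milliseconds(250));
    }
    return 0;
}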
@@ -4365,7 +4368,13 @@ int main(int argc, char ** argv) {
         res_ok(res, {{ "prompt", std::move(data.at("prompt")) }});
     };
 
-    const auto handle_models = [&params, &ctx_server, &res_ok](const httplib::Request &, httplib::Response & res) {
+    const auto handle_models = [&params, &ctx_server, &state, &res_ok](const httplib::Request &, httplib::Response & res) {
+        server_state current_state = state.load();
+        json model_meta = nullptr;
+        if (current_state == SERVER_STATE_READY) {
+            model_meta = ctx_server.model_meta();
+        }
+
         json models = {
             {"object", "list"},
             {"data", {
@@ -4374,7 +4383,7 @@ int main(int argc, char ** argv) {
                 {"object", "model"},
                 {"created", std::time(0)},
                 {"owned_by", "llamacpp"},
-                {"meta", ctx_server.model_meta()}
+                {"meta", model_meta},
             },
         }}
     };
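The handler reads `state` once with an atomic load and only calls `ctx_server.model_meta()` after the server has reached SERVER_STATE_READY; otherwise `meta` stays a JSON null, which is what the README change above documents. A small standalone sketch of the nlohmann::json behavior this relies on (initializing a json value from nullptr serializes as JSON null); the header path is an assumption:

#include <cstdio>
#include "json.hpp"  // assumption: nlohmann::json single header
using json = nlohmann::json;

int main() {
    json model_meta = nullptr;        // same default the handler uses
    json models = {
        {"object", "list"},
        {"data", {
            {
                {"object", "model"},
                {"owned_by", "llamacpp"},
                {"meta", model_meta}, // serializes as "meta": null
            },
        }},
    };
    std::printf("%s\n", models.dump(2).c_str());
    return 0;
}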