llama : rework embeddings logic (#14208)

* llama : rework embeddings logic

ggml-ci

* cont : fix rerank

ggml-ci

* cont : engrish [no ci]

* cont : fix rerank

ggml-ci

* server : support both embeddings and completions with single model

ggml-ci

* cont : avoid embeddings_org

ggml-ci
This commit is contained in:
Georgi Gerganov
2025-06-16 14:14:00 +03:00
committed by GitHub
parent 3ba0d843c6
commit d3e64b9f49
16 changed files with 159 additions and 114 deletions

View File

@ -897,34 +897,6 @@ struct common_init_result common_init_from_params(common_params & params) {
const llama_vocab * vocab = llama_model_get_vocab(model);
if (params.reranking) {
bool ok = true;
if (llama_vocab_bos(vocab) == LLAMA_TOKEN_NULL) {
LOG_WRN("%s: warning: vocab does not have a BOS token, reranking will not work\n", __func__);
ok = false;
}
bool has_eos = llama_vocab_eos(vocab) != LLAMA_TOKEN_NULL;
bool has_sep = llama_vocab_sep(vocab) != LLAMA_TOKEN_NULL;
if (!has_eos && !has_sep) {
LOG_WRN("%s: warning: vocab does not have an EOS token or SEP token, reranking will not work\n", __func__);
ok = false;
} else if (!has_eos) {
LOG_WRN("%s: warning: vocab does not have an EOS token, using SEP token as fallback\n", __func__);
} else if (!has_sep) {
LOG_WRN("%s: warning: vocab does not have a SEP token, reranking will not work\n", __func__);
ok = false;
}
if (!ok) {
llama_model_free(model);
return iparams;
}
}
auto cparams = common_context_params_to_llama(params);
llama_context * lctx = llama_init_from_model(model, cparams);
@ -966,6 +938,35 @@ struct common_init_result common_init_from_params(common_params & params) {
}
}
if (llama_pooling_type(lctx) == LLAMA_POOLING_TYPE_RANK) {
bool ok = true;
if (llama_vocab_bos(vocab) == LLAMA_TOKEN_NULL) {
LOG_WRN("%s: warning: vocab does not have a BOS token, reranking will not work\n", __func__);
ok = false;
}
bool has_eos = llama_vocab_eos(vocab) != LLAMA_TOKEN_NULL;
bool has_sep = llama_vocab_sep(vocab) != LLAMA_TOKEN_NULL;
if (!has_eos && !has_sep) {
LOG_WRN("%s: warning: vocab does not have an EOS token or SEP token, reranking will not work\n", __func__);
ok = false;
} else if (!has_eos) {
LOG_WRN("%s: warning: vocab does not have an EOS token, using SEP token as fallback\n", __func__);
} else if (!has_sep) {
LOG_WRN("%s: warning: vocab does not have a SEP token, reranking will not work\n", __func__);
ok = false;
}
if (!ok) {
llama_free(lctx);
llama_model_free(model);
return iparams;
}
}
// load and optionally apply lora adapters
for (auto & la : params.lora_adapters) {
llama_adapter_lora_ptr lora;
@ -1143,11 +1144,6 @@ struct llama_context_params common_context_params_to_llama(const common_params &
cparams.op_offload = !params.no_op_offload;
cparams.swa_full = params.swa_full;
if (params.reranking) {
cparams.embeddings = true;
cparams.pooling_type = LLAMA_POOLING_TYPE_RANK;
}
cparams.type_k = params.cache_type_k;
cparams.type_v = params.cache_type_v;