llama-model : support Qwen2 embedding models and pooling_mode_lasttoken (#13245)

This commit is contained in:
Jared Van Bortel
2025-05-02 11:42:30 -04:00
committed by GitHub
parent 7d2123484e
commit 2f567611c0
3 changed files with 45 additions and 28 deletions

View File

@@ -773,6 +773,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
// fall through
case LLM_ARCH_QWEN2:
{
ml.get_key(LLM_KV_POOLING_TYPE, hparams.pooling_type, false);
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
switch (hparams.n_layer) {
case 24: type = hparams.n_embd == 1024 ? LLM_TYPE_0_5B : LLM_TYPE_1B; break;