From 0c1df14b5f8d992805cb22d0b77b44092a18aeab Mon Sep 17 00:00:00 2001
From: Douglas Hanley
Date: Sat, 12 Jul 2025 06:21:02 -0400
Subject: [PATCH] server : fix pooled embedding output (#14645)

---
 tools/server/server.cpp | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/tools/server/server.cpp b/tools/server/server.cpp
index 57b917f2f..d4dffb39c 100644
--- a/tools/server/server.cpp
+++ b/tools/server/server.cpp
@@ -2581,12 +2581,14 @@ struct server_context {
                 continue;
             }
 
-            const float * embd = llama_get_embeddings_seq(ctx, batch.seq_id[i][0]);
-            if (embd == NULL) {
+            const float * embd = nullptr;
+            if (llama_pooling_type(slot.ctx) == LLAMA_POOLING_TYPE_NONE) {
                 embd = llama_get_embeddings_ith(ctx, i);
+            } else {
+                embd = llama_get_embeddings_seq(ctx, batch.seq_id[i][0]);
             }
 
-            if (embd == NULL) {
+            if (embd == nullptr) {
                 SLT_ERR(slot, "failed to get embeddings, token = %d, seq_id = %d\n", batch.token[i], batch.seq_id[i][0]);
 
                 res->embedding.push_back(std::vector(n_embd, 0.0f));
@@ -2594,12 +2596,12 @@ struct server_context {
             }
 
             // normalize only when there is pooling
-            // TODO: configurable
             if (llama_pooling_type(slot.ctx) != LLAMA_POOLING_TYPE_NONE) {
                 common_embd_normalize(embd, embd_res.data(), n_embd, 2);
                 res->embedding.push_back(embd_res);
+                break;
             } else {
-                res->embedding.push_back({ embd, embd + n_embd });
+                res->embedding.emplace_back(embd, embd + n_embd);
             }
         }
 