server : avoid common_batch

ggml-ci
This commit is contained in:
Georgi Gerganov
2025-03-20 16:52:24 +02:00
parent 76fd7d6f5b
commit 8a23b4a54a
3 changed files with 91 additions and 137 deletions

View File

@@ -88,13 +88,19 @@ def test_embedding_pooling_none():
res = server.make_request("POST", "/embeddings", data={
"input": "hello hello hello",
})
assert res.status_code == 200
assert 'embedding' in res.body[0]
assert len(res.body[0]['embedding']) == 5 # 3 text tokens + 2 special
# make sure embedding vector is not normalized
for x in res.body[0]['embedding']:
assert abs(sum([x ** 2 for x in x]) - 1) > EPSILON
# /embeddings does not support pooling type 'none'
assert res.status_code == 400
assert "error" in res.body
# TODO: re-enable when we figure out how to support pooling type 'none'
#assert res.status_code == 200
#assert 'embedding' in res.body[0]
#assert len(res.body[0]['embedding']) == 5 # 3 text tokens + 2 special
## make sure embedding vector is not normalized
#for x in res.body[0]['embedding']:
# assert abs(sum([x ** 2 for x in x]) - 1) > EPSILON
def test_embedding_pooling_none_oai():