llama-model : support Qwen2 embedding models and pooling_mode_lasttoken (#13245)

This commit is contained in:
Jared Van Bortel
2025-05-02 11:42:30 -04:00
committed by GitHub
parent 7d2123484e
commit 2f567611c0
3 changed files with 45 additions and 28 deletions

View File

@@ -2033,6 +2033,8 @@ class PoolingType(IntEnum):
NONE = 0
MEAN = 1
CLS = 2
LAST = 3
RANK = 4
class GGMLQuantizationType(IntEnum):