llama-model : support Qwen2 embedding models and pooling_mode_lasttoken (#13245)

2025-06-26 19:55:04 +00:00 · 2025-05-02 11:42:30 -04:00
parent 7d2123484e
commit 2f567611c0
3 changed files with 45 additions and 28 deletions
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -2033,6 +2033,8 @@ class PoolingType(IntEnum):
    NONE = 0
    MEAN = 1
    CLS  = 2
+    LAST = 3
+    RANK = 4


 class GGMLQuantizationType(IntEnum):