Use correct type of pooling for embedding models (#5500)

Use correct type of pooling for embedding models
2025-08-19 22:36:13 -04:00 · 2024-02-15 11:21:49 -06:00
parent c06e45d729
commit 4524290e87
5 changed files with 94 additions and 31 deletions
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -40,7 +40,7 @@ class Keys:
        TENSOR_DATA_LAYOUT    = "{arch}.tensor_data_layout"
        EXPERT_COUNT          = "{arch}.expert_count"
        EXPERT_USED_COUNT     = "{arch}.expert_used_count"
-        POOLING_LAYER         = "{arch}.pooling_layer"
+        POOLING_TYPE          = "{arch}.pooling_type"

    class Attention:
        HEAD_COUNT        = "{arch}.attention.head_count"
@@ -561,6 +561,12 @@ class RopeScalingType(Enum):
    YARN   = 'yarn'


+class PoolingType(IntEnum):  # integer codes for pooling modes; presumably the value stored under the "{arch}.pooling_type" key - TODO confirm
+    NONE = 0  # presumably: no pooling applied - confirm against the consumer
+    MEAN = 1  # presumably: mean pooling over token embeddings - confirm
+    CLS  = 2  # presumably: use the CLS token's embedding - confirm
+
+
 class GGMLQuantizationType(IntEnum):
    F32  = 0
    F16  = 1