Mirror of https://github.com/ggml-org/llama.cpp.git
Use correct type of pooling for embedding models (#5500)
This commit replaces the boolean pooling_layer metadata key with an explicit PoolingType enum, so a GGUF file records which pooling operation an embedding model uses rather than merely whether a pooling layer exists.
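For context, a minimal sketch of the enum the new setter expects. It is assumed to mirror the PoolingType added to gguf-py's constants alongside this change; the exact member list is an assumption and may have evolved since:

    # Sketch of the enum consumed by add_pooling_type (assumed to mirror
    # the PoolingType in gguf-py's constants at the time of this commit).
    from enum import IntEnum

    class PoolingType(IntEnum):
        NONE = 0  # no pooling layer
        MEAN = 1  # mean-pool the token embeddings
        CLS = 2   # use the CLS token embedding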
@@ -19,6 +19,7 @@ from .constants import (
     GGUFValueType,
     Keys,
     RopeScalingType,
+    PoolingType,
     TokenType,
 )
@@ -360,8 +361,8 @@ class GGUFWriter:
     def add_causal_attention(self, value: bool) -> None:
         self.add_bool(Keys.Attention.CAUSAL.format(arch=self.arch), value)

-    def add_pooling_layer(self, value: bool) -> None:
-        self.add_bool(Keys.LLM.POOLING_LAYER.format(arch=self.arch), value)
+    def add_pooling_type(self, value: PoolingType) -> None:
+        self.add_uint32(Keys.LLM.POOLING_TYPE.format(arch=self.arch), value)

     def add_rope_dimension_count(self, count: int) -> None:
         self.add_uint32(Keys.Rope.DIMENSION_COUNT.format(arch=self.arch), count)
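A conversion script would now call the new setter instead of the old boolean one. A minimal usage sketch, assuming the gguf-py package from this repository; the output path, architecture name, and pooling choice are illustrative, not taken from the commit:

    # Hedged sketch: writing pooling metadata with the updated API.
    from gguf import GGUFWriter
    from gguf.constants import PoolingType

    writer = GGUFWriter("model.gguf", arch="bert")  # illustrative arch

    # Before: writer.add_pooling_layer(True) only said "a pooling layer exists".
    # After: the concrete pooling operation is stored as a uint32 enum value.
    writer.add_pooling_type(PoolingType.MEAN)

    writer.write_header_to_file()
    writer.write_kv_data_to_file()
    writer.close()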