Mirror of https://github.com/ggml-org/llama.cpp.git
Use correct type of pooling for embedding models (#5500)
This commit replaces the boolean pooling_layer metadata key with an explicit PoolingType enum, so a GGUF file records which pooling operation an embedding model uses rather than merely whether a pooling layer exists.
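For context, a minimal sketch of the enum the new setter expects. It is assumed to mirror the PoolingType added to gguf-py's constants alongside this change; the exact member list is an assumption and may have evolved since:

    # Sketch of the enum consumed by add_pooling_type (assumed to mirror
    # the PoolingType in gguf-py's constants at the time of this commit).
    from enum import IntEnum

    class PoolingType(IntEnum):
        NONE = 0  # no pooling layer
        MEAN = 1  # mean-pool the token embeddings
        CLS = 2   # use the CLS token embedding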
@@ -19,6 +19,7 @@ from .constants import (
     GGUFValueType,
     Keys,
     RopeScalingType,
+    PoolingType,
     TokenType,
 )
@@ -360,8 +361,8 @@ class GGUFWriter:
     def add_causal_attention(self, value: bool) -> None:
         self.add_bool(Keys.Attention.CAUSAL.format(arch=self.arch), value)

-    def add_pooling_layer(self, value: bool) -> None:
-        self.add_bool(Keys.LLM.POOLING_LAYER.format(arch=self.arch), value)
+    def add_pooling_type(self, value: PoolingType) -> None:
+        self.add_uint32(Keys.LLM.POOLING_TYPE.format(arch=self.arch), value)

     def add_rope_dimension_count(self, count: int) -> None:
         self.add_uint32(Keys.Rope.DIMENSION_COUNT.format(arch=self.arch), count)
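A conversion script would now call the new setter instead of the old boolean one. A minimal usage sketch, assuming the gguf-py package from this repository; the output path, architecture name, and pooling choice are illustrative, not taken from the commit:

    # Hedged sketch: writing pooling metadata with the updated API.
    from gguf import GGUFWriter
    from gguf.constants import PoolingType

    writer = GGUFWriter("model.gguf", arch="bert")  # illustrative arch

    # Before: writer.add_pooling_layer(True) only said "a pooling layer exists".
    # After: the concrete pooling operation is stored as a uint32 enum value.
    writer.add_pooling_type(PoolingType.MEAN)

    writer.write_header_to_file()
    writer.write_kv_data_to_file()
    writer.close()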