mirror of https://github.com/ggml-org/llama.cpp.git, synced 2025-08-19 22:36:13 -04:00
Use correct type of pooling for embedding models (#5500)
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -40,7 +40,7 @@ class Keys:
         TENSOR_DATA_LAYOUT = "{arch}.tensor_data_layout"
         EXPERT_COUNT = "{arch}.expert_count"
         EXPERT_USED_COUNT = "{arch}.expert_used_count"
-        POOLING_LAYER = "{arch}.pooling_layer"
+        POOLING_TYPE = "{arch}.pooling_type"
 
     class Attention:
         HEAD_COUNT = "{arch}.attention.head_count"
@@ -561,6 +561,12 @@ class RopeScalingType(Enum):
     YARN = 'yarn'
 
 
+class PoolingType(IntEnum):
+    NONE = 0
+    MEAN = 1
+    CLS = 2
+
+
 class GGMLQuantizationType(IntEnum):
     F32 = 0
     F16 = 1
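
The first hunk renames the metadata key from {arch}.pooling_layer to {arch}.pooling_type, so GGUF metadata can record which kind of pooling an embedding model needs rather than only whether pooling is applied. A minimal sketch of writing the renamed key, assuming gguf-py's GGUFWriter and its generic add_uint32 helper; the "bert" architecture name and output path are illustrative, not part of this commit:

    # Illustrative only: write the pooling type KV pair during model conversion.
    from gguf import GGUFWriter
    from gguf.constants import Keys, PoolingType

    writer = GGUFWriter("model.gguf", arch="bert")
    # "{arch}.pooling_type" expands to "bert.pooling_type"
    writer.add_uint32(Keys.LLM.POOLING_TYPE.format(arch="bert"),
                      int(PoolingType.MEAN))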
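
The second hunk adds the PoolingType enum itself. The following NumPy sketch is not from this commit; it only illustrates what each enum value means for a [n_tokens, n_embd] matrix of per-token embeddings:

    # Illustrative only: the semantics of the three PoolingType values.
    import numpy as np

    from gguf.constants import PoolingType  # NONE = 0, MEAN = 1, CLS = 2

    def pool(embeddings: np.ndarray, pooling_type: int) -> np.ndarray:
        if pooling_type == PoolingType.NONE:
            return embeddings                # keep per-token embeddings as-is
        if pooling_type == PoolingType.MEAN:
            return embeddings.mean(axis=0)   # average over the token axis
        if pooling_type == PoolingType.CLS:
            return embeddings[0]             # embedding of the first ([CLS]) token
        raise ValueError(f"unsupported pooling type: {pooling_type}")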