mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-08-17 21:51:27 -04:00
llama : initial Mamba-2 support
This commit is contained in:
@@ -396,7 +396,7 @@ class TensorNameMap:
|
||||
"encoder.layers.{bid}.norm2", # nomic-bert
|
||||
"transformer.decoder_layer.{bid}.rms_norm_3", # Grok
|
||||
"encoder.layer.{bid}.mlp.layernorm", # jina-bert-v2
|
||||
"encoder.layer.{bid}.layer_norm_2" # jina-v2-code
|
||||
"encoder.layer.{bid}.layer_norm_2", # jina-v2-code
|
||||
),
|
||||
|
||||
MODEL_TENSOR.SSM_IN: (
|
||||
@@ -429,6 +429,10 @@ class TensorNameMap:
|
||||
"backbone.layers.{bid}.mixer.D",
|
||||
),
|
||||
|
||||
MODEL_TENSOR.SSM_NORM: (
|
||||
"backbone.layers.{bid}.mixer.norm", # mamba2
|
||||
),
|
||||
|
||||
MODEL_TENSOR.SSM_OUT: (
|
||||
"model.layers.{bid}.out_proj",
|
||||
"backbone.layers.{bid}.mixer.out_proj",
|
||||
|
Reference in New Issue
Block a user