Commit by younesbelkada on 2025-07-04 14:48:22 +04:00
2 changed files with 13 additions and 2 deletions

convert_hf_to_gguf.py

@@ -686,6 +686,9 @@ class TextModel(ModelBase):
if chkhsh == "9d032fcbd5501f4a38150912590928bfb36091efb5df11b8e2124b0390e3fb1e":
# ref: https://huggingface.co/tiiuae/Falcon3-7B-Base
res = "falcon3"
if chkhsh == "60476e1243776c4fb1b993dbd7a5f15ac22f83c80afdf425fa5ae01c8d44ef86":
# ref: https://huggingface.co/collections/tiiuae/falcon-h1-6819f2795bc406da60fab8df
res = "falcon-H1"
if chkhsh == "8e62295832751ca1e8f92f2226f403dea30dc5165e448b5bfa05af5340c64ec7":
# ref: https://huggingface.co/BAAI/bge-large-zh-v1.5
res = "bert-bge-large"
@@ -4905,8 +4908,11 @@ class Mamba2Model(TextModel):
         # Fail early for models which don't have a block expansion factor of 2
         # TODO: does this really matter?
-        assert d_inner == 2 * d_model
-        assert d_inner % head_dim == 0
+        # skip the assertions for the FalconH1 model
+        architectures = self.hparams.get("architectures")
+        if architectures is None or architectures[0] != "FalconH1ForCausalLM":
+            assert d_inner == 2 * d_model
+            assert d_inner % head_dim == 0

         self.gguf_writer.add_context_length(2**20) # arbitrary value; for those who use the default
         self.gguf_writer.add_embedding_length(d_model)
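The guard is needed because Falcon-H1 decouples the SSM inner size from the hidden size, so the Mamba-2 invariant d_inner == 2 * d_model does not hold for it. A self-contained sketch with hypothetical sizes:

```python
# hypothetical hparams for the two cases; only the keys and sizes matter here
falcon_h1 = {"architectures": ["FalconH1ForCausalLM"], "hidden_size": 4096, "mamba_d_ssm": 3072}
plain_mamba2 = {"architectures": ["Mamba2ForCausalLM"], "hidden_size": 4096, "intermediate_size": 8192}

for hparams in (falcon_h1, plain_mamba2):
    d_model = hparams["hidden_size"]
    d_inner = hparams.get("mamba_d_ssm") or hparams.get("intermediate_size") or 2 * d_model
    architectures = hparams.get("architectures")
    if architectures is None or architectures[0] != "FalconH1ForCausalLM":
        # holds for plain Mamba-2 (8192 == 2 * 4096); would fail for Falcon-H1 (3072)
        assert d_inner == 2 * d_model
```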
@@ -4945,6 +4951,10 @@ class Mamba2Model(TextModel):
             d_model = self.find_hparam(["hidden_size", "d_model", "dim"])
             d_inner = self.find_hparam(["intermediate_size", "d_inner"], optional=True) or 2 * d_model
             n_group = self.hparams.get("n_groups", 1)
+            architectures = self.hparams.get("architectures")
+            if architectures is not None and architectures[0] == "FalconH1ForCausalLM":
+                # FalconH1 uses a different d_inner, taken from mamba_d_ssm
+                d_inner = self.hparams.get("mamba_d_ssm")
             data_torch = data_torch.reshape((n_group, d_inner // n_group))

         if name.endswith(".A_log"):
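The hunk above feeds the corrected d_inner into the SSM norm reshape, which splits the flat norm weight into n_group rows. A tiny sketch with hypothetical sizes:

```python
import torch

n_group, d_inner = 2, 3072        # hypothetical Falcon-H1-style sizes
data_torch = torch.ones(d_inner)  # stand-in for the mamba norm weight

# one row per group, so each group is normalized independently
data_torch = data_torch.reshape((n_group, d_inner // n_group))
assert data_torch.shape == (2, 1536)
```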

gguf-py/gguf/tensor_mapping.py

@@ -589,6 +589,7 @@ class TensorNameMap:
         MODEL_TENSOR.SSM_OUT: (
             "model.layers.{bid}.out_proj",
             "backbone.layers.{bid}.mixer.out_proj",
+            "model.layers.{bid}.mamba.out_proj",  # falcon-h1
         ),

         MODEL_TENSOR.TIME_MIX_W0: (
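The new entry lets the converter map Falcon-H1's model.layers.{bid}.mamba.out_proj onto the generic SSM_OUT slot. A hedged sketch of the lookup; resolve() is a hypothetical helper standing in for TensorNameMap's real machinery:

```python
SSM_OUT_SOURCES = (
    "model.layers.{bid}.out_proj",
    "backbone.layers.{bid}.mixer.out_proj",
    "model.layers.{bid}.mamba.out_proj",  # falcon-h1
)

def resolve(name: str, bid: int) -> bool:
    # substitute the block id into each template and look for a match
    return any(t.format(bid=bid) == name for t in SSM_OUT_SOURCES)

assert resolve("model.layers.7.mamba.out_proj", bid=7)
```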