Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-07-16 15:47:35 +00:00)
model : add support for Falcon-H1 family (#14534)
* v1
* push more fixes
* another fix
* fix
* more fixes
* minor fix
* more cleaning on python code
* python fixes
* changed precision for multipliers float 32->64
* fixes
* another fix
* fix
* pre-norm -> norm
* fix
* Revert "fix" (this reverts commit 243e4d1a50)
* fix
* small fix ffn_norm
* try
* mix instead of max
* fix vocab size
* conflict solve
* fixed multipliers
* falcon-h1 specific vocab resolved
* read arch from gguf.MODEL_ARCH
* mamba_d_ssm added to d_inner find_hparam
* remove unused functions from gguf_writer.py
* override modify_tensors instead of get_tensors
* fix conversion and d_inner
* added some cb functions for debugging purposes
* inp_out_ids moved outside of layers loop
* mup_vec created as float64
* fix rope_theta
* injected mup
* clean ups
* rm extra space
* rm unused MAMBA_CHUNK_SIZE
* rm unused key
* add bos False
* changed ROPE_TYPE
* cleaning debugging stuff
* cleaning debug quant
* fix comment
* some cleanups
* some cleanups
* Update src/llama-model-loader.cpp
* more cleanups
* moe cleanups
* d_ssm -> d_inner
* cleaning unused hparams
* cleanup
* more cleanups
* more cleanups on python conversion
* minor cleanups
* Apply suggestions from code review (Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>)
* remove todo
* added falcon-h1
* tensor not required
* clean
* remove unneeded attributes
* more cleanups and fixed conversion
* remove final_norm
* flake8 fixes
* Update src/llama-model.cpp (Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>)
* flake8 fixes
* Update src/llama-hparams.cpp (Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>)
* Update src/llama-model.cpp (Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>)
* Update src/llama-model.cpp (Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>)
* Update src/llama-arch.cpp (Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>)
* Update convert_hf_to_gguf.py (Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>)
* added hashes
* Update src/llama-arch.cpp (Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>)
* Update src/llama-vocab.cpp (Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>)
* update the update file
* Revert "update the update file" (this reverts commit 082ab4ad2a)
* fix: address suggestions
* fix: update convert_hf_to_gguf.py
* Update gguf-py/gguf/constants.py (Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>)
* Update src/llama-model-loader.cpp (Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>)
* d_inner fixed
* Update src/llama-model.cpp (Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>)
* reshaping ssm_norm for 34B
* removing generate_mup
* remove duplicate metadata keys
* rm comment
* final comment
* fix unused args
* fix constants
* fix bad merge
* Update src/llama-model.cpp (Co-authored-by: compilade <git@compilade.net>)
* falcon-h1: remove unused ssm_in_b and bad merge
* Update src/llama-model.cpp (Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>)
* falcon-h1: fix last comment
* Update convert_hf_to_gguf.py (Co-authored-by: compilade <git@compilade.net>)
* falcon-h1: revert add_add_bos(False)
* falcon-h1: fix tied weights
* falcon-h1: remove whitespace
* falcon-h1: fix wrong size param
* falcon-h1: fix whitespace issues

---------

Co-authored-by: younesbelkada <younes.belkada@tii.ae>
Co-authored-by: Younes B <49240599+younesbelkada@users.noreply.github.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
Co-authored-by: compilade <git@compilade.net>
Committed by: GitHub
Parent: 20b7bf8a32
Commit: 04655063c4
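One recurring item in the log above is the MuP multiplier precision change ("float 32->64", "mup_vec created as float64"). A minimal sketch of the idea follows; the names and sizes are placeholders, not code from this PR:

import numpy as np

# Hypothetical illustration: build the per-channel multiplier vector in
# float64 so repeated products of scale factors do not accumulate float32
# rounding error; cast to the target dtype only when writing the GGUF.
n_embd = 4096                 # placeholder size, not from the commit
key_multiplier = 0.3125       # placeholder value, not from the commit
mup_vec = np.full(n_embd, key_multiplier, dtype=np.float64)
assert mup_vec.dtype == np.float64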
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -288,6 +288,7 @@ class MODEL_ARCH(IntEnum):
     LLAMA4 = auto()
     DECI = auto()
     FALCON = auto()
+    FALCON_H1 = auto()
     BAICHUAN = auto()
     GROK = auto()
     GPT2 = auto()
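Note that inserting FALCON_H1 between existing members shifts every subsequent auto() value up by one. That is harmless because GGUF files identify the architecture by its string name (see the next hunk), never by the numeric enum value. A toy sketch of the renumbering:

from enum import IntEnum, auto

# Toy enum (not the real MODEL_ARCH): inserting a member renumbers
# everything after it, so the numeric values must never be serialized.
class Arch(IntEnum):
    FALCON = auto()     # 1
    FALCON_H1 = auto()  # 2 <- new member takes this slot
    BAICHUAN = auto()   # 3 (was 2 before the insertion)

print(list(Arch))  # [<Arch.FALCON: 1>, <Arch.FALCON_H1: 2>, <Arch.BAICHUAN: 3>]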
@@ -662,6 +663,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
     MODEL_ARCH.DOTS1: "dots1",
     MODEL_ARCH.ARCEE: "arcee",
     MODEL_ARCH.ERNIE4_5: "ernie4_5",
+    MODEL_ARCH.FALCON_H1: "falcon-h1",
     MODEL_ARCH.HUNYUAN_MOE: "hunyuan-moe",
     MODEL_ARCH.SMOLLM3: "smollm3",
 }
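The "falcon-h1" string registered here is what conversion records in the file's general.architecture field. A minimal sketch of the writer side, assuming the gguf-py API in which GGUFWriter takes the architecture name as its second argument:

import gguf

# The arch string, not the enum value, ends up in the output file.
arch_name = gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.FALCON_H1]  # "falcon-h1"
writer = gguf.GGUFWriter("falcon-h1-example.gguf", arch_name)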
@@ -2215,6 +2217,40 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
     ],
+    MODEL_ARCH.FALCON_H1: [
+        # Token embedding
+        MODEL_TENSOR.TOKEN_EMBD,
+
+        # Input layernorm
+        MODEL_TENSOR.ATTN_NORM,
+
+        # Attention components
+        MODEL_TENSOR.ATTN_Q,      # Query projection
+        MODEL_TENSOR.ATTN_K,      # Key projection
+        MODEL_TENSOR.ATTN_V,      # Value projection
+        MODEL_TENSOR.ATTN_OUT,    # Output projection
+
+        # SSM components (Mamba2 specific)
+        MODEL_TENSOR.SSM_IN,      # Input projection for SSM
+        MODEL_TENSOR.SSM_CONV1D,  # Convolution layer
+        MODEL_TENSOR.SSM_DT,      # Delta time projection
+        MODEL_TENSOR.SSM_A,       # A parameter (log form)
+        MODEL_TENSOR.SSM_D,       # D parameter
+        MODEL_TENSOR.SSM_NORM,    # Normalization in SSM
+        MODEL_TENSOR.SSM_OUT,     # Output projection
+
+        # Pre-feedforward layernorm
+        MODEL_TENSOR.FFN_PRE_NORM,
+
+        # Feed-forward network components
+        MODEL_TENSOR.FFN_GATE,    # Gate projection (SwiGLU)
+        MODEL_TENSOR.FFN_DOWN,    # Down projection
+        MODEL_TENSOR.FFN_UP,      # Up projection
+
+        # Post-feedforward layernorm
+        MODEL_TENSOR.OUTPUT_NORM, # Final layer norm
+        MODEL_TENSOR.OUTPUT,      # Output projection (lm_head)
+    ],
     MODEL_ARCH.HUNYUAN_MOE: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
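The tensor list above drives name mapping during conversion and loading. A small sketch, assuming gguf.constants also exposes TENSOR_NAMES (the MODEL_TENSOR to name-pattern table), that prints every GGUF tensor pattern Falcon-H1 may emit:

from gguf.constants import MODEL_ARCH, MODEL_TENSORS, TENSOR_NAMES

# Per-layer tensors keep a "{bid}" (block id) placeholder in their pattern,
# e.g. "blk.{bid}.attn_q"; global tensors like "token_embd" have none.
for tensor in MODEL_TENSORS[MODEL_ARCH.FALCON_H1]:
    print(TENSOR_NAMES[tensor])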