model : gemma3n text-only (#14400)

* gemma3n

* add llm_graph_input_one
This commit is contained in:
Xuan-Son Nguyen
2025-06-26 19:34:02 +02:00
committed by GitHub
parent a01047b041
commit 8846aace49
13 changed files with 960 additions and 15 deletions

View File

@@ -480,6 +480,70 @@ class TensorNameMap:
"encoder.layer.{bid}.layer_norm_2" # jina-v2-code
),
MODEL_TENSOR.PER_LAYER_TOKEN_EMBD: (
"model.embed_tokens_per_layer", # gemma3n
),
MODEL_TENSOR.PER_LAYER_MODEL_PROJ: (
"model.per_layer_model_projection", # gemma3n
),
MODEL_TENSOR.PER_LAYER_PROJ_NORM: (
"model.per_layer_projection_norm", # gemma3n
),
MODEL_TENSOR.ALTUP_PROJ: (
"model.altup_projections", # gemma3n
),
MODEL_TENSOR.ALTUP_UNEMBD_PROJ: (
"model.altup_unembed_projections", # gemma3n
),
MODEL_TENSOR.PER_LAYER_INP_GATE: (
"model.layers.{bid}.per_layer_input_gate", # gemma3n
),
MODEL_TENSOR.PER_LAYER_PROJ: (
"model.layers.{bid}.per_layer_projection", # gemma3n
),
MODEL_TENSOR.PER_LAYER_POST_NORM: (
"model.layers.{bid}.post_per_layer_input_norm", # gemma3n
),
MODEL_TENSOR.ALTUP_CORRECT_COEF: (
"model.layers.{bid}.altup.correction_coefs", # gemma3n
),
MODEL_TENSOR.ALTUP_CORRECT_SCALE: (
"model.layers.{bid}.altup.correct_output_scale", # gemma3n
),
MODEL_TENSOR.ALTUP_PREDICT_COEF: (
"model.layers.{bid}.altup.prediction_coefs", # gemma3n
),
MODEL_TENSOR.ALTUP_ROUTER: (
"model.layers.{bid}.altup.modality_router", # gemma3n
),
MODEL_TENSOR.ALTUP_ROUTER_NORM: (
"model.layers.{bid}.altup.router_norm", # gemma3n
),
MODEL_TENSOR.LAUREL_L: (
"model.layers.{bid}.laurel.linear_left", # gemma3n
),
MODEL_TENSOR.LAUREL_R: (
"model.layers.{bid}.laurel.linear_right", # gemma3n
),
MODEL_TENSOR.LAUREL_POST_NORM: (
"model.layers.{bid}.laurel.post_laurel_norm", # gemma3n
),
MODEL_TENSOR.SSM_IN: (
"model.layers.{bid}.in_proj",
"backbone.layers.{bid}.mixer.in_proj",