model : gemma3n text-only (#14400)

* gemma3n
* add llm_graph_input_one
2025-08-04 16:23:49 -04:00 · 2025-06-26 19:34:02 +02:00
parent a01047b041
commit 8846aace49
13 changed files with 960 additions and 15 deletions
--- a/src/llama-hparams.h
+++ b/src/llama-hparams.h
@@ -143,6 +143,12 @@ struct llama_hparams {
    uint32_t n_attn_temp_floor_scale = 8192;
    float    f_attn_temp_scale       = 0.1;

+    // gemma3n altup
+    uint32_t n_altup      = 4; // altup_num_inputs
+    uint32_t i_altup_act  = 0; // altup_active_idx
+    uint32_t laurel_rank  = 64;
+    uint32_t n_embd_altup = 256;
+
    // needed by encoder-decoder models (e.g. T5, FLAN-T5)
    // ref: https://github.com/ggerganov/llama.cpp/pull/8141
    llama_token dec_start_token_id = LLAMA_TOKEN_NULL;