Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-06-29 04:35:05 +00:00)
@@ -350,6 +350,12 @@ void llm_graph_input_mem_hybrid::set_input(const llama_ubatch * ubatch) {
     }
 }
 
+void llm_graph_input_one::set_input(const llama_ubatch *) {
+    GGML_ASSERT(one && ggml_nelements(one) == 1);
+    float f_one = 1.0f;
+    ggml_backend_tensor_set(one, &f_one, 0, sizeof(float));
+}
+
 //
 // llm_graph_context
 //
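The new llm_graph_input_one::set_input uploads a single constant into a backend tensor with ggml_backend_tensor_set. Below is a minimal standalone sketch of the same pattern, assuming a CPU backend; the allocation boilerplate (context, backend, buffer) and the main function are illustrative only and are not part of this commit:

// sketch: create a 1-element F32 tensor in a backend buffer and write 1.0f into it
#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"
#include "ggml-cpu.h"
#include <cassert>

int main() {
    ggml_init_params params = {
        /*.mem_size   =*/ ggml_tensor_overhead() * 8, // room for a few tensor headers
        /*.mem_buffer =*/ nullptr,
        /*.no_alloc   =*/ true,                       // tensor data lives in the backend buffer
    };
    ggml_context * ctx = ggml_init(params);

    ggml_tensor * one = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);

    ggml_backend_t        backend = ggml_backend_cpu_init();
    ggml_backend_buffer_t buf     = ggml_backend_alloc_ctx_tensors(ctx, backend);

    // same pattern as llm_graph_input_one::set_input above
    assert(one && ggml_nelements(one) == 1);
    const float f_one = 1.0f;
    ggml_backend_tensor_set(one, &f_one, 0, sizeof(float));

    ggml_backend_buffer_free(buf);
    ggml_backend_free(backend);
    ggml_free(ctx);
    return 0;
}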
@@ -1267,8 +1273,14 @@ ggml_tensor * llm_graph_context::build_attn(
     // these nodes are added to the graph together so that they are not reordered
     // by doing so, the number of splits in the graph is reduced
     ggml_build_forward_expand(gf, q_cur);
-    ggml_build_forward_expand(gf, k_cur);
-    ggml_build_forward_expand(gf, v_cur);
+
+    if (k_cur) {
+        ggml_build_forward_expand(gf, k_cur);
+    }
+
+    if (v_cur) {
+        ggml_build_forward_expand(gf, v_cur);
+    }
 
     const auto * mctx_iswa = static_cast<const llama_kv_cache_unified_iswa_context *>(mctx);
 
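The q/k/v nodes are expanded into the graph back-to-back so the scheduler does not interleave them with other nodes, which keeps the number of backend splits down; after this change k_cur and v_cur may be absent. A hedged sketch of the same guard using a hypothetical expand_all helper (not part of llama.cpp):

#include "ggml.h"
#include <initializer_list>

// expand a group of optional nodes together, skipping null ones, so the
// non-null nodes stay adjacent in the graph and cause fewer splits
static void expand_all(ggml_cgraph * gf, std::initializer_list<ggml_tensor *> nodes) {
    for (ggml_tensor * t : nodes) {
        if (t) {
            ggml_build_forward_expand(gf, t);
        }
    }
}

// usage, mirroring the hunk above (k_cur/v_cur may be nullptr):
//   expand_all(gf, { q_cur, k_cur, v_cur });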
@@ -1276,9 +1288,12 @@ ggml_tensor * llm_graph_context::build_attn(
 
     const auto * mctx_cur = is_swa ? mctx_iswa->get_swa() : mctx_iswa->get_base();
 
-    // store to KV cache
-    {
+    // optionally store to KV cache
+    if (k_cur) {
         ggml_build_forward_expand(gf, mctx_cur->cpy_k(ctx0, k_cur, il));
+    }
+
+    if (v_cur) {
         ggml_build_forward_expand(gf, mctx_cur->cpy_v(ctx0, v_cur, il));
     }
 
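With the block above, writing into the KV cache becomes optional: a caller that already has this layer's K/V in the cache can pass nullptr and only the attention read path is built. A hedged sketch of the same guard as a free function; the name store_kv_optional and the template parameter are hypothetical, while the cpy_k/cpy_v calls follow the hunk above:

#include "ggml.h"

// templated so the cache-context type does not have to be named here;
// TCacheCtx is assumed to expose cpy_k/cpy_v as shown in the hunk above
template <typename TCacheCtx>
static void store_kv_optional(
        ggml_context    * ctx0,
        ggml_cgraph     * gf,
        const TCacheCtx * mctx_cur,
        ggml_tensor     * k_cur,   // may be nullptr: skip the K store
        ggml_tensor     * v_cur,   // may be nullptr: skip the V store
        int               il) {
    if (k_cur) {
        ggml_build_forward_expand(gf, mctx_cur->cpy_k(ctx0, k_cur, il));
    }
    if (v_cur) {
        ggml_build_forward_expand(gf, mctx_cur->cpy_v(ctx0, v_cur, il));
    }
}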