model : gemma3n text-only (#14400)

* gemma3n

* add llm_graph_input_one
This commit is contained in:
Xuan-Son Nguyen
2025-06-26 19:34:02 +02:00
committed by GitHub
parent a01047b041
commit 8846aace49
13 changed files with 960 additions and 15 deletions

View File

@ -350,6 +350,12 @@ void llm_graph_input_mem_hybrid::set_input(const llama_ubatch * ubatch) {
}
}
void llm_graph_input_one::set_input(const llama_ubatch *) {
GGML_ASSERT(one && ggml_nelements(one) == 1);
float f_one = 1.0f;
ggml_backend_tensor_set(one, &f_one, 0, sizeof(float));
}
//
// llm_graph_context
//
@ -1267,8 +1273,14 @@ ggml_tensor * llm_graph_context::build_attn(
// these nodes are added to the graph together so that they are not reordered
// by doing so, the number of splits in the graph is reduced
ggml_build_forward_expand(gf, q_cur);
ggml_build_forward_expand(gf, k_cur);
ggml_build_forward_expand(gf, v_cur);
if (k_cur) {
ggml_build_forward_expand(gf, k_cur);
}
if (v_cur) {
ggml_build_forward_expand(gf, v_cur);
}
const auto * mctx_iswa = static_cast<const llama_kv_cache_unified_iswa_context *>(mctx);
@ -1276,9 +1288,12 @@ ggml_tensor * llm_graph_context::build_attn(
const auto * mctx_cur = is_swa ? mctx_iswa->get_swa() : mctx_iswa->get_base();
// store to KV cache
{
// optionally store to KV cache
if (k_cur) {
ggml_build_forward_expand(gf, mctx_cur->cpy_k(ctx0, k_cur, il));
}
if (v_cur) {
ggml_build_forward_expand(gf, mctx_cur->cpy_v(ctx0, v_cur, il));
}