mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-07-29 13:43:38 -04:00
llama_encode : only force non-causal attention for enc-dec models
This commit is contained in:
@@ -732,10 +732,12 @@ int llama_context::encode(llama_batch & inp_batch) {
 
     const auto causal_attn_org = cparams.causal_attn;
 
-    // always use non-causal attention for encoder graphs
-    // TODO: this is a tmp solution until we have a proper way to support enc-dec models
-    // ref: https://github.com/ggml-org/llama.cpp/pull/12181#issuecomment-2730451223
-    cparams.causal_attn = false;
+    if (model.arch == LLM_ARCH_T5) {
+        // always use non-causal attention for encoder graphs
+        // TODO: this is a tmp solution until we have a proper way to support enc-dec models
+        // ref: https://github.com/ggml-org/llama.cpp/pull/12181#issuecomment-2730451223
+        cparams.causal_attn = false;
+    }
 
     auto * gf = graph_init();
     auto res = graph_build(ctx_compute.get(), gf, ubatch, LLM_GRAPH_TYPE_ENCODER);
Reference in New Issue
Block a user