mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-28 12:25:03 +00:00
llama : Support llama 4 text-only (#12791)
* llama4 conversion * initial support, no chat template * clean up a bit * fix tokenizer conversion * correct hparams * try this * fix shexp * ffn_inp_normed * chat template * clean up model conversion * add_bos * add scale_before_ffn * fix order * weight_before_ffn * llm_graph_input_attn_temp * add chunk attn mask * build_inp_attn_scale() * add comment about ggml_repeat * clarify comments * fix build
This commit is contained in:
@ -10,6 +10,7 @@
|
||||
|
||||
enum llm_arch {
|
||||
LLM_ARCH_LLAMA,
|
||||
LLM_ARCH_LLAMA4,
|
||||
LLM_ARCH_DECI,
|
||||
LLM_ARCH_FALCON,
|
||||
LLM_ARCH_BAICHUAN,
|
||||
@ -118,6 +119,7 @@ enum llm_kv {
|
||||
LLM_KV_RESIDUAL_SCALE,
|
||||
LLM_KV_EMBEDDING_SCALE,
|
||||
LLM_KV_TOKEN_SHIFT_COUNT,
|
||||
LLM_KV_INTERLEAVE_MOE_LAYER_STEP,
|
||||
|
||||
LLM_KV_ATTENTION_HEAD_COUNT,
|
||||
LLM_KV_ATTENTION_HEAD_COUNT_KV,
|
||||
|
Reference in New Issue
Block a user