llama: Add support for RWKV v7 architecture (#12412)

* ggml: Add op l2_norm

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>

* ggml: Add op rwkv_wkv7

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>

* llama: Add support for RWKV7 and ARWKV7 models

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>

* llama: fix inference with RWKV6Qwen2

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>

* llama: add more (a)rwkv7 variants in size

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>

* Apply code-format changes

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>

* fix MUSA build

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>

* llama: fix shape error with rwkv using llama-parallel

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>

---------

Signed-off-by: Molly Sophia <mollysophia379@gmail.com>
This commit is contained in:
Molly Sophia
2025-03-18 07:27:50 +08:00
committed by GitHub
parent 60c902926c
commit 7dfad387e3
35 changed files with 2948 additions and 438 deletions

View File

@ -29,6 +29,7 @@ enum llm_type {
LLM_TYPE_109M,
LLM_TYPE_137M,
LLM_TYPE_160M,
LLM_TYPE_190M,
LLM_TYPE_220M,
LLM_TYPE_250M,
LLM_TYPE_270M,
@ -45,6 +46,7 @@ enum llm_type {
LLM_TYPE_1_6B,
LLM_TYPE_2B,
LLM_TYPE_2_8B,
LLM_TYPE_2_9B,
LLM_TYPE_3B,
LLM_TYPE_4B,
LLM_TYPE_6B,
@ -260,6 +262,20 @@ struct llama_layer {
struct ggml_tensor * time_mix_receptance_b = nullptr;
struct ggml_tensor * time_mix_gate = nullptr;
// rwkv7
struct ggml_tensor * time_mix_w0 = nullptr;
struct ggml_tensor * time_mix_a0 = nullptr;
struct ggml_tensor * time_mix_a1 = nullptr;
struct ggml_tensor * time_mix_a2 = nullptr;
struct ggml_tensor * time_mix_v0 = nullptr;
struct ggml_tensor * time_mix_v1 = nullptr;
struct ggml_tensor * time_mix_v2 = nullptr;
struct ggml_tensor * time_mix_g1 = nullptr;
struct ggml_tensor * time_mix_g2 = nullptr;
struct ggml_tensor * time_mix_k_k = nullptr;
struct ggml_tensor * time_mix_k_a = nullptr;
struct ggml_tensor * time_mix_r_k = nullptr;
struct ggml_tensor * time_mix_ln = nullptr;
struct ggml_tensor * time_mix_ln_b = nullptr;
struct ggml_tensor * time_mix_output = nullptr;