diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index feef03d1c..930c1bdd0 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -849,6 +849,9 @@ class TextModel(ModelBase):
         if chkhsh == "2085e1638f6c377a0aa4ead21b27bb4cb941bf800df86ed391011769c1758dfb":
             # ref: https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-32B
             res = "exaone4"
+        if chkhsh == "d4540891389ea895b53b399da6ac824becc30f2fba0e9ddbb98f92e55ca0e97c":
+            # ref: https://huggingface.co/Qwen/Qwen3-Embedding-8B
+            res = "qwen2"

         if res is None:
             logger.warning("\n")
diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py
index df490fc80..e6efc93fa 100644
--- a/gguf-py/gguf/tensor_mapping.py
+++ b/gguf-py/gguf/tensor_mapping.py
@@ -33,6 +33,7 @@ class TensorNameMap:
             "language_model.model.embed_tokens", # llama4
             "encoder", # neobert
             "model.transformer.wte", # llada
+            "embed_tokens", # qwen3-embedding
         ),

         # Token type embeddings
@@ -143,6 +144,7 @@ class TensorNameMap:
             "transformer_encoder.{bid}.attention_norm", # neobert
             "model.layers.{bid}.operator_norm", # lfm2
             "model.transformer.blocks.{bid}.attn_norm", # llada
+            "layers.{bid}.input_layernorm", # qwen3-embedding
         ),

         # Attention norm 2
@@ -188,6 +190,7 @@
             "transformer.h.{bid}.attn.attention.q_proj", # exaone
             "model.layers.{bid}.self_attn.q_proj", # llama4
             "model.transformer.blocks.{bid}.q_proj", # llada
+            "layers.{bid}.self_attn.q_proj", # qwen3-embedding
         ),

         # Attention key
@@ -205,6 +208,7 @@
             "transformer.h.{bid}.attn.attention.k_proj", # exaone
             "model.layers.{bid}.self_attn.k_proj", # llama4
             "model.transformer.blocks.{bid}.k_proj", # llada
+            "layers.{bid}.self_attn.k_proj", # qwen3-embedding
         ),

         # Attention value
@@ -221,6 +225,7 @@
             "transformer.h.{bid}.attn.attention.v_proj", # exaone
             "model.layers.{bid}.self_attn.v_proj", # llama4
             "model.transformer.blocks.{bid}.v_proj", # llada
+            "layers.{bid}.self_attn.v_proj", # qwen3-embedding
         ),

         # Attention output
@@ -254,6 +259,7 @@
             "model.layers.{bid}.self_attn.o_proj", # llama4
             "transformer_encoder.{bid}.wo", # neobert
             "model.transformer.blocks.{bid}.attn_out", # llada
+            "layers.{bid}.self_attn.o_proj", # qwen3-embedding
         ),

         # Attention output norm
@@ -300,6 +306,7 @@
             "transformer_encoder.{bid}.ffn_norm", # neobert
             "model.layers.layers.{bid}.pre_mlp_norm", # plamo2
             "model.transformer.blocks.{bid}.ff_norm", # llada
+            "layers.{bid}.post_attention_layernorm", # qwen3-embedding
         ),

         # Post feed-forward norm
@@ -373,7 +380,8 @@
             "model.layers.{bid}.feed_forward.up_proj", # llama4 jamba granite-hybrid
             "transformer_encoder.{bid}.ffn.w12", # neobert
             "model.layers.{bid}.block_sparse_moe.up", # smallthinker
-            "model.transformer.blocks.{bid}.up_proj", # llada
+            "model.transformer.blocks.{bid}.up_proj", # llada
+            "layers.{bid}.mlp.up_proj", # qwen3-embedding
         ),

         MODEL_TENSOR.FFN_UP_EXP: (
@@ -416,6 +424,7 @@
             "model.layers.{bid}.feed_forward.gate_proj", # llama4 jamba granite-hybrid
             "model.layers.{bid}.block_sparse_moe.gate", # smallthinker
             "model.transformer.blocks.{bid}.ff_proj", # llada
+            "layers.{bid}.mlp.gate_proj", # qwen3-embedding
         ),

         MODEL_TENSOR.FFN_GATE_EXP: (
@@ -465,7 +474,8 @@
             "model.layers.{bid}.feed_forward.down_proj", # llama4 jamba granite-hybrid
             "transformer_encoder.{bid}.ffn.w3", # neobert
             "model.layers.{bid}.block_sparse_moe.down", # smallthinker
-            "model.transformer.blocks.{bid}.ff_out", # llada
+            "model.transformer.blocks.{bid}.ff_out", # llada
+            "layers.{bid}.mlp.down_proj", # qwen3-embedding
         ),

         MODEL_TENSOR.FFN_DOWN_EXP: (
@@ -497,6 +507,7 @@
             "encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2
             "transformer.layers.{bid}.attn.q_norm", # openelm
             "model.layers.layers.{bid}.mixer.q", # plamo2
+            "layers.{bid}.self_attn.q_norm", # qwen3-embedding
         ),

         MODEL_TENSOR.ATTN_K_NORM: (
@@ -508,6 +519,7 @@
             "encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2
             "transformer.layers.{bid}.attn.k_norm", # openelm
             "model.layers.layers.{bid}.mixer.k", # plamo2
+            "layers.{bid}.self_attn.k_norm", # qwen3-embedding
         ),

         MODEL_TENSOR.ROPE_FREQS: (
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index e3f12edd9..6b58fb8a0 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -899,6 +899,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
             } break;
         case LLM_ARCH_QWEN3:
             {
+                ml.get_key(LLM_KV_POOLING_TYPE, hparams.pooling_type, false);
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                 switch (hparams.n_layer) {
                     case 28: type = hparams.n_embd == 1024 ? LLM_TYPE_0_6B : LLM_TYPE_1_7B; break;