mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-06-27 12:05:03 +00:00)
convert : qwen2/3moe : set yarn metadata if present (#13331)
* set yarn metadata if present

* add comment about enabling YaRN

Co-authored-by: Xuan-Son Nguyen <son@huggingface.co>
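For context, the check added below reads the rope_scaling block of the model's config.json (exposed as self.hparams). A minimal sketch of the shape it looks for, assuming a YaRN-tuned Qwen3 checkpoint; the values are illustrative, following the long-context guide linked in the diff, not taken from this commit:

    # Illustrative hparams as parsed from config.json; values are assumptions.
    hparams = {
        "rope_scaling": {
            "type": "yarn",                             # must be "yarn" for the new branch to fire
            "factor": 4.0,                              # context-extension factor
            "original_max_position_embeddings": 32768,  # pre-YaRN context length
        }
    }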
convert_hf_to_gguf.py

@@ -2761,6 +2761,13 @@ class Qwen2MoeModel(TextModel):
         if (shared_expert_intermediate_size := self.hparams.get('shared_expert_intermediate_size')) is not None:
             self.gguf_writer.add_expert_shared_feed_forward_length(shared_expert_intermediate_size)
             logger.info(f"gguf: expert shared feed forward length = {shared_expert_intermediate_size}")
+        # YaRN is not enabled by default
+        # To enable it, please refer to this guide: https://huggingface.co/Qwen/Qwen3-30B-A3B#processing-long-texts
+        if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
+            if self.hparams["rope_scaling"].get("type") == "yarn":
+                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
+                self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
+                self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["rope_scaling"]["original_max_position_embeddings"])
 
     _experts: list[dict[str, Tensor]] | None = None
 
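A standalone sketch of the behavior this adds, with a hypothetical StubWriter standing in for gguf.GGUFWriter (the real converter passes the gguf.RopeScalingType.YARN enum rather than a string; the guards mirror the diff):

    # Hypothetical stand-in for gguf.GGUFWriter, for illustration only.
    class StubWriter:
        def add_rope_scaling_type(self, t):
            print(f"rope.scaling.type = {t}")

        def add_rope_scaling_factor(self, f):
            print(f"rope.scaling.factor = {f}")

        def add_rope_scaling_orig_ctx_len(self, n):
            print(f"rope.scaling.original_context_length = {n}")


    def write_yarn_metadata(hparams: dict, writer: StubWriter) -> None:
        # Same guards as the commit: rope_scaling must exist, carry a
        # "factor", and be explicitly typed "yarn"; otherwise no keys are
        # written and YaRN stays disabled by default.
        if hparams.get("rope_scaling") is not None and "factor" in hparams["rope_scaling"]:
            if hparams["rope_scaling"].get("type") == "yarn":
                writer.add_rope_scaling_type("yarn")  # real code: gguf.RopeScalingType.YARN
                writer.add_rope_scaling_factor(hparams["rope_scaling"]["factor"])
                writer.add_rope_scaling_orig_ctx_len(hparams["rope_scaling"]["original_max_position_embeddings"])


    # Example with an illustrative YaRN config; prints the three keys.
    write_yarn_metadata(
        {"rope_scaling": {"type": "yarn", "factor": 4.0,
                          "original_max_position_embeddings": 32768}},
        StubWriter(),
    )

With rope_scaling absent, or typed anything other than "yarn", the sketch writes nothing, matching the "set yarn metadata if present" intent of the commit.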