llama-model : add Glm4Model implementation for GLM-4-0414 (#12867)

* GLM-4-0414

* use original one

* Using with tensor map

* fix bug

* change order

* change order

* format with flask8
This commit is contained in:
Yuxuan Zhang
2025-04-11 18:10:10 +08:00
committed by GitHub
parent 0c50923944
commit 06bb53ad9b
9 changed files with 273 additions and 4 deletions

View File

@ -735,6 +735,9 @@ class Model:
if chkhsh == "d353350c764d8c3b39c763113960e4fb4919bea5fbf208a0e3b22e8469dc7406":
# ref: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct
res = "llama4"
if chkhsh == "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2":
# ref: https://huggingface.co/THUDM/glm-4-9b-hf
res = "glm4"
if res is None:
logger.warning("\n")
@ -4897,6 +4900,22 @@ class JaisModel(Model):
self.gguf_writer.add_max_alibi_bias(self.max_alibi_bias)
@Model.register("Glm4ForCausalLM")
class Glm4Model(Model):
model_arch = gguf.MODEL_ARCH.GLM4
def set_vocab(self):
self._set_vocab_gpt2()
def set_gguf_parameters(self):
super().set_gguf_parameters()
if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
if self.hparams["rope_scaling"].get("type") == "yarn":
self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["rope_scaling"]["original_max_position_embeddings"])
@Model.register("GlmForCausalLM", "ChatGLMModel", "ChatGLMForConditionalGeneration")
class ChatGLMModel(Model):
model_arch = gguf.MODEL_ARCH.CHATGLM
@ -5588,7 +5607,6 @@ def main() -> None:
with torch.inference_mode():
output_type = ftype_map[args.outtype]
model_architecture = hparams["architectures"][0]
try:
model_class = Model.from_model_architecture(model_architecture)
except NotImplementedError: