diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 37a9f36dc..ed069e4f8 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -1103,7 +1103,7 @@ class VisionModel(ModelBase): # preprocessor config self.gguf_writer.add_vision_image_mean(self.preprocessor_config["image_mean"]) - self.gguf_writer.add_vision_image_std(self.preprocessor_config["image_mean"]) + self.gguf_writer.add_vision_image_std(self.preprocessor_config["image_std"]) def write_vocab(self): raise ValueError("VisionModel does not support vocab writing") @@ -2563,8 +2563,9 @@ class Qwen2VLVisionModel(VisionModel): # rename config.json values self.hparams["num_attention_heads"] = self.hparams.get("num_heads") self.hparams["num_hidden_layers"] = self.hparams.get("depth") - self.hparams["intermediate_size"] = self.hparams.get("hidden_size") - self.hparams["hidden_size"] = self.hparams.get("embed_dim") + if "embed_dim" in self.hparams: # qwen2vl + self.hparams["intermediate_size"] = self.hparams.get("hidden_size") + self.hparams["hidden_size"] = self.hparams.get("embed_dim") def set_gguf_parameters(self): super().set_gguf_parameters() diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py index 0100d0f33..b113e3f11 100644 --- a/gguf-py/gguf/tensor_mapping.py +++ b/gguf-py/gguf/tensor_mapping.py @@ -990,7 +990,7 @@ class TensorNameMap: "model.vision_model.encoder.layers.{bid}.mlp.fc2", # SmolVLM, gemma3 (note: name is swapped) "vision_tower.transformer.layers.{bid}.feed_forward.up_proj", # pixtral "visual.blocks.{bid}.mlp.fc2", # qwen2vl - "visual.blocks.{bid}.mlp.down_proj", # qwen2.5vl + "visual.blocks.{bid}.mlp.up_proj", # qwen2.5vl ), MODEL_TENSOR.V_ENC_FFN_GATE: ( @@ -1004,7 +1004,7 @@ class TensorNameMap: "model.vision_model.encoder.layers.{bid}.mlp.fc1", # SmolVLM, gemma3 (note: name is swapped) "vision_tower.transformer.layers.{bid}.feed_forward.down_proj", # pixtral "visual.blocks.{bid}.mlp.fc1", # qwen2vl - "visual.blocks.{bid}.mlp.up_proj", # qwen2.5vl + "visual.blocks.{bid}.mlp.down_proj", # qwen2.5vl ), MODEL_TENSOR.V_PRE_NORM: (