From 07c2e2f76cce9a61c110b6995fbb90ccea2c3aaa Mon Sep 17 00:00:00 2001
From: Xuan-Son Nguyen
Date: Wed, 30 Apr 2025 13:06:15 +0200
Subject: [PATCH 1/6] convert : correct typo image_mean --> image_std (#13208)

---
 convert_hf_to_gguf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index b9cea7e46..d607af695 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -1103,7 +1103,7 @@ class VisionModel(ModelBase):
 
         # preprocessor config
         self.gguf_writer.add_vision_image_mean(self.preprocessor_config["image_mean"])
-        self.gguf_writer.add_vision_image_std(self.preprocessor_config["image_mean"])
+        self.gguf_writer.add_vision_image_std(self.preprocessor_config["image_std"])
 
     def write_vocab(self):
         raise ValueError("VisionModel does not support vocab writing")

From 416313773b53585fddcafbcb914cbbfbaeb94b1f Mon Sep 17 00:00:00 2001
From: shalinib-ibm
Date: Wed, 30 Apr 2025 16:47:08 +0530
Subject: [PATCH 2/6] ggml : fix ppc64le build (#13176)

Build fails with compilation error on power pc. This patch fixes the same.

Tested with unit tests run via --build && cd && make test

Signed-off-by: Shalini Salomi Bodapati
---
 ggml/src/ggml-cpu/simd-mappings.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ggml/src/ggml-cpu/simd-mappings.h b/ggml/src/ggml-cpu/simd-mappings.h
index 04d10cec2..45c31cf1f 100644
--- a/ggml/src/ggml-cpu/simd-mappings.h
+++ b/ggml/src/ggml-cpu/simd-mappings.h
@@ -341,7 +341,7 @@ static inline void __avx_f32cx8_store(ggml_fp16_t *x, __m256 y) {
 #define GGML_F32_EPR 4
 
 #define GGML_F32x4 vector float
-#define GGML_F32x4_ZERO 0.0f
+#define GGML_F32x4_ZERO {0.0f}
 #define GGML_F32x4_SET1 vec_splats
 #define GGML_F32x4_LOAD(p) vec_xl(0, p)
 #define GGML_F32x4_STORE(p, r) vec_xst(r, 0, p)

From e5007a5edf2692ef7151a81a61ce2716b83374e5 Mon Sep 17 00:00:00 2001
From: Jeff Bolz
Date: Wed, 30 Apr 2025 07:38:37 -0500
Subject: [PATCH 3/6] vulkan: use uint array index to avoid glslang bug (#13193)

---
 ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp
index 962d2353f..9cb7da2da 100644
--- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp
+++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_funcs_cm2.comp
@@ -482,7 +482,7 @@ float16_t dequantFuncIQ2_XXS(const in decodeBufIQ2_XXS bl, const in uint blockCo
     const uint ib8 = (idx & 0x18) >> 3; // 0..3
     const uint iqs = 8 * ib32 + ib8;
 
-    const uint8_t qs = bl.block.qs[iqs];
+    const uint qs = bl.block.qs[iqs];
     const uint signscale = pack32(u16vec2(bl16.block.qs[4*ib32+2], bl16.block.qs[4*ib32+3]));
 
     const float dscale = float(bl.block.d) * 0.25 * (0.5 + float(signscale >> 28));

From 3b127c738535d95e06abd0d43da147bc13516ad0 Mon Sep 17 00:00:00 2001
From: Olivier Chafik
Date: Wed, 30 Apr 2025 13:52:35 +0100
Subject: [PATCH 4/6] common : add -jf / --json-schema-file flag (#12011)

---
 common/arg.cpp | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/common/arg.cpp b/common/arg.cpp
index 6633c3a1d..b8c72ee64 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -1948,6 +1948,23 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.sampling.grammar = json_schema_to_grammar(json::parse(value));
         }
     ).set_sparam());
+    add_opt(common_arg(
+        {"-jf", "--json-schema-file"}, "FILE",
+        "File containing a JSON schema to constrain generations (https://json-schema.org/), e.g. `{}` for any JSON object\nFor schemas w/ external $refs, use --grammar + example/json_schema_to_grammar.py instead",
+        [](common_params & params, const std::string & value) {
+            std::ifstream file(value);
+            if (!file) {
+                throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str()));
+            }
+            std::string schema;
+            std::copy(
+                std::istreambuf_iterator<char>(file),
+                std::istreambuf_iterator<char>(),
+                std::back_inserter(schema)
+            );
+            params.sampling.grammar = json_schema_to_grammar(json::parse(schema));
+        }
+    ).set_sparam());
     add_opt(common_arg(
         {"--pooling"}, "{none,mean,cls,last,rank}",
         "pooling type for embeddings, use model default if unspecified",

From ceda28ef8e310a8dee60bf275077a3eedae8e36c Mon Sep 17 00:00:00 2001
From: Tatsuya Tanaka
Date: Wed, 30 Apr 2025 22:25:20 +0900
Subject: [PATCH 5/6] llava : remove duplicate include (#13207)

---
 examples/llava/clip-impl.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/examples/llava/clip-impl.h b/examples/llava/clip-impl.h
index 04bfcbb5e..66cb21ef1 100644
--- a/examples/llava/clip-impl.h
+++ b/examples/llava/clip-impl.h
@@ -2,8 +2,6 @@
 #include "gguf.h"
 #include "clip.h"
 
-#include "clip.h"
-
 #include 
 #include 
 #include 

From 3e168bede4d27b35656ab8026015b87659ecbec2 Mon Sep 17 00:00:00 2001
From: Xuan-Son Nguyen
Date: Wed, 30 Apr 2025 16:56:24 +0200
Subject: [PATCH 6/6] convert : improve model arch handling (#13122)

* convert : improve model arch handling

* use AutoConfig

* rm trust_remote_code

* Update convert_hf_to_gguf.py

* fix self.block_count for vision

* fix NomicBertModel
---
 convert_hf_to_gguf.py | 98 +++++++++++++++++++++++++------------------
 1 file changed, 57 insertions(+), 41 deletions(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index d607af695..123df801b 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -16,6 +16,7 @@ from pathlib import Path
 from hashlib import sha256
 from typing import TYPE_CHECKING, Any, Callable, ContextManager, Iterable, Iterator, Literal, Sequence, TypeVar, cast
 from itertools import chain
+from transformers import AutoConfig
 
 import math
 import numpy as np
@@ -66,8 +67,6 @@ class ModelBase:
     part_names: list[str]
     is_safetensors: bool
     hparams: dict[str, Any]
-    block_count: int
-    tensor_map: gguf.TensorNameMap
     tensor_names: set[str] | None
     gguf_writer: gguf.GGUFWriter
     model_name: str | None
@@ -78,6 +77,10 @@ class ModelBase:
     # subclasses should define this!
     model_arch: gguf.MODEL_ARCH
 
+    # subclasses should initialize this!
+    block_count: int
+    tensor_map: gguf.TensorNameMap
+
     def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, *, is_big_endian: bool = False,
                  use_temp_file: bool = False, eager: bool = False,
                  metadata_override: Path | None = None, model_name: str | None = None,
@@ -113,8 +116,6 @@ class ModelBase:
         if not self.is_safetensors:
             self.part_names = ModelBase.get_model_part_names(self.dir_model, "pytorch_model", ".bin")
         self.hparams = ModelBase.load_hparams(self.dir_model) if hparams is None else hparams
-        self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer", "num_layers"])
-        self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
         self.tensor_names = None
         self.metadata_override = metadata_override
         self.model_name = model_name
@@ -417,15 +418,13 @@ class ModelBase:
 
     @staticmethod
     def load_hparams(dir_model: Path):
-        with open(dir_model / "config.json", "r", encoding="utf-8") as f:
-            hparams = json.load(f)
-            architectures = hparams.get("architectures")
-            if "text_config" in hparams:
-                hparams = {**hparams, **hparams["text_config"]}
-            if architectures is not None:
-                # preserve "architectures" from root level config
-                hparams["architectures"] = architectures
-            return hparams
+        try:
+            return AutoConfig.from_pretrained(dir_model).to_dict()
+        except Exception as e:
+            logger.warning(f"Failed to load model config from {dir_model}: {e}")
+            logger.warning("Trying to load config.json instead")
+            with open(dir_model / "config.json", "r", encoding="utf-8") as f:
+                return json.load(f)
 
     @classmethod
     def register(cls, *names: str) -> Callable[[AnyModel], AnyModel]:
@@ -454,6 +453,23 @@ class ModelBase:
 
 
 class TextModel(ModelBase):
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        if "text_config" in self.hparams:
+            # move the text_config to the root level
+            self.hparams = {**self.hparams, **self.hparams["text_config"]}
+
+        self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer", "num_layers"])
+        self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
+
+    @classmethod
+    def __init_subclass__(cls):
+        # can't use an abstract property, because overriding it without type errors
+        # would require using decorated functions instead of simply defining the property
+        if "model_arch" not in cls.__dict__:
+            raise TypeError(f"Missing property 'model_arch' for {cls.__name__!r}")
+
     def set_vocab(self):
         self._set_vocab_gpt2()
 
@@ -1070,9 +1086,9 @@ class VisionModel(ModelBase):
         if self.model_arch != gguf.MODEL_ARCH.CLIP_VISION:
             raise TypeError("VisionModel must be subclassed with model_arch = gguf.MODEL_ARCH.CLIP_VISION")
 
-        # small hack to correct the number of layers
-        self.tensor_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.CLIP_VISION, 128)
-        self.n_embd_text = self.find_hparam(["hidden_size", "n_embd"])
+        # get n_embd of the text model
+        text_config = {**self.hparams, **self.hparams["text_config"]}
+        self.n_embd_text = text_config.get("hidden_size", text_config.get("n_embd", 0))
         assert self.n_embd_text > 0, "n_embd not found in hparams"
 
         if "vision_config" not in self.hparams:
@@ -1081,6 +1097,9 @@ class VisionModel(ModelBase):
         self.global_config = self.hparams
         self.hparams = self.hparams["vision_config"]
 
+        self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer", "num_layers", "depth"])
+        self.tensor_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.CLIP_VISION, self.block_count)
+
         # load preprocessor config
         with open(self.dir_model / "preprocessor_config.json", "r", encoding="utf-8") as f:
             self.preprocessor_config = json.load(f)
@@ -1098,7 +1117,7 @@ class VisionModel(ModelBase):
         self.gguf_writer.add_vision_patch_size(self.find_hparam(["patch_size"]))
         self.gguf_writer.add_vision_embedding_length(self.find_hparam(["hidden_size"]))
         self.gguf_writer.add_vision_feed_forward_length(self.find_hparam(["intermediate_size"]))
-        self.gguf_writer.add_vision_block_count(self.find_hparam(["num_hidden_layers"]))
+        self.gguf_writer.add_vision_block_count(self.block_count)
        self.gguf_writer.add_vision_head_count(self.find_hparam(["num_attention_heads"]))
 
         # preprocessor config
@@ -1719,23 +1738,12 @@ class StableLMModel(TextModel):
     "LlamaForCausalLM",
     "MistralForCausalLM",
     "MixtralForCausalLM",
-    "Idefics3ForConditionalGeneration",
-    "SmolVLMForConditionalGeneration",
+    "VLlama3ForCausalLM",
     "LlavaForConditionalGeneration")
 class LlamaModel(TextModel):
     model_arch = gguf.MODEL_ARCH.LLAMA
     undo_permute = True
 
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        # fix for SmolVLM2, missing `num_attention_heads` in config.json
-        if self.hparams["architectures"][0] == "SmolVLMForConditionalGeneration":
-            self.hparams["num_attention_heads"] = self.hparams.get("num_attention_heads", 32)
-        # fix for Pixtral, missing `num_attention_heads` in config.json
-        if self.hparams["architectures"][0] == "LlavaForConditionalGeneration" \
-            and self.hparams.get("model_type") == "mistral":
-            self.hparams["num_attention_heads"] = self.hparams.get("num_attention_heads", 32)
-
     def set_vocab(self):
         try:
             self._set_vocab_sentencepiece()
@@ -1898,11 +1906,7 @@ class LlavaVisionModel(VisionModel):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         if self.hparams["model_type"] == "pixtral":
-            # fix missing config.json values
-            self.hparams["num_attention_heads"] = self.hparams.get("num_attention_heads", 16)
-            self.hparams["num_hidden_layers"] = self.hparams.get("num_hidden_layers", 24)
-            self.hparams["intermediate_size"] = self.hparams.get("intermediate_size", 4096)
-            self.hparams["hidden_size"] = self.hparams.get("hidden_size", 1024)
+            # layer_norm_eps is not in config.json, it is hard-coded in modeling_pixtral.py
             self.hparams["layer_norm_eps"] = self.hparams.get("layer_norm_eps", 1e-5)
             self.img_break_tok_id = 12 # see tokenizer_config.json
         else:
@@ -1913,7 +1917,6 @@ class LlavaVisionModel(VisionModel):
         hparams = self.hparams
         if hparams["model_type"] == "pixtral":
             self.gguf_writer.add_vision_projector_type(gguf.VisionProjectorType.PIXTRAL)
-            # default values below are taken from HF tranformers code
             self.gguf_writer.add_vision_attention_layernorm_eps(hparams["layer_norm_eps"])
             self.gguf_writer.add_vision_use_silu(True)
 
@@ -1944,13 +1947,12 @@
 class SmolVLMModel(VisionModel):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        # fix for SmolVLM2, missing some keys in config.json
-        # default values are taken from transformers code
         if self.hparams["model_type"] == "smolvlm_vision":
+            # fix for SmolVLM2, missing some keys in config.json
+            # default values are taken from transformers code
             self.hparams["hidden_size"] = self.hparams.get("hidden_size", 1152)
             self.hparams["num_attention_heads"] = self.hparams.get("num_attention_heads", 16)
             self.hparams["intermediate_size"] = self.hparams.get("intermediate_size", 3072)
-            self.hparams["num_hidden_layers"] = self.hparams.get("num_hidden_layers", 12)
 
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
@@ -3505,6 +3507,8 @@ class RobertaModel(BertModel):
 
 @ModelBase.register("NomicBertModel")
 class NomicBertModel(BertModel):
+    model_arch = gguf.MODEL_ARCH.BERT
+
     def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, **kwargs: Any):
         hparams = kwargs.pop("hparams", None)
         if hparams is None:
@@ -5849,6 +5853,19 @@ def split_str_to_n_bytes(split_str: str) -> int:
     return n
 
 
+def get_model_architecture(dir_model: Path, model_type: ModelType, hparams: Any = None) -> str:
+    hparams = ModelBase.load_hparams(dir_model) if hparams is None else hparams
+    text_config = hparams.get("text_config", {})
+    vision_config = hparams.get("vision_config", {})
+    arch = hparams["architectures"][0]
+    # if "architectures" is found in the sub-config, use that instead
+    if model_type == ModelType.TEXT and text_config.get("architectures") is not None:
+        arch = text_config["architectures"][0]
+    elif model_type == ModelType.VISION and vision_config.get("architectures") is not None:
+        arch = vision_config["architectures"][0]
+    return arch
+
+
 def main() -> None:
     args = parse_args()
 
@@ -5901,16 +5918,15 @@ def main() -> None:
 
     logger.info(f"Loading model: {dir_model.name}")
 
-    hparams = ModelBase.load_hparams(dir_model)
-
     if args.mmproj:
         if "mmproj" not in fname_out.name:
             fname_out = ModelBase.add_prefix_to_filename(fname_out, "mmproj-")
 
     with torch.inference_mode():
         output_type = ftype_map[args.outtype]
-        model_architecture = hparams["architectures"][0]
         model_type = ModelType.VISION if args.mmproj else ModelType.TEXT
+        model_architecture = get_model_architecture(dir_model, model_type)
+        logger.info(f"Model architecture: {model_architecture}")
         try:
             model_class = ModelBase.from_model_architecture(model_architecture, model_type=model_type)
         except NotImplementedError: