diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 227ae7bc2..797773193 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -3695,6 +3695,10 @@ class BertModel(TextModel):
         self.gguf_writer.add_causal_attention(False)
         self._try_set_pooling_type()
 
+        if cls_out_labels := self.hparams.get("id2label"):
+            key_name = gguf.Keys.Classifier.OUTPUT_LABELS.format(arch = gguf.MODEL_ARCH_NAMES[self.model_arch])
+            self.gguf_writer.add_array(key_name, [v for k, v in sorted(cls_out_labels.items())])
+
     def set_vocab(self):
         tokens, toktypes, tokpre = self.get_vocab_base()
         self.vocab_size = len(tokens)
@@ -3745,12 +3749,13 @@ class BertModel(TextModel):
         if name.startswith("cls.seq_relationship"):
            return []
 
-        # For BertForSequenceClassification (direct projection layer)
-        if name == "classifier.weight":
-            name = "classifier.out_proj.weight"
+        if self.hparams.get("id2label"):
+            # For BertForSequenceClassification (direct projection layer)
+            if name == "classifier.weight":
+                name = "classifier.out_proj.weight"
 
-        if name == "classifier.bias":
-            name = "classifier.out_proj.bias"
+            if name == "classifier.bias":
+                name = "classifier.out_proj.bias"
 
         return [(self.map_tensor_name(name), data_torch)]
 
@@ -3846,7 +3851,7 @@ class BertModel(TextModel):
         self.gguf_writer.add_add_eos_token(True)
 
 
-@ModelBase.register("RobertaModel")
+@ModelBase.register("RobertaModel", "RobertaForSequenceClassification")
 class RobertaModel(BertModel):
     model_arch = gguf.MODEL_ARCH.BERT
 
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 31163effa..635b61f22 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -177,6 +177,9 @@ class Keys:
         EMBEDDING_LENGTH = "{arch}.convnext.embedding_length"
         BLOCK_COUNT      = "{arch}.convnext.block_count"
 
+    class Classifier:
+        OUTPUT_LABELS = "{arch}.classifier.output_labels"
+
     class Tokenizer:
         MODEL = "tokenizer.ggml.model"
         PRE   = "tokenizer.ggml.pre"
diff --git a/src/llama-arch.cpp b/src/llama-arch.cpp
index abf436ada..2bb18c85f 100644
--- a/src/llama-arch.cpp
+++ b/src/llama-arch.cpp
@@ -174,6 +174,8 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_CONVNEXT_EMBEDDING_LENGTH, "%s.convnext.embedding_length" },
     { LLM_KV_CONVNEXT_BLOCK_COUNT,      "%s.convnext.block_count"      },
 
+    { LLM_KV_CLASSIFIER_OUTPUT_LABELS, "%s.classifier.output_labels" },
+
     { LLM_KV_TOKENIZER_MODEL, "tokenizer.ggml.model"  },
     { LLM_KV_TOKENIZER_PRE,   "tokenizer.ggml.pre"    },
     { LLM_KV_TOKENIZER_LIST,  "tokenizer.ggml.tokens" },
diff --git a/src/llama-arch.h b/src/llama-arch.h
index 41a023da3..930cb4eca 100644
--- a/src/llama-arch.h
+++ b/src/llama-arch.h
@@ -213,6 +213,8 @@ enum llm_kv {
     LLM_KV_CONVNEXT_EMBEDDING_LENGTH,
     LLM_KV_CONVNEXT_BLOCK_COUNT,
 
+    LLM_KV_CLASSIFIER_OUTPUT_LABELS,
+
     // deprecated:
     LLM_KV_TOKENIZER_PREFIX_ID,
     LLM_KV_TOKENIZER_SUFFIX_ID,
diff --git a/src/llama-hparams.h b/src/llama-hparams.h
index 2d72eab18..b2bcb8b01 100644
--- a/src/llama-hparams.h
+++ b/src/llama-hparams.h
@@ -131,6 +131,9 @@ struct llama_hparams {
     bool attn_soft_cap = false;
     bool use_kq_norm   = true;
 
+    // for Classifiers
+    uint32_t n_cls_out = 1;
+
     // llama4
     uint32_t n_moe_layer_step     = 0;
     uint32_t n_no_rope_layer_step = 4;
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index e99f5309f..4a4618a2b 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -683,6 +683,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
                 ml.get_key(LLM_KV_ATTENTION_CAUSAL,        hparams.causal_attn);
                 ml.get_key(LLM_KV_POOLING_TYPE,            hparams.pooling_type, false);
+                ml.get_arr_n(LLM_KV_CLASSIFIER_OUTPUT_LABELS, hparams.n_cls_out, false);
 
                 switch (hparams.n_layer) {
                     case 3:
@@ -2121,8 +2122,8 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                         cls   = create_tensor(tn(LLM_TENSOR_CLS, "weight"), {n_embd, n_embd}, TENSOR_NOT_REQUIRED);
                         cls_b = create_tensor(tn(LLM_TENSOR_CLS, "bias"),   {n_embd},         TENSOR_NOT_REQUIRED);
 
-                        cls_out   = create_tensor(tn(LLM_TENSOR_CLS_OUT, "weight"), {n_embd, 1}, TENSOR_NOT_REQUIRED);
-                        cls_out_b = create_tensor(tn(LLM_TENSOR_CLS_OUT, "bias"),   {1},         TENSOR_NOT_REQUIRED);
+                        cls_out   = create_tensor(tn(LLM_TENSOR_CLS_OUT, "weight"), {n_embd, hparams.n_cls_out}, TENSOR_NOT_REQUIRED);
+                        cls_out_b = create_tensor(tn(LLM_TENSOR_CLS_OUT, "bias"),   {hparams.n_cls_out},         TENSOR_NOT_REQUIRED);
                     }
 
                     tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {n_embd}, 0);