llama : add RobertaForSequenceClassification reranker support (#13875)
convert_hf_to_gguf.py
@@ -3695,6 +3695,10 @@ class BertModel(TextModel):
         self.gguf_writer.add_causal_attention(False)
         self._try_set_pooling_type()
 
+        if cls_out_labels := self.hparams.get("id2label"):
+            key_name = gguf.Keys.Classifier.OUTPUT_LABELS.format(arch = gguf.MODEL_ARCH_NAMES[self.model_arch])
+            self.gguf_writer.add_array(key_name, [v for k, v in sorted(cls_out_labels.items())])
+
     def set_vocab(self):
         tokens, toktypes, tokpre = self.get_vocab_base()
         self.vocab_size = len(tokens)
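This first hunk makes `BertModel.set_gguf_parameters` export the classifier's label names whenever the Hugging Face config carries an `id2label` map. A minimal sketch of what the added lines produce, with a made-up label set (the names `irrelevant`/`relevant` are purely illustrative):

    # Hypothetical excerpt of a reranker's config.json, as it appears in self.hparams.
    hparams = {"id2label": {"0": "irrelevant", "1": "relevant"}}

    if cls_out_labels := hparams.get("id2label"):
        # sorted() orders the (key, value) pairs by key, so the values come out
        # in class-index order and only the label names are kept.
        labels = [v for k, v in sorted(cls_out_labels.items())]
        print(labels)  # ['irrelevant', 'relevant']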
convert_hf_to_gguf.py
@@ -3745,6 +3749,7 @@ class BertModel(TextModel):
         if name.startswith("cls.seq_relationship"):
             return []
 
+        if self.hparams.get("id2label"):
             # For BertForSequenceClassification (direct projection layer)
             if name == "classifier.weight":
                 name = "classifier.out_proj.weight"
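Still in the converter, this hunk gates the head's tensor renaming: `BertForSequenceClassification` stores its projection as a bare `classifier` linear layer, whereas the Roberta-style head nests it under `classifier.out_proj`. A rough sketch of the renaming idea, not the converter's actual code path (the bias would presumably be folded in the same way):

    def normalize_classifier_name(name: str) -> str:
        # BertForSequenceClassification exposes a direct nn.Linear named
        # "classifier"; map it onto the out_proj naming that the rest of
        # the conversion expects.
        if name == "classifier.weight":
            return "classifier.out_proj.weight"
        return name

    print(normalize_classifier_name("classifier.weight"))  # classifier.out_proj.weight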
convert_hf_to_gguf.py
@@ -3846,7 +3851,7 @@ class BertModel(TextModel):
         self.gguf_writer.add_add_eos_token(True)
 
 
-@ModelBase.register("RobertaModel")
+@ModelBase.register("RobertaModel", "RobertaForSequenceClassification")
 class RobertaModel(BertModel):
     model_arch = gguf.MODEL_ARCH.BERT
 
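Registering `RobertaForSequenceClassification` alongside `RobertaModel` lets the converter pick the same class for either architecture string found in a model's `config.json`. Conceptually the decorator maintains a name-to-class registry, along these lines (a simplified sketch, not the real `ModelBase` implementation):

    _registry: dict[str, type] = {}

    def register(*names: str):
        def wrap(cls: type) -> type:
            for name in names:
                _registry[name] = cls  # map each architecture string to the class
            return cls
        return wrap

    @register("RobertaModel", "RobertaForSequenceClassification")
    class RobertaModel:  # stand-in for the real converter class
        pass

    assert _registry["RobertaForSequenceClassification"] is RobertaModel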
gguf-py/gguf/constants.py
@@ -177,6 +177,9 @@ class Keys:
         EMBEDDING_LENGTH = "{arch}.convnext.embedding_length"
         BLOCK_COUNT = "{arch}.convnext.block_count"
 
+    class Classifier:
+        OUTPUT_LABELS = "{arch}.classifier.output_labels"
+
     class Tokenizer:
         MODEL = "tokenizer.ggml.model"
         PRE = "tokenizer.ggml.pre"
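The new key template follows the existing `Keys` convention: `{arch}` is substituted with the model architecture when the key is written. For example (the `bert` value is illustrative):

    OUTPUT_LABELS = "{arch}.classifier.output_labels"
    print(OUTPUT_LABELS.format(arch="bert"))  # bert.classifier.output_labels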
src/llama-arch.cpp
@@ -174,6 +174,8 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
    { LLM_KV_CONVNEXT_EMBEDDING_LENGTH, "%s.convnext.embedding_length" },
    { LLM_KV_CONVNEXT_BLOCK_COUNT, "%s.convnext.block_count" },
 
+    { LLM_KV_CLASSIFIER_OUTPUT_LABELS, "%s.classifier.output_labels" },
+
    { LLM_KV_TOKENIZER_MODEL, "tokenizer.ggml.model" },
    { LLM_KV_TOKENIZER_PRE, "tokenizer.ggml.pre" },
    { LLM_KV_TOKENIZER_LIST, "tokenizer.ggml.tokens" },
src/llama-arch.h
@@ -213,6 +213,8 @@ enum llm_kv {
    LLM_KV_CONVNEXT_EMBEDDING_LENGTH,
    LLM_KV_CONVNEXT_BLOCK_COUNT,
 
+    LLM_KV_CLASSIFIER_OUTPUT_LABELS,
+
    // deprecated:
    LLM_KV_TOKENIZER_PREFIX_ID,
    LLM_KV_TOKENIZER_SUFFIX_ID,
src/llama-hparams.h
@@ -131,6 +131,9 @@ struct llama_hparams {
    bool attn_soft_cap = false;
    bool use_kq_norm = true;
 
+    // for Classifiers
+    uint32_t n_cls_out = 1;
+
    // llama4
    uint32_t n_moe_layer_step = 0;
    uint32_t n_no_rope_layer_step = 4;
src/llama-model.cpp
@@ -683,6 +683,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
             ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
             ml.get_key(LLM_KV_ATTENTION_CAUSAL, hparams.causal_attn);
             ml.get_key(LLM_KV_POOLING_TYPE, hparams.pooling_type, false);
+            ml.get_arr_n(LLM_KV_CLASSIFIER_OUTPUT_LABELS, hparams.n_cls_out, false);
 
             switch (hparams.n_layer) {
                 case 3:
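Here `get_arr_n` with `required = false` sets `n_cls_out` to the length of the output-labels array and leaves the default of 1 untouched when the key is absent, so existing single-score rerankers keep working. In Python terms, roughly (the metadata dict and key value are illustrative):

    metadata = {"bert.classifier.output_labels": ["irrelevant", "relevant"]}

    n_cls_out = 1  # default, as in llama_hparams
    labels = metadata.get("bert.classifier.output_labels")
    if labels is not None:
        n_cls_out = len(labels)  # get_arr_n stores the array's length
    print(n_cls_out)  # 2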
src/llama-model.cpp
@@ -2121,8 +2122,8 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     cls = create_tensor(tn(LLM_TENSOR_CLS, "weight"), {n_embd, n_embd}, TENSOR_NOT_REQUIRED);
                     cls_b = create_tensor(tn(LLM_TENSOR_CLS, "bias"), {n_embd}, TENSOR_NOT_REQUIRED);
 
-                    cls_out = create_tensor(tn(LLM_TENSOR_CLS_OUT, "weight"), {n_embd, 1}, TENSOR_NOT_REQUIRED);
-                    cls_out_b = create_tensor(tn(LLM_TENSOR_CLS_OUT, "bias"), {1}, TENSOR_NOT_REQUIRED);
+                    cls_out = create_tensor(tn(LLM_TENSOR_CLS_OUT, "weight"), {n_embd, hparams.n_cls_out}, TENSOR_NOT_REQUIRED);
+                    cls_out_b = create_tensor(tn(LLM_TENSOR_CLS_OUT, "bias"), {hparams.n_cls_out}, TENSOR_NOT_REQUIRED);
                 }
 
                 tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {n_embd}, 0);
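Finally, the classification head's output dimension follows `n_cls_out` instead of the hard-coded 1, so the model can emit one logit per label. A small NumPy shape check of that head (all sizes invented for illustration):

    import numpy as np

    n_embd, n_cls_out = 768, 2       # illustrative sizes
    pooled = np.zeros(n_embd)        # pooled (e.g. CLS) embedding
    w_out = np.zeros((n_embd, n_cls_out))
    b_out = np.zeros(n_cls_out)

    logits = pooled @ w_out + b_out  # one logit per output label
    print(logits.shape)              # (2,)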