llama : add support for DistilBert (#13907)
* add distilbert

* small fixes

* add note for LLM_ARCH_DISTIL_BERT

* Use MODEL_ARCH.BERT for DistilBert

---------

Co-authored-by: dinhhuy <huy.dinh@brains-tech.co.jp>
@@ -2114,7 +2114,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
             case LLM_ARCH_NOMIC_BERT_MOE:
                 {
                     tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
-                    type_embd = create_tensor(tn(LLM_TENSOR_TOKEN_TYPES, "weight"), {n_embd, n_token_types}, 0);
+                    type_embd = create_tensor(tn(LLM_TENSOR_TOKEN_TYPES, "weight"), {n_embd, n_token_types}, TENSOR_NOT_REQUIRED);
 
                     if (arch == LLM_ARCH_BERT) {
                         pos_embd = create_tensor(tn(LLM_TENSOR_POS_EMBD, "weight"), {n_embd, n_ctx_train}, 0);
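The flag change above relies on the loader contract that the diff assumes: with TENSOR_NOT_REQUIRED, create_tensor returns nullptr when the tensor is absent from the model file instead of failing the load, which is what lets DistilBERT checkpoints (which ship no token-type embedding table) go through the shared BERT loading path. Below is a minimal stand-alone sketch of that contract; the toy_* names are hypothetical stand-ins, not the real llama.cpp loader API.

    #include <cstdio>
    #include <map>
    #include <stdexcept>
    #include <string>

    // Toy stand-in for a loaded tensor (hypothetical, for illustration only).
    struct toy_tensor { /* weights would live here */ };

    enum : int { TOY_TENSOR_NOT_REQUIRED = 1 };

    // Mimics the assumed contract: a missing tensor is a hard error unless the
    // NOT_REQUIRED flag is set, in which case nullptr is returned and every
    // downstream use must be guarded.
    static toy_tensor * toy_create_tensor(std::map<std::string, toy_tensor> & file,
                                          const std::string & name, int flags) {
        auto it = file.find(name);
        if (it != file.end()) {
            return &it->second;
        }
        if (flags & TOY_TENSOR_NOT_REQUIRED) {
            return nullptr; // optional tensor absent: caller must null-check
        }
        throw std::runtime_error("missing required tensor: " + name);
    }

    int main() {
        // A DistilBERT-style file: token embeddings present, no token-type table.
        std::map<std::string, toy_tensor> file = { { "token_embd.weight", {} } };

        toy_tensor * tok_embd  = toy_create_tensor(file, "token_embd.weight",  0);
        toy_tensor * type_embd = toy_create_tensor(file, "token_types.weight",
                                                   TOY_TENSOR_NOT_REQUIRED);

        std::printf("tok_embd loaded:  %s\n", tok_embd  ? "yes" : "no");
        std::printf("type_embd loaded: %s\n", type_embd ? "yes" : "no (DistilBERT case)");
        return 0;
    }

The second hunk of the commit adds exactly this kind of null guard to the graph-build code, so a model loaded with type_embd == nullptr still builds a valid BERT graph.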
@@ -5885,8 +5885,10 @@ struct llm_build_bert : public llm_graph_context {
         inpL = build_inp_embd(model.tok_embd);
 
         // token types are hardcoded to zero ("Sentence A")
-        ggml_tensor * type_row0 = ggml_view_1d(ctx0, model.type_embd, n_embd, 0);
-        inpL = ggml_add(ctx0, inpL, type_row0);
+        if (model.type_embd) {
+            ggml_tensor * type_row0 = ggml_view_1d(ctx0, model.type_embd, n_embd, 0);
+            inpL = ggml_add(ctx0, inpL, type_row0);
+        }
 
         if (model.arch == LLM_ARCH_BERT) {
             inpL = ggml_add(ctx0, ggml_get_rows(ctx0, model.pos_embd, inp_pos), inpL);
         }
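For reference, this is the arithmetic the guarded block performs once the graph runs: ggml_view_1d takes the first n_embd values of type_embd, which is row 0 of the token-type table (type id 0, "Sentence A"), and ggml_add broadcasts that row onto every token embedding. A plain C++ sketch of the same computation, illustrative only and not the ggml execution path:

    #include <cstdio>
    #include <vector>

    // inpL holds n_tokens x n_embd token embeddings (row-major here);
    // type_embd holds n_token_types x n_embd rows, so row 0 starts at offset 0,
    // matching ggml_view_1d(ctx0, model.type_embd, n_embd, 0) in the diff.
    static void add_type_row0(std::vector<float> & inpL,
                              const std::vector<float> & type_embd,
                              int n_tokens, int n_embd) {
        if (type_embd.empty()) {
            return; // DistilBERT case: no token-type table, nothing to add
        }
        const float * type_row0 = type_embd.data(); // row for type id 0
        for (int t = 0; t < n_tokens; ++t) {
            for (int i = 0; i < n_embd; ++i) {
                inpL[t * n_embd + i] += type_row0[i]; // broadcast across tokens
            }
        }
    }

    int main() {
        const int n_tokens = 2, n_embd = 3;
        std::vector<float> inpL(n_tokens * n_embd, 1.0f);
        std::vector<float> type_embd = { 0.5f, 0.5f, 0.5f }; // one "Sentence A" row
        add_type_row0(inpL, type_embd, n_tokens, n_embd);
        std::printf("inpL[0] = %.1f\n", inpL[0]); // prints 1.5
        return 0;
    }

With the empty-table early return playing the role of the new if (model.type_embd) check, BERT-style models still receive the type-0 row while DistilBERT models skip the addition entirely.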