llama : add support for GPT2, Bloom and CodeShell tied word embeddings (#12456)

* Add support for GPT2, Bloom and CodeShell tied word embeddings

* Deduplicate tied word embeddings weights

* Workaround for incorrect weight map

It appears transformer.wte.weight is listed in the weight map even though the weights are not actually present; remove it if the output weights are encountered first (a sketch of the idea follows below the commit message).

* check++

* fatfingers--
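
The weight-map workaround itself lives in the conversion script (the second changed file, not shown in the hunks below). The following is only a rough C++ sketch of the idea; the tensor names and containers are illustrative stand-ins, not the converter's actual code.

    #include <iostream>
    #include <set>
    #include <string>

    // Names the weight map claims the checkpoint contains. For tied-embedding
    // models, transformer.wte.weight can be listed here even though the file
    // only stores the output weights.
    std::set<std::string> expected_tensors = {
        "transformer.wte.weight",
        "transformer.ln_f.weight",
        "output.weight",   // illustrative name for the output weights
    };

    // Called as tensors are actually read from the checkpoint.
    void mark_seen(const std::string & name) {
        if (name == "output.weight") {
            // output weights encountered first: the tied wte entry has no
            // data behind it, so stop expecting it
            expected_tensors.erase("transformer.wte.weight");
        }
        expected_tensors.erase(name);
    }

    int main() {
        mark_seen("output.weight");
        mark_seen("transformer.ln_f.weight");
        std::cout << "unresolved entries: " << expected_tensors.size() << "\n"; // 0
    }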
Author: Sigbjørn Skjæret
Date: 2025-03-19 09:08:49 +01:00
Committed by: GitHub
Parent: a686171ea7
Commit: 108e53c2f1
2 changed files with 34 additions and 24 deletions


@@ -2020,7 +2020,12 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     // output
                     output_norm   = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0);
                     output_norm_b = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "bias"),   {n_embd}, 0);
-                    output        = create_tensor(tn(LLM_TENSOR_OUTPUT,      "weight"), {n_embd, n_vocab}, 0);
+                    output        = create_tensor(tn(LLM_TENSOR_OUTPUT,      "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED);
+
+                    // if output is NULL, init from the input tok embed
+                    if (output == NULL) {
+                        output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED);
+                    }

                     for (int i = 0; i < n_layer; ++i) {
                         auto & layer = layers[i];
@@ -2381,7 +2386,12 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                     // output
                     output_norm   = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0);
                     output_norm_b = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "bias"),   {n_embd}, 0);
-                    output        = create_tensor(tn(LLM_TENSOR_OUTPUT,      "weight"), {n_embd, n_vocab}, 0);
+                    output        = create_tensor(tn(LLM_TENSOR_OUTPUT,      "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED);
+
+                    // if output is NULL, init from the input tok embed
+                    if (output == NULL) {
+                        output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED);
+                    }

                     for (int i = 0; i < n_layer; ++i) {
                         auto & layer = layers[i];
@@ -2407,7 +2417,12 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                 } break;
             case LLM_ARCH_CODESHELL:
                 {
-                    tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
+                    tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED);
+
+                    // if tok embd is NULL, init from output
+                    if (tok_embd == NULL) {
+                        tok_embd = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED);
+                    }

                     // output
                     output_norm   = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0);
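
For reference, a minimal standalone illustration of the fallback applied in the hunks above: the output tensor is looked up as optional, and when it is absent the token-embedding weights are reused in its place. The tensor and model-file types here are simplified stand-ins, not llama.cpp's create_tensor API.

    #include <cstdio>
    #include <map>
    #include <string>
    #include <vector>

    struct tensor { std::string name; std::vector<float> data; };

    // Hypothetical model file that only stores one copy of the tied weights.
    static std::map<std::string, tensor> model_file = {
        {"token_embd.weight", {"token_embd.weight", {0.1f, 0.2f, 0.3f}}},
        // "output.weight" intentionally missing: the embeddings are tied
    };

    // Stands in for create_tensor(..., TENSOR_NOT_REQUIRED): returns nullptr
    // when the named tensor is absent instead of failing the load.
    static tensor * lookup(const std::string & name) {
        auto it = model_file.find(name);
        return it == model_file.end() ? nullptr : &it->second;
    }

    int main() {
        tensor * tok_embd = lookup("token_embd.weight");
        tensor * output   = lookup("output.weight"); // not required

        // if output is NULL, init from the input tok embed (same rule as the diff)
        if (output == nullptr) {
            output = tok_embd; // reuse the same weights
        }

        std::printf("output tied to token embeddings: %s\n",
                    output == tok_embd ? "yes" : "no");
        return 0;
    }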