llama : add PLM GGUF Conversion & Inference Support (#12457)

* add edgellm model arch[conversation feature doesn't work]

* remove output.weight layer for edgellm arch

* [Model] update the name of the model

* update the name of model arch in convert gguf

* [Model] Refarctor the model arch into llama-model

* [Bug] Fix the bug in create attn kv

* [Code] Fix editorconfig erros

* [Code] Remove Trailing whitespace

* [Code] Remove Trailing whitespace

* [Code] Change the order of model arch in list

* [Code] Fix flake8 Lint errors

* Remove trailing white space

* [Code] Remove  call in model arch
This commit is contained in:
Si1w
2025-03-27 10:49:15 +00:00
committed by GitHub
parent 953c2a62cf
commit f125b8dccf
6 changed files with 274 additions and 0 deletions

View File

@ -286,6 +286,7 @@ class MODEL_ARCH(IntEnum):
GRANITE_MOE = auto()
CHAMELEON = auto()
WAVTOKENIZER_DEC = auto()
PLM = auto()
class MODEL_TENSOR(IntEnum):
@ -488,6 +489,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
MODEL_ARCH.GRANITE_MOE: "granitemoe",
MODEL_ARCH.CHAMELEON: "chameleon",
MODEL_ARCH.WAVTOKENIZER_DEC: "wavtokenizer-dec",
MODEL_ARCH.PLM: "plm",
}
TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
@ -1464,6 +1466,20 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
MODEL_TENSOR.FFN_UP_SHEXP,
MODEL_TENSOR.FFN_EXP_PROBS_B,
],
MODEL_ARCH.PLM: [
MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.OUTPUT,
MODEL_TENSOR.OUTPUT_NORM,
MODEL_TENSOR.ATTN_NORM,
MODEL_TENSOR.ATTN_Q,
MODEL_TENSOR.ATTN_KV_A_MQA,
MODEL_TENSOR.ATTN_KV_A_NORM,
MODEL_TENSOR.ATTN_KV_B,
MODEL_TENSOR.ATTN_OUT,
MODEL_TENSOR.FFN_NORM,
MODEL_TENSOR.FFN_UP,
MODEL_TENSOR.FFN_DOWN,
],
MODEL_ARCH.CHATGLM : [
MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.ROPE_FREQS,