mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-08-18 05:56:00 -04:00
MPT : support GQA for replit-code-v1.5 (#3627)
This commit is contained in:
@@ -98,6 +98,8 @@ gguf_writer.add_embedding_length(hparams["d_model"])
|
||||
gguf_writer.add_block_count(block_count)
|
||||
gguf_writer.add_feed_forward_length(4 * hparams["d_model"])
|
||||
gguf_writer.add_head_count(hparams["n_heads"])
|
||||
if kv_n_heads := hparams["attn_config"].get("kv_n_heads"):
|
||||
gguf_writer.add_head_count_kv(kv_n_heads)
|
||||
gguf_writer.add_layer_norm_eps(1e-05)
|
||||
if hparams["attn_config"]["clip_qkv"] is not None:
|
||||
gguf_writer.add_clamp_kqv(hparams["attn_config"]["clip_qkv"])
|
||||
|
Reference in New Issue
Block a user