mirror of https://github.com/ggml-org/llama.cpp.git
model : add support for SmallThinker series (#14898)
* support smallthinker
* support 20b softmax, 4b no sliding window
* new build_moe_ffn_from_probs, and can run 4b
* fix 4b rope bug
* fix python type check
* remove is_moe check
* remove set_dense_start_swa_pattern function and modify set_swa_pattern function
* trim trailing whitespace
* remove get_vocab_base of SmallThinkerModel in convert_hf_to_gguf.py
* better whitespace (apply suggestions from code review)
* use GGML_ASSERT for expert count validation
* improve null pointer check for probs
* use template parameter for SWA attention logic
* better whitespace
* move the creation of inp_out_ids before the layer loop
* remove redundant check for probs

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
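For readers puzzled by the "from probs" wording above: the router probabilities are computed elsewhere in the graph and passed into the MoE FFN, which then only performs top-k expert selection and the weighted combination. Below is a minimal, self-contained sketch of that selection step in plain C++; the helper name select_experts and every detail here are illustrative assumptions, not the actual ggml-based build_moe_ffn_from_probs.

#include <algorithm>
#include <cstdio>
#include <numeric>
#include <utility>
#include <vector>

// Pick the k experts with the highest precomputed probabilities and
// return (expert index, renormalized weight) pairs. Illustrative only.
static std::vector<std::pair<int, float>>
select_experts(const std::vector<float> & probs, int k) {
    std::vector<int> idx(probs.size());
    std::iota(idx.begin(), idx.end(), 0);
    // sort only the first k positions by descending probability
    std::partial_sort(idx.begin(), idx.begin() + k, idx.end(),
                      [&](int a, int b) { return probs[a] > probs[b]; });

    float sum = 0.0f;
    for (int i = 0; i < k; ++i) {
        sum += probs[idx[i]];
    }

    std::vector<std::pair<int, float>> out;
    for (int i = 0; i < k; ++i) {
        out.emplace_back(idx[i], probs[idx[i]] / sum); // renormalize over top-k
    }
    return out;
}

int main() {
    // probabilities already produced by the router (e.g. a softmax upstream)
    std::vector<float> probs = {0.05f, 0.40f, 0.10f, 0.30f, 0.10f, 0.05f};
    for (const auto & [expert, weight] : select_experts(probs, 2)) {
        printf("expert %d, weight %.3f\n", expert, weight);
    }
}

With k = 2 this picks experts 1 and 3 with weights 0.571 and 0.429. Renormalizing over the selected top-k is a common convention so the expert outputs are combined with weights that sum to 1.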
src/llama-hparams.h
@@ -140,7 +140,7 @@ struct llama_hparams {
     // for Classifiers
     uint32_t n_cls_out = 1;
 
-    // llama4
+    // llama4 smallthinker
     uint32_t n_moe_layer_step        = 0;
     uint32_t n_no_rope_layer_step    = 4;
     uint32_t n_attn_temp_floor_scale = 8192;
@@ -161,9 +161,10 @@ struct llama_hparams {
     enum llama_rope_scaling_type rope_scaling_type_train = LLAMA_ROPE_SCALING_TYPE_NONE;
 
     // this value n_pattern means that every nth layer is dense (i.e. non-SWA)
+    // dense_first means whether the pattern is start with a dense layer
     // note that if n_pattern == 0, all layers are SWA
     //           if n_pattern == 1, all layers are dense
-    // example: n_pattern = 3
+    // example 1: n_pattern = 3, dense_first = false
     //   il == 0: swa
     //   il == 1: swa
     //   il == 2: dense
@@ -172,7 +173,13 @@ struct llama_hparams {
     //   il == 5: dense
     //   il == 6: swa
     //   etc ...
-    void set_swa_pattern(uint32_t n_pattern);
+    // example 2: n_pattern = 2, dense_first = true
+    //   il == 0: dense
+    //   il == 1: swa
+    //   il == 2: dense
+    //   il == 3: swa
+    //   etc ...
+    void set_swa_pattern(uint32_t n_pattern, bool dense_first = false);
 
     // return true if one of the layers is SWA
     bool is_swa_any() const;
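The comment block above fully determines the dense/SWA layout. A compilable sketch of that logic follows; the helper name make_swa_pattern and its std::vector<bool> return type are assumptions for illustration, not the actual llama.cpp implementation.

#include <cstdint>
#include <cstdio>
#include <vector>

// true -> SWA layer, false -> dense layer
static std::vector<bool> make_swa_pattern(uint32_t n_layer, uint32_t n_pattern, bool dense_first) {
    std::vector<bool> swa_layers(n_layer, false);
    for (uint32_t il = 0; il < n_layer; ++il) {
        if (n_pattern == 0) {
            swa_layers[il] = true;  // n_pattern == 0: all layers are SWA
        } else if (n_pattern == 1) {
            swa_layers[il] = false; // n_pattern == 1: all layers are dense
        } else if (dense_first) {
            // layer 0 is dense, then every n_pattern-th layer is dense again
            swa_layers[il] = (il % n_pattern) != 0;
        } else {
            // every n_pattern-th layer (the last of each group) is dense
            swa_layers[il] = (il % n_pattern) < (n_pattern - 1);
        }
    }
    return swa_layers;
}

int main() {
    // example 1 from the header comment: n_pattern = 3, dense_first = false
    auto p = make_swa_pattern(7, 3, false);
    for (size_t il = 0; il < p.size(); ++il) {
        printf("il == %zu: %s\n", il, p[il] ? "swa" : "dense");
    }
}

Running it prints example 1 from the comment (swa, swa, dense, swa, swa, dense, swa); calling make_swa_pattern(4, 2, true) reproduces example 2 (dense, swa, dense, swa). The default argument dense_first = false keeps existing callers of set_swa_pattern(n_pattern) source-compatible.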