mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-28 12:25:03 +00:00
convert : update for baichuan (#2081)
1. guess n_layers; 2. relax warnings on context size; 3. add a note that its derivations are also supported. Co-authored-by: Judd <foldl@boxvest.com>
This commit is contained in:
@ -154,9 +154,15 @@ class Params:
|
||||
# try transformer naming first
|
||||
if "model.layers.0.self_attn.q_proj.weight" in model:
|
||||
n_layer=next(i for i in itertools.count() if f"model.layers.{i}.self_attn.q_proj.weight" not in model)
|
||||
elif "model.layers.0.self_attn.W_pack.weight" in model: # next: try baichuan naming
|
||||
n_layer=next(i for i in itertools.count() if f"model.layers.{i}.self_attn.W_pack.weight" not in model)
|
||||
else:
|
||||
n_layer=next(i for i in itertools.count() if f"layers.{i}.attention.wq.weight" not in model)
|
||||
|
||||
if n_layer < 1:
|
||||
raise Exception("failed to guess 'n_layer'. This model is unknown or unsupported.\n"
|
||||
"Suggestion: provide 'config.json' of the model in the same directory containing model files.")
|
||||
|
||||
n_head=n_embd // 128 # guessed
|
||||
|
||||
return Params(
|
||||
|
Reference in New Issue
Block a user