llama : fix embd when offloading non-repeating layers (#1891)

Author: Johannes Gäßler
Date: 2023-06-16 20:25:51 +02:00
Committed by: GitHub
parent 5b9ccaf104
commit ac3b886953

@@ -1658,7 +1658,7 @@ static bool llama_eval_internal(
         // cur = cur*norm(broadcasted)
         cur = ggml_mul(ctx0, cur, model.norm);
-        offload_func_nr(cur);
+        // offload_func_nr(cur); // TODO CPU + GPU mirrored backend
         ggml_set_name(cur, "result_norm");

         embeddings = cur;
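
The one-line change disables the GPU offload of the final RMS-norm output. Since `embeddings = cur` is read back on the host, assigning `result_norm` a GPU buffer presumably left the embedding data on the device and broke the returned values; the TODO notes that offloading this tensor safely would require a mirrored CPU + GPU backend. To make the failure mode concrete, below is a self-contained toy sketch, not llama.cpp/ggml code: `tensor`, `nop`, `to_gpu`, and the readback are illustrative stand-ins, while the `offload_func_nr` selection mirrors the pattern the diff touches.

#include <cstdio>
#include <vector>

// Toy stand-in for a ggml tensor: real code tracks a backend buffer,
// here we only track whether the data was "moved" to the GPU.
struct tensor {
    std::vector<float> data;
    bool on_gpu = false;
};

typedef void (*offload_func_t)(tensor &);

static void nop(tensor &) {}                         // CPU-only build: no offloading
static void to_gpu(tensor & t) { t.on_gpu = true; }  // stand-in for a GPU buffer assignment

int main() {
    const int n_layer      = 32;
    const int n_gpu_layers = 33; // > n_layer: non-repeating layers get offloaded too

    // nr = non-repeating; chosen at runtime, like offload_func_nr in the diff
    offload_func_t offload_func_nr = nop;
    if (n_gpu_layers > n_layer) {
        offload_func_nr = to_gpu;
    }
    (void) offload_func_nr; // unused below because the offload call is disabled

    tensor result_norm{{0.1f, 0.2f, 0.3f}};

    // Before the fix the next line ran, assigning result_norm a GPU buffer,
    // so the host-side read below would have returned stale data:
    // offload_func_nr(result_norm); // TODO CPU + GPU mirrored backend

    std::vector<float> embeddings = result_norm.data; // CPU readback stays valid
    std::printf("embd[0] = %.1f (on_gpu = %d)\n", embeddings[0], (int) result_norm.on_gpu);
    return 0;
}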