metal : relax conditions on fast matrix multiplication kernel (#3168)

* metal : relax conditions on fast matrix multiplication kernel
* metal : revert the concurrency change because it was wrong
* llama : remove experimental stuff
2025-07-05 19:03:34 +00:00 · 2023-09-15 11:09:24 +03:00
parent 76164fe2e6
commit a51b687657
4 changed files with 100 additions and 51 deletions
--- a/llama.cpp
+++ b/llama.cpp
@@ -3429,10 +3429,6 @@ static bool llama_eval_internal(
    if (lctx.ctx_metal) {
        ggml_metal_set_n_cb     (lctx.ctx_metal, n_threads);
        ggml_metal_graph_compute(lctx.ctx_metal, gf);
-        ggml_metal_get_tensor   (lctx.ctx_metal, res);
-        if (!lctx.embedding.empty()) {
-            ggml_metal_get_tensor(lctx.ctx_metal, embeddings);
-        }
    } else {
        ggml_graph_compute_helper(lctx.work_buffer, gf, n_threads);
    }