refact : fix convert script + zero out KV cache to avoid nans (#3523)

* refact : fix convert script + zero out KV cache to avoid nans
* ggml : silu(-inf) should never happen
* metal : assert various kernel requirements
2025-07-05 13:53:31 +00:00 · 2023-10-09 14:32:17 +03:00
parent dcc09d2596
commit fcca0a7004
6 changed files with 51 additions and 91 deletions
--- a/llama.cpp
+++ b/llama.cpp
@@ -1325,7 +1325,11 @@ static bool llama_kv_cache_init(
    cache.cells.clear();
    cache.cells.resize(n_ctx);

+    // TODO: this should be:
+    //       cache.buf.resize(2u*n_elements*ggml_type_size(wtype) + 2u*ggml_tensor_overhead());
+    //       change it and test that it works
    cache.buf.resize(2u*n_elements*ggml_type_size(wtype) + 2u*MB);
+    memset(cache.buf.data, 0, cache.buf.size);

    struct ggml_init_params params;
    params.mem_size   = cache.buf.size;