refact : fix convert script + zero out KV cache to avoid nans (#3523)

* refact : fix convert script + zero out KV cache to avoid nans * ggml : silu(-inf) should never happen * metal : assert various kernel requirements
2025-08-13 03:47:46 -04:00 · 2023-10-09 14:32:17 +03:00
parent dcc09d2596
commit fcca0a7004
6 changed files with 51 additions and 91 deletions
--- a/examples/parallel/parallel.cpp
+++ b/examples/parallel/parallel.cpp
@@ -167,7 +167,7 @@ int main(int argc, char ** argv) {

    // the max batch size is as large as the context to handle cases where we get very long input prompt from multiple
    // users. regardless of the size, the main loop will chunk the batch into a maximum of params.n_batch tokens at a time
-    llama_batch batch = llama_batch_init(params.n_ctx, 0);
+    llama_batch batch = llama_batch_init(n_ctx, 0);

    int32_t n_total_prompt = 0;
    int32_t n_total_gen    = 0;