llama : Add ability to cancel model load
Updated llama_progress_callback so that if it returns false, model loading is aborted.
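The mechanics of cancellation are simple: the loader consults the callback's return value as it loads tensors and bails out on false. A hypothetical sketch of that loader-side check (loader_state, load_one_tensor, and load_tensors are illustrative names, not llama.cpp internals):

#include <stdbool.h>
#include <stddef.h>

// mirrors the new public typedef from this commit
typedef bool (*llama_progress_callback)(float progress, void * ctx);

// illustrative loader state, not the real llama.cpp structure
struct loader_state {
    size_t n_tensors;
    llama_progress_callback progress_callback;
    void * progress_callback_user_data;
};

static void load_one_tensor(struct loader_state * s, size_t i) {
    (void) s; (void) i; // placeholder for the actual tensor read
}

// returns false if the callback requested cancellation
static bool load_tensors(struct loader_state * s) {
    for (size_t i = 0; i < s->n_tensors; ++i) {
        load_one_tensor(s, i);
        if (s->progress_callback) {
            const float progress = (float) (i + 1) / (float) s->n_tensors;
            if (!s->progress_callback(progress, s->progress_callback_user_data)) {
                return false; // abort: the caller asked to stop
            }
        }
    }
    return true;
}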
llama.h | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)

--- a/llama.h
+++ b/llama.h
@@ -126,7 +126,7 @@ extern "C" {
         bool sorted;
     } llama_token_data_array;

-    typedef void (*llama_progress_callback)(float progress, void *ctx);
+    typedef bool (*llama_progress_callback)(float progress, void *ctx);

     // Input data for llama_decode
     // A llama_batch object can contain input about one or many sequences
@@ -179,7 +179,9 @@ extern "C" {
         int32_t main_gpu; // the GPU that is used for scratch and small tensors
         const float * tensor_split; // how to split layers across multiple GPUs (size: LLAMA_MAX_DEVICES)

-        // called with a progress value between 0 and 1, pass NULL to disable
+        // Called with a progress value between 0.0 and 1.0. Pass NULL to disable.
+        // If the provided progress_callback returns true, model loading continues.
+        // If it returns false, model loading is immediately aborted.
         llama_progress_callback progress_callback;

         // context pointer passed to the progress callback
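On the caller side, cancelling a load means returning false from the callback. A minimal sketch, assuming the fields shown above belong to llama_model_params, that the user-data field is named progress_callback_user_data, and that llama_load_model_from_file is the loading entry point in this era of the API:

#include <stdbool.h>
#include <stdio.h>

#include "llama.h"

static volatile bool g_cancel = false; // set elsewhere to request cancellation

// return true to keep loading, false to abort
static bool progress_cb(float progress, void * ctx) {
    (void) ctx;
    fprintf(stderr, "\rloading: %3.0f%%", progress * 100.0f);
    return !g_cancel;
}

int main(void) {
    struct llama_model_params params = llama_model_default_params();
    params.progress_callback           = progress_cb;
    params.progress_callback_user_data = NULL;

    // returns NULL if loading failed, including when the callback aborted it
    struct llama_model * model = llama_load_model_from_file("model.gguf", params);
    if (model == NULL) {
        fprintf(stderr, "\nload cancelled or failed\n");
        return 1;
    }

    llama_free_model(model);
    return 0;
}

Note that with this design a cancelled load and an ordinary load failure both surface as a NULL return, so a caller that needs to distinguish the two has to track its own cancellation flag.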