llama : Add ability to cancel model load
Updated llama_progress_callback so that if it returns false, model loading is aborted.
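The mechanics of cancellation are simple: the loader consults the callback's return value as it loads tensors and bails out on false. A hypothetical sketch of that loader-side check (loader_state, load_one_tensor, and load_tensors are illustrative names, not llama.cpp internals):

#include <stdbool.h>
#include <stddef.h>

// mirrors the new public typedef from this commit
typedef bool (*llama_progress_callback)(float progress, void * ctx);

// illustrative loader state, not the real llama.cpp structure
struct loader_state {
    size_t n_tensors;
    llama_progress_callback progress_callback;
    void * progress_callback_user_data;
};

static void load_one_tensor(struct loader_state * s, size_t i) {
    (void) s; (void) i; // placeholder for the actual tensor read
}

// returns false if the callback requested cancellation
static bool load_tensors(struct loader_state * s) {
    for (size_t i = 0; i < s->n_tensors; ++i) {
        load_one_tensor(s, i);
        if (s->progress_callback) {
            const float progress = (float) (i + 1) / (float) s->n_tensors;
            if (!s->progress_callback(progress, s->progress_callback_user_data)) {
                return false; // abort: the caller asked to stop
            }
        }
    }
    return true;
}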
llama.h | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)

--- a/llama.h
+++ b/llama.h
@@ -126,7 +126,7 @@ extern "C" {
         bool sorted;
     } llama_token_data_array;

-    typedef void (*llama_progress_callback)(float progress, void *ctx);
+    typedef bool (*llama_progress_callback)(float progress, void *ctx);

     // Input data for llama_decode
     // A llama_batch object can contain input about one or many sequences
@@ -179,7 +179,9 @@ extern "C" {
         int32_t main_gpu; // the GPU that is used for scratch and small tensors
         const float * tensor_split; // how to split layers across multiple GPUs (size: LLAMA_MAX_DEVICES)

-        // called with a progress value between 0 and 1, pass NULL to disable
+        // Called with a progress value between 0.0 and 1.0. Pass NULL to disable.
+        // If the provided progress_callback returns true, model loading continues.
+        // If it returns false, model loading is immediately aborted.
         llama_progress_callback progress_callback;

         // context pointer passed to the progress callback
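On the caller side, cancelling a load means returning false from the callback. A minimal sketch, assuming the fields shown above belong to llama_model_params, that the user-data field is named progress_callback_user_data, and that llama_load_model_from_file is the loading entry point in this era of the API:

#include <stdbool.h>
#include <stdio.h>

#include "llama.h"

static volatile bool g_cancel = false; // set elsewhere to request cancellation

// return true to keep loading, false to abort
static bool progress_cb(float progress, void * ctx) {
    (void) ctx;
    fprintf(stderr, "\rloading: %3.0f%%", progress * 100.0f);
    return !g_cancel;
}

int main(void) {
    struct llama_model_params params = llama_model_default_params();
    params.progress_callback           = progress_cb;
    params.progress_callback_user_data = NULL;

    // returns NULL if loading failed, including when the callback aborted it
    struct llama_model * model = llama_load_model_from_file("model.gguf", params);
    if (model == NULL) {
        fprintf(stderr, "\nload cancelled or failed\n");
        return 1;
    }

    llama_free_model(model);
    return 0;
}

Note that with this design a cancelled load and an ordinary load failure both surface as a NULL return, so a caller that needs to distinguish the two has to track its own cancellation flag.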