Merge branch 'master' into xsn/private_batch_api

2025-08-28 19:18:57 -04:00 · 2025-03-18 15:45:22 +01:00
parent eab5606d7b 8551c44d84
commit dc4bb64290
76 changed files with 3990 additions and 902 deletions
--- a/include/llama.h
+++ b/include/llama.h
@@ -1045,6 +1045,10 @@ extern "C" {
    // If set to true, the model will only attend to the past tokens
    LLAMA_API void llama_set_causal_attn(struct llama_context * ctx, bool causal_attn);

+    // Set whether the model is in warmup mode or not
+    // If true, all model tensors are activated during llama_decode() to load and cache their weights.
+    LLAMA_API void llama_set_warmup(struct llama_context * ctx, bool warmup);
+
    // Set abort callback
    LLAMA_API void llama_set_abort_callback(struct llama_context * ctx, ggml_abort_callback abort_callback, void * abort_callback_data);