llama : allow other bufts when overriding to CPU, add --no-repack option (#14990)

This commit is contained in:
Diego Devesa
2025-07-31 09:11:34 -07:00
committed by GitHub
parent e08a98826b
commit d6818d06a6
5 changed files with 39 additions and 21 deletions

View File

@@ -284,10 +284,11 @@ extern "C" {
const struct llama_model_kv_override * kv_overrides;
// Keep the booleans together to avoid misalignment during copy-by-value.
-    bool vocab_only;    // only load the vocabulary, no weights
-    bool use_mmap;      // use mmap if possible
-    bool use_mlock;     // force system to keep model in RAM
-    bool check_tensors; // validate model tensor data
+    bool vocab_only;      // only load the vocabulary, no weights
+    bool use_mmap;        // use mmap if possible
+    bool use_mlock;       // force system to keep model in RAM
+    bool check_tensors;   // validate model tensor data
+    bool use_extra_bufts; // use extra buffer types (used for weight repacking)
};
// NOTE: changing the default values of parameters marked as [EXPERIMENTAL] may cause crashes or incorrect results in certain configurations