mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-08-14 20:29:41 -04:00
llama : allow other bufts when overriding to CPU, add --no-repack option (#14990)
This commit is contained in:
@@ -284,10 +284,11 @@ extern "C" {
|
||||
const struct llama_model_kv_override * kv_overrides;
|
||||
|
||||
// Keep the booleans together to avoid misalignment during copy-by-value.
|
||||
bool vocab_only; // only load the vocabulary, no weights
|
||||
bool use_mmap; // use mmap if possible
|
||||
bool use_mlock; // force system to keep model in RAM
|
||||
bool check_tensors; // validate model tensor data
|
||||
bool vocab_only; // only load the vocabulary, no weights
|
||||
bool use_mmap; // use mmap if possible
|
||||
bool use_mlock; // force system to keep model in RAM
|
||||
bool check_tensors; // validate model tensor data
|
||||
bool use_extra_bufts; // use extra buffer types (used for weight repacking)
|
||||
};
|
||||
|
||||
// NOTE: changing the default values of parameters marked as [EXPERIMENTAL] may cause crashes or incorrect results in certain configurations
|
||||
|
Reference in New Issue
Block a user