mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-08-14 20:29:41 -04:00
llama : allow other bufts when overriding to CPU, add --no-repack option (#14990)
This commit is contained in:
@@ -359,6 +359,7 @@ struct common_params {
|
||||
bool warmup = true; // warmup run
|
||||
bool check_tensors = false; // validate tensor data
|
||||
bool no_op_offload = false; // globally disable offload host tensor operations to device
|
||||
bool no_extra_bufts = false; // disable extra buffer types (used for weight repacking)
|
||||
|
||||
bool single_turn = false; // single turn chat conversation
|
||||
|
||||
|
Reference in New Issue
Block a user