llama : allow other bufts when overriding to CPU, add --no-repack option (#14990)

This commit is contained in:
Diego Devesa
2025-07-31 09:11:34 -07:00
committed by GitHub
parent e08a98826b
commit d6818d06a6
5 changed files with 39 additions and 21 deletions

View File

@@ -359,6 +359,7 @@ struct common_params {
bool warmup = true; // warmup run
bool check_tensors = false; // validate tensor data
bool no_op_offload = false; // globally disable offload host tensor operations to device
bool no_extra_bufts = false; // disable extra buffer types (used for weight repacking)
bool single_turn = false; // single turn chat conversation