llama : allow other bufts when overriding to CPU, add --no-repack option (#14990)

2025-08-14 20:29:41 -04:00 · 2025-07-31 09:11:34 -07:00
parent e08a98826b
commit d6818d06a6
5 changed files with 39 additions and 21 deletions
--- a/common/common.h
+++ b/common/common.h
@@ -359,6 +359,7 @@ struct common_params {
    bool warmup            = true;  // warmup run
    bool check_tensors     = false; // validate tensor data
    bool no_op_offload     = false; // globally disable offload host tensor operations to device
+    bool no_extra_bufts    = false; // disable extra buffer types (used for weight repacking)

    bool single_turn       = false; // single turn chat conversation