Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-08-06 09:10:11 -04:00)
Add --no-op-offload to improve -ot pp perf in MoE models like llama4 400B (#13386)
This commit is contained in:
@@ -30,6 +30,7 @@ struct llama_cparams {
|
||||
bool flash_attn;
|
||||
bool no_perf;
|
||||
bool warmup;
|
||||
bool op_offload;
|
||||
|
||||
enum llama_pooling_type pooling_type;
|
||||
|
||||
|
Reference in New Issue
Block a user