quantize : handle user-defined pruning of whole layers (blocks) (#13037)

This commit is contained in:
Ed Addario
2025-06-22 22:16:26 +01:00
committed by GitHub
parent 238005c2dc
commit fa4a9f2a1c
3 changed files with 119 additions and 9 deletions

View File

@@ -390,6 +390,7 @@ extern "C" {
void * imatrix; // pointer to importance matrix data
void * kv_overrides; // pointer to vector containing overrides
void * tensor_types; // pointer to vector containing tensor types
void * prune_layers; // pointer to vector containing layer indices to prune
} llama_model_quantize_params;
typedef struct llama_logit_bias {