mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-26 11:45:21 +00:00
llama : add option to override model tensor buffers (#11397)
* llama : add option to override tensor buffers
* ggml : fix possible underflow in ggml_nbytes
This commit is contained in:
@ -280,10 +280,18 @@ extern "C" {
|
||||
};
|
||||
};
|
||||
|
||||
// One tensor buffer-type override entry: associates a pattern with the buffer
// type to use for tensors that match it. llama_model_params holds a pointer to
// a list of these entries (tensor_buft_overrides).
struct llama_model_tensor_buft_override {
|
||||
// pattern that selects which tensors this override applies to
// NOTE(review): presumably matched against tensor names; the matching rules are not visible here - confirm
const char * pattern;
|
||||
// buffer type to use for tensors that match the pattern
ggml_backend_buffer_type_t buft;
|
||||
};
|
||||
|
||||
struct llama_model_params {
|
||||
// NULL-terminated list of devices to use for offloading (if NULL, all available devices are used)
|
||||
ggml_backend_dev_t * devices;
|
||||
|
||||
// NULL-terminated list of buffer types to use for tensors that match a pattern
|
||||
const struct llama_model_tensor_buft_override * tensor_buft_overrides;
|
||||
|
||||
int32_t n_gpu_layers; // number of layers to store in VRAM
|
||||
enum llama_split_mode split_mode; // how to split the model across multiple GPUs
|
||||
|
||||
|
Reference in New Issue
Block a user