mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-07-01 05:05:10 +00:00
Better CUDA synchronization logic (#2057)
This commit is contained in:
@ -8,10 +8,6 @@ extern "C" {
|
||||
|
||||
#define GGML_CUDA_MAX_DEVICES 16
|
||||
|
||||
struct ggml_tensor_extra_gpu {
|
||||
void * data_device[GGML_CUDA_MAX_DEVICES]; // 1 pointer for each device for split tensors
|
||||
};
|
||||
|
||||
void ggml_init_cublas(void);
|
||||
void ggml_cuda_set_tensor_split(const float * tensor_split);
|
||||
|
||||
|
Reference in New Issue
Block a user