mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-08-27 02:28:19 -04:00
Documented CUDA reproducibility, added warning (#1346)
This commit is contained in:
@@ -348,7 +348,7 @@ static void ggml_cuda_pool_free(void * ptr, size_t size) {
     CUDA_CHECK(cudaFree(ptr));
 }
 
-#define GGML_CUDA_MAX_STREAMS 8
+#define GGML_CUDA_MAX_STREAMS 8 // Set this to 1 for reproducible matrix multiplication.
 #define GGML_CUDA_MAX_EVENTS 64
 static cublasHandle_t g_cublasH = nullptr;
 static cudaStream_t g_cudaStreams[GGML_CUDA_MAX_STREAMS] = { nullptr };
Reference in New Issue
Block a user