Mirror of https://github.com/ggml-org/llama.cpp.git, synced 2025-06-27 12:05:03 +00:00
Only one CUDA stream per device for async compute (#1898)
@@ -106,9 +106,6 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
     }
 
     if (arg == "-s" || arg == "--seed") {
-#if defined(GGML_USE_CUBLAS)
-        fprintf(stderr, "WARNING: when using cuBLAS generation results are NOT guaranteed to be reproducible.\n");
-#endif
         if (++i >= argc) {
             invalid_param = true;
             break;
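The hunk removes the cuBLAS non-reproducibility warning, which is consistent with the commit title: when all async work for a device is funneled through a single CUDA stream, operations execute in a fixed order, so results become reproducible. Below is a minimal sketch of the one-stream-per-device pattern the title refers to; the names g_streams, MAX_DEVICES, ensure_stream, and copy_to_device_async are hypothetical illustrations, not llama.cpp's actual identifiers.

// Sketch: one CUDA stream per device for async compute.
// All names here are hypothetical, not taken from llama.cpp.
#include <cuda_runtime.h>
#include <cstddef>

#define MAX_DEVICES 16

static cudaStream_t g_streams[MAX_DEVICES] = {nullptr};

// Lazily create the single stream associated with a device.
static cudaStream_t ensure_stream(int device) {
    if (g_streams[device] == nullptr) {
        cudaSetDevice(device);
        cudaStreamCreateWithFlags(&g_streams[device], cudaStreamNonBlocking);
    }
    return g_streams[device];
}

// Every async copy (and, in the same spirit, every kernel launch) for a
// device is issued on that device's one stream, so operations are ordered
// deterministically relative to each other.
static void copy_to_device_async(void * dst, const void * src, size_t n, int device) {
    cudaMemcpyAsync(dst, src, n, cudaMemcpyHostToDevice, ensure_stream(device));
}

With multiple streams per device, independent operations may interleave differently from run to run, and floating-point reductions scheduled in different orders can produce slightly different results; a single stream removes that source of nondeterminism, which is why the warning is no longer needed.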