mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-08-20 06:36:48 -04:00
ggml-cuda : use graph allocator (#2684)
use a different function for no_alloc to avoid breaking backwards compat, fixes lora remove 512 n_batch limit fixed 2048 batch size cleanup Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
This commit is contained in:
@@ -289,7 +289,6 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
|
||||
break;
|
||||
}
|
||||
params.n_batch = std::stoi(argv[i]);
|
||||
params.n_batch = std::min(512, params.n_batch);
|
||||
} else if (arg == "--keep") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
|
Reference in New Issue
Block a user