mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-27 20:05:20 +00:00
server : add --numa support (#2524)
This commit is contained in:
@ -666,6 +666,7 @@ static void server_print_usage(const char *argv0, const gpt_params ¶ms,
|
||||
{
|
||||
fprintf(stdout, " --no-mmap do not memory-map model (slower load but may reduce pageouts if not using mlock)\n");
|
||||
}
|
||||
fprintf(stdout, " --numa attempt optimizations that help on some NUMA systems\n");
|
||||
#ifdef LLAMA_SUPPORTS_GPU_OFFLOAD
|
||||
fprintf(stdout, " -ngl N, --n-gpu-layers N\n");
|
||||
fprintf(stdout, " number of layers to store in VRAM\n");
|
||||
@ -940,6 +941,10 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
|
||||
{
|
||||
params.use_mmap = false;
|
||||
}
|
||||
else if (arg == "--numa")
|
||||
{
|
||||
params.numa = true;
|
||||
}
|
||||
else if (arg == "--embedding")
|
||||
{
|
||||
params.embedding = true;
|
||||
|
Reference in New Issue
Block a user