Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-06-28 04:15:21 +00:00)
rpc : add command line option for number of threads for the CPU backend (#13060)
closes #13051
This commit is contained in:
Committed by: GitHub
Parent: 658987cfc9
Commit: 2cca6c01e4
@ -22,6 +22,7 @@
|
|||||||
|
|
||||||
#include "ggml-rpc.h"
|
#include "ggml-rpc.h"
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
|
# define NOMINMAX
|
||||||
# define DIRECTORY_SEPARATOR '\\'
|
# define DIRECTORY_SEPARATOR '\\'
|
||||||
# include <locale>
|
# include <locale>
|
||||||
# include <windows.h>
|
# include <windows.h>
|
||||||
@ -37,6 +38,8 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <filesystem>
|
#include <filesystem>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <thread>
|
||||||
|
|
||||||
namespace fs = std::filesystem;
|
namespace fs = std::filesystem;
|
||||||
|
|
||||||
@ -150,12 +153,14 @@ struct rpc_server_params {
|
|||||||
int port = 50052;
|
int port = 50052;
|
||||||
size_t backend_mem = 0;
|
size_t backend_mem = 0;
|
||||||
bool use_cache = false;
|
bool use_cache = false;
|
||||||
|
int n_threads = std::max(1U, std::thread::hardware_concurrency()/2);
|
||||||
};
|
};
|
||||||
|
|
||||||
static void print_usage(int /*argc*/, char ** argv, rpc_server_params params) {
|
static void print_usage(int /*argc*/, char ** argv, rpc_server_params params) {
|
||||||
fprintf(stderr, "Usage: %s [options]\n\n", argv[0]);
|
fprintf(stderr, "Usage: %s [options]\n\n", argv[0]);
|
||||||
fprintf(stderr, "options:\n");
|
fprintf(stderr, "options:\n");
|
||||||
fprintf(stderr, " -h, --help show this help message and exit\n");
|
fprintf(stderr, " -h, --help show this help message and exit\n");
|
||||||
|
fprintf(stderr, " -t, --threads number of threads for the CPU backend (default: %d)\n", params.n_threads);
|
||||||
fprintf(stderr, " -H HOST, --host HOST host to bind to (default: %s)\n", params.host.c_str());
|
fprintf(stderr, " -H HOST, --host HOST host to bind to (default: %s)\n", params.host.c_str());
|
||||||
fprintf(stderr, " -p PORT, --port PORT port to bind to (default: %d)\n", params.port);
|
fprintf(stderr, " -p PORT, --port PORT port to bind to (default: %d)\n", params.port);
|
||||||
fprintf(stderr, " -m MEM, --mem MEM backend memory size (in MB)\n");
|
fprintf(stderr, " -m MEM, --mem MEM backend memory size (in MB)\n");
|
||||||
@ -172,6 +177,15 @@ static bool rpc_server_params_parse(int argc, char ** argv, rpc_server_params &
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
params.host = argv[i];
|
params.host = argv[i];
|
||||||
|
} else if (arg == "-t" || arg == "--threads") {
|
||||||
|
if (++i >= argc) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
params.n_threads = std::stoi(argv[i]);
|
||||||
|
if (params.n_threads <= 0) {
|
||||||
|
fprintf(stderr, "error: invalid number of threads: %d\n", params.n_threads);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
} else if (arg == "-p" || arg == "--port") {
|
} else if (arg == "-p" || arg == "--port") {
|
||||||
if (++i >= argc) {
|
if (++i >= argc) {
|
||||||
return false;
|
return false;
|
||||||
@ -199,7 +213,7 @@ static bool rpc_server_params_parse(int argc, char ** argv, rpc_server_params &
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static ggml_backend_t create_backend() {
|
static ggml_backend_t create_backend(const rpc_server_params & params) {
|
||||||
ggml_backend_t backend = NULL;
|
ggml_backend_t backend = NULL;
|
||||||
#ifdef GGML_USE_CUDA
|
#ifdef GGML_USE_CUDA
|
||||||
fprintf(stderr, "%s: using CUDA backend\n", __func__);
|
fprintf(stderr, "%s: using CUDA backend\n", __func__);
|
||||||
@ -231,6 +245,7 @@ static ggml_backend_t create_backend() {
|
|||||||
if (!backend) {
|
if (!backend) {
|
||||||
fprintf(stderr, "%s: using CPU backend\n", __func__);
|
fprintf(stderr, "%s: using CPU backend\n", __func__);
|
||||||
backend = ggml_backend_cpu_init();
|
backend = ggml_backend_cpu_init();
|
||||||
|
ggml_backend_cpu_set_n_threads(backend, params.n_threads);
|
||||||
}
|
}
|
||||||
return backend;
|
return backend;
|
||||||
}
|
}
|
||||||
@ -275,7 +290,7 @@ int main(int argc, char * argv[]) {
|
|||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
ggml_backend_t backend = create_backend();
|
ggml_backend_t backend = create_backend(params);
|
||||||
if (!backend) {
|
if (!backend) {
|
||||||
fprintf(stderr, "Failed to create backend\n");
|
fprintf(stderr, "Failed to create backend\n");
|
||||||
return 1;
|
return 1;
|
||||||
|
Reference in New Issue
Block a user