mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-06-28 04:15:21 +00:00
Co-authored-by: eugenio.segala <esegala@deloitte.co.uk>
This commit is contained in:
@ -72,6 +72,7 @@ class ServerProcess:
|
||||
disable_ctx_shift: int | None = False
|
||||
draft_min: int | None = None
|
||||
draft_max: int | None = None
|
||||
no_webui: bool | None = None
|
||||
|
||||
# session variables
|
||||
process: subprocess.Popen | None = None
|
||||
@ -158,6 +159,8 @@ class ServerProcess:
|
||||
server_args.extend(["--draft-max", self.draft_max])
|
||||
if self.draft_min:
|
||||
server_args.extend(["--draft-min", self.draft_min])
|
||||
if self.no_webui:
|
||||
server_args.append("--no-webui")
|
||||
|
||||
args = [str(arg) for arg in [server_path, *server_args]]
|
||||
print(f"bench: starting server with: {' '.join(args)}")
|
||||
|
Reference in New Issue
Block a user