mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-08-17 21:51:27 -04:00
server : better security control for public deployments (#9776)
* server : more explicit endpoint access settings * protect /props endpoint * fix tests * update server docs * fix typo * fix tests
This commit is contained in:
@@ -1299,7 +1299,8 @@ async def wait_for_slots_status(context,
|
||||
|
||||
async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
|
||||
while True:
|
||||
async with await session.get(f'{base_url}/slots', params=params) as slots_response:
|
||||
headers = {'Authorization': f'Bearer {context.server_api_key}'}
|
||||
async with await session.get(f'{base_url}/slots', params=params, headers=headers) as slots_response:
|
||||
status_code = slots_response.status
|
||||
slots = await slots_response.json()
|
||||
if context.debug:
|
||||
@@ -1387,6 +1388,7 @@ def start_server_background(context):
|
||||
context.server_path = os.environ['LLAMA_SERVER_BIN_PATH']
|
||||
server_listen_addr = context.server_fqdn
|
||||
server_args = [
|
||||
'--slots', # requires to get slot status via /slots endpoint
|
||||
'--host', server_listen_addr,
|
||||
'--port', context.server_port,
|
||||
]
|
||||
|
Reference in New Issue
Block a user