From e73d881c501ec4cd0ea7f628348377e121bb3de6 Mon Sep 17 00:00:00 2001 From: gejifeng Date: Mon, 9 Feb 2026 10:20:45 +0000 Subject: [PATCH] Onboarding: add vLLM provider support --- docs/concepts/model-providers.md | 26 +++++ docs/providers/index.md | 1 + docs/providers/vllm.md | 92 +++++++++++++++ src/agents/model-auth.ts | 1 + src/agents/models-config.providers.ts | 97 ++++++++++++++++ .../models-config.providers.vllm.test.ts | 33 ++++++ src/cli/program/register.onboard.ts | 2 +- src/commands/auth-choice-options.e2e.test.ts | 10 ++ src/commands/auth-choice-options.ts | 12 ++ src/commands/auth-choice.apply.ts | 2 + src/commands/auth-choice.apply.vllm.ts | 107 ++++++++++++++++++ .../auth-choice.preferred-provider.ts | 1 + src/commands/configure.gateway-auth.ts | 3 + src/commands/model-picker.e2e.test.ts | 49 ++++++++ src/commands/model-picker.ts | 86 +++++++++++++- ...-non-interactive.provider-auth.e2e.test.ts | 18 +++ .../local/auth-choice.ts | 11 ++ src/commands/onboard-types.ts | 1 + src/wizard/onboarding.ts | 6 + 19 files changed, 555 insertions(+), 3 deletions(-) create mode 100644 docs/providers/vllm.md create mode 100644 src/agents/models-config.providers.vllm.test.ts create mode 100644 src/commands/auth-choice.apply.vllm.ts diff --git a/docs/concepts/model-providers.md b/docs/concepts/model-providers.md index fba56a34a1..0cc1d9e292 100644 --- a/docs/concepts/model-providers.md +++ b/docs/concepts/model-providers.md @@ -259,6 +259,32 @@ ollama pull llama3.3 Ollama is automatically detected when running locally at `http://127.0.0.1:11434/v1`. See [/providers/ollama](/providers/ollama) for model recommendations and custom configuration. +### vLLM + +vLLM is a local (or self-hosted) OpenAI-compatible server: + +- Provider: `vllm` +- Auth: Optional (depends on your server) +- Default base URL: `http://127.0.0.1:8000/v1` + +To opt in to auto-discovery locally (any value works if your server doesn’t enforce auth): + +```bash +export VLLM_API_KEY="vllm-local" +``` + +Then set a model (replace with one of the IDs returned by `/v1/models`): + +```json5 +{ + agents: { + defaults: { model: { primary: "vllm/your-model-id" } }, + }, +} +``` + +See [/providers/vllm](/providers/vllm) for details. + ### Local proxies (LM Studio, vLLM, LiteLLM, etc.) Example (OpenAI‑compatible): diff --git a/docs/providers/index.md b/docs/providers/index.md index 4b77aca6aa..28fe868272 100644 --- a/docs/providers/index.md +++ b/docs/providers/index.md @@ -52,6 +52,7 @@ See [Venice AI](/providers/venice). - [MiniMax](/providers/minimax) - [Venice (Venice AI, privacy-focused)](/providers/venice) - [Ollama (local models)](/providers/ollama) +- [vLLM (local models)](/providers/vllm) - [Qianfan](/providers/qianfan) ## Transcription providers diff --git a/docs/providers/vllm.md b/docs/providers/vllm.md new file mode 100644 index 0000000000..5e0c95d313 --- /dev/null +++ b/docs/providers/vllm.md @@ -0,0 +1,92 @@ +--- +summary: "Run OpenClaw with vLLM (OpenAI-compatible local server)" +read_when: + - You want to run OpenClaw against a local vLLM server + - You want OpenAI-compatible /v1 endpoints with your own models +title: "vLLM" +--- + +# vLLM + +vLLM can serve open-source (and some custom) models via an **OpenAI-compatible** HTTP API. OpenClaw can connect to vLLM using the `openai-completions` API. + +OpenClaw can also **auto-discover** available models from vLLM when you opt in with `VLLM_API_KEY` (any value works if your server doesn’t enforce auth) and you do not define an explicit `models.providers.vllm` entry. 
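+
+The quick start below assumes a vLLM server is already running. If you need to start one, a minimal sketch looks like this (the model name is a placeholder and the exact flags depend on your vLLM version and hardware):
+
+```bash
+# Example only: serve an OpenAI-compatible endpoint on the default port.
+# --api-key is optional; if you set it, use the same value for VLLM_API_KEY below.
+vllm serve Qwen/Qwen2.5-7B-Instruct --port 8000 --api-key vllm-local
+```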
+ +## Quick start + +1. Start vLLM with an OpenAI-compatible server. + +Your base URL should expose `/v1` endpoints (e.g. `/v1/models`, `/v1/chat/completions`). vLLM commonly runs on: + +- `http://127.0.0.1:8000/v1` + +2. Opt in (any value works if no auth is configured): + +```bash +export VLLM_API_KEY="vllm-local" +``` + +3. Select a model (replace with one of your vLLM model IDs): + +```json5 +{ + agents: { + defaults: { + model: { primary: "vllm/your-model-id" }, + }, + }, +} +``` + +## Model discovery (implicit provider) + +When `VLLM_API_KEY` is set (or an auth profile exists) and you **do not** define `models.providers.vllm`, OpenClaw will query: + +- `GET http://127.0.0.1:8000/v1/models` + +…and convert the returned IDs into model entries. + +If you set `models.providers.vllm` explicitly, auto-discovery is skipped and you must define models manually. + +## Explicit configuration (manual models) + +Use explicit config when: + +- vLLM runs on a different host/port. +- You want to pin `contextWindow`/`maxTokens` values. +- Your server requires a real API key (or you want to control headers). + +```json5 +{ + models: { + providers: { + vllm: { + baseUrl: "http://127.0.0.1:8000/v1", + apiKey: "${VLLM_API_KEY}", + api: "openai-completions", + models: [ + { + id: "your-model-id", + name: "Local vLLM Model", + reasoning: false, + input: ["text"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 128000, + maxTokens: 8192, + }, + ], + }, + }, + }, +} +``` + +## Troubleshooting + +- Check the server is reachable: + +```bash +curl http://127.0.0.1:8000/v1/models +``` + +- If requests fail with auth errors, set a real `VLLM_API_KEY` that matches your server configuration, or configure the provider explicitly under `models.providers.vllm`. 
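+- To confirm a key is accepted, repeat the check with the same header OpenClaw's discovery sends (sketch; substitute the key your server expects):
+
+```bash
+# Discovery calls GET <baseUrl>/models with a plain Bearer token.
+curl -H "Authorization: Bearer $VLLM_API_KEY" http://127.0.0.1:8000/v1/models
+```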
diff --git a/src/agents/model-auth.ts b/src/agents/model-auth.ts index 3ad13f7708..4d414a7156 100644 --- a/src/agents/model-auth.ts +++ b/src/agents/model-auth.ts @@ -309,6 +309,7 @@ export function resolveEnvApiKey(provider: string): EnvApiKeyResult | null { together: "TOGETHER_API_KEY", qianfan: "QIANFAN_API_KEY", ollama: "OLLAMA_API_KEY", + vllm: "VLLM_API_KEY", }; const envVar = envMap[normalized]; if (!envVar) { diff --git a/src/agents/models-config.providers.ts b/src/agents/models-config.providers.ts index aa4c3a086d..98677ef9c3 100644 --- a/src/agents/models-config.providers.ts +++ b/src/agents/models-config.providers.ts @@ -85,6 +85,16 @@ const OLLAMA_DEFAULT_COST = { cacheWrite: 0, }; +const VLLM_BASE_URL = "http://127.0.0.1:8000/v1"; +const VLLM_DEFAULT_CONTEXT_WINDOW = 128000; +const VLLM_DEFAULT_MAX_TOKENS = 8192; +const VLLM_DEFAULT_COST = { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, +}; + export const QIANFAN_BASE_URL = "https://qianfan.baidubce.com/v2"; export const QIANFAN_DEFAULT_MODEL_ID = "deepseek-v3.2"; const QIANFAN_DEFAULT_CONTEXT_WINDOW = 98304; @@ -129,6 +139,11 @@ export function resolveOllamaApiBase(configuredBaseUrl?: string): string { } async function discoverOllamaModels(baseUrl?: string): Promise { +type VllmModelsResponse = { + data?: Array<{ + id?: string; + }>; +}; // Skip Ollama discovery in test environments if (process.env.VITEST || process.env.NODE_ENV === "test") { return []; @@ -172,6 +187,59 @@ async function discoverOllamaModels(baseUrl?: string): Promise { + // Skip vLLM discovery in test environments + if (process.env.VITEST || process.env.NODE_ENV === "test") { + return []; + } + + const trimmedBaseUrl = baseUrl.trim().replace(/\/+$/, ""); + const url = `${trimmedBaseUrl}/models`; + + try { + const trimmedApiKey = apiKey?.trim(); + const response = await fetch(url, { + headers: trimmedApiKey ? { Authorization: `Bearer ${trimmedApiKey}` } : undefined, + signal: AbortSignal.timeout(5000), + }); + if (!response.ok) { + console.warn(`Failed to discover vLLM models: ${response.status}`); + return []; + } + const data = (await response.json()) as VllmModelsResponse; + const models = data.data ?? []; + if (models.length === 0) { + console.warn("No vLLM models found on local instance"); + return []; + } + + return models + .map((m) => ({ id: typeof m.id === "string" ? 
m.id.trim() : "" })) + .filter((m) => Boolean(m.id)) + .map((m) => { + const modelId = m.id; + const lower = modelId.toLowerCase(); + const isReasoning = + lower.includes("r1") || lower.includes("reasoning") || lower.includes("think"); + return { + id: modelId, + name: modelId, + reasoning: isReasoning, + input: ["text"], + cost: VLLM_DEFAULT_COST, + contextWindow: VLLM_DEFAULT_CONTEXT_WINDOW, + maxTokens: VLLM_DEFAULT_MAX_TOKENS, + } satisfies ModelDefinitionConfig; + }); + } catch (error) { + console.warn(`Failed to discover vLLM models: ${String(error)}`); + return []; + } +} + function normalizeApiKeyConfig(value: string): string { const trimmed = value.trim(); const match = /^\$\{([A-Z0-9_]+)\}$/.exec(trimmed); @@ -481,6 +549,18 @@ function buildTogetherProvider(): ProviderConfig { }; } +async function buildVllmProvider(params?: { + baseUrl?: string; + apiKey?: string; +}): Promise { + const baseUrl = (params?.baseUrl?.trim() || VLLM_BASE_URL).replace(/\/+$/, ""); + const models = await discoverVllmModels(baseUrl, params?.apiKey); + return { + baseUrl, + api: "openai-completions", + models, + }; +} export function buildQianfanProvider(): ProviderConfig { return { baseUrl: QIANFAN_BASE_URL, @@ -607,6 +687,23 @@ export async function resolveImplicitProviders(params: { providers.ollama = { ...(await buildOllamaProvider(ollamaBaseUrl)), apiKey: ollamaKey }; } + // vLLM provider - OpenAI-compatible local server (opt-in via env/profile). + // If explicitly configured, keep user-defined models/settings as-is. + if (!params.explicitProviders?.vllm) { + const vllmEnvVar = resolveEnvApiKeyVarName("vllm"); + const vllmProfileKey = resolveApiKeyFromProfiles({ provider: "vllm", store: authStore }); + const vllmKey = vllmEnvVar ?? vllmProfileKey; + if (vllmKey) { + const discoveryApiKey = vllmEnvVar + ? (process.env[vllmEnvVar]?.trim() ?? "") + : (vllmProfileKey ?? ""); + providers.vllm = { + ...(await buildVllmProvider({ apiKey: discoveryApiKey || undefined })), + apiKey: vllmKey, + }; + } + } + const togetherKey = resolveEnvApiKeyVarName("together") ?? 
resolveApiKeyFromProfiles({ provider: "together", store: authStore }); diff --git a/src/agents/models-config.providers.vllm.test.ts b/src/agents/models-config.providers.vllm.test.ts new file mode 100644 index 0000000000..441b4155ec --- /dev/null +++ b/src/agents/models-config.providers.vllm.test.ts @@ -0,0 +1,33 @@ +import { mkdtempSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; +import { describe, expect, it } from "vitest"; +import { resolveImplicitProviders } from "./models-config.providers.js"; + +describe("vLLM provider", () => { + it("should not include vllm when no API key is configured", async () => { + const agentDir = mkdtempSync(join(tmpdir(), "openclaw-test-")); + const providers = await resolveImplicitProviders({ agentDir }); + + expect(providers?.vllm).toBeUndefined(); + }); + + it("should include vllm when VLLM_API_KEY is set", async () => { + const agentDir = mkdtempSync(join(tmpdir(), "openclaw-test-")); + process.env.VLLM_API_KEY = "test-key"; + + try { + const providers = await resolveImplicitProviders({ agentDir }); + + expect(providers?.vllm).toBeDefined(); + expect(providers?.vllm?.apiKey).toBe("VLLM_API_KEY"); + expect(providers?.vllm?.baseUrl).toBe("http://127.0.0.1:8000/v1"); + expect(providers?.vllm?.api).toBe("openai-completions"); + + // Note: discovery is disabled in test environments (VITEST check) + expect(providers?.vllm?.models).toEqual([]); + } finally { + delete process.env.VLLM_API_KEY; + } + }); +}); diff --git a/src/cli/program/register.onboard.ts b/src/cli/program/register.onboard.ts index 5fd5e5bdcf..a9e9c2aecb 100644 --- a/src/cli/program/register.onboard.ts +++ b/src/cli/program/register.onboard.ts @@ -58,7 +58,7 @@ export function registerOnboardCommand(program: Command) { .option("--mode ", "Wizard mode: local|remote") .option( "--auth-choice ", - "Auth: setup-token|token|chutes|openai-codex|openai-api-key|xai-api-key|qianfan-api-key|openrouter-api-key|litellm-api-key|ai-gateway-api-key|cloudflare-ai-gateway-api-key|moonshot-api-key|moonshot-api-key-cn|kimi-code-api-key|synthetic-api-key|venice-api-key|gemini-api-key|zai-api-key|zai-coding-global|zai-coding-cn|zai-global|zai-cn|xiaomi-api-key|apiKey|minimax-api|minimax-api-lightning|opencode-zen|custom-api-key|skip|together-api-key", + "Auth: setup-token|token|chutes|vllm|openai-codex|openai-api-key|xai-api-key|qianfan-api-key|openrouter-api-key|litellm-api-key|ai-gateway-api-key|cloudflare-ai-gateway-api-key|moonshot-api-key|moonshot-api-key-cn|kimi-code-api-key|synthetic-api-key|venice-api-key|gemini-api-key|zai-api-key|zai-coding-global|zai-coding-cn|zai-global|zai-cn|xiaomi-api-key|apiKey|minimax-api|minimax-api-lightning|opencode-zen|custom-api-key|skip|together-api-key", ) .option( "--token-provider ", diff --git a/src/commands/auth-choice-options.e2e.test.ts b/src/commands/auth-choice-options.e2e.test.ts index 2e593a0973..b639853e6b 100644 --- a/src/commands/auth-choice-options.e2e.test.ts +++ b/src/commands/auth-choice-options.e2e.test.ts @@ -134,4 +134,14 @@ describe("buildAuthChoiceOptions", () => { expect(options.some((opt) => opt.value === "xai-api-key")).toBe(true); }); + + it("includes vLLM auth choice", () => { + const store: AuthProfileStore = { version: 1, profiles: {} }; + const options = buildAuthChoiceOptions({ + store, + includeSkip: false, + }); + + expect(options.some((opt) => opt.value === "vllm")).toBe(true); + }); }); diff --git a/src/commands/auth-choice-options.ts b/src/commands/auth-choice-options.ts index 
73cd6359e5..88d5eb9363 100644 --- a/src/commands/auth-choice-options.ts +++ b/src/commands/auth-choice-options.ts @@ -10,6 +10,7 @@ export type AuthChoiceOption = { export type AuthChoiceGroupId = | "openai" | "anthropic" + | "vllm" | "google" | "copilot" | "openrouter" @@ -54,6 +55,12 @@ const AUTH_CHOICE_GROUP_DEFS: { hint: "setup-token + API key", choices: ["token", "apiKey"], }, + { + value: "vllm", + label: "vLLM", + hint: "Local/self-hosted OpenAI-compatible", + choices: ["vllm"], + }, { value: "minimax", label: "MiniMax", @@ -182,6 +189,11 @@ export function buildAuthChoiceOptions(params: { label: "OpenAI Codex (ChatGPT OAuth)", }); options.push({ value: "chutes", label: "Chutes (OAuth)" }); + options.push({ + value: "vllm", + label: "vLLM (custom URL + model)", + hint: "Local/self-hosted OpenAI-compatible server", + }); options.push({ value: "openai-api-key", label: "OpenAI API key" }); options.push({ value: "xai-api-key", label: "xAI (Grok) API key" }); options.push({ diff --git a/src/commands/auth-choice.apply.ts b/src/commands/auth-choice.apply.ts index 103e606090..73091aefd1 100644 --- a/src/commands/auth-choice.apply.ts +++ b/src/commands/auth-choice.apply.ts @@ -12,6 +12,7 @@ import { applyAuthChoiceMiniMax } from "./auth-choice.apply.minimax.js"; import { applyAuthChoiceOAuth } from "./auth-choice.apply.oauth.js"; import { applyAuthChoiceOpenAI } from "./auth-choice.apply.openai.js"; import { applyAuthChoiceQwenPortal } from "./auth-choice.apply.qwen-portal.js"; +import { applyAuthChoiceVllm } from "./auth-choice.apply.vllm.js"; import { applyAuthChoiceXAI } from "./auth-choice.apply.xai.js"; export type ApplyAuthChoiceParams = { @@ -42,6 +43,7 @@ export async function applyAuthChoice( ): Promise { const handlers: Array<(p: ApplyAuthChoiceParams) => Promise> = [ applyAuthChoiceAnthropic, + applyAuthChoiceVllm, applyAuthChoiceOpenAI, applyAuthChoiceOAuth, applyAuthChoiceApiProviders, diff --git a/src/commands/auth-choice.apply.vllm.ts b/src/commands/auth-choice.apply.vllm.ts new file mode 100644 index 0000000000..dc69f720af --- /dev/null +++ b/src/commands/auth-choice.apply.vllm.ts @@ -0,0 +1,107 @@ +import type { OpenClawConfig } from "../config/config.js"; +import type { ApplyAuthChoiceParams, ApplyAuthChoiceResult } from "./auth-choice.apply.js"; +import { upsertAuthProfile } from "../agents/auth-profiles.js"; + +const VLLM_DEFAULT_BASE_URL = "http://127.0.0.1:8000/v1"; +const VLLM_DEFAULT_CONTEXT_WINDOW = 128000; +const VLLM_DEFAULT_MAX_TOKENS = 8192; +const VLLM_DEFAULT_COST = { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, +}; + +function applyVllmDefaultModel(cfg: OpenClawConfig, modelRef: string): OpenClawConfig { + const existingModel = cfg.agents?.defaults?.model; + const fallbacks = + existingModel && typeof existingModel === "object" && "fallbacks" in existingModel + ? (existingModel as { fallbacks?: string[] }).fallbacks + : undefined; + + return { + ...cfg, + agents: { + ...cfg.agents, + defaults: { + ...cfg.agents?.defaults, + model: { + ...(fallbacks ? { fallbacks } : undefined), + primary: modelRef, + }, + }, + }, + }; +} + +export async function applyAuthChoiceVllm( + params: ApplyAuthChoiceParams, +): Promise { + if (params.authChoice !== "vllm") { + return null; + } + + const baseUrlRaw = await params.prompter.text({ + message: "vLLM base URL", + initialValue: VLLM_DEFAULT_BASE_URL, + placeholder: VLLM_DEFAULT_BASE_URL, + validate: (value) => (value?.trim() ? 
undefined : "Required"), + }); + const apiKeyRaw = await params.prompter.text({ + message: "vLLM API key", + placeholder: "sk-... (or any non-empty string)", + validate: (value) => (value?.trim() ? undefined : "Required"), + }); + const modelIdRaw = await params.prompter.text({ + message: "vLLM model", + placeholder: "meta-llama/Meta-Llama-3-8B-Instruct", + validate: (value) => (value?.trim() ? undefined : "Required"), + }); + + const baseUrl = String(baseUrlRaw ?? "") + .trim() + .replace(/\/+$/, ""); + const apiKey = String(apiKeyRaw ?? "").trim(); + const modelId = String(modelIdRaw ?? "").trim(); + const modelRef = `vllm/${modelId}`; + + upsertAuthProfile({ + profileId: "vllm:default", + credential: { type: "api_key", provider: "vllm", key: apiKey }, + agentDir: params.agentDir, + }); + + const nextConfig: OpenClawConfig = { + ...params.config, + models: { + ...params.config.models, + mode: params.config.models?.mode ?? "merge", + providers: { + ...params.config.models?.providers, + vllm: { + baseUrl, + api: "openai-completions", + apiKey: "VLLM_API_KEY", + models: [ + { + id: modelId, + name: modelId, + reasoning: false, + input: ["text"], + cost: VLLM_DEFAULT_COST, + contextWindow: VLLM_DEFAULT_CONTEXT_WINDOW, + maxTokens: VLLM_DEFAULT_MAX_TOKENS, + }, + ], + }, + }, + }, + }; + + if (!params.setDefaultModel) { + return { config: nextConfig, agentModelOverride: modelRef }; + } + + await params.prompter.note(`Default model set to ${modelRef}`, "Model configured"); + return { config: applyVllmDefaultModel(nextConfig, modelRef) }; +} diff --git a/src/commands/auth-choice.preferred-provider.ts b/src/commands/auth-choice.preferred-provider.ts index 8cd4480253..95921b02c1 100644 --- a/src/commands/auth-choice.preferred-provider.ts +++ b/src/commands/auth-choice.preferred-provider.ts @@ -6,6 +6,7 @@ const PREFERRED_PROVIDER_BY_AUTH_CHOICE: Partial> = { "claude-cli": "anthropic", token: "anthropic", apiKey: "anthropic", + vllm: "vllm", "openai-codex": "openai-codex", "codex-cli": "openai-codex", chutes: "chutes", diff --git a/src/commands/configure.gateway-auth.ts b/src/commands/configure.gateway-auth.ts index 396e092574..7a5d6f098f 100644 --- a/src/commands/configure.gateway-auth.ts +++ b/src/commands/configure.gateway-auth.ts @@ -77,6 +77,9 @@ export async function promptAuthConfig( ignoreAllowlist: true, preferredProvider: resolvePreferredProviderForAuthChoice(authChoice), }); + if (modelSelection.config) { + next = modelSelection.config; + } if (modelSelection.model) { next = applyPrimaryModel(next, modelSelection.model); } diff --git a/src/commands/model-picker.e2e.test.ts b/src/commands/model-picker.e2e.test.ts index 692aa445a6..b23cfa0674 100644 --- a/src/commands/model-picker.e2e.test.ts +++ b/src/commands/model-picker.e2e.test.ts @@ -20,9 +20,11 @@ const ensureAuthProfileStore = vi.hoisted(() => })), ); const listProfilesForProvider = vi.hoisted(() => vi.fn(() => [])); +const upsertAuthProfile = vi.hoisted(() => vi.fn()); vi.mock("../agents/auth-profiles.js", () => ({ ensureAuthProfileStore, listProfilesForProvider, + upsertAuthProfile, })); const resolveEnvApiKey = vi.hoisted(() => vi.fn(() => undefined)); @@ -68,6 +70,53 @@ describe("promptDefaultModel", () => { true, ); }); + + it("supports configuring vLLM during onboarding", async () => { + loadModelCatalog.mockResolvedValue([ + { + provider: "anthropic", + id: "claude-sonnet-4-5", + name: "Claude Sonnet 4.5", + }, + ]); + + const select = vi.fn(async (params) => { + const vllm = params.options.find((opt: { value: string }) => 
opt.value === "__vllm__"); + return (vllm?.value ?? "") as never; + }); + const text = vi + .fn() + .mockResolvedValueOnce("http://127.0.0.1:8000/v1") + .mockResolvedValueOnce("sk-vllm-test") + .mockResolvedValueOnce("meta-llama/Meta-Llama-3-8B-Instruct"); + const prompter = makePrompter({ select, text: text as never }); + const config = { agents: { defaults: {} } } as OpenClawConfig; + + const result = await promptDefaultModel({ + config, + prompter, + allowKeep: false, + includeManual: false, + includeVllm: true, + ignoreAllowlist: true, + }); + + expect(upsertAuthProfile).toHaveBeenCalledWith( + expect.objectContaining({ + profileId: "vllm:default", + credential: expect.objectContaining({ provider: "vllm" }), + }), + ); + expect(result.model).toBe("vllm/meta-llama/Meta-Llama-3-8B-Instruct"); + expect(result.config?.models?.providers?.vllm).toMatchObject({ + baseUrl: "http://127.0.0.1:8000/v1", + api: "openai-completions", + apiKey: "VLLM_API_KEY", + models: [ + { id: "meta-llama/Meta-Llama-3-8B-Instruct", name: "meta-llama/Meta-Llama-3-8B-Instruct" }, + ], + }); + }); }); describe("promptModelAllowlist", () => { diff --git a/src/commands/model-picker.ts b/src/commands/model-picker.ts index b0719fdd43..ae1d844d7c 100644 --- a/src/commands/model-picker.ts +++ b/src/commands/model-picker.ts @@ -1,6 +1,10 @@ import type { OpenClawConfig } from "../config/config.js"; import type { WizardPrompter, WizardSelectOption } from "../wizard/prompts.js"; -import { ensureAuthProfileStore, listProfilesForProvider } from "../agents/auth-profiles.js"; +import { + ensureAuthProfileStore, + listProfilesForProvider, + upsertAuthProfile, +} from "../agents/auth-profiles.js"; import { DEFAULT_MODEL, DEFAULT_PROVIDER } from "../agents/defaults.js"; import { getCustomProviderApiKey, resolveEnvApiKey } from "../agents/model-auth.js"; import { loadModelCatalog } from "../agents/model-catalog.js"; @@ -16,7 +20,17 @@ import { OPENAI_CODEX_DEFAULT_MODEL } from "./openai-codex-model-default.js"; const KEEP_VALUE = "__keep__"; const MANUAL_VALUE = "__manual__"; +const VLLM_VALUE = "__vllm__"; const PROVIDER_FILTER_THRESHOLD = 30; +const VLLM_DEFAULT_BASE_URL = "http://127.0.0.1:8000/v1"; +const VLLM_DEFAULT_CONTEXT_WINDOW = 128000; +const VLLM_DEFAULT_MAX_TOKENS = 8192; +const VLLM_DEFAULT_COST = { + input: 0, + output: 0, + cacheRead: 0, + cacheWrite: 0, +}; // Models that are internal routing features and should not be shown in selection lists. // These may be valid as defaults (e.g., set automatically during auth flow) but are not @@ -28,13 +42,14 @@ type PromptDefaultModelParams = { prompter: WizardPrompter; allowKeep?: boolean; includeManual?: boolean; + includeVllm?: boolean; ignoreAllowlist?: boolean; preferredProvider?: string; agentDir?: string; message?: string; }; -type PromptDefaultModelResult = { model?: string }; +type PromptDefaultModelResult = { model?: string; config?: OpenClawConfig }; type PromptModelAllowlistResult = { models?: string[] }; function hasAuthForProvider( @@ -107,6 +122,7 @@ export async function promptDefaultModel( const cfg = params.config; const allowKeep = params.allowKeep ?? true; const includeManual = params.includeManual ?? true; + const includeVllm = params.includeVllm ?? false; const ignoreAllowlist = params.ignoreAllowlist ?? 
false; const preferredProviderRaw = params.preferredProvider?.trim(); const preferredProvider = preferredProviderRaw @@ -212,6 +228,13 @@ export async function promptDefaultModel( if (includeManual) { options.push({ value: MANUAL_VALUE, label: "Enter model manually" }); } + if (includeVllm) { + options.push({ + value: VLLM_VALUE, + label: "vLLM (custom)", + hint: "Enter vLLM URL + API key + model", + }); + } const seen = new Set(); const addModelOption = (entry: { @@ -295,6 +318,65 @@ export async function promptDefaultModel( initialValue: configuredRaw || resolvedKey || undefined, }); } + if (selection === VLLM_VALUE) { + const baseUrlRaw = await params.prompter.text({ + message: "vLLM base URL", + initialValue: VLLM_DEFAULT_BASE_URL, + placeholder: VLLM_DEFAULT_BASE_URL, + validate: (value) => (value?.trim() ? undefined : "Required"), + }); + const apiKeyRaw = await params.prompter.text({ + message: "vLLM API key", + placeholder: "sk-... (or any non-empty string)", + validate: (value) => (value?.trim() ? undefined : "Required"), + }); + const modelIdRaw = await params.prompter.text({ + message: "vLLM model", + placeholder: "meta-llama/Meta-Llama-3-8B-Instruct", + validate: (value) => (value?.trim() ? undefined : "Required"), + }); + + const baseUrl = String(baseUrlRaw ?? "") + .trim() + .replace(/\/+$/, ""); + const apiKey = String(apiKeyRaw ?? "").trim(); + const modelId = String(modelIdRaw ?? "").trim(); + + upsertAuthProfile({ + profileId: "vllm:default", + credential: { type: "api_key", provider: "vllm", key: apiKey }, + agentDir: params.agentDir, + }); + + const nextConfig: OpenClawConfig = { + ...cfg, + models: { + ...cfg.models, + mode: cfg.models?.mode ?? "merge", + providers: { + ...cfg.models?.providers, + vllm: { + baseUrl, + api: "openai-completions", + apiKey: "VLLM_API_KEY", + models: [ + { + id: modelId, + name: modelId, + reasoning: false, + input: ["text"], + cost: VLLM_DEFAULT_COST, + contextWindow: VLLM_DEFAULT_CONTEXT_WINDOW, + maxTokens: VLLM_DEFAULT_MAX_TOKENS, + }, + ], + }, + }, + }, + }; + + return { model: `vllm/${modelId}`, config: nextConfig }; + } return { model: String(selection) }; } diff --git a/src/commands/onboard-non-interactive.provider-auth.e2e.test.ts b/src/commands/onboard-non-interactive.provider-auth.e2e.test.ts index 896020838f..90b8e2de5b 100644 --- a/src/commands/onboard-non-interactive.provider-auth.e2e.test.ts +++ b/src/commands/onboard-non-interactive.provider-auth.e2e.test.ts @@ -330,6 +330,24 @@ describe("onboard (non-interactive): provider auth", () => { }); }, 60_000); + it("rejects vLLM auth choice in non-interactive mode", async () => { + await withOnboardEnv("openclaw-onboard-vllm-non-interactive-", async ({ runtime }) => { + await expect( + runNonInteractive( + { + nonInteractive: true, + authChoice: "vllm", + skipHealth: true, + skipChannels: true, + skipSkills: true, + json: true, + }, + runtime, + ), + ).rejects.toThrow('Auth choice "vllm" requires interactive mode.'); + }); + }, 60_000); + it("stores LiteLLM API key and sets default model", async () => { await withOnboardEnv("openclaw-onboard-litellm-", async ({ configPath, runtime }) => { await runNonInteractive( diff --git a/src/commands/onboard-non-interactive/local/auth-choice.ts b/src/commands/onboard-non-interactive/local/auth-choice.ts index b29f44edfc..471acc96c7 100644 --- a/src/commands/onboard-non-interactive/local/auth-choice.ts +++ b/src/commands/onboard-non-interactive/local/auth-choice.ts @@ -88,6 +88,17 @@ export async function 
applyNonInteractiveAuthChoice(params: {
    return null;
  }

+  if (authChoice === "vllm") {
+    runtime.error(
+      [
+        'Auth choice "vllm" requires interactive mode.',
+        "Use interactive onboard/configure to enter base URL, API key, and model ID.",
+      ].join("\n"),
+    );
+    runtime.exit(1);
+    return null;
+  }
+
  if (authChoice === "apiKey") {
    const resolved = await resolveNonInteractiveApiKey({
      provider: "anthropic",
diff --git a/src/commands/onboard-types.ts b/src/commands/onboard-types.ts
index 84cf9e8247..361d8345c7 100644
--- a/src/commands/onboard-types.ts
+++ b/src/commands/onboard-types.ts
@@ -9,6 +9,7 @@ export type AuthChoice =
  | "claude-cli"
  | "token"
  | "chutes"
+  | "vllm"
  | "openai-codex"
  | "openai-api-key"
  | "openrouter-api-key"
diff --git a/src/wizard/onboarding.ts b/src/wizard/onboarding.ts
index 91f1e967fe..8c4a62920c 100644
--- a/src/wizard/onboarding.ts
+++ b/src/wizard/onboarding.ts
@@ -411,7 +411,11 @@ export async function runOnboardingWizard(
      ignoreAllowlist: true,
      preferredProvider: customPreferredProvider ?? resolvePreferredProviderForAuthChoice(authChoice),
+      includeVllm: true,
    });
+    if (modelSelection.config) {
+      nextConfig = modelSelection.config;
+    }
    if (modelSelection.model) {
      nextConfig = applyPrimaryModel(nextConfig, modelSelection.model);
    }
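
For a quick end-to-end check of this change, the rough manual flow is the following sketch (it assumes the CLI entry point is invoked as `openclaw` and that a vLLM server is already listening on the default port; adjust both to your setup):

```bash
# Opt in to auto-discovery (any non-empty value if the server doesn't enforce auth).
export VLLM_API_KEY="vllm-local"

# Confirm the server answers before onboarding.
curl http://127.0.0.1:8000/v1/models

# Interactive onboarding; the "vllm" choice prompts for base URL, API key, and model ID
# (non-interactive mode rejects this choice by design).
openclaw onboard --auth-choice vllm
```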