diff --git a/README.md b/README.md index ae82e64a..5a30e059 100644 --- a/README.md +++ b/README.md @@ -174,6 +174,9 @@ deepseek --provider fireworks --model deepseek-v4-pro # Self-hosted SGLang SGLANG_BASE_URL="http://localhost:30000/v1" deepseek --provider sglang --model deepseek-v4-flash + +# Self-hosted vLLM +VLLM_BASE_URL="http://localhost:8000/v1" deepseek --provider vllm --model deepseek-v4-flash ``` --- @@ -257,11 +260,12 @@ Key environment variables: | `DEEPSEEK_API_KEY` | API key | | `DEEPSEEK_BASE_URL` | API base URL | | `DEEPSEEK_MODEL` | Default model | -| `DEEPSEEK_PROVIDER` | `deepseek` (default), `nvidia-nim`, `fireworks`, `sglang` | +| `DEEPSEEK_PROVIDER` | `deepseek` (default), `nvidia-nim`, `fireworks`, `sglang`, `vllm` | | `DEEPSEEK_PROFILE` | Config profile name | | `DEEPSEEK_MEMORY` | Set to `on` to enable user memory | -| `NVIDIA_API_KEY` / `FIREWORKS_API_KEY` / `SGLANG_API_KEY` | Provider auth | +| `NVIDIA_API_KEY` / `FIREWORKS_API_KEY` / `SGLANG_API_KEY` / `VLLM_API_KEY` | Provider auth | | `SGLANG_BASE_URL` | Self-hosted SGLang endpoint | +| `VLLM_BASE_URL` | Self-hosted vLLM endpoint | | `NO_ANIMATIONS=1` | Force accessibility mode at startup | | `SSL_CERT_FILE` | Custom CA bundle for corporate proxies | diff --git a/README.zh-CN.md b/README.zh-CN.md index b041be8b..1fcb548a 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -169,6 +169,9 @@ deepseek --provider fireworks --model deepseek-v4-pro # 自托管 SGLang SGLANG_BASE_URL="http://localhost:30000/v1" deepseek --provider sglang --model deepseek-v4-flash + +# 自托管 vLLM +VLLM_BASE_URL="http://localhost:8000/v1" deepseek --provider vllm --model deepseek-v4-flash ``` --- @@ -252,11 +255,12 @@ deepseek mcp-server # 启动 dispatcher MCP stdio 服 | `DEEPSEEK_API_KEY` | DeepSeek API key | | `DEEPSEEK_BASE_URL` | API base URL | | `DEEPSEEK_MODEL` | 默认模型 | -| `DEEPSEEK_PROVIDER` | `deepseek`(默认)、`nvidia-nim`、`fireworks`、`sglang` | +| `DEEPSEEK_PROVIDER` | 
`deepseek`(默认)、`nvidia-nim`、`fireworks`、`sglang`、`vllm` | | `DEEPSEEK_PROFILE` | 配置 profile 名称 | | `DEEPSEEK_MEMORY` | 设为 `on` 启用用户记忆 | -| `NVIDIA_API_KEY` / `FIREWORKS_API_KEY` / `SGLANG_API_KEY` | 提供商认证 | +| `NVIDIA_API_KEY` / `FIREWORKS_API_KEY` / `SGLANG_API_KEY` / `VLLM_API_KEY` | 提供商认证 | | `SGLANG_BASE_URL` | 自托管 SGLang 端点 | +| `VLLM_BASE_URL` | 自托管 vLLM 端点 | | `NO_ANIMATIONS=1` | 启动时强制无障碍模式 | | `SSL_CERT_FILE` | 企业代理的自定义 CA 包 | diff --git a/config.example.toml b/config.example.toml index 2bee9a0b..632bc7a7 100644 --- a/config.example.toml +++ b/config.example.toml @@ -12,10 +12,10 @@ # Choose which provider to use by default. Per-provider credentials live in the # `[providers.*]` sections near the bottom of # this file — keeping both stored at once means `/provider deepseek` and -# `/provider nvidia-nim` (or `--provider fireworks`, `/provider sglang`) toggle without having to +# `/provider nvidia-nim` (or `--provider fireworks`, `/provider sglang`, `/provider vllm`) toggle without having to # re-enter keys. Top-level `api_key` / `base_url` are still read as DeepSeek # defaults when `[providers.deepseek]` is absent (backward compatibility). 
-provider = "deepseek" # deepseek | nvidia-nim | openrouter | novita | fireworks | sglang +provider = "deepseek" # deepseek | nvidia-nim | openrouter | novita | fireworks | sglang | vllm api_key = "YOUR_DEEPSEEK_API_KEY" # must be non-empty base_url = "https://api.deepseek.com" # base_url = "https://api.deepseeki.com" # China users @@ -148,6 +148,7 @@ max_subagents = 10 # optional (1-20) # (or NVIDIA_NIM_BASE_URL / NVIDIA_BASE_URL), NVIDIA_NIM_MODEL # Fireworks: FIREWORKS_API_KEY, FIREWORKS_BASE_URL # SGLang: SGLANG_BASE_URL, SGLANG_MODEL, optional SGLANG_API_KEY +# vLLM: VLLM_BASE_URL, VLLM_MODEL, optional VLLM_API_KEY # DeepSeek Platform (https://platform.deepseek.com) [providers.deepseek] @@ -173,6 +174,12 @@ max_subagents = 10 # optional (1-20) # base_url = "http://localhost:30000/v1" # model = "deepseek-ai/DeepSeek-V4-Pro" # or deepseek-ai/DeepSeek-V4-Flash +# Self-hosted vLLM OpenAI-compatible server +[providers.vllm] +# api_key = "OPTIONAL_VLLM_TOKEN" +# base_url = "http://localhost:8000/v1" +# model = "deepseek-ai/DeepSeek-V4-Pro" # or deepseek-ai/DeepSeek-V4-Flash + # ───────────────────────────────────────────────────────────────────────────────── # Network Policy (#135) # ───────────────────────────────────────────────────────────────────────────────── diff --git a/crates/agent/src/lib.rs b/crates/agent/src/lib.rs index eb1e0622..3d2644d1 100644 --- a/crates/agent/src/lib.rs +++ b/crates/agent/src/lib.rs @@ -163,6 +163,28 @@ impl Default for ModelRegistry { supports_tools: true, supports_reasoning: true, }, + ModelInfo { + id: "deepseek-ai/DeepSeek-V4-Pro".to_string(), + provider: ProviderKind::Vllm, + aliases: vec![ + "deepseek-v4-pro".to_string(), + "vllm-deepseek-v4-pro".to_string(), + ], + supports_tools: true, + supports_reasoning: true, + }, + ModelInfo { + id: "deepseek-ai/DeepSeek-V4-Flash".to_string(), + provider: ProviderKind::Vllm, + aliases: vec![ + "deepseek-v4-flash".to_string(), + "deepseek-chat".to_string(), + 
"deepseek-reasoner".to_string(), + "vllm-deepseek-v4-flash".to_string(), + ], + supports_tools: true, + supports_reasoning: true, + }, ]; Self::new(models) } @@ -366,4 +388,22 @@ mod tests { assert_eq!(resolved.resolved.provider, ProviderKind::Sglang); assert_eq!(resolved.resolved.id, "deepseek-ai/DeepSeek-V4-Flash"); } + + #[test] + fn vllm_default_uses_canonical_model_id() { + let registry = ModelRegistry::default(); + let resolved = registry.resolve(None, Some(ProviderKind::Vllm)); + + assert_eq!(resolved.resolved.provider, ProviderKind::Vllm); + assert_eq!(resolved.resolved.id, "deepseek-ai/DeepSeek-V4-Pro"); + } + + #[test] + fn deepseek_v4_flash_alias_resolves_to_vllm_when_provider_hinted() { + let registry = ModelRegistry::default(); + let resolved = registry.resolve(Some("deepseek-v4-flash"), Some(ProviderKind::Vllm)); + + assert_eq!(resolved.resolved.provider, ProviderKind::Vllm); + assert_eq!(resolved.resolved.id, "deepseek-ai/DeepSeek-V4-Flash"); + } } diff --git a/crates/cli/src/lib.rs b/crates/cli/src/lib.rs index 87ee2d7c..cb2a5cf5 100644 --- a/crates/cli/src/lib.rs +++ b/crates/cli/src/lib.rs @@ -28,6 +28,7 @@ enum ProviderArg { Novita, Fireworks, Sglang, + Vllm, } impl From for ProviderKind { @@ -40,6 +41,7 @@ impl From for ProviderKind { ProviderArg::Novita => ProviderKind::Novita, ProviderArg::Fireworks => ProviderKind::Fireworks, ProviderArg::Sglang => ProviderKind::Sglang, + ProviderArg::Vllm => ProviderKind::Vllm, } } } @@ -561,17 +563,19 @@ fn provider_slot(provider: ProviderKind) -> &'static str { ProviderKind::Novita => "novita", ProviderKind::Fireworks => "fireworks", ProviderKind::Sglang => "sglang", + ProviderKind::Vllm => "vllm", } } /// Provider order used by the `auth list` and `auth status` outputs. 
-const PROVIDER_LIST: [ProviderKind; 7] = [ +const PROVIDER_LIST: [ProviderKind; 8] = [ ProviderKind::Deepseek, ProviderKind::NvidiaNim, ProviderKind::Openrouter, ProviderKind::Novita, ProviderKind::Fireworks, ProviderKind::Sglang, + ProviderKind::Vllm, ProviderKind::Openai, ]; @@ -1045,9 +1049,10 @@ fn delegate_to_tui( | ProviderKind::Novita | ProviderKind::Fireworks | ProviderKind::Sglang + | ProviderKind::Vllm ) { bail!( - "The interactive TUI supports DeepSeek, NVIDIA NIM, OpenRouter, Novita, Fireworks, and SGLang providers. Remove --provider {} or use `deepseek model ...` for provider registry inspection.", + "The interactive TUI supports DeepSeek, NVIDIA NIM, OpenRouter, Novita, Fireworks, SGLang, and vLLM providers. Remove --provider {} or use `deepseek model ...` for provider registry inspection.", resolved_runtime.provider.as_str() ); } @@ -1562,6 +1567,16 @@ mod tests { })) )); + let cli = parse_ok(&["deepseek", "auth", "get", "--provider", "vllm"]); + assert!(matches!( + cli.command, + Some(Commands::Auth(AuthArgs { + command: AuthCommand::Get { + provider: ProviderArg::Vllm + } + })) + )); + let cli = parse_ok(&["deepseek", "auth", "list"]); assert!(matches!( cli.command, diff --git a/crates/config/src/lib.rs b/crates/config/src/lib.rs index dc491669..9d7cbfb8 100644 --- a/crates/config/src/lib.rs +++ b/crates/config/src/lib.rs @@ -26,6 +26,9 @@ const DEFAULT_OPENROUTER_BASE_URL: &str = "https://openrouter.ai/api/v1"; const DEFAULT_NOVITA_BASE_URL: &str = "https://api.novita.ai/v1"; const DEFAULT_FIREWORKS_BASE_URL: &str = "https://api.fireworks.ai/inference/v1"; const DEFAULT_SGLANG_BASE_URL: &str = "http://localhost:30000/v1"; +const DEFAULT_VLLM_MODEL: &str = "deepseek-ai/DeepSeek-V4-Pro"; +const DEFAULT_VLLM_FLASH_MODEL: &str = "deepseek-ai/DeepSeek-V4-Flash"; +const DEFAULT_VLLM_BASE_URL: &str = "http://localhost:8000/v1"; #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)] #[serde(rename_all = "kebab-case")] @@ -38,6 
+41,7 @@ pub enum ProviderKind { Novita, Fireworks, Sglang, + Vllm, } impl ProviderKind { @@ -51,6 +55,7 @@ impl ProviderKind { Self::Novita => "novita", Self::Fireworks => "fireworks", Self::Sglang => "sglang", + Self::Vllm => "vllm", } } @@ -64,6 +69,7 @@ impl ProviderKind { "novita" => Some(Self::Novita), "fireworks" | "fireworks-ai" => Some(Self::Fireworks), "sglang" | "sg-lang" => Some(Self::Sglang), + "vllm" | "v-llm" => Some(Self::Vllm), _ => None, } } @@ -92,6 +98,8 @@ pub struct ProvidersToml { pub fireworks: ProviderConfigToml, #[serde(default)] pub sglang: ProviderConfigToml, + #[serde(default)] + pub vllm: ProviderConfigToml, } impl ProvidersToml { @@ -105,6 +113,7 @@ impl ProvidersToml { ProviderKind::Novita => &self.novita, ProviderKind::Fireworks => &self.fireworks, ProviderKind::Sglang => &self.sglang, + ProviderKind::Vllm => &self.vllm, } } @@ -117,6 +126,7 @@ impl ProvidersToml { ProviderKind::Novita => &mut self.novita, ProviderKind::Fireworks => &mut self.fireworks, ProviderKind::Sglang => &mut self.sglang, + ProviderKind::Vllm => &mut self.vllm, } } } @@ -318,6 +328,7 @@ impl ConfigToml { merge_provider_config(&mut self.providers.novita, &project.providers.novita); merge_provider_config(&mut self.providers.fireworks, &project.providers.fireworks); merge_provider_config(&mut self.providers.sglang, &project.providers.sglang); + merge_provider_config(&mut self.providers.vllm, &project.providers.vllm); if project.network.is_some() { self.network = project.network; @@ -373,6 +384,9 @@ impl ConfigToml { "providers.sglang.api_key" => self.providers.sglang.api_key.clone(), "providers.sglang.base_url" => self.providers.sglang.base_url.clone(), "providers.sglang.model" => self.providers.sglang.model.clone(), + "providers.vllm.api_key" => self.providers.vllm.api_key.clone(), + "providers.vllm.base_url" => self.providers.vllm.base_url.clone(), + "providers.vllm.model" => self.providers.vllm.model.clone(), _ => 
self.extras.get(key).map(toml::Value::to_string), } } @@ -460,6 +474,15 @@ impl ConfigToml { "providers.sglang.model" => { self.providers.sglang.model = Some(value.to_string()); } + "providers.vllm.api_key" => { + self.providers.vllm.api_key = Some(value.to_string()); + } + "providers.vllm.base_url" => { + self.providers.vllm.base_url = Some(value.to_string()); + } + "providers.vllm.model" => { + self.providers.vllm.model = Some(value.to_string()); + } _ => { self.extras .insert(key.to_string(), toml::Value::String(value.to_string())); @@ -513,6 +536,9 @@ impl ConfigToml { "providers.sglang.api_key" => self.providers.sglang.api_key = None, "providers.sglang.base_url" => self.providers.sglang.base_url = None, "providers.sglang.model" => self.providers.sglang.model = None, + "providers.vllm.api_key" => self.providers.vllm.api_key = None, + "providers.vllm.base_url" => self.providers.vllm.base_url = None, + "providers.vllm.model" => self.providers.vllm.model = None, _ => { self.extras.remove(key); } @@ -624,6 +650,15 @@ impl ConfigToml { if let Some(v) = self.providers.sglang.model.as_ref() { out.insert("providers.sglang.model".to_string(), v.clone()); } + if let Some(v) = self.providers.vllm.api_key.as_ref() { + out.insert("providers.vllm.api_key".to_string(), redact_secret(v)); + } + if let Some(v) = self.providers.vllm.base_url.as_ref() { + out.insert("providers.vllm.base_url".to_string(), v.clone()); + } + if let Some(v) = self.providers.vllm.model.as_ref() { + out.insert("providers.vllm.model".to_string(), v.clone()); + } for (k, v) in &self.extras { out.insert(k.clone(), v.to_string()); @@ -695,6 +730,7 @@ impl ConfigToml { ProviderKind::Novita => DEFAULT_NOVITA_BASE_URL.to_string(), ProviderKind::Fireworks => DEFAULT_FIREWORKS_BASE_URL.to_string(), ProviderKind::Sglang => DEFAULT_SGLANG_BASE_URL.to_string(), + ProviderKind::Vllm => DEFAULT_VLLM_BASE_URL.to_string(), }); let model = cli @@ -712,6 +748,7 @@ impl ConfigToml { ProviderKind::Novita => 
DEFAULT_NOVITA_MODEL.to_string(), ProviderKind::Fireworks => DEFAULT_FIREWORKS_MODEL.to_string(), ProviderKind::Sglang => DEFAULT_SGLANG_MODEL.to_string(), + ProviderKind::Vllm => DEFAULT_VLLM_MODEL.to_string(), }); let model = normalize_model_for_provider(provider, &model); @@ -822,6 +859,14 @@ fn normalize_model_for_provider(provider: ProviderKind, model: &str) -> String { "deepseek-v4-flash" | "deepseek-v4flash" | "deepseek-chat" | "deepseek-reasoner" | "deepseek-r1" | "deepseek-v3" | "deepseek-v3.2", ) => DEFAULT_SGLANG_FLASH_MODEL.to_string(), + (ProviderKind::Vllm, "deepseek-v4-pro" | "deepseek-v4pro") => { + DEFAULT_VLLM_MODEL.to_string() + } + ( + ProviderKind::Vllm, + "deepseek-v4-flash" | "deepseek-v4flash" | "deepseek-chat" | "deepseek-reasoner" + | "deepseek-r1" | "deepseek-v3" | "deepseek-v3.2", + ) => DEFAULT_VLLM_FLASH_MODEL.to_string(), _ => model.to_string(), } } @@ -973,6 +1018,7 @@ struct EnvRuntimeOverrides { novita_base_url: Option, fireworks_base_url: Option, sglang_base_url: Option, + vllm_base_url: Option, } impl EnvRuntimeOverrides { @@ -1013,6 +1059,9 @@ impl EnvRuntimeOverrides { sglang_base_url: std::env::var("SGLANG_BASE_URL") .ok() .filter(|v| !v.trim().is_empty()), + vllm_base_url: std::env::var("VLLM_BASE_URL") + .ok() + .filter(|v| !v.trim().is_empty()), } } @@ -1027,6 +1076,7 @@ impl EnvRuntimeOverrides { ProviderKind::Novita => self.novita_base_url.clone(), ProviderKind::Fireworks => self.fireworks_base_url.clone(), ProviderKind::Sglang => self.sglang_base_url.clone(), + ProviderKind::Vllm => self.vllm_base_url.clone(), } } } @@ -1061,6 +1111,8 @@ mod tests { fireworks_base_url: Option, sglang_api_key: Option, sglang_base_url: Option, + vllm_api_key: Option, + vllm_base_url: Option, } impl EnvGuard { @@ -1083,6 +1135,8 @@ mod tests { fireworks_base_url: env::var_os("FIREWORKS_BASE_URL"), sglang_api_key: env::var_os("SGLANG_API_KEY"), sglang_base_url: env::var_os("SGLANG_BASE_URL"), + vllm_api_key: env::var_os("VLLM_API_KEY"), + 
vllm_base_url: env::var_os("VLLM_BASE_URL"), }; // Safety: test-only environment mutation guarded by a module mutex. unsafe { @@ -1103,6 +1157,8 @@ mod tests { env::remove_var("FIREWORKS_BASE_URL"); env::remove_var("SGLANG_API_KEY"); env::remove_var("SGLANG_BASE_URL"); + env::remove_var("VLLM_API_KEY"); + env::remove_var("VLLM_BASE_URL"); } guard } @@ -1137,6 +1193,8 @@ mod tests { Self::restore_var("FIREWORKS_BASE_URL", self.fireworks_base_url.take()); Self::restore_var("SGLANG_API_KEY", self.sglang_api_key.take()); Self::restore_var("SGLANG_BASE_URL", self.sglang_base_url.take()); + Self::restore_var("VLLM_API_KEY", self.vllm_api_key.take()); + Self::restore_var("VLLM_BASE_URL", self.vllm_base_url.take()); } } } @@ -1320,6 +1378,8 @@ mod tests { Some(ProviderKind::Fireworks) ); assert_eq!(ProviderKind::parse("sg-lang"), Some(ProviderKind::Sglang)); + assert_eq!(ProviderKind::parse("v-llm"), Some(ProviderKind::Vllm)); + assert_eq!(ProviderKind::parse("vllm"), Some(ProviderKind::Vllm)); } #[test] @@ -1386,6 +1446,22 @@ mod tests { assert_eq!(resolved.model, DEFAULT_SGLANG_MODEL); } + #[test] + fn vllm_provider_defaults_to_local_endpoint_and_model() { + let _lock = env_lock(); + let _env = EnvGuard::without_deepseek_runtime_overrides(); + let config = ConfigToml { + provider: ProviderKind::Vllm, + ..ConfigToml::default() + }; + + let resolved = config.resolve_runtime_options(&CliRuntimeOverrides::default()); + + assert_eq!(resolved.provider, ProviderKind::Vllm); + assert_eq!(resolved.base_url, DEFAULT_VLLM_BASE_URL); + assert_eq!(resolved.model, DEFAULT_VLLM_MODEL); + } + #[test] fn openrouter_env_api_key_falls_back_when_config_missing() { let _lock = env_lock(); @@ -1488,6 +1564,22 @@ mod tests { assert_eq!(resolved.model, DEFAULT_SGLANG_FLASH_MODEL); } + #[test] + fn vllm_provider_normalizes_flash_aliases() { + let _lock = env_lock(); + let _env = EnvGuard::without_deepseek_runtime_overrides(); + let cli = CliRuntimeOverrides { + provider: 
Some(ProviderKind::Vllm), + model: Some("deepseek-v4-flash".to_string()), + ..CliRuntimeOverrides::default() + }; + + let resolved = ConfigToml::default().resolve_runtime_options(&cli); + + assert_eq!(resolved.provider, ProviderKind::Vllm); + assert_eq!(resolved.model, DEFAULT_VLLM_FLASH_MODEL); + } + #[test] fn openrouter_provider_specific_config_overrides_env() { let _lock = env_lock(); diff --git a/crates/secrets/src/lib.rs b/crates/secrets/src/lib.rs index e0c54241..ed4cb32d 100644 --- a/crates/secrets/src/lib.rs +++ b/crates/secrets/src/lib.rs @@ -411,6 +411,7 @@ pub fn env_for(name: &str) -> Option { } "fireworks" | "fireworks-ai" => &["FIREWORKS_API_KEY"], "sglang" | "sg-lang" => &["SGLANG_API_KEY"], + "vllm" | "v-llm" => &["VLLM_API_KEY"], "openai" => &["OPENAI_API_KEY"], _ => return None, }; @@ -447,6 +448,7 @@ mod tests { "NVIDIA_NIM_API_KEY", "FIREWORKS_API_KEY", "SGLANG_API_KEY", + "VLLM_API_KEY", "OPENAI_API_KEY", ] { // Safety: tests serialise on env_lock(); the broader @@ -563,6 +565,19 @@ mod tests { unsafe { std::env::remove_var("SGLANG_API_KEY") }; } + #[test] + fn vllm_env_aliases_resolve() { + let _lock = env_lock(); + clear_known_envs(); + // Safety: env mutation guarded by env_lock(). + unsafe { std::env::set_var("VLLM_API_KEY", "vllm-key") }; + + assert_eq!(env_for("vllm").as_deref(), Some("vllm-key")); + assert_eq!(env_for("v-llm").as_deref(), Some("vllm-key")); + // Safety: env mutation guarded by env_lock(). 
+ unsafe { std::env::remove_var("VLLM_API_KEY") }; + } + #[cfg(unix)] #[test] fn file_store_round_trips_with_secure_perms() { diff --git a/crates/tui/src/client.rs b/crates/tui/src/client.rs index e0421063..832c3ae0 100644 --- a/crates/tui/src/client.rs +++ b/crates/tui/src/client.rs @@ -754,7 +754,8 @@ pub(super) fn apply_reasoning_effort( | ApiProvider::Openrouter | ApiProvider::Novita | ApiProvider::Fireworks - | ApiProvider::Sglang => { + | ApiProvider::Sglang + | ApiProvider::Vllm => { body["thinking"] = json!({ "type": "disabled" }); } ApiProvider::NvidiaNim => { @@ -769,7 +770,8 @@ pub(super) fn apply_reasoning_effort( | ApiProvider::Openrouter | ApiProvider::Novita | ApiProvider::Fireworks - | ApiProvider::Sglang => { + | ApiProvider::Sglang + | ApiProvider::Vllm => { body["reasoning_effort"] = json!("high"); body["thinking"] = json!({ "type": "enabled" }); } @@ -786,7 +788,8 @@ pub(super) fn apply_reasoning_effort( | ApiProvider::Openrouter | ApiProvider::Novita | ApiProvider::Fireworks - | ApiProvider::Sglang => { + | ApiProvider::Sglang + | ApiProvider::Vllm => { body["reasoning_effort"] = json!("max"); body["thinking"] = json!({ "type": "enabled" }); } diff --git a/crates/tui/src/commands/provider.rs b/crates/tui/src/commands/provider.rs index ac279035..1ec5a025 100644 --- a/crates/tui/src/commands/provider.rs +++ b/crates/tui/src/commands/provider.rs @@ -27,7 +27,7 @@ pub fn provider(app: &mut App, args: Option<&str>) -> CommandResult { let Some(target) = ApiProvider::parse(name) else { return CommandResult::error(format!( - "Unknown provider '{name}'. Expected: deepseek, nvidia-nim, openrouter, novita, fireworks, or sglang." + "Unknown provider '{name}'. Expected: deepseek, nvidia-nim, openrouter, novita, fireworks, sglang, or vllm." 
)); }; @@ -164,6 +164,19 @@ mod tests { } } + #[test] + fn switch_to_vllm_flash_emits_action() { + let mut app = create_test_app(); + let result = provider(&mut app, Some("vllm flash")); + match result.action { + Some(AppAction::SwitchProvider { provider, model }) => { + assert_eq!(provider, ApiProvider::Vllm); + assert_eq!(model.as_deref(), Some("deepseek-v4-flash")); + } + other => panic!("expected SwitchProvider, got {other:?}"), + } + } + #[test] fn switching_to_active_provider_without_model_is_a_noop() { let mut app = create_test_app(); diff --git a/crates/tui/src/config.rs b/crates/tui/src/config.rs index 83db0131..30f72a70 100644 --- a/crates/tui/src/config.rs +++ b/crates/tui/src/config.rs @@ -30,6 +30,9 @@ pub const DEFAULT_FIREWORKS_BASE_URL: &str = "https://api.fireworks.ai/inference pub const DEFAULT_SGLANG_MODEL: &str = "deepseek-ai/DeepSeek-V4-Pro"; pub const DEFAULT_SGLANG_FLASH_MODEL: &str = "deepseek-ai/DeepSeek-V4-Flash"; pub const DEFAULT_SGLANG_BASE_URL: &str = "http://localhost:30000/v1"; +pub const DEFAULT_VLLM_MODEL: &str = "deepseek-ai/DeepSeek-V4-Pro"; +pub const DEFAULT_VLLM_FLASH_MODEL: &str = "deepseek-ai/DeepSeek-V4-Flash"; +pub const DEFAULT_VLLM_BASE_URL: &str = "http://localhost:8000/v1"; pub const DEFAULT_DEEPSEEKCN_BASE_URL: &str = "https://api.deepseeki.com"; const API_KEYRING_SENTINEL: &str = "__KEYRING__"; pub const COMMON_DEEPSEEK_MODELS: &[&str] = &[ @@ -51,6 +54,7 @@ pub enum ApiProvider { Novita, Fireworks, Sglang, + Vllm, } impl ApiProvider { @@ -66,6 +70,7 @@ impl ApiProvider { "novita" => Some(Self::Novita), "fireworks" | "fireworks-ai" => Some(Self::Fireworks), "sglang" | "sg-lang" => Some(Self::Sglang), + "vllm" | "v-llm" => Some(Self::Vllm), _ => None, } } @@ -80,6 +85,7 @@ impl ApiProvider { Self::Novita => "novita", Self::Fireworks => "fireworks", Self::Sglang => "sglang", + Self::Vllm => "vllm", } } @@ -94,6 +100,7 @@ impl ApiProvider { Self::Novita => "Novita AI", Self::Fireworks => "Fireworks AI", Self::Sglang => 
"SGLang", + Self::Vllm => "vLLM", } } @@ -108,6 +115,7 @@ impl ApiProvider { Self::Novita, Self::Fireworks, Self::Sglang, + Self::Vllm, ] } } @@ -902,6 +910,8 @@ pub struct ProvidersConfig { pub fireworks: ProviderConfig, #[serde(default)] pub sglang: ProviderConfig, + #[serde(default)] + pub vllm: ProviderConfig, } #[derive(Debug, Clone, Deserialize, Default)] @@ -961,7 +971,7 @@ impl Config { && ApiProvider::parse(provider).is_none() { anyhow::bail!( - "Invalid provider '{provider}': expected deepseek, deepseek-cn, nvidia-nim, openrouter, novita, fireworks, or sglang." + "Invalid provider '{provider}': expected deepseek, deepseek-cn, nvidia-nim, openrouter, novita, fireworks, sglang, or vllm." ); } if let Some(ref key) = self.api_key @@ -1079,6 +1089,7 @@ impl Config { ApiProvider::Novita => &providers.novita, ApiProvider::Fireworks => &providers.fireworks, ApiProvider::Sglang => &providers.sglang, + ApiProvider::Vllm => &providers.vllm, }) } @@ -1114,6 +1125,7 @@ impl Config { ApiProvider::Novita => DEFAULT_NOVITA_MODEL, ApiProvider::Fireworks => DEFAULT_FIREWORKS_MODEL, ApiProvider::Sglang => DEFAULT_SGLANG_MODEL, + ApiProvider::Vllm => DEFAULT_VLLM_MODEL, } .to_string() } @@ -1139,7 +1151,8 @@ impl Config { ApiProvider::Openrouter | ApiProvider::Novita | ApiProvider::Fireworks - | ApiProvider::Sglang => None, + | ApiProvider::Sglang + | ApiProvider::Vllm => None, }; let base = provider_base.or(root_base).unwrap_or_else(|| { match provider { @@ -1150,6 +1163,7 @@ impl Config { ApiProvider::Novita => DEFAULT_NOVITA_BASE_URL, ApiProvider::Fireworks => DEFAULT_FIREWORKS_BASE_URL, ApiProvider::Sglang => DEFAULT_SGLANG_BASE_URL, + ApiProvider::Vllm => DEFAULT_VLLM_BASE_URL, } .to_string() }); @@ -1173,6 +1187,7 @@ impl Config { ApiProvider::Novita => "novita", ApiProvider::Fireworks => "fireworks", ApiProvider::Sglang => "sglang", + ApiProvider::Vllm => "vllm", }; // 0. 
Explicit in-memory override (set by onboarding / provider @@ -1236,7 +1251,7 @@ impl Config { // Self-hosted SGLang and vLLM deployments commonly run without auth on // localhost. Return an empty key and let the client omit the // Authorization header. - ApiProvider::Sglang => Ok(String::new()), + ApiProvider::Sglang | ApiProvider::Vllm => Ok(String::new()), } } @@ -1678,11 +1693,26 @@ fn apply_env_overrides(config: &mut Config) { .sglang .base_url = Some(value); } + if matches!(config.api_provider(), ApiProvider::Vllm) + && let Ok(value) = std::env::var("VLLM_BASE_URL") + && !value.trim().is_empty() + { + config + .providers + .get_or_insert_with(ProvidersConfig::default) + .vllm + .base_url = Some(value); + } if matches!(config.api_provider(), ApiProvider::Sglang) && let Ok(value) = std::env::var("SGLANG_MODEL") { config.default_text_model = Some(value); } + if matches!(config.api_provider(), ApiProvider::Vllm) + && let Ok(value) = std::env::var("VLLM_MODEL") + { + config.default_text_model = Some(value); + } if let Ok(value) = std::env::var("DEEPSEEK_MODEL").or_else(|_| std::env::var("DEEPSEEK_DEFAULT_TEXT_MODEL")) { @@ -1902,6 +1932,11 @@ fn normalize_model_config(config: &mut Config) { { providers.sglang.model = Some(normalized); } + if let Some(model) = providers.vllm.model.as_deref() + && let Some(normalized) = normalize_model_for_provider(ApiProvider::Vllm, model) + { + providers.vllm.model = Some(normalized); + } } } @@ -1927,6 +1962,8 @@ fn model_for_provider(provider: ApiProvider, normalized: String) -> String { } (ApiProvider::Sglang, "deepseek-v4-pro") => DEFAULT_SGLANG_MODEL.to_string(), (ApiProvider::Sglang, "deepseek-v4-flash") => DEFAULT_SGLANG_FLASH_MODEL.to_string(), + (ApiProvider::Vllm, "deepseek-v4-pro") => DEFAULT_VLLM_MODEL.to_string(), + (ApiProvider::Vllm, "deepseek-v4-flash") => DEFAULT_VLLM_FLASH_MODEL.to_string(), _ => normalized, } } @@ -2067,6 +2104,7 @@ fn merge_providers( novita: merge_provider_config(base.novita, override_cfg.novita), 
fireworks: merge_provider_config(base.fireworks, override_cfg.fireworks), sglang: merge_provider_config(base.sglang, override_cfg.sglang), + vllm: merge_provider_config(base.vllm, override_cfg.vllm), }), } } @@ -2436,6 +2474,7 @@ pub fn has_api_key_for(config: &Config, provider: ApiProvider) -> bool { ApiProvider::Novita => "NOVITA_API_KEY", ApiProvider::Fireworks => "FIREWORKS_API_KEY", ApiProvider::Sglang => "SGLANG_API_KEY", + ApiProvider::Vllm => "VLLM_API_KEY", }; if std::env::var(env_var).is_ok_and(|k| !k.trim().is_empty()) { return true; @@ -2447,7 +2486,7 @@ pub fn has_api_key_for(config: &Config, provider: ApiProvider) -> bool { } // SGLang and vLLM are self-hosted and typically run without authentication. - if matches!(provider, ApiProvider::Sglang) { + if matches!(provider, ApiProvider::Sglang | ApiProvider::Vllm) { return true; } @@ -2494,6 +2533,7 @@ pub fn save_api_key_for(provider: ApiProvider, api_key: &str) -> Result ApiProvider::Novita => "providers.novita", ApiProvider::Fireworks => "providers.fireworks", ApiProvider::Sglang => "providers.sglang", + ApiProvider::Vllm => "providers.vllm", }; // Parse existing TOML (or start fresh) so we can edit the right table @@ -2521,6 +2561,7 @@ pub fn save_api_key_for(provider: ApiProvider, api_key: &str) -> Result ApiProvider::Novita => "novita", ApiProvider::Fireworks => "fireworks", ApiProvider::Sglang => "sglang", + ApiProvider::Vllm => "vllm", }; let entry = providers .entry(key_inside.to_string()) @@ -2634,6 +2675,9 @@ mod tests { sglang_api_key: Option, sglang_base_url: Option, sglang_model: Option, + vllm_api_key: Option, + vllm_base_url: Option, + vllm_model: Option, } impl EnvGuard { @@ -2664,6 +2708,9 @@ mod tests { let sglang_api_key_prev = env::var_os("SGLANG_API_KEY"); let sglang_base_url_prev = env::var_os("SGLANG_BASE_URL"); let sglang_model_prev = env::var_os("SGLANG_MODEL"); + let vllm_api_key_prev = env::var_os("VLLM_API_KEY"); + let vllm_base_url_prev = env::var_os("VLLM_BASE_URL"); + let 
vllm_model_prev = env::var_os("VLLM_MODEL"); // Safety: test-only environment mutation guarded by a global mutex. unsafe { env::set_var("HOME", &home_str); @@ -2689,6 +2736,9 @@ mod tests { env::remove_var("SGLANG_API_KEY"); env::remove_var("SGLANG_BASE_URL"); env::remove_var("SGLANG_MODEL"); + env::remove_var("VLLM_API_KEY"); + env::remove_var("VLLM_BASE_URL"); + env::remove_var("VLLM_MODEL"); } Self { home: home_prev, @@ -2714,6 +2764,9 @@ mod tests { sglang_api_key: sglang_api_key_prev, sglang_base_url: sglang_base_url_prev, sglang_model: sglang_model_prev, + vllm_api_key: vllm_api_key_prev, + vllm_base_url: vllm_base_url_prev, + vllm_model: vllm_model_prev, } } } @@ -2748,6 +2801,9 @@ mod tests { Self::restore_var("SGLANG_API_KEY", self.sglang_api_key.take()); Self::restore_var("SGLANG_BASE_URL", self.sglang_base_url.take()); Self::restore_var("SGLANG_MODEL", self.sglang_model.take()); + Self::restore_var("VLLM_API_KEY", self.vllm_api_key.take()); + Self::restore_var("VLLM_BASE_URL", self.vllm_base_url.take()); + Self::restore_var("VLLM_MODEL", self.vllm_model.take()); } } } diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs index c5978bf8..33270850 100644 --- a/crates/tui/src/main.rs +++ b/crates/tui/src/main.rs @@ -1301,6 +1301,10 @@ fn run_setup_status(config: &Config, workspace: &Path) -> Result<()> { "SGLANG_API_KEY", "deepseek auth set --provider sglang --api-key \"...\"", ), + crate::config::ApiProvider::Vllm => ( + "VLLM_API_KEY", + "deepseek auth set --provider vllm --api-key \"...\"", + ), crate::config::ApiProvider::Deepseek | crate::config::ApiProvider::DeepseekCN => { ("DEEPSEEK_API_KEY", "deepseek auth set --provider deepseek") } @@ -1314,6 +1318,7 @@ fn run_setup_status(config: &Config, workspace: &Path) -> Result<()> { crate::config::ApiProvider::Novita => "novita", crate::config::ApiProvider::Fireworks => "fireworks", crate::config::ApiProvider::Sglang => "sglang", + crate::config::ApiProvider::Vllm => "vllm", 
crate::config::ApiProvider::Deepseek | crate::config::ApiProvider::DeepseekCN => "deepseek", } @@ -1544,6 +1549,11 @@ async fn run_doctor(config: &Config, workspace: &Path, config_path_override: Opt "sglang", &["SGLANG_API_KEY"][..], ), + ( + crate::config::ApiProvider::Vllm, + "vllm", + &["VLLM_API_KEY"][..], + ), ] { let in_env = env_names.iter().any(|n| { std::env::var(n) diff --git a/crates/tui/src/tui/provider_picker.rs b/crates/tui/src/tui/provider_picker.rs index ac13a7b3..9eead1b6 100644 --- a/crates/tui/src/tui/provider_picker.rs +++ b/crates/tui/src/tui/provider_picker.rs @@ -92,6 +92,7 @@ impl ProviderPickerView { ApiProvider::Novita => "NOVITA_API_KEY", ApiProvider::Fireworks => "FIREWORKS_API_KEY", ApiProvider::Sglang => "SGLANG_API_KEY", + ApiProvider::Vllm => "VLLM_API_KEY", } } @@ -374,7 +375,8 @@ mod tests { "OpenRouter", "Novita AI", "Fireworks AI", - "SGLang" + "SGLang", + "vLLM" ] ); } diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs index d7cfacec..14f4d8e5 100644 --- a/crates/tui/src/tui/ui.rs +++ b/crates/tui/src/tui/ui.rs @@ -4507,6 +4507,7 @@ async fn execute_command_input( providers.novita.api_key = None; providers.fireworks.api_key = None; providers.sglang.api_key = None; + providers.vllm.api_key = None; } app.api_key_env_only = crate::config::active_provider_uses_env_only_api_key(config); } @@ -4884,6 +4885,7 @@ fn render(f: &mut Frame, app: &mut App) { crate::config::ApiProvider::Novita => Some("Novita"), crate::config::ApiProvider::Fireworks => Some("Fireworks"), crate::config::ApiProvider::Sglang => Some("SGLang"), + crate::config::ApiProvider::Vllm => Some("vLLM"), }; let header_data = HeaderData::new( app.mode, @@ -5510,6 +5512,7 @@ async fn apply_provider_picker_api_key( ApiProvider::Novita => &mut providers.novita, ApiProvider::Fireworks => &mut providers.fireworks, ApiProvider::Sglang => &mut providers.sglang, + ApiProvider::Vllm => &mut providers.vllm, }; entry.api_key = Some(api_key); } diff --git 
a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index 9768e5a5..b30e9ff1 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -56,12 +56,12 @@ the legacy `deepseek login --api-key ...` alias) saves the key to to the TUI as `DEEPSEEK_MODEL`. For hosted or self-hosted DeepSeek V4 providers, set `provider = "nvidia-nim"`, -`"fireworks"`, or `"sglang"` or pass `deepseek --provider `. The facade +`"fireworks"`, `"sglang"`, or `"vllm"` or pass `deepseek --provider `. The facade saves provider credentials to the shared user config and forwards the resolved key, base URL, provider, and model to the TUI process. Use `deepseek auth set --provider nvidia-nim --api-key "YOUR_NVIDIA_API_KEY"` or `deepseek auth set --provider fireworks --api-key "YOUR_FIREWORKS_API_KEY"` to -save hosted-provider keys through the facade. SGLang is self-hosted and can run +save hosted-provider keys through the facade. SGLang and vLLM are self-hosted and can run without an API key by default. To bootstrap MCP and skills directories at their resolved paths, run `deepseek-tui setup`. 
@@ -99,6 +99,11 @@ default_text_model = "accounts/fireworks/models/deepseek-v4-pro" provider = "sglang" base_url = "http://localhost:30000/v1" default_text_model = "deepseek-ai/DeepSeek-V4-Pro" + +[profiles.vllm] +provider = "vllm" +base_url = "http://localhost:8000/v1" +default_text_model = "deepseek-ai/DeepSeek-V4-Pro" ``` Select a profile with: @@ -114,7 +119,7 @@ These override config values: - `DEEPSEEK_API_KEY` - `DEEPSEEK_BASE_URL` -- `DEEPSEEK_PROVIDER` (`deepseek|nvidia-nim|openrouter|novita|fireworks|sglang`) +- `DEEPSEEK_PROVIDER` (`deepseek|nvidia-nim|openrouter|novita|fireworks|sglang|vllm`) - `DEEPSEEK_MODEL` or `DEEPSEEK_DEFAULT_TEXT_MODEL` - `NVIDIA_API_KEY` or `NVIDIA_NIM_API_KEY` (preferred when provider is `nvidia-nim`; falls back to `DEEPSEEK_API_KEY`) - `NVIDIA_NIM_BASE_URL`, `NIM_BASE_URL`, or `NVIDIA_BASE_URL` @@ -124,6 +129,9 @@ These override config values: - `SGLANG_BASE_URL` - `SGLANG_MODEL` - `SGLANG_API_KEY` (optional; many localhost SGLang servers do not require auth) +- `VLLM_BASE_URL` +- `VLLM_MODEL` +- `VLLM_API_KEY` (optional; many localhost vLLM servers do not require auth) - `DEEPSEEK_LOG_LEVEL` or `RUST_LOG` (`info`/`debug`/`trace` enables lightweight verbose logs) - `DEEPSEEK_SKILLS_DIR` - `DEEPSEEK_MCP_CONFIG` @@ -294,7 +302,7 @@ If you are upgrading from older releases: ### Core keys (used by the TUI/engine) -- `provider` (string, optional): `deepseek` (default), `nvidia-nim`, `openrouter`, `novita`, `fireworks`, or `sglang`. `nvidia-nim` targets NVIDIA's NIM-hosted DeepSeek endpoints through `https://integrate.api.nvidia.com/v1`; `fireworks` targets `https://api.fireworks.ai/inference/v1`; `sglang` targets a self-hosted OpenAI-compatible endpoint, defaulting to `http://localhost:30000/v1`. +- `provider` (string, optional): `deepseek` (default), `nvidia-nim`, `openrouter`, `novita`, `fireworks`, `sglang`, or `vllm`. 
`nvidia-nim` targets NVIDIA's NIM-hosted DeepSeek endpoints through `https://integrate.api.nvidia.com/v1`; `fireworks` targets `https://api.fireworks.ai/inference/v1`; `sglang` targets a self-hosted OpenAI-compatible endpoint, defaulting to `http://localhost:30000/v1`; `vllm` targets a self-hosted vLLM OpenAI-compatible endpoint, defaulting to `http://localhost:8000/v1`. - `api_key` (string, required): must be non-empty (or set `DEEPSEEK_API_KEY`). - `base_url` (string, optional): defaults to `https://api.deepseek.com` for DeepSeek's OpenAI-compatible Chat Completions API, or `https://integrate.api.nvidia.com/v1` for `provider = "nvidia-nim"`. `https://api.deepseek.com/v1` is also accepted for SDK compatibility; use `https://api.deepseek.com/beta` only for DeepSeek beta features such as strict tool mode, chat prefix completion, and FIM completion. - `default_text_model` (string, optional): defaults to `deepseek-v4-pro` for DeepSeek, `deepseek-ai/deepseek-v4-pro` for NVIDIA NIM, `accounts/fireworks/models/deepseek-v4-pro` for Fireworks, and `deepseek-ai/DeepSeek-V4-Pro` for SGLang. Current public DeepSeek IDs are `deepseek-v4-pro` and `deepseek-v4-flash`, both with 1M context windows and thinking mode enabled by default. Legacy `deepseek-chat` and `deepseek-reasoner` remain compatibility aliases for `deepseek-v4-flash`. Provider-specific mappings translate `deepseek-v4-pro` / `deepseek-v4-flash` to each provider's model ID where supported. Use `/models` or `deepseek models` to discover live IDs from your configured endpoint. `DEEPSEEK_MODEL` overrides this for a single process.