feat(provider): add vLLM provider support (#737)

Add vLLM as a first-class OpenAI-compatible self-hosted provider with VLLM_BASE_URL, VLLM_API_KEY, and VLLM_MODEL wiring.
This commit is contained in:
Agent007
2026-05-06 10:22:24 +08:00
committed by GitHub
parent 50780a5289
commit a335ff5e4c
14 changed files with 293 additions and 21 deletions
+6 -2
View File
@@ -174,6 +174,9 @@ deepseek --provider fireworks --model deepseek-v4-pro
# Self-hosted SGLang
SGLANG_BASE_URL="http://localhost:30000/v1" deepseek --provider sglang --model deepseek-v4-flash
# Self-hosted vLLM
VLLM_BASE_URL="http://localhost:8000/v1" deepseek --provider vllm --model deepseek-v4-flash
```
---
@@ -257,11 +260,12 @@ Key environment variables:
| `DEEPSEEK_API_KEY` | API key |
| `DEEPSEEK_BASE_URL` | API base URL |
| `DEEPSEEK_MODEL` | Default model |
| `DEEPSEEK_PROVIDER` | `deepseek` (default), `nvidia-nim`, `fireworks`, `sglang` |
| `DEEPSEEK_PROVIDER` | `deepseek` (default), `nvidia-nim`, `fireworks`, `sglang`, `vllm` |
| `DEEPSEEK_PROFILE` | Config profile name |
| `DEEPSEEK_MEMORY` | Set to `on` to enable user memory |
| `NVIDIA_API_KEY` / `FIREWORKS_API_KEY` / `SGLANG_API_KEY` | Provider auth |
| `NVIDIA_API_KEY` / `FIREWORKS_API_KEY` / `SGLANG_API_KEY` / `VLLM_API_KEY` | Provider auth |
| `SGLANG_BASE_URL` | Self-hosted SGLang endpoint |
| `VLLM_BASE_URL` | Self-hosted vLLM endpoint |
| `NO_ANIMATIONS=1` | Force accessibility mode at startup |
| `SSL_CERT_FILE` | Custom CA bundle for corporate proxies |
+6 -2
View File
@@ -169,6 +169,9 @@ deepseek --provider fireworks --model deepseek-v4-pro
# 自托管 SGLang
SGLANG_BASE_URL="http://localhost:30000/v1" deepseek --provider sglang --model deepseek-v4-flash
# 自托管 vLLM
VLLM_BASE_URL="http://localhost:8000/v1" deepseek --provider vllm --model deepseek-v4-flash
```
---
@@ -252,11 +255,12 @@ deepseek mcp-server # 启动 dispatcher MCP stdio 服
| `DEEPSEEK_API_KEY` | DeepSeek API key |
| `DEEPSEEK_BASE_URL` | API base URL |
| `DEEPSEEK_MODEL` | 默认模型 |
| `DEEPSEEK_PROVIDER` | `deepseek`(默认)、`nvidia-nim`、`fireworks`、`sglang` |
| `DEEPSEEK_PROVIDER` | `deepseek`(默认)、`nvidia-nim`、`fireworks`、`sglang`、`vllm` |
| `DEEPSEEK_PROFILE` | 配置 profile 名称 |
| `DEEPSEEK_MEMORY` | 设为 `on` 启用用户记忆 |
| `NVIDIA_API_KEY` / `FIREWORKS_API_KEY` / `SGLANG_API_KEY` | 提供商认证 |
| `NVIDIA_API_KEY` / `FIREWORKS_API_KEY` / `SGLANG_API_KEY` / `VLLM_API_KEY` | 提供商认证 |
| `SGLANG_BASE_URL` | 自托管 SGLang 端点 |
| `VLLM_BASE_URL` | 自托管 vLLM 端点 |
| `NO_ANIMATIONS=1` | 启动时强制无障碍模式 |
| `SSL_CERT_FILE` | 企业代理的自定义 CA 包 |
+9 -2
View File
@@ -12,10 +12,10 @@
# Choose which provider to use by default. Per-provider credentials live in the
# `[providers.*]` sections near the bottom of
# this file — keeping both stored at once means `/provider deepseek` and
# `/provider nvidia-nim` (or `--provider fireworks`, `/provider sglang`) toggle without having to
# `/provider nvidia-nim` (or `--provider fireworks`, `/provider sglang`, `/provider vllm`) toggle without having to
# re-enter keys. Top-level `api_key` / `base_url` are still read as DeepSeek
# defaults when `[providers.deepseek]` is absent (backward compatibility).
provider = "deepseek" # deepseek | nvidia-nim | openrouter | novita | fireworks | sglang
provider = "deepseek" # deepseek | nvidia-nim | openrouter | novita | fireworks | sglang | vllm
api_key = "YOUR_DEEPSEEK_API_KEY" # must be non-empty
base_url = "https://api.deepseek.com"
# base_url = "https://api.deepseeki.com" # China users
@@ -148,6 +148,7 @@ max_subagents = 10 # optional (1-20)
# (or NVIDIA_NIM_BASE_URL / NVIDIA_BASE_URL), NVIDIA_NIM_MODEL
# Fireworks: FIREWORKS_API_KEY, FIREWORKS_BASE_URL
# SGLang: SGLANG_BASE_URL, SGLANG_MODEL, optional SGLANG_API_KEY
# vLLM: VLLM_BASE_URL, VLLM_MODEL, optional VLLM_API_KEY
# DeepSeek Platform (https://platform.deepseek.com)
[providers.deepseek]
@@ -173,6 +174,12 @@ max_subagents = 10 # optional (1-20)
# base_url = "http://localhost:30000/v1"
# model = "deepseek-ai/DeepSeek-V4-Pro" # or deepseek-ai/DeepSeek-V4-Flash
# Self-hosted vLLM OpenAI-compatible server
[providers.vllm]
# api_key = "OPTIONAL_VLLM_TOKEN"
# base_url = "http://localhost:8000/v1"
# model = "deepseek-ai/DeepSeek-V4-Pro" # or deepseek-ai/DeepSeek-V4-Flash
# ─────────────────────────────────────────────────────────────────────────────────
# Network Policy (#135)
# ─────────────────────────────────────────────────────────────────────────────────
+40
View File
@@ -163,6 +163,28 @@ impl Default for ModelRegistry {
supports_tools: true,
supports_reasoning: true,
},
ModelInfo {
id: "deepseek-ai/DeepSeek-V4-Pro".to_string(),
provider: ProviderKind::Vllm,
aliases: vec![
"deepseek-v4-pro".to_string(),
"vllm-deepseek-v4-pro".to_string(),
],
supports_tools: true,
supports_reasoning: true,
},
ModelInfo {
id: "deepseek-ai/DeepSeek-V4-Flash".to_string(),
provider: ProviderKind::Vllm,
aliases: vec![
"deepseek-v4-flash".to_string(),
"deepseek-chat".to_string(),
"deepseek-reasoner".to_string(),
"vllm-deepseek-v4-flash".to_string(),
],
supports_tools: true,
supports_reasoning: true,
},
];
Self::new(models)
}
@@ -366,4 +388,22 @@ mod tests {
assert_eq!(resolved.resolved.provider, ProviderKind::Sglang);
assert_eq!(resolved.resolved.id, "deepseek-ai/DeepSeek-V4-Flash");
}
#[test]
fn vllm_default_uses_canonical_model_id() {
    // No model name is supplied: only the vLLM provider hint drives resolution.
    let catalog = ModelRegistry::default();
    let outcome = catalog.resolve(None, Some(ProviderKind::Vllm));
    let chosen = &outcome.resolved;

    assert_eq!(chosen.provider, ProviderKind::Vllm);
    assert_eq!(chosen.id, "deepseek-ai/DeepSeek-V4-Pro");
}
#[test]
fn deepseek_v4_flash_alias_resolves_to_vllm_when_provider_hinted() {
    // The short alias combined with a vLLM hint must resolve to the vLLM Flash entry.
    let catalog = ModelRegistry::default();
    let hint = Some(ProviderKind::Vllm);
    let outcome = catalog.resolve(Some("deepseek-v4-flash"), hint);
    let chosen = &outcome.resolved;

    assert_eq!(chosen.provider, ProviderKind::Vllm);
    assert_eq!(chosen.id, "deepseek-ai/DeepSeek-V4-Flash");
}
}
+17 -2
View File
@@ -28,6 +28,7 @@ enum ProviderArg {
Novita,
Fireworks,
Sglang,
Vllm,
}
impl From<ProviderArg> for ProviderKind {
@@ -40,6 +41,7 @@ impl From<ProviderArg> for ProviderKind {
ProviderArg::Novita => ProviderKind::Novita,
ProviderArg::Fireworks => ProviderKind::Fireworks,
ProviderArg::Sglang => ProviderKind::Sglang,
ProviderArg::Vllm => ProviderKind::Vllm,
}
}
}
@@ -561,17 +563,19 @@ fn provider_slot(provider: ProviderKind) -> &'static str {
ProviderKind::Novita => "novita",
ProviderKind::Fireworks => "fireworks",
ProviderKind::Sglang => "sglang",
ProviderKind::Vllm => "vllm",
}
}
/// Provider order used by the `auth list` and `auth status` outputs.
const PROVIDER_LIST: [ProviderKind; 7] = [
const PROVIDER_LIST: [ProviderKind; 8] = [
ProviderKind::Deepseek,
ProviderKind::NvidiaNim,
ProviderKind::Openrouter,
ProviderKind::Novita,
ProviderKind::Fireworks,
ProviderKind::Sglang,
ProviderKind::Vllm,
ProviderKind::Openai,
];
@@ -1045,9 +1049,10 @@ fn delegate_to_tui(
| ProviderKind::Novita
| ProviderKind::Fireworks
| ProviderKind::Sglang
| ProviderKind::Vllm
) {
bail!(
"The interactive TUI supports DeepSeek, NVIDIA NIM, OpenRouter, Novita, Fireworks, and SGLang providers. Remove --provider {} or use `deepseek model ...` for provider registry inspection.",
"The interactive TUI supports DeepSeek, NVIDIA NIM, OpenRouter, Novita, Fireworks, SGLang, and vLLM providers. Remove --provider {} or use `deepseek model ...` for provider registry inspection.",
resolved_runtime.provider.as_str()
);
}
@@ -1562,6 +1567,16 @@ mod tests {
}))
));
let cli = parse_ok(&["deepseek", "auth", "get", "--provider", "vllm"]);
assert!(matches!(
cli.command,
Some(Commands::Auth(AuthArgs {
command: AuthCommand::Get {
provider: ProviderArg::Vllm
}
}))
));
let cli = parse_ok(&["deepseek", "auth", "list"]);
assert!(matches!(
cli.command,
+92
View File
@@ -26,6 +26,9 @@ const DEFAULT_OPENROUTER_BASE_URL: &str = "https://openrouter.ai/api/v1";
const DEFAULT_NOVITA_BASE_URL: &str = "https://api.novita.ai/v1";
const DEFAULT_FIREWORKS_BASE_URL: &str = "https://api.fireworks.ai/inference/v1";
const DEFAULT_SGLANG_BASE_URL: &str = "http://localhost:30000/v1";
/// Model ID used for the vLLM provider when no model is configured; the
/// `deepseek-v4-pro` aliases are also normalized to it.
const DEFAULT_VLLM_MODEL: &str = "deepseek-ai/DeepSeek-V4-Pro";
/// Model ID that the flash-style aliases (e.g. `deepseek-v4-flash`,
/// `deepseek-chat`) are normalized to for the vLLM provider.
const DEFAULT_VLLM_FLASH_MODEL: &str = "deepseek-ai/DeepSeek-V4-Flash";
/// Base URL used for the vLLM provider when none is configured.
const DEFAULT_VLLM_BASE_URL: &str = "http://localhost:8000/v1";
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "kebab-case")]
@@ -38,6 +41,7 @@ pub enum ProviderKind {
Novita,
Fireworks,
Sglang,
Vllm,
}
impl ProviderKind {
@@ -51,6 +55,7 @@ impl ProviderKind {
Self::Novita => "novita",
Self::Fireworks => "fireworks",
Self::Sglang => "sglang",
Self::Vllm => "vllm",
}
}
@@ -64,6 +69,7 @@ impl ProviderKind {
"novita" => Some(Self::Novita),
"fireworks" | "fireworks-ai" => Some(Self::Fireworks),
"sglang" | "sg-lang" => Some(Self::Sglang),
"vllm" | "v-llm" => Some(Self::Vllm),
_ => None,
}
}
@@ -92,6 +98,8 @@ pub struct ProvidersToml {
pub fireworks: ProviderConfigToml,
#[serde(default)]
pub sglang: ProviderConfigToml,
#[serde(default)]
pub vllm: ProviderConfigToml,
}
impl ProvidersToml {
@@ -105,6 +113,7 @@ impl ProvidersToml {
ProviderKind::Novita => &self.novita,
ProviderKind::Fireworks => &self.fireworks,
ProviderKind::Sglang => &self.sglang,
ProviderKind::Vllm => &self.vllm,
}
}
@@ -117,6 +126,7 @@ impl ProvidersToml {
ProviderKind::Novita => &mut self.novita,
ProviderKind::Fireworks => &mut self.fireworks,
ProviderKind::Sglang => &mut self.sglang,
ProviderKind::Vllm => &mut self.vllm,
}
}
}
@@ -318,6 +328,7 @@ impl ConfigToml {
merge_provider_config(&mut self.providers.novita, &project.providers.novita);
merge_provider_config(&mut self.providers.fireworks, &project.providers.fireworks);
merge_provider_config(&mut self.providers.sglang, &project.providers.sglang);
merge_provider_config(&mut self.providers.vllm, &project.providers.vllm);
if project.network.is_some() {
self.network = project.network;
@@ -373,6 +384,9 @@ impl ConfigToml {
"providers.sglang.api_key" => self.providers.sglang.api_key.clone(),
"providers.sglang.base_url" => self.providers.sglang.base_url.clone(),
"providers.sglang.model" => self.providers.sglang.model.clone(),
"providers.vllm.api_key" => self.providers.vllm.api_key.clone(),
"providers.vllm.base_url" => self.providers.vllm.base_url.clone(),
"providers.vllm.model" => self.providers.vllm.model.clone(),
_ => self.extras.get(key).map(toml::Value::to_string),
}
}
@@ -460,6 +474,15 @@ impl ConfigToml {
"providers.sglang.model" => {
self.providers.sglang.model = Some(value.to_string());
}
"providers.vllm.api_key" => {
self.providers.vllm.api_key = Some(value.to_string());
}
"providers.vllm.base_url" => {
self.providers.vllm.base_url = Some(value.to_string());
}
"providers.vllm.model" => {
self.providers.vllm.model = Some(value.to_string());
}
_ => {
self.extras
.insert(key.to_string(), toml::Value::String(value.to_string()));
@@ -513,6 +536,9 @@ impl ConfigToml {
"providers.sglang.api_key" => self.providers.sglang.api_key = None,
"providers.sglang.base_url" => self.providers.sglang.base_url = None,
"providers.sglang.model" => self.providers.sglang.model = None,
"providers.vllm.api_key" => self.providers.vllm.api_key = None,
"providers.vllm.base_url" => self.providers.vllm.base_url = None,
"providers.vllm.model" => self.providers.vllm.model = None,
_ => {
self.extras.remove(key);
}
@@ -624,6 +650,15 @@ impl ConfigToml {
if let Some(v) = self.providers.sglang.model.as_ref() {
out.insert("providers.sglang.model".to_string(), v.clone());
}
if let Some(v) = self.providers.vllm.api_key.as_ref() {
out.insert("providers.vllm.api_key".to_string(), redact_secret(v));
}
if let Some(v) = self.providers.vllm.base_url.as_ref() {
out.insert("providers.vllm.base_url".to_string(), v.clone());
}
if let Some(v) = self.providers.vllm.model.as_ref() {
out.insert("providers.vllm.model".to_string(), v.clone());
}
for (k, v) in &self.extras {
out.insert(k.clone(), v.to_string());
@@ -695,6 +730,7 @@ impl ConfigToml {
ProviderKind::Novita => DEFAULT_NOVITA_BASE_URL.to_string(),
ProviderKind::Fireworks => DEFAULT_FIREWORKS_BASE_URL.to_string(),
ProviderKind::Sglang => DEFAULT_SGLANG_BASE_URL.to_string(),
ProviderKind::Vllm => DEFAULT_VLLM_BASE_URL.to_string(),
});
let model = cli
@@ -712,6 +748,7 @@ impl ConfigToml {
ProviderKind::Novita => DEFAULT_NOVITA_MODEL.to_string(),
ProviderKind::Fireworks => DEFAULT_FIREWORKS_MODEL.to_string(),
ProviderKind::Sglang => DEFAULT_SGLANG_MODEL.to_string(),
ProviderKind::Vllm => DEFAULT_VLLM_MODEL.to_string(),
});
let model = normalize_model_for_provider(provider, &model);
@@ -822,6 +859,14 @@ fn normalize_model_for_provider(provider: ProviderKind, model: &str) -> String {
"deepseek-v4-flash" | "deepseek-v4flash" | "deepseek-chat" | "deepseek-reasoner"
| "deepseek-r1" | "deepseek-v3" | "deepseek-v3.2",
) => DEFAULT_SGLANG_FLASH_MODEL.to_string(),
(ProviderKind::Vllm, "deepseek-v4-pro" | "deepseek-v4pro") => {
DEFAULT_VLLM_MODEL.to_string()
}
(
ProviderKind::Vllm,
"deepseek-v4-flash" | "deepseek-v4flash" | "deepseek-chat" | "deepseek-reasoner"
| "deepseek-r1" | "deepseek-v3" | "deepseek-v3.2",
) => DEFAULT_VLLM_FLASH_MODEL.to_string(),
_ => model.to_string(),
}
}
@@ -973,6 +1018,7 @@ struct EnvRuntimeOverrides {
novita_base_url: Option<String>,
fireworks_base_url: Option<String>,
sglang_base_url: Option<String>,
vllm_base_url: Option<String>,
}
impl EnvRuntimeOverrides {
@@ -1013,6 +1059,9 @@ impl EnvRuntimeOverrides {
sglang_base_url: std::env::var("SGLANG_BASE_URL")
.ok()
.filter(|v| !v.trim().is_empty()),
vllm_base_url: std::env::var("VLLM_BASE_URL")
.ok()
.filter(|v| !v.trim().is_empty()),
}
}
@@ -1027,6 +1076,7 @@ impl EnvRuntimeOverrides {
ProviderKind::Novita => self.novita_base_url.clone(),
ProviderKind::Fireworks => self.fireworks_base_url.clone(),
ProviderKind::Sglang => self.sglang_base_url.clone(),
ProviderKind::Vllm => self.vllm_base_url.clone(),
}
}
}
@@ -1061,6 +1111,8 @@ mod tests {
fireworks_base_url: Option<OsString>,
sglang_api_key: Option<OsString>,
sglang_base_url: Option<OsString>,
vllm_api_key: Option<OsString>,
vllm_base_url: Option<OsString>,
}
impl EnvGuard {
@@ -1083,6 +1135,8 @@ mod tests {
fireworks_base_url: env::var_os("FIREWORKS_BASE_URL"),
sglang_api_key: env::var_os("SGLANG_API_KEY"),
sglang_base_url: env::var_os("SGLANG_BASE_URL"),
vllm_api_key: env::var_os("VLLM_API_KEY"),
vllm_base_url: env::var_os("VLLM_BASE_URL"),
};
// Safety: test-only environment mutation guarded by a module mutex.
unsafe {
@@ -1103,6 +1157,8 @@ mod tests {
env::remove_var("FIREWORKS_BASE_URL");
env::remove_var("SGLANG_API_KEY");
env::remove_var("SGLANG_BASE_URL");
env::remove_var("VLLM_API_KEY");
env::remove_var("VLLM_BASE_URL");
}
guard
}
@@ -1137,6 +1193,8 @@ mod tests {
Self::restore_var("FIREWORKS_BASE_URL", self.fireworks_base_url.take());
Self::restore_var("SGLANG_API_KEY", self.sglang_api_key.take());
Self::restore_var("SGLANG_BASE_URL", self.sglang_base_url.take());
Self::restore_var("VLLM_API_KEY", self.vllm_api_key.take());
Self::restore_var("VLLM_BASE_URL", self.vllm_base_url.take());
}
}
}
@@ -1320,6 +1378,8 @@ mod tests {
Some(ProviderKind::Fireworks)
);
assert_eq!(ProviderKind::parse("sg-lang"), Some(ProviderKind::Sglang));
assert_eq!(ProviderKind::parse("v-llm"), Some(ProviderKind::Vllm));
assert_eq!(ProviderKind::parse("vllm"), Some(ProviderKind::Vllm));
}
#[test]
@@ -1386,6 +1446,22 @@ mod tests {
assert_eq!(resolved.model, DEFAULT_SGLANG_MODEL);
}
#[test]
fn vllm_provider_defaults_to_local_endpoint_and_model() {
    // Serialize on the env mutex and keep the guard alive for the whole test
    // so environment overrides cannot leak into the resolution below.
    let _serial = env_lock();
    let _scrubbed_env = EnvGuard::without_deepseek_runtime_overrides();

    let no_cli_overrides = CliRuntimeOverrides::default();
    let runtime = ConfigToml {
        provider: ProviderKind::Vllm,
        ..ConfigToml::default()
    }
    .resolve_runtime_options(&no_cli_overrides);

    // With nothing else configured, the built-in vLLM defaults must win.
    assert_eq!(runtime.provider, ProviderKind::Vllm);
    assert_eq!(runtime.base_url, DEFAULT_VLLM_BASE_URL);
    assert_eq!(runtime.model, DEFAULT_VLLM_MODEL);
}
#[test]
fn openrouter_env_api_key_falls_back_when_config_missing() {
let _lock = env_lock();
@@ -1488,6 +1564,22 @@ mod tests {
assert_eq!(resolved.model, DEFAULT_SGLANG_FLASH_MODEL);
}
#[test]
fn vllm_provider_normalizes_flash_aliases() {
    // Serialize on the env mutex and keep the guard alive for the whole test.
    let _serial = env_lock();
    let _scrubbed_env = EnvGuard::without_deepseek_runtime_overrides();

    // Request vLLM with the short flash alias purely via CLI overrides.
    let overrides = CliRuntimeOverrides {
        provider: Some(ProviderKind::Vllm),
        model: Some("deepseek-v4-flash".to_string()),
        ..CliRuntimeOverrides::default()
    };
    let base_config = ConfigToml::default();
    let runtime = base_config.resolve_runtime_options(&overrides);

    // The alias must be rewritten to the vLLM flash model ID.
    assert_eq!(runtime.provider, ProviderKind::Vllm);
    assert_eq!(runtime.model, DEFAULT_VLLM_FLASH_MODEL);
}
#[test]
fn openrouter_provider_specific_config_overrides_env() {
let _lock = env_lock();
+15
View File
@@ -411,6 +411,7 @@ pub fn env_for(name: &str) -> Option<String> {
}
"fireworks" | "fireworks-ai" => &["FIREWORKS_API_KEY"],
"sglang" | "sg-lang" => &["SGLANG_API_KEY"],
"vllm" | "v-llm" => &["VLLM_API_KEY"],
"openai" => &["OPENAI_API_KEY"],
_ => return None,
};
@@ -447,6 +448,7 @@ mod tests {
"NVIDIA_NIM_API_KEY",
"FIREWORKS_API_KEY",
"SGLANG_API_KEY",
"VLLM_API_KEY",
"OPENAI_API_KEY",
] {
// Safety: tests serialise on env_lock(); the broader
@@ -563,6 +565,19 @@ mod tests {
unsafe { std::env::remove_var("SGLANG_API_KEY") };
}
#[test]
fn vllm_env_aliases_resolve() {
    let _serial = env_lock();
    clear_known_envs();

    // Safety: env mutation guarded by env_lock().
    unsafe { std::env::set_var("VLLM_API_KEY", "vllm-key") };

    // Both the canonical name and the hyphenated alias read the same variable.
    for alias in ["vllm", "v-llm"] {
        assert_eq!(env_for(alias).as_deref(), Some("vllm-key"));
    }

    // Safety: env mutation guarded by env_lock().
    unsafe { std::env::remove_var("VLLM_API_KEY") };
}
#[cfg(unix)]
#[test]
fn file_store_round_trips_with_secure_perms() {
+6 -3
View File
@@ -754,7 +754,8 @@ pub(super) fn apply_reasoning_effort(
| ApiProvider::Openrouter
| ApiProvider::Novita
| ApiProvider::Fireworks
| ApiProvider::Sglang => {
| ApiProvider::Sglang
| ApiProvider::Vllm => {
body["thinking"] = json!({ "type": "disabled" });
}
ApiProvider::NvidiaNim => {
@@ -769,7 +770,8 @@ pub(super) fn apply_reasoning_effort(
| ApiProvider::Openrouter
| ApiProvider::Novita
| ApiProvider::Fireworks
| ApiProvider::Sglang => {
| ApiProvider::Sglang
| ApiProvider::Vllm => {
body["reasoning_effort"] = json!("high");
body["thinking"] = json!({ "type": "enabled" });
}
@@ -786,7 +788,8 @@ pub(super) fn apply_reasoning_effort(
| ApiProvider::Openrouter
| ApiProvider::Novita
| ApiProvider::Fireworks
| ApiProvider::Sglang => {
| ApiProvider::Sglang
| ApiProvider::Vllm => {
body["reasoning_effort"] = json!("max");
body["thinking"] = json!({ "type": "enabled" });
}
+14 -1
View File
@@ -27,7 +27,7 @@ pub fn provider(app: &mut App, args: Option<&str>) -> CommandResult {
let Some(target) = ApiProvider::parse(name) else {
return CommandResult::error(format!(
"Unknown provider '{name}'. Expected: deepseek, nvidia-nim, openrouter, novita, fireworks, or sglang."
"Unknown provider '{name}'. Expected: deepseek, nvidia-nim, openrouter, novita, fireworks, sglang, or vllm."
));
};
@@ -164,6 +164,19 @@ mod tests {
}
}
#[test]
fn switch_to_vllm_flash_emits_action() {
    let mut app = create_test_app();
    let outcome = provider(&mut app, Some("vllm flash"));

    // The command must request a provider switch to vLLM carrying the flash model name.
    match outcome.action {
        Some(AppAction::SwitchProvider {
            provider: target,
            model: requested,
        }) => {
            assert_eq!(target, ApiProvider::Vllm);
            assert_eq!(requested.as_deref(), Some("deepseek-v4-flash"));
        }
        other => panic!("expected SwitchProvider, got {other:?}"),
    }
}
#[test]
fn switching_to_active_provider_without_model_is_a_noop() {
let mut app = create_test_app();
+60 -4
View File
@@ -30,6 +30,9 @@ pub const DEFAULT_FIREWORKS_BASE_URL: &str = "https://api.fireworks.ai/inference
pub const DEFAULT_SGLANG_MODEL: &str = "deepseek-ai/DeepSeek-V4-Pro";
pub const DEFAULT_SGLANG_FLASH_MODEL: &str = "deepseek-ai/DeepSeek-V4-Flash";
pub const DEFAULT_SGLANG_BASE_URL: &str = "http://localhost:30000/v1";
/// Default model ID for the vLLM provider; also the target of the
/// `deepseek-v4-pro` mapping for vLLM.
pub const DEFAULT_VLLM_MODEL: &str = "deepseek-ai/DeepSeek-V4-Pro";
/// Model ID that `deepseek-v4-flash` maps to for the vLLM provider.
pub const DEFAULT_VLLM_FLASH_MODEL: &str = "deepseek-ai/DeepSeek-V4-Flash";
/// Base URL used for the vLLM provider when neither the provider section nor
/// the root config supplies one.
pub const DEFAULT_VLLM_BASE_URL: &str = "http://localhost:8000/v1";
pub const DEFAULT_DEEPSEEKCN_BASE_URL: &str = "https://api.deepseeki.com";
const API_KEYRING_SENTINEL: &str = "__KEYRING__";
pub const COMMON_DEEPSEEK_MODELS: &[&str] = &[
@@ -51,6 +54,7 @@ pub enum ApiProvider {
Novita,
Fireworks,
Sglang,
Vllm,
}
impl ApiProvider {
@@ -66,6 +70,7 @@ impl ApiProvider {
"novita" => Some(Self::Novita),
"fireworks" | "fireworks-ai" => Some(Self::Fireworks),
"sglang" | "sg-lang" => Some(Self::Sglang),
"vllm" | "v-llm" => Some(Self::Vllm),
_ => None,
}
}
@@ -80,6 +85,7 @@ impl ApiProvider {
Self::Novita => "novita",
Self::Fireworks => "fireworks",
Self::Sglang => "sglang",
Self::Vllm => "vllm",
}
}
@@ -94,6 +100,7 @@ impl ApiProvider {
Self::Novita => "Novita AI",
Self::Fireworks => "Fireworks AI",
Self::Sglang => "SGLang",
Self::Vllm => "vLLM",
}
}
@@ -108,6 +115,7 @@ impl ApiProvider {
Self::Novita,
Self::Fireworks,
Self::Sglang,
Self::Vllm,
]
}
}
@@ -902,6 +910,8 @@ pub struct ProvidersConfig {
pub fireworks: ProviderConfig,
#[serde(default)]
pub sglang: ProviderConfig,
#[serde(default)]
pub vllm: ProviderConfig,
}
#[derive(Debug, Clone, Deserialize, Default)]
@@ -961,7 +971,7 @@ impl Config {
&& ApiProvider::parse(provider).is_none()
{
anyhow::bail!(
"Invalid provider '{provider}': expected deepseek, deepseek-cn, nvidia-nim, openrouter, novita, fireworks, or sglang."
"Invalid provider '{provider}': expected deepseek, deepseek-cn, nvidia-nim, openrouter, novita, fireworks, sglang, or vllm."
);
}
if let Some(ref key) = self.api_key
@@ -1079,6 +1089,7 @@ impl Config {
ApiProvider::Novita => &providers.novita,
ApiProvider::Fireworks => &providers.fireworks,
ApiProvider::Sglang => &providers.sglang,
ApiProvider::Vllm => &providers.vllm,
})
}
@@ -1114,6 +1125,7 @@ impl Config {
ApiProvider::Novita => DEFAULT_NOVITA_MODEL,
ApiProvider::Fireworks => DEFAULT_FIREWORKS_MODEL,
ApiProvider::Sglang => DEFAULT_SGLANG_MODEL,
ApiProvider::Vllm => DEFAULT_VLLM_MODEL,
}
.to_string()
}
@@ -1139,7 +1151,8 @@ impl Config {
ApiProvider::Openrouter
| ApiProvider::Novita
| ApiProvider::Fireworks
| ApiProvider::Sglang => None,
| ApiProvider::Sglang
| ApiProvider::Vllm => None,
};
let base = provider_base.or(root_base).unwrap_or_else(|| {
match provider {
@@ -1150,6 +1163,7 @@ impl Config {
ApiProvider::Novita => DEFAULT_NOVITA_BASE_URL,
ApiProvider::Fireworks => DEFAULT_FIREWORKS_BASE_URL,
ApiProvider::Sglang => DEFAULT_SGLANG_BASE_URL,
ApiProvider::Vllm => DEFAULT_VLLM_BASE_URL,
}
.to_string()
});
@@ -1173,6 +1187,7 @@ impl Config {
ApiProvider::Novita => "novita",
ApiProvider::Fireworks => "fireworks",
ApiProvider::Sglang => "sglang",
ApiProvider::Vllm => "vllm",
};
// 0. Explicit in-memory override (set by onboarding / provider
@@ -1236,7 +1251,7 @@ impl Config {
// Self-hosted SGLang and vLLM deployments commonly run without auth on
// localhost. Return an empty key and let the client omit the
// Authorization header.
ApiProvider::Sglang => Ok(String::new()),
ApiProvider::Sglang | ApiProvider::Vllm => Ok(String::new()),
}
}
@@ -1678,11 +1693,26 @@ fn apply_env_overrides(config: &mut Config) {
.sglang
.base_url = Some(value);
}
if matches!(config.api_provider(), ApiProvider::Vllm)
&& let Ok(value) = std::env::var("VLLM_BASE_URL")
&& !value.trim().is_empty()
{
config
.providers
.get_or_insert_with(ProvidersConfig::default)
.vllm
.base_url = Some(value);
}
if matches!(config.api_provider(), ApiProvider::Sglang)
&& let Ok(value) = std::env::var("SGLANG_MODEL")
{
config.default_text_model = Some(value);
}
if matches!(config.api_provider(), ApiProvider::Vllm)
&& let Ok(value) = std::env::var("VLLM_MODEL")
{
config.default_text_model = Some(value);
}
if let Ok(value) =
std::env::var("DEEPSEEK_MODEL").or_else(|_| std::env::var("DEEPSEEK_DEFAULT_TEXT_MODEL"))
{
@@ -1902,6 +1932,11 @@ fn normalize_model_config(config: &mut Config) {
{
providers.sglang.model = Some(normalized);
}
if let Some(model) = providers.vllm.model.as_deref()
&& let Some(normalized) = normalize_model_for_provider(ApiProvider::Vllm, model)
{
providers.vllm.model = Some(normalized);
}
}
}
@@ -1927,6 +1962,8 @@ fn model_for_provider(provider: ApiProvider, normalized: String) -> String {
}
(ApiProvider::Sglang, "deepseek-v4-pro") => DEFAULT_SGLANG_MODEL.to_string(),
(ApiProvider::Sglang, "deepseek-v4-flash") => DEFAULT_SGLANG_FLASH_MODEL.to_string(),
(ApiProvider::Vllm, "deepseek-v4-pro") => DEFAULT_VLLM_MODEL.to_string(),
(ApiProvider::Vllm, "deepseek-v4-flash") => DEFAULT_VLLM_FLASH_MODEL.to_string(),
_ => normalized,
}
}
@@ -2067,6 +2104,7 @@ fn merge_providers(
novita: merge_provider_config(base.novita, override_cfg.novita),
fireworks: merge_provider_config(base.fireworks, override_cfg.fireworks),
sglang: merge_provider_config(base.sglang, override_cfg.sglang),
vllm: merge_provider_config(base.vllm, override_cfg.vllm),
}),
}
}
@@ -2436,6 +2474,7 @@ pub fn has_api_key_for(config: &Config, provider: ApiProvider) -> bool {
ApiProvider::Novita => "NOVITA_API_KEY",
ApiProvider::Fireworks => "FIREWORKS_API_KEY",
ApiProvider::Sglang => "SGLANG_API_KEY",
ApiProvider::Vllm => "VLLM_API_KEY",
};
if std::env::var(env_var).is_ok_and(|k| !k.trim().is_empty()) {
return true;
@@ -2447,7 +2486,7 @@ pub fn has_api_key_for(config: &Config, provider: ApiProvider) -> bool {
}
// SGLang and vLLM are self-hosted and typically run without authentication.
if matches!(provider, ApiProvider::Sglang) {
if matches!(provider, ApiProvider::Sglang | ApiProvider::Vllm) {
return true;
}
@@ -2494,6 +2533,7 @@ pub fn save_api_key_for(provider: ApiProvider, api_key: &str) -> Result<PathBuf>
ApiProvider::Novita => "providers.novita",
ApiProvider::Fireworks => "providers.fireworks",
ApiProvider::Sglang => "providers.sglang",
ApiProvider::Vllm => "providers.vllm",
};
// Parse existing TOML (or start fresh) so we can edit the right table
@@ -2521,6 +2561,7 @@ pub fn save_api_key_for(provider: ApiProvider, api_key: &str) -> Result<PathBuf>
ApiProvider::Novita => "novita",
ApiProvider::Fireworks => "fireworks",
ApiProvider::Sglang => "sglang",
ApiProvider::Vllm => "vllm",
};
let entry = providers
.entry(key_inside.to_string())
@@ -2634,6 +2675,9 @@ mod tests {
sglang_api_key: Option<OsString>,
sglang_base_url: Option<OsString>,
sglang_model: Option<OsString>,
vllm_api_key: Option<OsString>,
vllm_base_url: Option<OsString>,
vllm_model: Option<OsString>,
}
impl EnvGuard {
@@ -2664,6 +2708,9 @@ mod tests {
let sglang_api_key_prev = env::var_os("SGLANG_API_KEY");
let sglang_base_url_prev = env::var_os("SGLANG_BASE_URL");
let sglang_model_prev = env::var_os("SGLANG_MODEL");
let vllm_api_key_prev = env::var_os("VLLM_API_KEY");
let vllm_base_url_prev = env::var_os("VLLM_BASE_URL");
let vllm_model_prev = env::var_os("VLLM_MODEL");
// Safety: test-only environment mutation guarded by a global mutex.
unsafe {
env::set_var("HOME", &home_str);
@@ -2689,6 +2736,9 @@ mod tests {
env::remove_var("SGLANG_API_KEY");
env::remove_var("SGLANG_BASE_URL");
env::remove_var("SGLANG_MODEL");
env::remove_var("VLLM_API_KEY");
env::remove_var("VLLM_BASE_URL");
env::remove_var("VLLM_MODEL");
}
Self {
home: home_prev,
@@ -2714,6 +2764,9 @@ mod tests {
sglang_api_key: sglang_api_key_prev,
sglang_base_url: sglang_base_url_prev,
sglang_model: sglang_model_prev,
vllm_api_key: vllm_api_key_prev,
vllm_base_url: vllm_base_url_prev,
vllm_model: vllm_model_prev,
}
}
}
@@ -2748,6 +2801,9 @@ mod tests {
Self::restore_var("SGLANG_API_KEY", self.sglang_api_key.take());
Self::restore_var("SGLANG_BASE_URL", self.sglang_base_url.take());
Self::restore_var("SGLANG_MODEL", self.sglang_model.take());
Self::restore_var("VLLM_API_KEY", self.vllm_api_key.take());
Self::restore_var("VLLM_BASE_URL", self.vllm_base_url.take());
Self::restore_var("VLLM_MODEL", self.vllm_model.take());
}
}
}
+10
View File
@@ -1301,6 +1301,10 @@ fn run_setup_status(config: &Config, workspace: &Path) -> Result<()> {
"SGLANG_API_KEY",
"deepseek auth set --provider sglang --api-key \"...\"",
),
crate::config::ApiProvider::Vllm => (
"VLLM_API_KEY",
"deepseek auth set --provider vllm --api-key \"...\"",
),
crate::config::ApiProvider::Deepseek | crate::config::ApiProvider::DeepseekCN => {
("DEEPSEEK_API_KEY", "deepseek auth set --provider deepseek")
}
@@ -1314,6 +1318,7 @@ fn run_setup_status(config: &Config, workspace: &Path) -> Result<()> {
crate::config::ApiProvider::Novita => "novita",
crate::config::ApiProvider::Fireworks => "fireworks",
crate::config::ApiProvider::Sglang => "sglang",
crate::config::ApiProvider::Vllm => "vllm",
crate::config::ApiProvider::Deepseek
| crate::config::ApiProvider::DeepseekCN => "deepseek",
}
@@ -1544,6 +1549,11 @@ async fn run_doctor(config: &Config, workspace: &Path, config_path_override: Opt
"sglang",
&["SGLANG_API_KEY"][..],
),
(
crate::config::ApiProvider::Vllm,
"vllm",
&["VLLM_API_KEY"][..],
),
] {
let in_env = env_names.iter().any(|n| {
std::env::var(n)
+3 -1
View File
@@ -92,6 +92,7 @@ impl ProviderPickerView {
ApiProvider::Novita => "NOVITA_API_KEY",
ApiProvider::Fireworks => "FIREWORKS_API_KEY",
ApiProvider::Sglang => "SGLANG_API_KEY",
ApiProvider::Vllm => "VLLM_API_KEY",
}
}
@@ -374,7 +375,8 @@ mod tests {
"OpenRouter",
"Novita AI",
"Fireworks AI",
"SGLang"
"SGLang",
"vLLM"
]
);
}
+3
View File
@@ -4507,6 +4507,7 @@ async fn execute_command_input(
providers.novita.api_key = None;
providers.fireworks.api_key = None;
providers.sglang.api_key = None;
providers.vllm.api_key = None;
}
app.api_key_env_only = crate::config::active_provider_uses_env_only_api_key(config);
}
@@ -4884,6 +4885,7 @@ fn render(f: &mut Frame, app: &mut App) {
crate::config::ApiProvider::Novita => Some("Novita"),
crate::config::ApiProvider::Fireworks => Some("Fireworks"),
crate::config::ApiProvider::Sglang => Some("SGLang"),
crate::config::ApiProvider::Vllm => Some("vLLM"),
};
let header_data = HeaderData::new(
app.mode,
@@ -5510,6 +5512,7 @@ async fn apply_provider_picker_api_key(
ApiProvider::Novita => &mut providers.novita,
ApiProvider::Fireworks => &mut providers.fireworks,
ApiProvider::Sglang => &mut providers.sglang,
ApiProvider::Vllm => &mut providers.vllm,
};
entry.api_key = Some(api_key);
}
+12 -4
View File
@@ -56,12 +56,12 @@ the legacy `deepseek login --api-key ...` alias) saves the key to
to the TUI as `DEEPSEEK_MODEL`.
For hosted or self-hosted DeepSeek V4 providers, set `provider = "nvidia-nim"`,
`"fireworks"`, or `"sglang"` or pass `deepseek --provider <name>`. The facade
`"fireworks"`, `"sglang"`, or `"vllm"`, or pass `deepseek --provider <name>`. The facade
saves provider credentials to the shared user config and forwards the resolved
key, base URL, provider, and model to the TUI process. Use
`deepseek auth set --provider nvidia-nim --api-key "YOUR_NVIDIA_API_KEY"` or
`deepseek auth set --provider fireworks --api-key "YOUR_FIREWORKS_API_KEY"` to
save hosted-provider keys through the facade. SGLang is self-hosted and can run
save hosted-provider keys through the facade. SGLang and vLLM are self-hosted and can run
without an API key by default.
To bootstrap MCP and skills directories at their resolved paths, run `deepseek-tui setup`.
@@ -99,6 +99,11 @@ default_text_model = "accounts/fireworks/models/deepseek-v4-pro"
provider = "sglang"
base_url = "http://localhost:30000/v1"
default_text_model = "deepseek-ai/DeepSeek-V4-Pro"
[profiles.vllm]
provider = "vllm"
base_url = "http://localhost:8000/v1"
default_text_model = "deepseek-ai/DeepSeek-V4-Pro"
```
Select a profile with:
@@ -114,7 +119,7 @@ These override config values:
- `DEEPSEEK_API_KEY`
- `DEEPSEEK_BASE_URL`
- `DEEPSEEK_PROVIDER` (`deepseek|nvidia-nim|openrouter|novita|fireworks|sglang`)
- `DEEPSEEK_PROVIDER` (`deepseek|nvidia-nim|openrouter|novita|fireworks|sglang|vllm`)
- `DEEPSEEK_MODEL` or `DEEPSEEK_DEFAULT_TEXT_MODEL`
- `NVIDIA_API_KEY` or `NVIDIA_NIM_API_KEY` (preferred when provider is `nvidia-nim`; falls back to `DEEPSEEK_API_KEY`)
- `NVIDIA_NIM_BASE_URL`, `NIM_BASE_URL`, or `NVIDIA_BASE_URL`
@@ -124,6 +129,9 @@ These override config values:
- `SGLANG_BASE_URL`
- `SGLANG_MODEL`
- `SGLANG_API_KEY` (optional; many localhost SGLang servers do not require auth)
- `VLLM_BASE_URL`
- `VLLM_MODEL`
- `VLLM_API_KEY` (optional; many localhost vLLM servers do not require auth)
- `DEEPSEEK_LOG_LEVEL` or `RUST_LOG` (`info`/`debug`/`trace` enables lightweight verbose logs)
- `DEEPSEEK_SKILLS_DIR`
- `DEEPSEEK_MCP_CONFIG`
@@ -294,7 +302,7 @@ If you are upgrading from older releases:
### Core keys (used by the TUI/engine)
- `provider` (string, optional): `deepseek` (default), `nvidia-nim`, `openrouter`, `novita`, `fireworks`, or `sglang`. `nvidia-nim` targets NVIDIA's NIM-hosted DeepSeek endpoints through `https://integrate.api.nvidia.com/v1`; `fireworks` targets `https://api.fireworks.ai/inference/v1`; `sglang` targets a self-hosted OpenAI-compatible endpoint, defaulting to `http://localhost:30000/v1`.
- `provider` (string, optional): `deepseek` (default), `nvidia-nim`, `openrouter`, `novita`, `fireworks`, `sglang`, or `vllm`. `nvidia-nim` targets NVIDIA's NIM-hosted DeepSeek endpoints through `https://integrate.api.nvidia.com/v1`; `fireworks` targets `https://api.fireworks.ai/inference/v1`; `sglang` targets a self-hosted OpenAI-compatible endpoint, defaulting to `http://localhost:30000/v1`; `vllm` targets a self-hosted vLLM OpenAI-compatible endpoint, defaulting to `http://localhost:8000/v1`.
- `api_key` (string, required): must be non-empty (or set `DEEPSEEK_API_KEY`).
- `base_url` (string, optional): defaults to `https://api.deepseek.com` for DeepSeek's OpenAI-compatible Chat Completions API, or `https://integrate.api.nvidia.com/v1` for `provider = "nvidia-nim"`. `https://api.deepseek.com/v1` is also accepted for SDK compatibility; use `https://api.deepseek.com/beta` only for DeepSeek beta features such as strict tool mode, chat prefix completion, and FIM completion.
- `default_text_model` (string, optional): defaults to `deepseek-v4-pro` for DeepSeek, `deepseek-ai/deepseek-v4-pro` for NVIDIA NIM, `accounts/fireworks/models/deepseek-v4-pro` for Fireworks, and `deepseek-ai/DeepSeek-V4-Pro` for SGLang and vLLM. Current public DeepSeek IDs are `deepseek-v4-pro` and `deepseek-v4-flash`, both with 1M context windows and thinking mode enabled by default. Legacy `deepseek-chat` and `deepseek-reasoner` remain compatibility aliases for `deepseek-v4-flash`. Provider-specific mappings translate `deepseek-v4-pro` / `deepseek-v4-flash` to each provider's model ID where supported. Use `/models` or `deepseek models` to discover live IDs from your configured endpoint. `DEEPSEEK_MODEL` overrides this for a single process.