From d0dc26ce25be5820a4af2827eed11dc81864e52a Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Fri, 24 Apr 2026 18:29:19 -0500 Subject: [PATCH] Add NVIDIA NIM provider support for DeepSeek --- README.md | 19 +- config.example.toml | 17 +- crates/agent/src/lib.rs | 92 +++++++- crates/cli/src/main.rs | 30 ++- crates/config/src/lib.rs | 152 +++++++++++++ crates/tui/src/client.rs | 198 ++++++++++++++--- crates/tui/src/config.rs | 424 +++++++++++++++++++++++++++++++++++-- crates/tui/src/main.rs | 21 +- crates/tui/src/models.rs | 4 + crates/tui/src/pricing.rs | 15 ++ docs/CONFIGURATION.md | 24 ++- npm/deepseek-tui/README.md | 13 ++ 12 files changed, 930 insertions(+), 79 deletions(-) diff --git a/README.md b/README.md index 14fb91ab..30ff8cb8 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,16 @@ deepseek-tui login --api-key "YOUR_DEEPSEEK_API_KEY" DEEPSEEK_API_KEY="YOUR_DEEPSEEK_API_KEY" deepseek-tui ``` +To use NVIDIA NIM-hosted DeepSeek V4 Pro instead: + +```bash +deepseek auth set --provider nvidia-nim --api-key "YOUR_NVIDIA_API_KEY" +deepseek --provider nvidia-nim + +# or for one process: +DEEPSEEK_PROVIDER=nvidia-nim NVIDIA_API_KEY="YOUR_NVIDIA_API_KEY" deepseek +``` +
Other install methods @@ -77,6 +87,8 @@ Set a default in config with `reasoning_effort = "max"` (or `off` / `low` / |---|---|---|---|---|---| | `deepseek-v4-pro` | default | 1M | $0.145 / 1M | $1.74 / 1M | $3.48 / 1M | | `deepseek-v4-flash` | default | 1M | $0.028 / 1M | $0.14 / 1M | $0.28 / 1M | +| `deepseek-ai/deepseek-v4-pro` via NVIDIA NIM | default | 1M | NVIDIA account terms | NVIDIA account terms | NVIDIA account terms | +| `deepseek-ai/deepseek-v4-flash` via NVIDIA NIM | default | 1M | NVIDIA account terms | NVIDIA account terms | NVIDIA account terms | Legacy `deepseek-chat` and `deepseek-reasoner` remain as silent aliases for `deepseek-v4-flash` (priced identically). Pricing is per 1M tokens as published @@ -104,7 +116,12 @@ Controls: `F1` help, `Esc` backs out of the current action, `Ctrl+K` command pal `~/.deepseek/config.toml` — see [config.example.toml](config.example.toml) for all options. Key environment overrides: `DEEPSEEK_API_KEY`, `DEEPSEEK_BASE_URL`, -`DEEPSEEK_MODEL`, `DEEPSEEK_PROFILE`. +`DEEPSEEK_MODEL`, `DEEPSEEK_PROFILE`, `DEEPSEEK_PROVIDER`. +For NVIDIA NIM, use `DEEPSEEK_PROVIDER=nvidia-nim` plus `NVIDIA_API_KEY` +or `NVIDIA_NIM_API_KEY`; the default model is `deepseek-ai/deepseek-v4-pro` +and the default base URL is `https://integrate.api.nvidia.com/v1`. With +`--provider nvidia-nim`, `--model deepseek-v4-flash` maps to +`deepseek-ai/deepseek-v4-flash`. The client targets DeepSeek's documented OpenAI-compatible Chat Completions API (`/chat/completions`). 
DeepSeek context caching is automatic; when the API diff --git a/config.example.toml b/config.example.toml index ea6bdda5..a9617abe 100644 --- a/config.example.toml +++ b/config.example.toml @@ -9,6 +9,7 @@ # ───────────────────────────────────────────────────────────────────────────────── # API Keys # ───────────────────────────────────────────────────────────────────────────────── +provider = "deepseek" # deepseek | nvidia-nim api_key = "YOUR_DEEPSEEK_API_KEY" # must be non-empty # ───────────────────────────────────────────────────────────────────────────────── @@ -17,13 +18,19 @@ api_key = "YOUR_DEEPSEEK_API_KEY" # must be non-empty base_url = "https://api.deepseek.com" # base_url = "https://api.deepseeki.com" # China users # base_url = "https://api.deepseek.com/beta" # DeepSeek beta features such as strict tool mode +# For NVIDIA NIM-hosted DeepSeek: +# provider = "nvidia-nim" +# api_key = "YOUR_NVIDIA_API_KEY" +# base_url = "https://integrate.api.nvidia.com/v1" # ───────────────────────────────────────────────────────────────────────────────── # Default Models # ───────────────────────────────────────────────────────────────────────────────── # DeepSeek V4 family: -# deepseek-v4-pro — flagship reasoning model -# deepseek-v4-flash — fast, cost-efficient (legacy aliases: deepseek-chat, deepseek-reasoner) +# deepseek-v4-pro — flagship reasoning model on DeepSeek Platform +# deepseek-v4-flash — fast, cost-efficient (legacy aliases: deepseek-chat, deepseek-reasoner) +# deepseek-ai/deepseek-v4-pro — NVIDIA NIM-hosted Pro model ID +# deepseek-ai/deepseek-v4-flash — NVIDIA NIM-hosted Flash model ID default_text_model = "deepseek-v4-pro" # ───────────────────────────────────────────────────────────────────────────────── @@ -131,6 +138,12 @@ base_url = "https://api.deepseek.com" api_key = "DEV_DEEPSEEK_API_KEY" allow_shell = true +[profiles.nvidia-nim] +provider = "nvidia-nim" +api_key = "YOUR_NVIDIA_API_KEY" +base_url = "https://integrate.api.nvidia.com/v1" 
+default_text_model = "deepseek-ai/deepseek-v4-pro" + # ───────────────────────────────────────────────────────────────────────────────── # Hooks (optional) # ───────────────────────────────────────────────────────────────────────────────── diff --git a/crates/agent/src/lib.rs b/crates/agent/src/lib.rs index f88c6708..dbab9e2e 100644 --- a/crates/agent/src/lib.rs +++ b/crates/agent/src/lib.rs @@ -49,6 +49,30 @@ impl Default for ModelRegistry { supports_tools: true, supports_reasoning: true, }, + ModelInfo { + id: "deepseek-ai/deepseek-v4-pro".to_string(), + provider: ProviderKind::NvidiaNim, + aliases: vec![ + "deepseek-v4-pro".to_string(), + "nvidia-deepseek-v4-pro".to_string(), + "nim-deepseek-v4-pro".to_string(), + ], + supports_tools: true, + supports_reasoning: true, + }, + ModelInfo { + id: "deepseek-ai/deepseek-v4-flash".to_string(), + provider: ProviderKind::NvidiaNim, + aliases: vec![ + "deepseek-v4-flash".to_string(), + "deepseek-chat".to_string(), + "deepseek-reasoner".to_string(), + "nvidia-deepseek-v4-flash".to_string(), + "nim-deepseek-v4-flash".to_string(), + ], + supports_tools: true, + supports_reasoning: true, + }, ModelInfo { id: "gpt-4.1".to_string(), provider: ProviderKind::Openai, @@ -73,9 +97,9 @@ impl ModelRegistry { pub fn new(models: Vec) -> Self { let mut alias_map = HashMap::new(); for (idx, model) in models.iter().enumerate() { - alias_map.insert(normalize(&model.id), idx); + alias_map.entry(normalize(&model.id)).or_insert(idx); for alias in &model.aliases { - alias_map.insert(normalize(alias), idx); + alias_map.entry(normalize(alias)).or_insert(idx); } } Self { models, alias_map } @@ -96,6 +120,20 @@ impl ModelRegistry { if let Some(name) = requested { fallback_chain.push(format!("requested:{name}")); + if let Some(provider) = provider_hint + && let Some(model) = self + .models + .iter() + .find(|m| m.provider == provider && model_matches(m, name)) + .cloned() + { + return ModelResolution { + requested: Some(name.to_string()), + 
resolved: model, + used_fallback: false, + fallback_chain, + }; + } if let Some(idx) = self.alias_map.get(&normalize(name)) { return ModelResolution { requested: Some(name.to_string()), @@ -137,3 +175,53 @@ impl ModelRegistry { fn normalize(value: &str) -> String { value.trim().to_ascii_lowercase() } + +fn model_matches(model: &ModelInfo, requested: &str) -> bool { + let requested = normalize(requested); + normalize(&model.id) == requested + || model + .aliases + .iter() + .any(|alias| normalize(alias) == requested) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn deepseek_v4_pro_alias_stays_deepseek_by_default() { + let registry = ModelRegistry::default(); + let resolved = registry.resolve(Some("deepseek-v4-pro"), None); + + assert_eq!(resolved.resolved.provider, ProviderKind::Deepseek); + assert_eq!(resolved.resolved.id, "deepseek-v4-pro"); + } + + #[test] + fn deepseek_v4_pro_alias_resolves_to_nvidia_nim_when_provider_hinted() { + let registry = ModelRegistry::default(); + let resolved = registry.resolve(Some("deepseek-v4-pro"), Some(ProviderKind::NvidiaNim)); + + assert_eq!(resolved.resolved.provider, ProviderKind::NvidiaNim); + assert_eq!(resolved.resolved.id, "deepseek-ai/deepseek-v4-pro"); + } + + #[test] + fn nvidia_nim_default_uses_catalog_model_id() { + let registry = ModelRegistry::default(); + let resolved = registry.resolve(None, Some(ProviderKind::NvidiaNim)); + + assert_eq!(resolved.resolved.provider, ProviderKind::NvidiaNim); + assert_eq!(resolved.resolved.id, "deepseek-ai/deepseek-v4-pro"); + } + + #[test] + fn deepseek_v4_flash_alias_resolves_to_nvidia_nim_when_provider_hinted() { + let registry = ModelRegistry::default(); + let resolved = registry.resolve(Some("deepseek-v4-flash"), Some(ProviderKind::NvidiaNim)); + + assert_eq!(resolved.resolved.provider, ProviderKind::NvidiaNim); + assert_eq!(resolved.resolved.id, "deepseek-ai/deepseek-v4-flash"); + } +} diff --git a/crates/cli/src/main.rs b/crates/cli/src/main.rs index 
725b2bf0..67ae1da4 100644 --- a/crates/cli/src/main.rs +++ b/crates/cli/src/main.rs @@ -18,6 +18,7 @@ use deepseek_state::{StateStore, ThreadListFilters}; #[derive(Debug, Clone, Copy, ValueEnum)] enum ProviderArg { Deepseek, + NvidiaNim, Openai, } @@ -25,6 +26,7 @@ impl From for ProviderKind { fn from(value: ProviderArg) -> Self { match value { ProviderArg::Deepseek => ProviderKind::Deepseek, + ProviderArg::NvidiaNim => ProviderKind::NvidiaNim, ProviderArg::Openai => ProviderKind::Openai, } } @@ -475,6 +477,7 @@ fn run_login_command(store: &mut ConfigStore, args: LoginArgs) -> Result<()> { fn run_logout_command(store: &mut ConfigStore) -> Result<()> { store.config.api_key = None; store.config.providers.deepseek.api_key = None; + store.config.providers.nvidia_nim.api_key = None; store.config.providers.openai.api_key = None; store.config.auth_mode = None; store.config.chatgpt_access_token = None; @@ -495,6 +498,11 @@ fn run_auth_command(store: &mut ConfigStore, command: AuthCommand) -> Result<()> .ok() .filter(|v| !v.trim().is_empty()) .is_some(); + let nvidia_env = std::env::var("NVIDIA_API_KEY") + .or_else(|_| std::env::var("NVIDIA_NIM_API_KEY")) + .ok() + .filter(|v| !v.trim().is_empty()) + .is_some(); let deepseek_file = store .config .providers @@ -510,12 +518,23 @@ fn run_auth_command(store: &mut ConfigStore, command: AuthCommand) -> Result<()> .api_key .as_ref() .is_some_and(|v| !v.trim().is_empty()); + let nvidia_file = store + .config + .providers + .nvidia_nim + .api_key + .as_ref() + .is_some_and(|v| !v.trim().is_empty()); println!("provider: {}", store.config.provider.as_str()); println!( "deepseek auth: env={}, config={}", deepseek_env, deepseek_file ); + println!( + "nvidia-nim auth: env={}, config={}", + nvidia_env, nvidia_file + ); println!("openai auth: env={}, config={}", openai_env, openai_file); Ok(()) } @@ -781,22 +800,23 @@ fn delegate_to_tui( } cmd.args(passthrough); - if resolved_runtime.provider != ProviderKind::Deepseek { + if !matches!( + 
resolved_runtime.provider, + ProviderKind::Deepseek | ProviderKind::NvidiaNim + ) { bail!( - "The interactive TUI only supports the DeepSeek API. Remove --provider {} or use `deepseek model ...` for provider registry inspection.", + "The interactive TUI supports DeepSeek and NVIDIA NIM providers. Remove --provider {} or use `deepseek model ...` for provider registry inspection.", resolved_runtime.provider.as_str() ); } cmd.env("DEEPSEEK_MODEL", &resolved_runtime.model); cmd.env("DEEPSEEK_BASE_URL", &resolved_runtime.base_url); + cmd.env("DEEPSEEK_PROVIDER", resolved_runtime.provider.as_str()); if let Some(api_key) = resolved_runtime.api_key.as_ref() { cmd.env("DEEPSEEK_API_KEY", api_key); } - if let Some(provider) = cli.provider { - cmd.env("DEEPSEEK_PROVIDER", ProviderKind::from(provider).as_str()); - } if let Some(model) = cli.model.as_ref() { cmd.env("DEEPSEEK_MODEL", model); } diff --git a/crates/config/src/lib.rs b/crates/config/src/lib.rs index 9fdaec0a..985a2dd1 100644 --- a/crates/config/src/lib.rs +++ b/crates/config/src/lib.rs @@ -7,8 +7,11 @@ use serde::{Deserialize, Serialize}; pub const CONFIG_FILE_NAME: &str = "config.toml"; const DEFAULT_DEEPSEEK_MODEL: &str = "deepseek-v4-pro"; +const DEFAULT_NVIDIA_NIM_MODEL: &str = "deepseek-ai/deepseek-v4-pro"; +const DEFAULT_NVIDIA_NIM_FLASH_MODEL: &str = "deepseek-ai/deepseek-v4-flash"; const DEFAULT_OPENAI_MODEL: &str = "gpt-4.1"; const DEFAULT_DEEPSEEK_BASE_URL: &str = "https://api.deepseek.com"; +const DEFAULT_NVIDIA_NIM_BASE_URL: &str = "https://integrate.api.nvidia.com/v1"; const DEFAULT_OPENAI_BASE_URL: &str = "https://api.openai.com/v1"; #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)] @@ -16,6 +19,7 @@ const DEFAULT_OPENAI_BASE_URL: &str = "https://api.openai.com/v1"; pub enum ProviderKind { #[default] Deepseek, + NvidiaNim, Openai, } @@ -24,6 +28,7 @@ impl ProviderKind { pub fn as_str(self) -> &'static str { match self { Self::Deepseek => "deepseek", + Self::NvidiaNim => 
"nvidia-nim", Self::Openai => "openai", } } @@ -32,6 +37,7 @@ impl ProviderKind { pub fn parse(value: &str) -> Option { match value.trim().to_ascii_lowercase().as_str() { "deepseek" | "deep-seek" => Some(Self::Deepseek), + "nvidia" | "nvidia-nim" | "nvidia_nim" | "nim" => Some(Self::NvidiaNim), "openai" | "open-ai" => Some(Self::Openai), _ => None, } @@ -50,6 +56,8 @@ pub struct ProvidersToml { #[serde(default)] pub deepseek: ProviderConfigToml, #[serde(default)] + pub nvidia_nim: ProviderConfigToml, + #[serde(default)] pub openai: ProviderConfigToml, } @@ -58,6 +66,7 @@ impl ProvidersToml { pub fn for_provider(&self, provider: ProviderKind) -> &ProviderConfigToml { match provider { ProviderKind::Deepseek => &self.deepseek, + ProviderKind::NvidiaNim => &self.nvidia_nim, ProviderKind::Openai => &self.openai, } } @@ -65,6 +74,7 @@ impl ProvidersToml { pub fn for_provider_mut(&mut self, provider: ProviderKind) -> &mut ProviderConfigToml { match provider { ProviderKind::Deepseek => &mut self.deepseek, + ProviderKind::NvidiaNim => &mut self.nvidia_nim, ProviderKind::Openai => &mut self.openai, } } @@ -116,6 +126,9 @@ impl ConfigToml { "providers.deepseek.api_key" => self.providers.deepseek.api_key.clone(), "providers.deepseek.base_url" => self.providers.deepseek.base_url.clone(), "providers.deepseek.model" => self.providers.deepseek.model.clone(), + "providers.nvidia_nim.api_key" => self.providers.nvidia_nim.api_key.clone(), + "providers.nvidia_nim.base_url" => self.providers.nvidia_nim.base_url.clone(), + "providers.nvidia_nim.model" => self.providers.nvidia_nim.model.clone(), "providers.openai.api_key" => self.providers.openai.api_key.clone(), "providers.openai.base_url" => self.providers.openai.base_url.clone(), "providers.openai.model" => self.providers.openai.model.clone(), @@ -161,6 +174,15 @@ impl ConfigToml { "providers.openai.api_key" => self.providers.openai.api_key = Some(value.to_string()), "providers.openai.base_url" => self.providers.openai.base_url = 
Some(value.to_string()), "providers.openai.model" => self.providers.openai.model = Some(value.to_string()), + "providers.nvidia_nim.api_key" => { + self.providers.nvidia_nim.api_key = Some(value.to_string()); + } + "providers.nvidia_nim.base_url" => { + self.providers.nvidia_nim.base_url = Some(value.to_string()); + } + "providers.nvidia_nim.model" => { + self.providers.nvidia_nim.model = Some(value.to_string()); + } _ => { self.extras .insert(key.to_string(), toml::Value::String(value.to_string())); @@ -199,6 +221,9 @@ impl ConfigToml { "providers.openai.api_key" => self.providers.openai.api_key = None, "providers.openai.base_url" => self.providers.openai.base_url = None, "providers.openai.model" => self.providers.openai.model = None, + "providers.nvidia_nim.api_key" => self.providers.nvidia_nim.api_key = None, + "providers.nvidia_nim.base_url" => self.providers.nvidia_nim.base_url = None, + "providers.nvidia_nim.model" => self.providers.nvidia_nim.model = None, _ => { self.extras.remove(key); } @@ -265,6 +290,15 @@ impl ConfigToml { if let Some(v) = self.providers.openai.model.as_ref() { out.insert("providers.openai.model".to_string(), v.clone()); } + if let Some(v) = self.providers.nvidia_nim.api_key.as_ref() { + out.insert("providers.nvidia_nim.api_key".to_string(), redact_secret(v)); + } + if let Some(v) = self.providers.nvidia_nim.base_url.as_ref() { + out.insert("providers.nvidia_nim.base_url".to_string(), v.clone()); + } + if let Some(v) = self.providers.nvidia_nim.model.as_ref() { + out.insert("providers.nvidia_nim.model".to_string(), v.clone()); + } for (k, v) in &self.extras { out.insert(k.clone(), v.to_string()); @@ -302,6 +336,7 @@ impl ConfigToml { .or(root_deepseek_base_url) .unwrap_or_else(|| match provider { ProviderKind::Deepseek => DEFAULT_DEEPSEEK_BASE_URL.to_string(), + ProviderKind::NvidiaNim => DEFAULT_NVIDIA_NIM_BASE_URL.to_string(), ProviderKind::Openai => DEFAULT_OPENAI_BASE_URL.to_string(), }); @@ -314,8 +349,10 @@ impl ConfigToml { 
.or_else(|| self.model.clone()) .unwrap_or_else(|| match provider { ProviderKind::Deepseek => DEFAULT_DEEPSEEK_MODEL.to_string(), + ProviderKind::NvidiaNim => DEFAULT_NVIDIA_NIM_MODEL.to_string(), ProviderKind::Openai => DEFAULT_OPENAI_MODEL.to_string(), }); + let model = normalize_model_for_provider(provider, &model); let output_mode = cli .output_mode @@ -363,6 +400,21 @@ impl ConfigToml { } } +fn normalize_model_for_provider(provider: ProviderKind, model: &str) -> String { + let normalized = model.trim().to_ascii_lowercase(); + match (provider, normalized.as_str()) { + (ProviderKind::NvidiaNim, "deepseek-v4-pro" | "deepseek-v4pro") => { + DEFAULT_NVIDIA_NIM_MODEL.to_string() + } + ( + ProviderKind::NvidiaNim, + "deepseek-v4-flash" | "deepseek-v4flash" | "deepseek-chat" | "deepseek-reasoner" + | "deepseek-r1" | "deepseek-v3" | "deepseek-v3.2", + ) => DEFAULT_NVIDIA_NIM_FLASH_MODEL.to_string(), + _ => model.to_string(), + } +} + #[derive(Debug, Clone, Default)] pub struct CliRuntimeOverrides { pub provider: Option, @@ -481,7 +533,9 @@ struct EnvRuntimeOverrides { sandbox_mode: Option, deepseek_api_key: Option, openai_api_key: Option, + nvidia_api_key: Option, deepseek_base_url: Option, + nvidia_base_url: Option, openai_base_url: Option, } @@ -506,9 +560,17 @@ impl EnvRuntimeOverrides { openai_api_key: std::env::var("OPENAI_API_KEY") .ok() .filter(|v| !v.trim().is_empty()), + nvidia_api_key: std::env::var("NVIDIA_API_KEY") + .or_else(|_| std::env::var("NVIDIA_NIM_API_KEY")) + .ok() + .filter(|v| !v.trim().is_empty()), deepseek_base_url: std::env::var("DEEPSEEK_BASE_URL") .ok() .filter(|v| !v.trim().is_empty()), + nvidia_base_url: std::env::var("NVIDIA_NIM_BASE_URL") + .or_else(|_| std::env::var("NVIDIA_BASE_URL")) + .ok() + .filter(|v| !v.trim().is_empty()), openai_base_url: std::env::var("OPENAI_BASE_URL") .ok() .filter(|v| !v.trim().is_empty()), @@ -518,6 +580,7 @@ impl EnvRuntimeOverrides { fn api_key_for(&self, provider: ProviderKind) -> Option { match provider 
{ ProviderKind::Deepseek => self.deepseek_api_key.clone(), + ProviderKind::NvidiaNim => self.nvidia_api_key.clone(), ProviderKind::Openai => self.openai_api_key.clone(), } } @@ -525,6 +588,7 @@ impl EnvRuntimeOverrides { fn base_url_for(&self, provider: ProviderKind) -> Option { match provider { ProviderKind::Deepseek => self.deepseek_base_url.clone(), + ProviderKind::NvidiaNim => self.nvidia_base_url.clone(), ProviderKind::Openai => self.openai_base_url.clone(), } } @@ -547,6 +611,10 @@ mod tests { deepseek_base_url: Option, deepseek_model: Option, deepseek_provider: Option, + nvidia_api_key: Option, + nvidia_nim_api_key: Option, + nvidia_base_url: Option, + nvidia_nim_base_url: Option, } impl EnvGuard { @@ -556,6 +624,10 @@ mod tests { deepseek_base_url: env::var_os("DEEPSEEK_BASE_URL"), deepseek_model: env::var_os("DEEPSEEK_MODEL"), deepseek_provider: env::var_os("DEEPSEEK_PROVIDER"), + nvidia_api_key: env::var_os("NVIDIA_API_KEY"), + nvidia_nim_api_key: env::var_os("NVIDIA_NIM_API_KEY"), + nvidia_base_url: env::var_os("NVIDIA_BASE_URL"), + nvidia_nim_base_url: env::var_os("NVIDIA_NIM_BASE_URL"), }; // Safety: test-only environment mutation guarded by a module mutex. 
unsafe { @@ -563,6 +635,10 @@ mod tests { env::remove_var("DEEPSEEK_BASE_URL"); env::remove_var("DEEPSEEK_MODEL"); env::remove_var("DEEPSEEK_PROVIDER"); + env::remove_var("NVIDIA_API_KEY"); + env::remove_var("NVIDIA_NIM_API_KEY"); + env::remove_var("NVIDIA_BASE_URL"); + env::remove_var("NVIDIA_NIM_BASE_URL"); } guard } @@ -584,6 +660,10 @@ mod tests { Self::restore_var("DEEPSEEK_BASE_URL", self.deepseek_base_url.take()); Self::restore_var("DEEPSEEK_MODEL", self.deepseek_model.take()); Self::restore_var("DEEPSEEK_PROVIDER", self.deepseek_provider.take()); + Self::restore_var("NVIDIA_API_KEY", self.nvidia_api_key.take()); + Self::restore_var("NVIDIA_NIM_API_KEY", self.nvidia_nim_api_key.take()); + Self::restore_var("NVIDIA_BASE_URL", self.nvidia_base_url.take()); + Self::restore_var("NVIDIA_NIM_BASE_URL", self.nvidia_nim_base_url.take()); } } } @@ -628,6 +708,78 @@ mod tests { assert_eq!(resolved.model, "deepseek-v4-flash"); } + #[test] + fn nvidia_nim_provider_defaults_to_catalog_endpoint_and_model() { + let _lock = env_lock(); + let _env = EnvGuard::without_deepseek_runtime_overrides(); + let config = ConfigToml { + provider: ProviderKind::NvidiaNim, + ..ConfigToml::default() + }; + + let resolved = config.resolve_runtime_options(&CliRuntimeOverrides::default()); + + assert_eq!(resolved.provider, ProviderKind::NvidiaNim); + assert_eq!(resolved.base_url, DEFAULT_NVIDIA_NIM_BASE_URL); + assert_eq!(resolved.model, DEFAULT_NVIDIA_NIM_MODEL); + } + + #[test] + fn nvidia_nim_provider_uses_provider_specific_credentials() { + let _lock = env_lock(); + let _env = EnvGuard::without_deepseek_runtime_overrides(); + let mut config = ConfigToml { + provider: ProviderKind::NvidiaNim, + ..ConfigToml::default() + }; + config.providers.nvidia_nim.api_key = Some("nim-key".to_string()); + config.providers.nvidia_nim.base_url = Some("https://nim.example/v1".to_string()); + config.providers.nvidia_nim.model = Some("deepseek-ai/deepseek-v4-pro".to_string()); + + let resolved = 
config.resolve_runtime_options(&CliRuntimeOverrides::default()); + + assert_eq!(resolved.provider, ProviderKind::NvidiaNim); + assert_eq!(resolved.api_key.as_deref(), Some("nim-key")); + assert_eq!(resolved.base_url, "https://nim.example/v1"); + assert_eq!(resolved.model, "deepseek-ai/deepseek-v4-pro"); + } + + #[test] + fn nvidia_nim_provider_normalizes_flash_aliases() { + let _lock = env_lock(); + let _env = EnvGuard::without_deepseek_runtime_overrides(); + let cli = CliRuntimeOverrides { + provider: Some(ProviderKind::NvidiaNim), + model: Some("deepseek-v4-flash".to_string()), + ..CliRuntimeOverrides::default() + }; + + let resolved = ConfigToml::default().resolve_runtime_options(&cli); + + assert_eq!(resolved.provider, ProviderKind::NvidiaNim); + assert_eq!(resolved.model, DEFAULT_NVIDIA_NIM_FLASH_MODEL); + } + + #[test] + fn nvidia_nim_provider_uses_nvidia_env_credentials() { + let _lock = env_lock(); + let _env = EnvGuard::without_deepseek_runtime_overrides(); + // Safety: test-only environment mutation guarded by a module mutex. 
+ unsafe { + env::set_var("DEEPSEEK_PROVIDER", "nvidia-nim"); + env::set_var("NVIDIA_API_KEY", "nim-env-key"); + env::set_var("NVIDIA_NIM_BASE_URL", "https://nim-env.example/v1"); + } + + let config = ConfigToml::default(); + let resolved = config.resolve_runtime_options(&CliRuntimeOverrides::default()); + + assert_eq!(resolved.provider, ProviderKind::NvidiaNim); + assert_eq!(resolved.api_key.as_deref(), Some("nim-env-key")); + assert_eq!(resolved.base_url, "https://nim-env.example/v1"); + assert_eq!(resolved.model, DEFAULT_NVIDIA_NIM_MODEL); + } + #[test] fn list_values_redacts_root_api_key() { let config = ConfigToml { diff --git a/crates/tui/src/client.rs b/crates/tui/src/client.rs index 0b67bb52..bb98422f 100644 --- a/crates/tui/src/client.rs +++ b/crates/tui/src/client.rs @@ -16,7 +16,7 @@ use serde::{Deserialize, Serialize}; use serde_json::{Value, json}; use tokio::sync::Mutex as AsyncMutex; -use crate::config::{Config, DEFAULT_TEXT_MODEL, RetryPolicy}; +use crate::config::{ApiProvider, Config, RetryPolicy}; use crate::llm_client::{ LlmClient, LlmError, RetryConfig as LlmRetryConfig, StreamEventBox, extract_retry_after, with_retry, @@ -132,6 +132,7 @@ pub struct DeepSeekClient { http_client: reqwest::Client, api_key: String, base_url: String, + api_provider: ApiProvider, retry: RetryPolicy, default_model: String, use_chat_completions: AtomicBool, @@ -307,6 +308,7 @@ impl Clone for DeepSeekClient { http_client: self.http_client.clone(), api_key: self.api_key.clone(), base_url: self.base_url.clone(), + api_provider: self.api_provider, retry: self.retry.clone(), default_model: self.default_model.clone(), use_chat_completions: AtomicBool::new( @@ -409,14 +411,13 @@ impl DeepSeekClient { pub fn new(config: &Config) -> Result { let api_key = config.deepseek_api_key()?; let base_url = config.deepseek_base_url(); + let api_provider = config.api_provider(); validate_base_url_security(&base_url)?; let retry = config.retry_policy(); - let default_model = config - 
.default_text_model - .clone() - .unwrap_or_else(|| DEFAULT_TEXT_MODEL.to_string()); + let default_model = config.default_model(); - logging::info(format!("DeepSeek base URL: {base_url}")); + logging::info(format!("API provider: {}", api_provider.as_str())); + logging::info(format!("API base URL: {base_url}")); logging::info(format!( "Retry policy: enabled={}, max_retries={}, initial_delay={}s, max_delay={}s", retry.enabled, retry.max_retries, retry.initial_delay, retry.max_delay @@ -428,6 +429,7 @@ impl DeepSeekClient { http_client, api_key, base_url, + api_provider, retry, default_model, use_chat_completions: AtomicBool::new(false), @@ -608,7 +610,11 @@ impl DeepSeekClient { if let Some(choice) = request.tool_choice.as_ref() { body["tool_choice"] = choice.clone(); } - apply_reasoning_effort(&mut body, request.reasoning_effort.as_deref()); + apply_reasoning_effort( + &mut body, + request.reasoning_effort.as_deref(), + self.api_provider, + ); let url = api_url(&self.base_url, "responses"); let response = self @@ -659,7 +665,11 @@ impl DeepSeekClient { { body["tool_choice"] = mapped; } - apply_reasoning_effort(&mut body, request.reasoning_effort.as_deref()); + apply_reasoning_effort( + &mut body, + request.reasoning_effort.as_deref(), + self.api_provider, + ); let url = api_url(&self.base_url, "chat/completions"); let response = self @@ -683,7 +693,7 @@ impl DeepSeekClient { impl LlmClient for DeepSeekClient { fn provider_name(&self) -> &'static str { - "deepseek" + self.api_provider.as_str() } fn model(&self) -> &str { @@ -785,7 +795,11 @@ impl LlmClient for DeepSeekClient { { body["tool_choice"] = mapped; } - apply_reasoning_effort(&mut body, request.reasoning_effort.as_deref()); + apply_reasoning_effort( + &mut body, + request.reasoning_effort.as_deref(), + self.api_provider, + ); let url = api_url(&self.base_url, "chat/completions"); let response = self @@ -1670,28 +1684,52 @@ fn should_replay_reasoning_content(model: &str, effort: Option<&str>) -> bool { 
requires_reasoning_content(model) } -/// Translate the TUI's effort-tier string into DeepSeek's request fields. +/// Translate the TUI's effort-tier string into provider-specific request fields. /// /// The config surface accepts `off | low | medium | high | max`. DeepSeek /// itself collapses `low`/`medium` → `"high"` and `xhigh` → `"max"` at the -/// API boundary (per their docs); `off` emits the disable toggle. -fn apply_reasoning_effort(body: &mut Value, effort: Option<&str>) { +/// API boundary (per their docs); NVIDIA NIM takes equivalent controls through +/// `chat_template_kwargs`. +fn apply_reasoning_effort(body: &mut Value, effort: Option<&str>, provider: ApiProvider) { let Some(effort) = effort else { return; }; let normalized = effort.trim().to_ascii_lowercase(); match normalized.as_str() { - "off" | "disabled" | "none" | "false" => { - body["thinking"] = json!({ "type": "disabled" }); - } - "max" | "maximum" | "xhigh" => { - body["reasoning_effort"] = json!("max"); - body["thinking"] = json!({ "type": "enabled" }); - } + "off" | "disabled" | "none" | "false" => match provider { + ApiProvider::Deepseek => body["thinking"] = json!({ "type": "disabled" }), + ApiProvider::NvidiaNim => { + body["chat_template_kwargs"] = json!({ + "thinking": false, + }) + } + }, + "max" | "maximum" | "xhigh" => match provider { + ApiProvider::Deepseek => { + body["reasoning_effort"] = json!("max"); + body["thinking"] = json!({ "type": "enabled" }); + } + ApiProvider::NvidiaNim => { + body["chat_template_kwargs"] = json!({ + "thinking": true, + "reasoning_effort": "max", + }); + } + }, "low" | "minimal" | "medium" | "mid" | "high" | "" => { - // Per DeepSeek docs: low/medium compat-map to "high". - body["reasoning_effort"] = json!("high"); - body["thinking"] = json!({ "type": "enabled" }); + match provider { + ApiProvider::Deepseek => { + // Per DeepSeek docs: low/medium compat-map to "high". 
+ body["reasoning_effort"] = json!("high"); + body["thinking"] = json!({ "type": "enabled" }); + } + ApiProvider::NvidiaNim => { + body["chat_template_kwargs"] = json!({ + "thinking": true, + "reasoning_effort": "high", + }); + } + } } _ => { // Unknown value — do not mutate the request, let the provider @@ -1710,6 +1748,13 @@ fn has_deepseek_r_series_marker(model_lower: &str) -> bool { }) } +fn reasoning_field(value: &Value) -> Option<&str> { + value + .get("reasoning_content") + .or_else(|| value.get("reasoning")) + .and_then(Value::as_str) +} + fn parse_chat_message(payload: &Value) -> Result { let id = payload .get("id") @@ -1734,8 +1779,8 @@ fn parse_chat_message(payload: &Value) -> Result { .context("Chat API response missing message")?; let mut content_blocks = Vec::new(); - if let Some(reasoning) = message.get("reasoning_content").and_then(Value::as_str) - && !reasoning.trim().is_empty() + if let Some(reasoning) = + reasoning_field(message).filter(|reasoning| !reasoning.trim().is_empty()) { content_blocks.push(ContentBlock::Thinking { thinking: reasoning.to_string(), @@ -1999,9 +2044,9 @@ fn parse_sse_chunk( .map(str::to_string); if let Some(delta) = delta { - // Handle reasoning_content (DeepSeek-Reasoner thinking) + // Handle reasoning_content / reasoning thinking deltas. 
if is_reasoning_model - && let Some(reasoning) = delta.get("reasoning_content").and_then(Value::as_str) + && let Some(reasoning) = reasoning_field(delta) && !reasoning.is_empty() { if !*thinking_started { @@ -2575,7 +2620,7 @@ mod tests { #[test] fn reasoning_effort_uses_deepseek_top_level_thinking_parameter() { let mut body = json!({}); - apply_reasoning_effort(&mut body, Some("max")); + apply_reasoning_effort(&mut body, Some("max"), ApiProvider::Deepseek); assert_eq!( body.get("reasoning_effort").and_then(Value::as_str), @@ -2591,7 +2636,7 @@ mod tests { #[test] fn reasoning_effort_off_disables_top_level_thinking() { let mut body = json!({}); - apply_reasoning_effort(&mut body, Some("off")); + apply_reasoning_effort(&mut body, Some("off"), ApiProvider::Deepseek); assert_eq!( body.pointer("/thinking/type").and_then(Value::as_str), @@ -2601,6 +2646,101 @@ mod tests { assert!(body.get("extra_body").is_none()); } + #[test] + fn reasoning_effort_uses_nvidia_nim_chat_template_kwargs() { + let mut body = json!({}); + apply_reasoning_effort(&mut body, Some("max"), ApiProvider::NvidiaNim); + + assert_eq!( + body.pointer("/chat_template_kwargs/thinking") + .and_then(Value::as_bool), + Some(true) + ); + assert_eq!( + body.pointer("/chat_template_kwargs/reasoning_effort") + .and_then(Value::as_str), + Some("max") + ); + assert!(body.get("thinking").is_none()); + assert!(body.get("reasoning_effort").is_none()); + } + + #[test] + fn reasoning_effort_off_disables_nvidia_nim_thinking() { + let mut body = json!({}); + apply_reasoning_effort(&mut body, Some("off"), ApiProvider::NvidiaNim); + + assert_eq!( + body.pointer("/chat_template_kwargs/thinking") + .and_then(Value::as_bool), + Some(false) + ); + assert!( + body.pointer("/chat_template_kwargs/reasoning_effort") + .is_none() + ); + } + + #[test] + fn chat_parser_accepts_nvidia_nim_reasoning_field() -> Result<()> { + let response = parse_chat_message(&json!({ + "id": "chatcmpl-test", + "model": "deepseek-ai/deepseek-v4-pro", + 
"choices": [{ + "message": { + "role": "assistant", + "reasoning": "thinking via NIM", + "content": "final answer" + }, + "finish_reason": "stop" + }], + "usage": { + "prompt_tokens": 10, + "completion_tokens": 3 + } + }))?; + + assert!(matches!( + response.content.first(), + Some(ContentBlock::Thinking { thinking }) if thinking == "thinking via NIM" + )); + assert!(matches!( + response.content.get(1), + Some(ContentBlock::Text { text, .. }) if text == "final answer" + )); + Ok(()) + } + + #[test] + fn sse_parser_accepts_nvidia_nim_reasoning_delta() { + let mut content_index = 0; + let mut text_started = false; + let mut thinking_started = false; + let mut tool_indices = std::collections::HashMap::new(); + let events = parse_sse_chunk( + &json!({ + "choices": [{ + "delta": { + "reasoning": "nim thought" + } + }] + }), + &mut content_index, + &mut text_started, + &mut thinking_started, + &mut tool_indices, + true, + ); + + assert!(events.iter().any(|event| matches!( + event, + StreamEvent::ContentBlockDelta { + delta: Delta::ThinkingDelta { thinking }, + .. 
+ } if thinking == "nim thought" + ))); + } + #[test] fn chat_tool_strict_flag_is_nested_under_function() { let tool = Tool { diff --git a/crates/tui/src/config.rs b/crates/tui/src/config.rs index b33b03a5..5fd76d6b 100644 --- a/crates/tui/src/config.rs +++ b/crates/tui/src/config.rs @@ -16,8 +16,41 @@ use crate::hooks::HooksConfig; pub const DEFAULT_MAX_SUBAGENTS: usize = 5; pub const MAX_SUBAGENTS: usize = 20; pub const DEFAULT_TEXT_MODEL: &str = "deepseek-v4-pro"; +pub const DEFAULT_NVIDIA_NIM_MODEL: &str = "deepseek-ai/deepseek-v4-pro"; +pub const DEFAULT_NVIDIA_NIM_FLASH_MODEL: &str = "deepseek-ai/deepseek-v4-flash"; +pub const DEFAULT_NVIDIA_NIM_BASE_URL: &str = "https://integrate.api.nvidia.com/v1"; const API_KEYRING_SENTINEL: &str = "__KEYRING__"; -pub const COMMON_DEEPSEEK_MODELS: &[&str] = &["deepseek-v4-pro", "deepseek-v4-flash"]; +pub const COMMON_DEEPSEEK_MODELS: &[&str] = &[ + "deepseek-v4-pro", + "deepseek-v4-flash", + "deepseek-ai/deepseek-v4-pro", + "deepseek-ai/deepseek-v4-flash", +]; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ApiProvider { + Deepseek, + NvidiaNim, +} + +impl ApiProvider { + #[must_use] + pub fn parse(value: &str) -> Option { + match value.trim().to_ascii_lowercase().as_str() { + "deepseek" | "deep-seek" => Some(Self::Deepseek), + "nvidia" | "nvidia-nim" | "nvidia_nim" | "nim" => Some(Self::NvidiaNim), + _ => None, + } + } + + #[must_use] + pub fn as_str(self) -> &'static str { + match self { + Self::Deepseek => "deepseek", + Self::NvidiaNim => "nvidia-nim", + } + } +} /// Canonicalize common model aliases to stable DeepSeek IDs. /// @@ -55,7 +88,7 @@ pub fn normalize_model_name(model: &str) -> Option { } if normalized.chars().all(|ch| { - ch.is_ascii_lowercase() || ch.is_ascii_digit() || matches!(ch, '-' | '_' | '.' | ':') + ch.is_ascii_lowercase() || ch.is_ascii_digit() || matches!(ch, '-' | '_' | '.' 
| ':' | '/') }) { return Some(normalized); } @@ -127,6 +160,7 @@ impl RetryPolicy { /// Resolved CLI configuration, including defaults and environment overrides. #[derive(Debug, Clone, Default, Deserialize)] pub struct Config { + pub provider: Option, pub api_key: Option, pub base_url: Option, pub default_text_model: Option, @@ -154,6 +188,25 @@ pub struct Config { /// Lifecycle hooks configuration #[serde(default)] pub hooks: Option, + + /// Provider-specific credentials and defaults shared with the `deepseek` facade. + #[serde(default)] + pub providers: Option, +} + +#[derive(Debug, Clone, Default, Deserialize)] +pub struct ProviderConfig { + pub api_key: Option, + pub base_url: Option, + pub model: Option, +} + +#[derive(Debug, Clone, Default, Deserialize)] +pub struct ProvidersConfig { + #[serde(default)] + pub deepseek: ProviderConfig, + #[serde(default)] + pub nvidia_nim: ProviderConfig, } #[derive(Debug, Clone, Deserialize, Default)] @@ -209,6 +262,11 @@ impl Config { /// Validate that critical config fields are present. pub fn validate(&self) -> Result<()> { + if let Some(provider) = self.provider.as_deref() + && ApiProvider::parse(provider).is_none() + { + anyhow::bail!("Invalid provider '{provider}': expected deepseek or nvidia-nim."); + } if let Some(ref key) = self.api_key && key.trim().is_empty() { @@ -225,7 +283,7 @@ impl Config { && normalize_model_name(model).is_none() { anyhow::bail!( - "Invalid default_text_model '{model}': expected a DeepSeek model ID (for example: deepseek-v4-pro, deepseek-v4-flash)." + "Invalid default_text_model '{model}': expected a DeepSeek model ID (for example: deepseek-v4-pro, deepseek-v4-flash, deepseek-ai/deepseek-v4-pro)." ); } if let Some(policy) = self.approval_policy.as_deref() { @@ -293,26 +351,112 @@ impl Config { Ok(()) } - /// Return the `DeepSeek` base URL (normalized). 
+ #[must_use] + pub fn api_provider(&self) -> ApiProvider { + self.provider + .as_deref() + .and_then(ApiProvider::parse) + .unwrap_or_else(|| { + self.base_url + .as_deref() + .filter(|base| base.contains("integrate.api.nvidia.com")) + .map(|_| ApiProvider::NvidiaNim) + .unwrap_or(ApiProvider::Deepseek) + }) + } + + fn provider_config_for(&self, provider: ApiProvider) -> Option<&ProviderConfig> { + let providers = self.providers.as_ref()?; + Some(match provider { + ApiProvider::Deepseek => &providers.deepseek, + ApiProvider::NvidiaNim => &providers.nvidia_nim, + }) + } + + fn provider_config(&self) -> Option<&ProviderConfig> { + self.provider_config_for(self.api_provider()) + } + + #[must_use] + pub fn default_model(&self) -> String { + let provider = self.api_provider(); + if let Some(model) = self + .provider_config() + .and_then(|provider| provider.model.as_deref()) + && let Some(normalized) = normalize_model_for_provider(provider, model) + { + return normalized; + } + if let Some(model) = self.default_text_model.as_deref() + && let Some(normalized) = normalize_model_name(model) + { + return model_for_provider(provider, normalized); + } + + match provider { + ApiProvider::Deepseek => DEFAULT_TEXT_MODEL, + ApiProvider::NvidiaNim => DEFAULT_NVIDIA_NIM_MODEL, + } + .to_string() + } + + /// Return the configured API base URL (normalized). 
#[must_use] pub fn deepseek_base_url(&self) -> String { - let base = self - .base_url - .clone() - .unwrap_or_else(|| "https://api.deepseek.com".to_string()); + let provider = self.api_provider(); + let provider_base = self + .provider_config_for(provider) + .and_then(|provider| provider.base_url.clone()); + let root_base = match provider { + ApiProvider::Deepseek => self.base_url.clone(), + ApiProvider::NvidiaNim => self + .base_url + .as_ref() + .filter(|base| base.contains("integrate.api.nvidia.com")) + .cloned(), + }; + let base = provider_base.or(root_base).unwrap_or_else(|| { + match provider { + ApiProvider::Deepseek => "https://api.deepseek.com", + ApiProvider::NvidiaNim => DEFAULT_NVIDIA_NIM_BASE_URL, + } + .to_string() + }); normalize_base_url(&base) } - /// Read the `DeepSeek` API key from config/environment. + /// Read the API key from config/environment. pub fn deepseek_api_key(&self) -> Result { - // First check environment variable (highest priority) - if let Ok(key) = std::env::var("DEEPSEEK_API_KEY") - && !key.trim().is_empty() - { - return Ok(key); + let provider = self.api_provider(); + + match provider { + ApiProvider::Deepseek => { + if let Ok(key) = std::env::var("DEEPSEEK_API_KEY") + && !key.trim().is_empty() + { + return Ok(key); + } + } + ApiProvider::NvidiaNim => { + for name in ["NVIDIA_API_KEY", "NVIDIA_NIM_API_KEY", "DEEPSEEK_API_KEY"] { + if let Ok(key) = std::env::var(name) + && !key.trim().is_empty() + { + return Ok(key); + } + } + } } // Then check config file + if let Some(configured) = self + .provider_config_for(provider) + .and_then(|provider| provider.api_key.clone()) + && !configured.trim().is_empty() + { + return Ok(configured); + } + if let Some(configured) = self.api_key.clone() && !configured.trim().is_empty() && configured != API_KEYRING_SENTINEL @@ -320,13 +464,18 @@ impl Config { return Ok(configured); } - // Provide helpful error message with alternatives - anyhow::bail!( - "DeepSeek API key not found. 
Set it using one of these methods:\n\ - 1. Set DEEPSEEK_API_KEY environment variable (recommended)\n\ - 2. Run 'deepseek login' to save to ~/.deepseek/config.toml\n\ - 3. Add 'api_key = \"your-key\"' to ~/.deepseek/config.toml" - ) + match provider { + ApiProvider::Deepseek => anyhow::bail!( + "DeepSeek API key not found. Set it using one of these methods:\n\ + 1. Set DEEPSEEK_API_KEY environment variable (recommended)\n\ + 2. Run 'deepseek login' to save to ~/.deepseek/config.toml\n\ + 3. Add 'api_key = \"your-key\"' to ~/.deepseek/config.toml" + ), + ApiProvider::NvidiaNim => anyhow::bail!( + "NVIDIA NIM API key not found. Set NVIDIA_API_KEY, NVIDIA_NIM_API_KEY, \ + or save api_key in ~/.deepseek/config.toml with provider = \"nvidia-nim\"." + ), + } } /// Resolve the skills directory path. @@ -576,17 +725,31 @@ fn default_memory_path() -> Option { // === Environment Overrides === fn apply_env_overrides(config: &mut Config) { + if let Ok(value) = std::env::var("DEEPSEEK_PROVIDER") { + config.provider = Some(value); + } if let Ok(value) = std::env::var("DEEPSEEK_API_KEY") { config.api_key = Some(value); } if let Ok(value) = std::env::var("DEEPSEEK_BASE_URL") { config.base_url = Some(value); } + if matches!(config.api_provider(), ApiProvider::NvidiaNim) + && let Ok(value) = + std::env::var("NVIDIA_NIM_BASE_URL").or_else(|_| std::env::var("NVIDIA_BASE_URL")) + { + config.base_url = Some(value); + } if let Ok(value) = std::env::var("DEEPSEEK_MODEL").or_else(|_| std::env::var("DEEPSEEK_DEFAULT_TEXT_MODEL")) { config.default_text_model = Some(value); } + if matches!(config.api_provider(), ApiProvider::NvidiaNim) + && let Ok(value) = std::env::var("NVIDIA_NIM_MODEL") + { + config.default_text_model = Some(value); + } if let Ok(value) = std::env::var("DEEPSEEK_SKILLS_DIR") { config.skills_dir = Some(value); } @@ -722,10 +885,35 @@ fn apply_env_overrides(config: &mut Config) { fn normalize_model_config(config: &mut Config) { if let Some(model) = 
config.default_text_model.as_deref() - && let Some(normalized) = normalize_model_name(model) + && let Some(normalized) = normalize_model_for_provider(config.api_provider(), model) { config.default_text_model = Some(normalized); } + + if let Some(providers) = config.providers.as_mut() { + if let Some(model) = providers.deepseek.model.as_deref() + && let Some(normalized) = normalize_model_for_provider(ApiProvider::Deepseek, model) + { + providers.deepseek.model = Some(normalized); + } + if let Some(model) = providers.nvidia_nim.model.as_deref() + && let Some(normalized) = normalize_model_for_provider(ApiProvider::NvidiaNim, model) + { + providers.nvidia_nim.model = Some(normalized); + } + } +} + +fn normalize_model_for_provider(provider: ApiProvider, model: &str) -> Option { + normalize_model_name(model).map(|normalized| model_for_provider(provider, normalized)) +} + +fn model_for_provider(provider: ApiProvider, normalized: String) -> String { + match (provider, normalized.as_str()) { + (ApiProvider::NvidiaNim, "deepseek-v4-pro") => DEFAULT_NVIDIA_NIM_MODEL.to_string(), + (ApiProvider::NvidiaNim, "deepseek-v4-flash") => DEFAULT_NVIDIA_NIM_FLASH_MODEL.to_string(), + _ => normalized, + } } fn normalize_base_url(base: &str) -> String { @@ -771,6 +959,7 @@ fn apply_profile(config: ConfigFile, profile: Option<&str>) -> Result { fn merge_config(base: Config, override_cfg: Config) -> Config { Config { + provider: override_cfg.provider.or(base.provider), api_key: override_cfg.api_key.or(base.api_key), base_url: override_cfg.base_url.or(base.base_url), default_text_model: override_cfg.default_text_model.or(base.default_text_model), @@ -792,10 +981,34 @@ fn merge_config(base: Config, override_cfg: Config) -> Config { capacity: override_cfg.capacity.or(base.capacity), tui: override_cfg.tui.or(base.tui), hooks: override_cfg.hooks.or(base.hooks), + providers: merge_providers(base.providers, override_cfg.providers), features: merge_features(base.features, override_cfg.features), } 
} +fn merge_provider_config(base: ProviderConfig, override_cfg: ProviderConfig) -> ProviderConfig { + ProviderConfig { + api_key: override_cfg.api_key.or(base.api_key), + base_url: override_cfg.base_url.or(base.base_url), + model: override_cfg.model.or(base.model), + } +} + +fn merge_providers( + base: Option, + override_cfg: Option, +) -> Option { + match (base, override_cfg) { + (None, None) => None, + (Some(base), None) => Some(base), + (None, Some(override_cfg)) => Some(override_cfg), + (Some(base), Some(override_cfg)) => Some(ProvidersConfig { + deepseek: merge_provider_config(base.deepseek, override_cfg.deepseek), + nvidia_nim: merge_provider_config(base.nvidia_nim, override_cfg.nvidia_nim), + }), + } +} + fn load_single_config_file(path: &Path) -> Result { let contents = fs::read_to_string(path) .with_context(|| format!("Failed to read config file: {}", path.display()))?; @@ -1033,9 +1246,16 @@ mod tests { home: Option, userprofile: Option, deepseek_config_path: Option, + deepseek_provider: Option, deepseek_api_key: Option, + deepseek_base_url: Option, deepseek_model: Option, deepseek_default_text_model: Option, + nvidia_api_key: Option, + nvidia_nim_api_key: Option, + nvidia_base_url: Option, + nvidia_nim_base_url: Option, + nvidia_nim_model: Option, } impl EnvGuard { @@ -1046,25 +1266,46 @@ mod tests { let home_prev = env::var_os("HOME"); let userprofile_prev = env::var_os("USERPROFILE"); let deepseek_config_prev = env::var_os("DEEPSEEK_CONFIG_PATH"); + let deepseek_provider_prev = env::var_os("DEEPSEEK_PROVIDER"); let api_key_prev = env::var_os("DEEPSEEK_API_KEY"); + let base_url_prev = env::var_os("DEEPSEEK_BASE_URL"); let model_prev = env::var_os("DEEPSEEK_MODEL"); let default_text_model_prev = env::var_os("DEEPSEEK_DEFAULT_TEXT_MODEL"); + let nvidia_api_key_prev = env::var_os("NVIDIA_API_KEY"); + let nvidia_nim_api_key_prev = env::var_os("NVIDIA_NIM_API_KEY"); + let nvidia_base_url_prev = env::var_os("NVIDIA_BASE_URL"); + let nvidia_nim_base_url_prev = 
env::var_os("NVIDIA_NIM_BASE_URL"); + let nvidia_nim_model_prev = env::var_os("NVIDIA_NIM_MODEL"); // Safety: test-only environment mutation guarded by a global mutex. unsafe { env::set_var("HOME", &home_str); env::set_var("USERPROFILE", &home_str); env::set_var("DEEPSEEK_CONFIG_PATH", &config_str); + env::remove_var("DEEPSEEK_PROVIDER"); env::remove_var("DEEPSEEK_API_KEY"); + env::remove_var("DEEPSEEK_BASE_URL"); env::remove_var("DEEPSEEK_MODEL"); env::remove_var("DEEPSEEK_DEFAULT_TEXT_MODEL"); + env::remove_var("NVIDIA_API_KEY"); + env::remove_var("NVIDIA_NIM_API_KEY"); + env::remove_var("NVIDIA_BASE_URL"); + env::remove_var("NVIDIA_NIM_BASE_URL"); + env::remove_var("NVIDIA_NIM_MODEL"); } Self { home: home_prev, userprofile: userprofile_prev, deepseek_config_path: deepseek_config_prev, + deepseek_provider: deepseek_provider_prev, deepseek_api_key: api_key_prev, + deepseek_base_url: base_url_prev, deepseek_model: model_prev, deepseek_default_text_model: default_text_model_prev, + nvidia_api_key: nvidia_api_key_prev, + nvidia_nim_api_key: nvidia_nim_api_key_prev, + nvidia_base_url: nvidia_base_url_prev, + nvidia_nim_base_url: nvidia_nim_base_url_prev, + nvidia_nim_model: nvidia_nim_model_prev, } } } @@ -1076,12 +1317,19 @@ mod tests { Self::restore_var("HOME", self.home.take()); Self::restore_var("USERPROFILE", self.userprofile.take()); Self::restore_var("DEEPSEEK_CONFIG_PATH", self.deepseek_config_path.take()); + Self::restore_var("DEEPSEEK_PROVIDER", self.deepseek_provider.take()); Self::restore_var("DEEPSEEK_API_KEY", self.deepseek_api_key.take()); + Self::restore_var("DEEPSEEK_BASE_URL", self.deepseek_base_url.take()); Self::restore_var("DEEPSEEK_MODEL", self.deepseek_model.take()); Self::restore_var( "DEEPSEEK_DEFAULT_TEXT_MODEL", self.deepseek_default_text_model.take(), ); + Self::restore_var("NVIDIA_API_KEY", self.nvidia_api_key.take()); + Self::restore_var("NVIDIA_NIM_API_KEY", self.nvidia_nim_api_key.take()); + Self::restore_var("NVIDIA_BASE_URL", 
self.nvidia_base_url.take()); + Self::restore_var("NVIDIA_NIM_BASE_URL", self.nvidia_nim_base_url.take()); + Self::restore_var("NVIDIA_NIM_MODEL", self.nvidia_nim_model.take()); } } } @@ -1302,6 +1550,14 @@ mod tests { normalize_model_name("DeepSeek-V4").as_deref(), Some("deepseek-v4") ); + assert_eq!( + normalize_model_name("deepseek-ai/deepseek-v4-pro").as_deref(), + Some("deepseek-ai/deepseek-v4-pro") + ); + assert_eq!( + normalize_model_name("deepseek-ai/deepseek-v4-flash").as_deref(), + Some("deepseek-ai/deepseek-v4-flash") + ); } #[test] @@ -1348,4 +1604,128 @@ mod tests { ); Ok(()) } + + #[test] + fn nvidia_nim_provider_uses_nim_defaults() -> Result<()> { + let config = Config { + provider: Some("nvidia-nim".to_string()), + ..Default::default() + }; + + config.validate()?; + assert_eq!(config.api_provider(), ApiProvider::NvidiaNim); + assert_eq!(config.default_model(), DEFAULT_NVIDIA_NIM_MODEL); + assert_eq!(config.deepseek_base_url(), DEFAULT_NVIDIA_NIM_BASE_URL); + Ok(()) + } + + #[test] + fn nvidia_nim_provider_normalizes_deepseek_v4_pro_alias() -> Result<()> { + let _lock = lock_test_env(); + let nanos = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); + let temp_root = env::temp_dir().join(format!( + "deepseek-tui-nim-model-alias-test-{}-{}", + std::process::id(), + nanos + )); + fs::create_dir_all(&temp_root)?; + let _guard = EnvGuard::new(&temp_root); + + let config_path = temp_root.join(".deepseek").join("config.toml"); + ensure_parent_dir(&config_path)?; + fs::write( + &config_path, + "provider = \"nvidia-nim\"\ndefault_text_model = \"deepseek-v4-pro\"\napi_key = \"nim-key\"\n", + )?; + + let config = Config::load(None, None)?; + assert_eq!(config.api_provider(), ApiProvider::NvidiaNim); + assert_eq!( + config.default_text_model.as_deref(), + Some(DEFAULT_NVIDIA_NIM_MODEL) + ); + Ok(()) + } + + #[test] + fn nvidia_nim_provider_normalizes_deepseek_v4_flash_alias() -> Result<()> { + let config = Config { + provider: 
Some("nvidia-nim".to_string()), + default_text_model: Some("deepseek-v4-flash".to_string()), + ..Default::default() + }; + + config.validate()?; + assert_eq!(config.default_model(), DEFAULT_NVIDIA_NIM_FLASH_MODEL); + Ok(()) + } + + #[test] + fn nvidia_nim_env_overrides_provider_and_credentials() -> Result<()> { + let _lock = lock_test_env(); + let nanos = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); + let temp_root = env::temp_dir().join(format!( + "deepseek-tui-nim-env-test-{}-{}", + std::process::id(), + nanos + )); + fs::create_dir_all(&temp_root)?; + let _guard = EnvGuard::new(&temp_root); + + // Safety: test-only environment mutation guarded by a global mutex. + unsafe { + env::set_var("DEEPSEEK_PROVIDER", "nvidia-nim"); + env::set_var("NVIDIA_API_KEY", "nim-env-key"); + env::set_var("NVIDIA_NIM_MODEL", "deepseek-ai/deepseek-v4-pro"); + } + + let config = Config::load(None, None)?; + assert_eq!(config.api_provider(), ApiProvider::NvidiaNim); + assert_eq!(config.deepseek_api_key()?, "nim-env-key"); + assert_eq!(config.default_model(), DEFAULT_NVIDIA_NIM_MODEL); + Ok(()) + } + + #[test] + fn nvidia_nim_reads_facade_provider_table() -> Result<()> { + let _lock = lock_test_env(); + let nanos = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_nanos(); + let temp_root = env::temp_dir().join(format!( + "deepseek-tui-nim-provider-table-test-{}-{}", + std::process::id(), + nanos + )); + fs::create_dir_all(&temp_root)?; + let _guard = EnvGuard::new(&temp_root); + + let config_path = temp_root.join(".deepseek").join("config.toml"); + ensure_parent_dir(&config_path)?; + fs::write( + &config_path, + r#"provider = "nvidia-nim" +default_text_model = "deepseek-v4-flash" + +[providers.nvidia_nim] +api_key = "nim-table-key" +base_url = "https://nim-table.example/v1" +model = "deepseek-v4-pro" +"#, + )?; + + let config = Config::load(None, None)?; + assert_eq!(config.api_provider(), ApiProvider::NvidiaNim); + 
assert_eq!(config.deepseek_api_key()?, "nim-table-key"); + assert_eq!(config.deepseek_base_url(), "https://nim-table.example/v1"); + assert_eq!(config.default_model(), DEFAULT_NVIDIA_NIM_MODEL); + Ok(()) + } } diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs index 0ab8542c..6a56656a 100644 --- a/crates/tui/src/main.rs +++ b/crates/tui/src/main.rs @@ -51,7 +51,7 @@ mod ui; mod utils; mod working_set; -use crate::config::{Config, DEFAULT_TEXT_MODEL, MAX_SUBAGENTS}; +use crate::config::{Config, MAX_SUBAGENTS}; use crate::eval::{EvalHarness, EvalHarnessConfig, ScenarioStepKind}; use crate::features::Feature; use crate::llm_client::LlmClient; @@ -507,7 +507,7 @@ async fn main() -> Result<()> { let model = args .model .or_else(|| config.default_text_model.clone()) - .unwrap_or_else(|| DEFAULT_TEXT_MODEL.to_string()); + .unwrap_or_else(|| config.default_model()); if args.auto || cli.yolo { let workspace = cli.workspace.clone().unwrap_or_else(|| { std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")) @@ -603,10 +603,7 @@ async fn main() -> Result<()> { // One-shot prompt mode let config = load_config_from_cli(&cli)?; if let Some(prompt) = cli.prompt { - let model = config - .default_text_model - .clone() - .unwrap_or_else(|| DEFAULT_TEXT_MODEL.to_string()); + let model = config.default_model(); return run_one_shot(&config, &model, &prompt).await; } @@ -1230,10 +1227,7 @@ async fn run_models(config: &Config, args: ModelsArgs) -> Result<()> { return Ok(()); } - let default_model = config - .default_text_model - .clone() - .unwrap_or_else(|| DEFAULT_TEXT_MODEL.to_string()); + let default_model = config.default_model(); println!("Available models (default: {default_model})"); for model in models { @@ -1531,7 +1525,7 @@ async fn run_review(config: &Config, args: ReviewArgs) -> Result<()> { let model = args .model .or_else(|| config.default_text_model.clone()) - .unwrap_or_else(|| DEFAULT_TEXT_MODEL.to_string()); + .unwrap_or_else(|| config.default_model()); 
let system = SystemPrompt::Text( "You are a senior code reviewer. Focus on bugs, risks, behavioral regressions, and missing tests. \ @@ -2146,10 +2140,7 @@ async fn run_interactive( .workspace .clone() .unwrap_or_else(|| std::env::current_dir().unwrap_or_else(|_| PathBuf::from("."))); - let model = config - .default_text_model - .clone() - .unwrap_or_else(|| DEFAULT_TEXT_MODEL.to_string()); + let model = config.default_model(); let max_subagents = cli.max_subagents.map_or_else( || config.max_subagents(), |value| value.clamp(1, MAX_SUBAGENTS), diff --git a/crates/tui/src/models.rs b/crates/tui/src/models.rs index 68fdfaac..c2cdc842 100644 --- a/crates/tui/src/models.rs +++ b/crates/tui/src/models.rs @@ -405,6 +405,10 @@ mod tests { context_window_for_model("deepseek-v4-flash"), Some(DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS) ); + assert_eq!( + context_window_for_model("deepseek-ai/deepseek-v4-pro"), + Some(DEEPSEEK_V4_CONTEXT_WINDOW_TOKENS) + ); } #[test] diff --git a/crates/tui/src/pricing.rs b/crates/tui/src/pricing.rs index 70d78048..3b39eabf 100644 --- a/crates/tui/src/pricing.rs +++ b/crates/tui/src/pricing.rs @@ -14,6 +14,11 @@ struct ModelPricing { /// Look up pricing for a model name. fn pricing_for_model(model: &str) -> Option { let lower = model.to_lowercase(); + if lower.starts_with("deepseek-ai/") { + // NVIDIA NIM-hosted DeepSeek uses NVIDIA's catalog/account terms, not + // DeepSeek Platform pricing. Avoid showing misleading DeepSeek costs. 
+ return None; + } if !lower.contains("deepseek") { return None; } @@ -76,3 +81,13 @@ pub fn format_cost(cost: f64) -> String { format!("${:.2}", cost) } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn nvidia_nim_deepseek_model_does_not_use_deepseek_platform_pricing() { + assert!(calculate_turn_cost("deepseek-ai/deepseek-v4-pro", 1_000, 1_000).is_none()); + } +} diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index 1e0c7f95..39120754 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -20,6 +20,13 @@ DeepSeek auth and model defaults. `deepseek login --api-key ...` writes the root `api_key` field that `deepseek-tui` reads directly, and `deepseek --model deepseek-v4-flash` is forwarded to the TUI as `DEEPSEEK_MODEL`. +For NVIDIA NIM-hosted DeepSeek V4 Pro, set `provider = "nvidia-nim"` or pass +`deepseek --provider nvidia-nim`. The facade stores NIM credentials under +`[providers.nvidia_nim]` and forwards the resolved key, base URL, provider, and +model to the TUI process. Use +`deepseek auth set --provider nvidia-nim --api-key "YOUR_NVIDIA_API_KEY"` to +save the NIM key through the facade. + To bootstrap MCP and skills directories at their resolved paths, run `deepseek-tui setup`. To only scaffold MCP, run `deepseek-tui mcp init`. 
@@ -40,6 +47,12 @@ default_text_model = "deepseek-v4-pro" [profiles.work] api_key = "WORK_KEY" base_url = "https://api.deepseek.com" + +[profiles.nvidia-nim] +provider = "nvidia-nim" +api_key = "NVIDIA_KEY" +base_url = "https://integrate.api.nvidia.com/v1" +default_text_model = "deepseek-ai/deepseek-v4-pro" ``` Select a profile with: @@ -55,7 +68,11 @@ These override config values: - `DEEPSEEK_API_KEY` - `DEEPSEEK_BASE_URL` +- `DEEPSEEK_PROVIDER` (`deepseek|nvidia-nim`) - `DEEPSEEK_MODEL` or `DEEPSEEK_DEFAULT_TEXT_MODEL` +- `NVIDIA_API_KEY` or `NVIDIA_NIM_API_KEY` (used when provider is `nvidia-nim`) +- `NVIDIA_NIM_BASE_URL` or `NVIDIA_BASE_URL` (used when provider is `nvidia-nim`; `NVIDIA_NIM_BASE_URL` takes precedence if both are set) +- `NVIDIA_NIM_MODEL` (used when provider is `nvidia-nim`) - `DEEPSEEK_SKILLS_DIR` - `DEEPSEEK_MCP_CONFIG` - `DEEPSEEK_NOTES_PATH` @@ -128,10 +145,11 @@ If you are upgrading from older releases: ### Core keys (used by the TUI/engine) +- `provider` (string, optional): `deepseek` (default) or `nvidia-nim`. `nvidia-nim` targets NVIDIA's NIM-hosted DeepSeek endpoints through `https://integrate.api.nvidia.com/v1`. - `api_key` (string, required): must be non-empty (or set `DEEPSEEK_API_KEY`). -- `base_url` (string, optional): defaults to `https://api.deepseek.com` for DeepSeek's OpenAI-compatible Chat Completions API. `https://api.deepseek.com/v1` is also accepted for SDK compatibility; use `https://api.deepseek.com/beta` only for DeepSeek beta features such as strict tool mode, chat prefix completion, and FIM completion. -- `default_text_model` (string, optional): defaults to `deepseek-v4-pro`. Current public DeepSeek IDs are `deepseek-v4-pro` and `deepseek-v4-flash`, both with 1M context windows and thinking mode enabled by default. Legacy `deepseek-chat` and `deepseek-reasoner` remain compatibility aliases for `deepseek-v4-flash`. Use `/models` or `deepseek models` to discover live IDs from your configured endpoint. `DEEPSEEK_MODEL` overrides this for a single process.
-- `reasoning_effort` (string, optional): `off`, `low`, `medium`, `high`, or `max`; defaults to the configured UI tier. `off` sends `thinking = {"type": "disabled"}`. `low` and `medium` are normalized to `high`; `max` sends `reasoning_effort = "max"`. +- `base_url` (string, optional): defaults to `https://api.deepseek.com` for DeepSeek's OpenAI-compatible Chat Completions API, or `https://integrate.api.nvidia.com/v1` for `provider = "nvidia-nim"`. `https://api.deepseek.com/v1` is also accepted for SDK compatibility; use `https://api.deepseek.com/beta` only for DeepSeek beta features such as strict tool mode, chat prefix completion, and FIM completion. +- `default_text_model` (string, optional): defaults to `deepseek-v4-pro` for DeepSeek or `deepseek-ai/deepseek-v4-pro` for NVIDIA NIM. Current public DeepSeek IDs are `deepseek-v4-pro` and `deepseek-v4-flash`, both with 1M context windows and thinking mode enabled by default. Legacy `deepseek-chat` and `deepseek-reasoner` remain compatibility aliases for `deepseek-v4-flash`. With `provider = "nvidia-nim"`, `deepseek-v4-pro` maps to `deepseek-ai/deepseek-v4-pro` and `deepseek-v4-flash` maps to `deepseek-ai/deepseek-v4-flash`. Use `/models` or `deepseek models` to discover live IDs from your configured endpoint. `DEEPSEEK_MODEL` overrides this for a single process. +- `reasoning_effort` (string, optional): `off`, `low`, `medium`, `high`, or `max`; defaults to the configured UI tier. DeepSeek Platform receives top-level `thinking` / `reasoning_effort` fields. NVIDIA NIM receives equivalent settings through `chat_template_kwargs`. - `allow_shell` (bool, optional): defaults to `true` (sandboxed). - `approval_policy` (string, optional): `on-request`, `untrusted`, or `never`. Runtime `approval_mode` editing in `/config` also accepts `on-request` and `untrusted` aliases. - `sandbox_mode` (string, optional): `read-only`, `workspace-write`, `danger-full-access`, `external-sandbox`. 
diff --git a/npm/deepseek-tui/README.md b/npm/deepseek-tui/README.md index 482cb246..13c71e96 100644 --- a/npm/deepseek-tui/README.md +++ b/npm/deepseek-tui/README.md @@ -37,6 +37,19 @@ The app talks to DeepSeek's documented OpenAI-compatible Chat Completions API. Set `DEEPSEEK_BASE_URL` only if you need the China endpoint or DeepSeek beta features such as strict tool mode, chat prefix completion, or FIM completion. +NVIDIA NIM-hosted DeepSeek V4 Pro is also supported: + +```bash +deepseek auth set --provider nvidia-nim --api-key "YOUR_NVIDIA_API_KEY" +deepseek --provider nvidia-nim +``` + +For a single process, set `DEEPSEEK_PROVIDER=nvidia-nim` and either `NVIDIA_API_KEY` +or `NVIDIA_NIM_API_KEY`. The NIM default model is +`deepseek-ai/deepseek-v4-pro` and the default base URL is +`https://integrate.api.nvidia.com/v1`. With `--provider nvidia-nim`, +`--model deepseek-v4-flash` maps to `deepseek-ai/deepseek-v4-flash`. + ## Supported platforms - Linux x64