From e9f0c1c13be72ecad575423ff231dc8b4177a54a Mon Sep 17 00:00:00 2001 From: Hunter B Date: Fri, 12 Jun 2026 22:05:22 -0700 Subject: [PATCH] feat(config): add first-party MiniMax provider route Refs #1310. Adds the direct minimax provider slot, auth/config/env bindings, model aliases, docs, and picker/status surfaces. MiniMax requests now set reasoning_split and preserve reasoning_details history so thinking stays out of answer text. Also updates Moonshot/Kimi reasoning classification for the Kimi Code route so reasoning_content streams into Thinking cells instead of inline prose. --- crates/cli/src/lib.rs | 2 + crates/config/src/lib.rs | 197 ++++++++++++++++-- crates/config/src/provider.rs | 40 ++-- crates/tui/src/client.rs | 52 +++++ crates/tui/src/client/chat.rs | 283 ++++++++++++++++++++++++-- crates/tui/src/config.rs | 169 ++++++++++++++- crates/tui/src/config_persistence.rs | 1 + crates/tui/src/core/engine.rs | 5 +- crates/tui/src/main.rs | 9 +- crates/tui/src/models.rs | 34 +++- crates/tui/src/tui/provider_picker.rs | 9 +- crates/tui/src/tui/ui.rs | 3 + docs/CONFIGURATION.md | 6 +- docs/PROVIDERS.md | 10 +- 14 files changed, 759 insertions(+), 61 deletions(-) diff --git a/crates/cli/src/lib.rs b/crates/cli/src/lib.rs index 65dab59e..dcd149b2 100644 --- a/crates/cli/src/lib.rs +++ b/crates/cli/src/lib.rs @@ -782,6 +782,7 @@ fn provider_slot(provider: ProviderKind) -> &'static str { ProviderKind::Anthropic => "anthropic", ProviderKind::Zai => "zai", ProviderKind::Stepfun => "stepfun", + ProviderKind::Minimax => "minimax", } } @@ -919,6 +920,7 @@ fn provider_env_vars(provider: ProviderKind) -> &'static [&'static str] { ProviderKind::Anthropic => &["ANTHROPIC_API_KEY"], ProviderKind::Zai => &["ZAI_API_KEY", "Z_AI_API_KEY"], ProviderKind::Stepfun => &["STEPFUN_API_KEY", "STEP_API_KEY"], + ProviderKind::Minimax => &["MINIMAX_API_KEY"], } } diff --git a/crates/config/src/lib.rs b/crates/config/src/lib.rs index dc058e08..19ad3698 100644 --- 
a/crates/config/src/lib.rs +++ b/crates/config/src/lib.rs @@ -108,6 +108,16 @@ const DEFAULT_ZAI_BASE_URL: &str = "https://api.z.ai/api/coding/paas/v4"; // StepFun / StepFlash defaults const DEFAULT_STEPFUN_MODEL: &str = "step-3.7-flash"; const DEFAULT_STEPFUN_BASE_URL: &str = "https://api.stepfun.ai/v1"; +// MiniMax defaults +const DEFAULT_MINIMAX_MODEL: &str = "MiniMax-M3"; +const MINIMAX_M2_7_MODEL: &str = "MiniMax-M2.7"; +const MINIMAX_M2_7_HIGHSPEED_MODEL: &str = "MiniMax-M2.7-highspeed"; +const MINIMAX_M2_5_MODEL: &str = "MiniMax-M2.5"; +const MINIMAX_M2_5_HIGHSPEED_MODEL: &str = "MiniMax-M2.5-highspeed"; +const MINIMAX_M2_1_MODEL: &str = "MiniMax-M2.1"; +const MINIMAX_M2_1_HIGHSPEED_MODEL: &str = "MiniMax-M2.1-highspeed"; +const MINIMAX_M2_MODEL: &str = "MiniMax-M2"; +const DEFAULT_MINIMAX_BASE_URL: &str = "https://api.minimax.io/v1"; #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)] #[serde(rename_all = "kebab-case")] @@ -165,11 +175,7 @@ pub enum ProviderKind { OpenaiCodex, #[serde(alias = "claude")] Anthropic, - #[serde( - alias = "z-ai", - alias = "z_ai", - alias = "z.ai" - )] + #[serde(alias = "z-ai", alias = "z_ai", alias = "z.ai")] Zai, #[serde( alias = "step-fun", @@ -180,10 +186,12 @@ pub enum ProviderKind { alias = "step_flash" )] Stepfun, + #[serde(alias = "mini-max", alias = "mini_max", alias = "minimax")] + Minimax, } impl ProviderKind { - pub const ALL: [Self; 23] = [ + pub const ALL: [Self; 24] = [ Self::Deepseek, Self::NvidiaNim, Self::Openai, @@ -207,6 +215,7 @@ impl ProviderKind { Self::Anthropic, Self::Zai, Self::Stepfun, + Self::Minimax, ]; #[must_use] @@ -235,6 +244,7 @@ impl ProviderKind { Self::Anthropic, Self::Zai, Self::Stepfun, + Self::Minimax, ] } @@ -343,12 +353,7 @@ pub struct ProvidersToml { pub openai_codex: ProviderConfigToml, #[serde(default)] pub anthropic: ProviderConfigToml, - #[serde( - default, - alias = "z-ai", - alias = "z_ai", - alias = "z.ai" - )] + #[serde(default, alias = "z-ai", alias = 
"z_ai", alias = "z.ai")] pub zai: ProviderConfigToml, #[serde( default, @@ -360,6 +365,8 @@ pub struct ProvidersToml { alias = "step_flash" )] pub stepfun: ProviderConfigToml, + #[serde(default, alias = "mini-max", alias = "mini_max", alias = "minimax")] + pub minimax: ProviderConfigToml, } /// Sibling `permissions.toml` schema. @@ -413,6 +420,7 @@ impl ProvidersToml { ProviderKind::Anthropic => &self.anthropic, ProviderKind::Zai => &self.zai, ProviderKind::Stepfun => &self.stepfun, + ProviderKind::Minimax => &self.minimax, } } @@ -441,6 +449,7 @@ impl ProvidersToml { ProviderKind::Anthropic => &mut self.anthropic, ProviderKind::Zai => &mut self.zai, ProviderKind::Stepfun => &mut self.stepfun, + ProviderKind::Minimax => &mut self.minimax, } } } @@ -2166,6 +2175,7 @@ impl ConfigToml { ProviderKind::Anthropic => DEFAULT_ANTHROPIC_BASE_URL.to_string(), ProviderKind::Zai => DEFAULT_ZAI_BASE_URL.to_string(), ProviderKind::Stepfun => DEFAULT_STEPFUN_BASE_URL.to_string(), + ProviderKind::Minimax => DEFAULT_MINIMAX_BASE_URL.to_string(), }) }; // CLI flag wins outright. Otherwise: config-file → injected secrets/env. 
@@ -2379,6 +2389,11 @@ fn normalize_model_for_provider(provider: ProviderKind, model: &str) -> String { { return canonical.to_string(); } + if matches!(provider, ProviderKind::Minimax) + && let Some(canonical) = canonical_minimax_model_id(model) + { + return canonical.to_string(); + } if matches!( provider, @@ -2386,6 +2401,9 @@ fn normalize_model_for_provider(provider: ProviderKind, model: &str) -> String { | ProviderKind::WanjieArk | ProviderKind::Volcengine | ProviderKind::XiaomiMimo + | ProviderKind::Zai + | ProviderKind::Stepfun + | ProviderKind::Minimax | ProviderKind::Ollama ) { return model.to_string(); @@ -2539,6 +2557,39 @@ fn canonical_xiaomi_mimo_model_id(model: &str) -> Option<&'static str> { } } +fn canonical_minimax_model_id(model: &str) -> Option<&'static str> { + let normalized = model.trim().to_ascii_lowercase(); + let normalized = normalized.replace(['_', ' '], "-"); + match normalized.as_str() { + "minimax" | "minimax-m3" | "minimax-m-3" | "minimax-m-3-thinking" => { + Some(DEFAULT_MINIMAX_MODEL) + } + "minimax-m2.7" | "minimax-m2-7" | "minimax-m-2.7" | "minimax-m-2-7" => { + Some(MINIMAX_M2_7_MODEL) + } + "minimax-m2.7-highspeed" + | "minimax-m2-7-highspeed" + | "minimax-m-2.7-highspeed" + | "minimax-m-2-7-highspeed" => Some(MINIMAX_M2_7_HIGHSPEED_MODEL), + "minimax-m2.5" | "minimax-m2-5" | "minimax-m-2.5" | "minimax-m-2-5" => { + Some(MINIMAX_M2_5_MODEL) + } + "minimax-m2.5-highspeed" + | "minimax-m2-5-highspeed" + | "minimax-m-2.5-highspeed" + | "minimax-m-2-5-highspeed" => Some(MINIMAX_M2_5_HIGHSPEED_MODEL), + "minimax-m2.1" | "minimax-m2-1" | "minimax-m-2.1" | "minimax-m-2-1" => { + Some(MINIMAX_M2_1_MODEL) + } + "minimax-m2.1-highspeed" + | "minimax-m2-1-highspeed" + | "minimax-m-2.1-highspeed" + | "minimax-m-2-1-highspeed" => Some(MINIMAX_M2_1_HIGHSPEED_MODEL), + "minimax-m2" | "minimax-m-2" => Some(MINIMAX_M2_MODEL), + _ => None, + } +} + fn canonical_openrouter_recent_model_id(model: &str) -> Option<&'static str> { let normalized = 
model.trim().to_ascii_lowercase(); let normalized = normalized.replace(['_', ' '], "-"); @@ -2634,6 +2685,7 @@ fn default_model_for_provider(provider: ProviderKind) -> &'static str { ProviderKind::Anthropic => DEFAULT_ANTHROPIC_MODEL, ProviderKind::Zai => DEFAULT_ZAI_MODEL, ProviderKind::Stepfun => DEFAULT_STEPFUN_MODEL, + ProviderKind::Minimax => DEFAULT_MINIMAX_MODEL, } } @@ -2662,6 +2714,7 @@ fn default_base_url_for_provider(provider: ProviderKind) -> &'static str { ProviderKind::Anthropic => DEFAULT_ANTHROPIC_BASE_URL, ProviderKind::Zai => DEFAULT_ZAI_BASE_URL, ProviderKind::Stepfun => DEFAULT_STEPFUN_BASE_URL, + ProviderKind::Minimax => DEFAULT_MINIMAX_BASE_URL, } } @@ -3447,6 +3500,8 @@ struct EnvRuntimeOverrides { zai_model: Option, stepfun_base_url: Option, stepfun_model: Option, + minimax_base_url: Option, + minimax_model: Option, } impl EnvRuntimeOverrides { @@ -3634,6 +3689,12 @@ impl EnvRuntimeOverrides { .or_else(|_| std::env::var("STEP_MODEL")) .ok() .filter(|v| !v.trim().is_empty()), + minimax_base_url: std::env::var("MINIMAX_BASE_URL") + .ok() + .filter(|v| !v.trim().is_empty()), + minimax_model: std::env::var("MINIMAX_MODEL") + .ok() + .filter(|v| !v.trim().is_empty()), } } @@ -3679,6 +3740,7 @@ impl EnvRuntimeOverrides { ProviderKind::Anthropic => self.anthropic_base_url.clone(), ProviderKind::Zai => self.zai_base_url.clone(), ProviderKind::Stepfun => self.stepfun_base_url.clone(), + ProviderKind::Minimax => self.minimax_base_url.clone(), } } @@ -3699,6 +3761,9 @@ impl EnvRuntimeOverrides { ProviderKind::Together => self.together_model.clone(), ProviderKind::OpenaiCodex => self.openai_codex_model.clone(), ProviderKind::Anthropic => self.anthropic_model.clone(), + ProviderKind::Zai => self.zai_model.clone(), + ProviderKind::Stepfun => self.stepfun_model.clone(), + ProviderKind::Minimax => self.minimax_model.clone(), _ => None, }?; @@ -4096,6 +4161,17 @@ action = "mode.agent" kimi_base_url: Option, kimi_model: Option, kimi_model_name: Option, + 
zai_api_key: Option, + z_ai_api_key: Option, + zai_base_url: Option, + zai_model: Option, + stepfun_api_key: Option, + step_api_key: Option, + stepfun_base_url: Option, + stepfun_model: Option, + minimax_api_key: Option, + minimax_base_url: Option, + minimax_model: Option, sglang_api_key: Option, sglang_base_url: Option, vllm_api_key: Option, @@ -4179,6 +4255,17 @@ action = "mode.agent" kimi_base_url: env::var_os("KIMI_BASE_URL"), kimi_model: env::var_os("KIMI_MODEL"), kimi_model_name: env::var_os("KIMI_MODEL_NAME"), + zai_api_key: env::var_os("ZAI_API_KEY"), + z_ai_api_key: env::var_os("Z_AI_API_KEY"), + zai_base_url: env::var_os("ZAI_BASE_URL"), + zai_model: env::var_os("ZAI_MODEL"), + stepfun_api_key: env::var_os("STEPFUN_API_KEY"), + step_api_key: env::var_os("STEP_API_KEY"), + stepfun_base_url: env::var_os("STEPFUN_BASE_URL"), + stepfun_model: env::var_os("STEPFUN_MODEL"), + minimax_api_key: env::var_os("MINIMAX_API_KEY"), + minimax_base_url: env::var_os("MINIMAX_BASE_URL"), + minimax_model: env::var_os("MINIMAX_MODEL"), sglang_api_key: env::var_os("SGLANG_API_KEY"), sglang_base_url: env::var_os("SGLANG_BASE_URL"), vllm_api_key: env::var_os("VLLM_API_KEY"), @@ -4257,6 +4344,17 @@ action = "mode.agent" env::remove_var("KIMI_BASE_URL"); env::remove_var("KIMI_MODEL"); env::remove_var("KIMI_MODEL_NAME"); + env::remove_var("ZAI_API_KEY"); + env::remove_var("Z_AI_API_KEY"); + env::remove_var("ZAI_BASE_URL"); + env::remove_var("ZAI_MODEL"); + env::remove_var("STEPFUN_API_KEY"); + env::remove_var("STEP_API_KEY"); + env::remove_var("STEPFUN_BASE_URL"); + env::remove_var("STEPFUN_MODEL"); + env::remove_var("MINIMAX_API_KEY"); + env::remove_var("MINIMAX_BASE_URL"); + env::remove_var("MINIMAX_MODEL"); env::remove_var("SGLANG_API_KEY"); env::remove_var("SGLANG_BASE_URL"); env::remove_var("VLLM_API_KEY"); @@ -4361,6 +4459,17 @@ action = "mode.agent" Self::restore_var("KIMI_BASE_URL", self.kimi_base_url.take()); Self::restore_var("KIMI_MODEL", self.kimi_model.take()); 
Self::restore_var("KIMI_MODEL_NAME", self.kimi_model_name.take()); + Self::restore_var("ZAI_API_KEY", self.zai_api_key.take()); + Self::restore_var("Z_AI_API_KEY", self.z_ai_api_key.take()); + Self::restore_var("ZAI_BASE_URL", self.zai_base_url.take()); + Self::restore_var("ZAI_MODEL", self.zai_model.take()); + Self::restore_var("STEPFUN_API_KEY", self.stepfun_api_key.take()); + Self::restore_var("STEP_API_KEY", self.step_api_key.take()); + Self::restore_var("STEPFUN_BASE_URL", self.stepfun_base_url.take()); + Self::restore_var("STEPFUN_MODEL", self.stepfun_model.take()); + Self::restore_var("MINIMAX_API_KEY", self.minimax_api_key.take()); + Self::restore_var("MINIMAX_BASE_URL", self.minimax_base_url.take()); + Self::restore_var("MINIMAX_MODEL", self.minimax_model.take()); Self::restore_var("SGLANG_API_KEY", self.sglang_api_key.take()); Self::restore_var("SGLANG_BASE_URL", self.sglang_base_url.take()); Self::restore_var("VLLM_API_KEY", self.vllm_api_key.take()); @@ -5730,6 +5839,70 @@ mode = "token-plan-usa" assert_eq!(resolved.model, DEFAULT_MOONSHOT_MODEL); } + #[test] + fn zai_stepfun_and_minimax_default_to_first_party_routes() { + let _lock = env_lock(); + let _env = EnvGuard::without_deepseek_runtime_overrides(); + + for (provider, expected_base_url, expected_model) in [ + (ProviderKind::Zai, DEFAULT_ZAI_BASE_URL, DEFAULT_ZAI_MODEL), + ( + ProviderKind::Stepfun, + DEFAULT_STEPFUN_BASE_URL, + DEFAULT_STEPFUN_MODEL, + ), + ( + ProviderKind::Minimax, + DEFAULT_MINIMAX_BASE_URL, + DEFAULT_MINIMAX_MODEL, + ), + ] { + let config = ConfigToml { + provider, + ..ConfigToml::default() + }; + let resolved = config.resolve_runtime_options(&CliRuntimeOverrides::default()); + + assert_eq!(resolved.provider, provider); + assert_eq!(resolved.base_url, expected_base_url); + assert_eq!(resolved.model, expected_model); + } + } + + #[test] + fn first_party_provider_env_model_overrides_pass_through() { + let _lock = env_lock(); + let _env = 
EnvGuard::without_deepseek_runtime_overrides(); + unsafe { + env::set_var("CODEWHALE_PROVIDER", "minimax"); + env::set_var("MINIMAX_MODEL", "MiniMax-M2.7-highspeed"); + env::set_var("MINIMAX_BASE_URL", "https://minimax.example/v1"); + } + + let resolved = + ConfigToml::default().resolve_runtime_options(&CliRuntimeOverrides::default()); + + assert_eq!(resolved.provider, ProviderKind::Minimax); + assert_eq!(resolved.base_url, "https://minimax.example/v1"); + assert_eq!(resolved.model, "MiniMax-M2.7-highspeed"); + } + + #[test] + fn minimax_env_model_override_canonicalizes_known_aliases() { + let _lock = env_lock(); + let _env = EnvGuard::without_deepseek_runtime_overrides(); + unsafe { + env::set_var("CODEWHALE_PROVIDER", "minimax"); + env::set_var("MINIMAX_MODEL", "minimax-m2-5-highspeed"); + } + + let resolved = + ConfigToml::default().resolve_runtime_options(&CliRuntimeOverrides::default()); + + assert_eq!(resolved.provider, ProviderKind::Minimax); + assert_eq!(resolved.model, "MiniMax-M2.5-highspeed"); + } + #[test] fn moonshot_provider_preserves_explicit_kimi_k26() { let _lock = env_lock(); diff --git a/crates/config/src/provider.rs b/crates/config/src/provider.rs index b3c4e8c0..f850a5bf 100644 --- a/crates/config/src/provider.rs +++ b/crates/config/src/provider.rs @@ -8,18 +8,18 @@ use super::{ DEFAULT_ARCEE_BASE_URL, DEFAULT_ARCEE_MODEL, DEFAULT_ATLASCLOUD_BASE_URL, DEFAULT_ATLASCLOUD_MODEL, DEFAULT_DEEPSEEK_BASE_URL, DEFAULT_DEEPSEEK_MODEL, DEFAULT_FIREWORKS_BASE_URL, DEFAULT_FIREWORKS_MODEL, DEFAULT_HUGGINGFACE_BASE_URL, - DEFAULT_HUGGINGFACE_MODEL, DEFAULT_MOONSHOT_BASE_URL, DEFAULT_MOONSHOT_MODEL, - DEFAULT_NOVITA_BASE_URL, DEFAULT_NOVITA_MODEL, DEFAULT_NVIDIA_NIM_BASE_URL, - DEFAULT_NVIDIA_NIM_MODEL, DEFAULT_OLLAMA_BASE_URL, DEFAULT_OLLAMA_MODEL, - DEFAULT_OPENAI_BASE_URL, DEFAULT_OPENAI_CODEX_BASE_URL, DEFAULT_OPENAI_CODEX_MODEL, - DEFAULT_OPENAI_MODEL, DEFAULT_OPENROUTER_BASE_URL, DEFAULT_OPENROUTER_MODEL, - DEFAULT_SGLANG_BASE_URL, 
DEFAULT_SGLANG_MODEL, DEFAULT_SILICONFLOW_BASE_URL, - DEFAULT_SILICONFLOW_CN_BASE_URL, DEFAULT_SILICONFLOW_MODEL, DEFAULT_STEPFUN_BASE_URL, - DEFAULT_STEPFUN_MODEL, DEFAULT_TOGETHER_BASE_URL, DEFAULT_TOGETHER_MODEL, - DEFAULT_VLLM_BASE_URL, DEFAULT_VLLM_MODEL, DEFAULT_VOLCENGINE_BASE_URL, - DEFAULT_VOLCENGINE_MODEL, DEFAULT_WANJIE_ARK_BASE_URL, DEFAULT_WANJIE_ARK_MODEL, - DEFAULT_XIAOMI_MIMO_BASE_URL, DEFAULT_XIAOMI_MIMO_MODEL, DEFAULT_ZAI_BASE_URL, - DEFAULT_ZAI_MODEL, ProviderKind, + DEFAULT_HUGGINGFACE_MODEL, DEFAULT_MINIMAX_BASE_URL, DEFAULT_MINIMAX_MODEL, + DEFAULT_MOONSHOT_BASE_URL, DEFAULT_MOONSHOT_MODEL, DEFAULT_NOVITA_BASE_URL, + DEFAULT_NOVITA_MODEL, DEFAULT_NVIDIA_NIM_BASE_URL, DEFAULT_NVIDIA_NIM_MODEL, + DEFAULT_OLLAMA_BASE_URL, DEFAULT_OLLAMA_MODEL, DEFAULT_OPENAI_BASE_URL, + DEFAULT_OPENAI_CODEX_BASE_URL, DEFAULT_OPENAI_CODEX_MODEL, DEFAULT_OPENAI_MODEL, + DEFAULT_OPENROUTER_BASE_URL, DEFAULT_OPENROUTER_MODEL, DEFAULT_SGLANG_BASE_URL, + DEFAULT_SGLANG_MODEL, DEFAULT_SILICONFLOW_BASE_URL, DEFAULT_SILICONFLOW_CN_BASE_URL, + DEFAULT_SILICONFLOW_MODEL, DEFAULT_STEPFUN_BASE_URL, DEFAULT_STEPFUN_MODEL, + DEFAULT_TOGETHER_BASE_URL, DEFAULT_TOGETHER_MODEL, DEFAULT_VLLM_BASE_URL, DEFAULT_VLLM_MODEL, + DEFAULT_VOLCENGINE_BASE_URL, DEFAULT_VOLCENGINE_MODEL, DEFAULT_WANJIE_ARK_BASE_URL, + DEFAULT_WANJIE_ARK_MODEL, DEFAULT_XIAOMI_MIMO_BASE_URL, DEFAULT_XIAOMI_MIMO_MODEL, + DEFAULT_ZAI_BASE_URL, DEFAULT_ZAI_MODEL, ProviderKind, }; /// Wire protocol spoken by a provider. 
@@ -460,6 +460,18 @@ provider!( aliases: ["step-fun", "step_fun", "stepflash", "step-flash", "step_flash"] ); +provider!( + Minimax, + Minimax, + "minimax", + "MiniMax", + DEFAULT_MINIMAX_BASE_URL, + DEFAULT_MINIMAX_MODEL, + ["MINIMAX_API_KEY"], + "minimax", + aliases: ["mini-max", "mini_max"] +); + static DEEPSEEK: Deepseek = Deepseek; static NVIDIA_NIM: NvidiaNim = NvidiaNim; static OPENAI: Openai = Openai; @@ -483,8 +495,9 @@ static OPENAI_CODEX: OpenaiCodex = OpenaiCodex; static ANTHROPIC: Anthropic = Anthropic; static ZAI: Zai = Zai; static STEPFUN: Stepfun = Stepfun; +static MINIMAX: Minimax = Minimax; -static PROVIDER_REGISTRY: [&dyn Provider; 23] = [ +static PROVIDER_REGISTRY: [&dyn Provider; 24] = [ &DEEPSEEK, &NVIDIA_NIM, &OPENAI, @@ -508,6 +521,7 @@ static PROVIDER_REGISTRY: [&dyn Provider; 23] = [ &ANTHROPIC, &ZAI, &STEPFUN, + &MINIMAX, ]; /// Return all built-in provider metadata entries in `ProviderKind::ALL` order. diff --git a/crates/tui/src/client.rs b/crates/tui/src/client.rs index 0878e99a..b3e0aff7 100644 --- a/crates/tui/src/client.rs +++ b/crates/tui/src/client.rs @@ -1235,6 +1235,13 @@ pub(super) fn apply_reasoning_effort( effort: Option<&str>, provider: ApiProvider, ) { + if matches!(provider, ApiProvider::Minimax) { + // MiniMax's OpenAI-compatible API keeps thinking inside `content` + // unless reasoning_split is enabled. Always request the split shape + // so private thinking renders as Thinking cells rather than answer + // prose. 
+ body["reasoning_split"] = json!(true); + } let Some(effort) = effort else { return; }; @@ -1294,6 +1301,9 @@ pub(super) fn apply_reasoning_effort( "thinking": false, }); } + ApiProvider::Minimax => { + body["thinking"] = json!({ "type": "disabled" }); + } ApiProvider::Zai | ApiProvider::Stepfun => {} }, "low" | "minimal" | "medium" | "mid" | "high" | "" => match provider { @@ -1368,6 +1378,9 @@ pub(super) fn apply_reasoning_effort( "reasoning_effort": "high", }); } + ApiProvider::Minimax => { + body["thinking"] = json!({ "type": "adaptive" }); + } ApiProvider::Zai | ApiProvider::Stepfun => {} }, "xhigh" | "max" | "highest" => match provider { @@ -1422,6 +1435,9 @@ pub(super) fn apply_reasoning_effort( "reasoning_effort": "max", }); } + ApiProvider::Minimax => { + body["thinking"] = json!({ "type": "adaptive" }); + } ApiProvider::Zai | ApiProvider::Stepfun => {} }, _ => {} @@ -2836,6 +2852,36 @@ mod tests { assert!(body.get("reasoning_effort").is_none()); } + #[test] + fn reasoning_effort_minimax_splits_reasoning_from_content() { + let mut body = json!({}); + apply_reasoning_effort(&mut body, Some("high"), ApiProvider::Minimax); + assert_eq!( + body.get("reasoning_split").and_then(Value::as_bool), + Some(true) + ); + assert_eq!( + body.pointer("/thinking/type").and_then(Value::as_str), + Some("adaptive") + ); + assert!(body.get("reasoning_effort").is_none()); + + let mut body = json!({}); + apply_reasoning_effort(&mut body, Some("off"), ApiProvider::Minimax); + assert_eq!( + body.get("reasoning_split").and_then(Value::as_bool), + Some(true) + ); + assert_eq!( + body.pointer("/thinking/type").and_then(Value::as_str), + Some("disabled") + ); + + let mut body = json!({}); + apply_reasoning_effort(&mut body, None, ApiProvider::Minimax); + assert_eq!(body, json!({ "reasoning_split": true })); + } + #[test] fn chat_parser_accepts_nvidia_nim_reasoning_field() -> Result<()> { let response = parse_chat_message(&json!({ @@ -2872,6 +2918,7 @@ mod tests { let mut text_started 
= false; let mut thinking_started = false; let mut tool_indices = std::collections::HashMap::new(); + let mut reasoning_detail_buffers = std::collections::HashMap::new(); let events = parse_sse_chunk( &json!({ "choices": [{ @@ -2884,6 +2931,7 @@ mod tests { &mut text_started, &mut thinking_started, &mut tool_indices, + &mut reasoning_detail_buffers, true, ); @@ -3038,12 +3086,14 @@ mod tests { let mut thinking_started = false; let mut tool_indices: std::collections::HashMap = std::collections::HashMap::new(); + let mut reasoning_detail_buffers = std::collections::HashMap::new(); let events = parse_sse_chunk( &chunk, &mut content_index, &mut text_started, &mut thinking_started, &mut tool_indices, + &mut reasoning_detail_buffers, false, ); @@ -3097,12 +3147,14 @@ mod tests { let mut thinking_started = false; let mut tool_indices: std::collections::HashMap = std::collections::HashMap::new(); + let mut reasoning_detail_buffers = std::collections::HashMap::new(); let events = parse_sse_chunk( &chunk, &mut content_index, &mut text_started, &mut thinking_started, &mut tool_indices, + &mut reasoning_detail_buffers, false, ); diff --git a/crates/tui/src/client/chat.rs b/crates/tui/src/client/chat.rs index 53666567..726782cb 100644 --- a/crates/tui/src/client/chat.rs +++ b/crates/tui/src/client/chat.rs @@ -70,6 +70,41 @@ fn apply_provider_token_limit(body: &mut Value, provider: ApiProvider, max_token body["max_completion_tokens"] = json!(max_tokens); } +fn mirror_minimax_reasoning_details_for_messages(messages: &mut [Value]) { + for message in messages { + if message.get("role").and_then(Value::as_str) != Some("assistant") { + continue; + } + if message.get("reasoning_details").is_some() { + continue; + } + let Some(reasoning) = message + .get("reasoning_content") + .and_then(Value::as_str) + .filter(|reasoning| !reasoning.trim().is_empty()) + .map(str::to_string) + else { + continue; + }; + message["reasoning_details"] = json!([ + { + "type": "text", + "text": reasoning, + 
} + ]); + } +} + +fn mirror_minimax_reasoning_details_for_body(body: &mut Value, provider: ApiProvider) { + if provider != ApiProvider::Minimax { + return; + } + let Some(messages) = body.get_mut("messages").and_then(Value::as_array_mut) else { + return; + }; + mirror_minimax_reasoning_details_for_messages(messages); +} + impl DeepSeekClient { pub(super) async fn create_message_chat( &self, @@ -121,6 +156,7 @@ impl DeepSeekClient { request.reasoning_effort.as_deref(), self.api_provider, ); + mirror_minimax_reasoning_details_for_body(&mut body, self.api_provider); let response_cache_key = if cacheable { let wire_body = @@ -258,6 +294,7 @@ impl DeepSeekClient { request.reasoning_effort.as_deref(), self.api_provider, ); + mirror_minimax_reasoning_details_for_body(&mut body, self.api_provider); let url = api_url_with_suffix( &self.base_url, @@ -323,6 +360,7 @@ impl DeepSeekClient { let mut text_started = false; let mut thinking_started = false; let mut tool_indices: std::collections::HashMap = std::collections::HashMap::new(); + let mut reasoning_detail_buffers: std::collections::HashMap = std::collections::HashMap::new(); let is_reasoning_model = is_reasoning_model_for_stream(api_provider, &model); let mut byte_stream = std::pin::pin!(byte_stream); @@ -411,6 +449,7 @@ impl DeepSeekClient { &mut text_started, &mut thinking_started, &mut tool_indices, + &mut reasoning_detail_buffers, is_reasoning_model, ) { SseDataFrame::Done => break 'stream, @@ -550,6 +589,9 @@ impl<'a> PromptBuilder<'a> { if provider == ApiProvider::Arcee { apply_arcee_waf_safe_message_encoding(&mut messages); } + if provider == ApiProvider::Minimax { + mirror_minimax_reasoning_details_for_messages(&mut messages); + } messages } @@ -1943,7 +1985,23 @@ fn should_replay_reasoning_content_for_provider( model: &str, effort: Option<&str>, ) -> bool { - if !provider_accepts_reasoning_content(provider) && !requires_reasoning_content(model) { + if effort + .map(|value| { + matches!( + 
value.trim().to_ascii_lowercase().as_str(), + "off" | "disabled" | "none" | "false" + ) + }) + .unwrap_or(false) + { + return false; + } + + if requires_reasoning_content(model) { + return true; + } + + if !provider_accepts_reasoning_content(provider) { // Generic non-DeepSeek model on a provider that rejects the field: // keep stripping it (preserves the #1542 fix). But a known DeepSeek // reasoning model pointed at a DeepSeek-compatible endpoint via the @@ -1951,7 +2009,8 @@ fn should_replay_reasoning_content_for_provider( // or the thinking-mode API returns 400 (#1739 / #1694). return false; } - should_replay_reasoning_content(model, effort) + + model_supports_reasoning(model) } /// Should the SSE parser treat incoming `reasoning_content` deltas as thinking @@ -1995,6 +2054,7 @@ fn provider_accepts_reasoning_content(provider: ApiProvider) -> bool { | ApiProvider::SiliconflowCn | ApiProvider::Volcengine | ApiProvider::Arcee + | ApiProvider::Minimax | ApiProvider::Sglang | ApiProvider::Moonshot // #3016: Kimi thinking traces use reasoning_content ) @@ -2010,11 +2070,54 @@ fn has_deepseek_r_series_marker(model_lower: &str) -> bool { }) } -fn reasoning_field(value: &Value) -> Option<&str> { - value +fn reasoning_delta( + value: &Value, + choice_index: u32, + reasoning_detail_buffers: &mut std::collections::HashMap, +) -> Option { + if let Some(reasoning) = value .get("reasoning_content") .or_else(|| value.get("reasoning")) .and_then(Value::as_str) + { + return Some(reasoning.to_string()); + } + + let details = value.get("reasoning_details").and_then(Value::as_array)?; + let full_text = details + .iter() + .filter_map(|detail| detail.get("text").and_then(Value::as_str)) + .collect::(); + if full_text.is_empty() { + return None; + } + + let previous = reasoning_detail_buffers.entry(choice_index).or_default(); + let delta = full_text + .strip_prefix(previous.as_str()) + .unwrap_or(&full_text) + .to_string(); + *previous = full_text; + Some(delta) +} + +fn 
reasoning_message_text(value: &Value) -> Option { + if let Some(reasoning) = value + .get("reasoning_content") + .or_else(|| value.get("reasoning")) + .and_then(Value::as_str) + { + return Some(reasoning.to_string()); + } + value + .get("reasoning_details") + .and_then(Value::as_array) + .map(|details| { + details + .iter() + .filter_map(|detail| detail.get("text").and_then(Value::as_str)) + .collect::() + }) } pub(super) fn parse_chat_message(payload: &Value) -> Result { @@ -2042,7 +2145,7 @@ pub(super) fn parse_chat_message(payload: &Value) -> Result { let mut content_blocks = Vec::new(); if let Some(reasoning) = - reasoning_field(message).filter(|reasoning| !reasoning.trim().is_empty()) + reasoning_message_text(message).filter(|reasoning| !reasoning.trim().is_empty()) { content_blocks.push(ContentBlock::Thinking { signature: None, @@ -2219,6 +2322,7 @@ fn parse_sse_data_frame( text_started: &mut bool, thinking_started: &mut bool, tool_indices: &mut std::collections::HashMap, + reasoning_detail_buffers: &mut std::collections::HashMap, is_reasoning_model: bool, ) -> SseDataFrame { if data.trim() == "[DONE]" { @@ -2233,6 +2337,7 @@ fn parse_sse_data_frame( text_started, thinking_started, tool_indices, + reasoning_detail_buffers, is_reasoning_model, ) }, @@ -2248,6 +2353,7 @@ pub(super) fn parse_sse_chunk( text_started: &mut bool, thinking_started: &mut bool, tool_indices: &mut std::collections::HashMap, + reasoning_detail_buffers: &mut std::collections::HashMap, is_reasoning_model: bool, ) -> Vec { let mut events = Vec::new(); @@ -2282,6 +2388,7 @@ pub(super) fn parse_sse_chunk( } for choice in choices { + let choice_index = choice.get("index").and_then(Value::as_u64).unwrap_or(0) as u32; let delta = choice.get("delta"); let finish_reason = choice .get("finish_reason") @@ -2289,14 +2396,16 @@ pub(super) fn parse_sse_chunk( .map(str::to_string); if let Some(delta) = delta { - let reasoning_text = reasoning_field(delta).filter(|s| !s.is_empty()); + let reasoning_text 
= reasoning_delta(delta, choice_index, reasoning_detail_buffers) + .filter(|s| !s.is_empty()); let content_text = delta .get("content") .and_then(Value::as_str) - .filter(|s| !s.is_empty()); + .filter(|s| !s.is_empty()) + .map(str::to_string); // Handle reasoning_content / reasoning thinking deltas. - if is_reasoning_model && let Some(reasoning) = reasoning_text { + if is_reasoning_model && let Some(reasoning) = reasoning_text.as_deref() { if !*thinking_started { events.push(StreamEvent::ContentBlockStart { index: *content_index, @@ -2345,9 +2454,7 @@ pub(super) fn parse_sse_chunk( } events.push(StreamEvent::ContentBlockDelta { index: *content_index, - delta: Delta::TextDelta { - text: content.to_string(), - }, + delta: Delta::TextDelta { text: content }, }); } @@ -2659,6 +2766,69 @@ mod arcee_waf_message_encoding_tests { } } +#[cfg(test)] +mod minimax_reasoning_replay_tests { + use super::build_chat_messages_for_request_and_provider; + use crate::config::{ApiProvider, DEFAULT_MINIMAX_MODEL}; + use crate::models::{ContentBlock, Message, MessageRequest}; + + fn request_with_assistant_thinking() -> MessageRequest { + MessageRequest { + model: DEFAULT_MINIMAX_MODEL.to_string(), + messages: vec![Message { + role: "assistant".to_string(), + content: vec![ + ContentBlock::Thinking { + thinking: "Inspect tool state".to_string(), + signature: None, + }, + ContentBlock::Text { + text: "Done.".to_string(), + cache_control: None, + }, + ], + }], + max_tokens: 16, + system: None, + tools: None, + tool_choice: None, + metadata: None, + thinking: None, + reasoning_effort: None, + stream: None, + temperature: None, + top_p: None, + } + } + + #[test] + fn minimax_history_replays_thinking_as_reasoning_details() { + let request = request_with_assistant_thinking(); + + let messages = build_chat_messages_for_request_and_provider(&request, ApiProvider::Minimax); + let assistant = &messages[0]; + + assert_eq!( + assistant + .get("reasoning_content") + .and_then(|value| value.as_str()), 
+ Some("Inspect tool state") + ); + assert_eq!( + assistant + .pointer("/reasoning_details/0/type") + .and_then(|value| value.as_str()), + Some("text") + ); + assert_eq!( + assistant + .pointer("/reasoning_details/0/text") + .and_then(|value| value.as_str()), + Some("Inspect tool state") + ); + } +} + // === #103 Phase 4: SSE decoder behavior on canned chunk sequences ============ #[cfg(test)] @@ -2683,12 +2853,14 @@ mod stream_decoder_tests { let mut text_started = false; let mut thinking_started = false; let mut tool_indices = std::collections::HashMap::new(); + let mut reasoning_detail_buffers = std::collections::HashMap::new(); parse_sse_chunk( &chunk, &mut content_index, &mut text_started, &mut thinking_started, &mut tool_indices, + &mut reasoning_detail_buffers, is_reasoning_model, ) } @@ -2765,6 +2937,7 @@ mod stream_decoder_tests { let mut text_started = false; let mut thinking_started = false; let mut tool_indices = std::collections::HashMap::new(); + let mut reasoning_detail_buffers = std::collections::HashMap::new(); let mut events = Vec::new(); for chunk in chunks { let value: Value = serde_json::from_str(chunk).expect("valid SSE JSON"); @@ -2774,6 +2947,7 @@ mod stream_decoder_tests { &mut text_started, &mut thinking_started, &mut tool_indices, + &mut reasoning_detail_buffers, is_reasoning, )); } @@ -2835,12 +3009,65 @@ mod stream_decoder_tests { ); } + #[test] + fn decoder_streams_minimax_reasoning_details_as_incremental_thinking() { + // MiniMax's reasoning_split stream reports reasoning_details text as + // a cumulative buffer. Emit only the suffix so the Thinking cell does + // not duplicate earlier reasoning chunks. 
+ let chunks = [ + r#"{"id":"minimax-1","choices":[{"index":0,"delta":{"reasoning_details":[{"type":"text","text":"Inspect"}]}}]}"#, + r#"{"id":"minimax-1","choices":[{"index":0,"delta":{"reasoning_details":[{"type":"text","text":"Inspect config"}]}}]}"#, + r#"{"id":"minimax-1","choices":[{"index":0,"delta":{"content":"Done."}}]}"#, + ]; + + let is_reasoning = is_reasoning_model_for_stream(ApiProvider::Minimax, "MiniMax-M3"); + let mut content_index = 0u32; + let mut text_started = false; + let mut thinking_started = false; + let mut tool_indices = std::collections::HashMap::new(); + let mut reasoning_detail_buffers = std::collections::HashMap::new(); + let mut events = Vec::new(); + for chunk in chunks { + let value: Value = serde_json::from_str(chunk).expect("valid SSE JSON"); + events.extend(parse_sse_chunk( + &value, + &mut content_index, + &mut text_started, + &mut thinking_started, + &mut tool_indices, + &mut reasoning_detail_buffers, + is_reasoning, + )); + } + + let thinking: String = events + .iter() + .filter_map(|event| match event { + StreamEvent::ContentBlockDelta { + delta: Delta::ThinkingDelta { thinking }, + .. + } => Some(thinking.as_str()), + _ => None, + }) + .collect(); + assert_eq!(thinking, "Inspect config"); + + assert!(!events.iter().any(|event| matches!( + event, + StreamEvent::ContentBlockDelta { + delta: Delta::TextDelta { text }, + .. 
+ } if text == "Inspect" || text == "Inspect config" + ))); + } + #[test] fn decoder_does_not_render_reasoning_as_text_for_known_provider_models() { let mut content_index = 0u32; let mut text_started = false; let mut thinking_started = false; let mut tool_indices = std::collections::HashMap::new(); + let mut reasoning_detail_buffers = std::collections::HashMap::new(); let is_reasoning_model = is_reasoning_model_for_stream(ApiProvider::XiaomiMimo, "mimo-v2.5-pro"); let events = parse_sse_chunk( @@ -2855,6 +3082,7 @@ mod stream_decoder_tests { &mut text_started, &mut thinking_started, &mut tool_indices, + &mut reasoning_detail_buffers, is_reasoning_model, ); @@ -2932,6 +3160,7 @@ mod stream_decoder_tests { let mut text_started = false; let mut thinking_started = false; let mut tool_indices = std::collections::HashMap::new(); + let mut reasoning_detail_buffers = std::collections::HashMap::new(); let outcome = parse_sse_data_frame( " [DONE] ", @@ -2939,6 +3168,7 @@ mod stream_decoder_tests { &mut text_started, &mut thinking_started, &mut tool_indices, + &mut reasoning_detail_buffers, true, ); @@ -3721,6 +3951,7 @@ mod alias_thinking_detection_tests { assert!(provider_accepts_reasoning_content(ApiProvider::NvidiaNim)); assert!(provider_accepts_reasoning_content(ApiProvider::XiaomiMimo)); assert!(provider_accepts_reasoning_content(ApiProvider::Arcee)); + assert!(provider_accepts_reasoning_content(ApiProvider::Minimax)); // #3016: Moonshot's native endpoint streams Kimi thinking as // reasoning_content. assert!(provider_accepts_reasoning_content(ApiProvider::Moonshot)); @@ -3728,17 +3959,41 @@ mod alias_thinking_detection_tests { #[test] fn stream_classifies_moonshot_kimi_as_reasoning() { - // #3016: without this, kimi-k2.6 thinking leaked into answer text. + // #3016: without this, Kimi thinking leaked into answer text. 
assert!(is_reasoning_model_for_stream( ApiProvider::Moonshot, "kimi-k2.6" )); assert!( - !is_reasoning_model_for_stream(ApiProvider::Moonshot, "kimi-for-coding"), - "kimi-for-coding is Moonshot's documented non-thinking model" + is_reasoning_model_for_stream(ApiProvider::Moonshot, "kimi-for-coding"), + "Kimi Code's stable model id now maps to K2.7 Code and streams reasoning_content" ); } + #[test] + fn moonshot_and_minimax_replay_reasoning_content_for_supported_models() { + assert!(should_replay_reasoning_content_for_provider( + ApiProvider::Moonshot, + "kimi-k2.7-code", + None, + )); + assert!(should_replay_reasoning_content_for_provider( + ApiProvider::Moonshot, + "kimi-for-coding", + None, + )); + assert!(should_replay_reasoning_content_for_provider( + ApiProvider::Minimax, + "MiniMax-M3", + None, + )); + assert!(!should_replay_reasoning_content_for_provider( + ApiProvider::Moonshot, + "kimi-for-coding", + Some("off"), + )); + } + #[test] fn xiaomi_mimo_uses_max_completion_tokens_payload_key() { let mut body = json!({ diff --git a/crates/tui/src/config.rs b/crates/tui/src/config.rs index 94b85ffb..3b60c00c 100644 --- a/crates/tui/src/config.rs +++ b/crates/tui/src/config.rs @@ -179,6 +179,15 @@ pub const DEFAULT_ANTHROPIC_MODEL: &str = "claude-sonnet-4-6"; pub const ANTHROPIC_OPUS_MODEL: &str = "claude-opus-4-8"; pub const ANTHROPIC_HAIKU_MODEL: &str = "claude-haiku-4-5"; pub const DEFAULT_ANTHROPIC_BASE_URL: &str = "https://api.anthropic.com"; +pub const DEFAULT_MINIMAX_MODEL: &str = "MiniMax-M3"; +pub const MINIMAX_M2_7_MODEL: &str = "MiniMax-M2.7"; +pub const MINIMAX_M2_7_HIGHSPEED_MODEL: &str = "MiniMax-M2.7-highspeed"; +pub const MINIMAX_M2_5_MODEL: &str = "MiniMax-M2.5"; +pub const MINIMAX_M2_5_HIGHSPEED_MODEL: &str = "MiniMax-M2.5-highspeed"; +pub const MINIMAX_M2_1_MODEL: &str = "MiniMax-M2.1"; +pub const MINIMAX_M2_1_HIGHSPEED_MODEL: &str = "MiniMax-M2.1-highspeed"; +pub const MINIMAX_M2_MODEL: &str = "MiniMax-M2"; +pub const DEFAULT_MINIMAX_BASE_URL: 
&str = "https://api.minimax.io/v1"; #[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)] #[serde(rename_all = "snake_case")] @@ -207,6 +216,7 @@ pub enum ApiProvider { Anthropic, Zai, Stepfun, + Minimax, } impl ApiProvider { @@ -264,7 +274,7 @@ impl ApiProvider { /// `ApiProvider` discriminant → `ProviderKind` lookup. /// Index 1 is `None` for the legacy `DeepseekCN` variant. - const KIND_LOOKUP: [Option; 24] = [ + const KIND_LOOKUP: [Option; 25] = [ Some(codewhale_config::ProviderKind::Deepseek), None, // DeepseekCN Some(codewhale_config::ProviderKind::NvidiaNim), @@ -289,10 +299,11 @@ impl ApiProvider { Some(codewhale_config::ProviderKind::Anthropic), Some(codewhale_config::ProviderKind::Zai), Some(codewhale_config::ProviderKind::Stepfun), + Some(codewhale_config::ProviderKind::Minimax), ]; /// `ProviderKind` discriminant → `ApiProvider` lookup. - const FROM_KIND_LOOKUP: [Self; 23] = [ + const FROM_KIND_LOOKUP: [Self; 24] = [ Self::Deepseek, Self::NvidiaNim, Self::Openai, @@ -316,6 +327,7 @@ impl ApiProvider { Self::Anthropic, Self::Zai, Self::Stepfun, + Self::Minimax, ]; /// Map to the config-level `ProviderKind`. 
@@ -785,6 +797,39 @@ fn canonical_moonshot_model_id(model: &str) -> Option<&'static str> { } } +fn canonical_minimax_model_id(model: &str) -> Option<&'static str> { + let normalized = model.trim().to_ascii_lowercase(); + let normalized = normalized.replace(['_', ' '], "-"); + match normalized.as_str() { + "minimax" | "minimax-m3" | "minimax-m-3" | "minimax-m-3-thinking" => { + Some(DEFAULT_MINIMAX_MODEL) + } + "minimax-m2.7" | "minimax-m2-7" | "minimax-m-2.7" | "minimax-m-2-7" => { + Some(MINIMAX_M2_7_MODEL) + } + "minimax-m2.7-highspeed" + | "minimax-m2-7-highspeed" + | "minimax-m-2.7-highspeed" + | "minimax-m-2-7-highspeed" => Some(MINIMAX_M2_7_HIGHSPEED_MODEL), + "minimax-m2.5" | "minimax-m2-5" | "minimax-m-2.5" | "minimax-m-2-5" => { + Some(MINIMAX_M2_5_MODEL) + } + "minimax-m2.5-highspeed" + | "minimax-m2-5-highspeed" + | "minimax-m-2.5-highspeed" + | "minimax-m-2-5-highspeed" => Some(MINIMAX_M2_5_HIGHSPEED_MODEL), + "minimax-m2.1" | "minimax-m2-1" | "minimax-m-2.1" | "minimax-m-2-1" => { + Some(MINIMAX_M2_1_MODEL) + } + "minimax-m2.1-highspeed" + | "minimax-m2-1-highspeed" + | "minimax-m-2.1-highspeed" + | "minimax-m-2-1-highspeed" => Some(MINIMAX_M2_1_HIGHSPEED_MODEL), + "minimax-m2" | "minimax-m-2" => Some(MINIMAX_M2_MODEL), + _ => None, + } +} + /// Normalize a model selected through the TUI for the active provider. /// /// Official DeepSeek endpoints require bare model IDs. 
Provider-prefixed @@ -821,6 +866,12 @@ pub fn normalize_model_name_for_provider(provider: ApiProvider, model: &str) -> .or_else(|| normalize_custom_model_id(model)); } + if matches!(provider, ApiProvider::Minimax) { + return canonical_minimax_model_id(model) + .map(ToString::to_string) + .or_else(|| normalize_custom_model_id(model)); + } + if matches!(provider, ApiProvider::Huggingface) { return normalize_custom_model_id(model); } @@ -906,6 +957,16 @@ pub fn model_completion_names_for_provider(provider: ApiProvider) -> Vec<&'stati DEFAULT_ANTHROPIC_MODEL, ANTHROPIC_HAIKU_MODEL, ], + ApiProvider::Minimax => vec![ + DEFAULT_MINIMAX_MODEL, + MINIMAX_M2_7_MODEL, + MINIMAX_M2_7_HIGHSPEED_MODEL, + MINIMAX_M2_5_MODEL, + MINIMAX_M2_5_HIGHSPEED_MODEL, + MINIMAX_M2_1_MODEL, + MINIMAX_M2_1_HIGHSPEED_MODEL, + MINIMAX_M2_MODEL, + ], } } @@ -2066,6 +2127,8 @@ pub struct ProvidersConfig { pub zai: ProviderConfig, #[serde(default)] pub stepfun: ProviderConfig, + #[serde(default)] + pub minimax: ProviderConfig, } #[derive(Debug, Clone, Deserialize, Default)] @@ -2235,6 +2298,7 @@ impl Config { ApiProvider::Anthropic => "providers.anthropic", ApiProvider::Zai => "providers.zai", ApiProvider::Stepfun => "providers.stepfun", + ApiProvider::Minimax => "providers.minimax", ApiProvider::Deepseek | ApiProvider::DeepseekCN => return, }; tracing::warn!( @@ -2394,6 +2458,7 @@ impl Config { ApiProvider::Anthropic => &providers.anthropic, ApiProvider::Zai => &providers.zai, ApiProvider::Stepfun => &providers.stepfun, + ApiProvider::Minimax => &providers.minimax, }) } @@ -2424,6 +2489,7 @@ impl Config { ApiProvider::Anthropic => &mut providers.anthropic, ApiProvider::Zai => &mut providers.zai, ApiProvider::Stepfun => &mut providers.stepfun, + ApiProvider::Minimax => &mut providers.minimax, } } @@ -2568,6 +2634,7 @@ impl Config { ApiProvider::Zai => DEFAULT_ZAI_MODEL, ApiProvider::Stepfun => DEFAULT_STEPFUN_MODEL, ApiProvider::Anthropic => DEFAULT_ANTHROPIC_MODEL, + ApiProvider::Minimax => 
DEFAULT_MINIMAX_MODEL, } .to_string() } @@ -2609,7 +2676,8 @@ impl Config { | ApiProvider::Together | ApiProvider::OpenaiCodex | ApiProvider::Zai - | ApiProvider::Stepfun => None, + | ApiProvider::Stepfun + | ApiProvider::Minimax => None, }; let configured_base_url = provider_base.or(root_base); let base = if provider == ApiProvider::XiaomiMimo { @@ -2659,6 +2727,7 @@ impl Config { ApiProvider::Zai => DEFAULT_ZAI_BASE_URL, ApiProvider::Stepfun => DEFAULT_STEPFUN_BASE_URL, ApiProvider::Anthropic => DEFAULT_ANTHROPIC_BASE_URL, + ApiProvider::Minimax => DEFAULT_MINIMAX_BASE_URL, } .to_string() }) @@ -2711,6 +2780,7 @@ impl Config { ApiProvider::Zai => "zai", ApiProvider::Stepfun => "stepfun", ApiProvider::Anthropic => "anthropic", + ApiProvider::Minimax => "minimax", }; // 0. DeepSeek compatibility slot. The legacy top-level `api_key` @@ -2906,6 +2976,10 @@ impl Config { ), + ApiProvider::Minimax => anyhow::bail!( + "MiniMax API key not found. Run 'codewhale auth set --provider minimax', \ + set MINIMAX_API_KEY, or add [providers.minimax] api_key in ~/.codewhale/config.toml."
+ ), // Self-hosted deployments commonly run without auth on localhost. // Return an empty key and let the client omit the Authorization header. ApiProvider::Sglang | ApiProvider::Vllm | ApiProvider::Ollama => Ok(String::new()), } } @@ -3759,6 +3833,13 @@ fn apply_env_overrides(config: &mut Config) { .stepfun .base_url = Some(value); } + ApiProvider::Minimax => { + config + .providers + .get_or_insert_with(ProvidersConfig::default) + .minimax + .base_url = Some(value); + } } } if matches!(config.api_provider(), ApiProvider::NvidiaNim) @@ -3968,6 +4049,7 @@ fn apply_env_overrides(config: &mut Config) { ApiProvider::Anthropic => &mut providers.anthropic, ApiProvider::Zai => &mut providers.zai, ApiProvider::Stepfun => &mut providers.stepfun, + ApiProvider::Minimax => &mut providers.minimax, }; let mut provider_headers = entry.http_headers.clone().unwrap_or_default(); provider_headers.extend(headers); @@ -4165,6 +4247,7 @@ fn apply_env_overrides(config: &mut Config) { ApiProvider::Anthropic => &mut providers.anthropic, ApiProvider::Zai => &mut providers.zai, ApiProvider::Stepfun => &mut providers.stepfun, + ApiProvider::Minimax => &mut providers.minimax, }; entry.model = Some(value); } @@ -4507,6 +4590,7 @@ fn default_base_url_for_provider(provider: ApiProvider) -> &'static str { ApiProvider::Zai => DEFAULT_ZAI_BASE_URL, ApiProvider::Stepfun => DEFAULT_STEPFUN_BASE_URL, ApiProvider::Anthropic => DEFAULT_ANTHROPIC_BASE_URL, + ApiProvider::Minimax => DEFAULT_MINIMAX_BASE_URL, } } @@ -4954,6 +5038,7 @@ fn merge_providers( openai_codex: merge_provider_config(base.openai_codex, override_cfg.openai_codex), zai: merge_provider_config(base.zai, override_cfg.zai), stepfun: merge_provider_config(base.stepfun, override_cfg.stepfun), + minimax: merge_provider_config(base.minimax, override_cfg.minimax), }), } } @@ -5468,6 +5553,9 @@ pub fn active_provider_has_env_api_key(config: &Config) -> bool { std::env::var("STEPFUN_API_KEY").is_ok_and(|k| !k.trim().is_empty()) || std::env::var("STEP_API_KEY").is_ok_and(|k| !k.trim().is_empty()) } + ApiProvider::Minimax => { + std::env::var("MINIMAX_API_KEY").is_ok_and(|k|
!k.trim().is_empty()) + } } } @@ -5504,6 +5592,7 @@ pub fn has_api_key_for(config: &Config, provider: ApiProvider) -> bool { ApiProvider::Volcengine => "VOLCENGINE_API_KEY", ApiProvider::Zai => "ZAI_API_KEY", ApiProvider::Stepfun => "STEPFUN_API_KEY", + ApiProvider::Minimax => "MINIMAX_API_KEY", }; if std::env::var(env_var).is_ok_and(|k| !k.trim().is_empty()) { return true; @@ -5633,6 +5722,7 @@ pub fn save_api_key_for(provider: ApiProvider, api_key: &str) -> Result ApiProvider::OpenaiCodex => "providers.openai_codex", ApiProvider::Zai => "providers.zai", ApiProvider::Stepfun => "providers.stepfun", + ApiProvider::Minimax => "providers.minimax", }; // Parse existing TOML (or start fresh) so we can edit the right table @@ -5681,6 +5771,7 @@ pub fn save_api_key_for(provider: ApiProvider, api_key: &str) -> Result ApiProvider::OpenaiCodex => "openai_codex", ApiProvider::Zai => "zai", ApiProvider::Stepfun => "stepfun", + ApiProvider::Minimax => "minimax", }; let entry = providers .entry(key_inside.to_string()) @@ -5781,6 +5872,7 @@ fn provider_config_key(provider: ApiProvider) -> Result<&'static str> { ApiProvider::OpenaiCodex => Ok("openai_codex"), ApiProvider::Zai => Ok("zai"), ApiProvider::Stepfun => Ok("stepfun"), + ApiProvider::Minimax => Ok("minimax"), } } @@ -8361,6 +8453,26 @@ api_key = "old-openrouter-key" } } + #[test] + fn normalize_model_name_for_provider_maps_minimax_direct_aliases() { + for (alias, expected) in [ + ("minimax", DEFAULT_MINIMAX_MODEL), + ("minimax-m3", DEFAULT_MINIMAX_MODEL), + ("minimax-m2.7", MINIMAX_M2_7_MODEL), + ("minimax-m2-7-highspeed", MINIMAX_M2_7_HIGHSPEED_MODEL), + ("minimax-m2.5", MINIMAX_M2_5_MODEL), + ("minimax-m2-5-highspeed", MINIMAX_M2_5_HIGHSPEED_MODEL), + ("minimax-m2.1", MINIMAX_M2_1_MODEL), + ("minimax-m2-1-highspeed", MINIMAX_M2_1_HIGHSPEED_MODEL), + ("minimax-m2", MINIMAX_M2_MODEL), + ] { + assert_eq!( + normalize_model_name_for_provider(ApiProvider::Minimax, alias).as_deref(), + Some(expected) + ); + } + } + #[test] 
fn normalize_model_name_for_provider_maps_arcee_direct_aliases() { for (alias, expected) in [ @@ -8472,6 +8584,28 @@ api_key = "old-openrouter-key" ); } + #[test] + fn model_completion_names_for_minimax_include_direct_chat_models() { + let models = model_completion_names_for_provider(ApiProvider::Minimax); + + for expected in [ + DEFAULT_MINIMAX_MODEL, + MINIMAX_M2_7_MODEL, + MINIMAX_M2_7_HIGHSPEED_MODEL, + MINIMAX_M2_5_MODEL, + MINIMAX_M2_5_HIGHSPEED_MODEL, + MINIMAX_M2_1_MODEL, + MINIMAX_M2_1_HIGHSPEED_MODEL, + MINIMAX_M2_MODEL, + ] { + assert!(models.contains(&expected), "missing {expected}"); + } + assert!( + !models.contains(&OPENROUTER_MINIMAX_M3_MODEL), + "direct MiniMax picker must not expose OpenRouter namespaced IDs" + ); + } + #[test] fn normalize_model_name_rejects_invalid_or_non_deepseek_ids() { assert!(normalize_model_name("qwen3-coder").is_none()); @@ -11405,6 +11539,35 @@ model = "deepseek-ai/deepseek-v4-pro" ); } + #[test] + fn provider_capability_minimax_direct_models_use_api_docs_shape() { + let m3 = provider_capability(ApiProvider::Minimax, DEFAULT_MINIMAX_MODEL); + assert_eq!(m3.context_window, 1_000_000); + assert_eq!(m3.max_output, 524_288); + assert!(m3.thinking_supported); + assert!(!m3.cache_telemetry_supported); + assert_eq!(m3.request_payload_mode, RequestPayloadMode::ChatCompletions); + + for model in [ + MINIMAX_M2_7_MODEL, + MINIMAX_M2_7_HIGHSPEED_MODEL, + MINIMAX_M2_5_MODEL, + MINIMAX_M2_5_HIGHSPEED_MODEL, + MINIMAX_M2_1_MODEL, + MINIMAX_M2_1_HIGHSPEED_MODEL, + MINIMAX_M2_MODEL, + ] { + let cap = provider_capability(ApiProvider::Minimax, model); + assert_eq!(cap.context_window, 204_800, "{model}"); + assert!(cap.thinking_supported, "{model}"); + assert!(!cap.cache_telemetry_supported, "{model}"); + assert_eq!( + cap.request_payload_mode, + RequestPayloadMode::ChatCompletions + ); + } + } + #[test] fn provider_capability_wanjie_ark_reasoner_has_thinking_no_cache() { let cap = provider_capability(ApiProvider::WanjieArk, 
DEFAULT_WANJIE_ARK_MODEL); diff --git a/crates/tui/src/config_persistence.rs b/crates/tui/src/config_persistence.rs index f77a81b9..ab1cb9a4 100644 --- a/crates/tui/src/config_persistence.rs +++ b/crates/tui/src/config_persistence.rs @@ -223,6 +223,7 @@ fn provider_base_url_table_key(provider: ApiProvider) -> anyhow::Result<&'static ApiProvider::OpenaiCodex => Ok("openai_codex"), ApiProvider::Zai => Ok("zai"), ApiProvider::Stepfun => Ok("stepfun"), + ApiProvider::Minimax => Ok("minimax"), } } diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index 7be7bf50..0c8e4cbb 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -614,7 +614,9 @@ impl Engine { let env_var = match provider { ApiProvider::Deepseek | ApiProvider::DeepseekCN => "DEEPSEEK_API_KEY", ApiProvider::NvidiaNim => "NVIDIA_API_KEY/NVIDIA_NIM_API_KEY", - ApiProvider::Openai | ApiProvider::Zai | ApiProvider::Stepfun => "OPENAI_API_KEY", + ApiProvider::Openai => "OPENAI_API_KEY", + ApiProvider::Zai => "ZAI_API_KEY/Z_AI_API_KEY", + ApiProvider::Stepfun => "STEPFUN_API_KEY/STEP_API_KEY", ApiProvider::Anthropic => "ANTHROPIC_API_KEY", ApiProvider::Atlascloud => "ATLASCLOUD_API_KEY", ApiProvider::WanjieArk => "WANJIE_ARK_API_KEY/WANJIE_API_KEY/WANJIE_MAAS_API_KEY", @@ -632,6 +634,7 @@ impl Engine { ApiProvider::Huggingface => "HUGGINGFACE_API_KEY/HF_TOKEN", ApiProvider::Together => "TOGETHER_API_KEY", ApiProvider::OpenaiCodex => "OPENAI_CODEX_ACCESS_TOKEN/CODEX_ACCESS_TOKEN", + ApiProvider::Minimax => "MINIMAX_API_KEY", }; Some(format!( diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs index 5b1d3522..4ba997b2 100644 --- a/crates/tui/src/main.rs +++ b/crates/tui/src/main.rs @@ -2601,13 +2601,17 @@ fn run_setup_status(config: &Config, workspace: &Path) -> Result<()> { ("DEEPSEEK_API_KEY", "codewhale auth set --provider deepseek") } crate::config::ApiProvider::Zai => ( - "OPENAI_API_KEY", + "ZAI_API_KEY/Z_AI_API_KEY", "codewhale auth set --provider 
zai --api-key \"...\"", ), crate::config::ApiProvider::Stepfun => ( - "OPENAI_API_KEY", + "STEPFUN_API_KEY/STEP_API_KEY", "codewhale auth set --provider stepfun --api-key \"...\"", ), + crate::config::ApiProvider::Minimax => ( + "MINIMAX_API_KEY", + "codewhale auth set --provider minimax --api-key \"...\"", + ), }; println!( " {} api_key: missing (set {env_var} or `[providers.{}].api_key` in ~/.codewhale/config.toml; or run `{login_hint}`)", @@ -2637,6 +2641,7 @@ fn run_setup_status(config: &Config, workspace: &Path) -> Result<()> { | crate::config::ApiProvider::DeepseekCN => "deepseek", crate::config::ApiProvider::Zai => "zai", crate::config::ApiProvider::Stepfun => "stepfun", + crate::config::ApiProvider::Minimax => "minimax", } ); } diff --git a/crates/tui/src/models.rs b/crates/tui/src/models.rs index b950d507..9d90156a 100644 --- a/crates/tui/src/models.rs +++ b/crates/tui/src/models.rs @@ -286,6 +286,13 @@ fn known_context_window_for_model(model_lower: &str) -> Option { | "kimi-k2.7-code" | "kimi-k2.6" | "kimi-for-coding" => Some(262_144), + "minimax-m2.7" + | "minimax-m2.7-highspeed" + | "minimax-m2.5" + | "minimax-m2.5-highspeed" + | "minimax-m2.1" + | "minimax-m2.1-highspeed" + | "minimax-m2" => Some(204_800), "z-ai/glm-5.1" | "z-ai/glm-5v-turbo" | "glm-5.1" | "glm-5v-turbo" => Some(202_752), "minimax/minimax-m3" | "minimax-m3" | "qwen/qwen3.6-flash" | "qwen/qwen3.6-plus" => { Some(1_000_000) @@ -349,10 +356,10 @@ pub fn model_supports_reasoning(model: &str) -> bool { if lower.contains("deepseek") && lower.contains("v4") { return true; } - // #3016: Moonshot-native Kimi IDs also emit reasoning_content. - // `kimi-for-coding` is Moonshot's documented non-thinking model — it - // must not be classified as reasoning-capable by the prefix rule. 
- if lower.starts_with("kimi-") && lower != "kimi-for-coding" { + // #3016 plus the 2026 Kimi Code K2.7 update: Moonshot-native Kimi IDs, + // including the stable `kimi-for-coding` coding route, emit + // reasoning_content that must stay out of answer prose. + if lower.starts_with("kimi-") { return true; } matches!( @@ -376,8 +383,16 @@ pub fn model_supports_reasoning(model: &str) -> bool { | "moonshotai/kimi-k2.6:free" | "kimi-k2.7-code" | "kimi-k2.6" + | "kimi-for-coding" | "minimax/minimax-m3" | "minimax-m3" + | "minimax-m2.7" + | "minimax-m2.7-highspeed" + | "minimax-m2.5" + | "minimax-m2.5-highspeed" + | "minimax-m2.1" + | "minimax-m2.1-highspeed" + | "minimax-m2" | "nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free" | "nvidia/nemotron-3-ultra-550b-a55b" | "nvidia/nemotron-3-ultra-550b-a55b:free" @@ -628,13 +643,13 @@ mod tests { } #[test] - fn moonshot_native_kimi_ids_support_reasoning_except_for_coding() { + fn moonshot_native_kimi_ids_support_reasoning_including_coding_route() { // #3016: bare Moonshot ids (no moonshotai/ prefix) emit - // reasoning_content; kimi-for-coding is the non-thinking exception. + // reasoning_content; kimi-for-coding currently rides the K2.7 Code path. 
assert!(model_supports_reasoning("kimi-k2.7-code")); assert!(model_supports_reasoning("kimi-k2.6")); + assert!(model_supports_reasoning("kimi-for-coding")); assert!(model_supports_reasoning("kimi-k2.5")); - assert!(!model_supports_reasoning("kimi-for-coding")); } #[test] @@ -691,13 +706,16 @@ mod tests { ("kimi-k2.7-code", 262_144), ("kimi-k2.6", 262_144), ("minimax-m3", 1_000_000), + ("minimax-m2.7", 204_800), + ("minimax-m2.5-highspeed", 204_800), + ("minimax-m2", 204_800), ("glm-5.1", 202_752), ] { assert_eq!(context_window_for_model(model), Some(expected_window)); assert!(model_supports_reasoning(model)); } assert_eq!(context_window_for_model("kimi-for-coding"), Some(262_144)); - assert!(!model_supports_reasoning("kimi-for-coding")); + assert!(model_supports_reasoning("kimi-for-coding")); assert_eq!(context_window_for_model("glm-5v-turbo"), Some(202_752)); assert!(!model_supports_reasoning("glm-5v-turbo")); assert_eq!(max_output_tokens_for_model("kimi-k2.7-code"), Some(262_144)); diff --git a/crates/tui/src/tui/provider_picker.rs b/crates/tui/src/tui/provider_picker.rs index b5008161..364e85bf 100644 --- a/crates/tui/src/tui/provider_picker.rs +++ b/crates/tui/src/tui/provider_picker.rs @@ -121,7 +121,9 @@ impl ProviderPickerView { ApiProvider::Huggingface => "HUGGINGFACE_API_KEY / HF_TOKEN", ApiProvider::Together => "TOGETHER_API_KEY", ApiProvider::OpenaiCodex => "OPENAI_CODEX_ACCESS_TOKEN / CODEX_ACCESS_TOKEN", - ApiProvider::Zai | ApiProvider::Stepfun => "OPENAI_API_KEY", + ApiProvider::Zai => "ZAI_API_KEY / Z_AI_API_KEY", + ApiProvider::Stepfun => "STEPFUN_API_KEY / STEP_API_KEY", + ApiProvider::Minimax => "MINIMAX_API_KEY", } } @@ -517,7 +519,8 @@ mod tests { "OpenAI Codex (ChatGPT)", "Anthropic", "Z.ai (GLM Coding)", - "StepFun / StepFlash" + "StepFun / StepFlash", + "MiniMax" ] ); } @@ -552,7 +555,7 @@ mod tests { let mut picker = ProviderPickerView::new(ApiProvider::Deepseek, &config); picker.handle_key(key(KeyCode::Up)); - 
assert_eq!(picker.selected_provider(), ApiProvider::Stepfun); + assert_eq!(picker.selected_provider(), ApiProvider::Minimax); picker.handle_key(key(KeyCode::Down)); assert_eq!(picker.selected_provider(), ApiProvider::Deepseek); diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs index d0db1c1b..1919ddc0 100644 --- a/crates/tui/src/tui/ui.rs +++ b/crates/tui/src/tui/ui.rs @@ -7595,6 +7595,7 @@ fn render(f: &mut Frame, app: &mut App) { crate::config::ApiProvider::OpenaiCodex => Some("Codex"), crate::config::ApiProvider::Zai => Some("Z.ai"), crate::config::ApiProvider::Stepfun => Some("StepFun"), + crate::config::ApiProvider::Minimax => Some("MiniMax"), }; let status_indicator_started_at = if app.low_motion { None @@ -8655,6 +8656,7 @@ async fn apply_provider_picker_api_key( ApiProvider::Anthropic => &mut providers.anthropic, ApiProvider::Zai => &mut providers.zai, ApiProvider::Stepfun => &mut providers.stepfun, + ApiProvider::Minimax => &mut providers.minimax, }; entry.api_key = Some(api_key); } @@ -8717,6 +8719,7 @@ fn set_provider_auth_mode_in_memory(config: &mut Config, provider: ApiProvider, ApiProvider::Anthropic => &mut providers.anthropic, ApiProvider::Zai => &mut providers.zai, ApiProvider::Stepfun => &mut providers.stepfun, + ApiProvider::Minimax => &mut providers.minimax, }; entry.auth_mode = Some(auth_mode); } diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index 9073d85c..46f49247 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -952,12 +952,12 @@ If you are upgrading from older releases: ### Core keys (used by the TUI/engine) -- `provider` (string, optional): `deepseek` (default), `nvidia-nim`, `openai`, `atlascloud`, `wanjie-ark`, `volcengine`, `openrouter`, `xiaomi-mimo`, `novita`, `fireworks`, `siliconflow`, `siliconflow-CN`, `arcee`, `moonshot`, `sglang`, `vllm`, or `ollama`. 
Legacy `deepseek-cn` configs are still accepted as an alias for `deepseek`; DeepSeek uses the same official host [`https://api.deepseek.com`](https://api-docs.deepseek.com/) worldwide. `nvidia-nim` targets NVIDIA's NIM-hosted DeepSeek endpoints through `https://integrate.api.nvidia.com/v1`; `openai` targets a generic OpenAI-compatible endpoint, defaulting to `https://api.openai.com/v1`; `atlascloud` targets AtlasCloud's OpenAI-compatible endpoint at `https://api.atlascloud.ai/v1`; `wanjie-ark` targets Wanjie Ark's OpenAI-compatible endpoint at `https://maas-openapi.wanjiedata.com/api/v1`; `volcengine` targets Volcengine Ark's OpenAI-compatible coding endpoint at `https://ark.cn-beijing.volces.com/api/coding/v3`; `openrouter` targets `https://openrouter.ai/api/v1`; `xiaomi-mimo` targets Xiaomi MiMo's OpenAI-compatible endpoint, using `https://token-plan-sgp.xiaomimimo.com/v1` by default for Token Plan keys (`tp-...`) and `https://api.xiaomimimo.com/v1` for pay-as-you-go keys; set `base_url` explicitly if your Token Plan account uses the China region; `novita` targets `https://api.novita.ai/v1`; `fireworks` targets `https://api.fireworks.ai/inference/v1`; `siliconflow` targets SiliconFlow, defaulting to `https://api.siliconflow.com/v1`; `siliconflow-CN` targets the SiliconFlow China regional endpoint while sharing `[providers.siliconflow]`; `arcee` targets Arcee AI's OpenAI-compatible endpoint at `https://api.arcee.ai/api/v1`; `moonshot` targets Moonshot/Kimi, defaulting to `https://api.moonshot.ai/v1`; `sglang` targets a self-hosted OpenAI-compatible endpoint, defaulting to `http://localhost:30000/v1`; `vllm` targets a self-hosted vLLM OpenAI-compatible endpoint, defaulting to `http://localhost:8000/v1`; `ollama` targets Ollama's OpenAI-compatible endpoint, defaulting to `http://localhost:11434/v1`. 
+- `provider` (string, optional): `deepseek` (default), `nvidia-nim`, `openai`, `atlascloud`, `wanjie-ark`, `volcengine`, `openrouter`, `xiaomi-mimo`, `novita`, `fireworks`, `siliconflow`, `siliconflow-CN`, `arcee`, `moonshot`, `minimax`, `zai`, `stepfun`, `sglang`, `vllm`, or `ollama`. Legacy `deepseek-cn` configs are still accepted as an alias for `deepseek`; DeepSeek uses the same official host [`https://api.deepseek.com`](https://api-docs.deepseek.com/) worldwide. `nvidia-nim` targets NVIDIA's NIM-hosted DeepSeek endpoints through `https://integrate.api.nvidia.com/v1`; `openai` targets a generic OpenAI-compatible endpoint, defaulting to `https://api.openai.com/v1`; `atlascloud` targets AtlasCloud's OpenAI-compatible endpoint at `https://api.atlascloud.ai/v1`; `wanjie-ark` targets Wanjie Ark's OpenAI-compatible endpoint at `https://maas-openapi.wanjiedata.com/api/v1`; `volcengine` targets Volcengine Ark's OpenAI-compatible coding endpoint at `https://ark.cn-beijing.volces.com/api/coding/v3`; `openrouter` targets `https://openrouter.ai/api/v1`; `xiaomi-mimo` targets Xiaomi MiMo's OpenAI-compatible endpoint, using `https://token-plan-sgp.xiaomimimo.com/v1` by default for Token Plan keys (`tp-...`) and `https://api.xiaomimimo.com/v1` for pay-as-you-go keys; set `base_url` explicitly if your Token Plan account uses the China region; `novita` targets `https://api.novita.ai/v1`; `fireworks` targets `https://api.fireworks.ai/inference/v1`; `siliconflow` targets SiliconFlow, defaulting to `https://api.siliconflow.com/v1`; `siliconflow-CN` targets the SiliconFlow China regional endpoint while sharing `[providers.siliconflow]`; `arcee` targets Arcee AI's OpenAI-compatible endpoint at `https://api.arcee.ai/api/v1`; `moonshot` targets Moonshot/Kimi, defaulting to `https://api.moonshot.ai/v1`; `minimax` targets MiniMax at `https://api.minimax.io/v1`; `zai` targets Z.ai at `https://api.z.ai/api/coding/paas/v4`; `stepfun` targets StepFun at `https://api.stepfun.ai/v1`; 
`sglang` targets a self-hosted OpenAI-compatible endpoint, defaulting to `http://localhost:30000/v1`; `vllm` targets a self-hosted vLLM OpenAI-compatible endpoint, defaulting to `http://localhost:8000/v1`; `ollama` targets Ollama's OpenAI-compatible endpoint, defaulting to `http://localhost:11434/v1`. - `api_key` (string, required for hosted providers): must be non-empty for DeepSeek/hosted providers (or set the provider API key env var). Self-hosted SGLang, vLLM, and Ollama can omit it. -- `base_url` (string, optional): defaults to `https://api.deepseek.com/beta` for DeepSeek's OpenAI-compatible Chat Completions API, including legacy `provider = "deepseek-cn"` configs. Other defaults are `https://integrate.api.nvidia.com/v1` for `nvidia-nim`, `https://api.openai.com/v1` for `openai`, `https://api.atlascloud.ai/v1` for `atlascloud`, `https://maas-openapi.wanjiedata.com/api/v1` for `wanjie-ark`, `https://ark.cn-beijing.volces.com/api/coding/v3` for `volcengine`, `https://openrouter.ai/api/v1` for `openrouter`, `https://token-plan-sgp.xiaomimimo.com/v1` for `xiaomi-mimo` when the API key starts with `tp-...` and `https://api.xiaomimimo.com/v1` otherwise, `https://api.novita.ai/v1` for `novita`, `https://api.fireworks.ai/inference/v1` for `fireworks`, `https://api.siliconflow.com/v1` for `siliconflow`, `https://api.siliconflow.cn/v1` for `siliconflow-CN`, `https://api.arcee.ai/api/v1` for `arcee`, `https://api.moonshot.ai/v1` for `moonshot`, `http://localhost:30000/v1` for `sglang`, `http://localhost:8000/v1` for `vllm`, and `http://localhost:11434/v1` for `ollama`. Set `base_url = "https://token-plan-cn.xiaomimimo.com/v1"` explicitly if your Xiaomi MiMo Token Plan account is provisioned in the China region. Set `https://api.deepseek.com` or `https://api.deepseek.com/v1` explicitly to opt out of DeepSeek beta features. 
+- `base_url` (string, optional): defaults to `https://api.deepseek.com/beta` for DeepSeek's OpenAI-compatible Chat Completions API, including legacy `provider = "deepseek-cn"` configs. Other defaults are `https://integrate.api.nvidia.com/v1` for `nvidia-nim`, `https://api.openai.com/v1` for `openai`, `https://api.atlascloud.ai/v1` for `atlascloud`, `https://maas-openapi.wanjiedata.com/api/v1` for `wanjie-ark`, `https://ark.cn-beijing.volces.com/api/coding/v3` for `volcengine`, `https://openrouter.ai/api/v1` for `openrouter`, `https://token-plan-sgp.xiaomimimo.com/v1` for `xiaomi-mimo` when the API key starts with `tp-...` and `https://api.xiaomimimo.com/v1` otherwise, `https://api.novita.ai/v1` for `novita`, `https://api.fireworks.ai/inference/v1` for `fireworks`, `https://api.siliconflow.com/v1` for `siliconflow`, `https://api.siliconflow.cn/v1` for `siliconflow-CN`, `https://api.arcee.ai/api/v1` for `arcee`, `https://api.moonshot.ai/v1` for `moonshot`, `https://api.minimax.io/v1` for `minimax`, `https://api.z.ai/api/coding/paas/v4` for `zai`, `https://api.stepfun.ai/v1` for `stepfun`, `http://localhost:30000/v1` for `sglang`, `http://localhost:8000/v1` for `vllm`, and `http://localhost:11434/v1` for `ollama`. Set `base_url = "https://token-plan-cn.xiaomimimo.com/v1"` explicitly if your Xiaomi MiMo Token Plan account is provisioned in the China region. Set `https://api.deepseek.com` or `https://api.deepseek.com/v1` explicitly to opt out of DeepSeek beta features. - `path_suffix` (string, optional provider-table key): override the chat-completions path for OpenAI-compatible gateways that do not serve `/v1/chat/completions`. For example, `[providers.openai] path_suffix = "/chat/completions"` sends chat requests to the unversioned base URL plus `/chat/completions`; `models` and `beta/*` requests keep their normal routing. - `insecure_skip_tls_verify` (bool, optional provider-table key): disabled by default. 
When true on the active provider table, only the LLM provider HTTP client skips TLS certificate verification. Prefer `SSL_CERT_FILE` for corporate or private CA bundles; `codewhale doctor` reports this setting when enabled. -- `default_text_model` (string, optional): defaults to `deepseek-v4-pro` for DeepSeek and generic OpenAI-compatible endpoints, `deepseek-ai/deepseek-v4-pro` for NVIDIA NIM, `deepseek-ai/deepseek-v4-flash` for AtlasCloud, `deepseek-reasoner` for Wanjie Ark, `DeepSeek-V4-Pro` for Volcengine Ark, `deepseek/deepseek-v4-pro` for OpenRouter and Novita, `mimo-v2.5-pro` for Xiaomi MiMo, `accounts/fireworks/models/deepseek-v4-pro` for Fireworks, `deepseek-ai/DeepSeek-V4-Pro` for SiliconFlow, `trinity-large-thinking` for Arcee AI, `kimi-k2.7-code` for Moonshot, `deepseek-ai/DeepSeek-V4-Pro` for SGLang/vLLM, and `deepseek-coder:1.3b` for Ollama. Hugging Face and Together AI both default to `deepseek-ai/DeepSeek-V4-Pro`. Current public DeepSeek IDs are `deepseek-v4-pro` and `deepseek-v4-flash`, both with 1M context windows, 384K max output, and thinking mode enabled by default. Legacy `deepseek-chat` and `deepseek-reasoner` remain compatibility aliases for `deepseek-v4-flash` until July 24, 2026, except SiliconFlow maps `deepseek-reasoner` and `deepseek-r1` to its Pro model while `deepseek-chat` and `deepseek-v3` map to Flash. Provider-specific mappings translate `deepseek-v4-pro` / `deepseek-v4-flash` to each provider's model ID where supported. 
OpenRouter also recognizes recent large IDs such as `arcee-ai/trinity-large-thinking`, `minimax/minimax-m3`, `minimax/minimax-2.7`, `xiaomi/mimo-v2.5-pro`, `qwen/qwen3.6-flash`, `qwen/qwen3.6-35b-a3b`, `qwen/qwen3.6-max-preview`, `qwen/qwen3.6-27b`, `qwen/qwen3.6-plus`, `qwen/qwen3.7-max`, `google/gemma-4-31b-it`, `moonshotai/kimi-k2.7-code`, `moonshotai/kimi-k2.6`, `nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free`, and `nvidia/nemotron-3-ultra-550b-a55b`; direct Arcee uses bare IDs such as `trinity-large-thinking` and `trinity-large-preview`; direct Moonshot recognizes `kimi-k2.7-code` and `kimi-k2.6`, with `kimi` and `kimi-k2` aliases selecting `kimi-k2.7-code`; direct Xiaomi MiMo recognizes chat IDs `mimo-v2.5-pro` and `mimo-v2.5`, while TTS IDs are selected through `codewhale speech` / `tts`. Generic `openai`, `atlascloud`, `wanjie-ark`, `xiaomi-mimo`, `arcee`, and Ollama model IDs are passed through unchanged after known aliases are normalized. OpenRouter and SiliconFlow provider configs with a custom `base_url` also preserve explicit model values, which lets OpenAI-compatible gateways accept bare model IDs. Use `/models` or `codewhale models` to discover live IDs from your configured endpoint. `CODEWHALE_MODEL` overrides this for a single process; `DEEPSEEK_MODEL` is the legacy alias. 
+- `default_text_model` (string, optional): defaults to `deepseek-v4-pro` for DeepSeek and generic OpenAI-compatible endpoints, `deepseek-ai/deepseek-v4-pro` for NVIDIA NIM, `deepseek-ai/deepseek-v4-flash` for AtlasCloud, `deepseek-reasoner` for Wanjie Ark, `DeepSeek-V4-Pro` for Volcengine Ark, `deepseek/deepseek-v4-pro` for OpenRouter and Novita, `mimo-v2.5-pro` for Xiaomi MiMo, `accounts/fireworks/models/deepseek-v4-pro` for Fireworks, `deepseek-ai/DeepSeek-V4-Pro` for SiliconFlow, `trinity-large-thinking` for Arcee AI, `kimi-k2.7-code` for Moonshot, `MiniMax-M3` for MiniMax, `GLM-5.1` for Z.ai, `step-3.7-flash` for StepFun, `deepseek-ai/DeepSeek-V4-Pro` for SGLang/vLLM, and `deepseek-coder:1.3b` for Ollama. Hugging Face and Together AI both default to `deepseek-ai/DeepSeek-V4-Pro`. Current public DeepSeek IDs are `deepseek-v4-pro` and `deepseek-v4-flash`, both with 1M context windows, 384K max output, and thinking mode enabled by default. Legacy `deepseek-chat` and `deepseek-reasoner` remain compatibility aliases for `deepseek-v4-flash` until July 24, 2026, except SiliconFlow maps `deepseek-reasoner` and `deepseek-r1` to its Pro model while `deepseek-chat` and `deepseek-v3` map to Flash. Provider-specific mappings translate `deepseek-v4-pro` / `deepseek-v4-flash` to each provider's model ID where supported. 
OpenRouter also recognizes recent large IDs such as `arcee-ai/trinity-large-thinking`, `minimax/minimax-m3`, `minimax/minimax-2.7`, `xiaomi/mimo-v2.5-pro`, `qwen/qwen3.6-flash`, `qwen/qwen3.6-35b-a3b`, `qwen/qwen3.6-max-preview`, `qwen/qwen3.6-27b`, `qwen/qwen3.6-plus`, `qwen/qwen3.7-max`, `google/gemma-4-31b-it`, `moonshotai/kimi-k2.7-code`, `moonshotai/kimi-k2.6`, `nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free`, and `nvidia/nemotron-3-ultra-550b-a55b`; direct Arcee uses bare IDs such as `trinity-large-thinking` and `trinity-large-preview`; direct Moonshot recognizes `kimi-k2.7-code`, `kimi-k2.6`, and Kimi Code's stable `kimi-for-coding`, with `kimi` and `kimi-k2` aliases selecting `kimi-k2.7-code`; direct MiniMax recognizes `MiniMax-M3` and the documented M2.x chat model IDs; direct Xiaomi MiMo recognizes chat IDs `mimo-v2.5-pro` and `mimo-v2.5`, while TTS IDs are selected through `codewhale speech` / `tts`. Generic `openai`, `atlascloud`, `wanjie-ark`, `xiaomi-mimo`, `arcee`, `moonshot`, `minimax`, `zai`, `stepfun`, and Ollama model IDs are passed through unchanged after known aliases are normalized. OpenRouter and SiliconFlow provider configs with a custom `base_url` also preserve explicit model values, which lets OpenAI-compatible gateways accept bare model IDs. Use `/models` or `codewhale models` to discover live IDs from your configured endpoint. `CODEWHALE_MODEL` overrides this for a single process; `DEEPSEEK_MODEL` is the legacy alias. - `reasoning_effort` (string, optional): `off`, `low`, `medium`, `high`, `max`, or `xhigh`; defaults to the configured UI tier. DeepSeek Platform receives top-level `thinking` / `reasoning_effort` fields. OpenAI Codex normalizes stale `off` to `low` and sends `max` as Responses `xhigh`. NVIDIA NIM receives equivalent settings through `chat_template_kwargs`. - `verbosity` (string, optional): `normal` or `concise`. `normal` keeps the default conversational prompt.
`concise` appends a prompt discipline block diff --git a/docs/PROVIDERS.md b/docs/PROVIDERS.md index d3a3578b..f0e87215 100644 --- a/docs/PROVIDERS.md +++ b/docs/PROVIDERS.md @@ -135,7 +135,8 @@ endpoint. | `siliconflow` | `[providers.siliconflow]` | `SILICONFLOW_API_KEY` | `SILICONFLOW_BASE_URL`; default `https://api.siliconflow.com/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | OpenAI-compatible hosted route. Official docs use the `.com` endpoint. `SILICONFLOW_MODEL` is accepted. Reasoning aliases `deepseek-reasoner` and `deepseek-r1` map to Pro; `deepseek-chat` and `deepseek-v3` map to Flash. | | `siliconflow-CN` | `[providers.siliconflow_cn]` | `SILICONFLOW_API_KEY` | `SILICONFLOW_BASE_URL`; default `https://api.siliconflow.cn/v1` | Uses the SiliconFlow model set | China regional SiliconFlow route. Falls back to `[providers.siliconflow]` for api_key / base_url / model when unset. Select it with `provider = "siliconflow-CN"` or `CODEWHALE_PROVIDER=siliconflow-CN`. | | `arcee` | `[providers.arcee]` | `ARCEE_API_KEY` | `ARCEE_BASE_URL`; default `https://api.arcee.ai/api/v1` | `trinity-large-thinking`, `trinity-large-preview` | Arcee AI direct OpenAI-compatible route, tracked as 256K-context BF16 serving. `ARCEE_MODEL` is accepted. OpenRouter's `arcee-ai/trinity-large-thinking` remains the OpenRouter namespaced model ID; direct Arcee uses the bare `trinity-large-thinking` ID. | -| `moonshot` | `[providers.moonshot]` | `MOONSHOT_API_KEY`, `KIMI_API_KEY` | `MOONSHOT_BASE_URL`, `KIMI_BASE_URL`; default `https://api.moonshot.ai/v1` | `kimi-k2.7-code`, `kimi-k2.6`; Kimi Code path uses `kimi-for-coding` at `https://api.kimi.com/coding/v1` | Moonshot/Kimi route. `kimi` and `kimi-k2` aliases select `kimi-k2.7-code`; `MOONSHOT_MODEL`, `KIMI_MODEL_NAME`, and `KIMI_MODEL` are accepted. `[providers.moonshot] auth_mode = "kimi_oauth"` reads Kimi Code OAuth credentials from `KIMI_CODE_HOME`/`~/.kimi-code`, with legacy `KIMI_SHARE_DIR`/`~/.kimi` fallback. 
| +| `moonshot` | `[providers.moonshot]` | `MOONSHOT_API_KEY`, `KIMI_API_KEY` | `MOONSHOT_BASE_URL`, `KIMI_BASE_URL`; default `https://api.moonshot.ai/v1` | `kimi-k2.7-code`, `kimi-k2.6`; Kimi Code path uses `kimi-for-coding` at `https://api.kimi.com/coding/v1` | Moonshot/Kimi route. `kimi` and `kimi-k2` aliases select `kimi-k2.7-code`; `MOONSHOT_MODEL`, `KIMI_MODEL_NAME`, and `KIMI_MODEL` are accepted. Kimi thinking streams through `reasoning_content`; CodeWhale keeps it in Thinking cells and replays it for thinking/tool-call continuity. `[providers.moonshot] auth_mode = "kimi_oauth"` reads Kimi Code OAuth credentials from `KIMI_CODE_HOME`/`~/.kimi-code`, with legacy `KIMI_SHARE_DIR`/`~/.kimi` fallback. | +| `minimax` | `[providers.minimax]` | `MINIMAX_API_KEY` | `MINIMAX_BASE_URL`; default `https://api.minimax.io/v1` | `MiniMax-M3`, `MiniMax-M2.7`, `MiniMax-M2.7-highspeed`, `MiniMax-M2.5`, `MiniMax-M2.5-highspeed`, `MiniMax-M2.1`, `MiniMax-M2.1-highspeed`, `MiniMax-M2` | MiniMax direct OpenAI-compatible route. CodeWhale sends `reasoning_split = true` so MiniMax thinking arrives separately from answer text, and direct MiniMax IDs stay distinct from OpenRouter namespaced IDs such as `minimax/minimax-m3`. | | `sglang` | `[providers.sglang]` | Optional `SGLANG_API_KEY` | `SGLANG_BASE_URL`; default `http://localhost:30000/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | Self-hosted OpenAI-compatible route. Localhost deployments commonly omit auth. `SGLANG_MODEL` is accepted. | | `vllm` | `[providers.vllm]` | Optional `VLLM_API_KEY` | `VLLM_BASE_URL`; default `http://localhost:8000/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | Self-hosted vLLM OpenAI-compatible route. Localhost deployments commonly omit auth. `VLLM_MODEL` is accepted. 
| | `ollama` | `[providers.ollama]` | Optional `OLLAMA_API_KEY` | `OLLAMA_BASE_URL`; default `http://localhost:11434/v1` | `deepseek-coder:1.3b`; provider-hinted custom tags pass through | Self-hosted Ollama OpenAI-compatible route. Localhost deployments commonly omit auth. `OLLAMA_MODEL` is accepted. | @@ -220,6 +221,7 @@ endpoint when the endpoint supports model listing. | `siliconflow` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | yes | yes | | `arcee` | `trinity-large-thinking`, `trinity-large-preview`; provider-hinted custom model IDs pass through | yes | yes for `trinity-large-thinking`; no for `trinity-large-preview` | | `moonshot` | `kimi-k2.7-code`, `kimi-k2.6` | yes | yes | +| `minimax` | `MiniMax-M3`, `MiniMax-M2.7`, `MiniMax-M2.7-highspeed`, `MiniMax-M2.5`, `MiniMax-M2.5-highspeed`, `MiniMax-M2.1`, `MiniMax-M2.1-highspeed`, `MiniMax-M2` | yes | yes | | `sglang` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | yes | yes | | `vllm` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | yes | yes | | `ollama` | `deepseek-coder:1.3b`; custom tags pass through when provider hint is `ollama` | yes | no | @@ -259,7 +261,10 @@ Anthropic uses Messages, and `openai-codex` uses Responses. 
| Wanjie Ark `reasoner` / `r1` model IDs | 128,000 | 4,096 | yes | no | not documented in code | | Direct Arcee API `trinity-large-thinking` | 262,144 | 262,144 | yes | no | not documented in code | | Direct Arcee API `trinity-large-preview` | 262,144 | 4,096 | no in doctor capability metadata | no | not documented in code | -| Generic `openai`, AtlasCloud, and Moonshot/Kimi | 128,000 | 4,096 | no in doctor capability metadata | no | not documented in code | +| Direct Moonshot/Kimi `kimi-k2.7-code`, `kimi-k2.6`, `kimi-for-coding` | 262,144 | 262,144 | yes | no | not documented in code | +| Direct MiniMax `MiniMax-M3` | 1,000,000 | 524,288 | yes | no | not documented in code | +| Direct MiniMax M2.x models | 204,800 | 4,096 fallback until MiniMax output metadata is promoted | yes | no | not documented in code | +| Generic `openai` and AtlasCloud | 128,000 | 4,096 | no in doctor capability metadata | no | not documented in code | | Ollama | 8,192 | 4,096 | no | no | not documented in code | | Hugging Face Inference Providers V4 model IDs | 131,072 | 4,096 | yes | no | not documented in code | | Other recognized DeepSeek model IDs | 128,000 unless the model name carries an explicit `Nk` hint | 4,096 | no unless V4/reasoner logic matches | DeepSeek/NIM only | DeepSeek beta only | @@ -319,6 +324,7 @@ receive no reasoning fields at all for that tier. 
| `moonshot` | `thinking: {type: disabled}` | `thinking: {type: enabled}` | `thinking: {type: enabled}` | | `ollama` | `think: false` | `think: true` | `think: true` | | `xiaomi-mimo` | `thinking: {type: disabled}` | `thinking: {type: enabled}` | `thinking: {type: enabled}` | +| `minimax` | `reasoning_split: true` + `thinking: {type: disabled}` | `reasoning_split: true` + `thinking: {type: adaptive}` | `reasoning_split: true` + `thinking: {type: adaptive}` | | `nvidia-nim` | `chat_template_kwargs.thinking: false` | `chat_template_kwargs`: `thinking: true` + `reasoning_effort: "high"` | `chat_template_kwargs`: `thinking: true` + `reasoning_effort: "max"` | | `vllm` | `chat_template_kwargs.enable_thinking: false` | `chat_template_kwargs.enable_thinking: true` + `reasoning_effort` low/medium/high | `chat_template_kwargs.enable_thinking: true` + `reasoning_effort: "high"` (vLLM has no max tier) | | `arcee`, `huggingface` | omitted | `reasoning_effort` pass-through | `reasoning_effort: "high"` |