merge #1310 MiniMax provider route
This commit is contained in:
@@ -782,6 +782,7 @@ fn provider_slot(provider: ProviderKind) -> &'static str {
|
||||
ProviderKind::Anthropic => "anthropic",
|
||||
ProviderKind::Zai => "zai",
|
||||
ProviderKind::Stepfun => "stepfun",
|
||||
ProviderKind::Minimax => "minimax",
|
||||
}
|
||||
}
|
||||
|
||||
@@ -919,6 +920,7 @@ fn provider_env_vars(provider: ProviderKind) -> &'static [&'static str] {
|
||||
ProviderKind::Anthropic => &["ANTHROPIC_API_KEY"],
|
||||
ProviderKind::Zai => &["ZAI_API_KEY", "Z_AI_API_KEY"],
|
||||
ProviderKind::Stepfun => &["STEPFUN_API_KEY", "STEP_API_KEY"],
|
||||
ProviderKind::Minimax => &["MINIMAX_API_KEY"],
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
+185
-12
@@ -108,6 +108,16 @@ const DEFAULT_ZAI_BASE_URL: &str = "https://api.z.ai/api/coding/paas/v4";
|
||||
// StepFun / StepFlash defaults
|
||||
const DEFAULT_STEPFUN_MODEL: &str = "step-3.7-flash";
|
||||
const DEFAULT_STEPFUN_BASE_URL: &str = "https://api.stepfun.ai/v1";
|
||||
// MiniMax defaults
|
||||
const DEFAULT_MINIMAX_MODEL: &str = "MiniMax-M3";
|
||||
const MINIMAX_M2_7_MODEL: &str = "MiniMax-M2.7";
|
||||
const MINIMAX_M2_7_HIGHSPEED_MODEL: &str = "MiniMax-M2.7-highspeed";
|
||||
const MINIMAX_M2_5_MODEL: &str = "MiniMax-M2.5";
|
||||
const MINIMAX_M2_5_HIGHSPEED_MODEL: &str = "MiniMax-M2.5-highspeed";
|
||||
const MINIMAX_M2_1_MODEL: &str = "MiniMax-M2.1";
|
||||
const MINIMAX_M2_1_HIGHSPEED_MODEL: &str = "MiniMax-M2.1-highspeed";
|
||||
const MINIMAX_M2_MODEL: &str = "MiniMax-M2";
|
||||
const DEFAULT_MINIMAX_BASE_URL: &str = "https://api.minimax.io/v1";
|
||||
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
|
||||
#[serde(rename_all = "kebab-case")]
|
||||
@@ -165,11 +175,7 @@ pub enum ProviderKind {
|
||||
OpenaiCodex,
|
||||
#[serde(alias = "claude")]
|
||||
Anthropic,
|
||||
#[serde(
|
||||
alias = "z-ai",
|
||||
alias = "z_ai",
|
||||
alias = "z.ai"
|
||||
)]
|
||||
#[serde(alias = "z-ai", alias = "z_ai", alias = "z.ai")]
|
||||
Zai,
|
||||
#[serde(
|
||||
alias = "step-fun",
|
||||
@@ -180,10 +186,12 @@ pub enum ProviderKind {
|
||||
alias = "step_flash"
|
||||
)]
|
||||
Stepfun,
|
||||
#[serde(alias = "mini-max", alias = "mini_max", alias = "minimax")]
|
||||
Minimax,
|
||||
}
|
||||
|
||||
impl ProviderKind {
|
||||
pub const ALL: [Self; 23] = [
|
||||
pub const ALL: [Self; 24] = [
|
||||
Self::Deepseek,
|
||||
Self::NvidiaNim,
|
||||
Self::Openai,
|
||||
@@ -207,6 +215,7 @@ impl ProviderKind {
|
||||
Self::Anthropic,
|
||||
Self::Zai,
|
||||
Self::Stepfun,
|
||||
Self::Minimax,
|
||||
];
|
||||
|
||||
#[must_use]
|
||||
@@ -235,6 +244,7 @@ impl ProviderKind {
|
||||
Self::Anthropic,
|
||||
Self::Zai,
|
||||
Self::Stepfun,
|
||||
Self::Minimax,
|
||||
]
|
||||
}
|
||||
|
||||
@@ -343,12 +353,7 @@ pub struct ProvidersToml {
|
||||
pub openai_codex: ProviderConfigToml,
|
||||
#[serde(default)]
|
||||
pub anthropic: ProviderConfigToml,
|
||||
#[serde(
|
||||
default,
|
||||
alias = "z-ai",
|
||||
alias = "z_ai",
|
||||
alias = "z.ai"
|
||||
)]
|
||||
#[serde(default, alias = "z-ai", alias = "z_ai", alias = "z.ai")]
|
||||
pub zai: ProviderConfigToml,
|
||||
#[serde(
|
||||
default,
|
||||
@@ -360,6 +365,8 @@ pub struct ProvidersToml {
|
||||
alias = "step_flash"
|
||||
)]
|
||||
pub stepfun: ProviderConfigToml,
|
||||
#[serde(default, alias = "mini-max", alias = "mini_max", alias = "minimax")]
|
||||
pub minimax: ProviderConfigToml,
|
||||
}
|
||||
|
||||
/// Sibling `permissions.toml` schema.
|
||||
@@ -413,6 +420,7 @@ impl ProvidersToml {
|
||||
ProviderKind::Anthropic => &self.anthropic,
|
||||
ProviderKind::Zai => &self.zai,
|
||||
ProviderKind::Stepfun => &self.stepfun,
|
||||
ProviderKind::Minimax => &self.minimax,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -441,6 +449,7 @@ impl ProvidersToml {
|
||||
ProviderKind::Anthropic => &mut self.anthropic,
|
||||
ProviderKind::Zai => &mut self.zai,
|
||||
ProviderKind::Stepfun => &mut self.stepfun,
|
||||
ProviderKind::Minimax => &mut self.minimax,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2166,6 +2175,7 @@ impl ConfigToml {
|
||||
ProviderKind::Anthropic => DEFAULT_ANTHROPIC_BASE_URL.to_string(),
|
||||
ProviderKind::Zai => DEFAULT_ZAI_BASE_URL.to_string(),
|
||||
ProviderKind::Stepfun => DEFAULT_STEPFUN_BASE_URL.to_string(),
|
||||
ProviderKind::Minimax => DEFAULT_MINIMAX_BASE_URL.to_string(),
|
||||
})
|
||||
};
|
||||
// CLI flag wins outright. Otherwise: config-file → injected secrets/env.
|
||||
@@ -2379,6 +2389,11 @@ fn normalize_model_for_provider(provider: ProviderKind, model: &str) -> String {
|
||||
{
|
||||
return canonical.to_string();
|
||||
}
|
||||
if matches!(provider, ProviderKind::Minimax)
|
||||
&& let Some(canonical) = canonical_minimax_model_id(model)
|
||||
{
|
||||
return canonical.to_string();
|
||||
}
|
||||
|
||||
if matches!(
|
||||
provider,
|
||||
@@ -2386,6 +2401,9 @@ fn normalize_model_for_provider(provider: ProviderKind, model: &str) -> String {
|
||||
| ProviderKind::WanjieArk
|
||||
| ProviderKind::Volcengine
|
||||
| ProviderKind::XiaomiMimo
|
||||
| ProviderKind::Zai
|
||||
| ProviderKind::Stepfun
|
||||
| ProviderKind::Minimax
|
||||
| ProviderKind::Ollama
|
||||
) {
|
||||
return model.to_string();
|
||||
@@ -2539,6 +2557,39 @@ fn canonical_xiaomi_mimo_model_id(model: &str) -> Option<&'static str> {
|
||||
}
|
||||
}
|
||||
|
||||
fn canonical_minimax_model_id(model: &str) -> Option<&'static str> {
|
||||
let normalized = model.trim().to_ascii_lowercase();
|
||||
let normalized = normalized.replace(['_', ' '], "-");
|
||||
match normalized.as_str() {
|
||||
"minimax" | "minimax-m3" | "minimax-m-3" | "minimax-m-3-thinking" => {
|
||||
Some(DEFAULT_MINIMAX_MODEL)
|
||||
}
|
||||
"minimax-m2.7" | "minimax-m2-7" | "minimax-m-2.7" | "minimax-m-2-7" => {
|
||||
Some(MINIMAX_M2_7_MODEL)
|
||||
}
|
||||
"minimax-m2.7-highspeed"
|
||||
| "minimax-m2-7-highspeed"
|
||||
| "minimax-m-2.7-highspeed"
|
||||
| "minimax-m-2-7-highspeed" => Some(MINIMAX_M2_7_HIGHSPEED_MODEL),
|
||||
"minimax-m2.5" | "minimax-m2-5" | "minimax-m-2.5" | "minimax-m-2-5" => {
|
||||
Some(MINIMAX_M2_5_MODEL)
|
||||
}
|
||||
"minimax-m2.5-highspeed"
|
||||
| "minimax-m2-5-highspeed"
|
||||
| "minimax-m-2.5-highspeed"
|
||||
| "minimax-m-2-5-highspeed" => Some(MINIMAX_M2_5_HIGHSPEED_MODEL),
|
||||
"minimax-m2.1" | "minimax-m2-1" | "minimax-m-2.1" | "minimax-m-2-1" => {
|
||||
Some(MINIMAX_M2_1_MODEL)
|
||||
}
|
||||
"minimax-m2.1-highspeed"
|
||||
| "minimax-m2-1-highspeed"
|
||||
| "minimax-m-2.1-highspeed"
|
||||
| "minimax-m-2-1-highspeed" => Some(MINIMAX_M2_1_HIGHSPEED_MODEL),
|
||||
"minimax-m2" | "minimax-m-2" => Some(MINIMAX_M2_MODEL),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn canonical_openrouter_recent_model_id(model: &str) -> Option<&'static str> {
|
||||
let normalized = model.trim().to_ascii_lowercase();
|
||||
let normalized = normalized.replace(['_', ' '], "-");
|
||||
@@ -2634,6 +2685,7 @@ fn default_model_for_provider(provider: ProviderKind) -> &'static str {
|
||||
ProviderKind::Anthropic => DEFAULT_ANTHROPIC_MODEL,
|
||||
ProviderKind::Zai => DEFAULT_ZAI_MODEL,
|
||||
ProviderKind::Stepfun => DEFAULT_STEPFUN_MODEL,
|
||||
ProviderKind::Minimax => DEFAULT_MINIMAX_MODEL,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2662,6 +2714,7 @@ fn default_base_url_for_provider(provider: ProviderKind) -> &'static str {
|
||||
ProviderKind::Anthropic => DEFAULT_ANTHROPIC_BASE_URL,
|
||||
ProviderKind::Zai => DEFAULT_ZAI_BASE_URL,
|
||||
ProviderKind::Stepfun => DEFAULT_STEPFUN_BASE_URL,
|
||||
ProviderKind::Minimax => DEFAULT_MINIMAX_BASE_URL,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3447,6 +3500,8 @@ struct EnvRuntimeOverrides {
|
||||
zai_model: Option<String>,
|
||||
stepfun_base_url: Option<String>,
|
||||
stepfun_model: Option<String>,
|
||||
minimax_base_url: Option<String>,
|
||||
minimax_model: Option<String>,
|
||||
}
|
||||
|
||||
impl EnvRuntimeOverrides {
|
||||
@@ -3634,6 +3689,12 @@ impl EnvRuntimeOverrides {
|
||||
.or_else(|_| std::env::var("STEP_MODEL"))
|
||||
.ok()
|
||||
.filter(|v| !v.trim().is_empty()),
|
||||
minimax_base_url: std::env::var("MINIMAX_BASE_URL")
|
||||
.ok()
|
||||
.filter(|v| !v.trim().is_empty()),
|
||||
minimax_model: std::env::var("MINIMAX_MODEL")
|
||||
.ok()
|
||||
.filter(|v| !v.trim().is_empty()),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3679,6 +3740,7 @@ impl EnvRuntimeOverrides {
|
||||
ProviderKind::Anthropic => self.anthropic_base_url.clone(),
|
||||
ProviderKind::Zai => self.zai_base_url.clone(),
|
||||
ProviderKind::Stepfun => self.stepfun_base_url.clone(),
|
||||
ProviderKind::Minimax => self.minimax_base_url.clone(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3699,6 +3761,9 @@ impl EnvRuntimeOverrides {
|
||||
ProviderKind::Together => self.together_model.clone(),
|
||||
ProviderKind::OpenaiCodex => self.openai_codex_model.clone(),
|
||||
ProviderKind::Anthropic => self.anthropic_model.clone(),
|
||||
ProviderKind::Zai => self.zai_model.clone(),
|
||||
ProviderKind::Stepfun => self.stepfun_model.clone(),
|
||||
ProviderKind::Minimax => self.minimax_model.clone(),
|
||||
_ => None,
|
||||
}?;
|
||||
|
||||
@@ -4096,6 +4161,17 @@ action = "mode.agent"
|
||||
kimi_base_url: Option<OsString>,
|
||||
kimi_model: Option<OsString>,
|
||||
kimi_model_name: Option<OsString>,
|
||||
zai_api_key: Option<OsString>,
|
||||
z_ai_api_key: Option<OsString>,
|
||||
zai_base_url: Option<OsString>,
|
||||
zai_model: Option<OsString>,
|
||||
stepfun_api_key: Option<OsString>,
|
||||
step_api_key: Option<OsString>,
|
||||
stepfun_base_url: Option<OsString>,
|
||||
stepfun_model: Option<OsString>,
|
||||
minimax_api_key: Option<OsString>,
|
||||
minimax_base_url: Option<OsString>,
|
||||
minimax_model: Option<OsString>,
|
||||
sglang_api_key: Option<OsString>,
|
||||
sglang_base_url: Option<OsString>,
|
||||
vllm_api_key: Option<OsString>,
|
||||
@@ -4179,6 +4255,17 @@ action = "mode.agent"
|
||||
kimi_base_url: env::var_os("KIMI_BASE_URL"),
|
||||
kimi_model: env::var_os("KIMI_MODEL"),
|
||||
kimi_model_name: env::var_os("KIMI_MODEL_NAME"),
|
||||
zai_api_key: env::var_os("ZAI_API_KEY"),
|
||||
z_ai_api_key: env::var_os("Z_AI_API_KEY"),
|
||||
zai_base_url: env::var_os("ZAI_BASE_URL"),
|
||||
zai_model: env::var_os("ZAI_MODEL"),
|
||||
stepfun_api_key: env::var_os("STEPFUN_API_KEY"),
|
||||
step_api_key: env::var_os("STEP_API_KEY"),
|
||||
stepfun_base_url: env::var_os("STEPFUN_BASE_URL"),
|
||||
stepfun_model: env::var_os("STEPFUN_MODEL"),
|
||||
minimax_api_key: env::var_os("MINIMAX_API_KEY"),
|
||||
minimax_base_url: env::var_os("MINIMAX_BASE_URL"),
|
||||
minimax_model: env::var_os("MINIMAX_MODEL"),
|
||||
sglang_api_key: env::var_os("SGLANG_API_KEY"),
|
||||
sglang_base_url: env::var_os("SGLANG_BASE_URL"),
|
||||
vllm_api_key: env::var_os("VLLM_API_KEY"),
|
||||
@@ -4257,6 +4344,17 @@ action = "mode.agent"
|
||||
env::remove_var("KIMI_BASE_URL");
|
||||
env::remove_var("KIMI_MODEL");
|
||||
env::remove_var("KIMI_MODEL_NAME");
|
||||
env::remove_var("ZAI_API_KEY");
|
||||
env::remove_var("Z_AI_API_KEY");
|
||||
env::remove_var("ZAI_BASE_URL");
|
||||
env::remove_var("ZAI_MODEL");
|
||||
env::remove_var("STEPFUN_API_KEY");
|
||||
env::remove_var("STEP_API_KEY");
|
||||
env::remove_var("STEPFUN_BASE_URL");
|
||||
env::remove_var("STEPFUN_MODEL");
|
||||
env::remove_var("MINIMAX_API_KEY");
|
||||
env::remove_var("MINIMAX_BASE_URL");
|
||||
env::remove_var("MINIMAX_MODEL");
|
||||
env::remove_var("SGLANG_API_KEY");
|
||||
env::remove_var("SGLANG_BASE_URL");
|
||||
env::remove_var("VLLM_API_KEY");
|
||||
@@ -4361,6 +4459,17 @@ action = "mode.agent"
|
||||
Self::restore_var("KIMI_BASE_URL", self.kimi_base_url.take());
|
||||
Self::restore_var("KIMI_MODEL", self.kimi_model.take());
|
||||
Self::restore_var("KIMI_MODEL_NAME", self.kimi_model_name.take());
|
||||
Self::restore_var("ZAI_API_KEY", self.zai_api_key.take());
|
||||
Self::restore_var("Z_AI_API_KEY", self.z_ai_api_key.take());
|
||||
Self::restore_var("ZAI_BASE_URL", self.zai_base_url.take());
|
||||
Self::restore_var("ZAI_MODEL", self.zai_model.take());
|
||||
Self::restore_var("STEPFUN_API_KEY", self.stepfun_api_key.take());
|
||||
Self::restore_var("STEP_API_KEY", self.step_api_key.take());
|
||||
Self::restore_var("STEPFUN_BASE_URL", self.stepfun_base_url.take());
|
||||
Self::restore_var("STEPFUN_MODEL", self.stepfun_model.take());
|
||||
Self::restore_var("MINIMAX_API_KEY", self.minimax_api_key.take());
|
||||
Self::restore_var("MINIMAX_BASE_URL", self.minimax_base_url.take());
|
||||
Self::restore_var("MINIMAX_MODEL", self.minimax_model.take());
|
||||
Self::restore_var("SGLANG_API_KEY", self.sglang_api_key.take());
|
||||
Self::restore_var("SGLANG_BASE_URL", self.sglang_base_url.take());
|
||||
Self::restore_var("VLLM_API_KEY", self.vllm_api_key.take());
|
||||
@@ -5730,6 +5839,70 @@ mode = "token-plan-usa"
|
||||
assert_eq!(resolved.model, DEFAULT_MOONSHOT_MODEL);
|
||||
}
|
||||
|
||||
/// With no config, env, or CLI overrides, each first-party provider resolves
/// to its own default base URL and default model.
#[test]
fn zai_stepfun_and_minimax_default_to_first_party_routes() {
    let _lock = env_lock();
    let _env = EnvGuard::without_deepseek_runtime_overrides();

    // (provider, expected base URL, expected model)
    let expectations = [
        (ProviderKind::Zai, DEFAULT_ZAI_BASE_URL, DEFAULT_ZAI_MODEL),
        (
            ProviderKind::Stepfun,
            DEFAULT_STEPFUN_BASE_URL,
            DEFAULT_STEPFUN_MODEL,
        ),
        (
            ProviderKind::Minimax,
            DEFAULT_MINIMAX_BASE_URL,
            DEFAULT_MINIMAX_MODEL,
        ),
    ];

    for (provider, want_base_url, want_model) in expectations {
        let config = ConfigToml {
            provider,
            ..ConfigToml::default()
        };
        let no_cli_overrides = CliRuntimeOverrides::default();
        let resolved = config.resolve_runtime_options(&no_cli_overrides);

        assert_eq!(resolved.provider, provider);
        assert_eq!(resolved.base_url, want_base_url);
        assert_eq!(resolved.model, want_model);
    }
}
|
||||
|
||||
/// Provider, model, and base URL supplied through the environment are used
/// as-is when the model id is already in canonical form.
#[test]
fn first_party_provider_env_model_overrides_pass_through() {
    let _lock = env_lock();
    let _env = EnvGuard::without_deepseek_runtime_overrides();

    let env_overrides = [
        ("CODEWHALE_PROVIDER", "minimax"),
        ("MINIMAX_MODEL", "MiniMax-M2.7-highspeed"),
        ("MINIMAX_BASE_URL", "https://minimax.example/v1"),
    ];
    for (name, value) in env_overrides {
        // SAFETY: `_lock` from `env_lock()` is held for the whole test —
        // presumably serialising environment mutation across tests; confirm
        // against `env_lock`.
        unsafe {
            env::set_var(name, value);
        }
    }

    let no_cli_overrides = CliRuntimeOverrides::default();
    let resolved = ConfigToml::default().resolve_runtime_options(&no_cli_overrides);

    assert_eq!(resolved.provider, ProviderKind::Minimax);
    assert_eq!(resolved.base_url, "https://minimax.example/v1");
    assert_eq!(resolved.model, "MiniMax-M2.7-highspeed");
}
|
||||
|
||||
/// A lower-case, dash-separated alias in `MINIMAX_MODEL` is rewritten to the
/// canonical MiniMax model id during runtime option resolution.
#[test]
fn minimax_env_model_override_canonicalizes_known_aliases() {
    let _lock = env_lock();
    let _env = EnvGuard::without_deepseek_runtime_overrides();

    let env_overrides = [
        ("CODEWHALE_PROVIDER", "minimax"),
        ("MINIMAX_MODEL", "minimax-m2-5-highspeed"),
    ];
    for (name, value) in env_overrides {
        // SAFETY: `_lock` from `env_lock()` is held for the whole test —
        // presumably serialising environment mutation across tests; confirm
        // against `env_lock`.
        unsafe {
            env::set_var(name, value);
        }
    }

    let no_cli_overrides = CliRuntimeOverrides::default();
    let resolved = ConfigToml::default().resolve_runtime_options(&no_cli_overrides);

    assert_eq!(resolved.provider, ProviderKind::Minimax);
    assert_eq!(resolved.model, "MiniMax-M2.5-highspeed");
}
|
||||
|
||||
#[test]
|
||||
fn moonshot_provider_preserves_explicit_kimi_k26() {
|
||||
let _lock = env_lock();
|
||||
|
||||
@@ -8,18 +8,18 @@ use super::{
|
||||
DEFAULT_ARCEE_BASE_URL, DEFAULT_ARCEE_MODEL, DEFAULT_ATLASCLOUD_BASE_URL,
|
||||
DEFAULT_ATLASCLOUD_MODEL, DEFAULT_DEEPSEEK_BASE_URL, DEFAULT_DEEPSEEK_MODEL,
|
||||
DEFAULT_FIREWORKS_BASE_URL, DEFAULT_FIREWORKS_MODEL, DEFAULT_HUGGINGFACE_BASE_URL,
|
||||
DEFAULT_HUGGINGFACE_MODEL, DEFAULT_MOONSHOT_BASE_URL, DEFAULT_MOONSHOT_MODEL,
|
||||
DEFAULT_NOVITA_BASE_URL, DEFAULT_NOVITA_MODEL, DEFAULT_NVIDIA_NIM_BASE_URL,
|
||||
DEFAULT_NVIDIA_NIM_MODEL, DEFAULT_OLLAMA_BASE_URL, DEFAULT_OLLAMA_MODEL,
|
||||
DEFAULT_OPENAI_BASE_URL, DEFAULT_OPENAI_CODEX_BASE_URL, DEFAULT_OPENAI_CODEX_MODEL,
|
||||
DEFAULT_OPENAI_MODEL, DEFAULT_OPENROUTER_BASE_URL, DEFAULT_OPENROUTER_MODEL,
|
||||
DEFAULT_SGLANG_BASE_URL, DEFAULT_SGLANG_MODEL, DEFAULT_SILICONFLOW_BASE_URL,
|
||||
DEFAULT_SILICONFLOW_CN_BASE_URL, DEFAULT_SILICONFLOW_MODEL, DEFAULT_STEPFUN_BASE_URL,
|
||||
DEFAULT_STEPFUN_MODEL, DEFAULT_TOGETHER_BASE_URL, DEFAULT_TOGETHER_MODEL,
|
||||
DEFAULT_VLLM_BASE_URL, DEFAULT_VLLM_MODEL, DEFAULT_VOLCENGINE_BASE_URL,
|
||||
DEFAULT_VOLCENGINE_MODEL, DEFAULT_WANJIE_ARK_BASE_URL, DEFAULT_WANJIE_ARK_MODEL,
|
||||
DEFAULT_XIAOMI_MIMO_BASE_URL, DEFAULT_XIAOMI_MIMO_MODEL, DEFAULT_ZAI_BASE_URL,
|
||||
DEFAULT_ZAI_MODEL, ProviderKind,
|
||||
DEFAULT_HUGGINGFACE_MODEL, DEFAULT_MINIMAX_BASE_URL, DEFAULT_MINIMAX_MODEL,
|
||||
DEFAULT_MOONSHOT_BASE_URL, DEFAULT_MOONSHOT_MODEL, DEFAULT_NOVITA_BASE_URL,
|
||||
DEFAULT_NOVITA_MODEL, DEFAULT_NVIDIA_NIM_BASE_URL, DEFAULT_NVIDIA_NIM_MODEL,
|
||||
DEFAULT_OLLAMA_BASE_URL, DEFAULT_OLLAMA_MODEL, DEFAULT_OPENAI_BASE_URL,
|
||||
DEFAULT_OPENAI_CODEX_BASE_URL, DEFAULT_OPENAI_CODEX_MODEL, DEFAULT_OPENAI_MODEL,
|
||||
DEFAULT_OPENROUTER_BASE_URL, DEFAULT_OPENROUTER_MODEL, DEFAULT_SGLANG_BASE_URL,
|
||||
DEFAULT_SGLANG_MODEL, DEFAULT_SILICONFLOW_BASE_URL, DEFAULT_SILICONFLOW_CN_BASE_URL,
|
||||
DEFAULT_SILICONFLOW_MODEL, DEFAULT_STEPFUN_BASE_URL, DEFAULT_STEPFUN_MODEL,
|
||||
DEFAULT_TOGETHER_BASE_URL, DEFAULT_TOGETHER_MODEL, DEFAULT_VLLM_BASE_URL, DEFAULT_VLLM_MODEL,
|
||||
DEFAULT_VOLCENGINE_BASE_URL, DEFAULT_VOLCENGINE_MODEL, DEFAULT_WANJIE_ARK_BASE_URL,
|
||||
DEFAULT_WANJIE_ARK_MODEL, DEFAULT_XIAOMI_MIMO_BASE_URL, DEFAULT_XIAOMI_MIMO_MODEL,
|
||||
DEFAULT_ZAI_BASE_URL, DEFAULT_ZAI_MODEL, ProviderKind,
|
||||
};
|
||||
|
||||
/// Wire protocol spoken by a provider.
|
||||
@@ -460,6 +460,18 @@ provider!(
|
||||
aliases: ["step-fun", "step_fun", "stepflash", "step-flash", "step_flash"]
|
||||
);
|
||||
|
||||
provider!(
|
||||
Minimax,
|
||||
Minimax,
|
||||
"minimax",
|
||||
"MiniMax",
|
||||
DEFAULT_MINIMAX_BASE_URL,
|
||||
DEFAULT_MINIMAX_MODEL,
|
||||
["MINIMAX_API_KEY"],
|
||||
"minimax",
|
||||
aliases: ["mini-max", "mini_max"]
|
||||
);
|
||||
|
||||
static DEEPSEEK: Deepseek = Deepseek;
|
||||
static NVIDIA_NIM: NvidiaNim = NvidiaNim;
|
||||
static OPENAI: Openai = Openai;
|
||||
@@ -483,8 +495,9 @@ static OPENAI_CODEX: OpenaiCodex = OpenaiCodex;
|
||||
static ANTHROPIC: Anthropic = Anthropic;
|
||||
static ZAI: Zai = Zai;
|
||||
static STEPFUN: Stepfun = Stepfun;
|
||||
static MINIMAX: Minimax = Minimax;
|
||||
|
||||
static PROVIDER_REGISTRY: [&dyn Provider; 23] = [
|
||||
static PROVIDER_REGISTRY: [&dyn Provider; 24] = [
|
||||
&DEEPSEEK,
|
||||
&NVIDIA_NIM,
|
||||
&OPENAI,
|
||||
@@ -508,6 +521,7 @@ static PROVIDER_REGISTRY: [&dyn Provider; 23] = [
|
||||
&ANTHROPIC,
|
||||
&ZAI,
|
||||
&STEPFUN,
|
||||
&MINIMAX,
|
||||
];
|
||||
|
||||
/// Return all built-in provider metadata entries in `ProviderKind::ALL` order.
|
||||
|
||||
@@ -1235,6 +1235,13 @@ pub(super) fn apply_reasoning_effort(
|
||||
effort: Option<&str>,
|
||||
provider: ApiProvider,
|
||||
) {
|
||||
if matches!(provider, ApiProvider::Minimax) {
|
||||
// MiniMax's OpenAI-compatible API keeps thinking inside `content`
|
||||
// unless reasoning_split is enabled. Always request the split shape
|
||||
// so private thinking renders as Thinking cells rather than answer
|
||||
// prose.
|
||||
body["reasoning_split"] = json!(true);
|
||||
}
|
||||
let Some(effort) = effort else {
|
||||
return;
|
||||
};
|
||||
@@ -1294,6 +1301,9 @@ pub(super) fn apply_reasoning_effort(
|
||||
"thinking": false,
|
||||
});
|
||||
}
|
||||
ApiProvider::Minimax => {
|
||||
body["thinking"] = json!({ "type": "disabled" });
|
||||
}
|
||||
ApiProvider::Zai | ApiProvider::Stepfun => {}
|
||||
},
|
||||
"low" | "minimal" | "medium" | "mid" | "high" | "" => match provider {
|
||||
@@ -1368,6 +1378,9 @@ pub(super) fn apply_reasoning_effort(
|
||||
"reasoning_effort": "high",
|
||||
});
|
||||
}
|
||||
ApiProvider::Minimax => {
|
||||
body["thinking"] = json!({ "type": "adaptive" });
|
||||
}
|
||||
ApiProvider::Zai | ApiProvider::Stepfun => {}
|
||||
},
|
||||
"xhigh" | "max" | "highest" => match provider {
|
||||
@@ -1422,6 +1435,9 @@ pub(super) fn apply_reasoning_effort(
|
||||
"reasoning_effort": "max",
|
||||
});
|
||||
}
|
||||
ApiProvider::Minimax => {
|
||||
body["thinking"] = json!({ "type": "adaptive" });
|
||||
}
|
||||
ApiProvider::Zai | ApiProvider::Stepfun => {}
|
||||
},
|
||||
_ => {}
|
||||
@@ -2836,6 +2852,36 @@ mod tests {
|
||||
assert!(body.get("reasoning_effort").is_none());
|
||||
}
|
||||
|
||||
/// MiniMax requests always carry `reasoning_split: true`; the `thinking.type`
/// field follows the requested effort and no `reasoning_effort` key is sent.
#[test]
fn reasoning_effort_minimax_splits_reasoning_from_content() {
    // Build a fresh request body and apply the given effort for MiniMax.
    let minimax_body = |effort: Option<&str>| {
        let mut body = json!({});
        apply_reasoning_effort(&mut body, effort, ApiProvider::Minimax);
        body
    };

    // An enabled effort level selects adaptive thinking.
    let high = minimax_body(Some("high"));
    assert_eq!(high["reasoning_split"].as_bool(), Some(true));
    assert_eq!(high["thinking"]["type"].as_str(), Some("adaptive"));
    assert!(high.get("reasoning_effort").is_none());

    // "off" disables thinking but still asks for the split response shape.
    let off = minimax_body(Some("off"));
    assert_eq!(off["reasoning_split"].as_bool(), Some(true));
    assert_eq!(off["thinking"]["type"].as_str(), Some("disabled"));

    // No effort at all: only the split flag is added.
    let unset = minimax_body(None);
    assert_eq!(unset, json!({ "reasoning_split": true }));
}
|
||||
|
||||
#[test]
|
||||
fn chat_parser_accepts_nvidia_nim_reasoning_field() -> Result<()> {
|
||||
let response = parse_chat_message(&json!({
|
||||
@@ -2872,6 +2918,7 @@ mod tests {
|
||||
let mut text_started = false;
|
||||
let mut thinking_started = false;
|
||||
let mut tool_indices = std::collections::HashMap::new();
|
||||
let mut reasoning_detail_buffers = std::collections::HashMap::new();
|
||||
let events = parse_sse_chunk(
|
||||
&json!({
|
||||
"choices": [{
|
||||
@@ -2884,6 +2931,7 @@ mod tests {
|
||||
&mut text_started,
|
||||
&mut thinking_started,
|
||||
&mut tool_indices,
|
||||
&mut reasoning_detail_buffers,
|
||||
true,
|
||||
);
|
||||
|
||||
@@ -3038,12 +3086,14 @@ mod tests {
|
||||
let mut thinking_started = false;
|
||||
let mut tool_indices: std::collections::HashMap<u32, u32> =
|
||||
std::collections::HashMap::new();
|
||||
let mut reasoning_detail_buffers = std::collections::HashMap::new();
|
||||
let events = parse_sse_chunk(
|
||||
&chunk,
|
||||
&mut content_index,
|
||||
&mut text_started,
|
||||
&mut thinking_started,
|
||||
&mut tool_indices,
|
||||
&mut reasoning_detail_buffers,
|
||||
false,
|
||||
);
|
||||
|
||||
@@ -3097,12 +3147,14 @@ mod tests {
|
||||
let mut thinking_started = false;
|
||||
let mut tool_indices: std::collections::HashMap<u32, u32> =
|
||||
std::collections::HashMap::new();
|
||||
let mut reasoning_detail_buffers = std::collections::HashMap::new();
|
||||
let events = parse_sse_chunk(
|
||||
&chunk,
|
||||
&mut content_index,
|
||||
&mut text_started,
|
||||
&mut thinking_started,
|
||||
&mut tool_indices,
|
||||
&mut reasoning_detail_buffers,
|
||||
false,
|
||||
);
|
||||
|
||||
|
||||
+269
-14
@@ -70,6 +70,41 @@ fn apply_provider_token_limit(body: &mut Value, provider: ApiProvider, max_token
|
||||
body["max_completion_tokens"] = json!(max_tokens);
|
||||
}
|
||||
|
||||
fn mirror_minimax_reasoning_details_for_messages(messages: &mut [Value]) {
|
||||
for message in messages {
|
||||
if message.get("role").and_then(Value::as_str) != Some("assistant") {
|
||||
continue;
|
||||
}
|
||||
if message.get("reasoning_details").is_some() {
|
||||
continue;
|
||||
}
|
||||
let Some(reasoning) = message
|
||||
.get("reasoning_content")
|
||||
.and_then(Value::as_str)
|
||||
.filter(|reasoning| !reasoning.trim().is_empty())
|
||||
.map(str::to_string)
|
||||
else {
|
||||
continue;
|
||||
};
|
||||
message["reasoning_details"] = json!([
|
||||
{
|
||||
"type": "text",
|
||||
"text": reasoning,
|
||||
}
|
||||
]);
|
||||
}
|
||||
}
|
||||
|
||||
fn mirror_minimax_reasoning_details_for_body(body: &mut Value, provider: ApiProvider) {
|
||||
if provider != ApiProvider::Minimax {
|
||||
return;
|
||||
}
|
||||
let Some(messages) = body.get_mut("messages").and_then(Value::as_array_mut) else {
|
||||
return;
|
||||
};
|
||||
mirror_minimax_reasoning_details_for_messages(messages);
|
||||
}
|
||||
|
||||
impl DeepSeekClient {
|
||||
pub(super) async fn create_message_chat(
|
||||
&self,
|
||||
@@ -121,6 +156,7 @@ impl DeepSeekClient {
|
||||
request.reasoning_effort.as_deref(),
|
||||
self.api_provider,
|
||||
);
|
||||
mirror_minimax_reasoning_details_for_body(&mut body, self.api_provider);
|
||||
|
||||
let response_cache_key = if cacheable {
|
||||
let wire_body =
|
||||
@@ -258,6 +294,7 @@ impl DeepSeekClient {
|
||||
request.reasoning_effort.as_deref(),
|
||||
self.api_provider,
|
||||
);
|
||||
mirror_minimax_reasoning_details_for_body(&mut body, self.api_provider);
|
||||
|
||||
let url = api_url_with_suffix(
|
||||
&self.base_url,
|
||||
@@ -323,6 +360,7 @@ impl DeepSeekClient {
|
||||
let mut text_started = false;
|
||||
let mut thinking_started = false;
|
||||
let mut tool_indices: std::collections::HashMap<u32, u32> = std::collections::HashMap::new();
|
||||
let mut reasoning_detail_buffers: std::collections::HashMap<u32, String> = std::collections::HashMap::new();
|
||||
let is_reasoning_model = is_reasoning_model_for_stream(api_provider, &model);
|
||||
|
||||
let mut byte_stream = std::pin::pin!(byte_stream);
|
||||
@@ -411,6 +449,7 @@ impl DeepSeekClient {
|
||||
&mut text_started,
|
||||
&mut thinking_started,
|
||||
&mut tool_indices,
|
||||
&mut reasoning_detail_buffers,
|
||||
is_reasoning_model,
|
||||
) {
|
||||
SseDataFrame::Done => break 'stream,
|
||||
@@ -550,6 +589,9 @@ impl<'a> PromptBuilder<'a> {
|
||||
if provider == ApiProvider::Arcee {
|
||||
apply_arcee_waf_safe_message_encoding(&mut messages);
|
||||
}
|
||||
if provider == ApiProvider::Minimax {
|
||||
mirror_minimax_reasoning_details_for_messages(&mut messages);
|
||||
}
|
||||
messages
|
||||
}
|
||||
|
||||
@@ -1943,7 +1985,23 @@ fn should_replay_reasoning_content_for_provider(
|
||||
model: &str,
|
||||
effort: Option<&str>,
|
||||
) -> bool {
|
||||
if !provider_accepts_reasoning_content(provider) && !requires_reasoning_content(model) {
|
||||
if effort
|
||||
.map(|value| {
|
||||
matches!(
|
||||
value.trim().to_ascii_lowercase().as_str(),
|
||||
"off" | "disabled" | "none" | "false"
|
||||
)
|
||||
})
|
||||
.unwrap_or(false)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if requires_reasoning_content(model) {
|
||||
return true;
|
||||
}
|
||||
|
||||
if !provider_accepts_reasoning_content(provider) {
|
||||
// Generic non-DeepSeek model on a provider that rejects the field:
|
||||
// keep stripping it (preserves the #1542 fix). But a known DeepSeek
|
||||
// reasoning model pointed at a DeepSeek-compatible endpoint via the
|
||||
@@ -1951,7 +2009,8 @@ fn should_replay_reasoning_content_for_provider(
|
||||
// or the thinking-mode API returns 400 (#1739 / #1694).
|
||||
return false;
|
||||
}
|
||||
should_replay_reasoning_content(model, effort)
|
||||
|
||||
model_supports_reasoning(model)
|
||||
}
|
||||
|
||||
/// Should the SSE parser treat incoming `reasoning_content` deltas as thinking
|
||||
@@ -1995,6 +2054,7 @@ fn provider_accepts_reasoning_content(provider: ApiProvider) -> bool {
|
||||
| ApiProvider::SiliconflowCn
|
||||
| ApiProvider::Volcengine
|
||||
| ApiProvider::Arcee
|
||||
| ApiProvider::Minimax
|
||||
| ApiProvider::Sglang
|
||||
| ApiProvider::Moonshot // #3016: Kimi thinking traces use reasoning_content
|
||||
)
|
||||
@@ -2010,11 +2070,54 @@ fn has_deepseek_r_series_marker(model_lower: &str) -> bool {
|
||||
})
|
||||
}
|
||||
|
||||
fn reasoning_field(value: &Value) -> Option<&str> {
|
||||
value
|
||||
fn reasoning_delta(
|
||||
value: &Value,
|
||||
choice_index: u32,
|
||||
reasoning_detail_buffers: &mut std::collections::HashMap<u32, String>,
|
||||
) -> Option<String> {
|
||||
if let Some(reasoning) = value
|
||||
.get("reasoning_content")
|
||||
.or_else(|| value.get("reasoning"))
|
||||
.and_then(Value::as_str)
|
||||
{
|
||||
return Some(reasoning.to_string());
|
||||
}
|
||||
|
||||
let details = value.get("reasoning_details").and_then(Value::as_array)?;
|
||||
let full_text = details
|
||||
.iter()
|
||||
.filter_map(|detail| detail.get("text").and_then(Value::as_str))
|
||||
.collect::<String>();
|
||||
if full_text.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let previous = reasoning_detail_buffers.entry(choice_index).or_default();
|
||||
let delta = full_text
|
||||
.strip_prefix(previous.as_str())
|
||||
.unwrap_or(&full_text)
|
||||
.to_string();
|
||||
*previous = full_text;
|
||||
Some(delta)
|
||||
}
|
||||
|
||||
fn reasoning_message_text(value: &Value) -> Option<String> {
|
||||
if let Some(reasoning) = value
|
||||
.get("reasoning_content")
|
||||
.or_else(|| value.get("reasoning"))
|
||||
.and_then(Value::as_str)
|
||||
{
|
||||
return Some(reasoning.to_string());
|
||||
}
|
||||
value
|
||||
.get("reasoning_details")
|
||||
.and_then(Value::as_array)
|
||||
.map(|details| {
|
||||
details
|
||||
.iter()
|
||||
.filter_map(|detail| detail.get("text").and_then(Value::as_str))
|
||||
.collect::<String>()
|
||||
})
|
||||
}
|
||||
|
||||
pub(super) fn parse_chat_message(payload: &Value) -> Result<MessageResponse> {
|
||||
@@ -2042,7 +2145,7 @@ pub(super) fn parse_chat_message(payload: &Value) -> Result<MessageResponse> {
|
||||
|
||||
let mut content_blocks = Vec::new();
|
||||
if let Some(reasoning) =
|
||||
reasoning_field(message).filter(|reasoning| !reasoning.trim().is_empty())
|
||||
reasoning_message_text(message).filter(|reasoning| !reasoning.trim().is_empty())
|
||||
{
|
||||
content_blocks.push(ContentBlock::Thinking {
|
||||
signature: None,
|
||||
@@ -2219,6 +2322,7 @@ fn parse_sse_data_frame(
|
||||
text_started: &mut bool,
|
||||
thinking_started: &mut bool,
|
||||
tool_indices: &mut std::collections::HashMap<u32, u32>,
|
||||
reasoning_detail_buffers: &mut std::collections::HashMap<u32, String>,
|
||||
is_reasoning_model: bool,
|
||||
) -> SseDataFrame {
|
||||
if data.trim() == "[DONE]" {
|
||||
@@ -2233,6 +2337,7 @@ fn parse_sse_data_frame(
|
||||
text_started,
|
||||
thinking_started,
|
||||
tool_indices,
|
||||
reasoning_detail_buffers,
|
||||
is_reasoning_model,
|
||||
)
|
||||
},
|
||||
@@ -2248,6 +2353,7 @@ pub(super) fn parse_sse_chunk(
|
||||
text_started: &mut bool,
|
||||
thinking_started: &mut bool,
|
||||
tool_indices: &mut std::collections::HashMap<u32, u32>,
|
||||
reasoning_detail_buffers: &mut std::collections::HashMap<u32, String>,
|
||||
is_reasoning_model: bool,
|
||||
) -> Vec<StreamEvent> {
|
||||
let mut events = Vec::new();
|
||||
@@ -2282,6 +2388,7 @@ pub(super) fn parse_sse_chunk(
|
||||
}
|
||||
|
||||
for choice in choices {
|
||||
let choice_index = choice.get("index").and_then(Value::as_u64).unwrap_or(0) as u32;
|
||||
let delta = choice.get("delta");
|
||||
let finish_reason = choice
|
||||
.get("finish_reason")
|
||||
@@ -2289,14 +2396,16 @@ pub(super) fn parse_sse_chunk(
|
||||
.map(str::to_string);
|
||||
|
||||
if let Some(delta) = delta {
|
||||
let reasoning_text = reasoning_field(delta).filter(|s| !s.is_empty());
|
||||
let reasoning_text = reasoning_delta(delta, choice_index, reasoning_detail_buffers)
|
||||
.filter(|s| !s.is_empty());
|
||||
let content_text = delta
|
||||
.get("content")
|
||||
.and_then(Value::as_str)
|
||||
.filter(|s| !s.is_empty());
|
||||
.filter(|s| !s.is_empty())
|
||||
.map(str::to_string);
|
||||
|
||||
// Handle reasoning_content / reasoning thinking deltas.
|
||||
if is_reasoning_model && let Some(reasoning) = reasoning_text {
|
||||
if is_reasoning_model && let Some(reasoning) = reasoning_text.as_deref() {
|
||||
if !*thinking_started {
|
||||
events.push(StreamEvent::ContentBlockStart {
|
||||
index: *content_index,
|
||||
@@ -2345,9 +2454,7 @@ pub(super) fn parse_sse_chunk(
|
||||
}
|
||||
events.push(StreamEvent::ContentBlockDelta {
|
||||
index: *content_index,
|
||||
delta: Delta::TextDelta {
|
||||
text: content.to_string(),
|
||||
},
|
||||
delta: Delta::TextDelta { text: content },
|
||||
});
|
||||
}
|
||||
|
||||
@@ -2659,6 +2766,69 @@ mod arcee_waf_message_encoding_tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod minimax_reasoning_replay_tests {
    use super::build_chat_messages_for_request_and_provider;
    use crate::config::{ApiProvider, DEFAULT_MINIMAX_MODEL};
    use crate::models::{ContentBlock, Message, MessageRequest};

    /// Builds a request for the default MiniMax model whose only message is an
    /// assistant turn made of a thinking block followed by a text block.
    fn request_with_assistant_thinking() -> MessageRequest {
        let thinking_block = ContentBlock::Thinking {
            thinking: "Inspect tool state".to_string(),
            signature: None,
        };
        let answer_block = ContentBlock::Text {
            text: "Done.".to_string(),
            cache_control: None,
        };
        let assistant_turn = Message {
            role: "assistant".to_string(),
            content: vec![thinking_block, answer_block],
        };

        MessageRequest {
            model: DEFAULT_MINIMAX_MODEL.to_string(),
            messages: vec![assistant_turn],
            max_tokens: 16,
            system: None,
            tools: None,
            tool_choice: None,
            metadata: None,
            thinking: None,
            reasoning_effort: None,
            stream: None,
            temperature: None,
            top_p: None,
        }
    }

    /// The replayed assistant message must carry the thinking text both as
    /// `reasoning_content` and as the first `reasoning_details` entry.
    #[test]
    fn minimax_history_replays_thinking_as_reasoning_details() {
        let request = request_with_assistant_thinking();
        let messages = build_chat_messages_for_request_and_provider(&request, ApiProvider::Minimax);
        let assistant = &messages[0];

        let reasoning_content = assistant
            .get("reasoning_content")
            .and_then(|value| value.as_str());
        assert_eq!(reasoning_content, Some("Inspect tool state"));

        let detail_type = assistant
            .pointer("/reasoning_details/0/type")
            .and_then(|value| value.as_str());
        assert_eq!(detail_type, Some("text"));

        let detail_text = assistant
            .pointer("/reasoning_details/0/text")
            .and_then(|value| value.as_str());
        assert_eq!(detail_text, Some("Inspect tool state"));
    }
}
|
||||
|
||||
// === #103 Phase 4: SSE decoder behavior on canned chunk sequences ============
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -2683,12 +2853,14 @@ mod stream_decoder_tests {
|
||||
let mut text_started = false;
|
||||
let mut thinking_started = false;
|
||||
let mut tool_indices = std::collections::HashMap::new();
|
||||
let mut reasoning_detail_buffers = std::collections::HashMap::new();
|
||||
parse_sse_chunk(
|
||||
&chunk,
|
||||
&mut content_index,
|
||||
&mut text_started,
|
||||
&mut thinking_started,
|
||||
&mut tool_indices,
|
||||
&mut reasoning_detail_buffers,
|
||||
is_reasoning_model,
|
||||
)
|
||||
}
|
||||
@@ -2765,6 +2937,7 @@ mod stream_decoder_tests {
|
||||
let mut text_started = false;
|
||||
let mut thinking_started = false;
|
||||
let mut tool_indices = std::collections::HashMap::new();
|
||||
let mut reasoning_detail_buffers = std::collections::HashMap::new();
|
||||
let mut events = Vec::new();
|
||||
for chunk in chunks {
|
||||
let value: Value = serde_json::from_str(chunk).expect("valid SSE JSON");
|
||||
@@ -2774,6 +2947,7 @@ mod stream_decoder_tests {
|
||||
&mut text_started,
|
||||
&mut thinking_started,
|
||||
&mut tool_indices,
|
||||
&mut reasoning_detail_buffers,
|
||||
is_reasoning,
|
||||
));
|
||||
}
|
||||
@@ -2835,12 +3009,65 @@ mod stream_decoder_tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn decoder_streams_minimax_reasoning_details_as_incremental_thinking() {
    // The frames below carry `reasoning_details` text cumulatively
    // ("Inspect", then "Inspect config"). The decoder is expected to emit
    // only the new suffix each time, so the concatenated thinking deltas
    // equal the final buffer rather than repeating earlier text.
    let frames = [
        r#"{"id":"minimax-1","choices":[{"index":0,"delta":{"reasoning_details":[{"type":"text","text":"Inspect"}]}}]}"#,
        r#"{"id":"minimax-1","choices":[{"index":0,"delta":{"reasoning_details":[{"type":"text","text":"Inspect config"}]}}]}"#,
        r#"{"id":"minimax-1","choices":[{"index":0,"delta":{"content":"Done."}}]}"#,
    ];

    let is_reasoning = is_reasoning_model_for_stream(ApiProvider::Minimax, "MiniMax-M3");

    // Decoder state shared across all frames of the stream.
    let (mut content_index, mut text_started, mut thinking_started) = (0u32, false, false);
    let mut tool_indices = std::collections::HashMap::new();
    let mut reasoning_detail_buffers = std::collections::HashMap::new();

    let mut events = Vec::new();
    for frame in frames {
        let parsed: Value = serde_json::from_str(frame).expect("valid SSE JSON");
        let decoded = parse_sse_chunk(
            &parsed,
            &mut content_index,
            &mut text_started,
            &mut thinking_started,
            &mut tool_indices,
            &mut reasoning_detail_buffers,
            is_reasoning,
        );
        events.extend(decoded);
    }

    // Concatenate every thinking delta in emission order.
    let mut thinking = String::new();
    for event in &events {
        if let StreamEvent::ContentBlockDelta {
            delta: Delta::ThinkingDelta { thinking: piece },
            ..
        } = event
        {
            thinking.push_str(piece);
        }
    }
    assert_eq!(thinking, "Inspect config");

    // Reasoning text must never surface as an answer-text delta.
    let reasoning_leaked_as_text = events.iter().any(|event| {
        matches!(
            event,
            StreamEvent::ContentBlockDelta {
                delta: Delta::TextDelta { text },
                ..
            } if text == "Inspect" || text == "Inspect config"
        )
    });
    assert!(!reasoning_leaked_as_text);
}
|
||||
|
||||
#[test]
|
||||
fn decoder_does_not_render_reasoning_as_text_for_known_provider_models() {
|
||||
let mut content_index = 0u32;
|
||||
let mut text_started = false;
|
||||
let mut thinking_started = false;
|
||||
let mut tool_indices = std::collections::HashMap::new();
|
||||
let mut reasoning_detail_buffers = std::collections::HashMap::new();
|
||||
let is_reasoning_model =
|
||||
is_reasoning_model_for_stream(ApiProvider::XiaomiMimo, "mimo-v2.5-pro");
|
||||
let events = parse_sse_chunk(
|
||||
@@ -2855,6 +3082,7 @@ mod stream_decoder_tests {
|
||||
&mut text_started,
|
||||
&mut thinking_started,
|
||||
&mut tool_indices,
|
||||
&mut reasoning_detail_buffers,
|
||||
is_reasoning_model,
|
||||
);
|
||||
|
||||
@@ -2932,6 +3160,7 @@ mod stream_decoder_tests {
|
||||
let mut text_started = false;
|
||||
let mut thinking_started = false;
|
||||
let mut tool_indices = std::collections::HashMap::new();
|
||||
let mut reasoning_detail_buffers = std::collections::HashMap::new();
|
||||
|
||||
let outcome = parse_sse_data_frame(
|
||||
" [DONE] ",
|
||||
@@ -2939,6 +3168,7 @@ mod stream_decoder_tests {
|
||||
&mut text_started,
|
||||
&mut thinking_started,
|
||||
&mut tool_indices,
|
||||
&mut reasoning_detail_buffers,
|
||||
true,
|
||||
);
|
||||
|
||||
@@ -3721,6 +3951,7 @@ mod alias_thinking_detection_tests {
|
||||
assert!(provider_accepts_reasoning_content(ApiProvider::NvidiaNim));
|
||||
assert!(provider_accepts_reasoning_content(ApiProvider::XiaomiMimo));
|
||||
assert!(provider_accepts_reasoning_content(ApiProvider::Arcee));
|
||||
assert!(provider_accepts_reasoning_content(ApiProvider::Minimax));
|
||||
// #3016: Moonshot's native endpoint streams Kimi thinking as
|
||||
// reasoning_content.
|
||||
assert!(provider_accepts_reasoning_content(ApiProvider::Moonshot));
|
||||
@@ -3728,17 +3959,41 @@ mod alias_thinking_detection_tests {
|
||||
|
||||
#[test]
|
||||
fn stream_classifies_moonshot_kimi_as_reasoning() {
|
||||
// #3016: without this, kimi-k2.6 thinking leaked into answer text.
|
||||
// #3016: without this, Kimi thinking leaked into answer text.
|
||||
assert!(is_reasoning_model_for_stream(
|
||||
ApiProvider::Moonshot,
|
||||
"kimi-k2.6"
|
||||
));
|
||||
assert!(
|
||||
!is_reasoning_model_for_stream(ApiProvider::Moonshot, "kimi-for-coding"),
|
||||
"kimi-for-coding is Moonshot's documented non-thinking model"
|
||||
is_reasoning_model_for_stream(ApiProvider::Moonshot, "kimi-for-coding"),
|
||||
"Kimi Code's stable model id now maps to K2.7 Code and streams reasoning_content"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn moonshot_and_minimax_replay_reasoning_content_for_supported_models() {
    // Provider/model pairs that must replay reasoning_content when no
    // explicit reasoning-effort override is supplied.
    let replayed_by_default = [
        (ApiProvider::Moonshot, "kimi-k2.7-code"),
        (ApiProvider::Moonshot, "kimi-for-coding"),
        (ApiProvider::Minimax, "MiniMax-M3"),
    ];
    for (provider, model) in replayed_by_default {
        assert!(should_replay_reasoning_content_for_provider(
            provider, model, None,
        ));
    }

    // An explicit "off" override disables replay even for a supported model.
    let replay_when_off = should_replay_reasoning_content_for_provider(
        ApiProvider::Moonshot,
        "kimi-for-coding",
        Some("off"),
    );
    assert!(!replay_when_off);
}
|
||||
|
||||
#[test]
|
||||
fn xiaomi_mimo_uses_max_completion_tokens_payload_key() {
|
||||
let mut body = json!({
|
||||
|
||||
+166
-3
@@ -179,6 +179,15 @@ pub const DEFAULT_ANTHROPIC_MODEL: &str = "claude-sonnet-4-6";
|
||||
pub const ANTHROPIC_OPUS_MODEL: &str = "claude-opus-4-8";
|
||||
pub const ANTHROPIC_HAIKU_MODEL: &str = "claude-haiku-4-5";
|
||||
pub const DEFAULT_ANTHROPIC_BASE_URL: &str = "https://api.anthropic.com";
|
||||
pub const DEFAULT_MINIMAX_MODEL: &str = "MiniMax-M3";
|
||||
pub const MINIMAX_M2_7_MODEL: &str = "MiniMax-M2.7";
|
||||
pub const MINIMAX_M2_7_HIGHSPEED_MODEL: &str = "MiniMax-M2.7-highspeed";
|
||||
pub const MINIMAX_M2_5_MODEL: &str = "MiniMax-M2.5";
|
||||
pub const MINIMAX_M2_5_HIGHSPEED_MODEL: &str = "MiniMax-M2.5-highspeed";
|
||||
pub const MINIMAX_M2_1_MODEL: &str = "MiniMax-M2.1";
|
||||
pub const MINIMAX_M2_1_HIGHSPEED_MODEL: &str = "MiniMax-M2.1-highspeed";
|
||||
pub const MINIMAX_M2_MODEL: &str = "MiniMax-M2";
|
||||
pub const DEFAULT_MINIMAX_BASE_URL: &str = "https://api.minimax.io/v1";
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
@@ -207,6 +216,7 @@ pub enum ApiProvider {
|
||||
Anthropic,
|
||||
Zai,
|
||||
Stepfun,
|
||||
Minimax,
|
||||
}
|
||||
|
||||
impl ApiProvider {
|
||||
@@ -264,7 +274,7 @@ impl ApiProvider {
|
||||
|
||||
/// `ApiProvider` discriminant → `ProviderKind` lookup.
|
||||
/// Index 1 is `None` for the legacy `DeepseekCN` variant.
|
||||
const KIND_LOOKUP: [Option<codewhale_config::ProviderKind>; 24] = [
|
||||
const KIND_LOOKUP: [Option<codewhale_config::ProviderKind>; 25] = [
|
||||
Some(codewhale_config::ProviderKind::Deepseek),
|
||||
None, // DeepseekCN
|
||||
Some(codewhale_config::ProviderKind::NvidiaNim),
|
||||
@@ -289,10 +299,11 @@ impl ApiProvider {
|
||||
Some(codewhale_config::ProviderKind::Anthropic),
|
||||
Some(codewhale_config::ProviderKind::Zai),
|
||||
Some(codewhale_config::ProviderKind::Stepfun),
|
||||
Some(codewhale_config::ProviderKind::Minimax),
|
||||
];
|
||||
|
||||
/// `ProviderKind` discriminant → `ApiProvider` lookup.
|
||||
const FROM_KIND_LOOKUP: [Self; 23] = [
|
||||
const FROM_KIND_LOOKUP: [Self; 24] = [
|
||||
Self::Deepseek,
|
||||
Self::NvidiaNim,
|
||||
Self::Openai,
|
||||
@@ -316,6 +327,7 @@ impl ApiProvider {
|
||||
Self::Anthropic,
|
||||
Self::Zai,
|
||||
Self::Stepfun,
|
||||
Self::Minimax,
|
||||
];
|
||||
|
||||
/// Map to the config-level `ProviderKind`.
|
||||
@@ -785,6 +797,39 @@ fn canonical_moonshot_model_id(model: &str) -> Option<&'static str> {
|
||||
}
|
||||
}
|
||||
|
||||
fn canonical_minimax_model_id(model: &str) -> Option<&'static str> {
|
||||
let normalized = model.trim().to_ascii_lowercase();
|
||||
let normalized = normalized.replace(['_', ' '], "-");
|
||||
match normalized.as_str() {
|
||||
"minimax" | "minimax-m3" | "minimax-m-3" | "minimax-m-3-thinking" => {
|
||||
Some(DEFAULT_MINIMAX_MODEL)
|
||||
}
|
||||
"minimax-m2.7" | "minimax-m2-7" | "minimax-m-2.7" | "minimax-m-2-7" => {
|
||||
Some(MINIMAX_M2_7_MODEL)
|
||||
}
|
||||
"minimax-m2.7-highspeed"
|
||||
| "minimax-m2-7-highspeed"
|
||||
| "minimax-m-2.7-highspeed"
|
||||
| "minimax-m-2-7-highspeed" => Some(MINIMAX_M2_7_HIGHSPEED_MODEL),
|
||||
"minimax-m2.5" | "minimax-m2-5" | "minimax-m-2.5" | "minimax-m-2-5" => {
|
||||
Some(MINIMAX_M2_5_MODEL)
|
||||
}
|
||||
"minimax-m2.5-highspeed"
|
||||
| "minimax-m2-5-highspeed"
|
||||
| "minimax-m-2.5-highspeed"
|
||||
| "minimax-m-2-5-highspeed" => Some(MINIMAX_M2_5_HIGHSPEED_MODEL),
|
||||
"minimax-m2.1" | "minimax-m2-1" | "minimax-m-2.1" | "minimax-m-2-1" => {
|
||||
Some(MINIMAX_M2_1_MODEL)
|
||||
}
|
||||
"minimax-m2.1-highspeed"
|
||||
| "minimax-m2-1-highspeed"
|
||||
| "minimax-m-2.1-highspeed"
|
||||
| "minimax-m-2-1-highspeed" => Some(MINIMAX_M2_1_HIGHSPEED_MODEL),
|
||||
"minimax-m2" | "minimax-m-2" => Some(MINIMAX_M2_MODEL),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Normalize a model selected through the TUI for the active provider.
|
||||
///
|
||||
/// Official DeepSeek endpoints require bare model IDs. Provider-prefixed
|
||||
@@ -821,6 +866,12 @@ pub fn normalize_model_name_for_provider(provider: ApiProvider, model: &str) ->
|
||||
.or_else(|| normalize_custom_model_id(model));
|
||||
}
|
||||
|
||||
if matches!(provider, ApiProvider::Minimax) {
|
||||
return canonical_minimax_model_id(model)
|
||||
.map(ToString::to_string)
|
||||
.or_else(|| normalize_custom_model_id(model));
|
||||
}
|
||||
|
||||
if matches!(provider, ApiProvider::Huggingface) {
|
||||
return normalize_custom_model_id(model);
|
||||
}
|
||||
@@ -906,6 +957,16 @@ pub fn model_completion_names_for_provider(provider: ApiProvider) -> Vec<&'stati
|
||||
DEFAULT_ANTHROPIC_MODEL,
|
||||
ANTHROPIC_HAIKU_MODEL,
|
||||
],
|
||||
ApiProvider::Minimax => vec![
|
||||
DEFAULT_MINIMAX_MODEL,
|
||||
MINIMAX_M2_7_MODEL,
|
||||
MINIMAX_M2_7_HIGHSPEED_MODEL,
|
||||
MINIMAX_M2_5_MODEL,
|
||||
MINIMAX_M2_5_HIGHSPEED_MODEL,
|
||||
MINIMAX_M2_1_MODEL,
|
||||
MINIMAX_M2_1_HIGHSPEED_MODEL,
|
||||
MINIMAX_M2_MODEL,
|
||||
],
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2066,6 +2127,8 @@ pub struct ProvidersConfig {
|
||||
pub zai: ProviderConfig,
|
||||
#[serde(default)]
|
||||
pub stepfun: ProviderConfig,
|
||||
#[serde(default)]
|
||||
pub minimax: ProviderConfig,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Default)]
|
||||
@@ -2235,6 +2298,7 @@ impl Config {
|
||||
ApiProvider::Anthropic => "providers.anthropic",
|
||||
ApiProvider::Zai => "providers.zai",
|
||||
ApiProvider::Stepfun => "providers.stepfun",
|
||||
ApiProvider::Minimax => "providers.minimax",
|
||||
ApiProvider::Deepseek | ApiProvider::DeepseekCN => return,
|
||||
};
|
||||
tracing::warn!(
|
||||
@@ -2394,6 +2458,7 @@ impl Config {
|
||||
ApiProvider::Anthropic => &providers.anthropic,
|
||||
ApiProvider::Zai => &providers.zai,
|
||||
ApiProvider::Stepfun => &providers.stepfun,
|
||||
ApiProvider::Minimax => &providers.minimax,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -2424,6 +2489,7 @@ impl Config {
|
||||
ApiProvider::Anthropic => &mut providers.anthropic,
|
||||
ApiProvider::Zai => &mut providers.zai,
|
||||
ApiProvider::Stepfun => &mut providers.stepfun,
|
||||
ApiProvider::Minimax => &mut providers.minimax,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2568,6 +2634,7 @@ impl Config {
|
||||
ApiProvider::Zai => DEFAULT_ZAI_MODEL,
|
||||
ApiProvider::Stepfun => DEFAULT_STEPFUN_MODEL,
|
||||
ApiProvider::Anthropic => DEFAULT_ANTHROPIC_MODEL,
|
||||
ApiProvider::Minimax => DEFAULT_MINIMAX_MODEL,
|
||||
}
|
||||
.to_string()
|
||||
}
|
||||
@@ -2609,7 +2676,8 @@ impl Config {
|
||||
| ApiProvider::Together
|
||||
| ApiProvider::OpenaiCodex
|
||||
| ApiProvider::Zai
|
||||
| ApiProvider::Stepfun => None,
|
||||
| ApiProvider::Stepfun
|
||||
| ApiProvider::Minimax => None,
|
||||
};
|
||||
let configured_base_url = provider_base.or(root_base);
|
||||
let base = if provider == ApiProvider::XiaomiMimo {
|
||||
@@ -2659,6 +2727,7 @@ impl Config {
|
||||
ApiProvider::Zai => DEFAULT_ZAI_BASE_URL,
|
||||
ApiProvider::Stepfun => DEFAULT_STEPFUN_BASE_URL,
|
||||
ApiProvider::Anthropic => DEFAULT_ANTHROPIC_BASE_URL,
|
||||
ApiProvider::Minimax => DEFAULT_MINIMAX_BASE_URL,
|
||||
}
|
||||
.to_string()
|
||||
})
|
||||
@@ -2711,6 +2780,7 @@ impl Config {
|
||||
ApiProvider::Zai => "zai",
|
||||
ApiProvider::Stepfun => "stepfun",
|
||||
ApiProvider::Anthropic => "anthropic",
|
||||
ApiProvider::Minimax => "minimax",
|
||||
};
|
||||
|
||||
// 0. DeepSeek compatibility slot. The legacy top-level `api_key`
|
||||
@@ -2906,6 +2976,10 @@ impl Config {
|
||||
),
|
||||
// Self-hosted deployments (the Sglang/Vllm/Ollama arm below) commonly run without auth on localhost.
|
||||
// Return an empty key and let the client omit the Authorization header.
|
||||
ApiProvider::Minimax => anyhow::bail!(
|
||||
"MiniMax API key not found. Run 'codewhale auth set --provider minimax', \
|
||||
set MINIMAX_API_KEY, or add [providers.minimax] api_key in ~/.codewhale/config.toml."
|
||||
),
|
||||
ApiProvider::Sglang | ApiProvider::Vllm | ApiProvider::Ollama => Ok(String::new()),
|
||||
}
|
||||
}
|
||||
@@ -3759,6 +3833,13 @@ fn apply_env_overrides(config: &mut Config) {
|
||||
.stepfun
|
||||
.base_url = Some(value);
|
||||
}
|
||||
ApiProvider::Minimax => {
|
||||
config
|
||||
.providers
|
||||
.get_or_insert_with(ProvidersConfig::default)
|
||||
.minimax
|
||||
.base_url = Some(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
if matches!(config.api_provider(), ApiProvider::NvidiaNim)
|
||||
@@ -3968,6 +4049,7 @@ fn apply_env_overrides(config: &mut Config) {
|
||||
ApiProvider::Anthropic => &mut providers.anthropic,
|
||||
ApiProvider::Zai => &mut providers.zai,
|
||||
ApiProvider::Stepfun => &mut providers.stepfun,
|
||||
ApiProvider::Minimax => &mut providers.minimax,
|
||||
};
|
||||
let mut provider_headers = entry.http_headers.clone().unwrap_or_default();
|
||||
provider_headers.extend(headers);
|
||||
@@ -4165,6 +4247,7 @@ fn apply_env_overrides(config: &mut Config) {
|
||||
ApiProvider::Anthropic => &mut providers.anthropic,
|
||||
ApiProvider::Zai => &mut providers.zai,
|
||||
ApiProvider::Stepfun => &mut providers.stepfun,
|
||||
ApiProvider::Minimax => &mut providers.minimax,
|
||||
};
|
||||
entry.model = Some(value);
|
||||
}
|
||||
@@ -4507,6 +4590,7 @@ fn default_base_url_for_provider(provider: ApiProvider) -> &'static str {
|
||||
ApiProvider::Zai => DEFAULT_ZAI_BASE_URL,
|
||||
ApiProvider::Stepfun => DEFAULT_STEPFUN_BASE_URL,
|
||||
ApiProvider::Anthropic => DEFAULT_ANTHROPIC_BASE_URL,
|
||||
ApiProvider::Minimax => DEFAULT_MINIMAX_BASE_URL,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4954,6 +5038,7 @@ fn merge_providers(
|
||||
openai_codex: merge_provider_config(base.openai_codex, override_cfg.openai_codex),
|
||||
zai: merge_provider_config(base.zai, override_cfg.zai),
|
||||
stepfun: merge_provider_config(base.stepfun, override_cfg.stepfun),
|
||||
minimax: merge_provider_config(base.minimax, override_cfg.minimax),
|
||||
}),
|
||||
}
|
||||
}
|
||||
@@ -5468,6 +5553,9 @@ pub fn active_provider_has_env_api_key(config: &Config) -> bool {
|
||||
std::env::var("STEPFUN_API_KEY").is_ok_and(|k| !k.trim().is_empty())
|
||||
|| std::env::var("STEP_API_KEY").is_ok_and(|k| !k.trim().is_empty())
|
||||
}
|
||||
ApiProvider::Minimax => {
|
||||
std::env::var("MINIMAX_API_KEY").is_ok_and(|k| !k.trim().is_empty())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5504,6 +5592,7 @@ pub fn has_api_key_for(config: &Config, provider: ApiProvider) -> bool {
|
||||
ApiProvider::Volcengine => "VOLCENGINE_API_KEY",
|
||||
ApiProvider::Zai => "ZAI_API_KEY",
|
||||
ApiProvider::Stepfun => "STEPFUN_API_KEY",
|
||||
ApiProvider::Minimax => "MINIMAX_API_KEY",
|
||||
};
|
||||
if std::env::var(env_var).is_ok_and(|k| !k.trim().is_empty()) {
|
||||
return true;
|
||||
@@ -5633,6 +5722,7 @@ pub fn save_api_key_for(provider: ApiProvider, api_key: &str) -> Result<PathBuf>
|
||||
ApiProvider::OpenaiCodex => "providers.openai_codex",
|
||||
ApiProvider::Zai => "providers.zai",
|
||||
ApiProvider::Stepfun => "providers.stepfun",
|
||||
ApiProvider::Minimax => "providers.minimax",
|
||||
};
|
||||
|
||||
// Parse existing TOML (or start fresh) so we can edit the right table
|
||||
@@ -5681,6 +5771,7 @@ pub fn save_api_key_for(provider: ApiProvider, api_key: &str) -> Result<PathBuf>
|
||||
ApiProvider::OpenaiCodex => "openai_codex",
|
||||
ApiProvider::Zai => "zai",
|
||||
ApiProvider::Stepfun => "stepfun",
|
||||
ApiProvider::Minimax => "minimax",
|
||||
};
|
||||
let entry = providers
|
||||
.entry(key_inside.to_string())
|
||||
@@ -5781,6 +5872,7 @@ fn provider_config_key(provider: ApiProvider) -> Result<&'static str> {
|
||||
ApiProvider::OpenaiCodex => Ok("openai_codex"),
|
||||
ApiProvider::Zai => Ok("zai"),
|
||||
ApiProvider::Stepfun => Ok("stepfun"),
|
||||
ApiProvider::Minimax => Ok("minimax"),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -8361,6 +8453,26 @@ api_key = "old-openrouter-key"
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
fn normalize_model_name_for_provider_maps_minimax_direct_aliases() {
    // (user-typed alias, canonical id the MiniMax provider should resolve it to)
    let alias_table: [(&str, &str); 9] = [
        ("minimax", DEFAULT_MINIMAX_MODEL),
        ("minimax-m3", DEFAULT_MINIMAX_MODEL),
        ("minimax-m2.7", MINIMAX_M2_7_MODEL),
        ("minimax-m2-7-highspeed", MINIMAX_M2_7_HIGHSPEED_MODEL),
        ("minimax-m2.5", MINIMAX_M2_5_MODEL),
        ("minimax-m2-5-highspeed", MINIMAX_M2_5_HIGHSPEED_MODEL),
        ("minimax-m2.1", MINIMAX_M2_1_MODEL),
        ("minimax-m2-1-highspeed", MINIMAX_M2_1_HIGHSPEED_MODEL),
        ("minimax-m2", MINIMAX_M2_MODEL),
    ];

    for &(alias, canonical) in &alias_table {
        let resolved = normalize_model_name_for_provider(ApiProvider::Minimax, alias);
        assert_eq!(resolved.as_deref(), Some(canonical));
    }
}
|
||||
|
||||
#[test]
|
||||
fn normalize_model_name_for_provider_maps_arcee_direct_aliases() {
|
||||
for (alias, expected) in [
|
||||
@@ -8472,6 +8584,28 @@ api_key = "old-openrouter-key"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn model_completion_names_for_minimax_include_direct_chat_models() {
    let picker_models = model_completion_names_for_provider(ApiProvider::Minimax);

    // Every direct MiniMax chat model id must be offered for completion.
    let direct_ids = [
        DEFAULT_MINIMAX_MODEL,
        MINIMAX_M2_7_MODEL,
        MINIMAX_M2_7_HIGHSPEED_MODEL,
        MINIMAX_M2_5_MODEL,
        MINIMAX_M2_5_HIGHSPEED_MODEL,
        MINIMAX_M2_1_MODEL,
        MINIMAX_M2_1_HIGHSPEED_MODEL,
        MINIMAX_M2_MODEL,
    ];
    direct_ids.iter().for_each(|expected| {
        assert!(picker_models.contains(expected), "missing {expected}");
    });

    // The OpenRouter-namespaced id belongs to a different provider's picker.
    let exposes_openrouter_id = picker_models
        .iter()
        .any(|id| *id == OPENROUTER_MINIMAX_M3_MODEL);
    assert!(
        !exposes_openrouter_id,
        "direct MiniMax picker must not expose OpenRouter namespaced IDs"
    );
}
|
||||
|
||||
#[test]
|
||||
fn normalize_model_name_rejects_invalid_or_non_deepseek_ids() {
|
||||
assert!(normalize_model_name("qwen3-coder").is_none());
|
||||
@@ -11405,6 +11539,35 @@ model = "deepseek-ai/deepseek-v4-pro"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn provider_capability_minimax_direct_models_use_api_docs_shape() {
    // The default (M3) model has its own context/output limits.
    let flagship = provider_capability(ApiProvider::Minimax, DEFAULT_MINIMAX_MODEL);
    assert_eq!(flagship.context_window, 1_000_000);
    assert_eq!(flagship.max_output, 524_288);
    assert!(flagship.thinking_supported);
    assert!(!flagship.cache_telemetry_supported);
    assert_eq!(
        flagship.request_payload_mode,
        RequestPayloadMode::ChatCompletions
    );

    // All M2-family ids share one context window and the same feature flags.
    let m2_family = [
        MINIMAX_M2_7_MODEL,
        MINIMAX_M2_7_HIGHSPEED_MODEL,
        MINIMAX_M2_5_MODEL,
        MINIMAX_M2_5_HIGHSPEED_MODEL,
        MINIMAX_M2_1_MODEL,
        MINIMAX_M2_1_HIGHSPEED_MODEL,
        MINIMAX_M2_MODEL,
    ];
    for &model in &m2_family {
        let capability = provider_capability(ApiProvider::Minimax, model);
        assert_eq!(capability.context_window, 204_800, "{model}");
        assert!(capability.thinking_supported, "{model}");
        assert!(!capability.cache_telemetry_supported, "{model}");
        assert_eq!(
            capability.request_payload_mode,
            RequestPayloadMode::ChatCompletions
        );
    }
}
|
||||
|
||||
#[test]
|
||||
fn provider_capability_wanjie_ark_reasoner_has_thinking_no_cache() {
|
||||
let cap = provider_capability(ApiProvider::WanjieArk, DEFAULT_WANJIE_ARK_MODEL);
|
||||
|
||||
@@ -223,6 +223,7 @@ fn provider_base_url_table_key(provider: ApiProvider) -> anyhow::Result<&'static
|
||||
ApiProvider::OpenaiCodex => Ok("openai_codex"),
|
||||
ApiProvider::Zai => Ok("zai"),
|
||||
ApiProvider::Stepfun => Ok("stepfun"),
|
||||
ApiProvider::Minimax => Ok("minimax"),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -614,7 +614,9 @@ impl Engine {
|
||||
let env_var = match provider {
|
||||
ApiProvider::Deepseek | ApiProvider::DeepseekCN => "DEEPSEEK_API_KEY",
|
||||
ApiProvider::NvidiaNim => "NVIDIA_API_KEY/NVIDIA_NIM_API_KEY",
|
||||
ApiProvider::Openai | ApiProvider::Zai | ApiProvider::Stepfun => "OPENAI_API_KEY",
|
||||
ApiProvider::Openai => "OPENAI_API_KEY",
|
||||
ApiProvider::Zai => "ZAI_API_KEY/Z_AI_API_KEY",
|
||||
ApiProvider::Stepfun => "STEPFUN_API_KEY/STEP_API_KEY",
|
||||
ApiProvider::Anthropic => "ANTHROPIC_API_KEY",
|
||||
ApiProvider::Atlascloud => "ATLASCLOUD_API_KEY",
|
||||
ApiProvider::WanjieArk => "WANJIE_ARK_API_KEY/WANJIE_API_KEY/WANJIE_MAAS_API_KEY",
|
||||
@@ -632,6 +634,7 @@ impl Engine {
|
||||
ApiProvider::Huggingface => "HUGGINGFACE_API_KEY/HF_TOKEN",
|
||||
ApiProvider::Together => "TOGETHER_API_KEY",
|
||||
ApiProvider::OpenaiCodex => "OPENAI_CODEX_ACCESS_TOKEN/CODEX_ACCESS_TOKEN",
|
||||
ApiProvider::Minimax => "MINIMAX_API_KEY",
|
||||
};
|
||||
|
||||
Some(format!(
|
||||
|
||||
@@ -2601,13 +2601,17 @@ fn run_setup_status(config: &Config, workspace: &Path) -> Result<()> {
|
||||
("DEEPSEEK_API_KEY", "codewhale auth set --provider deepseek")
|
||||
}
|
||||
crate::config::ApiProvider::Zai => (
|
||||
"OPENAI_API_KEY",
|
||||
"ZAI_API_KEY/Z_AI_API_KEY",
|
||||
"codewhale auth set --provider zai --api-key \"...\"",
|
||||
),
|
||||
crate::config::ApiProvider::Stepfun => (
|
||||
"OPENAI_API_KEY",
|
||||
"STEPFUN_API_KEY/STEP_API_KEY",
|
||||
"codewhale auth set --provider stepfun --api-key \"...\"",
|
||||
),
|
||||
crate::config::ApiProvider::Minimax => (
|
||||
"MINIMAX_API_KEY",
|
||||
"codewhale auth set --provider minimax --api-key \"...\"",
|
||||
),
|
||||
};
|
||||
println!(
|
||||
" {} api_key: missing (set {env_var} or `[providers.{}].api_key` in ~/.codewhale/config.toml; or run `{login_hint}`)",
|
||||
@@ -2637,6 +2641,7 @@ fn run_setup_status(config: &Config, workspace: &Path) -> Result<()> {
|
||||
| crate::config::ApiProvider::DeepseekCN => "deepseek",
|
||||
crate::config::ApiProvider::Zai => "zai",
|
||||
crate::config::ApiProvider::Stepfun => "stepfun",
|
||||
crate::config::ApiProvider::Minimax => "minimax",
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
@@ -286,6 +286,13 @@ fn known_context_window_for_model(model_lower: &str) -> Option<u32> {
|
||||
| "kimi-k2.7-code"
|
||||
| "kimi-k2.6"
|
||||
| "kimi-for-coding" => Some(262_144),
|
||||
"minimax-m2.7"
|
||||
| "minimax-m2.7-highspeed"
|
||||
| "minimax-m2.5"
|
||||
| "minimax-m2.5-highspeed"
|
||||
| "minimax-m2.1"
|
||||
| "minimax-m2.1-highspeed"
|
||||
| "minimax-m2" => Some(204_800),
|
||||
"z-ai/glm-5.1" | "z-ai/glm-5v-turbo" | "glm-5.1" | "glm-5v-turbo" => Some(202_752),
|
||||
"minimax/minimax-m3" | "minimax-m3" | "qwen/qwen3.6-flash" | "qwen/qwen3.6-plus" => {
|
||||
Some(1_000_000)
|
||||
@@ -349,10 +356,10 @@ pub fn model_supports_reasoning(model: &str) -> bool {
|
||||
if lower.contains("deepseek") && lower.contains("v4") {
|
||||
return true;
|
||||
}
|
||||
// #3016: Moonshot-native Kimi IDs also emit reasoning_content.
|
||||
// `kimi-for-coding` is Moonshot's documented non-thinking model — it
|
||||
// must not be classified as reasoning-capable by the prefix rule.
|
||||
if lower.starts_with("kimi-") && lower != "kimi-for-coding" {
|
||||
// #3016 plus the 2026 Kimi Code K2.7 update: Moonshot-native Kimi IDs,
|
||||
// including the stable `kimi-for-coding` coding route, emit
|
||||
// reasoning_content that must stay out of answer prose.
|
||||
if lower.starts_with("kimi-") {
|
||||
return true;
|
||||
}
|
||||
matches!(
|
||||
@@ -376,8 +383,16 @@ pub fn model_supports_reasoning(model: &str) -> bool {
|
||||
| "moonshotai/kimi-k2.6:free"
|
||||
| "kimi-k2.7-code"
|
||||
| "kimi-k2.6"
|
||||
| "kimi-for-coding"
|
||||
| "minimax/minimax-m3"
|
||||
| "minimax-m3"
|
||||
| "minimax-m2.7"
|
||||
| "minimax-m2.7-highspeed"
|
||||
| "minimax-m2.5"
|
||||
| "minimax-m2.5-highspeed"
|
||||
| "minimax-m2.1"
|
||||
| "minimax-m2.1-highspeed"
|
||||
| "minimax-m2"
|
||||
| "nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free"
|
||||
| "nvidia/nemotron-3-ultra-550b-a55b"
|
||||
| "nvidia/nemotron-3-ultra-550b-a55b:free"
|
||||
@@ -628,13 +643,13 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn moonshot_native_kimi_ids_support_reasoning_except_for_coding() {
|
||||
fn moonshot_native_kimi_ids_support_reasoning_including_coding_route() {
|
||||
// #3016: bare Moonshot ids (no moonshotai/ prefix) emit
|
||||
// reasoning_content; kimi-for-coding is the non-thinking exception.
|
||||
// reasoning_content; kimi-for-coding currently rides the K2.7 Code path.
|
||||
assert!(model_supports_reasoning("kimi-k2.7-code"));
|
||||
assert!(model_supports_reasoning("kimi-k2.6"));
|
||||
assert!(model_supports_reasoning("kimi-for-coding"));
|
||||
assert!(model_supports_reasoning("kimi-k2.5"));
|
||||
assert!(!model_supports_reasoning("kimi-for-coding"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -691,13 +706,16 @@ mod tests {
|
||||
("kimi-k2.7-code", 262_144),
|
||||
("kimi-k2.6", 262_144),
|
||||
("minimax-m3", 1_000_000),
|
||||
("minimax-m2.7", 204_800),
|
||||
("minimax-m2.5-highspeed", 204_800),
|
||||
("minimax-m2", 204_800),
|
||||
("glm-5.1", 202_752),
|
||||
] {
|
||||
assert_eq!(context_window_for_model(model), Some(expected_window));
|
||||
assert!(model_supports_reasoning(model));
|
||||
}
|
||||
assert_eq!(context_window_for_model("kimi-for-coding"), Some(262_144));
|
||||
assert!(!model_supports_reasoning("kimi-for-coding"));
|
||||
assert!(model_supports_reasoning("kimi-for-coding"));
|
||||
assert_eq!(context_window_for_model("glm-5v-turbo"), Some(202_752));
|
||||
assert!(!model_supports_reasoning("glm-5v-turbo"));
|
||||
assert_eq!(max_output_tokens_for_model("kimi-k2.7-code"), Some(262_144));
|
||||
|
||||
@@ -121,7 +121,9 @@ impl ProviderPickerView {
|
||||
ApiProvider::Huggingface => "HUGGINGFACE_API_KEY / HF_TOKEN",
|
||||
ApiProvider::Together => "TOGETHER_API_KEY",
|
||||
ApiProvider::OpenaiCodex => "OPENAI_CODEX_ACCESS_TOKEN / CODEX_ACCESS_TOKEN",
|
||||
ApiProvider::Zai | ApiProvider::Stepfun => "OPENAI_API_KEY",
|
||||
ApiProvider::Zai => "ZAI_API_KEY / Z_AI_API_KEY",
|
||||
ApiProvider::Stepfun => "STEPFUN_API_KEY / STEP_API_KEY",
|
||||
ApiProvider::Minimax => "MINIMAX_API_KEY",
|
||||
}
|
||||
}
|
||||
|
||||
@@ -517,7 +519,8 @@ mod tests {
|
||||
"OpenAI Codex (ChatGPT)",
|
||||
"Anthropic",
|
||||
"Z.ai (GLM Coding)",
|
||||
"StepFun / StepFlash"
|
||||
"StepFun / StepFlash",
|
||||
"MiniMax"
|
||||
]
|
||||
);
|
||||
}
|
||||
@@ -552,7 +555,7 @@ mod tests {
|
||||
let mut picker = ProviderPickerView::new(ApiProvider::Deepseek, &config);
|
||||
|
||||
picker.handle_key(key(KeyCode::Up));
|
||||
assert_eq!(picker.selected_provider(), ApiProvider::Stepfun);
|
||||
assert_eq!(picker.selected_provider(), ApiProvider::Minimax);
|
||||
|
||||
picker.handle_key(key(KeyCode::Down));
|
||||
assert_eq!(picker.selected_provider(), ApiProvider::Deepseek);
|
||||
|
||||
@@ -7595,6 +7595,7 @@ fn render(f: &mut Frame, app: &mut App) {
|
||||
crate::config::ApiProvider::OpenaiCodex => Some("Codex"),
|
||||
crate::config::ApiProvider::Zai => Some("Z.ai"),
|
||||
crate::config::ApiProvider::Stepfun => Some("StepFun"),
|
||||
crate::config::ApiProvider::Minimax => Some("MiniMax"),
|
||||
};
|
||||
let status_indicator_started_at = if app.low_motion {
|
||||
None
|
||||
@@ -8655,6 +8656,7 @@ async fn apply_provider_picker_api_key(
|
||||
ApiProvider::Anthropic => &mut providers.anthropic,
|
||||
ApiProvider::Zai => &mut providers.zai,
|
||||
ApiProvider::Stepfun => &mut providers.stepfun,
|
||||
ApiProvider::Minimax => &mut providers.minimax,
|
||||
};
|
||||
entry.api_key = Some(api_key);
|
||||
}
|
||||
@@ -8717,6 +8719,7 @@ fn set_provider_auth_mode_in_memory(config: &mut Config, provider: ApiProvider,
|
||||
ApiProvider::Anthropic => &mut providers.anthropic,
|
||||
ApiProvider::Zai => &mut providers.zai,
|
||||
ApiProvider::Stepfun => &mut providers.stepfun,
|
||||
ApiProvider::Minimax => &mut providers.minimax,
|
||||
};
|
||||
entry.auth_mode = Some(auth_mode);
|
||||
}
|
||||
|
||||
@@ -952,12 +952,12 @@ If you are upgrading from older releases:
|
||||
|
||||
### Core keys (used by the TUI/engine)
|
||||
|
||||
- `provider` (string, optional): `deepseek` (default), `nvidia-nim`, `openai`, `atlascloud`, `wanjie-ark`, `volcengine`, `openrouter`, `xiaomi-mimo`, `novita`, `fireworks`, `siliconflow`, `siliconflow-CN`, `arcee`, `moonshot`, `sglang`, `vllm`, or `ollama`. Legacy `deepseek-cn` configs are still accepted as an alias for `deepseek`; DeepSeek uses the same official host [`https://api.deepseek.com`](https://api-docs.deepseek.com/) worldwide. `nvidia-nim` targets NVIDIA's NIM-hosted DeepSeek endpoints through `https://integrate.api.nvidia.com/v1`; `openai` targets a generic OpenAI-compatible endpoint, defaulting to `https://api.openai.com/v1`; `atlascloud` targets AtlasCloud's OpenAI-compatible endpoint at `https://api.atlascloud.ai/v1`; `wanjie-ark` targets Wanjie Ark's OpenAI-compatible endpoint at `https://maas-openapi.wanjiedata.com/api/v1`; `volcengine` targets Volcengine Ark's OpenAI-compatible coding endpoint at `https://ark.cn-beijing.volces.com/api/coding/v3`; `openrouter` targets `https://openrouter.ai/api/v1`; `xiaomi-mimo` targets Xiaomi MiMo's OpenAI-compatible endpoint, using `https://token-plan-sgp.xiaomimimo.com/v1` by default for Token Plan keys (`tp-...`) and `https://api.xiaomimimo.com/v1` for pay-as-you-go keys; set `base_url` explicitly if your Token Plan account uses the China region; `novita` targets `https://api.novita.ai/v1`; `fireworks` targets `https://api.fireworks.ai/inference/v1`; `siliconflow` targets SiliconFlow, defaulting to `https://api.siliconflow.com/v1`; `siliconflow-CN` targets the SiliconFlow China regional endpoint while sharing `[providers.siliconflow]`; `arcee` targets Arcee AI's OpenAI-compatible endpoint at `https://api.arcee.ai/api/v1`; `moonshot` targets Moonshot/Kimi, defaulting to `https://api.moonshot.ai/v1`; `sglang` targets a self-hosted OpenAI-compatible endpoint, defaulting to `http://localhost:30000/v1`; `vllm` targets a self-hosted vLLM OpenAI-compatible endpoint, defaulting to `http://localhost:8000/v1`; `ollama` 
targets Ollama's OpenAI-compatible endpoint, defaulting to `http://localhost:11434/v1`.
|
||||
- `provider` (string, optional): `deepseek` (default), `nvidia-nim`, `openai`, `atlascloud`, `wanjie-ark`, `volcengine`, `openrouter`, `xiaomi-mimo`, `novita`, `fireworks`, `siliconflow`, `siliconflow-CN`, `arcee`, `moonshot`, `minimax`, `zai`, `stepfun`, `sglang`, `vllm`, or `ollama`. Legacy `deepseek-cn` configs are still accepted as an alias for `deepseek`; DeepSeek uses the same official host [`https://api.deepseek.com`](https://api-docs.deepseek.com/) worldwide. `nvidia-nim` targets NVIDIA's NIM-hosted DeepSeek endpoints through `https://integrate.api.nvidia.com/v1`; `openai` targets a generic OpenAI-compatible endpoint, defaulting to `https://api.openai.com/v1`; `atlascloud` targets AtlasCloud's OpenAI-compatible endpoint at `https://api.atlascloud.ai/v1`; `wanjie-ark` targets Wanjie Ark's OpenAI-compatible endpoint at `https://maas-openapi.wanjiedata.com/api/v1`; `volcengine` targets Volcengine Ark's OpenAI-compatible coding endpoint at `https://ark.cn-beijing.volces.com/api/coding/v3`; `openrouter` targets `https://openrouter.ai/api/v1`; `xiaomi-mimo` targets Xiaomi MiMo's OpenAI-compatible endpoint, using `https://token-plan-sgp.xiaomimimo.com/v1` by default for Token Plan keys (`tp-...`) and `https://api.xiaomimimo.com/v1` for pay-as-you-go keys; set `base_url` explicitly if your Token Plan account uses the China region; `novita` targets `https://api.novita.ai/v1`; `fireworks` targets `https://api.fireworks.ai/inference/v1`; `siliconflow` targets SiliconFlow, defaulting to `https://api.siliconflow.com/v1`; `siliconflow-CN` targets the SiliconFlow China regional endpoint while sharing `[providers.siliconflow]`; `arcee` targets Arcee AI's OpenAI-compatible endpoint at `https://api.arcee.ai/api/v1`; `moonshot` targets Moonshot/Kimi, defaulting to `https://api.moonshot.ai/v1`; `minimax` targets MiniMax at `https://api.minimax.io/v1`; `zai` targets Z.ai at `https://api.z.ai/api/coding/paas/v4`; `stepfun` targets StepFun at `https://api.stepfun.ai/v1`; `sglang` 
targets a self-hosted OpenAI-compatible endpoint, defaulting to `http://localhost:30000/v1`; `vllm` targets a self-hosted vLLM OpenAI-compatible endpoint, defaulting to `http://localhost:8000/v1`; `ollama` targets Ollama's OpenAI-compatible endpoint, defaulting to `http://localhost:11434/v1`.
|
||||
- `api_key` (string, required for hosted providers): must be non-empty for DeepSeek/hosted providers (or set the provider API key env var). Self-hosted SGLang, vLLM, and Ollama can omit it.
|
||||
- `base_url` (string, optional): defaults to `https://api.deepseek.com/beta` for DeepSeek's OpenAI-compatible Chat Completions API, including legacy `provider = "deepseek-cn"` configs. Other defaults are `https://integrate.api.nvidia.com/v1` for `nvidia-nim`, `https://api.openai.com/v1` for `openai`, `https://api.atlascloud.ai/v1` for `atlascloud`, `https://maas-openapi.wanjiedata.com/api/v1` for `wanjie-ark`, `https://ark.cn-beijing.volces.com/api/coding/v3` for `volcengine`, `https://openrouter.ai/api/v1` for `openrouter`, `https://token-plan-sgp.xiaomimimo.com/v1` for `xiaomi-mimo` when the API key starts with `tp-...` and `https://api.xiaomimimo.com/v1` otherwise, `https://api.novita.ai/v1` for `novita`, `https://api.fireworks.ai/inference/v1` for `fireworks`, `https://api.siliconflow.com/v1` for `siliconflow`, `https://api.siliconflow.cn/v1` for `siliconflow-CN`, `https://api.arcee.ai/api/v1` for `arcee`, `https://api.moonshot.ai/v1` for `moonshot`, `http://localhost:30000/v1` for `sglang`, `http://localhost:8000/v1` for `vllm`, and `http://localhost:11434/v1` for `ollama`. Set `base_url = "https://token-plan-cn.xiaomimimo.com/v1"` explicitly if your Xiaomi MiMo Token Plan account is provisioned in the China region. Set `https://api.deepseek.com` or `https://api.deepseek.com/v1` explicitly to opt out of DeepSeek beta features.
|
||||
- `base_url` (string, optional): defaults to `https://api.deepseek.com/beta` for DeepSeek's OpenAI-compatible Chat Completions API, including legacy `provider = "deepseek-cn"` configs. Other defaults are `https://integrate.api.nvidia.com/v1` for `nvidia-nim`, `https://api.openai.com/v1` for `openai`, `https://api.atlascloud.ai/v1` for `atlascloud`, `https://maas-openapi.wanjiedata.com/api/v1` for `wanjie-ark`, `https://ark.cn-beijing.volces.com/api/coding/v3` for `volcengine`, `https://openrouter.ai/api/v1` for `openrouter`, `https://token-plan-sgp.xiaomimimo.com/v1` for `xiaomi-mimo` when the API key starts with `tp-...` and `https://api.xiaomimimo.com/v1` otherwise, `https://api.novita.ai/v1` for `novita`, `https://api.fireworks.ai/inference/v1` for `fireworks`, `https://api.siliconflow.com/v1` for `siliconflow`, `https://api.siliconflow.cn/v1` for `siliconflow-CN`, `https://api.arcee.ai/api/v1` for `arcee`, `https://api.moonshot.ai/v1` for `moonshot`, `https://api.minimax.io/v1` for `minimax`, `https://api.z.ai/api/coding/paas/v4` for `zai`, `https://api.stepfun.ai/v1` for `stepfun`, `http://localhost:30000/v1` for `sglang`, `http://localhost:8000/v1` for `vllm`, and `http://localhost:11434/v1` for `ollama`. Set `base_url = "https://token-plan-cn.xiaomimimo.com/v1"` explicitly if your Xiaomi MiMo Token Plan account is provisioned in the China region. Set `https://api.deepseek.com` or `https://api.deepseek.com/v1` explicitly to opt out of DeepSeek beta features.
|
||||
- `path_suffix` (string, optional provider-table key): override the chat-completions path for OpenAI-compatible gateways that do not serve `/v1/chat/completions`. For example, `[providers.openai] path_suffix = "/chat/completions"` sends chat requests to the unversioned base URL plus `/chat/completions`; `models` and `beta/*` requests keep their normal routing.
|
||||
- `insecure_skip_tls_verify` (bool, optional provider-table key): disabled by default. When true on the active provider table, only the LLM provider HTTP client skips TLS certificate verification. Prefer `SSL_CERT_FILE` for corporate or private CA bundles; `codewhale doctor` reports this setting when enabled.
|
||||
- `default_text_model` (string, optional): defaults to `deepseek-v4-pro` for DeepSeek and generic OpenAI-compatible endpoints, `deepseek-ai/deepseek-v4-pro` for NVIDIA NIM, `deepseek-ai/deepseek-v4-flash` for AtlasCloud, `deepseek-reasoner` for Wanjie Ark, `DeepSeek-V4-Pro` for Volcengine Ark, `deepseek/deepseek-v4-pro` for OpenRouter and Novita, `mimo-v2.5-pro` for Xiaomi MiMo, `accounts/fireworks/models/deepseek-v4-pro` for Fireworks, `deepseek-ai/DeepSeek-V4-Pro` for SiliconFlow, `trinity-large-thinking` for Arcee AI, `kimi-k2.7-code` for Moonshot, `deepseek-ai/DeepSeek-V4-Pro` for SGLang/vLLM, and `deepseek-coder:1.3b` for Ollama. Hugging Face and Together AI both default to `deepseek-ai/DeepSeek-V4-Pro`. Current public DeepSeek IDs are `deepseek-v4-pro` and `deepseek-v4-flash`, both with 1M context windows, 384K max output, and thinking mode enabled by default. Legacy `deepseek-chat` and `deepseek-reasoner` remain compatibility aliases for `deepseek-v4-flash` until July 24, 2026, except SiliconFlow maps `deepseek-reasoner` and `deepseek-r1` to its Pro model while `deepseek-chat` and `deepseek-v3` map to Flash. Provider-specific mappings translate `deepseek-v4-pro` / `deepseek-v4-flash` to each provider's model ID where supported. 
OpenRouter also recognizes recent large IDs such as `arcee-ai/trinity-large-thinking`, `minimax/minimax-m3`, `minimax/minimax-2.7`, `xiaomi/mimo-v2.5-pro`, `qwen/qwen3.6-flash`, `qwen/qwen3.6-35b-a3b`, `qwen/qwen3.6-max-preview`, `qwen/qwen3.6-27b`, `qwen/qwen3.6-plus`, `qwen/qwen3.7-max`, `google/gemma-4-31b-it`, `moonshotai/kimi-k2.7-code`, `moonshotai/kimi-k2.6`, `nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free`, and `nvidia/nemotron-3-ultra-550b-a55b`; direct Arcee uses bare IDs such as `trinity-large-thinking` and `trinity-large-preview`; direct Moonshot recognizes `kimi-k2.7-code` and `kimi-k2.6`, with `kimi` and `kimi-k2` aliases selecting `kimi-k2.7-code`; direct Xiaomi MiMo recognizes chat IDs `mimo-v2.5-pro` and `mimo-v2.5`, while TTS IDs are selected through `codewhale speech` / `tts`. Generic `openai`, `atlascloud`, `wanjie-ark`, `xiaomi-mimo`, `arcee`, and Ollama model IDs are passed through unchanged after known aliases are normalized. OpenRouter and SiliconFlow provider configs with a custom `base_url` also preserve explicit model values, which lets OpenAI-compatible gateways accept bare model IDs. Use `/models` or `codewhale models` to discover live IDs from your configured endpoint. `CODEWHALE_MODEL` overrides this for a single process; `DEEPSEEK_MODEL` is the legacy alias.
|
||||
- `default_text_model` (string, optional): defaults to `deepseek-v4-pro` for DeepSeek and generic OpenAI-compatible endpoints, `deepseek-ai/deepseek-v4-pro` for NVIDIA NIM, `deepseek-ai/deepseek-v4-flash` for AtlasCloud, `deepseek-reasoner` for Wanjie Ark, `DeepSeek-V4-Pro` for Volcengine Ark, `deepseek/deepseek-v4-pro` for OpenRouter and Novita, `mimo-v2.5-pro` for Xiaomi MiMo, `accounts/fireworks/models/deepseek-v4-pro` for Fireworks, `deepseek-ai/DeepSeek-V4-Pro` for SiliconFlow, `trinity-large-thinking` for Arcee AI, `kimi-k2.7-code` for Moonshot, `MiniMax-M3` for MiniMax, `GLM-5.1` for Z.ai, `step-3.7-flash` for StepFun, `deepseek-ai/DeepSeek-V4-Pro` for SGLang/vLLM, and `deepseek-coder:1.3b` for Ollama. Hugging Face and Together AI both default to `deepseek-ai/DeepSeek-V4-Pro`. Current public DeepSeek IDs are `deepseek-v4-pro` and `deepseek-v4-flash`, both with 1M context windows, 384K max output, and thinking mode enabled by default. Legacy `deepseek-chat` and `deepseek-reasoner` remain compatibility aliases for `deepseek-v4-flash` until July 24, 2026, except SiliconFlow maps `deepseek-reasoner` and `deepseek-r1` to its Pro model while `deepseek-chat` and `deepseek-v3` map to Flash. Provider-specific mappings translate `deepseek-v4-pro` / `deepseek-v4-flash` to each provider's model ID where supported. 
OpenRouter also recognizes recent large IDs such as `arcee-ai/trinity-large-thinking`, `minimax/minimax-m3`, `minimax/minimax-2.7`, `xiaomi/mimo-v2.5-pro`, `qwen/qwen3.6-flash`, `qwen/qwen3.6-35b-a3b`, `qwen/qwen3.6-max-preview`, `qwen/qwen3.6-27b`, `qwen/qwen3.6-plus`, `qwen/qwen3.7-max`, `google/gemma-4-31b-it`, `moonshotai/kimi-k2.7-code`, `moonshotai/kimi-k2.6`, `nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free`, and `nvidia/nemotron-3-ultra-550b-a55b`; direct Arcee uses bare IDs such as `trinity-large-thinking` and `trinity-large-preview`; direct Moonshot recognizes `kimi-k2.7-code`, `kimi-k2.6`, and Kimi Code's stable `kimi-for-coding`; direct MiniMax recognizes `MiniMax-M3` and the documented M2.x chat model IDs; direct Xiaomi MiMo recognizes chat IDs `mimo-v2.5-pro` and `mimo-v2.5`, while TTS IDs are selected through `codewhale speech` / `tts`. Generic `openai`, `atlascloud`, `wanjie-ark`, `xiaomi-mimo`, `arcee`, `moonshot`, `minimax`, `zai`, `stepfun`, and Ollama model IDs are passed through unchanged after known aliases are normalized. OpenRouter and SiliconFlow provider configs with a custom `base_url` also preserve explicit model values, which lets OpenAI-compatible gateways accept bare model IDs. Use `/models` or `codewhale models` to discover live IDs from your configured endpoint. `CODEWHALE_MODEL` overrides this for a single process; `DEEPSEEK_MODEL` is the legacy alias.
|
||||
- `reasoning_effort` (string, optional): `off`, `low`, `medium`, `high`, `max`, or `xhigh`; defaults to the configured UI tier. DeepSeek Platform receives top-level `thinking` / `reasoning_effort` fields. OpenAI Codex normalizes stale `off` to `low` and sends `max` as Responses `xhigh`. NVIDIA NIM receives equivalent settings through `chat_template_kwargs`.
|
||||
- `verbosity` (string, optional): `normal` or `concise`. `normal` keeps the
|
||||
default conversational prompt. `concise` appends a prompt discipline block
|
||||
|
||||
+8
-2
@@ -135,7 +135,8 @@ endpoint.
|
||||
| `siliconflow` | `[providers.siliconflow]` | `SILICONFLOW_API_KEY` | `SILICONFLOW_BASE_URL`; default `https://api.siliconflow.com/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | OpenAI-compatible hosted route. Official docs use the `.com` endpoint. `SILICONFLOW_MODEL` is accepted. Reasoning aliases `deepseek-reasoner` and `deepseek-r1` map to Pro; `deepseek-chat` and `deepseek-v3` map to Flash. |
|
||||
| `siliconflow-CN` | `[providers.siliconflow_cn]` | `SILICONFLOW_API_KEY` | `SILICONFLOW_BASE_URL`; default `https://api.siliconflow.cn/v1` | Uses the SiliconFlow model set | China regional SiliconFlow route. Falls back to `[providers.siliconflow]` for api_key / base_url / model when unset. Select it with `provider = "siliconflow-CN"` or `CODEWHALE_PROVIDER=siliconflow-CN`. |
|
||||
| `arcee` | `[providers.arcee]` | `ARCEE_API_KEY` | `ARCEE_BASE_URL`; default `https://api.arcee.ai/api/v1` | `trinity-large-thinking`, `trinity-large-preview` | Arcee AI direct OpenAI-compatible route, tracked as 256K-context BF16 serving. `ARCEE_MODEL` is accepted. OpenRouter's `arcee-ai/trinity-large-thinking` remains the OpenRouter namespaced model ID; direct Arcee uses the bare `trinity-large-thinking` ID. |
|
||||
| `moonshot` | `[providers.moonshot]` | `MOONSHOT_API_KEY`, `KIMI_API_KEY` | `MOONSHOT_BASE_URL`, `KIMI_BASE_URL`; default `https://api.moonshot.ai/v1` | `kimi-k2.7-code`, `kimi-k2.6`; Kimi Code path uses `kimi-for-coding` at `https://api.kimi.com/coding/v1` | Moonshot/Kimi route. `kimi` and `kimi-k2` aliases select `kimi-k2.7-code`; `MOONSHOT_MODEL`, `KIMI_MODEL_NAME`, and `KIMI_MODEL` are accepted. `[providers.moonshot] auth_mode = "kimi_oauth"` reads Kimi Code OAuth credentials from `KIMI_CODE_HOME`/`~/.kimi-code`, with legacy `KIMI_SHARE_DIR`/`~/.kimi` fallback. |
|
||||
| `moonshot` | `[providers.moonshot]` | `MOONSHOT_API_KEY`, `KIMI_API_KEY` | `MOONSHOT_BASE_URL`, `KIMI_BASE_URL`; default `https://api.moonshot.ai/v1` | `kimi-k2.7-code`, `kimi-k2.6`; Kimi Code path uses `kimi-for-coding` at `https://api.kimi.com/coding/v1` | Moonshot/Kimi route. `kimi` and `kimi-k2` aliases select `kimi-k2.7-code`; `MOONSHOT_MODEL`, `KIMI_MODEL_NAME`, and `KIMI_MODEL` are accepted. Kimi thinking streams through `reasoning_content`; CodeWhale keeps it in Thinking cells and replays it for thinking/tool-call continuity. `[providers.moonshot] auth_mode = "kimi_oauth"` reads Kimi Code OAuth credentials from `KIMI_CODE_HOME`/`~/.kimi-code`, with legacy `KIMI_SHARE_DIR`/`~/.kimi` fallback. |
|
||||
| `minimax` | `[providers.minimax]` | `MINIMAX_API_KEY` | `MINIMAX_BASE_URL`; default `https://api.minimax.io/v1` | `MiniMax-M3`, `MiniMax-M2.7`, `MiniMax-M2.7-highspeed`, `MiniMax-M2.5`, `MiniMax-M2.5-highspeed`, `MiniMax-M2.1`, `MiniMax-M2.1-highspeed`, `MiniMax-M2` | MiniMax direct OpenAI-compatible route. CodeWhale sends `reasoning_split = true` so MiniMax thinking arrives separately from answer text, and direct MiniMax IDs stay distinct from OpenRouter namespaced IDs such as `minimax/minimax-m3`. |
|
||||
| `sglang` | `[providers.sglang]` | Optional `SGLANG_API_KEY` | `SGLANG_BASE_URL`; default `http://localhost:30000/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | Self-hosted OpenAI-compatible route. Localhost deployments commonly omit auth. `SGLANG_MODEL` is accepted. |
|
||||
| `vllm` | `[providers.vllm]` | Optional `VLLM_API_KEY` | `VLLM_BASE_URL`; default `http://localhost:8000/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | Self-hosted vLLM OpenAI-compatible route. Localhost deployments commonly omit auth. `VLLM_MODEL` is accepted. |
|
||||
| `ollama` | `[providers.ollama]` | Optional `OLLAMA_API_KEY` | `OLLAMA_BASE_URL`; default `http://localhost:11434/v1` | `deepseek-coder:1.3b`; provider-hinted custom tags pass through | Self-hosted Ollama OpenAI-compatible route. Localhost deployments commonly omit auth. `OLLAMA_MODEL` is accepted. |
|
||||
@@ -220,6 +221,7 @@ endpoint when the endpoint supports model listing.
|
||||
| `siliconflow` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | yes | yes |
|
||||
| `arcee` | `trinity-large-thinking`, `trinity-large-preview`; provider-hinted custom model IDs pass through | yes | yes for `trinity-large-thinking`; no for `trinity-large-preview` |
|
||||
| `moonshot` | `kimi-k2.7-code`, `kimi-k2.6` | yes | yes |
|
||||
| `minimax` | `MiniMax-M3`, `MiniMax-M2.7`, `MiniMax-M2.7-highspeed`, `MiniMax-M2.5`, `MiniMax-M2.5-highspeed`, `MiniMax-M2.1`, `MiniMax-M2.1-highspeed`, `MiniMax-M2` | yes | yes |
|
||||
| `sglang` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | yes | yes |
|
||||
| `vllm` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | yes | yes |
|
||||
| `ollama` | `deepseek-coder:1.3b`; custom tags pass through when provider hint is `ollama` | yes | no |
|
||||
@@ -259,7 +261,10 @@ Anthropic uses Messages, and `openai-codex` uses Responses.
|
||||
| Wanjie Ark `reasoner` / `r1` model IDs | 128,000 | 4,096 | yes | no | not documented in code |
|
||||
| Direct Arcee API `trinity-large-thinking` | 262,144 | 262,144 | yes | no | not documented in code |
|
||||
| Direct Arcee API `trinity-large-preview` | 262,144 | 4,096 | no in doctor capability metadata | no | not documented in code |
|
||||
| Generic `openai`, AtlasCloud, and Moonshot/Kimi | 128,000 | 4,096 | no in doctor capability metadata | no | not documented in code |
|
||||
| Direct Moonshot/Kimi `kimi-k2.7-code`, `kimi-k2.6`, `kimi-for-coding` | 262,144 | 262,144 | yes | no | not documented in code |
|
||||
| Direct MiniMax `MiniMax-M3` | 1,000,000 | 524,288 | yes | no | not documented in code |
|
||||
| Direct MiniMax M2.x models | 204,800 | 4,096 fallback until MiniMax output metadata is promoted | yes | no | not documented in code |
|
||||
| Generic `openai` and AtlasCloud | 128,000 | 4,096 | no in doctor capability metadata | no | not documented in code |
|
||||
| Ollama | 8,192 | 4,096 | no | no | not documented in code |
|
||||
| Hugging Face Inference Providers V4 model IDs | 131,072 | 4,096 | yes | no | not documented in code |
|
||||
| Other recognized DeepSeek model IDs | 128,000 unless the model name carries an explicit `Nk` hint | 4,096 | no unless V4/reasoner logic matches | DeepSeek/NIM only | DeepSeek beta only |
|
||||
@@ -319,6 +324,7 @@ receive no reasoning fields at all for that tier.
|
||||
| `moonshot` | `thinking: {type: disabled}` | `thinking: {type: enabled}` | `thinking: {type: enabled}` |
|
||||
| `ollama` | `think: false` | `think: true` | `think: true` |
|
||||
| `xiaomi-mimo` | `thinking: {type: disabled}` | `thinking: {type: enabled}` | `thinking: {type: enabled}` |
|
||||
| `minimax` | `reasoning_split: true` + `thinking: {type: disabled}` | `reasoning_split: true` + `thinking: {type: adaptive}` | `reasoning_split: true` + `thinking: {type: adaptive}` |
|
||||
| `nvidia-nim` | `chat_template_kwargs.thinking: false` | `chat_template_kwargs`: `thinking: true` + `reasoning_effort: "high"` | `chat_template_kwargs`: `thinking: true` + `reasoning_effort: "max"` |
|
||||
| `vllm` | `chat_template_kwargs.enable_thinking: false` | `chat_template_kwargs.enable_thinking: true` + `reasoning_effort` low/medium/high | `chat_template_kwargs.enable_thinking: true` + `reasoning_effort: "high"` (vLLM has no max tier) |
|
||||
| `arcee`, `huggingface` | omitted | `reasoning_effort` pass-through | `reasoning_effort: "high"` |
|
||||
|
||||
Reference in New Issue
Block a user