feat(config): add first-party MiniMax provider route

Refs #1310.

Adds the direct minimax provider slot, auth/config/env bindings, model aliases, docs, and picker/status surfaces. MiniMax requests now set reasoning_split and preserve reasoning_details history so thinking stays out of answer text.

Also updates Moonshot/Kimi reasoning classification for the Kimi Code route so reasoning_content streams into Thinking cells instead of inline prose.
This commit is contained in:
Hunter B
2026-06-12 22:05:22 -07:00
parent e6005eb9ac
commit e9f0c1c13b
14 changed files with 759 additions and 61 deletions
+2
View File
@@ -782,6 +782,7 @@ fn provider_slot(provider: ProviderKind) -> &'static str {
ProviderKind::Anthropic => "anthropic",
ProviderKind::Zai => "zai",
ProviderKind::Stepfun => "stepfun",
ProviderKind::Minimax => "minimax",
}
}
@@ -919,6 +920,7 @@ fn provider_env_vars(provider: ProviderKind) -> &'static [&'static str] {
ProviderKind::Anthropic => &["ANTHROPIC_API_KEY"],
ProviderKind::Zai => &["ZAI_API_KEY", "Z_AI_API_KEY"],
ProviderKind::Stepfun => &["STEPFUN_API_KEY", "STEP_API_KEY"],
ProviderKind::Minimax => &["MINIMAX_API_KEY"],
}
}
+185 -12
View File
@@ -108,6 +108,16 @@ const DEFAULT_ZAI_BASE_URL: &str = "https://api.z.ai/api/coding/paas/v4";
// StepFun / StepFlash defaults
const DEFAULT_STEPFUN_MODEL: &str = "step-3.7-flash";
const DEFAULT_STEPFUN_BASE_URL: &str = "https://api.stepfun.ai/v1";
// MiniMax defaults
// Model id used for the MiniMax provider when no model is configured; it is also the
// target of the bare `minimax` / M3 aliases in `canonical_minimax_model_id`.
const DEFAULT_MINIMAX_MODEL: &str = "MiniMax-M3";
// Canonical ids for the M2-family models. `canonical_minimax_model_id` maps the accepted
// alias spellings onto these exact strings.
const MINIMAX_M2_7_MODEL: &str = "MiniMax-M2.7";
const MINIMAX_M2_7_HIGHSPEED_MODEL: &str = "MiniMax-M2.7-highspeed";
const MINIMAX_M2_5_MODEL: &str = "MiniMax-M2.5";
const MINIMAX_M2_5_HIGHSPEED_MODEL: &str = "MiniMax-M2.5-highspeed";
const MINIMAX_M2_1_MODEL: &str = "MiniMax-M2.1";
const MINIMAX_M2_1_HIGHSPEED_MODEL: &str = "MiniMax-M2.1-highspeed";
const MINIMAX_M2_MODEL: &str = "MiniMax-M2";
// Base URL used for the MiniMax provider when neither config nor environment overrides it.
const DEFAULT_MINIMAX_BASE_URL: &str = "https://api.minimax.io/v1";
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "kebab-case")]
@@ -165,11 +175,7 @@ pub enum ProviderKind {
OpenaiCodex,
#[serde(alias = "claude")]
Anthropic,
#[serde(
alias = "z-ai",
alias = "z_ai",
alias = "z.ai"
)]
#[serde(alias = "z-ai", alias = "z_ai", alias = "z.ai")]
Zai,
#[serde(
alias = "step-fun",
@@ -180,10 +186,12 @@ pub enum ProviderKind {
alias = "step_flash"
)]
Stepfun,
#[serde(alias = "mini-max", alias = "mini_max", alias = "minimax")]
Minimax,
}
impl ProviderKind {
pub const ALL: [Self; 23] = [
pub const ALL: [Self; 24] = [
Self::Deepseek,
Self::NvidiaNim,
Self::Openai,
@@ -207,6 +215,7 @@ impl ProviderKind {
Self::Anthropic,
Self::Zai,
Self::Stepfun,
Self::Minimax,
];
#[must_use]
@@ -235,6 +244,7 @@ impl ProviderKind {
Self::Anthropic,
Self::Zai,
Self::Stepfun,
Self::Minimax,
]
}
@@ -343,12 +353,7 @@ pub struct ProvidersToml {
pub openai_codex: ProviderConfigToml,
#[serde(default)]
pub anthropic: ProviderConfigToml,
#[serde(
default,
alias = "z-ai",
alias = "z_ai",
alias = "z.ai"
)]
#[serde(default, alias = "z-ai", alias = "z_ai", alias = "z.ai")]
pub zai: ProviderConfigToml,
#[serde(
default,
@@ -360,6 +365,8 @@ pub struct ProvidersToml {
alias = "step_flash"
)]
pub stepfun: ProviderConfigToml,
#[serde(default, alias = "mini-max", alias = "mini_max", alias = "minimax")]
pub minimax: ProviderConfigToml,
}
/// Sibling `permissions.toml` schema.
@@ -413,6 +420,7 @@ impl ProvidersToml {
ProviderKind::Anthropic => &self.anthropic,
ProviderKind::Zai => &self.zai,
ProviderKind::Stepfun => &self.stepfun,
ProviderKind::Minimax => &self.minimax,
}
}
@@ -441,6 +449,7 @@ impl ProvidersToml {
ProviderKind::Anthropic => &mut self.anthropic,
ProviderKind::Zai => &mut self.zai,
ProviderKind::Stepfun => &mut self.stepfun,
ProviderKind::Minimax => &mut self.minimax,
}
}
}
@@ -2166,6 +2175,7 @@ impl ConfigToml {
ProviderKind::Anthropic => DEFAULT_ANTHROPIC_BASE_URL.to_string(),
ProviderKind::Zai => DEFAULT_ZAI_BASE_URL.to_string(),
ProviderKind::Stepfun => DEFAULT_STEPFUN_BASE_URL.to_string(),
ProviderKind::Minimax => DEFAULT_MINIMAX_BASE_URL.to_string(),
})
};
// CLI flag wins outright. Otherwise: config-file → injected secrets/env.
@@ -2379,6 +2389,11 @@ fn normalize_model_for_provider(provider: ProviderKind, model: &str) -> String {
{
return canonical.to_string();
}
if matches!(provider, ProviderKind::Minimax)
&& let Some(canonical) = canonical_minimax_model_id(model)
{
return canonical.to_string();
}
if matches!(
provider,
@@ -2386,6 +2401,9 @@ fn normalize_model_for_provider(provider: ProviderKind, model: &str) -> String {
| ProviderKind::WanjieArk
| ProviderKind::Volcengine
| ProviderKind::XiaomiMimo
| ProviderKind::Zai
| ProviderKind::Stepfun
| ProviderKind::Minimax
| ProviderKind::Ollama
) {
return model.to_string();
@@ -2539,6 +2557,39 @@ fn canonical_xiaomi_mimo_model_id(model: &str) -> Option<&'static str> {
}
}
/// Maps a user-supplied MiniMax model spelling onto its canonical model id.
///
/// Matching is case-insensitive, ignores surrounding whitespace, and treats `_` and
/// spaces as `-`. Each model family accepts both the `mN` and `m-N` spellings, and a
/// minor version may be written with either `.` or `-` (`m2.7` / `m2-7`).
///
/// Returns `None` when the name is not a recognised alias, leaving the caller free to
/// use the original string unchanged.
fn canonical_minimax_model_id(model: &str) -> Option<&'static str> {
    let normalized = model
        .trim()
        .to_ascii_lowercase()
        .replace(['_', ' '], "-");
    match normalized.as_str() {
        // The `-thinking` suffix is accepted for both the `m3` and `m-3` spellings so the
        // table stays symmetric with every other row.
        "minimax"
        | "minimax-m3"
        | "minimax-m-3"
        | "minimax-m3-thinking"
        | "minimax-m-3-thinking" => Some(DEFAULT_MINIMAX_MODEL),
        "minimax-m2.7" | "minimax-m2-7" | "minimax-m-2.7" | "minimax-m-2-7" => {
            Some(MINIMAX_M2_7_MODEL)
        }
        "minimax-m2.7-highspeed"
        | "minimax-m2-7-highspeed"
        | "minimax-m-2.7-highspeed"
        | "minimax-m-2-7-highspeed" => Some(MINIMAX_M2_7_HIGHSPEED_MODEL),
        "minimax-m2.5" | "minimax-m2-5" | "minimax-m-2.5" | "minimax-m-2-5" => {
            Some(MINIMAX_M2_5_MODEL)
        }
        "minimax-m2.5-highspeed"
        | "minimax-m2-5-highspeed"
        | "minimax-m-2.5-highspeed"
        | "minimax-m-2-5-highspeed" => Some(MINIMAX_M2_5_HIGHSPEED_MODEL),
        "minimax-m2.1" | "minimax-m2-1" | "minimax-m-2.1" | "minimax-m-2-1" => {
            Some(MINIMAX_M2_1_MODEL)
        }
        "minimax-m2.1-highspeed"
        | "minimax-m2-1-highspeed"
        | "minimax-m-2.1-highspeed"
        | "minimax-m-2-1-highspeed" => Some(MINIMAX_M2_1_HIGHSPEED_MODEL),
        "minimax-m2" | "minimax-m-2" => Some(MINIMAX_M2_MODEL),
        _ => None,
    }
}
fn canonical_openrouter_recent_model_id(model: &str) -> Option<&'static str> {
let normalized = model.trim().to_ascii_lowercase();
let normalized = normalized.replace(['_', ' '], "-");
@@ -2634,6 +2685,7 @@ fn default_model_for_provider(provider: ProviderKind) -> &'static str {
ProviderKind::Anthropic => DEFAULT_ANTHROPIC_MODEL,
ProviderKind::Zai => DEFAULT_ZAI_MODEL,
ProviderKind::Stepfun => DEFAULT_STEPFUN_MODEL,
ProviderKind::Minimax => DEFAULT_MINIMAX_MODEL,
}
}
@@ -2662,6 +2714,7 @@ fn default_base_url_for_provider(provider: ProviderKind) -> &'static str {
ProviderKind::Anthropic => DEFAULT_ANTHROPIC_BASE_URL,
ProviderKind::Zai => DEFAULT_ZAI_BASE_URL,
ProviderKind::Stepfun => DEFAULT_STEPFUN_BASE_URL,
ProviderKind::Minimax => DEFAULT_MINIMAX_BASE_URL,
}
}
@@ -3447,6 +3500,8 @@ struct EnvRuntimeOverrides {
zai_model: Option<String>,
stepfun_base_url: Option<String>,
stepfun_model: Option<String>,
minimax_base_url: Option<String>,
minimax_model: Option<String>,
}
impl EnvRuntimeOverrides {
@@ -3634,6 +3689,12 @@ impl EnvRuntimeOverrides {
.or_else(|_| std::env::var("STEP_MODEL"))
.ok()
.filter(|v| !v.trim().is_empty()),
minimax_base_url: std::env::var("MINIMAX_BASE_URL")
.ok()
.filter(|v| !v.trim().is_empty()),
minimax_model: std::env::var("MINIMAX_MODEL")
.ok()
.filter(|v| !v.trim().is_empty()),
}
}
@@ -3679,6 +3740,7 @@ impl EnvRuntimeOverrides {
ProviderKind::Anthropic => self.anthropic_base_url.clone(),
ProviderKind::Zai => self.zai_base_url.clone(),
ProviderKind::Stepfun => self.stepfun_base_url.clone(),
ProviderKind::Minimax => self.minimax_base_url.clone(),
}
}
@@ -3699,6 +3761,9 @@ impl EnvRuntimeOverrides {
ProviderKind::Together => self.together_model.clone(),
ProviderKind::OpenaiCodex => self.openai_codex_model.clone(),
ProviderKind::Anthropic => self.anthropic_model.clone(),
ProviderKind::Zai => self.zai_model.clone(),
ProviderKind::Stepfun => self.stepfun_model.clone(),
ProviderKind::Minimax => self.minimax_model.clone(),
_ => None,
}?;
@@ -4096,6 +4161,17 @@ action = "mode.agent"
kimi_base_url: Option<OsString>,
kimi_model: Option<OsString>,
kimi_model_name: Option<OsString>,
zai_api_key: Option<OsString>,
z_ai_api_key: Option<OsString>,
zai_base_url: Option<OsString>,
zai_model: Option<OsString>,
stepfun_api_key: Option<OsString>,
step_api_key: Option<OsString>,
stepfun_base_url: Option<OsString>,
stepfun_model: Option<OsString>,
minimax_api_key: Option<OsString>,
minimax_base_url: Option<OsString>,
minimax_model: Option<OsString>,
sglang_api_key: Option<OsString>,
sglang_base_url: Option<OsString>,
vllm_api_key: Option<OsString>,
@@ -4179,6 +4255,17 @@ action = "mode.agent"
kimi_base_url: env::var_os("KIMI_BASE_URL"),
kimi_model: env::var_os("KIMI_MODEL"),
kimi_model_name: env::var_os("KIMI_MODEL_NAME"),
zai_api_key: env::var_os("ZAI_API_KEY"),
z_ai_api_key: env::var_os("Z_AI_API_KEY"),
zai_base_url: env::var_os("ZAI_BASE_URL"),
zai_model: env::var_os("ZAI_MODEL"),
stepfun_api_key: env::var_os("STEPFUN_API_KEY"),
step_api_key: env::var_os("STEP_API_KEY"),
stepfun_base_url: env::var_os("STEPFUN_BASE_URL"),
stepfun_model: env::var_os("STEPFUN_MODEL"),
minimax_api_key: env::var_os("MINIMAX_API_KEY"),
minimax_base_url: env::var_os("MINIMAX_BASE_URL"),
minimax_model: env::var_os("MINIMAX_MODEL"),
sglang_api_key: env::var_os("SGLANG_API_KEY"),
sglang_base_url: env::var_os("SGLANG_BASE_URL"),
vllm_api_key: env::var_os("VLLM_API_KEY"),
@@ -4257,6 +4344,17 @@ action = "mode.agent"
env::remove_var("KIMI_BASE_URL");
env::remove_var("KIMI_MODEL");
env::remove_var("KIMI_MODEL_NAME");
env::remove_var("ZAI_API_KEY");
env::remove_var("Z_AI_API_KEY");
env::remove_var("ZAI_BASE_URL");
env::remove_var("ZAI_MODEL");
env::remove_var("STEPFUN_API_KEY");
env::remove_var("STEP_API_KEY");
env::remove_var("STEPFUN_BASE_URL");
env::remove_var("STEPFUN_MODEL");
env::remove_var("MINIMAX_API_KEY");
env::remove_var("MINIMAX_BASE_URL");
env::remove_var("MINIMAX_MODEL");
env::remove_var("SGLANG_API_KEY");
env::remove_var("SGLANG_BASE_URL");
env::remove_var("VLLM_API_KEY");
@@ -4361,6 +4459,17 @@ action = "mode.agent"
Self::restore_var("KIMI_BASE_URL", self.kimi_base_url.take());
Self::restore_var("KIMI_MODEL", self.kimi_model.take());
Self::restore_var("KIMI_MODEL_NAME", self.kimi_model_name.take());
Self::restore_var("ZAI_API_KEY", self.zai_api_key.take());
Self::restore_var("Z_AI_API_KEY", self.z_ai_api_key.take());
Self::restore_var("ZAI_BASE_URL", self.zai_base_url.take());
Self::restore_var("ZAI_MODEL", self.zai_model.take());
Self::restore_var("STEPFUN_API_KEY", self.stepfun_api_key.take());
Self::restore_var("STEP_API_KEY", self.step_api_key.take());
Self::restore_var("STEPFUN_BASE_URL", self.stepfun_base_url.take());
Self::restore_var("STEPFUN_MODEL", self.stepfun_model.take());
Self::restore_var("MINIMAX_API_KEY", self.minimax_api_key.take());
Self::restore_var("MINIMAX_BASE_URL", self.minimax_base_url.take());
Self::restore_var("MINIMAX_MODEL", self.minimax_model.take());
Self::restore_var("SGLANG_API_KEY", self.sglang_api_key.take());
Self::restore_var("SGLANG_BASE_URL", self.sglang_base_url.take());
Self::restore_var("VLLM_API_KEY", self.vllm_api_key.take());
@@ -5730,6 +5839,70 @@ mode = "token-plan-usa"
assert_eq!(resolved.model, DEFAULT_MOONSHOT_MODEL);
}
#[test]
fn zai_stepfun_and_minimax_default_to_first_party_routes() {
    let _lock = env_lock();
    let _env = EnvGuard::without_deepseek_runtime_overrides();

    // Each row: provider under test, the base URL it should resolve to, and its default model.
    let cases = [
        (ProviderKind::Zai, DEFAULT_ZAI_BASE_URL, DEFAULT_ZAI_MODEL),
        (
            ProviderKind::Stepfun,
            DEFAULT_STEPFUN_BASE_URL,
            DEFAULT_STEPFUN_MODEL,
        ),
        (
            ProviderKind::Minimax,
            DEFAULT_MINIMAX_BASE_URL,
            DEFAULT_MINIMAX_MODEL,
        ),
    ];

    for (kind, base_url, model) in cases {
        // Only the provider is set; everything else comes from the built-in defaults.
        let config = ConfigToml {
            provider: kind,
            ..ConfigToml::default()
        };
        let cli = CliRuntimeOverrides::default();
        let resolved = config.resolve_runtime_options(&cli);

        assert_eq!(resolved.provider, kind);
        assert_eq!(resolved.base_url, base_url);
        assert_eq!(resolved.model, model);
    }
}
// Checks that MINIMAX_MODEL / MINIMAX_BASE_URL from the environment reach the resolved
// runtime options when the provider is selected through CODEWHALE_PROVIDER.
#[test]
fn first_party_provider_env_model_overrides_pass_through() {
// NOTE(review): `env_lock` and `EnvGuard` are defined outside this view; presumably the
// lock serializes env-mutating tests and the guard clears/restores the override variables
// — confirm before relying on it.
let _lock = env_lock();
let _env = EnvGuard::without_deepseek_runtime_overrides();
// SAFETY: NOTE(review) — mutating the process environment is only sound while no other
// thread reads or writes it; this appears to rely on `_lock` being held for the whole
// test. Confirm against `env_lock`.
unsafe {
env::set_var("CODEWHALE_PROVIDER", "minimax");
env::set_var("MINIMAX_MODEL", "MiniMax-M2.7-highspeed");
env::set_var("MINIMAX_BASE_URL", "https://minimax.example/v1");
}
// Default config and no CLI overrides, so every asserted value below comes from the env.
let resolved =
ConfigToml::default().resolve_runtime_options(&CliRuntimeOverrides::default());
assert_eq!(resolved.provider, ProviderKind::Minimax);
assert_eq!(resolved.base_url, "https://minimax.example/v1");
assert_eq!(resolved.model, "MiniMax-M2.7-highspeed");
}
// Checks that an alias spelling supplied through MINIMAX_MODEL is rewritten to the
// canonical MiniMax model id in the resolved runtime options.
#[test]
fn minimax_env_model_override_canonicalizes_known_aliases() {
// NOTE(review): `env_lock` and `EnvGuard` are defined outside this view; presumably the
// lock serializes env-mutating tests and the guard clears/restores the override variables
// — confirm before relying on it.
let _lock = env_lock();
let _env = EnvGuard::without_deepseek_runtime_overrides();
// SAFETY: NOTE(review) — mutating the process environment is only sound while no other
// thread reads or writes it; this appears to rely on `_lock` being held for the whole
// test. Confirm against `env_lock`.
unsafe {
env::set_var("CODEWHALE_PROVIDER", "minimax");
// Lower-case, dash-separated alias; the canonical id uses `M2.5` with a dot.
env::set_var("MINIMAX_MODEL", "minimax-m2-5-highspeed");
}
let resolved =
ConfigToml::default().resolve_runtime_options(&CliRuntimeOverrides::default());
assert_eq!(resolved.provider, ProviderKind::Minimax);
assert_eq!(resolved.model, "MiniMax-M2.5-highspeed");
}
#[test]
fn moonshot_provider_preserves_explicit_kimi_k26() {
let _lock = env_lock();
+27 -13
View File
@@ -8,18 +8,18 @@ use super::{
DEFAULT_ARCEE_BASE_URL, DEFAULT_ARCEE_MODEL, DEFAULT_ATLASCLOUD_BASE_URL,
DEFAULT_ATLASCLOUD_MODEL, DEFAULT_DEEPSEEK_BASE_URL, DEFAULT_DEEPSEEK_MODEL,
DEFAULT_FIREWORKS_BASE_URL, DEFAULT_FIREWORKS_MODEL, DEFAULT_HUGGINGFACE_BASE_URL,
DEFAULT_HUGGINGFACE_MODEL, DEFAULT_MOONSHOT_BASE_URL, DEFAULT_MOONSHOT_MODEL,
DEFAULT_NOVITA_BASE_URL, DEFAULT_NOVITA_MODEL, DEFAULT_NVIDIA_NIM_BASE_URL,
DEFAULT_NVIDIA_NIM_MODEL, DEFAULT_OLLAMA_BASE_URL, DEFAULT_OLLAMA_MODEL,
DEFAULT_OPENAI_BASE_URL, DEFAULT_OPENAI_CODEX_BASE_URL, DEFAULT_OPENAI_CODEX_MODEL,
DEFAULT_OPENAI_MODEL, DEFAULT_OPENROUTER_BASE_URL, DEFAULT_OPENROUTER_MODEL,
DEFAULT_SGLANG_BASE_URL, DEFAULT_SGLANG_MODEL, DEFAULT_SILICONFLOW_BASE_URL,
DEFAULT_SILICONFLOW_CN_BASE_URL, DEFAULT_SILICONFLOW_MODEL, DEFAULT_STEPFUN_BASE_URL,
DEFAULT_STEPFUN_MODEL, DEFAULT_TOGETHER_BASE_URL, DEFAULT_TOGETHER_MODEL,
DEFAULT_VLLM_BASE_URL, DEFAULT_VLLM_MODEL, DEFAULT_VOLCENGINE_BASE_URL,
DEFAULT_VOLCENGINE_MODEL, DEFAULT_WANJIE_ARK_BASE_URL, DEFAULT_WANJIE_ARK_MODEL,
DEFAULT_XIAOMI_MIMO_BASE_URL, DEFAULT_XIAOMI_MIMO_MODEL, DEFAULT_ZAI_BASE_URL,
DEFAULT_ZAI_MODEL, ProviderKind,
DEFAULT_HUGGINGFACE_MODEL, DEFAULT_MINIMAX_BASE_URL, DEFAULT_MINIMAX_MODEL,
DEFAULT_MOONSHOT_BASE_URL, DEFAULT_MOONSHOT_MODEL, DEFAULT_NOVITA_BASE_URL,
DEFAULT_NOVITA_MODEL, DEFAULT_NVIDIA_NIM_BASE_URL, DEFAULT_NVIDIA_NIM_MODEL,
DEFAULT_OLLAMA_BASE_URL, DEFAULT_OLLAMA_MODEL, DEFAULT_OPENAI_BASE_URL,
DEFAULT_OPENAI_CODEX_BASE_URL, DEFAULT_OPENAI_CODEX_MODEL, DEFAULT_OPENAI_MODEL,
DEFAULT_OPENROUTER_BASE_URL, DEFAULT_OPENROUTER_MODEL, DEFAULT_SGLANG_BASE_URL,
DEFAULT_SGLANG_MODEL, DEFAULT_SILICONFLOW_BASE_URL, DEFAULT_SILICONFLOW_CN_BASE_URL,
DEFAULT_SILICONFLOW_MODEL, DEFAULT_STEPFUN_BASE_URL, DEFAULT_STEPFUN_MODEL,
DEFAULT_TOGETHER_BASE_URL, DEFAULT_TOGETHER_MODEL, DEFAULT_VLLM_BASE_URL, DEFAULT_VLLM_MODEL,
DEFAULT_VOLCENGINE_BASE_URL, DEFAULT_VOLCENGINE_MODEL, DEFAULT_WANJIE_ARK_BASE_URL,
DEFAULT_WANJIE_ARK_MODEL, DEFAULT_XIAOMI_MIMO_BASE_URL, DEFAULT_XIAOMI_MIMO_MODEL,
DEFAULT_ZAI_BASE_URL, DEFAULT_ZAI_MODEL, ProviderKind,
};
/// Wire protocol spoken by a provider.
@@ -460,6 +460,18 @@ provider!(
aliases: ["step-fun", "step_fun", "stepflash", "step-flash", "step_flash"]
);
// Declares the built-in MiniMax provider entry.
// NOTE(review): the `provider!` macro is defined outside this view. The positional
// arguments presumably follow the same order as the sibling invocations (type name,
// `ProviderKind` variant, id, display name, default base URL, default model, API-key
// environment variables, config slot) — confirm against the macro definition.
provider!(
Minimax,
Minimax,
"minimax",
"MiniMax",
DEFAULT_MINIMAX_BASE_URL,
DEFAULT_MINIMAX_MODEL,
["MINIMAX_API_KEY"],
"minimax",
aliases: ["mini-max", "mini_max"]
);
static DEEPSEEK: Deepseek = Deepseek;
static NVIDIA_NIM: NvidiaNim = NvidiaNim;
static OPENAI: Openai = Openai;
@@ -483,8 +495,9 @@ static OPENAI_CODEX: OpenaiCodex = OpenaiCodex;
static ANTHROPIC: Anthropic = Anthropic;
static ZAI: Zai = Zai;
static STEPFUN: Stepfun = Stepfun;
static MINIMAX: Minimax = Minimax;
static PROVIDER_REGISTRY: [&dyn Provider; 23] = [
static PROVIDER_REGISTRY: [&dyn Provider; 24] = [
&DEEPSEEK,
&NVIDIA_NIM,
&OPENAI,
@@ -508,6 +521,7 @@ static PROVIDER_REGISTRY: [&dyn Provider; 23] = [
&ANTHROPIC,
&ZAI,
&STEPFUN,
&MINIMAX,
];
/// Return all built-in provider metadata entries in `ProviderKind::ALL` order.
+52
View File
@@ -1235,6 +1235,13 @@ pub(super) fn apply_reasoning_effort(
effort: Option<&str>,
provider: ApiProvider,
) {
if matches!(provider, ApiProvider::Minimax) {
// MiniMax's OpenAI-compatible API keeps thinking inside `content`
// unless reasoning_split is enabled. Always request the split shape
// so private thinking renders as Thinking cells rather than answer
// prose.
body["reasoning_split"] = json!(true);
}
let Some(effort) = effort else {
return;
};
@@ -1294,6 +1301,9 @@ pub(super) fn apply_reasoning_effort(
"thinking": false,
});
}
ApiProvider::Minimax => {
body["thinking"] = json!({ "type": "disabled" });
}
ApiProvider::Zai | ApiProvider::Stepfun => {}
},
"low" | "minimal" | "medium" | "mid" | "high" | "" => match provider {
@@ -1368,6 +1378,9 @@ pub(super) fn apply_reasoning_effort(
"reasoning_effort": "high",
});
}
ApiProvider::Minimax => {
body["thinking"] = json!({ "type": "adaptive" });
}
ApiProvider::Zai | ApiProvider::Stepfun => {}
},
"xhigh" | "max" | "highest" => match provider {
@@ -1422,6 +1435,9 @@ pub(super) fn apply_reasoning_effort(
"reasoning_effort": "max",
});
}
ApiProvider::Minimax => {
body["thinking"] = json!({ "type": "adaptive" });
}
ApiProvider::Zai | ApiProvider::Stepfun => {}
},
_ => {}
@@ -2836,6 +2852,36 @@ mod tests {
assert!(body.get("reasoning_effort").is_none());
}
#[test]
fn reasoning_effort_minimax_splits_reasoning_from_content() {
    // Builds a fresh request body and applies the given effort for the MiniMax provider.
    let body_for = |effort: Option<&str>| {
        let mut payload = json!({});
        apply_reasoning_effort(&mut payload, effort, ApiProvider::Minimax);
        payload
    };

    // Thinking enabled: split is requested and thinking is adaptive, with no
    // `reasoning_effort` key on the wire.
    let enabled = body_for(Some("high"));
    assert_eq!(
        enabled.get("reasoning_split").and_then(Value::as_bool),
        Some(true)
    );
    assert_eq!(
        enabled.pointer("/thinking/type").and_then(Value::as_str),
        Some("adaptive")
    );
    assert!(enabled.get("reasoning_effort").is_none());

    // Thinking disabled: split is still requested, thinking is switched off.
    let disabled = body_for(Some("off"));
    assert_eq!(
        disabled.get("reasoning_split").and_then(Value::as_bool),
        Some(true)
    );
    assert_eq!(
        disabled.pointer("/thinking/type").and_then(Value::as_str),
        Some("disabled")
    );

    // No effort at all: the split flag is the only field added.
    let unset = body_for(None);
    assert_eq!(unset, json!({ "reasoning_split": true }));
}
#[test]
fn chat_parser_accepts_nvidia_nim_reasoning_field() -> Result<()> {
let response = parse_chat_message(&json!({
@@ -2872,6 +2918,7 @@ mod tests {
let mut text_started = false;
let mut thinking_started = false;
let mut tool_indices = std::collections::HashMap::new();
let mut reasoning_detail_buffers = std::collections::HashMap::new();
let events = parse_sse_chunk(
&json!({
"choices": [{
@@ -2884,6 +2931,7 @@ mod tests {
&mut text_started,
&mut thinking_started,
&mut tool_indices,
&mut reasoning_detail_buffers,
true,
);
@@ -3038,12 +3086,14 @@ mod tests {
let mut thinking_started = false;
let mut tool_indices: std::collections::HashMap<u32, u32> =
std::collections::HashMap::new();
let mut reasoning_detail_buffers = std::collections::HashMap::new();
let events = parse_sse_chunk(
&chunk,
&mut content_index,
&mut text_started,
&mut thinking_started,
&mut tool_indices,
&mut reasoning_detail_buffers,
false,
);
@@ -3097,12 +3147,14 @@ mod tests {
let mut thinking_started = false;
let mut tool_indices: std::collections::HashMap<u32, u32> =
std::collections::HashMap::new();
let mut reasoning_detail_buffers = std::collections::HashMap::new();
let events = parse_sse_chunk(
&chunk,
&mut content_index,
&mut text_started,
&mut thinking_started,
&mut tool_indices,
&mut reasoning_detail_buffers,
false,
);
+269 -14
View File
@@ -70,6 +70,41 @@ fn apply_provider_token_limit(body: &mut Value, provider: ApiProvider, max_token
body["max_completion_tokens"] = json!(max_tokens);
}
/// Copies each assistant message's `reasoning_content` into a `reasoning_details` array.
///
/// A message is rewritten only when it is a JSON object with `role == "assistant"`, has
/// no `reasoning_details` key yet, and carries a `reasoning_content` string that is not
/// blank. The added value is a one-element array `[{ "type": "text", "text": … }]`;
/// `reasoning_content` itself is left in place.
fn mirror_minimax_reasoning_details_for_messages(messages: &mut [Value]) {
    for entry in messages.iter_mut() {
        // Non-object values have no role and are skipped.
        let Some(fields) = entry.as_object_mut() else {
            continue;
        };

        let is_assistant = fields.get("role").and_then(Value::as_str) == Some("assistant");
        if !is_assistant || fields.contains_key("reasoning_details") {
            continue;
        }

        // Take an owned copy so the map can be mutated afterwards.
        let thinking = match fields.get("reasoning_content").and_then(Value::as_str) {
            Some(text) if !text.trim().is_empty() => text.to_owned(),
            _ => continue,
        };

        fields.insert(
            "reasoning_details".to_owned(),
            json!([
                {
                    "type": "text",
                    "text": thinking,
                }
            ]),
        );
    }
}
/// Applies [`mirror_minimax_reasoning_details_for_messages`] to `body["messages"]`.
///
/// Does nothing unless `provider` is MiniMax and `body` has a `messages` array.
fn mirror_minimax_reasoning_details_for_body(body: &mut Value, provider: ApiProvider) {
    if !matches!(provider, ApiProvider::Minimax) {
        return;
    }
    if let Some(history) = body.get_mut("messages").and_then(Value::as_array_mut) {
        mirror_minimax_reasoning_details_for_messages(history);
    }
}
impl DeepSeekClient {
pub(super) async fn create_message_chat(
&self,
@@ -121,6 +156,7 @@ impl DeepSeekClient {
request.reasoning_effort.as_deref(),
self.api_provider,
);
mirror_minimax_reasoning_details_for_body(&mut body, self.api_provider);
let response_cache_key = if cacheable {
let wire_body =
@@ -258,6 +294,7 @@ impl DeepSeekClient {
request.reasoning_effort.as_deref(),
self.api_provider,
);
mirror_minimax_reasoning_details_for_body(&mut body, self.api_provider);
let url = api_url_with_suffix(
&self.base_url,
@@ -323,6 +360,7 @@ impl DeepSeekClient {
let mut text_started = false;
let mut thinking_started = false;
let mut tool_indices: std::collections::HashMap<u32, u32> = std::collections::HashMap::new();
let mut reasoning_detail_buffers: std::collections::HashMap<u32, String> = std::collections::HashMap::new();
let is_reasoning_model = is_reasoning_model_for_stream(api_provider, &model);
let mut byte_stream = std::pin::pin!(byte_stream);
@@ -411,6 +449,7 @@ impl DeepSeekClient {
&mut text_started,
&mut thinking_started,
&mut tool_indices,
&mut reasoning_detail_buffers,
is_reasoning_model,
) {
SseDataFrame::Done => break 'stream,
@@ -550,6 +589,9 @@ impl<'a> PromptBuilder<'a> {
if provider == ApiProvider::Arcee {
apply_arcee_waf_safe_message_encoding(&mut messages);
}
if provider == ApiProvider::Minimax {
mirror_minimax_reasoning_details_for_messages(&mut messages);
}
messages
}
@@ -1943,7 +1985,23 @@ fn should_replay_reasoning_content_for_provider(
model: &str,
effort: Option<&str>,
) -> bool {
if !provider_accepts_reasoning_content(provider) && !requires_reasoning_content(model) {
if effort
.map(|value| {
matches!(
value.trim().to_ascii_lowercase().as_str(),
"off" | "disabled" | "none" | "false"
)
})
.unwrap_or(false)
{
return false;
}
if requires_reasoning_content(model) {
return true;
}
if !provider_accepts_reasoning_content(provider) {
// Generic non-DeepSeek model on a provider that rejects the field:
// keep stripping it (preserves the #1542 fix). But a known DeepSeek
// reasoning model pointed at a DeepSeek-compatible endpoint via the
@@ -1951,7 +2009,8 @@ fn should_replay_reasoning_content_for_provider(
// or the thinking-mode API returns 400 (#1739 / #1694).
return false;
}
should_replay_reasoning_content(model, effort)
model_supports_reasoning(model)
}
/// Should the SSE parser treat incoming `reasoning_content` deltas as thinking
@@ -1995,6 +2054,7 @@ fn provider_accepts_reasoning_content(provider: ApiProvider) -> bool {
| ApiProvider::SiliconflowCn
| ApiProvider::Volcengine
| ApiProvider::Arcee
| ApiProvider::Minimax
| ApiProvider::Sglang
| ApiProvider::Moonshot // #3016: Kimi thinking traces use reasoning_content
)
@@ -2010,11 +2070,54 @@ fn has_deepseek_r_series_marker(model_lower: &str) -> bool {
})
}
fn reasoning_field(value: &Value) -> Option<&str> {
value
/// Extracts the new reasoning text carried by one streamed delta object.
///
/// * `value` – the delta JSON object of a single choice.
/// * `choice_index` – key under which the last seen `reasoning_details` text is stored.
/// * `reasoning_detail_buffers` – per-choice record of the last `reasoning_details` text.
///
/// A string in `reasoning_content` (or, when that key is absent, `reasoning`) is returned
/// as-is. Otherwise the `text` fields of `reasoning_details` are concatenated; if that
/// text begins with the previously stored text for this choice only the remainder is
/// returned, else the whole text is returned. The stored text is then replaced.
///
/// Returns `None` when no reasoning field is usable or the concatenated text is empty.
fn reasoning_delta(
    value: &Value,
    choice_index: u32,
    reasoning_detail_buffers: &mut std::collections::HashMap<u32, String>,
) -> Option<String> {
    // `reasoning` is consulted only when `reasoning_content` is missing entirely; a
    // present-but-non-string `reasoning_content` falls through to `reasoning_details`.
    let direct = match value.get("reasoning_content") {
        Some(field) => field.as_str(),
        None => value.get("reasoning").and_then(Value::as_str),
    };
    if let Some(text) = direct {
        return Some(text.to_owned());
    }

    let mut snapshot = String::new();
    for detail in value.get("reasoning_details")?.as_array()? {
        if let Some(piece) = detail.get("text").and_then(Value::as_str) {
            snapshot.push_str(piece);
        }
    }
    if snapshot.is_empty() {
        return None;
    }

    let seen = reasoning_detail_buffers.entry(choice_index).or_default();
    let fresh = match snapshot.strip_prefix(seen.as_str()) {
        Some(suffix) => suffix.to_owned(),
        None => snapshot.clone(),
    };
    *seen = snapshot;
    Some(fresh)
}
/// Extracts the full reasoning text from a non-streamed message object.
///
/// A string in `reasoning_content` (or, when that key is absent, `reasoning`) wins.
/// Otherwise the `text` fields of the `reasoning_details` array are concatenated in
/// order; an array without any `text` strings yields `Some("")`. Returns `None` when
/// none of the fields is usable.
fn reasoning_message_text(value: &Value) -> Option<String> {
    let direct = match value.get("reasoning_content") {
        Some(field) => field.as_str(),
        None => value.get("reasoning").and_then(Value::as_str),
    };
    if let Some(text) = direct {
        return Some(text.to_owned());
    }

    let details = value.get("reasoning_details")?.as_array()?;
    let mut joined = String::new();
    for detail in details {
        if let Some(piece) = detail.get("text").and_then(Value::as_str) {
            joined.push_str(piece);
        }
    }
    Some(joined)
}
pub(super) fn parse_chat_message(payload: &Value) -> Result<MessageResponse> {
@@ -2042,7 +2145,7 @@ pub(super) fn parse_chat_message(payload: &Value) -> Result<MessageResponse> {
let mut content_blocks = Vec::new();
if let Some(reasoning) =
reasoning_field(message).filter(|reasoning| !reasoning.trim().is_empty())
reasoning_message_text(message).filter(|reasoning| !reasoning.trim().is_empty())
{
content_blocks.push(ContentBlock::Thinking {
signature: None,
@@ -2219,6 +2322,7 @@ fn parse_sse_data_frame(
text_started: &mut bool,
thinking_started: &mut bool,
tool_indices: &mut std::collections::HashMap<u32, u32>,
reasoning_detail_buffers: &mut std::collections::HashMap<u32, String>,
is_reasoning_model: bool,
) -> SseDataFrame {
if data.trim() == "[DONE]" {
@@ -2233,6 +2337,7 @@ fn parse_sse_data_frame(
text_started,
thinking_started,
tool_indices,
reasoning_detail_buffers,
is_reasoning_model,
)
},
@@ -2248,6 +2353,7 @@ pub(super) fn parse_sse_chunk(
text_started: &mut bool,
thinking_started: &mut bool,
tool_indices: &mut std::collections::HashMap<u32, u32>,
reasoning_detail_buffers: &mut std::collections::HashMap<u32, String>,
is_reasoning_model: bool,
) -> Vec<StreamEvent> {
let mut events = Vec::new();
@@ -2282,6 +2388,7 @@ pub(super) fn parse_sse_chunk(
}
for choice in choices {
let choice_index = choice.get("index").and_then(Value::as_u64).unwrap_or(0) as u32;
let delta = choice.get("delta");
let finish_reason = choice
.get("finish_reason")
@@ -2289,14 +2396,16 @@ pub(super) fn parse_sse_chunk(
.map(str::to_string);
if let Some(delta) = delta {
let reasoning_text = reasoning_field(delta).filter(|s| !s.is_empty());
let reasoning_text = reasoning_delta(delta, choice_index, reasoning_detail_buffers)
.filter(|s| !s.is_empty());
let content_text = delta
.get("content")
.and_then(Value::as_str)
.filter(|s| !s.is_empty());
.filter(|s| !s.is_empty())
.map(str::to_string);
// Handle reasoning_content / reasoning thinking deltas.
if is_reasoning_model && let Some(reasoning) = reasoning_text {
if is_reasoning_model && let Some(reasoning) = reasoning_text.as_deref() {
if !*thinking_started {
events.push(StreamEvent::ContentBlockStart {
index: *content_index,
@@ -2345,9 +2454,7 @@ pub(super) fn parse_sse_chunk(
}
events.push(StreamEvent::ContentBlockDelta {
index: *content_index,
delta: Delta::TextDelta {
text: content.to_string(),
},
delta: Delta::TextDelta { text: content },
});
}
@@ -2659,6 +2766,69 @@ mod arcee_waf_message_encoding_tests {
}
}
#[cfg(test)]
mod minimax_reasoning_replay_tests {
    use super::build_chat_messages_for_request_and_provider;
    use crate::config::{ApiProvider, DEFAULT_MINIMAX_MODEL};
    use crate::models::{ContentBlock, Message, MessageRequest};

    /// Builds a request whose only message is an assistant turn made of a thinking
    /// block followed by a text block.
    fn request_with_assistant_thinking() -> MessageRequest {
        let thinking_block = ContentBlock::Thinking {
            thinking: "Inspect tool state".to_string(),
            signature: None,
        };
        let answer_block = ContentBlock::Text {
            text: "Done.".to_string(),
            cache_control: None,
        };
        let assistant_turn = Message {
            role: "assistant".to_string(),
            content: vec![thinking_block, answer_block],
        };

        MessageRequest {
            model: DEFAULT_MINIMAX_MODEL.to_string(),
            messages: vec![assistant_turn],
            max_tokens: 16,
            system: None,
            tools: None,
            tool_choice: None,
            metadata: None,
            thinking: None,
            reasoning_effort: None,
            stream: None,
            temperature: None,
            top_p: None,
        }
    }

    #[test]
    fn minimax_history_replays_thinking_as_reasoning_details() {
        let messages = build_chat_messages_for_request_and_provider(
            &request_with_assistant_thinking(),
            ApiProvider::Minimax,
        );
        let assistant = &messages[0];

        // The thinking text must appear both as `reasoning_content` and as the single
        // text entry of `reasoning_details`.
        for (pointer, expected) in [
            ("/reasoning_content", "Inspect tool state"),
            ("/reasoning_details/0/type", "text"),
            ("/reasoning_details/0/text", "Inspect tool state"),
        ] {
            assert_eq!(
                assistant.pointer(pointer).and_then(|value| value.as_str()),
                Some(expected)
            );
        }
    }
}
// === #103 Phase 4: SSE decoder behavior on canned chunk sequences ============
#[cfg(test)]
@@ -2683,12 +2853,14 @@ mod stream_decoder_tests {
let mut text_started = false;
let mut thinking_started = false;
let mut tool_indices = std::collections::HashMap::new();
let mut reasoning_detail_buffers = std::collections::HashMap::new();
parse_sse_chunk(
&chunk,
&mut content_index,
&mut text_started,
&mut thinking_started,
&mut tool_indices,
&mut reasoning_detail_buffers,
is_reasoning_model,
)
}
@@ -2765,6 +2937,7 @@ mod stream_decoder_tests {
let mut text_started = false;
let mut thinking_started = false;
let mut tool_indices = std::collections::HashMap::new();
let mut reasoning_detail_buffers = std::collections::HashMap::new();
let mut events = Vec::new();
for chunk in chunks {
let value: Value = serde_json::from_str(chunk).expect("valid SSE JSON");
@@ -2774,6 +2947,7 @@ mod stream_decoder_tests {
&mut text_started,
&mut thinking_started,
&mut tool_indices,
&mut reasoning_detail_buffers,
is_reasoning,
));
}
@@ -2835,12 +3009,65 @@ mod stream_decoder_tests {
);
}
#[test]
fn decoder_streams_minimax_reasoning_details_as_incremental_thinking() {
    // With reasoning_split enabled, MiniMax repeats the whole reasoning_details text so
    // far in every chunk. The decoder must forward only the newly added tail, otherwise
    // the Thinking cell would show earlier reasoning twice.
    let frames = [
        r#"{"id":"minimax-1","choices":[{"index":0,"delta":{"reasoning_details":[{"type":"text","text":"Inspect"}]}}]}"#,
        r#"{"id":"minimax-1","choices":[{"index":0,"delta":{"reasoning_details":[{"type":"text","text":"Inspect config"}]}}]}"#,
        r#"{"id":"minimax-1","choices":[{"index":0,"delta":{"content":"Done."}}]}"#,
    ];
    let reasoning_model = is_reasoning_model_for_stream(ApiProvider::Minimax, "MiniMax-M3");

    // Decoder state shared across all frames of the stream.
    let mut block_index = 0u32;
    let mut text_open = false;
    let mut thinking_open = false;
    let mut tool_slots = std::collections::HashMap::new();
    let mut detail_snapshots = std::collections::HashMap::new();

    let mut emitted = Vec::new();
    for frame in frames {
        let parsed: Value = serde_json::from_str(frame).expect("valid SSE JSON");
        let batch = parse_sse_chunk(
            &parsed,
            &mut block_index,
            &mut text_open,
            &mut thinking_open,
            &mut tool_slots,
            &mut detail_snapshots,
            reasoning_model,
        );
        emitted.extend(batch);
    }

    // Concatenate every thinking delta in emission order.
    let mut streamed_thinking = String::new();
    for event in &emitted {
        if let StreamEvent::ContentBlockDelta {
            delta: Delta::ThinkingDelta { thinking },
            ..
        } = event
        {
            streamed_thinking.push_str(thinking);
        }
    }
    assert_eq!(streamed_thinking, "Inspect config");

    // None of the reasoning text may surface as answer text.
    let leaked_into_text = emitted.iter().any(|event| {
        matches!(
            event,
            StreamEvent::ContentBlockDelta {
                delta: Delta::TextDelta { text },
                ..
            } if text == "Inspect" || text == "Inspect config"
        )
    });
    assert!(!leaked_into_text);
}
#[test]
fn decoder_does_not_render_reasoning_as_text_for_known_provider_models() {
let mut content_index = 0u32;
let mut text_started = false;
let mut thinking_started = false;
let mut tool_indices = std::collections::HashMap::new();
let mut reasoning_detail_buffers = std::collections::HashMap::new();
let is_reasoning_model =
is_reasoning_model_for_stream(ApiProvider::XiaomiMimo, "mimo-v2.5-pro");
let events = parse_sse_chunk(
@@ -2855,6 +3082,7 @@ mod stream_decoder_tests {
&mut text_started,
&mut thinking_started,
&mut tool_indices,
&mut reasoning_detail_buffers,
is_reasoning_model,
);
@@ -2932,6 +3160,7 @@ mod stream_decoder_tests {
let mut text_started = false;
let mut thinking_started = false;
let mut tool_indices = std::collections::HashMap::new();
let mut reasoning_detail_buffers = std::collections::HashMap::new();
let outcome = parse_sse_data_frame(
" [DONE] ",
@@ -2939,6 +3168,7 @@ mod stream_decoder_tests {
&mut text_started,
&mut thinking_started,
&mut tool_indices,
&mut reasoning_detail_buffers,
true,
);
@@ -3721,6 +3951,7 @@ mod alias_thinking_detection_tests {
assert!(provider_accepts_reasoning_content(ApiProvider::NvidiaNim));
assert!(provider_accepts_reasoning_content(ApiProvider::XiaomiMimo));
assert!(provider_accepts_reasoning_content(ApiProvider::Arcee));
assert!(provider_accepts_reasoning_content(ApiProvider::Minimax));
// #3016: Moonshot's native endpoint streams Kimi thinking as
// reasoning_content.
assert!(provider_accepts_reasoning_content(ApiProvider::Moonshot));
@@ -3728,17 +3959,41 @@ mod alias_thinking_detection_tests {
#[test]
fn stream_classifies_moonshot_kimi_as_reasoning() {
// #3016: without this, kimi-k2.6 thinking leaked into answer text.
// #3016: without this, Kimi thinking leaked into answer text.
assert!(is_reasoning_model_for_stream(
ApiProvider::Moonshot,
"kimi-k2.6"
));
assert!(
!is_reasoning_model_for_stream(ApiProvider::Moonshot, "kimi-for-coding"),
"kimi-for-coding is Moonshot's documented non-thinking model"
is_reasoning_model_for_stream(ApiProvider::Moonshot, "kimi-for-coding"),
"Kimi Code's stable model id now maps to K2.7 Code and streams reasoning_content"
);
}
/// Moonshot/Kimi and MiniMax models must replay earlier reasoning content,
/// except when the third argument is `Some("off")`.
// NOTE(review): the third argument looks like the reasoning-effort setting — confirm.
#[test]
fn moonshot_and_minimax_replay_reasoning_content_for_supported_models() {
    // With no override (`None`) every listed provider/model pair must replay.
    let replaying_routes = [
        (ApiProvider::Moonshot, "kimi-k2.7-code"),
        (ApiProvider::Moonshot, "kimi-for-coding"),
        (ApiProvider::Minimax, "MiniMax-M3"),
    ];
    for (provider, model_id) in replaying_routes {
        let replays = should_replay_reasoning_content_for_provider(provider, model_id, None);
        assert!(replays);
    }

    // An explicit "off" suppresses replay even for a model that otherwise replays.
    let replays_when_off = should_replay_reasoning_content_for_provider(
        ApiProvider::Moonshot,
        "kimi-for-coding",
        Some("off"),
    );
    assert!(!replays_when_off);
}
#[test]
fn xiaomi_mimo_uses_max_completion_tokens_payload_key() {
let mut body = json!({
+166 -3
View File
@@ -179,6 +179,15 @@ pub const DEFAULT_ANTHROPIC_MODEL: &str = "claude-sonnet-4-6";
pub const ANTHROPIC_OPUS_MODEL: &str = "claude-opus-4-8";
pub const ANTHROPIC_HAIKU_MODEL: &str = "claude-haiku-4-5";
pub const DEFAULT_ANTHROPIC_BASE_URL: &str = "https://api.anthropic.com";
// MiniMax direct-provider defaults. The model ids below are the canonical
// spellings that `canonical_minimax_model_id` resolves user aliases to.
/// Default model id for the MiniMax provider.
pub const DEFAULT_MINIMAX_MODEL: &str = "MiniMax-M3";
/// MiniMax M2.7 model id.
pub const MINIMAX_M2_7_MODEL: &str = "MiniMax-M2.7";
/// High-speed variant of the MiniMax M2.7 model id.
pub const MINIMAX_M2_7_HIGHSPEED_MODEL: &str = "MiniMax-M2.7-highspeed";
/// MiniMax M2.5 model id.
pub const MINIMAX_M2_5_MODEL: &str = "MiniMax-M2.5";
/// High-speed variant of the MiniMax M2.5 model id.
pub const MINIMAX_M2_5_HIGHSPEED_MODEL: &str = "MiniMax-M2.5-highspeed";
/// MiniMax M2.1 model id.
pub const MINIMAX_M2_1_MODEL: &str = "MiniMax-M2.1";
/// High-speed variant of the MiniMax M2.1 model id.
pub const MINIMAX_M2_1_HIGHSPEED_MODEL: &str = "MiniMax-M2.1-highspeed";
/// MiniMax M2 model id (no high-speed counterpart is declared here).
pub const MINIMAX_M2_MODEL: &str = "MiniMax-M2";
/// Default base URL for the MiniMax provider.
pub const DEFAULT_MINIMAX_BASE_URL: &str = "https://api.minimax.io/v1";
#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
#[serde(rename_all = "snake_case")]
@@ -207,6 +216,7 @@ pub enum ApiProvider {
Anthropic,
Zai,
Stepfun,
Minimax,
}
impl ApiProvider {
@@ -264,7 +274,7 @@ impl ApiProvider {
/// `ApiProvider` discriminant → `ProviderKind` lookup.
/// Index 1 is `None` for the legacy `DeepseekCN` variant.
const KIND_LOOKUP: [Option<codewhale_config::ProviderKind>; 24] = [
const KIND_LOOKUP: [Option<codewhale_config::ProviderKind>; 25] = [
Some(codewhale_config::ProviderKind::Deepseek),
None, // DeepseekCN
Some(codewhale_config::ProviderKind::NvidiaNim),
@@ -289,10 +299,11 @@ impl ApiProvider {
Some(codewhale_config::ProviderKind::Anthropic),
Some(codewhale_config::ProviderKind::Zai),
Some(codewhale_config::ProviderKind::Stepfun),
Some(codewhale_config::ProviderKind::Minimax),
];
/// `ProviderKind` discriminant → `ApiProvider` lookup.
const FROM_KIND_LOOKUP: [Self; 23] = [
const FROM_KIND_LOOKUP: [Self; 24] = [
Self::Deepseek,
Self::NvidiaNim,
Self::Openai,
@@ -316,6 +327,7 @@ impl ApiProvider {
Self::Anthropic,
Self::Zai,
Self::Stepfun,
Self::Minimax,
];
/// Map to the config-level `ProviderKind`.
@@ -785,6 +797,39 @@ fn canonical_moonshot_model_id(model: &str) -> Option<&'static str> {
}
}
/// Map a user-typed MiniMax model alias onto its canonical model id constant.
///
/// Lookup ignores surrounding whitespace and ASCII case, and treats `_` and
/// spaces as `-`. Each M2.x release is accepted with a dot or a dash between
/// major and minor (`m2.7` / `m2-7`) and with an optional dash after the `m`
/// (`m-2.7`). A trailing `-highspeed` selects the matching high-speed
/// constant. Returns `None` for anything that is not a recognized alias.
fn canonical_minimax_model_id(model: &str) -> Option<&'static str> {
    let key = model.trim().to_ascii_lowercase().replace(['_', ' '], "-");

    // Peel the tier marker off once so each release is spelled a single time
    // below instead of once per tier.
    let (family, highspeed) = match key.strip_suffix("-highspeed") {
        Some(family) => (family, true),
        None => (key.as_str(), false),
    };

    let canonical = match (family, highspeed) {
        ("minimax" | "minimax-m3" | "minimax-m-3" | "minimax-m-3-thinking", false) => {
            DEFAULT_MINIMAX_MODEL
        }
        ("minimax-m2.7" | "minimax-m2-7" | "minimax-m-2.7" | "minimax-m-2-7", false) => {
            MINIMAX_M2_7_MODEL
        }
        ("minimax-m2.7" | "minimax-m2-7" | "minimax-m-2.7" | "minimax-m-2-7", true) => {
            MINIMAX_M2_7_HIGHSPEED_MODEL
        }
        ("minimax-m2.5" | "minimax-m2-5" | "minimax-m-2.5" | "minimax-m-2-5", false) => {
            MINIMAX_M2_5_MODEL
        }
        ("minimax-m2.5" | "minimax-m2-5" | "minimax-m-2.5" | "minimax-m-2-5", true) => {
            MINIMAX_M2_5_HIGHSPEED_MODEL
        }
        ("minimax-m2.1" | "minimax-m2-1" | "minimax-m-2.1" | "minimax-m-2-1", false) => {
            MINIMAX_M2_1_MODEL
        }
        ("minimax-m2.1" | "minimax-m2-1" | "minimax-m-2.1" | "minimax-m-2-1", true) => {
            MINIMAX_M2_1_HIGHSPEED_MODEL
        }
        ("minimax-m2" | "minimax-m-2", false) => MINIMAX_M2_MODEL,
        // The M3 aliases and bare M2 have no `-highspeed` form; anything else
        // is simply not a known alias.
        _ => return None,
    };
    Some(canonical)
}
/// Normalize a model selected through the TUI for the active provider.
///
/// Official DeepSeek endpoints require bare model IDs. Provider-prefixed
@@ -821,6 +866,12 @@ pub fn normalize_model_name_for_provider(provider: ApiProvider, model: &str) ->
.or_else(|| normalize_custom_model_id(model));
}
if matches!(provider, ApiProvider::Minimax) {
return canonical_minimax_model_id(model)
.map(ToString::to_string)
.or_else(|| normalize_custom_model_id(model));
}
if matches!(provider, ApiProvider::Huggingface) {
return normalize_custom_model_id(model);
}
@@ -906,6 +957,16 @@ pub fn model_completion_names_for_provider(provider: ApiProvider) -> Vec<&'stati
DEFAULT_ANTHROPIC_MODEL,
ANTHROPIC_HAIKU_MODEL,
],
ApiProvider::Minimax => vec![
DEFAULT_MINIMAX_MODEL,
MINIMAX_M2_7_MODEL,
MINIMAX_M2_7_HIGHSPEED_MODEL,
MINIMAX_M2_5_MODEL,
MINIMAX_M2_5_HIGHSPEED_MODEL,
MINIMAX_M2_1_MODEL,
MINIMAX_M2_1_HIGHSPEED_MODEL,
MINIMAX_M2_MODEL,
],
}
}
@@ -2066,6 +2127,8 @@ pub struct ProvidersConfig {
pub zai: ProviderConfig,
#[serde(default)]
pub stepfun: ProviderConfig,
#[serde(default)]
pub minimax: ProviderConfig,
}
#[derive(Debug, Clone, Deserialize, Default)]
@@ -2235,6 +2298,7 @@ impl Config {
ApiProvider::Anthropic => "providers.anthropic",
ApiProvider::Zai => "providers.zai",
ApiProvider::Stepfun => "providers.stepfun",
ApiProvider::Minimax => "providers.minimax",
ApiProvider::Deepseek | ApiProvider::DeepseekCN => return,
};
tracing::warn!(
@@ -2394,6 +2458,7 @@ impl Config {
ApiProvider::Anthropic => &providers.anthropic,
ApiProvider::Zai => &providers.zai,
ApiProvider::Stepfun => &providers.stepfun,
ApiProvider::Minimax => &providers.minimax,
})
}
@@ -2424,6 +2489,7 @@ impl Config {
ApiProvider::Anthropic => &mut providers.anthropic,
ApiProvider::Zai => &mut providers.zai,
ApiProvider::Stepfun => &mut providers.stepfun,
ApiProvider::Minimax => &mut providers.minimax,
}
}
@@ -2568,6 +2634,7 @@ impl Config {
ApiProvider::Zai => DEFAULT_ZAI_MODEL,
ApiProvider::Stepfun => DEFAULT_STEPFUN_MODEL,
ApiProvider::Anthropic => DEFAULT_ANTHROPIC_MODEL,
ApiProvider::Minimax => DEFAULT_MINIMAX_MODEL,
}
.to_string()
}
@@ -2609,7 +2676,8 @@ impl Config {
| ApiProvider::Together
| ApiProvider::OpenaiCodex
| ApiProvider::Zai
| ApiProvider::Stepfun => None,
| ApiProvider::Stepfun
| ApiProvider::Minimax => None,
};
let configured_base_url = provider_base.or(root_base);
let base = if provider == ApiProvider::XiaomiMimo {
@@ -2659,6 +2727,7 @@ impl Config {
ApiProvider::Zai => DEFAULT_ZAI_BASE_URL,
ApiProvider::Stepfun => DEFAULT_STEPFUN_BASE_URL,
ApiProvider::Anthropic => DEFAULT_ANTHROPIC_BASE_URL,
ApiProvider::Minimax => DEFAULT_MINIMAX_BASE_URL,
}
.to_string()
})
@@ -2711,6 +2780,7 @@ impl Config {
ApiProvider::Zai => "zai",
ApiProvider::Stepfun => "stepfun",
ApiProvider::Anthropic => "anthropic",
ApiProvider::Minimax => "minimax",
};
// 0. DeepSeek compatibility slot. The legacy top-level `api_key`
@@ -2906,6 +2976,10 @@ impl Config {
),
// MiniMax requires a key: a missing key is a hard error with setup hints.
ApiProvider::Minimax => anyhow::bail!(
"MiniMax API key not found. Run 'codewhale auth set --provider minimax', \
set MINIMAX_API_KEY, or add [providers.minimax] api_key in ~/.codewhale/config.toml."
),
// Self-hosted deployments commonly run without auth on localhost.
// Return an empty key and let the client omit the Authorization header.
ApiProvider::Sglang | ApiProvider::Vllm | ApiProvider::Ollama => Ok(String::new()),
}
}
@@ -3759,6 +3833,13 @@ fn apply_env_overrides(config: &mut Config) {
.stepfun
.base_url = Some(value);
}
ApiProvider::Minimax => {
config
.providers
.get_or_insert_with(ProvidersConfig::default)
.minimax
.base_url = Some(value);
}
}
}
if matches!(config.api_provider(), ApiProvider::NvidiaNim)
@@ -3968,6 +4049,7 @@ fn apply_env_overrides(config: &mut Config) {
ApiProvider::Anthropic => &mut providers.anthropic,
ApiProvider::Zai => &mut providers.zai,
ApiProvider::Stepfun => &mut providers.stepfun,
ApiProvider::Minimax => &mut providers.minimax,
};
let mut provider_headers = entry.http_headers.clone().unwrap_or_default();
provider_headers.extend(headers);
@@ -4165,6 +4247,7 @@ fn apply_env_overrides(config: &mut Config) {
ApiProvider::Anthropic => &mut providers.anthropic,
ApiProvider::Zai => &mut providers.zai,
ApiProvider::Stepfun => &mut providers.stepfun,
ApiProvider::Minimax => &mut providers.minimax,
};
entry.model = Some(value);
}
@@ -4507,6 +4590,7 @@ fn default_base_url_for_provider(provider: ApiProvider) -> &'static str {
ApiProvider::Zai => DEFAULT_ZAI_BASE_URL,
ApiProvider::Stepfun => DEFAULT_STEPFUN_BASE_URL,
ApiProvider::Anthropic => DEFAULT_ANTHROPIC_BASE_URL,
ApiProvider::Minimax => DEFAULT_MINIMAX_BASE_URL,
}
}
@@ -4954,6 +5038,7 @@ fn merge_providers(
openai_codex: merge_provider_config(base.openai_codex, override_cfg.openai_codex),
zai: merge_provider_config(base.zai, override_cfg.zai),
stepfun: merge_provider_config(base.stepfun, override_cfg.stepfun),
minimax: merge_provider_config(base.minimax, override_cfg.minimax),
}),
}
}
@@ -5468,6 +5553,9 @@ pub fn active_provider_has_env_api_key(config: &Config) -> bool {
std::env::var("STEPFUN_API_KEY").is_ok_and(|k| !k.trim().is_empty())
|| std::env::var("STEP_API_KEY").is_ok_and(|k| !k.trim().is_empty())
}
ApiProvider::Minimax => {
std::env::var("MINIMAX_API_KEY").is_ok_and(|k| !k.trim().is_empty())
}
}
}
@@ -5504,6 +5592,7 @@ pub fn has_api_key_for(config: &Config, provider: ApiProvider) -> bool {
ApiProvider::Volcengine => "VOLCENGINE_API_KEY",
ApiProvider::Zai => "ZAI_API_KEY",
ApiProvider::Stepfun => "STEPFUN_API_KEY",
ApiProvider::Minimax => "MINIMAX_API_KEY",
};
if std::env::var(env_var).is_ok_and(|k| !k.trim().is_empty()) {
return true;
@@ -5633,6 +5722,7 @@ pub fn save_api_key_for(provider: ApiProvider, api_key: &str) -> Result<PathBuf>
ApiProvider::OpenaiCodex => "providers.openai_codex",
ApiProvider::Zai => "providers.zai",
ApiProvider::Stepfun => "providers.stepfun",
ApiProvider::Minimax => "providers.minimax",
};
// Parse existing TOML (or start fresh) so we can edit the right table
@@ -5681,6 +5771,7 @@ pub fn save_api_key_for(provider: ApiProvider, api_key: &str) -> Result<PathBuf>
ApiProvider::OpenaiCodex => "openai_codex",
ApiProvider::Zai => "zai",
ApiProvider::Stepfun => "stepfun",
ApiProvider::Minimax => "minimax",
};
let entry = providers
.entry(key_inside.to_string())
@@ -5781,6 +5872,7 @@ fn provider_config_key(provider: ApiProvider) -> Result<&'static str> {
ApiProvider::OpenaiCodex => Ok("openai_codex"),
ApiProvider::Zai => Ok("zai"),
ApiProvider::Stepfun => Ok("stepfun"),
ApiProvider::Minimax => Ok("minimax"),
}
}
@@ -8361,6 +8453,26 @@ api_key = "old-openrouter-key"
}
}
/// Lowercase and dash-separated MiniMax aliases must normalize to the
/// canonical model id constants when the active provider is MiniMax.
#[test]
fn normalize_model_name_for_provider_maps_minimax_direct_aliases() {
    let resolve = |alias: &str| normalize_model_name_for_provider(ApiProvider::Minimax, alias);

    // One assertion per alias so a failure points at the exact spelling.
    assert_eq!(resolve("minimax").as_deref(), Some(DEFAULT_MINIMAX_MODEL));
    assert_eq!(resolve("minimax-m3").as_deref(), Some(DEFAULT_MINIMAX_MODEL));
    assert_eq!(resolve("minimax-m2.7").as_deref(), Some(MINIMAX_M2_7_MODEL));
    assert_eq!(
        resolve("minimax-m2-7-highspeed").as_deref(),
        Some(MINIMAX_M2_7_HIGHSPEED_MODEL)
    );
    assert_eq!(resolve("minimax-m2.5").as_deref(), Some(MINIMAX_M2_5_MODEL));
    assert_eq!(
        resolve("minimax-m2-5-highspeed").as_deref(),
        Some(MINIMAX_M2_5_HIGHSPEED_MODEL)
    );
    assert_eq!(resolve("minimax-m2.1").as_deref(), Some(MINIMAX_M2_1_MODEL));
    assert_eq!(
        resolve("minimax-m2-1-highspeed").as_deref(),
        Some(MINIMAX_M2_1_HIGHSPEED_MODEL)
    );
    assert_eq!(resolve("minimax-m2").as_deref(), Some(MINIMAX_M2_MODEL));
}
#[test]
fn normalize_model_name_for_provider_maps_arcee_direct_aliases() {
for (alias, expected) in [
@@ -8472,6 +8584,28 @@ api_key = "old-openrouter-key"
);
}
/// The MiniMax completion list must offer every direct chat model id and
/// must not contain the OpenRouter-namespaced M3 id.
#[test]
fn model_completion_names_for_minimax_include_direct_chat_models() {
    let offered = model_completion_names_for_provider(ApiProvider::Minimax);

    let required_ids = [
        DEFAULT_MINIMAX_MODEL,
        MINIMAX_M2_7_MODEL,
        MINIMAX_M2_7_HIGHSPEED_MODEL,
        MINIMAX_M2_5_MODEL,
        MINIMAX_M2_5_HIGHSPEED_MODEL,
        MINIMAX_M2_1_MODEL,
        MINIMAX_M2_1_HIGHSPEED_MODEL,
        MINIMAX_M2_MODEL,
    ];
    for expected in required_ids {
        let is_offered = offered.contains(&expected);
        assert!(is_offered, "missing {expected}");
    }

    let leaks_openrouter_id = offered.contains(&OPENROUTER_MINIMAX_M3_MODEL);
    assert!(
        !leaks_openrouter_id,
        "direct MiniMax picker must not expose OpenRouter namespaced IDs"
    );
}
#[test]
fn normalize_model_name_rejects_invalid_or_non_deepseek_ids() {
assert!(normalize_model_name("qwen3-coder").is_none());
@@ -11405,6 +11539,35 @@ model = "deepseek-ai/deepseek-v4-pro"
);
}
/// Pins the capability record reported for direct MiniMax models: M3 gets a
/// 1,000,000-token window and 524,288 max output, every M2.x id gets a
/// 204,800-token window, and all of them report thinking support, no cache
/// telemetry, and the chat-completions payload mode.
#[test]
fn provider_capability_minimax_direct_models_use_api_docs_shape() {
    // Default (M3) model.
    let flagship = provider_capability(ApiProvider::Minimax, DEFAULT_MINIMAX_MODEL);
    assert_eq!(flagship.context_window, 1_000_000);
    assert_eq!(flagship.max_output, 524_288);
    assert!(flagship.thinking_supported);
    assert!(!flagship.cache_telemetry_supported);
    assert_eq!(
        flagship.request_payload_mode,
        RequestPayloadMode::ChatCompletions
    );

    // M2.x family, standard and high-speed ids alike.
    let m2_family = [
        MINIMAX_M2_7_MODEL,
        MINIMAX_M2_7_HIGHSPEED_MODEL,
        MINIMAX_M2_5_MODEL,
        MINIMAX_M2_5_HIGHSPEED_MODEL,
        MINIMAX_M2_1_MODEL,
        MINIMAX_M2_1_HIGHSPEED_MODEL,
        MINIMAX_M2_MODEL,
    ];
    for model in m2_family {
        let capability = provider_capability(ApiProvider::Minimax, model);
        assert_eq!(capability.context_window, 204_800, "{model}");
        assert!(capability.thinking_supported, "{model}");
        assert!(!capability.cache_telemetry_supported, "{model}");
        assert_eq!(
            capability.request_payload_mode,
            RequestPayloadMode::ChatCompletions
        );
    }
}
#[test]
fn provider_capability_wanjie_ark_reasoner_has_thinking_no_cache() {
let cap = provider_capability(ApiProvider::WanjieArk, DEFAULT_WANJIE_ARK_MODEL);
+1
View File
@@ -223,6 +223,7 @@ fn provider_base_url_table_key(provider: ApiProvider) -> anyhow::Result<&'static
ApiProvider::OpenaiCodex => Ok("openai_codex"),
ApiProvider::Zai => Ok("zai"),
ApiProvider::Stepfun => Ok("stepfun"),
ApiProvider::Minimax => Ok("minimax"),
}
}
+4 -1
View File
@@ -614,7 +614,9 @@ impl Engine {
let env_var = match provider {
ApiProvider::Deepseek | ApiProvider::DeepseekCN => "DEEPSEEK_API_KEY",
ApiProvider::NvidiaNim => "NVIDIA_API_KEY/NVIDIA_NIM_API_KEY",
ApiProvider::Openai | ApiProvider::Zai | ApiProvider::Stepfun => "OPENAI_API_KEY",
ApiProvider::Openai => "OPENAI_API_KEY",
ApiProvider::Zai => "ZAI_API_KEY/Z_AI_API_KEY",
ApiProvider::Stepfun => "STEPFUN_API_KEY/STEP_API_KEY",
ApiProvider::Anthropic => "ANTHROPIC_API_KEY",
ApiProvider::Atlascloud => "ATLASCLOUD_API_KEY",
ApiProvider::WanjieArk => "WANJIE_ARK_API_KEY/WANJIE_API_KEY/WANJIE_MAAS_API_KEY",
@@ -632,6 +634,7 @@ impl Engine {
ApiProvider::Huggingface => "HUGGINGFACE_API_KEY/HF_TOKEN",
ApiProvider::Together => "TOGETHER_API_KEY",
ApiProvider::OpenaiCodex => "OPENAI_CODEX_ACCESS_TOKEN/CODEX_ACCESS_TOKEN",
ApiProvider::Minimax => "MINIMAX_API_KEY",
};
Some(format!(
+7 -2
View File
@@ -2601,13 +2601,17 @@ fn run_setup_status(config: &Config, workspace: &Path) -> Result<()> {
("DEEPSEEK_API_KEY", "codewhale auth set --provider deepseek")
}
crate::config::ApiProvider::Zai => (
"OPENAI_API_KEY",
"ZAI_API_KEY/Z_AI_API_KEY",
"codewhale auth set --provider zai --api-key \"...\"",
),
crate::config::ApiProvider::Stepfun => (
"OPENAI_API_KEY",
"STEPFUN_API_KEY/STEP_API_KEY",
"codewhale auth set --provider stepfun --api-key \"...\"",
),
crate::config::ApiProvider::Minimax => (
"MINIMAX_API_KEY",
"codewhale auth set --provider minimax --api-key \"...\"",
),
};
println!(
" {} api_key: missing (set {env_var} or `[providers.{}].api_key` in ~/.codewhale/config.toml; or run `{login_hint}`)",
@@ -2637,6 +2641,7 @@ fn run_setup_status(config: &Config, workspace: &Path) -> Result<()> {
| crate::config::ApiProvider::DeepseekCN => "deepseek",
crate::config::ApiProvider::Zai => "zai",
crate::config::ApiProvider::Stepfun => "stepfun",
crate::config::ApiProvider::Minimax => "minimax",
}
);
}
+26 -8
View File
@@ -286,6 +286,13 @@ fn known_context_window_for_model(model_lower: &str) -> Option<u32> {
| "kimi-k2.7-code"
| "kimi-k2.6"
| "kimi-for-coding" => Some(262_144),
"minimax-m2.7"
| "minimax-m2.7-highspeed"
| "minimax-m2.5"
| "minimax-m2.5-highspeed"
| "minimax-m2.1"
| "minimax-m2.1-highspeed"
| "minimax-m2" => Some(204_800),
"z-ai/glm-5.1" | "z-ai/glm-5v-turbo" | "glm-5.1" | "glm-5v-turbo" => Some(202_752),
"minimax/minimax-m3" | "minimax-m3" | "qwen/qwen3.6-flash" | "qwen/qwen3.6-plus" => {
Some(1_000_000)
@@ -349,10 +356,10 @@ pub fn model_supports_reasoning(model: &str) -> bool {
if lower.contains("deepseek") && lower.contains("v4") {
return true;
}
// #3016: Moonshot-native Kimi IDs also emit reasoning_content.
// `kimi-for-coding` is Moonshot's documented non-thinking model — it
// must not be classified as reasoning-capable by the prefix rule.
if lower.starts_with("kimi-") && lower != "kimi-for-coding" {
// #3016 plus the 2026 Kimi Code K2.7 update: Moonshot-native Kimi IDs,
// including the stable `kimi-for-coding` coding route, emit
// reasoning_content that must stay out of answer prose.
if lower.starts_with("kimi-") {
return true;
}
matches!(
@@ -376,8 +383,16 @@ pub fn model_supports_reasoning(model: &str) -> bool {
| "moonshotai/kimi-k2.6:free"
| "kimi-k2.7-code"
| "kimi-k2.6"
| "kimi-for-coding"
| "minimax/minimax-m3"
| "minimax-m3"
| "minimax-m2.7"
| "minimax-m2.7-highspeed"
| "minimax-m2.5"
| "minimax-m2.5-highspeed"
| "minimax-m2.1"
| "minimax-m2.1-highspeed"
| "minimax-m2"
| "nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free"
| "nvidia/nemotron-3-ultra-550b-a55b"
| "nvidia/nemotron-3-ultra-550b-a55b:free"
@@ -628,13 +643,13 @@ mod tests {
}
#[test]
fn moonshot_native_kimi_ids_support_reasoning_except_for_coding() {
fn moonshot_native_kimi_ids_support_reasoning_including_coding_route() {
// #3016: bare Moonshot ids (no moonshotai/ prefix) emit
// reasoning_content; kimi-for-coding is the non-thinking exception.
// reasoning_content; kimi-for-coding currently rides the K2.7 Code path.
assert!(model_supports_reasoning("kimi-k2.7-code"));
assert!(model_supports_reasoning("kimi-k2.6"));
assert!(model_supports_reasoning("kimi-for-coding"));
assert!(model_supports_reasoning("kimi-k2.5"));
assert!(!model_supports_reasoning("kimi-for-coding"));
}
#[test]
@@ -691,13 +706,16 @@ mod tests {
("kimi-k2.7-code", 262_144),
("kimi-k2.6", 262_144),
("minimax-m3", 1_000_000),
("minimax-m2.7", 204_800),
("minimax-m2.5-highspeed", 204_800),
("minimax-m2", 204_800),
("glm-5.1", 202_752),
] {
assert_eq!(context_window_for_model(model), Some(expected_window));
assert!(model_supports_reasoning(model));
}
assert_eq!(context_window_for_model("kimi-for-coding"), Some(262_144));
assert!(!model_supports_reasoning("kimi-for-coding"));
assert!(model_supports_reasoning("kimi-for-coding"));
assert_eq!(context_window_for_model("glm-5v-turbo"), Some(202_752));
assert!(!model_supports_reasoning("glm-5v-turbo"));
assert_eq!(max_output_tokens_for_model("kimi-k2.7-code"), Some(262_144));
+6 -3
View File
@@ -121,7 +121,9 @@ impl ProviderPickerView {
ApiProvider::Huggingface => "HUGGINGFACE_API_KEY / HF_TOKEN",
ApiProvider::Together => "TOGETHER_API_KEY",
ApiProvider::OpenaiCodex => "OPENAI_CODEX_ACCESS_TOKEN / CODEX_ACCESS_TOKEN",
ApiProvider::Zai | ApiProvider::Stepfun => "OPENAI_API_KEY",
ApiProvider::Zai => "ZAI_API_KEY / Z_AI_API_KEY",
ApiProvider::Stepfun => "STEPFUN_API_KEY / STEP_API_KEY",
ApiProvider::Minimax => "MINIMAX_API_KEY",
}
}
@@ -517,7 +519,8 @@ mod tests {
"OpenAI Codex (ChatGPT)",
"Anthropic",
"Z.ai (GLM Coding)",
"StepFun / StepFlash"
"StepFun / StepFlash",
"MiniMax"
]
);
}
@@ -552,7 +555,7 @@ mod tests {
let mut picker = ProviderPickerView::new(ApiProvider::Deepseek, &config);
picker.handle_key(key(KeyCode::Up));
assert_eq!(picker.selected_provider(), ApiProvider::Stepfun);
assert_eq!(picker.selected_provider(), ApiProvider::Minimax);
picker.handle_key(key(KeyCode::Down));
assert_eq!(picker.selected_provider(), ApiProvider::Deepseek);
+3
View File
@@ -7595,6 +7595,7 @@ fn render(f: &mut Frame, app: &mut App) {
crate::config::ApiProvider::OpenaiCodex => Some("Codex"),
crate::config::ApiProvider::Zai => Some("Z.ai"),
crate::config::ApiProvider::Stepfun => Some("StepFun"),
crate::config::ApiProvider::Minimax => Some("MiniMax"),
};
let status_indicator_started_at = if app.low_motion {
None
@@ -8655,6 +8656,7 @@ async fn apply_provider_picker_api_key(
ApiProvider::Anthropic => &mut providers.anthropic,
ApiProvider::Zai => &mut providers.zai,
ApiProvider::Stepfun => &mut providers.stepfun,
ApiProvider::Minimax => &mut providers.minimax,
};
entry.api_key = Some(api_key);
}
@@ -8717,6 +8719,7 @@ fn set_provider_auth_mode_in_memory(config: &mut Config, provider: ApiProvider,
ApiProvider::Anthropic => &mut providers.anthropic,
ApiProvider::Zai => &mut providers.zai,
ApiProvider::Stepfun => &mut providers.stepfun,
ApiProvider::Minimax => &mut providers.minimax,
};
entry.auth_mode = Some(auth_mode);
}
+3 -3
View File
@@ -952,12 +952,12 @@ If you are upgrading from older releases:
### Core keys (used by the TUI/engine)
- `provider` (string, optional): `deepseek` (default), `nvidia-nim`, `openai`, `atlascloud`, `wanjie-ark`, `volcengine`, `openrouter`, `xiaomi-mimo`, `novita`, `fireworks`, `siliconflow`, `siliconflow-CN`, `arcee`, `moonshot`, `sglang`, `vllm`, or `ollama`. Legacy `deepseek-cn` configs are still accepted as an alias for `deepseek`; DeepSeek uses the same official host [`https://api.deepseek.com`](https://api-docs.deepseek.com/) worldwide. `nvidia-nim` targets NVIDIA's NIM-hosted DeepSeek endpoints through `https://integrate.api.nvidia.com/v1`; `openai` targets a generic OpenAI-compatible endpoint, defaulting to `https://api.openai.com/v1`; `atlascloud` targets AtlasCloud's OpenAI-compatible endpoint at `https://api.atlascloud.ai/v1`; `wanjie-ark` targets Wanjie Ark's OpenAI-compatible endpoint at `https://maas-openapi.wanjiedata.com/api/v1`; `volcengine` targets Volcengine Ark's OpenAI-compatible coding endpoint at `https://ark.cn-beijing.volces.com/api/coding/v3`; `openrouter` targets `https://openrouter.ai/api/v1`; `xiaomi-mimo` targets Xiaomi MiMo's OpenAI-compatible endpoint, using `https://token-plan-sgp.xiaomimimo.com/v1` by default for Token Plan keys (`tp-...`) and `https://api.xiaomimimo.com/v1` for pay-as-you-go keys; set `base_url` explicitly if your Token Plan account uses the China region; `novita` targets `https://api.novita.ai/v1`; `fireworks` targets `https://api.fireworks.ai/inference/v1`; `siliconflow` targets SiliconFlow, defaulting to `https://api.siliconflow.com/v1`; `siliconflow-CN` targets the SiliconFlow China regional endpoint while sharing `[providers.siliconflow]`; `arcee` targets Arcee AI's OpenAI-compatible endpoint at `https://api.arcee.ai/api/v1`; `moonshot` targets Moonshot/Kimi, defaulting to `https://api.moonshot.ai/v1`; `sglang` targets a self-hosted OpenAI-compatible endpoint, defaulting to `http://localhost:30000/v1`; `vllm` targets a self-hosted vLLM OpenAI-compatible endpoint, defaulting to `http://localhost:8000/v1`; `ollama` 
targets Ollama's OpenAI-compatible endpoint, defaulting to `http://localhost:11434/v1`.
- `provider` (string, optional): `deepseek` (default), `nvidia-nim`, `openai`, `atlascloud`, `wanjie-ark`, `volcengine`, `openrouter`, `xiaomi-mimo`, `novita`, `fireworks`, `siliconflow`, `siliconflow-CN`, `arcee`, `moonshot`, `minimax`, `zai`, `stepfun`, `sglang`, `vllm`, or `ollama`. Legacy `deepseek-cn` configs are still accepted as an alias for `deepseek`; DeepSeek uses the same official host [`https://api.deepseek.com`](https://api-docs.deepseek.com/) worldwide. `nvidia-nim` targets NVIDIA's NIM-hosted DeepSeek endpoints through `https://integrate.api.nvidia.com/v1`; `openai` targets a generic OpenAI-compatible endpoint, defaulting to `https://api.openai.com/v1`; `atlascloud` targets AtlasCloud's OpenAI-compatible endpoint at `https://api.atlascloud.ai/v1`; `wanjie-ark` targets Wanjie Ark's OpenAI-compatible endpoint at `https://maas-openapi.wanjiedata.com/api/v1`; `volcengine` targets Volcengine Ark's OpenAI-compatible coding endpoint at `https://ark.cn-beijing.volces.com/api/coding/v3`; `openrouter` targets `https://openrouter.ai/api/v1`; `xiaomi-mimo` targets Xiaomi MiMo's OpenAI-compatible endpoint, using `https://token-plan-sgp.xiaomimimo.com/v1` by default for Token Plan keys (`tp-...`) and `https://api.xiaomimimo.com/v1` for pay-as-you-go keys; set `base_url` explicitly if your Token Plan account uses the China region; `novita` targets `https://api.novita.ai/v1`; `fireworks` targets `https://api.fireworks.ai/inference/v1`; `siliconflow` targets SiliconFlow, defaulting to `https://api.siliconflow.com/v1`; `siliconflow-CN` targets the SiliconFlow China regional endpoint while sharing `[providers.siliconflow]`; `arcee` targets Arcee AI's OpenAI-compatible endpoint at `https://api.arcee.ai/api/v1`; `moonshot` targets Moonshot/Kimi, defaulting to `https://api.moonshot.ai/v1`; `minimax` targets MiniMax at `https://api.minimax.io/v1`; `zai` targets Z.ai at `https://api.z.ai/api/coding/paas/v4`; `stepfun` targets StepFun at `https://api.stepfun.ai/v1`; `sglang` 
targets a self-hosted OpenAI-compatible endpoint, defaulting to `http://localhost:30000/v1`; `vllm` targets a self-hosted vLLM OpenAI-compatible endpoint, defaulting to `http://localhost:8000/v1`; `ollama` targets Ollama's OpenAI-compatible endpoint, defaulting to `http://localhost:11434/v1`.
- `api_key` (string, required for hosted providers): must be non-empty for DeepSeek/hosted providers (or set the provider API key env var). Self-hosted SGLang, vLLM, and Ollama can omit it.
- `base_url` (string, optional): defaults to `https://api.deepseek.com/beta` for DeepSeek's OpenAI-compatible Chat Completions API, including legacy `provider = "deepseek-cn"` configs. Other defaults are `https://integrate.api.nvidia.com/v1` for `nvidia-nim`, `https://api.openai.com/v1` for `openai`, `https://api.atlascloud.ai/v1` for `atlascloud`, `https://maas-openapi.wanjiedata.com/api/v1` for `wanjie-ark`, `https://ark.cn-beijing.volces.com/api/coding/v3` for `volcengine`, `https://openrouter.ai/api/v1` for `openrouter`, `https://token-plan-sgp.xiaomimimo.com/v1` for `xiaomi-mimo` when the API key starts with `tp-...` and `https://api.xiaomimimo.com/v1` otherwise, `https://api.novita.ai/v1` for `novita`, `https://api.fireworks.ai/inference/v1` for `fireworks`, `https://api.siliconflow.com/v1` for `siliconflow`, `https://api.siliconflow.cn/v1` for `siliconflow-CN`, `https://api.arcee.ai/api/v1` for `arcee`, `https://api.moonshot.ai/v1` for `moonshot`, `http://localhost:30000/v1` for `sglang`, `http://localhost:8000/v1` for `vllm`, and `http://localhost:11434/v1` for `ollama`. Set `base_url = "https://token-plan-cn.xiaomimimo.com/v1"` explicitly if your Xiaomi MiMo Token Plan account is provisioned in the China region. Set `https://api.deepseek.com` or `https://api.deepseek.com/v1` explicitly to opt out of DeepSeek beta features.
- `base_url` (string, optional): defaults to `https://api.deepseek.com/beta` for DeepSeek's OpenAI-compatible Chat Completions API, including legacy `provider = "deepseek-cn"` configs. Other defaults are `https://integrate.api.nvidia.com/v1` for `nvidia-nim`, `https://api.openai.com/v1` for `openai`, `https://api.atlascloud.ai/v1` for `atlascloud`, `https://maas-openapi.wanjiedata.com/api/v1` for `wanjie-ark`, `https://ark.cn-beijing.volces.com/api/coding/v3` for `volcengine`, `https://openrouter.ai/api/v1` for `openrouter`, `https://token-plan-sgp.xiaomimimo.com/v1` for `xiaomi-mimo` when the API key starts with `tp-...` and `https://api.xiaomimimo.com/v1` otherwise, `https://api.novita.ai/v1` for `novita`, `https://api.fireworks.ai/inference/v1` for `fireworks`, `https://api.siliconflow.com/v1` for `siliconflow`, `https://api.siliconflow.cn/v1` for `siliconflow-CN`, `https://api.arcee.ai/api/v1` for `arcee`, `https://api.moonshot.ai/v1` for `moonshot`, `https://api.minimax.io/v1` for `minimax`, `https://api.z.ai/api/coding/paas/v4` for `zai`, `https://api.stepfun.ai/v1` for `stepfun`, `http://localhost:30000/v1` for `sglang`, `http://localhost:8000/v1` for `vllm`, and `http://localhost:11434/v1` for `ollama`. Set `base_url = "https://token-plan-cn.xiaomimimo.com/v1"` explicitly if your Xiaomi MiMo Token Plan account is provisioned in the China region. Set `https://api.deepseek.com` or `https://api.deepseek.com/v1` explicitly to opt out of DeepSeek beta features.
- `path_suffix` (string, optional provider-table key): override the chat-completions path for OpenAI-compatible gateways that do not serve `/v1/chat/completions`. For example, `[providers.openai] path_suffix = "/chat/completions"` sends chat requests to the unversioned base URL plus `/chat/completions`; `models` and `beta/*` requests keep their normal routing.
- `insecure_skip_tls_verify` (bool, optional provider-table key): disabled by default. When true on the active provider table, only the LLM provider HTTP client skips TLS certificate verification. Prefer `SSL_CERT_FILE` for corporate or private CA bundles; `codewhale doctor` reports this setting when enabled.
- `default_text_model` (string, optional): defaults to `deepseek-v4-pro` for DeepSeek and generic OpenAI-compatible endpoints, `deepseek-ai/deepseek-v4-pro` for NVIDIA NIM, `deepseek-ai/deepseek-v4-flash` for AtlasCloud, `deepseek-reasoner` for Wanjie Ark, `DeepSeek-V4-Pro` for Volcengine Ark, `deepseek/deepseek-v4-pro` for OpenRouter and Novita, `mimo-v2.5-pro` for Xiaomi MiMo, `accounts/fireworks/models/deepseek-v4-pro` for Fireworks, `deepseek-ai/DeepSeek-V4-Pro` for SiliconFlow, `trinity-large-thinking` for Arcee AI, `kimi-k2.7-code` for Moonshot, `deepseek-ai/DeepSeek-V4-Pro` for SGLang/vLLM, and `deepseek-coder:1.3b` for Ollama. Hugging Face and Together AI both default to `deepseek-ai/DeepSeek-V4-Pro`. Current public DeepSeek IDs are `deepseek-v4-pro` and `deepseek-v4-flash`, both with 1M context windows, 384K max output, and thinking mode enabled by default. Legacy `deepseek-chat` and `deepseek-reasoner` remain compatibility aliases for `deepseek-v4-flash` until July 24, 2026, except SiliconFlow maps `deepseek-reasoner` and `deepseek-r1` to its Pro model while `deepseek-chat` and `deepseek-v3` map to Flash. Provider-specific mappings translate `deepseek-v4-pro` / `deepseek-v4-flash` to each provider's model ID where supported. 
OpenRouter also recognizes recent large IDs such as `arcee-ai/trinity-large-thinking`, `minimax/minimax-m3`, `minimax/minimax-2.7`, `xiaomi/mimo-v2.5-pro`, `qwen/qwen3.6-flash`, `qwen/qwen3.6-35b-a3b`, `qwen/qwen3.6-max-preview`, `qwen/qwen3.6-27b`, `qwen/qwen3.6-plus`, `qwen/qwen3.7-max`, `google/gemma-4-31b-it`, `moonshotai/kimi-k2.7-code`, `moonshotai/kimi-k2.6`, `nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free`, and `nvidia/nemotron-3-ultra-550b-a55b`; direct Arcee uses bare IDs such as `trinity-large-thinking` and `trinity-large-preview`; direct Moonshot recognizes `kimi-k2.7-code` and `kimi-k2.6`, with `kimi` and `kimi-k2` aliases selecting `kimi-k2.7-code`; direct Xiaomi MiMo recognizes chat IDs `mimo-v2.5-pro` and `mimo-v2.5`, while TTS IDs are selected through `codewhale speech` / `tts`. Generic `openai`, `atlascloud`, `wanjie-ark`, `xiaomi-mimo`, `arcee`, and Ollama model IDs are passed through unchanged after known aliases are normalized. OpenRouter and SiliconFlow provider configs with a custom `base_url` also preserve explicit model values, which lets OpenAI-compatible gateways accept bare model IDs. Use `/models` or `codewhale models` to discover live IDs from your configured endpoint. `CODEWHALE_MODEL` overrides this for a single process; `DEEPSEEK_MODEL` is the legacy alias.
- `default_text_model` (string, optional): defaults to `deepseek-v4-pro` for DeepSeek and generic OpenAI-compatible endpoints, `deepseek-ai/deepseek-v4-pro` for NVIDIA NIM, `deepseek-ai/deepseek-v4-flash` for AtlasCloud, `deepseek-reasoner` for Wanjie Ark, `DeepSeek-V4-Pro` for Volcengine Ark, `deepseek/deepseek-v4-pro` for OpenRouter and Novita, `mimo-v2.5-pro` for Xiaomi MiMo, `accounts/fireworks/models/deepseek-v4-pro` for Fireworks, `deepseek-ai/DeepSeek-V4-Pro` for SiliconFlow, `trinity-large-thinking` for Arcee AI, `kimi-k2.7-code` for Moonshot, `MiniMax-M3` for MiniMax, `GLM-5.1` for Z.ai, `step-3.7-flash` for StepFun, `deepseek-ai/DeepSeek-V4-Pro` for SGLang/vLLM, and `deepseek-coder:1.3b` for Ollama. Hugging Face and Together AI both default to `deepseek-ai/DeepSeek-V4-Pro`. Current public DeepSeek IDs are `deepseek-v4-pro` and `deepseek-v4-flash`, both with 1M context windows, 384K max output, and thinking mode enabled by default. Legacy `deepseek-chat` and `deepseek-reasoner` remain compatibility aliases for `deepseek-v4-flash` until July 24, 2026, except SiliconFlow maps `deepseek-reasoner` and `deepseek-r1` to its Pro model while `deepseek-chat` and `deepseek-v3` map to Flash. Provider-specific mappings translate `deepseek-v4-pro` / `deepseek-v4-flash` to each provider's model ID where supported. 
OpenRouter also recognizes recent large IDs such as `arcee-ai/trinity-large-thinking`, `minimax/minimax-m3`, `minimax/minimax-2.7`, `xiaomi/mimo-v2.5-pro`, `qwen/qwen3.6-flash`, `qwen/qwen3.6-35b-a3b`, `qwen/qwen3.6-max-preview`, `qwen/qwen3.6-27b`, `qwen/qwen3.6-plus`, `qwen/qwen3.7-max`, `google/gemma-4-31b-it`, `moonshotai/kimi-k2.7-code`, `moonshotai/kimi-k2.6`, `nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free`, and `nvidia/nemotron-3-ultra-550b-a55b`; direct Arcee uses bare IDs such as `trinity-large-thinking` and `trinity-large-preview`; direct Moonshot recognizes `kimi-k2.7-code`, `kimi-k2.6`, and Kimi Code's stable `kimi-for-coding`; direct MiniMax recognizes `MiniMax-M3` and the documented M2.x chat model IDs; direct Xiaomi MiMo recognizes chat IDs `mimo-v2.5-pro` and `mimo-v2.5`, while TTS IDs are selected through `codewhale speech` / `tts`. Generic `openai`, `atlascloud`, `wanjie-ark`, `xiaomi-mimo`, `arcee`, `moonshot`, `minimax`, `zai`, `stepfun`, and Ollama model IDs are passed through unchanged after known aliases are normalized. OpenRouter and SiliconFlow provider configs with a custom `base_url` also preserve explicit model values, which lets OpenAI-compatible gateways accept bare model IDs. Use `/models` or `codewhale models` to discover live IDs from your configured endpoint. `CODEWHALE_MODEL` overrides this for a single process; `DEEPSEEK_MODEL` is the legacy alias.
- `reasoning_effort` (string, optional): `off`, `low`, `medium`, `high`, `max`, or `xhigh`; defaults to the configured UI tier. DeepSeek Platform receives top-level `thinking` / `reasoning_effort` fields. OpenAI Codex normalizes stale `off` to `low` and sends `max` as Responses `xhigh`. NVIDIA NIM receives equivalent settings through `chat_template_kwargs`.
- `verbosity` (string, optional): `normal` or `concise`. `normal` keeps the
default conversational prompt. `concise` appends a prompt discipline block
+8 -2
View File
@@ -135,7 +135,8 @@ endpoint.
| `siliconflow` | `[providers.siliconflow]` | `SILICONFLOW_API_KEY` | `SILICONFLOW_BASE_URL`; default `https://api.siliconflow.com/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | OpenAI-compatible hosted route. Official docs use the `.com` endpoint. `SILICONFLOW_MODEL` is accepted. Reasoning aliases `deepseek-reasoner` and `deepseek-r1` map to Pro; `deepseek-chat` and `deepseek-v3` map to Flash. |
| `siliconflow-CN` | `[providers.siliconflow_cn]` | `SILICONFLOW_API_KEY` | `SILICONFLOW_BASE_URL`; default `https://api.siliconflow.cn/v1` | Uses the SiliconFlow model set | China regional SiliconFlow route. Falls back to `[providers.siliconflow]` for api_key / base_url / model when unset. Select it with `provider = "siliconflow-CN"` or `CODEWHALE_PROVIDER=siliconflow-CN`. |
| `arcee` | `[providers.arcee]` | `ARCEE_API_KEY` | `ARCEE_BASE_URL`; default `https://api.arcee.ai/api/v1` | `trinity-large-thinking`, `trinity-large-preview` | Arcee AI direct OpenAI-compatible route, tracked as 256K-context BF16 serving. `ARCEE_MODEL` is accepted. OpenRouter's `arcee-ai/trinity-large-thinking` remains the OpenRouter namespaced model ID; direct Arcee uses the bare `trinity-large-thinking` ID. |
| `moonshot` | `[providers.moonshot]` | `MOONSHOT_API_KEY`, `KIMI_API_KEY` | `MOONSHOT_BASE_URL`, `KIMI_BASE_URL`; default `https://api.moonshot.ai/v1` | `kimi-k2.7-code`, `kimi-k2.6`; Kimi Code path uses `kimi-for-coding` at `https://api.kimi.com/coding/v1` | Moonshot/Kimi route. `kimi` and `kimi-k2` aliases select `kimi-k2.7-code`; `MOONSHOT_MODEL`, `KIMI_MODEL_NAME`, and `KIMI_MODEL` are accepted. `[providers.moonshot] auth_mode = "kimi_oauth"` reads Kimi Code OAuth credentials from `KIMI_CODE_HOME`/`~/.kimi-code`, with legacy `KIMI_SHARE_DIR`/`~/.kimi` fallback. |
| `moonshot` | `[providers.moonshot]` | `MOONSHOT_API_KEY`, `KIMI_API_KEY` | `MOONSHOT_BASE_URL`, `KIMI_BASE_URL`; default `https://api.moonshot.ai/v1` | `kimi-k2.7-code`, `kimi-k2.6`; Kimi Code path uses `kimi-for-coding` at `https://api.kimi.com/coding/v1` | Moonshot/Kimi route. `kimi` and `kimi-k2` aliases select `kimi-k2.7-code`; `MOONSHOT_MODEL`, `KIMI_MODEL_NAME`, and `KIMI_MODEL` are accepted. Kimi thinking streams through `reasoning_content`; CodeWhale keeps it in Thinking cells and replays it for thinking/tool-call continuity. `[providers.moonshot] auth_mode = "kimi_oauth"` reads Kimi Code OAuth credentials from `KIMI_CODE_HOME`/`~/.kimi-code`, with legacy `KIMI_SHARE_DIR`/`~/.kimi` fallback. |
| `minimax` | `[providers.minimax]` | `MINIMAX_API_KEY` | `MINIMAX_BASE_URL`; default `https://api.minimax.io/v1` | `MiniMax-M3`, `MiniMax-M2.7`, `MiniMax-M2.7-highspeed`, `MiniMax-M2.5`, `MiniMax-M2.5-highspeed`, `MiniMax-M2.1`, `MiniMax-M2.1-highspeed`, `MiniMax-M2` | MiniMax direct OpenAI-compatible route. CodeWhale sends `reasoning_split: true` in each request so MiniMax thinking arrives separately from answer text, and direct MiniMax IDs stay distinct from OpenRouter namespaced IDs such as `minimax/minimax-m3`. |
| `sglang` | `[providers.sglang]` | Optional `SGLANG_API_KEY` | `SGLANG_BASE_URL`; default `http://localhost:30000/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | Self-hosted OpenAI-compatible route. Localhost deployments commonly omit auth. `SGLANG_MODEL` is accepted. |
| `vllm` | `[providers.vllm]` | Optional `VLLM_API_KEY` | `VLLM_BASE_URL`; default `http://localhost:8000/v1` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | Self-hosted vLLM OpenAI-compatible route. Localhost deployments commonly omit auth. `VLLM_MODEL` is accepted. |
| `ollama` | `[providers.ollama]` | Optional `OLLAMA_API_KEY` | `OLLAMA_BASE_URL`; default `http://localhost:11434/v1` | `deepseek-coder:1.3b`; provider-hinted custom tags pass through | Self-hosted Ollama OpenAI-compatible route. Localhost deployments commonly omit auth. `OLLAMA_MODEL` is accepted. |
@@ -220,6 +221,7 @@ endpoint when the endpoint supports model listing.
| `siliconflow` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | yes | yes |
| `arcee` | `trinity-large-thinking`, `trinity-large-preview`; provider-hinted custom model IDs pass through | yes | yes for `trinity-large-thinking`; no for `trinity-large-preview` |
| `moonshot` | `kimi-k2.7-code`, `kimi-k2.6` | yes | yes |
| `minimax` | `MiniMax-M3`, `MiniMax-M2.7`, `MiniMax-M2.7-highspeed`, `MiniMax-M2.5`, `MiniMax-M2.5-highspeed`, `MiniMax-M2.1`, `MiniMax-M2.1-highspeed`, `MiniMax-M2` | yes | yes |
| `sglang` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | yes | yes |
| `vllm` | `deepseek-ai/DeepSeek-V4-Pro`, `deepseek-ai/DeepSeek-V4-Flash` | yes | yes |
| `ollama` | `deepseek-coder:1.3b`; custom tags pass through when provider hint is `ollama` | yes | no |
@@ -259,7 +261,10 @@ Anthropic uses Messages, and `openai-codex` uses Responses.
| Wanjie Ark `reasoner` / `r1` model IDs | 128,000 | 4,096 | yes | no | not documented in code |
| Direct Arcee API `trinity-large-thinking` | 262,144 | 262,144 | yes | no | not documented in code |
| Direct Arcee API `trinity-large-preview` | 262,144 | 4,096 | no in doctor capability metadata | no | not documented in code |
| Generic `openai`, AtlasCloud, and Moonshot/Kimi | 128,000 | 4,096 | no in doctor capability metadata | no | not documented in code |
| Direct Moonshot/Kimi `kimi-k2.7-code`, `kimi-k2.6`, `kimi-for-coding` | 262,144 | 262,144 | yes | no | not documented in code |
| Direct MiniMax `MiniMax-M3` | 1,000,000 | 524,288 | yes | no | not documented in code |
| Direct MiniMax M2.x models | 204,800 | 4,096 (fallback until MiniMax output metadata is promoted) | yes | no | not documented in code |
| Generic `openai` and AtlasCloud | 128,000 | 4,096 | no in doctor capability metadata | no | not documented in code |
| Ollama | 8,192 | 4,096 | no | no | not documented in code |
| Hugging Face Inference Providers V4 model IDs | 131,072 | 4,096 | yes | no | not documented in code |
| Other recognized DeepSeek model IDs | 128,000 unless the model name carries an explicit `Nk` hint | 4,096 | no unless V4/reasoner logic matches | DeepSeek/NIM only | DeepSeek beta only |
@@ -319,6 +324,7 @@ receive no reasoning fields at all for that tier.
| `moonshot` | `thinking: {type: disabled}` | `thinking: {type: enabled}` | `thinking: {type: enabled}` |
| `ollama` | `think: false` | `think: true` | `think: true` |
| `xiaomi-mimo` | `thinking: {type: disabled}` | `thinking: {type: enabled}` | `thinking: {type: enabled}` |
| `minimax` | `reasoning_split: true` + `thinking: {type: disabled}` | `reasoning_split: true` + `thinking: {type: adaptive}` | `reasoning_split: true` + `thinking: {type: adaptive}` |
| `nvidia-nim` | `chat_template_kwargs.thinking: false` | `chat_template_kwargs`: `thinking: true` + `reasoning_effort: "high"` | `chat_template_kwargs`: `thinking: true` + `reasoning_effort: "max"` |
| `vllm` | `chat_template_kwargs.enable_thinking: false` | `chat_template_kwargs.enable_thinking: true` + `reasoning_effort` low/medium/high | `chat_template_kwargs.enable_thinking: true` + `reasoning_effort: "high"` (vLLM has no max tier) |
| `arcee`, `huggingface` | omitted | `reasoning_effort` pass-through | `reasoning_effort: "high"` |