diff --git a/crates/tui/src/client.rs b/crates/tui/src/client.rs index f7eb50b8..93f722d6 100644 --- a/crates/tui/src/client.rs +++ b/crates/tui/src/client.rs @@ -1226,7 +1226,8 @@ pub(super) fn apply_reasoning_effort( | ApiProvider::SiliconflowCn | ApiProvider::Sglang | ApiProvider::Volcengine - | ApiProvider::Together => { + | ApiProvider::Together + | ApiProvider::Atlascloud => { body["thinking"] = json!({ "type": "disabled" }); } ApiProvider::OpenaiCodex => { @@ -1248,12 +1249,17 @@ pub(super) fn apply_reasoning_effort( }); } ApiProvider::Openai - | ApiProvider::Atlascloud | ApiProvider::WanjieArk | ApiProvider::Arcee - | ApiProvider::Huggingface - | ApiProvider::Moonshot - | ApiProvider::Ollama => {} + | ApiProvider::Huggingface => {} + ApiProvider::Moonshot => { + // #3024: Kimi models accept thinking enable/disable. + body["thinking"] = json!({ "type": "disabled" }); + } + ApiProvider::Ollama => { + // #3024: Ollama OpenAI-compat endpoint accepts think param. + body["think"] = json!(false); + } ApiProvider::NvidiaNim => { body["chat_template_kwargs"] = json!({ "thinking": false, @@ -1267,7 +1273,8 @@ pub(super) fn apply_reasoning_effort( | ApiProvider::Siliconflow | ApiProvider::SiliconflowCn | ApiProvider::Sglang - | ApiProvider::Volcengine => { + | ApiProvider::Volcengine + | ApiProvider::Atlascloud => { body["reasoning_effort"] = json!("high"); body["thinking"] = json!({ "type": "enabled" }); } @@ -1311,12 +1318,15 @@ pub(super) fn apply_reasoning_effort( }; body["reasoning_effort"] = json!(value); } - ApiProvider::Openai - | ApiProvider::Atlascloud - | ApiProvider::WanjieArk - | ApiProvider::Moonshot - | ApiProvider::Ollama - | ApiProvider::OpenaiCodex => {} + ApiProvider::Openai | ApiProvider::WanjieArk | ApiProvider::OpenaiCodex => {} + ApiProvider::Moonshot => { + // #3024: Kimi models accept thinking enable. + body["thinking"] = json!({ "type": "enabled" }); + } + ApiProvider::Ollama => { + // #3024: Ollama think param. 
+ body["think"] = json!(true); + } ApiProvider::NvidiaNim => { body["chat_template_kwargs"] = json!({ "thinking": true, @@ -1330,7 +1340,8 @@ pub(super) fn apply_reasoning_effort( | ApiProvider::Siliconflow | ApiProvider::SiliconflowCn | ApiProvider::Sglang - | ApiProvider::Volcengine => { + | ApiProvider::Volcengine + | ApiProvider::Atlascloud => { body["reasoning_effort"] = json!("max"); body["thinking"] = json!({ "type": "enabled" }); } @@ -1355,12 +1366,15 @@ pub(super) fn apply_reasoning_effort( // "max" to "high" instead of sending an invalid value. body["reasoning_effort"] = json!("high"); } - ApiProvider::Openai - | ApiProvider::Atlascloud - | ApiProvider::WanjieArk - | ApiProvider::Moonshot - | ApiProvider::Ollama - | ApiProvider::OpenaiCodex => {} + ApiProvider::Openai | ApiProvider::WanjieArk | ApiProvider::OpenaiCodex => {} + ApiProvider::Moonshot => { + // #3024: Kimi models accept thinking enable. + body["thinking"] = json!({ "type": "enabled" }); + } + ApiProvider::Ollama => { + // #3024: Ollama think param. 
+ body["think"] = json!(true); + } ApiProvider::NvidiaNim => { body["chat_template_kwargs"] = json!({ "thinking": true, @@ -2543,12 +2557,9 @@ mod tests { fn reasoning_effort_off_is_omitted_for_strict_openai_like_providers() { for provider in [ ApiProvider::Openai, - ApiProvider::Atlascloud, ApiProvider::WanjieArk, ApiProvider::Arcee, ApiProvider::Huggingface, - ApiProvider::Moonshot, - ApiProvider::Ollama, ApiProvider::Fireworks, ] { let mut body = json!({}); @@ -2562,6 +2573,49 @@ mod tests { } } + #[test] + fn reasoning_effort_atlascloud_speaks_deepseek_dialect() { + let mut body = json!({}); + apply_reasoning_effort(&mut body, Some("high"), ApiProvider::Atlascloud); + assert_eq!( + body, + json!({ "reasoning_effort": "high", "thinking": { "type": "enabled" } }) + ); + + let mut body = json!({}); + apply_reasoning_effort(&mut body, Some("max"), ApiProvider::Atlascloud); + assert_eq!( + body, + json!({ "reasoning_effort": "max", "thinking": { "type": "enabled" } }) + ); + + let mut body = json!({}); + apply_reasoning_effort(&mut body, Some("off"), ApiProvider::Atlascloud); + assert_eq!(body, json!({ "thinking": { "type": "disabled" } })); + } + + #[test] + fn reasoning_effort_moonshot_toggles_thinking() { + let mut body = json!({}); + apply_reasoning_effort(&mut body, Some("high"), ApiProvider::Moonshot); + assert_eq!(body, json!({ "thinking": { "type": "enabled" } })); + + let mut body = json!({}); + apply_reasoning_effort(&mut body, Some("off"), ApiProvider::Moonshot); + assert_eq!(body, json!({ "thinking": { "type": "disabled" } })); + } + + #[test] + fn reasoning_effort_ollama_toggles_think_flag() { + let mut body = json!({}); + apply_reasoning_effort(&mut body, Some("high"), ApiProvider::Ollama); + assert_eq!(body, json!({ "think": true })); + + let mut body = json!({}); + apply_reasoning_effort(&mut body, Some("off"), ApiProvider::Ollama); + assert_eq!(body, json!({ "think": false })); + } + #[test] fn reasoning_effort_uses_nvidia_nim_chat_template_kwargs() 
{ let mut body = json!({}); diff --git a/docs/PROVIDERS.md b/docs/PROVIDERS.md index 89556be3..d514f6a5 100644 --- a/docs/PROVIDERS.md +++ b/docs/PROVIDERS.md @@ -283,6 +283,30 @@ DeepSeek compatibility aliases `deepseek-chat` and `deepseek-reasoner` map to `deepseek-v4-flash` capability metadata and are scheduled to retire on 2026-07-24 at 2026-07-24T15:59:00Z. +## Reasoning Effort + +`/reasoning <level>` (and the `reasoning_effort` config key) is translated to +each provider's wire dialect by the client before the request is sent. `off` +disables thinking where the dialect supports it; providers marked "omitted" +receive no reasoning fields at all for that tier. + +| Provider | `off` | `low`/`medium`/`high` | `max`/`xhigh` | +| --- | --- | --- | --- | +| `deepseek`, `deepseek-cn`, `siliconflow`, `siliconflow-CN`, `sglang`, `volcengine`, `atlascloud` | `thinking: {type: disabled}` | `reasoning_effort: "high"` + `thinking: {type: enabled}` | `reasoning_effort: "max"` + `thinking: {type: enabled}` | +| `openrouter`, `novita`, `together` | `thinking: {type: disabled}` | `reasoning_effort` pass-through + `thinking: {type: enabled}` | `reasoning_effort: "xhigh"` + `thinking: {type: enabled}` | +| `moonshot` | `thinking: {type: disabled}` | `thinking: {type: enabled}` | `thinking: {type: enabled}` | +| `ollama` | `think: false` | `think: true` | `think: true` | +| `xiaomi-mimo` | `thinking: {type: disabled}` | `thinking: {type: enabled}` | `thinking: {type: enabled}` | +| `nvidia-nim` | `chat_template_kwargs.thinking: false` | `chat_template_kwargs`: `thinking: true` + `reasoning_effort: "high"` | `chat_template_kwargs`: `thinking: true` + `reasoning_effort: "max"` | +| `vllm` | `chat_template_kwargs.enable_thinking: false` | `chat_template_kwargs.enable_thinking: true` + `reasoning_effort` low/medium/high | `chat_template_kwargs.enable_thinking: true` + `reasoning_effort: "high"` (vLLM has no max tier) | +| `arcee`, `huggingface` | omitted | `reasoning_effort` pass-through | 
`reasoning_effort: "high"` | +| `fireworks` | omitted | `reasoning_effort: "high"` | `reasoning_effort: "max"` | +| `openai`, `wanjie-ark` | omitted | omitted | omitted | +| `openai-codex` | Responses API `reasoning` field (handled by the Responses bridge) | Responses API `reasoning` field | Responses API `reasoning` field | + +AtlasCloud serves DeepSeek models, so it speaks the DeepSeek reasoning dialect, +including the `max` tier (#3024). + ## Drift Check Run this before changing provider IDs, provider TOML tables, static model