Merge PR #3050 from Hmbown: wire reasoning-effort for Atlascloud, Moonshot, Ollama dialects

fix(reasoning): wire reasoning-effort for Atlascloud, Moonshot, Ollama
This commit is contained in:
Hunter Bown
2026-06-10 22:30:00 -07:00
committed by GitHub
2 changed files with 100 additions and 22 deletions
+76 -22
View File
@@ -1226,7 +1226,8 @@ pub(super) fn apply_reasoning_effort(
| ApiProvider::SiliconflowCn | ApiProvider::SiliconflowCn
| ApiProvider::Sglang | ApiProvider::Sglang
| ApiProvider::Volcengine | ApiProvider::Volcengine
| ApiProvider::Together => { | ApiProvider::Together
| ApiProvider::Atlascloud => {
body["thinking"] = json!({ "type": "disabled" }); body["thinking"] = json!({ "type": "disabled" });
} }
ApiProvider::OpenaiCodex => { ApiProvider::OpenaiCodex => {
@@ -1248,12 +1249,17 @@ pub(super) fn apply_reasoning_effort(
}); });
} }
ApiProvider::Openai ApiProvider::Openai
| ApiProvider::Atlascloud
| ApiProvider::WanjieArk | ApiProvider::WanjieArk
| ApiProvider::Arcee | ApiProvider::Arcee
| ApiProvider::Huggingface | ApiProvider::Huggingface => {}
| ApiProvider::Moonshot ApiProvider::Moonshot => {
| ApiProvider::Ollama => {} // #3024: Kimi models accept thinking enable/disable.
body["thinking"] = json!({ "type": "disabled" });
}
ApiProvider::Ollama => {
// #3024: Ollama OpenAI-compat endpoint accepts think param.
body["think"] = json!(false);
}
ApiProvider::NvidiaNim => { ApiProvider::NvidiaNim => {
body["chat_template_kwargs"] = json!({ body["chat_template_kwargs"] = json!({
"thinking": false, "thinking": false,
@@ -1267,7 +1273,8 @@ pub(super) fn apply_reasoning_effort(
| ApiProvider::Siliconflow | ApiProvider::Siliconflow
| ApiProvider::SiliconflowCn | ApiProvider::SiliconflowCn
| ApiProvider::Sglang | ApiProvider::Sglang
| ApiProvider::Volcengine => { | ApiProvider::Volcengine
| ApiProvider::Atlascloud => {
body["reasoning_effort"] = json!("high"); body["reasoning_effort"] = json!("high");
body["thinking"] = json!({ "type": "enabled" }); body["thinking"] = json!({ "type": "enabled" });
} }
@@ -1311,12 +1318,15 @@ pub(super) fn apply_reasoning_effort(
}; };
body["reasoning_effort"] = json!(value); body["reasoning_effort"] = json!(value);
} }
ApiProvider::Openai ApiProvider::Openai | ApiProvider::WanjieArk | ApiProvider::OpenaiCodex => {}
| ApiProvider::Atlascloud ApiProvider::Moonshot => {
| ApiProvider::WanjieArk // #3024: Kimi models accept thinking enable.
| ApiProvider::Moonshot body["thinking"] = json!({ "type": "enabled" });
| ApiProvider::Ollama }
| ApiProvider::OpenaiCodex => {} ApiProvider::Ollama => {
// #3024: Ollama think param.
body["think"] = json!(true);
}
ApiProvider::NvidiaNim => { ApiProvider::NvidiaNim => {
body["chat_template_kwargs"] = json!({ body["chat_template_kwargs"] = json!({
"thinking": true, "thinking": true,
@@ -1330,7 +1340,8 @@ pub(super) fn apply_reasoning_effort(
| ApiProvider::Siliconflow | ApiProvider::Siliconflow
| ApiProvider::SiliconflowCn | ApiProvider::SiliconflowCn
| ApiProvider::Sglang | ApiProvider::Sglang
| ApiProvider::Volcengine => { | ApiProvider::Volcengine
| ApiProvider::Atlascloud => {
body["reasoning_effort"] = json!("max"); body["reasoning_effort"] = json!("max");
body["thinking"] = json!({ "type": "enabled" }); body["thinking"] = json!({ "type": "enabled" });
} }
@@ -1355,12 +1366,15 @@ pub(super) fn apply_reasoning_effort(
// "max" to "high" instead of sending an invalid value. // "max" to "high" instead of sending an invalid value.
body["reasoning_effort"] = json!("high"); body["reasoning_effort"] = json!("high");
} }
ApiProvider::Openai ApiProvider::Openai | ApiProvider::WanjieArk | ApiProvider::OpenaiCodex => {}
| ApiProvider::Atlascloud ApiProvider::Moonshot => {
| ApiProvider::WanjieArk // #3024: Kimi models accept thinking enable.
| ApiProvider::Moonshot body["thinking"] = json!({ "type": "enabled" });
| ApiProvider::Ollama }
| ApiProvider::OpenaiCodex => {} ApiProvider::Ollama => {
// #3024: Ollama think param.
body["think"] = json!(true);
}
ApiProvider::NvidiaNim => { ApiProvider::NvidiaNim => {
body["chat_template_kwargs"] = json!({ body["chat_template_kwargs"] = json!({
"thinking": true, "thinking": true,
@@ -2543,12 +2557,9 @@ mod tests {
fn reasoning_effort_off_is_omitted_for_strict_openai_like_providers() { fn reasoning_effort_off_is_omitted_for_strict_openai_like_providers() {
for provider in [ for provider in [
ApiProvider::Openai, ApiProvider::Openai,
ApiProvider::Atlascloud,
ApiProvider::WanjieArk, ApiProvider::WanjieArk,
ApiProvider::Arcee, ApiProvider::Arcee,
ApiProvider::Huggingface, ApiProvider::Huggingface,
ApiProvider::Moonshot,
ApiProvider::Ollama,
ApiProvider::Fireworks, ApiProvider::Fireworks,
] { ] {
let mut body = json!({}); let mut body = json!({});
@@ -2562,6 +2573,49 @@ mod tests {
} }
} }
/// Checks the request fields written for `ApiProvider::Atlascloud` at the
/// `high`, `max`, and `off` effort tiers.
///
/// `high` and `max` must set both `reasoning_effort` (to the tier name) and
/// `thinking.type = "enabled"`; `off` must set only `thinking.type = "disabled"`.
#[test]
fn reasoning_effort_atlascloud_speaks_deepseek_dialect() {
    // (effort tier, exact request body expected after translation)
    let expectations = [
        (
            "high",
            json!({ "reasoning_effort": "high", "thinking": { "type": "enabled" } }),
        ),
        (
            "max",
            json!({ "reasoning_effort": "max", "thinking": { "type": "enabled" } }),
        ),
        ("off", json!({ "thinking": { "type": "disabled" } })),
    ];

    for (effort, expected) in expectations {
        // Start from an empty body each time so tiers cannot leak into each other.
        let mut request = json!({});
        apply_reasoning_effort(&mut request, Some(effort), ApiProvider::Atlascloud);
        assert_eq!(request, expected);
    }
}
/// Checks the request fields written for `ApiProvider::Moonshot`: `high` must
/// produce only `thinking.type = "enabled"` and `off` only
/// `thinking.type = "disabled"` (no `reasoning_effort` field in either case).
#[test]
fn reasoning_effort_moonshot_toggles_thinking() {
    // (effort tier, expected value of `thinking.type`)
    let expectations = [("high", "enabled"), ("off", "disabled")];

    for (effort, thinking_type) in expectations {
        let mut request = json!({});
        apply_reasoning_effort(&mut request, Some(effort), ApiProvider::Moonshot);
        // Whole-body equality also proves nothing else was added to the request.
        assert_eq!(request, json!({ "thinking": { "type": thinking_type } }));
    }
}
/// Checks the request fields written for `ApiProvider::Ollama`: `high` must
/// produce only `think: true` and `off` only `think: false`.
#[test]
fn reasoning_effort_ollama_toggles_think_flag() {
    // Builds a fresh empty body, applies the given tier, and returns the result.
    let translate = |effort: &'static str| {
        let mut request = json!({});
        apply_reasoning_effort(&mut request, Some(effort), ApiProvider::Ollama);
        request
    };

    assert_eq!(translate("high"), json!({ "think": true }));
    assert_eq!(translate("off"), json!({ "think": false }));
}
#[test] #[test]
fn reasoning_effort_uses_nvidia_nim_chat_template_kwargs() { fn reasoning_effort_uses_nvidia_nim_chat_template_kwargs() {
let mut body = json!({}); let mut body = json!({});
+24
View File
@@ -283,6 +283,30 @@ DeepSeek compatibility aliases `deepseek-chat` and `deepseek-reasoner` map to
`deepseek-v4-flash` capability metadata and are scheduled to retire on `deepseek-v4-flash` capability metadata and are scheduled to retire on
2026-07-24 at 2026-07-24T15:59:00Z. 2026-07-24 at 2026-07-24T15:59:00Z.
## Reasoning Effort
`/reasoning <effort>` (and the `reasoning_effort` config key) is translated to
each provider's wire dialect by the client before the request is sent. `off`
disables thinking where the dialect supports it; providers marked "omitted"
receive no reasoning fields at all for that tier.
| Provider | `off` | `low`/`medium`/`high` | `max`/`xhigh` |
| --- | --- | --- | --- |
| `deepseek`, `deepseek-cn`, `siliconflow`, `siliconflow-CN`, `sglang`, `volcengine`, `atlascloud` | `thinking: {type: disabled}` | `reasoning_effort: "high"` + `thinking: {type: enabled}` | `reasoning_effort: "max"` + `thinking: {type: enabled}` |
| `openrouter`, `novita`, `together` | `thinking: {type: disabled}` | `reasoning_effort` pass-through + `thinking: {type: enabled}` | `reasoning_effort: "xhigh"` + `thinking: {type: enabled}` |
| `moonshot` | `thinking: {type: disabled}` | `thinking: {type: enabled}` | `thinking: {type: enabled}` |
| `ollama` | `think: false` | `think: true` | `think: true` |
| `xiaomi-mimo` | `thinking: {type: disabled}` | `thinking: {type: enabled}` | `thinking: {type: enabled}` |
| `nvidia-nim` | `chat_template_kwargs.thinking: false` | `chat_template_kwargs`: `thinking: true` + `reasoning_effort: "high"` | `chat_template_kwargs`: `thinking: true` + `reasoning_effort: "max"` |
| `vllm` | `chat_template_kwargs.enable_thinking: false` | `chat_template_kwargs.enable_thinking: true` + `reasoning_effort` low/medium/high | `chat_template_kwargs.enable_thinking: true` + `reasoning_effort: "high"` (vLLM has no max tier) |
| `arcee`, `huggingface` | omitted | `reasoning_effort` pass-through | `reasoning_effort: "high"` |
| `fireworks` | omitted | `reasoning_effort: "high"` | `reasoning_effort: "max"` |
| `openai`, `wanjie-ark` | omitted | omitted | omitted |
| `openai-codex` | Responses API `reasoning` field (handled by the Responses bridge) | Responses API `reasoning` field | Responses API `reasoning` field |
AtlasCloud serves DeepSeek models, so it speaks the DeepSeek reasoning dialect,
including the `max` tier (#3024).
## Drift Check ## Drift Check
Run this before changing provider IDs, provider TOML tables, static model Run this before changing provider IDs, provider TOML tables, static model