Merge PR #3050 from Hmbown: wire reasoning-effort for Atlascloud, Moonshot, Ollama dialects
fix(reasoning): wire reasoning-effort for Atlascloud, Moonshot, Ollama
This commit is contained in:
+76
-22
@@ -1226,7 +1226,8 @@ pub(super) fn apply_reasoning_effort(
|
|||||||
| ApiProvider::SiliconflowCn
|
| ApiProvider::SiliconflowCn
|
||||||
| ApiProvider::Sglang
|
| ApiProvider::Sglang
|
||||||
| ApiProvider::Volcengine
|
| ApiProvider::Volcengine
|
||||||
| ApiProvider::Together => {
|
| ApiProvider::Together
|
||||||
|
| ApiProvider::Atlascloud => {
|
||||||
body["thinking"] = json!({ "type": "disabled" });
|
body["thinking"] = json!({ "type": "disabled" });
|
||||||
}
|
}
|
||||||
ApiProvider::OpenaiCodex => {
|
ApiProvider::OpenaiCodex => {
|
||||||
@@ -1248,12 +1249,17 @@ pub(super) fn apply_reasoning_effort(
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
ApiProvider::Openai
|
ApiProvider::Openai
|
||||||
| ApiProvider::Atlascloud
|
|
||||||
| ApiProvider::WanjieArk
|
| ApiProvider::WanjieArk
|
||||||
| ApiProvider::Arcee
|
| ApiProvider::Arcee
|
||||||
| ApiProvider::Huggingface
|
| ApiProvider::Huggingface => {}
|
||||||
| ApiProvider::Moonshot
|
ApiProvider::Moonshot => {
|
||||||
| ApiProvider::Ollama => {}
|
// #3024: Kimi models accept thinking enable/disable.
|
||||||
|
body["thinking"] = json!({ "type": "disabled" });
|
||||||
|
}
|
||||||
|
ApiProvider::Ollama => {
|
||||||
|
// #3024: Ollama OpenAI-compat endpoint accepts think param.
|
||||||
|
body["think"] = json!(false);
|
||||||
|
}
|
||||||
ApiProvider::NvidiaNim => {
|
ApiProvider::NvidiaNim => {
|
||||||
body["chat_template_kwargs"] = json!({
|
body["chat_template_kwargs"] = json!({
|
||||||
"thinking": false,
|
"thinking": false,
|
||||||
@@ -1267,7 +1273,8 @@ pub(super) fn apply_reasoning_effort(
|
|||||||
| ApiProvider::Siliconflow
|
| ApiProvider::Siliconflow
|
||||||
| ApiProvider::SiliconflowCn
|
| ApiProvider::SiliconflowCn
|
||||||
| ApiProvider::Sglang
|
| ApiProvider::Sglang
|
||||||
| ApiProvider::Volcengine => {
|
| ApiProvider::Volcengine
|
||||||
|
| ApiProvider::Atlascloud => {
|
||||||
body["reasoning_effort"] = json!("high");
|
body["reasoning_effort"] = json!("high");
|
||||||
body["thinking"] = json!({ "type": "enabled" });
|
body["thinking"] = json!({ "type": "enabled" });
|
||||||
}
|
}
|
||||||
@@ -1311,12 +1318,15 @@ pub(super) fn apply_reasoning_effort(
|
|||||||
};
|
};
|
||||||
body["reasoning_effort"] = json!(value);
|
body["reasoning_effort"] = json!(value);
|
||||||
}
|
}
|
||||||
ApiProvider::Openai
|
ApiProvider::Openai | ApiProvider::WanjieArk | ApiProvider::OpenaiCodex => {}
|
||||||
| ApiProvider::Atlascloud
|
ApiProvider::Moonshot => {
|
||||||
| ApiProvider::WanjieArk
|
// #3024: Kimi models accept thinking enable.
|
||||||
| ApiProvider::Moonshot
|
body["thinking"] = json!({ "type": "enabled" });
|
||||||
| ApiProvider::Ollama
|
}
|
||||||
| ApiProvider::OpenaiCodex => {}
|
ApiProvider::Ollama => {
|
||||||
|
// #3024: Ollama think param.
|
||||||
|
body["think"] = json!(true);
|
||||||
|
}
|
||||||
ApiProvider::NvidiaNim => {
|
ApiProvider::NvidiaNim => {
|
||||||
body["chat_template_kwargs"] = json!({
|
body["chat_template_kwargs"] = json!({
|
||||||
"thinking": true,
|
"thinking": true,
|
||||||
@@ -1330,7 +1340,8 @@ pub(super) fn apply_reasoning_effort(
|
|||||||
| ApiProvider::Siliconflow
|
| ApiProvider::Siliconflow
|
||||||
| ApiProvider::SiliconflowCn
|
| ApiProvider::SiliconflowCn
|
||||||
| ApiProvider::Sglang
|
| ApiProvider::Sglang
|
||||||
| ApiProvider::Volcengine => {
|
| ApiProvider::Volcengine
|
||||||
|
| ApiProvider::Atlascloud => {
|
||||||
body["reasoning_effort"] = json!("max");
|
body["reasoning_effort"] = json!("max");
|
||||||
body["thinking"] = json!({ "type": "enabled" });
|
body["thinking"] = json!({ "type": "enabled" });
|
||||||
}
|
}
|
||||||
@@ -1355,12 +1366,15 @@ pub(super) fn apply_reasoning_effort(
|
|||||||
// "max" to "high" instead of sending an invalid value.
|
// "max" to "high" instead of sending an invalid value.
|
||||||
body["reasoning_effort"] = json!("high");
|
body["reasoning_effort"] = json!("high");
|
||||||
}
|
}
|
||||||
ApiProvider::Openai
|
ApiProvider::Openai | ApiProvider::WanjieArk | ApiProvider::OpenaiCodex => {}
|
||||||
| ApiProvider::Atlascloud
|
ApiProvider::Moonshot => {
|
||||||
| ApiProvider::WanjieArk
|
// #3024: Kimi models accept thinking enable.
|
||||||
| ApiProvider::Moonshot
|
body["thinking"] = json!({ "type": "enabled" });
|
||||||
| ApiProvider::Ollama
|
}
|
||||||
| ApiProvider::OpenaiCodex => {}
|
ApiProvider::Ollama => {
|
||||||
|
// #3024: Ollama think param.
|
||||||
|
body["think"] = json!(true);
|
||||||
|
}
|
||||||
ApiProvider::NvidiaNim => {
|
ApiProvider::NvidiaNim => {
|
||||||
body["chat_template_kwargs"] = json!({
|
body["chat_template_kwargs"] = json!({
|
||||||
"thinking": true,
|
"thinking": true,
|
||||||
@@ -2543,12 +2557,9 @@ mod tests {
|
|||||||
fn reasoning_effort_off_is_omitted_for_strict_openai_like_providers() {
|
fn reasoning_effort_off_is_omitted_for_strict_openai_like_providers() {
|
||||||
for provider in [
|
for provider in [
|
||||||
ApiProvider::Openai,
|
ApiProvider::Openai,
|
||||||
ApiProvider::Atlascloud,
|
|
||||||
ApiProvider::WanjieArk,
|
ApiProvider::WanjieArk,
|
||||||
ApiProvider::Arcee,
|
ApiProvider::Arcee,
|
||||||
ApiProvider::Huggingface,
|
ApiProvider::Huggingface,
|
||||||
ApiProvider::Moonshot,
|
|
||||||
ApiProvider::Ollama,
|
|
||||||
ApiProvider::Fireworks,
|
ApiProvider::Fireworks,
|
||||||
] {
|
] {
|
||||||
let mut body = json!({});
|
let mut body = json!({});
|
||||||
@@ -2562,6 +2573,49 @@ mod tests {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn reasoning_effort_atlascloud_speaks_deepseek_dialect() {
|
||||||
|
let mut body = json!({});
|
||||||
|
apply_reasoning_effort(&mut body, Some("high"), ApiProvider::Atlascloud);
|
||||||
|
assert_eq!(
|
||||||
|
body,
|
||||||
|
json!({ "reasoning_effort": "high", "thinking": { "type": "enabled" } })
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut body = json!({});
|
||||||
|
apply_reasoning_effort(&mut body, Some("max"), ApiProvider::Atlascloud);
|
||||||
|
assert_eq!(
|
||||||
|
body,
|
||||||
|
json!({ "reasoning_effort": "max", "thinking": { "type": "enabled" } })
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut body = json!({});
|
||||||
|
apply_reasoning_effort(&mut body, Some("off"), ApiProvider::Atlascloud);
|
||||||
|
assert_eq!(body, json!({ "thinking": { "type": "disabled" } }));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn reasoning_effort_moonshot_toggles_thinking() {
|
||||||
|
let mut body = json!({});
|
||||||
|
apply_reasoning_effort(&mut body, Some("high"), ApiProvider::Moonshot);
|
||||||
|
assert_eq!(body, json!({ "thinking": { "type": "enabled" } }));
|
||||||
|
|
||||||
|
let mut body = json!({});
|
||||||
|
apply_reasoning_effort(&mut body, Some("off"), ApiProvider::Moonshot);
|
||||||
|
assert_eq!(body, json!({ "thinking": { "type": "disabled" } }));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn reasoning_effort_ollama_toggles_think_flag() {
|
||||||
|
let mut body = json!({});
|
||||||
|
apply_reasoning_effort(&mut body, Some("high"), ApiProvider::Ollama);
|
||||||
|
assert_eq!(body, json!({ "think": true }));
|
||||||
|
|
||||||
|
let mut body = json!({});
|
||||||
|
apply_reasoning_effort(&mut body, Some("off"), ApiProvider::Ollama);
|
||||||
|
assert_eq!(body, json!({ "think": false }));
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn reasoning_effort_uses_nvidia_nim_chat_template_kwargs() {
|
fn reasoning_effort_uses_nvidia_nim_chat_template_kwargs() {
|
||||||
let mut body = json!({});
|
let mut body = json!({});
|
||||||
|
|||||||
@@ -283,6 +283,30 @@ DeepSeek compatibility aliases `deepseek-chat` and `deepseek-reasoner` map to
|
|||||||
`deepseek-v4-flash` capability metadata and are scheduled to retire on
|
`deepseek-v4-flash` capability metadata and are scheduled to retire on
|
||||||
2026-07-24 at 15:59:00Z.
|
2026-07-24 at 15:59:00Z.
|
||||||
|
|
||||||
|
## Reasoning Effort
|
||||||
|
|
||||||
|
`/reasoning <effort>` (and the `reasoning_effort` config key) is translated to
|
||||||
|
each provider's wire dialect by the client before the request is sent. `off`
|
||||||
|
disables thinking where the dialect supports it; providers marked "omitted"
|
||||||
|
receive no reasoning fields at all for that tier.
|
||||||
|
|
||||||
|
| Provider | `off` | `low`/`medium`/`high` | `max`/`xhigh` |
|
||||||
|
| --- | --- | --- | --- |
|
||||||
|
| `deepseek`, `deepseek-cn`, `siliconflow`, `siliconflow-CN`, `sglang`, `volcengine`, `atlascloud` | `thinking: {type: disabled}` | `reasoning_effort: "high"` + `thinking: {type: enabled}` | `reasoning_effort: "max"` + `thinking: {type: enabled}` |
|
||||||
|
| `openrouter`, `novita`, `together` | `thinking: {type: disabled}` | `reasoning_effort` pass-through + `thinking: {type: enabled}` | `reasoning_effort: "xhigh"` + `thinking: {type: enabled}` |
|
||||||
|
| `moonshot` | `thinking: {type: disabled}` | `thinking: {type: enabled}` | `thinking: {type: enabled}` |
|
||||||
|
| `ollama` | `think: false` | `think: true` | `think: true` |
|
||||||
|
| `xiaomi-mimo` | `thinking: {type: disabled}` | `thinking: {type: enabled}` | `thinking: {type: enabled}` |
|
||||||
|
| `nvidia-nim` | `chat_template_kwargs.thinking: false` | `chat_template_kwargs`: `thinking: true` + `reasoning_effort: "high"` | `chat_template_kwargs`: `thinking: true` + `reasoning_effort: "max"` |
|
||||||
|
| `vllm` | `chat_template_kwargs.enable_thinking: false` | `chat_template_kwargs.enable_thinking: true` + `reasoning_effort` low/medium/high | `chat_template_kwargs.enable_thinking: true` + `reasoning_effort: "high"` (vLLM has no max tier) |
|
||||||
|
| `arcee`, `huggingface` | omitted | `reasoning_effort` pass-through | `reasoning_effort: "high"` |
|
||||||
|
| `fireworks` | omitted | `reasoning_effort: "high"` | `reasoning_effort: "max"` |
|
||||||
|
| `openai`, `wanjie-ark` | omitted | omitted | omitted |
|
||||||
|
| `openai-codex` | Responses API `reasoning` field (handled by the Responses bridge) | Responses API `reasoning` field | Responses API `reasoning` field |
|
||||||
|
|
||||||
|
AtlasCloud serves DeepSeek models, so it speaks the DeepSeek reasoning dialect,
|
||||||
|
including the `max` tier (#3024).
|
||||||
|
|
||||||
## Drift Check
|
## Drift Check
|
||||||
|
|
||||||
Run this before changing provider IDs, provider TOML tables, static model
|
Run this before changing provider IDs, provider TOML tables, static model
|
||||||
|
|||||||
Reference in New Issue
Block a user