diff --git a/crates/tui/src/client.rs b/crates/tui/src/client.rs index b3e0aff7..3a6b36d6 100644 --- a/crates/tui/src/client.rs +++ b/crates/tui/src/client.rs @@ -1258,7 +1258,8 @@ pub(super) fn apply_reasoning_effort( | ApiProvider::Sglang | ApiProvider::Volcengine | ApiProvider::Together - | ApiProvider::Atlascloud => { + | ApiProvider::Atlascloud + | ApiProvider::Zai => { body["thinking"] = json!({ "type": "disabled" }); } ApiProvider::OpenaiCodex => { @@ -1304,7 +1305,7 @@ pub(super) fn apply_reasoning_effort( ApiProvider::Minimax => { body["thinking"] = json!({ "type": "disabled" }); } - ApiProvider::Zai | ApiProvider::Stepfun => {} + ApiProvider::Stepfun => {} }, "low" | "minimal" | "medium" | "mid" | "high" | "" => match provider { // DeepSeek compatibility: low/medium both map to high @@ -1381,9 +1382,15 @@ pub(super) fn apply_reasoning_effort( ApiProvider::Minimax => { body["thinking"] = json!({ "type": "adaptive" }); } - ApiProvider::Zai | ApiProvider::Stepfun => {} + ApiProvider::Zai => { + body["thinking"] = json!({ + "type": "enabled", + "clear_thinking": false, + }); + } + ApiProvider::Stepfun => {} }, - "xhigh" | "max" | "highest" => match provider { + "xhigh" | "max" | "highest" | "ultracode" => match provider { ApiProvider::Deepseek | ApiProvider::DeepseekCN | ApiProvider::Siliconflow @@ -1438,7 +1445,13 @@ pub(super) fn apply_reasoning_effort( ApiProvider::Minimax => { body["thinking"] = json!({ "type": "adaptive" }); } - ApiProvider::Zai | ApiProvider::Stepfun => {} + ApiProvider::Zai => { + body["thinking"] = json!({ + "type": "enabled", + "clear_thinking": false, + }); + } + ApiProvider::Stepfun => {} }, _ => {} } @@ -2882,6 +2895,34 @@ mod tests { assert_eq!(body, json!({ "reasoning_split": true })); } + #[test] + fn reasoning_effort_zai_uses_documented_thinking_shape() { + let mut body = json!({}); + apply_reasoning_effort(&mut body, Some("high"), ApiProvider::Zai); + assert_eq!( + body, + json!({ "thinking": { "type": "enabled", 
"clear_thinking": false } }) + ); + + let mut body = json!({}); + apply_reasoning_effort(&mut body, Some("max"), ApiProvider::Zai); + assert_eq!( + body, + json!({ "thinking": { "type": "enabled", "clear_thinking": false } }) + ); + + let mut body = json!({}); + apply_reasoning_effort(&mut body, Some("ultracode"), ApiProvider::Zai); + assert_eq!( + body, + json!({ "thinking": { "type": "enabled", "clear_thinking": false } }) + ); + + let mut body = json!({}); + apply_reasoning_effort(&mut body, Some("off"), ApiProvider::Zai); + assert_eq!(body, json!({ "thinking": { "type": "disabled" } })); + } + #[test] fn chat_parser_accepts_nvidia_nim_reasoning_field() -> Result<()> { let response = parse_chat_message(&json!({ diff --git a/crates/tui/src/client/chat.rs b/crates/tui/src/client/chat.rs index 726782cb..e00947db 100644 --- a/crates/tui/src/client/chat.rs +++ b/crates/tui/src/client/chat.rs @@ -2056,6 +2056,7 @@ fn provider_accepts_reasoning_content(provider: ApiProvider) -> bool { | ApiProvider::Arcee | ApiProvider::Minimax | ApiProvider::Sglang + | ApiProvider::Zai | ApiProvider::Moonshot // #3016: Kimi thinking traces use reasoning_content ) } @@ -3952,6 +3953,7 @@ mod alias_thinking_detection_tests { assert!(provider_accepts_reasoning_content(ApiProvider::XiaomiMimo)); assert!(provider_accepts_reasoning_content(ApiProvider::Arcee)); assert!(provider_accepts_reasoning_content(ApiProvider::Minimax)); + assert!(provider_accepts_reasoning_content(ApiProvider::Zai)); // #3016: Moonshot's native endpoint streams Kimi thinking as // reasoning_content. 
assert!(provider_accepts_reasoning_content(ApiProvider::Moonshot)); @@ -3987,6 +3989,11 @@ mod alias_thinking_detection_tests { "MiniMax-M3", None, )); + assert!(should_replay_reasoning_content_for_provider( + ApiProvider::Zai, + "GLM-5.2", + None, + )); assert!(!should_replay_reasoning_content_for_provider( ApiProvider::Moonshot, "kimi-for-coding", @@ -4096,6 +4103,10 @@ mod alias_thinking_detection_tests { is_reasoning_model_for_stream(ApiProvider::Arcee, "trinity-large-thinking"), "trinity-large-thinking should stream reasoning as thinking on direct Arcee" ); + assert!( + is_reasoning_model_for_stream(ApiProvider::Zai, "GLM-5.2"), + "GLM-5.2 should stream reasoning_content as thinking on direct Z.ai" + ); for model in [ "arcee-ai/trinity-large-thinking", "minimax/minimax-m3", diff --git a/crates/tui/src/client/responses.rs b/crates/tui/src/client/responses.rs index ac938bc6..57e8c509 100644 --- a/crates/tui/src/client/responses.rs +++ b/crates/tui/src/client/responses.rs @@ -609,7 +609,7 @@ fn codex_responses_reasoning_effort(raw: &str) -> Option<&'static str> { "minimal" => Some("low"), "low" => Some("low"), "high" => Some("high"), - "xhigh" | "max" | "maximum" => Some("xhigh"), + "xhigh" | "max" | "maximum" | "ultracode" => Some("xhigh"), _ => Some("medium"), } } @@ -707,6 +707,7 @@ mod tests { assert_eq!(codex_responses_reasoning_effort("max"), Some("xhigh")); assert_eq!(codex_responses_reasoning_effort("maximum"), Some("xhigh")); assert_eq!(codex_responses_reasoning_effort("xhigh"), Some("xhigh")); + assert_eq!(codex_responses_reasoning_effort("ultracode"), Some("xhigh")); assert_eq!(codex_responses_reasoning_effort("high"), Some("high")); assert_eq!(codex_responses_reasoning_effort("medium"), Some("medium")); assert_eq!(codex_responses_reasoning_effort("minimal"), Some("low")); diff --git a/crates/tui/src/model_routing.rs b/crates/tui/src/model_routing.rs index ba0b0cf7..274470f9 100644 --- a/crates/tui/src/model_routing.rs +++ 
b/crates/tui/src/model_routing.rs @@ -315,7 +315,7 @@ fn parse_auto_route_reasoning_effort(effort: &str) -> Option { "off" | "disabled" | "none" | "false" => Some(ReasoningEffort::Off), "low" | "minimal" | "medium" | "mid" => Some(ReasoningEffort::High), "high" => Some(ReasoningEffort::High), - "max" | "maximum" | "xhigh" => Some(ReasoningEffort::Max), + "max" | "maximum" | "xhigh" | "ultracode" => Some(ReasoningEffort::Max), _ => None, } } @@ -622,6 +622,12 @@ mod tests { assert_eq!(rec.model, "deepseek-v4-pro"); assert_eq!(rec.reasoning_effort, Some(ReasoningEffort::Max)); + + let rec = parse_auto_route_recommendation( + r#"{"model":"deepseek-v4-pro","reasoning_effort":"ultracode"}"#, + ) + .expect("ultracode should parse as max"); + assert_eq!(rec.reasoning_effort, Some(ReasoningEffort::Max)); } #[test] diff --git a/crates/tui/src/settings.rs b/crates/tui/src/settings.rs index d00521e5..bc2e3a20 100644 --- a/crates/tui/src/settings.rs +++ b/crates/tui/src/settings.rs @@ -1116,10 +1116,10 @@ fn normalize_reasoning_effort_setting(value: &str) -> Result> { "medium" | "mid" => "medium", "high" => "high", "auto" | "automatic" => "auto", - "max" | "maximum" | "xhigh" => "max", + "max" | "maximum" | "xhigh" | "ultracode" => "max", _ => { anyhow::bail!( - "Failed to update setting: invalid reasoning_effort '{value}'. Expected: auto, off, low, medium, high, max, xhigh, or default." + "Failed to update setting: invalid reasoning_effort '{value}'. Expected: auto, off, low, medium, high, max, xhigh, ultracode, or default." 
); } }; @@ -1388,6 +1388,10 @@ mod tests { .set("reasoning_effort", "xhigh") .expect("normalize xhigh"); assert_eq!(settings.reasoning_effort.as_deref(), Some("max")); + settings + .set("reasoning_effort", "ultracode") + .expect("normalize ultracode"); + assert_eq!(settings.reasoning_effort.as_deref(), Some("max")); settings .set("reasoning_effort", "default") .expect("clear effort"); diff --git a/crates/tui/src/tui/app.rs b/crates/tui/src/tui/app.rs index 1184a6f1..34c502d7 100644 --- a/crates/tui/src/tui/app.rs +++ b/crates/tui/src/tui/app.rs @@ -207,7 +207,7 @@ impl ReasoningEffort { "medium" | "mid" => Self::Medium, "high" => Self::High, "auto" | "automatic" => Self::Auto, - "max" | "maximum" | "xhigh" => Self::Max, + "max" | "maximum" | "xhigh" | "ultracode" => Self::Max, _ => Self::default(), } } @@ -5664,6 +5664,10 @@ mod tests { ReasoningEffort::Max.api_value_for_provider(ApiProvider::Deepseek), Some("max") ); + assert_eq!( + ReasoningEffort::from_setting("ultracode"), + ReasoningEffort::Max + ); } #[test] diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index 46f49247..297fe67b 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -958,7 +958,7 @@ If you are upgrading from older releases: - `path_suffix` (string, optional provider-table key): override the chat-completions path for OpenAI-compatible gateways that do not serve `/v1/chat/completions`. For example, `[providers.openai] path_suffix = "/chat/completions"` sends chat requests to the unversioned base URL plus `/chat/completions`; `models` and `beta/*` requests keep their normal routing. - `insecure_skip_tls_verify` (bool, optional provider-table key): disabled by default. When true on the active provider table, only the LLM provider HTTP client skips TLS certificate verification. Prefer `SSL_CERT_FILE` for corporate or private CA bundles; `codewhale doctor` reports this setting when enabled. 
- `default_text_model` (string, optional): defaults to `deepseek-v4-pro` for DeepSeek and generic OpenAI-compatible endpoints, `deepseek-ai/deepseek-v4-pro` for NVIDIA NIM, `deepseek-ai/deepseek-v4-flash` for AtlasCloud, `deepseek-reasoner` for Wanjie Ark, `DeepSeek-V4-Pro` for Volcengine Ark, `deepseek/deepseek-v4-pro` for OpenRouter and Novita, `mimo-v2.5-pro` for Xiaomi MiMo, `accounts/fireworks/models/deepseek-v4-pro` for Fireworks, `deepseek-ai/DeepSeek-V4-Pro` for SiliconFlow, `trinity-large-thinking` for Arcee AI, `kimi-k2.7-code` for Moonshot, `MiniMax-M3` for MiniMax, `GLM-5.1` for Z.ai, `step-3.7-flash` for StepFun, `deepseek-ai/DeepSeek-V4-Pro` for SGLang/vLLM, and `deepseek-coder:1.3b` for Ollama. Hugging Face and Together AI both default to `deepseek-ai/DeepSeek-V4-Pro`. Current public DeepSeek IDs are `deepseek-v4-pro` and `deepseek-v4-flash`, both with 1M context windows, 384K max output, and thinking mode enabled by default. Legacy `deepseek-chat` and `deepseek-reasoner` remain compatibility aliases for `deepseek-v4-flash` until July 24, 2026, except SiliconFlow maps `deepseek-reasoner` and `deepseek-r1` to its Pro model while `deepseek-chat` and `deepseek-v3` map to Flash. Provider-specific mappings translate `deepseek-v4-pro` / `deepseek-v4-flash` to each provider's model ID where supported. 
OpenRouter also recognizes recent large IDs such as `arcee-ai/trinity-large-thinking`, `minimax/minimax-m3`, `minimax/minimax-2.7`, `xiaomi/mimo-v2.5-pro`, `qwen/qwen3.6-flash`, `qwen/qwen3.6-35b-a3b`, `qwen/qwen3.6-max-preview`, `qwen/qwen3.6-27b`, `qwen/qwen3.6-plus`, `qwen/qwen3.7-max`, `google/gemma-4-31b-it`, `moonshotai/kimi-k2.7-code`, `moonshotai/kimi-k2.6`, `nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free`, and `nvidia/nemotron-3-ultra-550b-a55b`; direct Arcee uses bare IDs such as `trinity-large-thinking` and `trinity-large-preview`; direct Moonshot recognizes `kimi-k2.7-code`, `kimi-k2.6`, and Kimi Code's stable `kimi-for-coding`; direct MiniMax recognizes `MiniMax-M3` and the documented M2.x chat model IDs; direct Xiaomi MiMo recognizes chat IDs `mimo-v2.5-pro` and `mimo-v2.5`, while TTS IDs are selected through `codewhale speech` / `tts`. Generic `openai`, `atlascloud`, `wanjie-ark`, `xiaomi-mimo`, `arcee`, `moonshot`, `minimax`, `zai`, `stepfun`, and Ollama model IDs are passed through unchanged after known aliases are normalized. OpenRouter and SiliconFlow provider configs with a custom `base_url` also preserve explicit model values, which lets OpenAI-compatible gateways accept bare model IDs. Use `/models` or `codewhale models` to discover live IDs from your configured endpoint. `CODEWHALE_MODEL` overrides this for a single process; `DEEPSEEK_MODEL` is the legacy alias. -- `reasoning_effort` (string, optional): `off`, `low`, `medium`, `high`, `max`, or `xhigh`; defaults to the configured UI tier. DeepSeek Platform receives top-level `thinking` / `reasoning_effort` fields. OpenAI Codex normalizes stale `off` to `low` and sends `max` as Responses `xhigh`. NVIDIA NIM receives equivalent settings through `chat_template_kwargs`. +- `reasoning_effort` (string, optional): `off`, `low`, `medium`, `high`, `max`, `xhigh`, or `ultracode`; defaults to the configured UI tier. DeepSeek Platform receives top-level `thinking` / `reasoning_effort` fields. 
OpenAI Codex normalizes stale `off` to `low` and sends `max` / `ultracode` as Responses `xhigh`. Z.ai receives the documented `thinking` object: `off` sends `{"type": "disabled"}`, while `low` through `max` / `ultracode` all send `{"type": "enabled", "clear_thinking": false}`, so enabled thinking serves as the GLM coding high/max lane. NVIDIA NIM receives equivalent settings through `chat_template_kwargs`. - `verbosity` (string, optional): `normal` or `concise`. `normal` keeps the default conversational prompt. `concise` appends a prompt discipline block for direct, low-chatter output; CLI noninteractive commands (`exec`, `eval`,