diff --git a/CHANGELOG.md b/CHANGELOG.md index 849ab494..347c1ae6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -90,7 +90,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 `low`/`medium`/`high`/`xhigh` tiers. Startup, `/config`, and the model picker now display Codex labels instead of leaking DeepSeek `off`/`max` names, while Codex still reports as a Responses payload - provider. + provider. The Responses request builder also clamps legacy `minimal` input + to `low` and has regression coverage that Codex requests use + `reasoning.effort`, not DeepSeek `thinking` fields. - **OpenAI Codex context metadata (#3070).** The `gpt-5.5` default and CodeWhale aliases now use OpenAI's documented 1,050,000-token context window and 128,000 max-output metadata for context pressure, prompts, and doctor diff --git a/crates/tui/CHANGELOG.md b/crates/tui/CHANGELOG.md index 70b793fc..d4a98da3 100644 --- a/crates/tui/CHANGELOG.md +++ b/crates/tui/CHANGELOG.md @@ -90,7 +90,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 `low`/`medium`/`high`/`xhigh` tiers. Startup, `/config`, and the model picker now display Codex labels instead of leaking DeepSeek `off`/`max` names, while Codex still reports as a Responses payload - provider. + provider. The Responses request builder also clamps legacy `minimal` input + to `low` and has regression coverage that Codex requests use + `reasoning.effort`, not DeepSeek `thinking` fields. - **OpenAI Codex context metadata (#3070).** The `gpt-5.5` default and CodeWhale aliases now use OpenAI's documented 1,050,000-token context window and 128,000 max-output metadata for context pressure, prompts, and doctor diff --git a/crates/tui/src/client/responses.rs b/crates/tui/src/client/responses.rs index ab8f548f..5133268a 100644 --- a/crates/tui/src/client/responses.rs +++ b/crates/tui/src/client/responses.rs @@ -23,64 +23,63 @@ use super::{DeepSeekClient, ERROR_BODY_MAX_BYTES, bounded_error_text, system_to_ /// Base URL path for the Codex Responses endpoint. const CODEX_RESPONSES_PATH: &str = "/codex/responses"; -impl DeepSeekClient { - /// Build the Responses API request body from a `MessageRequest`. - fn build_responses_body(&self, request: &MessageRequest) -> Value { - let model = &request.model; - let mut body = json!({ - "model": model, - "stream": true, - "store": false, - }); +/// Build the Responses API request body from a `MessageRequest`. +fn build_responses_body(request: &MessageRequest) -> Value { + let model = &request.model; + let mut body = json!({ + "model": model, + "stream": true, + "store": false, + }); - // Instructions (system prompt). The Codex Responses backend rejects - // requests without instructions, so fall back to a minimal system - // prompt when the caller did not supply one. - let instructions = system_to_instructions(request.system.clone()) - .filter(|text| !text.trim().is_empty()) - .unwrap_or_else(|| "You are a helpful assistant.".to_string()); - body["instructions"] = json!(instructions); + // Instructions (system prompt). The Codex Responses backend rejects + // requests without instructions, so fall back to a minimal system + // prompt when the caller did not supply one. + let instructions = system_to_instructions(request.system.clone()) + .filter(|text| !text.trim().is_empty()) + .unwrap_or_else(|| "You are a helpful assistant.".to_string()); + body["instructions"] = json!(instructions); - // Convert messages to Responses input items. - let input = convert_messages_to_responses_input(request); - body["input"] = json!(input); + // Convert messages to Responses input items. + let input = convert_messages_to_responses_input(request); + body["input"] = json!(input); - // Convert tools to Responses function tools. - if let Some(tools) = request.tools.as_ref() { - let responses_tools: Vec = - tools.iter().map(tool_to_responses_function).collect(); - if !responses_tools.is_empty() { - body["tools"] = json!(responses_tools); - body["tool_choice"] = json!("auto"); - body["parallel_tool_calls"] = json!(true); - } + // Convert tools to Responses function tools. + if let Some(tools) = request.tools.as_ref() { + let responses_tools: Vec = tools.iter().map(tool_to_responses_function).collect(); + if !responses_tools.is_empty() { + body["tools"] = json!(responses_tools); + body["tool_choice"] = json!("auto"); + body["parallel_tool_calls"] = json!(true); } - - // Reasoning configuration. The Codex Responses backend accepts - // low/medium/high/xhigh, so provider-aware callers normalize inherited - // DeepSeek-only values before request construction: "off" becomes - // "low", and CodeWhale's "auto" falls back to "medium". - if let Some(raw) = request.reasoning_effort.as_deref() - && let Some(effort) = codex_responses_reasoning_effort(raw) - { - body["reasoning"] = json!({ - "effort": effort, - "summary": "auto", - }); - } - - // Include reasoning summaries in the stream. - body["include"] = json!(["reasoning.encrypted_content"]); - - body } + // Reasoning configuration. The Codex Responses backend accepts + // low/medium/high/xhigh, so provider-aware callers normalize inherited + // DeepSeek-only values before request construction: "off" becomes + // "low", and CodeWhale's "auto" falls back to "medium". + if let Some(raw) = request.reasoning_effort.as_deref() + && let Some(effort) = codex_responses_reasoning_effort(raw) + { + body["reasoning"] = json!({ + "effort": effort, + "summary": "auto", + }); + } + + // Include reasoning summaries in the stream. + body["include"] = json!(["reasoning.encrypted_content"]); + + body +} + +impl DeepSeekClient { /// Handle a streaming Responses API request for the OpenAI Codex provider. pub(super) async fn handle_responses_stream( &self, request: MessageRequest, ) -> Result { - let body = self.build_responses_body(&request); + let body = build_responses_body(&request); let url = format!("{}{}", self.base_url, CODEX_RESPONSES_PATH); // The bearer Authorization header is already installed as a default @@ -614,7 +613,7 @@ fn tool_to_responses_function(tool: &Tool) -> Value { fn codex_responses_reasoning_effort(raw: &str) -> Option<&'static str> { match raw.trim().to_ascii_lowercase().as_str() { "off" | "disabled" | "none" | "false" => Some("low"), - "minimal" => Some("minimal"), + "minimal" => Some("low"), "low" => Some("low"), "high" => Some("high"), "xhigh" | "max" | "maximum" => Some("xhigh"), @@ -661,6 +660,7 @@ fn parse_responses_usage(val: &Value) -> Usage { #[cfg(test)] mod tests { use super::*; + use crate::models::Message; #[test] @@ -670,10 +670,48 @@ mod tests { assert_eq!(codex_responses_reasoning_effort("xhigh"), Some("xhigh")); assert_eq!(codex_responses_reasoning_effort("high"), Some("high")); assert_eq!(codex_responses_reasoning_effort("medium"), Some("medium")); + assert_eq!(codex_responses_reasoning_effort("minimal"), Some("low")); assert_eq!(codex_responses_reasoning_effort("auto"), Some("medium")); assert_eq!(codex_responses_reasoning_effort("off"), Some("low")); } + #[test] + fn codex_responses_body_uses_responses_reasoning_not_deepseek_thinking() { + let request = MessageRequest { + model: "gpt-5.5".to_string(), + messages: vec![Message { + role: "user".to_string(), + content: vec![ContentBlock::Text { + text: "hello".to_string(), + cache_control: None, + }], + }], + max_tokens: 128, + system: None, + tools: None, + tool_choice: None, + metadata: None, + thinking: None, + reasoning_effort: Some("max".to_string()), + stream: None, + temperature: None, + top_p: None, + }; + + let body = build_responses_body(&request); + + assert_eq!( + body.pointer("/reasoning/effort").and_then(Value::as_str), + Some("xhigh") + ); + assert_eq!( + body.pointer("/reasoning/summary").and_then(Value::as_str), + Some("auto") + ); + assert!(body.get("thinking").is_none()); + assert!(body.get("reasoning_effort").is_none()); + } + #[test] fn responses_input_includes_user_role_tool_results() { let request = MessageRequest {