fix(codex): keep Responses reasoning shape provider-native

Codex Responses requests now clamp legacy minimal effort to low and carry a regression test that the request body uses reasoning.effort without DeepSeek thinking or chat-completions reasoning_effort fields.
2026-06-12 02:27:59 -07:00
parent 8c690cb7bf
commit c1a48492b6
3 changed files with 92 additions and 50 deletions
@@ -90,7 +90,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
  `low`/`medium`/`high`/`xhigh` tiers. Startup, `/config`, and the model
  picker now display Codex labels instead of leaking DeepSeek
  `off`/`max` names, while Codex still reports as a Responses payload
-  provider.
+  provider. The Responses request builder also clamps legacy `minimal` input
+  to `low` and has regression coverage that Codex requests use
+  `reasoning.effort`, not DeepSeek `thinking` fields.
 - **OpenAI Codex context metadata (#3070).** The `gpt-5.5` default and
  CodeWhale aliases now use OpenAI's documented 1,050,000-token context window
  and 128,000 max-output metadata for context pressure, prompts, and doctor
@@ -90,7 +90,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
  `low`/`medium`/`high`/`xhigh` tiers. Startup, `/config`, and the model
  picker now display Codex labels instead of leaking DeepSeek
  `off`/`max` names, while Codex still reports as a Responses payload
-  provider.
+  provider. The Responses request builder also clamps legacy `minimal` input
+  to `low` and has regression coverage that Codex requests use
+  `reasoning.effort`, not DeepSeek `thinking` fields.
 - **OpenAI Codex context metadata (#3070).** The `gpt-5.5` default and
  CodeWhale aliases now use OpenAI's documented 1,050,000-token context window
  and 128,000 max-output metadata for context pressure, prompts, and doctor
@@ -23,64 +23,63 @@ use super::{DeepSeekClient, ERROR_BODY_MAX_BYTES, bounded_error_text, system_to_
 /// Base URL path for the Codex Responses endpoint.
 const CODEX_RESPONSES_PATH: &str = "/codex/responses";

-impl DeepSeekClient {
-    /// Build the Responses API request body from a `MessageRequest`.
-    fn build_responses_body(&self, request: &MessageRequest) -> Value {
-        let model = &request.model;
-        let mut body = json!({
-            "model": model,
-            "stream": true,
-            "store": false,
-        });
+/// Build the Responses API request body from a `MessageRequest`.
+fn build_responses_body(request: &MessageRequest) -> Value {
+    let model = &request.model;
+    let mut body = json!({
+        "model": model,
+        "stream": true,
+        "store": false,
+    });

-        // Instructions (system prompt). The Codex Responses backend rejects
-        // requests without instructions, so fall back to a minimal system
-        // prompt when the caller did not supply one.
-        let instructions = system_to_instructions(request.system.clone())
-            .filter(|text| !text.trim().is_empty())
-            .unwrap_or_else(|| "You are a helpful assistant.".to_string());
-        body["instructions"] = json!(instructions);
+    // Instructions (system prompt). The Codex Responses backend rejects
+    // requests without instructions, so fall back to a minimal system
+    // prompt when the caller did not supply one.
+    let instructions = system_to_instructions(request.system.clone())
+        .filter(|text| !text.trim().is_empty())
+        .unwrap_or_else(|| "You are a helpful assistant.".to_string());
+    body["instructions"] = json!(instructions);

-        // Convert messages to Responses input items.
-        let input = convert_messages_to_responses_input(request);
-        body["input"] = json!(input);
+    // Convert messages to Responses input items.
+    let input = convert_messages_to_responses_input(request);
+    body["input"] = json!(input);

-        // Convert tools to Responses function tools.
-        if let Some(tools) = request.tools.as_ref() {
-            let responses_tools: Vec<Value> =
-                tools.iter().map(tool_to_responses_function).collect();
-            if !responses_tools.is_empty() {
-                body["tools"] = json!(responses_tools);
-                body["tool_choice"] = json!("auto");
-                body["parallel_tool_calls"] = json!(true);
-            }
+    // Convert tools to Responses function tools.
+    if let Some(tools) = request.tools.as_ref() {
+        let responses_tools: Vec<Value> = tools.iter().map(tool_to_responses_function).collect();
+        if !responses_tools.is_empty() {
+            body["tools"] = json!(responses_tools);
+            body["tool_choice"] = json!("auto");
+            body["parallel_tool_calls"] = json!(true);
        }
-
-        // Reasoning configuration. The Codex Responses backend accepts
-        // low/medium/high/xhigh, so provider-aware callers normalize inherited
-        // DeepSeek-only values before request construction: "off" becomes
-        // "low", and CodeWhale's "auto" falls back to "medium".
-        if let Some(raw) = request.reasoning_effort.as_deref()
-            && let Some(effort) = codex_responses_reasoning_effort(raw)
-        {
-            body["reasoning"] = json!({
-                "effort": effort,
-                "summary": "auto",
-            });
-        }
-
-        // Include reasoning summaries in the stream.
-        body["include"] = json!(["reasoning.encrypted_content"]);
-
-        body
    }

+    // Reasoning configuration. The Codex Responses backend accepts
+    // low/medium/high/xhigh, so provider-aware callers normalize inherited
+    // DeepSeek-only values before request construction: "off" becomes
+    // "low", and CodeWhale's "auto" falls back to "medium".
+    if let Some(raw) = request.reasoning_effort.as_deref()
+        && let Some(effort) = codex_responses_reasoning_effort(raw)
+    {
+        body["reasoning"] = json!({
+            "effort": effort,
+            "summary": "auto",
+        });
+    }
+
+    // Include reasoning summaries in the stream.
+    body["include"] = json!(["reasoning.encrypted_content"]);
+
+    body
+}
+
+impl DeepSeekClient {
    /// Handle a streaming Responses API request for the OpenAI Codex provider.
    pub(super) async fn handle_responses_stream(
        &self,
        request: MessageRequest,
    ) -> Result<StreamEventBox> {
-        let body = self.build_responses_body(&request);
+        let body = build_responses_body(&request);
        let url = format!("{}{}", self.base_url, CODEX_RESPONSES_PATH);

        // The bearer Authorization header is already installed as a default
@@ -614,7 +613,7 @@ fn tool_to_responses_function(tool: &Tool) -> Value {
 fn codex_responses_reasoning_effort(raw: &str) -> Option<&'static str> {
    match raw.trim().to_ascii_lowercase().as_str() {
        "off" | "disabled" | "none" | "false" => Some("low"),
-        "minimal" => Some("minimal"),
+        "minimal" => Some("low"),
        "low" => Some("low"),
        "high" => Some("high"),
        "xhigh" | "max" | "maximum" => Some("xhigh"),
@@ -661,6 +660,7 @@ fn parse_responses_usage(val: &Value) -> Usage {
 #[cfg(test)]
 mod tests {
    use super::*;
+
    use crate::models::Message;

    #[test]
@@ -670,10 +670,48 @@ mod tests {
        assert_eq!(codex_responses_reasoning_effort("xhigh"), Some("xhigh"));
        assert_eq!(codex_responses_reasoning_effort("high"), Some("high"));
        assert_eq!(codex_responses_reasoning_effort("medium"), Some("medium"));
+        assert_eq!(codex_responses_reasoning_effort("minimal"), Some("low"));
        assert_eq!(codex_responses_reasoning_effort("auto"), Some("medium"));
        assert_eq!(codex_responses_reasoning_effort("off"), Some("low"));
    }

+    #[test]
+    fn codex_responses_body_uses_responses_reasoning_not_deepseek_thinking() {
+        let request = MessageRequest {
+            model: "gpt-5.5".to_string(),
+            messages: vec![Message {
+                role: "user".to_string(),
+                content: vec![ContentBlock::Text {
+                    text: "hello".to_string(),
+                    cache_control: None,
+                }],
+            }],
+            max_tokens: 128,
+            system: None,
+            tools: None,
+            tool_choice: None,
+            metadata: None,
+            thinking: None,
+            reasoning_effort: Some("max".to_string()),
+            stream: None,
+            temperature: None,
+            top_p: None,
+        };
+
+        let body = build_responses_body(&request);
+
+        assert_eq!(
+            body.pointer("/reasoning/effort").and_then(Value::as_str),
+            Some("xhigh")
+        );
+        assert_eq!(
+            body.pointer("/reasoning/summary").and_then(Value::as_str),
+            Some("auto")
+        );
+        assert!(body.get("thinking").is_none());
+        assert!(body.get("reasoning_effort").is_none());
+    }
+
    #[test]
    fn responses_input_includes_user_role_tool_results() {
        let request = MessageRequest {