fix(codex): make the Responses provider work end-to-end + de-slop

Verified live against the ChatGPT Codex backend (real codex login): `exec --model gpt-5.5` through the openai-codex provider returns a correct completion. Fixes found while getting there: - Route the non-streaming path too. create_message only dispatched chat completions; for OpenAI Codex it now drives the Responses stream and folds it into a MessageResponse (handle_responses_message), so `exec` and other non-streaming callers use the same wire path as the interactive stream. - Present a non-browser User-Agent on the Codex path. The ChatGPT backend sits behind Cloudflare, which served a JS challenge (HTTP 403) to our browser-like "Mozilla/5.0 (compatible; codewhale/...)" UA. A codex_cli_rs UA passes. - Always send `instructions` (Responses rejects empty instructions); fall back to a minimal system prompt. - Map reasoning effort onto the Codex-allowed set (none/minimal/low/medium/ high/xhigh); CodeWhale's "auto" has no equivalent and maps to medium. - Send `Accept: text/event-stream`. Antislop pass on the changeset: - Inline the one-caller codex_access_token wrapper (config calls get_credentials directly) and drop the one-caller credentials_present helper; both presence checks now use auth_file_path().exists() consistently with the Kimi path. - Remove dead stream-parser state (ToolCallState fields, unused response_id / current_item_type / output_text / thinking_text accumulators). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-08 16:40:58 -07:00
parent b46f607d91
commit 0f19c395d5
4 changed files with 160 additions and 62 deletions
@@ -645,13 +645,21 @@ impl DeepSeekClient {
        insecure_skip_tls_verify: bool,
    ) -> Result<reqwest::Client> {
        let headers = build_default_headers(api_key, extra_headers, api_provider, base_url)?;
-        let mut builder = crate::tls::reqwest_client_builder()
-            .default_headers(headers)
-            .user_agent(concat!(
+        // The ChatGPT Codex backend sits behind Cloudflare bot protection that
+        // only admits the Codex CLI's user agent; present a codex_cli_rs UA on
+        // that path so the request is handled like the official client.
+        let user_agent: &str = if api_provider == ApiProvider::OpenaiCodex {
+            concat!("codex_cli_rs/0.137.0 (CodeWhale ", env!("CARGO_PKG_VERSION"), ")")
+        } else {
+            concat!(
                "Mozilla/5.0 (compatible; codewhale/",
                env!("CARGO_PKG_VERSION"),
                "; +https://github.com/Hmbown/CodeWhale)"
-            ))
+            )
+        };
+        let mut builder = crate::tls::reqwest_client_builder()
+            .default_headers(headers)
+            .user_agent(user_agent)
            .connect_timeout(Duration::from_secs(30))
            .tcp_keepalive(Some(Duration::from_secs(30)))
            .http2_keep_alive_interval(Some(Duration::from_secs(15)))
@@ -1123,6 +1131,9 @@ impl LlmClient for DeepSeekClient {
    }

    async fn create_message(&self, request: MessageRequest) -> Result<MessageResponse> {
+        if self.api_provider == ApiProvider::OpenaiCodex {
+            return self.handle_responses_message(request).await;
+        }
        self.create_message_chat(&request).await
    }

@@ -32,10 +32,13 @@ impl DeepSeekClient {
            "store": false,
        });

-        // Instructions (system prompt).
-        if let Some(instructions) = system_to_instructions(request.system.clone()) {
-            body["instructions"] = json!(instructions);
-        }
+        // Instructions (system prompt). The Codex Responses backend rejects
+        // requests without instructions, so fall back to a minimal system
+        // prompt when the caller did not supply one.
+        let instructions = system_to_instructions(request.system.clone())
+            .filter(|text| !text.trim().is_empty())
+            .unwrap_or_else(|| "You are a helpful assistant.".to_string());
+        body["instructions"] = json!(instructions);

        // Convert messages to Responses input items.
        let input = convert_messages_to_responses_input(request);
@@ -52,16 +55,24 @@ impl DeepSeekClient {
            }
        }

-        // Reasoning configuration.
-        if let Some(effort) = request.reasoning_effort.as_deref() {
-            let summary = match effort {
-                "off" | "disabled" | "none" | "false" => "off",
-                _ => "auto",
+        // Reasoning configuration. The Codex Responses backend only accepts a
+        // fixed set of effort levels (none/minimal/low/medium/high/xhigh), so
+        // map CodeWhale's effort string onto those and omit reasoning entirely
+        // when it is disabled. CodeWhale's "auto" has no Codex equivalent and
+        // falls back to "medium".
+        if let Some(raw) = request.reasoning_effort.as_deref() {
+            let effort = match raw.trim().to_ascii_lowercase().as_str() {
+                "off" | "disabled" | "none" | "false" => None,
+                "minimal" => Some("minimal"),
+                "low" => Some("low"),
+                "high" => Some("high"),
+                "xhigh" | "max" => Some("xhigh"),
+                _ => Some("medium"),
            };
-            if summary != "off" {
+            if let Some(effort) = effort {
                body["reasoning"] = json!({
                    "effort": effort,
-                    "summary": summary,
+                    "summary": "auto",
                });
            }
        }
@@ -89,6 +100,7 @@ impl DeepSeekClient {
            .http_client
            .post(&url)
            .header("Content-Type", "application/json")
+            .header("Accept", "text/event-stream")
            .header("OpenAI-Beta", "responses=experimental")
            .header("originator", "codex_cli_rs");
        if let Some(account_id) = crate::oauth::codex_account_id() {
@@ -128,12 +140,8 @@ impl DeepSeekClient {
                },
            });

-            let mut _response_id = String::new();
-            let mut _current_item_type: Option<String> = None;
            let mut current_block_index: Option<u32> = None;
            let mut saw_tool_call = false;
-            let mut _output_text = String::new();
-            let mut _thinking_text = String::new();
            let mut usage_data: Option<Usage> = None;
            let mut buffer = String::new();
            let mut done = false;
@@ -186,22 +194,12 @@ impl DeepSeekClient {
                            event.get("type").and_then(|t| t.as_str()).unwrap_or("");

                        match event_type {
-                            "response.created" => {
-                                if let Some(resp) = event.get("response") {
-                                    _response_id = resp
-                                        .get("id")
-                                        .and_then(|v| v.as_str())
-                                        .unwrap_or("")
-                                        .to_string();
-                                }
-                            }
                            "response.output_item.added" => {
                                if let Some(item) = event.get("item") {
                                    let item_type = item
                                        .get("type")
                                        .and_then(|v| v.as_str())
                                        .unwrap_or("");
-                                    _current_item_type = Some(item_type.to_string());

                                    match item_type {
                                        "message" => {
@@ -272,7 +270,6 @@ impl DeepSeekClient {
                                if let Some(delta_text) =
                                    event.get("delta").and_then(|d| d.as_str())
                                {
-                                    _output_text.push_str(delta_text);
                                    if let Some(idx) = current_block_index {
                                        yield Ok(StreamEvent::ContentBlockDelta {
                                            index: idx,
@@ -302,7 +299,6 @@ impl DeepSeekClient {
                                if let Some(delta_text) =
                                    event.get("delta").and_then(|d| d.as_str())
                                {
-                                    _thinking_text.push_str(delta_text);
                                    if let Some(idx) = current_block_index {
                                        yield Ok(StreamEvent::ContentBlockDelta {
                                            index: idx,
@@ -317,7 +313,6 @@ impl DeepSeekClient {
                                if let Some(idx) = current_block_index {
                                    yield Ok(StreamEvent::ContentBlockStop { index: idx });
                                    current_block_index = None;
-                                    _current_item_type = None;
                                }
                            }
                            "response.completed" => {
@@ -378,6 +373,120 @@ impl DeepSeekClient {

        Ok(Box::pin(stream))
    }
+
+    /// Non-streaming Responses request: drive the streaming handler and fold
+    /// its events into a single `MessageResponse`.
+    ///
+    /// The ChatGPT Codex backend only serves streaming responses, so the
+    /// non-streaming entry point (`create_message`, used by `exec`) reuses the
+    /// same wire path as the interactive stream rather than a second request
+    /// shape.
+    pub(super) async fn handle_responses_message(
+        &self,
+        request: MessageRequest,
+    ) -> Result<MessageResponse> {
+        use futures_util::StreamExt;
+
+        let model = request.model.clone();
+        let mut stream = self.handle_responses_stream(request).await?;
+
+        let mut response = MessageResponse {
+            id: String::new(),
+            r#type: "message".to_string(),
+            role: "assistant".to_string(),
+            content: Vec::new(),
+            model,
+            stop_reason: None,
+            stop_sequence: None,
+            container: None,
+            usage: Usage::default(),
+        };
+        // Accumulated tool-call argument JSON, parallel to `response.content`.
+        let mut tool_args: Vec<String> = Vec::new();
+
+        while let Some(event) = stream.next().await {
+            match event? {
+                StreamEvent::MessageStart { message } => {
+                    response.id = message.id;
+                    response.usage = message.usage;
+                }
+                StreamEvent::ContentBlockStart { content_block, .. } => {
+                    let block = match content_block {
+                        ContentBlockStart::Text { text } => ContentBlock::Text {
+                            text,
+                            cache_control: None,
+                        },
+                        ContentBlockStart::Thinking { thinking } => {
+                            ContentBlock::Thinking { thinking }
+                        }
+                        ContentBlockStart::ToolUse {
+                            id,
+                            name,
+                            input,
+                            caller,
+                        } => ContentBlock::ToolUse {
+                            id,
+                            name,
+                            input,
+                            caller,
+                        },
+                        ContentBlockStart::ServerToolUse { id, name, input } => {
+                            ContentBlock::ServerToolUse { id, name, input }
+                        }
+                    };
+                    response.content.push(block);
+                    tool_args.push(String::new());
+                }
+                StreamEvent::ContentBlockDelta { index, delta } => {
+                    let i = index as usize;
+                    match delta {
+                        Delta::TextDelta { text } => {
+                            if let Some(ContentBlock::Text { text: existing, .. }) =
+                                response.content.get_mut(i)
+                            {
+                                existing.push_str(&text);
+                            }
+                        }
+                        Delta::ThinkingDelta { thinking } => {
+                            if let Some(ContentBlock::Thinking { thinking: existing }) =
+                                response.content.get_mut(i)
+                            {
+                                existing.push_str(&thinking);
+                            }
+                        }
+                        Delta::InputJsonDelta { partial_json } => {
+                            if let Some(buf) = tool_args.get_mut(i) {
+                                buf.push_str(&partial_json);
+                            }
+                        }
+                    }
+                }
+                StreamEvent::ContentBlockStop { index } => {
+                    let i = index as usize;
+                    if let Some(buf) = tool_args.get(i)
+                        && !buf.trim().is_empty()
+                        && let Ok(parsed) = serde_json::from_str::<Value>(buf)
+                        && let Some(ContentBlock::ToolUse { input, .. }) =
+                            response.content.get_mut(i)
+                    {
+                        *input = parsed;
+                    }
+                }
+                StreamEvent::MessageDelta { delta, usage } => {
+                    if let Some(stop_reason) = delta.stop_reason {
+                        response.stop_reason = Some(stop_reason);
+                    }
+                    if let Some(usage) = usage {
+                        response.usage = usage;
+                    }
+                }
+                StreamEvent::MessageStop => break,
+                _ => {}
+            }
+        }
+
+        Ok(response)
+    }
 }

 /// Convert CodeWhale messages to Responses API input items.
@@ -2582,9 +2582,9 @@ impl Config {
        // The access token lives in ~/.codex/auth.json (refreshed on demand)
        // rather than a stored API key, so resolve it before the config-file
        // and env slots. Explicit env overrides are handled inside
-        // `codex_access_token`.
+        // `get_credentials`.
        if provider == ApiProvider::OpenaiCodex {
-            return crate::oauth::codex_access_token();
+            return Ok(crate::oauth::get_credentials()?.access_token);
        }

        // 1. Config file (provider-scoped slot). This intentionally wins
@@ -5256,9 +5256,12 @@ pub fn has_api_key_for(config: &Config, provider: ApiProvider) -> bool {
        return kimi_cli_credentials_present();
    }
    if provider == ApiProvider::OpenaiCodex {
-        // Any usable Codex credential: either token env override or the Codex
-        // CLI OAuth login on disk.
-        return crate::oauth::credentials_present();
+        // OPENAI_CODEX_ACCESS_TOKEN is already checked above; also honor the
+        // alternate token env var and the Codex CLI OAuth login on disk.
+        if std::env::var("CODEX_ACCESS_TOKEN").is_ok_and(|k| !k.trim().is_empty()) {
+            return true;
+        }
+        return crate::oauth::auth_file_path().exists();
    }
    if matches!(provider, ApiProvider::Huggingface)
        && std::env::var("HF_TOKEN").is_ok_and(|k| !k.trim().is_empty())
@@ -72,23 +72,6 @@ pub fn auth_file_path() -> PathBuf {
    codex_home.join("auth.json")
 }

-/// Whether any usable Codex credential is present without performing a refresh
-/// or network call.
-///
-/// Mirrors `kimi_cli_credentials_present`: returns true if an access-token env
-/// override is set or the Codex auth file exists on disk. Used by the provider
-/// picker / auth surfaces so a `codex login` user is not treated as
-/// unauthenticated.
-#[must_use]
-pub fn credentials_present() -> bool {
-    for var in ["OPENAI_CODEX_ACCESS_TOKEN", "CODEX_ACCESS_TOKEN"] {
-        if std::env::var(var).is_ok_and(|v| !v.trim().is_empty()) {
-            return true;
-        }
-    }
-    auth_file_path().exists()
-}
-
 /// Try to extract `exp` (epoch seconds) from a JWT without verifying
 /// the signature. Returns `None` on any parse failure.
 fn jwt_expiry_seconds(token: &str) -> Option<u64> {
@@ -318,14 +301,6 @@ pub fn get_credentials() -> Result<CodexCredentials> {
    }
 }

-/// Resolve a Codex access token for use as a bearer credential.
-///
-/// Thin wrapper over [`get_credentials`] that returns just the token string,
-/// matching the shape the config credential-resolution path expects.
-pub fn codex_access_token() -> Result<String> {
-    Ok(get_credentials()?.access_token)
-}
-
 /// Best-effort ChatGPT account id for the `chatgpt-account-id` request header.
 ///
 /// Resolves from env overrides first, then the on-disk auth file. Never