fix(codex): make the Responses provider work end-to-end + de-slop

Verified live against the ChatGPT Codex backend (real codex login):
`exec --model gpt-5.5` through the openai-codex provider returns a correct
completion. Fixes found while getting there:

- Route the non-streaming path too. create_message only dispatched chat
  completions; for OpenAI Codex it now drives the Responses stream and folds it
  into a MessageResponse (handle_responses_message), so `exec` and other
  non-streaming callers use the same wire path as the interactive stream.
- Present a non-browser User-Agent on the Codex path. The ChatGPT backend sits
  behind Cloudflare, which served a JS challenge (HTTP 403) to our browser-like
  "Mozilla/5.0 (compatible; codewhale/...)" UA. A codex_cli_rs UA passes.
- Always send `instructions` (Responses rejects empty instructions); fall back
  to a minimal system prompt.
- Map reasoning effort onto the Codex-allowed set (none/minimal/low/medium/
  high/xhigh); CodeWhale's "auto" has no equivalent and maps to medium.
- Send `Accept: text/event-stream`.

Antislop pass on the changeset:
- Inline the one-caller codex_access_token wrapper (config calls get_credentials
  directly) and drop the one-caller credentials_present helper; both presence
  checks now use auth_file_path().exists() consistently with the Kimi path.
- Remove dead stream-parser state (ToolCallState fields, unused response_id /
  current_item_type / output_text / thinking_text accumulators).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hunter Bown
2026-06-08 16:40:58 -07:00
parent b46f607d91
commit 0f19c395d5
4 changed files with 160 additions and 62 deletions
+15 -4
View File
@@ -645,13 +645,21 @@ impl DeepSeekClient {
insecure_skip_tls_verify: bool,
) -> Result<reqwest::Client> {
let headers = build_default_headers(api_key, extra_headers, api_provider, base_url)?;
let mut builder = crate::tls::reqwest_client_builder()
.default_headers(headers)
.user_agent(concat!(
// The ChatGPT Codex backend sits behind Cloudflare bot protection that
// only admits the Codex CLI's user agent; present a codex_cli_rs UA on
// that path so the request is handled like the official client.
let user_agent: &str = if api_provider == ApiProvider::OpenaiCodex {
concat!("codex_cli_rs/0.137.0 (CodeWhale ", env!("CARGO_PKG_VERSION"), ")")
} else {
concat!(
"Mozilla/5.0 (compatible; codewhale/",
env!("CARGO_PKG_VERSION"),
"; +https://github.com/Hmbown/CodeWhale)"
))
)
};
let mut builder = crate::tls::reqwest_client_builder()
.default_headers(headers)
.user_agent(user_agent)
.connect_timeout(Duration::from_secs(30))
.tcp_keepalive(Some(Duration::from_secs(30)))
.http2_keep_alive_interval(Some(Duration::from_secs(15)))
@@ -1123,6 +1131,9 @@ impl LlmClient for DeepSeekClient {
}
/// Produce a complete (non-streaming) response for `request`.
///
/// Every provider except OpenAI Codex goes through the chat-completions
/// path; OpenAI Codex is served by the Responses handler instead.
async fn create_message(&self, request: MessageRequest) -> Result<MessageResponse> {
    // Guard clause for the default path: anything that is not Codex uses
    // chat completions.
    if self.api_provider != ApiProvider::OpenaiCodex {
        return self.create_message_chat(&request).await;
    }
    self.handle_responses_message(request).await
}
+137 -28
View File
@@ -32,10 +32,13 @@ impl DeepSeekClient {
"store": false,
});
// Instructions (system prompt).
if let Some(instructions) = system_to_instructions(request.system.clone()) {
body["instructions"] = json!(instructions);
}
// Instructions (system prompt). The Codex Responses backend rejects
// requests without instructions, so fall back to a minimal system
// prompt when the caller did not supply one.
let instructions = system_to_instructions(request.system.clone())
.filter(|text| !text.trim().is_empty())
.unwrap_or_else(|| "You are a helpful assistant.".to_string());
body["instructions"] = json!(instructions);
// Convert messages to Responses input items.
let input = convert_messages_to_responses_input(request);
@@ -52,16 +55,24 @@ impl DeepSeekClient {
}
}
// Reasoning configuration.
if let Some(effort) = request.reasoning_effort.as_deref() {
let summary = match effort {
"off" | "disabled" | "none" | "false" => "off",
_ => "auto",
// Reasoning configuration. The Codex Responses backend only accepts a
// fixed set of effort levels (none/minimal/low/medium/high/xhigh), so
// map CodeWhale's effort string onto those and omit reasoning entirely
// when it is disabled. CodeWhale's "auto" has no Codex equivalent and
// falls back to "medium".
if let Some(raw) = request.reasoning_effort.as_deref() {
let effort = match raw.trim().to_ascii_lowercase().as_str() {
"off" | "disabled" | "none" | "false" => None,
"minimal" => Some("minimal"),
"low" => Some("low"),
"high" => Some("high"),
"xhigh" | "max" => Some("xhigh"),
_ => Some("medium"),
};
if summary != "off" {
if let Some(effort) = effort {
body["reasoning"] = json!({
"effort": effort,
"summary": summary,
"summary": "auto",
});
}
}
@@ -89,6 +100,7 @@ impl DeepSeekClient {
.http_client
.post(&url)
.header("Content-Type", "application/json")
.header("Accept", "text/event-stream")
.header("OpenAI-Beta", "responses=experimental")
.header("originator", "codex_cli_rs");
if let Some(account_id) = crate::oauth::codex_account_id() {
@@ -128,12 +140,8 @@ impl DeepSeekClient {
},
});
let mut _response_id = String::new();
let mut _current_item_type: Option<String> = None;
let mut current_block_index: Option<u32> = None;
let mut saw_tool_call = false;
let mut _output_text = String::new();
let mut _thinking_text = String::new();
let mut usage_data: Option<Usage> = None;
let mut buffer = String::new();
let mut done = false;
@@ -186,22 +194,12 @@ impl DeepSeekClient {
event.get("type").and_then(|t| t.as_str()).unwrap_or("");
match event_type {
"response.created" => {
if let Some(resp) = event.get("response") {
_response_id = resp
.get("id")
.and_then(|v| v.as_str())
.unwrap_or("")
.to_string();
}
}
"response.output_item.added" => {
if let Some(item) = event.get("item") {
let item_type = item
.get("type")
.and_then(|v| v.as_str())
.unwrap_or("");
_current_item_type = Some(item_type.to_string());
match item_type {
"message" => {
@@ -272,7 +270,6 @@ impl DeepSeekClient {
if let Some(delta_text) =
event.get("delta").and_then(|d| d.as_str())
{
_output_text.push_str(delta_text);
if let Some(idx) = current_block_index {
yield Ok(StreamEvent::ContentBlockDelta {
index: idx,
@@ -302,7 +299,6 @@ impl DeepSeekClient {
if let Some(delta_text) =
event.get("delta").and_then(|d| d.as_str())
{
_thinking_text.push_str(delta_text);
if let Some(idx) = current_block_index {
yield Ok(StreamEvent::ContentBlockDelta {
index: idx,
@@ -317,7 +313,6 @@ impl DeepSeekClient {
if let Some(idx) = current_block_index {
yield Ok(StreamEvent::ContentBlockStop { index: idx });
current_block_index = None;
_current_item_type = None;
}
}
"response.completed" => {
@@ -378,6 +373,120 @@ impl DeepSeekClient {
Ok(Box::pin(stream))
}
/// Non-streaming Responses request: drive the streaming handler and fold
/// its events into a single `MessageResponse`.
///
/// The ChatGPT Codex backend only serves streaming responses, so the
/// non-streaming entry point (`create_message`, used by `exec`) reuses the
/// same wire path as the interactive stream rather than a second request
/// shape.
pub(super) async fn handle_responses_message(
&self,
request: MessageRequest,
) -> Result<MessageResponse> {
use futures_util::StreamExt;
let model = request.model.clone();
let mut stream = self.handle_responses_stream(request).await?;
let mut response = MessageResponse {
id: String::new(),
r#type: "message".to_string(),
role: "assistant".to_string(),
content: Vec::new(),
model,
stop_reason: None,
stop_sequence: None,
container: None,
usage: Usage::default(),
};
// Accumulated tool-call argument JSON, parallel to `response.content`.
let mut tool_args: Vec<String> = Vec::new();
while let Some(event) = stream.next().await {
match event? {
StreamEvent::MessageStart { message } => {
response.id = message.id;
response.usage = message.usage;
}
StreamEvent::ContentBlockStart { content_block, .. } => {
let block = match content_block {
ContentBlockStart::Text { text } => ContentBlock::Text {
text,
cache_control: None,
},
ContentBlockStart::Thinking { thinking } => {
ContentBlock::Thinking { thinking }
}
ContentBlockStart::ToolUse {
id,
name,
input,
caller,
} => ContentBlock::ToolUse {
id,
name,
input,
caller,
},
ContentBlockStart::ServerToolUse { id, name, input } => {
ContentBlock::ServerToolUse { id, name, input }
}
};
response.content.push(block);
tool_args.push(String::new());
}
StreamEvent::ContentBlockDelta { index, delta } => {
let i = index as usize;
match delta {
Delta::TextDelta { text } => {
if let Some(ContentBlock::Text { text: existing, .. }) =
response.content.get_mut(i)
{
existing.push_str(&text);
}
}
Delta::ThinkingDelta { thinking } => {
if let Some(ContentBlock::Thinking { thinking: existing }) =
response.content.get_mut(i)
{
existing.push_str(&thinking);
}
}
Delta::InputJsonDelta { partial_json } => {
if let Some(buf) = tool_args.get_mut(i) {
buf.push_str(&partial_json);
}
}
}
}
StreamEvent::ContentBlockStop { index } => {
let i = index as usize;
if let Some(buf) = tool_args.get(i)
&& !buf.trim().is_empty()
&& let Ok(parsed) = serde_json::from_str::<Value>(buf)
&& let Some(ContentBlock::ToolUse { input, .. }) =
response.content.get_mut(i)
{
*input = parsed;
}
}
StreamEvent::MessageDelta { delta, usage } => {
if let Some(stop_reason) = delta.stop_reason {
response.stop_reason = Some(stop_reason);
}
if let Some(usage) = usage {
response.usage = usage;
}
}
StreamEvent::MessageStop => break,
_ => {}
}
}
Ok(response)
}
}
/// Convert CodeWhale messages to Responses API input items.
+8 -5
View File
@@ -2582,9 +2582,9 @@ impl Config {
// The access token lives in ~/.codex/auth.json (refreshed on demand)
// rather than a stored API key, so resolve it before the config-file
// and env slots. Explicit env overrides are handled inside
// `codex_access_token`.
// `get_credentials`.
if provider == ApiProvider::OpenaiCodex {
return crate::oauth::codex_access_token();
return Ok(crate::oauth::get_credentials()?.access_token);
}
// 1. Config file (provider-scoped slot). This intentionally wins
@@ -5256,9 +5256,12 @@ pub fn has_api_key_for(config: &Config, provider: ApiProvider) -> bool {
return kimi_cli_credentials_present();
}
if provider == ApiProvider::OpenaiCodex {
// Any usable Codex credential: either token env override or the Codex
// CLI OAuth login on disk.
return crate::oauth::credentials_present();
// OPENAI_CODEX_ACCESS_TOKEN is already checked above; also honor the
// alternate token env var and the Codex CLI OAuth login on disk.
if std::env::var("CODEX_ACCESS_TOKEN").is_ok_and(|k| !k.trim().is_empty()) {
return true;
}
return crate::oauth::auth_file_path().exists();
}
if matches!(provider, ApiProvider::Huggingface)
&& std::env::var("HF_TOKEN").is_ok_and(|k| !k.trim().is_empty())
-25
View File
@@ -72,23 +72,6 @@ pub fn auth_file_path() -> PathBuf {
codex_home.join("auth.json")
}
/// Whether any usable Codex credential is present without performing a refresh
/// or network call.
///
/// Mirrors `kimi_cli_credentials_present`: returns true if an access-token env
/// override is set or the Codex auth file exists on disk. Used by the provider
/// picker / auth surfaces so a `codex login` user is not treated as
/// unauthenticated.
#[must_use]
pub fn credentials_present() -> bool {
for var in ["OPENAI_CODEX_ACCESS_TOKEN", "CODEX_ACCESS_TOKEN"] {
if std::env::var(var).is_ok_and(|v| !v.trim().is_empty()) {
return true;
}
}
auth_file_path().exists()
}
/// Try to extract `exp` (epoch seconds) from a JWT without verifying
/// the signature. Returns `None` on any parse failure.
fn jwt_expiry_seconds(token: &str) -> Option<u64> {
@@ -318,14 +301,6 @@ pub fn get_credentials() -> Result<CodexCredentials> {
}
}
/// Resolve a Codex access token for use as a bearer credential.
///
/// Thin wrapper over [`get_credentials`] that returns just the token string,
/// matching the shape the config credential-resolution path expects.
pub fn codex_access_token() -> Result<String> {
Ok(get_credentials()?.access_token)
}
/// Best-effort ChatGPT account id for the `chatgpt-account-id` request header.
///
/// Resolves from env overrides first, then the on-disk auth file. Never