fix(codex): make the Responses provider work end-to-end + de-slop
Verified live against the ChatGPT Codex backend (real codex login): `exec --model gpt-5.5` through the openai-codex provider returns a correct completion. Fixes found while getting there: - Route the non-streaming path too. create_message only dispatched chat completions; for OpenAI Codex it now drives the Responses stream and folds it into a MessageResponse (handle_responses_message), so `exec` and other non-streaming callers use the same wire path as the interactive stream. - Present a non-browser User-Agent on the Codex path. The ChatGPT backend sits behind Cloudflare, which served a JS challenge (HTTP 403) to our browser-like "Mozilla/5.0 (compatible; codewhale/...)" UA. A codex_cli_rs UA passes. - Always send `instructions` (Responses rejects empty instructions); fall back to a minimal system prompt. - Map reasoning effort onto the Codex-allowed set (none/minimal/low/medium/high/xhigh); CodeWhale's "auto" has no equivalent and maps to medium. - Send `Accept: text/event-stream`. Antislop pass on the changeset: - Inline the one-caller codex_access_token wrapper (config calls get_credentials directly) and drop the one-caller credentials_present helper; both presence checks now use auth_file_path().exists() consistently with the Kimi path. - Remove dead stream-parser state (ToolCallState fields, unused response_id / current_item_type / output_text / thinking_text accumulators). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -645,13 +645,21 @@ impl DeepSeekClient {
|
||||
insecure_skip_tls_verify: bool,
|
||||
) -> Result<reqwest::Client> {
|
||||
let headers = build_default_headers(api_key, extra_headers, api_provider, base_url)?;
|
||||
let mut builder = crate::tls::reqwest_client_builder()
|
||||
.default_headers(headers)
|
||||
.user_agent(concat!(
|
||||
// The ChatGPT Codex backend sits behind Cloudflare bot protection that
|
||||
// only admits the Codex CLI's user agent; present a codex_cli_rs UA on
|
||||
// that path so the request is handled like the official client.
|
||||
let user_agent: &str = if api_provider == ApiProvider::OpenaiCodex {
|
||||
concat!("codex_cli_rs/0.137.0 (CodeWhale ", env!("CARGO_PKG_VERSION"), ")")
|
||||
} else {
|
||||
concat!(
|
||||
"Mozilla/5.0 (compatible; codewhale/",
|
||||
env!("CARGO_PKG_VERSION"),
|
||||
"; +https://github.com/Hmbown/CodeWhale)"
|
||||
))
|
||||
)
|
||||
};
|
||||
let mut builder = crate::tls::reqwest_client_builder()
|
||||
.default_headers(headers)
|
||||
.user_agent(user_agent)
|
||||
.connect_timeout(Duration::from_secs(30))
|
||||
.tcp_keepalive(Some(Duration::from_secs(30)))
|
||||
.http2_keep_alive_interval(Some(Duration::from_secs(15)))
|
||||
@@ -1123,6 +1131,9 @@ impl LlmClient for DeepSeekClient {
|
||||
}
|
||||
|
||||
async fn create_message(&self, request: MessageRequest) -> Result<MessageResponse> {
|
||||
if self.api_provider == ApiProvider::OpenaiCodex {
|
||||
return self.handle_responses_message(request).await;
|
||||
}
|
||||
self.create_message_chat(&request).await
|
||||
}
|
||||
|
||||
|
||||
@@ -32,10 +32,13 @@ impl DeepSeekClient {
|
||||
"store": false,
|
||||
});
|
||||
|
||||
// Instructions (system prompt).
|
||||
if let Some(instructions) = system_to_instructions(request.system.clone()) {
|
||||
body["instructions"] = json!(instructions);
|
||||
}
|
||||
// Instructions (system prompt). The Codex Responses backend rejects
|
||||
// requests without instructions, so fall back to a minimal system
|
||||
// prompt when the caller did not supply one.
|
||||
let instructions = system_to_instructions(request.system.clone())
|
||||
.filter(|text| !text.trim().is_empty())
|
||||
.unwrap_or_else(|| "You are a helpful assistant.".to_string());
|
||||
body["instructions"] = json!(instructions);
|
||||
|
||||
// Convert messages to Responses input items.
|
||||
let input = convert_messages_to_responses_input(request);
|
||||
@@ -52,16 +55,24 @@ impl DeepSeekClient {
|
||||
}
|
||||
}
|
||||
|
||||
// Reasoning configuration.
|
||||
if let Some(effort) = request.reasoning_effort.as_deref() {
|
||||
let summary = match effort {
|
||||
"off" | "disabled" | "none" | "false" => "off",
|
||||
_ => "auto",
|
||||
// Reasoning configuration. The Codex Responses backend only accepts a
|
||||
// fixed set of effort levels (none/minimal/low/medium/high/xhigh), so
|
||||
// map CodeWhale's effort string onto those and omit reasoning entirely
|
||||
// when it is disabled. CodeWhale's "auto" has no Codex equivalent and
|
||||
// falls back to "medium".
|
||||
if let Some(raw) = request.reasoning_effort.as_deref() {
|
||||
let effort = match raw.trim().to_ascii_lowercase().as_str() {
|
||||
"off" | "disabled" | "none" | "false" => None,
|
||||
"minimal" => Some("minimal"),
|
||||
"low" => Some("low"),
|
||||
"high" => Some("high"),
|
||||
"xhigh" | "max" => Some("xhigh"),
|
||||
_ => Some("medium"),
|
||||
};
|
||||
if summary != "off" {
|
||||
if let Some(effort) = effort {
|
||||
body["reasoning"] = json!({
|
||||
"effort": effort,
|
||||
"summary": summary,
|
||||
"summary": "auto",
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -89,6 +100,7 @@ impl DeepSeekClient {
|
||||
.http_client
|
||||
.post(&url)
|
||||
.header("Content-Type", "application/json")
|
||||
.header("Accept", "text/event-stream")
|
||||
.header("OpenAI-Beta", "responses=experimental")
|
||||
.header("originator", "codex_cli_rs");
|
||||
if let Some(account_id) = crate::oauth::codex_account_id() {
|
||||
@@ -128,12 +140,8 @@ impl DeepSeekClient {
|
||||
},
|
||||
});
|
||||
|
||||
let mut _response_id = String::new();
|
||||
let mut _current_item_type: Option<String> = None;
|
||||
let mut current_block_index: Option<u32> = None;
|
||||
let mut saw_tool_call = false;
|
||||
let mut _output_text = String::new();
|
||||
let mut _thinking_text = String::new();
|
||||
let mut usage_data: Option<Usage> = None;
|
||||
let mut buffer = String::new();
|
||||
let mut done = false;
|
||||
@@ -186,22 +194,12 @@ impl DeepSeekClient {
|
||||
event.get("type").and_then(|t| t.as_str()).unwrap_or("");
|
||||
|
||||
match event_type {
|
||||
"response.created" => {
|
||||
if let Some(resp) = event.get("response") {
|
||||
_response_id = resp
|
||||
.get("id")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("")
|
||||
.to_string();
|
||||
}
|
||||
}
|
||||
"response.output_item.added" => {
|
||||
if let Some(item) = event.get("item") {
|
||||
let item_type = item
|
||||
.get("type")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("");
|
||||
_current_item_type = Some(item_type.to_string());
|
||||
|
||||
match item_type {
|
||||
"message" => {
|
||||
@@ -272,7 +270,6 @@ impl DeepSeekClient {
|
||||
if let Some(delta_text) =
|
||||
event.get("delta").and_then(|d| d.as_str())
|
||||
{
|
||||
_output_text.push_str(delta_text);
|
||||
if let Some(idx) = current_block_index {
|
||||
yield Ok(StreamEvent::ContentBlockDelta {
|
||||
index: idx,
|
||||
@@ -302,7 +299,6 @@ impl DeepSeekClient {
|
||||
if let Some(delta_text) =
|
||||
event.get("delta").and_then(|d| d.as_str())
|
||||
{
|
||||
_thinking_text.push_str(delta_text);
|
||||
if let Some(idx) = current_block_index {
|
||||
yield Ok(StreamEvent::ContentBlockDelta {
|
||||
index: idx,
|
||||
@@ -317,7 +313,6 @@ impl DeepSeekClient {
|
||||
if let Some(idx) = current_block_index {
|
||||
yield Ok(StreamEvent::ContentBlockStop { index: idx });
|
||||
current_block_index = None;
|
||||
_current_item_type = None;
|
||||
}
|
||||
}
|
||||
"response.completed" => {
|
||||
@@ -378,6 +373,120 @@ impl DeepSeekClient {
|
||||
|
||||
Ok(Box::pin(stream))
|
||||
}
|
||||
|
||||
/// Non-streaming Responses request: drive the streaming handler and fold
|
||||
/// its events into a single `MessageResponse`.
|
||||
///
|
||||
/// The ChatGPT Codex backend only serves streaming responses, so the
|
||||
/// non-streaming entry point (`create_message`, used by `exec`) reuses the
|
||||
/// same wire path as the interactive stream rather than a second request
|
||||
/// shape.
|
||||
pub(super) async fn handle_responses_message(
|
||||
&self,
|
||||
request: MessageRequest,
|
||||
) -> Result<MessageResponse> {
|
||||
use futures_util::StreamExt;
|
||||
|
||||
let model = request.model.clone();
|
||||
let mut stream = self.handle_responses_stream(request).await?;
|
||||
|
||||
let mut response = MessageResponse {
|
||||
id: String::new(),
|
||||
r#type: "message".to_string(),
|
||||
role: "assistant".to_string(),
|
||||
content: Vec::new(),
|
||||
model,
|
||||
stop_reason: None,
|
||||
stop_sequence: None,
|
||||
container: None,
|
||||
usage: Usage::default(),
|
||||
};
|
||||
// Accumulated tool-call argument JSON, parallel to `response.content`.
|
||||
let mut tool_args: Vec<String> = Vec::new();
|
||||
|
||||
while let Some(event) = stream.next().await {
|
||||
match event? {
|
||||
StreamEvent::MessageStart { message } => {
|
||||
response.id = message.id;
|
||||
response.usage = message.usage;
|
||||
}
|
||||
StreamEvent::ContentBlockStart { content_block, .. } => {
|
||||
let block = match content_block {
|
||||
ContentBlockStart::Text { text } => ContentBlock::Text {
|
||||
text,
|
||||
cache_control: None,
|
||||
},
|
||||
ContentBlockStart::Thinking { thinking } => {
|
||||
ContentBlock::Thinking { thinking }
|
||||
}
|
||||
ContentBlockStart::ToolUse {
|
||||
id,
|
||||
name,
|
||||
input,
|
||||
caller,
|
||||
} => ContentBlock::ToolUse {
|
||||
id,
|
||||
name,
|
||||
input,
|
||||
caller,
|
||||
},
|
||||
ContentBlockStart::ServerToolUse { id, name, input } => {
|
||||
ContentBlock::ServerToolUse { id, name, input }
|
||||
}
|
||||
};
|
||||
response.content.push(block);
|
||||
tool_args.push(String::new());
|
||||
}
|
||||
StreamEvent::ContentBlockDelta { index, delta } => {
|
||||
let i = index as usize;
|
||||
match delta {
|
||||
Delta::TextDelta { text } => {
|
||||
if let Some(ContentBlock::Text { text: existing, .. }) =
|
||||
response.content.get_mut(i)
|
||||
{
|
||||
existing.push_str(&text);
|
||||
}
|
||||
}
|
||||
Delta::ThinkingDelta { thinking } => {
|
||||
if let Some(ContentBlock::Thinking { thinking: existing }) =
|
||||
response.content.get_mut(i)
|
||||
{
|
||||
existing.push_str(&thinking);
|
||||
}
|
||||
}
|
||||
Delta::InputJsonDelta { partial_json } => {
|
||||
if let Some(buf) = tool_args.get_mut(i) {
|
||||
buf.push_str(&partial_json);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
StreamEvent::ContentBlockStop { index } => {
|
||||
let i = index as usize;
|
||||
if let Some(buf) = tool_args.get(i)
|
||||
&& !buf.trim().is_empty()
|
||||
&& let Ok(parsed) = serde_json::from_str::<Value>(buf)
|
||||
&& let Some(ContentBlock::ToolUse { input, .. }) =
|
||||
response.content.get_mut(i)
|
||||
{
|
||||
*input = parsed;
|
||||
}
|
||||
}
|
||||
StreamEvent::MessageDelta { delta, usage } => {
|
||||
if let Some(stop_reason) = delta.stop_reason {
|
||||
response.stop_reason = Some(stop_reason);
|
||||
}
|
||||
if let Some(usage) = usage {
|
||||
response.usage = usage;
|
||||
}
|
||||
}
|
||||
StreamEvent::MessageStop => break,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(response)
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert CodeWhale messages to Responses API input items.
|
||||
|
||||
@@ -2582,9 +2582,9 @@ impl Config {
|
||||
// The access token lives in ~/.codex/auth.json (refreshed on demand)
|
||||
// rather than a stored API key, so resolve it before the config-file
|
||||
// and env slots. Explicit env overrides are handled inside
|
||||
// `codex_access_token`.
|
||||
// `get_credentials`.
|
||||
if provider == ApiProvider::OpenaiCodex {
|
||||
return crate::oauth::codex_access_token();
|
||||
return Ok(crate::oauth::get_credentials()?.access_token);
|
||||
}
|
||||
|
||||
// 1. Config file (provider-scoped slot). This intentionally wins
|
||||
@@ -5256,9 +5256,12 @@ pub fn has_api_key_for(config: &Config, provider: ApiProvider) -> bool {
|
||||
return kimi_cli_credentials_present();
|
||||
}
|
||||
if provider == ApiProvider::OpenaiCodex {
|
||||
// Any usable Codex credential: either token env override or the Codex
|
||||
// CLI OAuth login on disk.
|
||||
return crate::oauth::credentials_present();
|
||||
// OPENAI_CODEX_ACCESS_TOKEN is already checked above; also honor the
|
||||
// alternate token env var and the Codex CLI OAuth login on disk.
|
||||
if std::env::var("CODEX_ACCESS_TOKEN").is_ok_and(|k| !k.trim().is_empty()) {
|
||||
return true;
|
||||
}
|
||||
return crate::oauth::auth_file_path().exists();
|
||||
}
|
||||
if matches!(provider, ApiProvider::Huggingface)
|
||||
&& std::env::var("HF_TOKEN").is_ok_and(|k| !k.trim().is_empty())
|
||||
|
||||
@@ -72,23 +72,6 @@ pub fn auth_file_path() -> PathBuf {
|
||||
codex_home.join("auth.json")
|
||||
}
|
||||
|
||||
/// Whether any usable Codex credential is present without performing a refresh
|
||||
/// or network call.
|
||||
///
|
||||
/// Mirrors `kimi_cli_credentials_present`: returns true if an access-token env
|
||||
/// override is set or the Codex auth file exists on disk. Used by the provider
|
||||
/// picker / auth surfaces so a `codex login` user is not treated as
|
||||
/// unauthenticated.
|
||||
#[must_use]
|
||||
pub fn credentials_present() -> bool {
|
||||
for var in ["OPENAI_CODEX_ACCESS_TOKEN", "CODEX_ACCESS_TOKEN"] {
|
||||
if std::env::var(var).is_ok_and(|v| !v.trim().is_empty()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
auth_file_path().exists()
|
||||
}
|
||||
|
||||
/// Try to extract `exp` (epoch seconds) from a JWT without verifying
|
||||
/// the signature. Returns `None` on any parse failure.
|
||||
fn jwt_expiry_seconds(token: &str) -> Option<u64> {
|
||||
@@ -318,14 +301,6 @@ pub fn get_credentials() -> Result<CodexCredentials> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Resolve a Codex access token for use as a bearer credential.
|
||||
///
|
||||
/// Thin wrapper over [`get_credentials`] that returns just the token string,
|
||||
/// matching the shape the config credential-resolution path expects.
|
||||
pub fn codex_access_token() -> Result<String> {
|
||||
Ok(get_credentials()?.access_token)
|
||||
}
|
||||
|
||||
/// Best-effort ChatGPT account id for the `chatgpt-account-id` request header.
|
||||
///
|
||||
/// Resolves from env overrides first, then the on-disk auth file. Never
|
||||
|
||||
Reference in New Issue
Block a user