Merge pull request #114 from Hmbown/fix/issue-103-stream-retry

fix(engine): transparent retry on stream death with no content (#103 Phase 3)
This commit is contained in:
Hunter Bown
2026-04-26 17:49:38 -05:00
committed by GitHub
+53
View File
@@ -2022,6 +2022,16 @@ impl Engine {
}
let mut active_tool_names = initial_active_tools(&tool_catalog);
// Transparent stream-retry counter: when the chunked-transfer
// connection dies mid-stream and we got nothing useful out of it
// (no tool calls, no completed text), we silently re-issue the
// SAME request up to MAX_STREAM_RETRIES times before surfacing
// the failure to the user. This is the #103 Phase 3 retry that
// keeps long V4 thinking turns from being killed by transient
// proxy disconnects.
const MAX_STREAM_RETRIES: u32 = 3;
let mut stream_retry_attempts: u32 = 0;
loop {
if self.cancel_token.is_cancelled() {
let _ = self.tx_event.send(Event::status("Request cancelled")).await;
@@ -2566,6 +2576,49 @@ impl Engine {
}
}
// #103 Phase 3 — transparent retry. The inner loop above bails
// when reqwest yields chunk decode errors three times in a row;
// most of the time those are recoverable proxy / HTTP/2 issues
// and the request can simply be re-issued. Re-issue silently up
// to MAX_STREAM_RETRIES, but only when the stream produced
// nothing actionable — if any tool call landed or text was
// streamed, ship the partial state to the rest of the turn
// pipeline so we don't double-bill the user by re-running it.
let stream_died_with_nothing = stream_errors > 0
&& tool_uses.is_empty()
&& current_text_visible.trim().is_empty()
&& current_thinking.trim().is_empty()
&& !pending_message_complete;
if stream_died_with_nothing {
if stream_retry_attempts < MAX_STREAM_RETRIES {
stream_retry_attempts = stream_retry_attempts.saturating_add(1);
crate::logging::warn(format!(
"Stream died with no content (attempt {}/{}); retrying request",
stream_retry_attempts, MAX_STREAM_RETRIES
));
let _ = self
.tx_event
.send(Event::status(format!(
"Connection interrupted; retrying ({}/{})",
stream_retry_attempts, MAX_STREAM_RETRIES
)))
.await;
// Don't preserve the per-stream `turn_error` — we're
// about to retry, and a successful retry should not
// surface the transient error as the turn outcome.
turn_error = None;
continue;
}
crate::logging::warn(format!(
"Stream retry budget exhausted ({} attempts); failing turn",
stream_retry_attempts
));
} else if stream_errors == 0 {
// Healthy round → reset retry budget so we don't carry over
// state from a previous bad round.
stream_retry_attempts = 0;
}
// Update turn usage
turn.add_usage(&usage);