feat: survive system sleep mid-turn — detect the suspend gap and retry the request (#2990)

When the host sleeps while a model response is streaming, the connection
dies on wake with 'Stream read error: error decoding response body' and
the turn was lost. The engine now stamps every stream chunk with both
monotonic and wall-clock time; Instant pauses across a suspend while
SystemTime does not, so a >10s divergence on a stream error identifies a
sleep/wake cycle. In that case the partial output is discarded and the
identical request is re-issued (sharing the existing MAX_STREAM_RETRIES=3
budget) instead of failing the turn. Ordinary network flakes keep the
deliberate no-retry-after-content policy from #103.
This commit is contained in:
Hunter B
2026-06-09 23:40:42 -07:00
parent fad04a016f
commit 717d728163
4 changed files with 172 additions and 21 deletions
+3 -3
View File
@@ -2798,10 +2798,10 @@ use self::lsp_hooks::edited_paths_for_tool;
#[cfg(test)]
use self::streaming::TOOL_CALL_START_MARKERS;
use self::streaming::{
ContentBlockKind, FAKE_WRAPPER_NOTICE, MAX_STREAM_ERRORS_BEFORE_FAIL,
ContentBlockKind, FAKE_WRAPPER_NOTICE, MAX_STREAM_ERRORS_BEFORE_FAIL, MAX_STREAM_RETRIES,
MAX_TRANSPARENT_STREAM_RETRIES, STREAM_MAX_CONTENT_BYTES, STREAM_MAX_DURATION_SECS,
ToolUseState, contains_fake_tool_wrapper, filter_tool_call_delta,
should_transparently_retry_stream,
ToolUseState, contains_fake_tool_wrapper, filter_tool_call_delta, should_resume_after_sleep,
should_transparently_retry_stream, sleep_gap_detected,
};
use self::tool_catalog::{
CODE_EXECUTION_TOOL_NAME, JS_EXECUTION_TOOL_NAME, MULTI_TOOL_PARALLEL_NAME,
+40
View File
@@ -5,6 +5,7 @@
//! policy, and scrubbers for text that looks like a forged tool-call wrapper.
use crate::models::ToolCaller;
use std::time::Duration;
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(super) enum ContentBlockKind {
@@ -66,6 +67,45 @@ pub(super) fn should_transparently_retry_stream(
!any_content_received && transparent_attempts < MAX_TRANSPARENT_STREAM_RETRIES && !cancelled
}
/// Budget for re-issuing the whole request after a dead stream. Shared by the
/// nothing-streamed outer retry (#103 Phase 3) and the sleep-resume retry
/// (#2990).
pub(super) const MAX_STREAM_RETRIES: u32 = 3;
/// Wall-clock vs monotonic divergence above which we conclude the host slept
/// mid-stream (#2990). `Instant` pauses during system sleep (CLOCK_UPTIME_RAW
/// on macOS, CLOCK_MONOTONIC on Linux) while `SystemTime` keeps advancing, so
/// a large positive gap can only come from a suspend/resume cycle — ordinary
/// network flakes never produce one. Windows `Instant` may keep ticking
/// through sleep, in which case this simply never fires (no behavior change).
pub(super) const SLEEP_GAP_THRESHOLD: Duration = Duration::from_secs(10);
/// True when the gap between wall-clock and monotonic elapsed time since the
/// last stream progress says the host was suspended.
pub(super) fn sleep_gap_detected(
monotonic_elapsed: Duration,
wallclock_elapsed: Duration,
) -> bool {
wallclock_elapsed.saturating_sub(monotonic_elapsed) > SLEEP_GAP_THRESHOLD
}
/// Decide whether a failed stream should be silently re-issued because the
/// host slept mid-turn (#2990).
///
/// Unlike the transparent retry (#103), this fires even after content has
/// streamed: the partial output predates the sleep, the user was not
/// watching, and re-running the identical request is the correct
/// user-visible behavior. The double-billing concern that blocks ordinary
/// post-content retries is accepted here because the alternative is a dead
/// turn the user must re-prompt (and pay for) anyway.
pub(super) fn should_resume_after_sleep(
sleep_detected: bool,
retry_attempts: u32,
cancelled: bool,
) -> bool {
sleep_detected && retry_attempts < MAX_STREAM_RETRIES && !cancelled
}
pub(crate) const TOOL_CALL_START_MARKERS: [&str; 5] = [
"[TOOL_CALL]",
"<codewhale:tool_call",
+65
View File
@@ -3701,6 +3701,71 @@ fn stream_retry_respects_cancellation() {
);
}
// === #2990 sleep-resume policy ================================================
#[test]
fn sleep_gap_requires_wallclock_to_outrun_monotonic_clock() {
use std::time::Duration;
// No divergence: ordinary network failure, clocks agree.
assert!(
!super::sleep_gap_detected(Duration::from_secs(30), Duration::from_secs(30)),
"equal elapsed times must not register as a sleep gap"
);
// Divergence below the threshold: NTP slew / scheduling jitter.
assert!(
!super::sleep_gap_detected(Duration::from_secs(5), Duration::from_secs(14)),
"9s of divergence is below the 10s threshold"
);
// Divergence above the threshold: the host was suspended.
assert!(
super::sleep_gap_detected(Duration::from_secs(5), Duration::from_secs(16)),
"11s of divergence must register as a sleep gap"
);
// Wall clock went backwards (NTP step): saturating_sub → zero gap.
assert!(
!super::sleep_gap_detected(Duration::from_secs(60), Duration::from_secs(5)),
"wall clock behind monotonic must never register as a sleep gap"
);
}
#[test]
fn sleep_resume_retries_even_after_content_streamed() {
// The whole point of #2990: unlike the #103 transparent retry, a
// detected sleep gap retries regardless of streamed content — the
// partial output predates the sleep and the user was not watching.
assert!(
super::should_resume_after_sleep(true, 0, false),
"detected sleep with full budget must resume"
);
assert!(
super::should_resume_after_sleep(true, super::MAX_STREAM_RETRIES - 1, false),
"detected sleep one short of the budget must still resume"
);
}
#[test]
fn sleep_resume_requires_a_detected_gap() {
// Without a sleep gap this layer stays out of the way entirely, so the
// deliberate no-retry-after-content policy for ordinary flakes (#103)
// is preserved.
assert!(
!super::should_resume_after_sleep(false, 0, false),
"no sleep gap → never resume via this layer"
);
}
#[test]
fn sleep_resume_respects_budget_and_cancellation() {
assert!(
!super::should_resume_after_sleep(true, super::MAX_STREAM_RETRIES, false),
"budget exhausted → surface the failure instead of looping"
);
assert!(
!super::should_resume_after_sleep(true, 0, true),
"cancelled turn must not be resumed behind the user's back"
);
}
#[test]
fn stream_retry_threshold_relaxed_to_five() {
// Case 1+4 from issue #103: the consecutive-error threshold for marking
+64 -18
View File
@@ -69,14 +69,11 @@ impl Engine {
let mut loop_guard = LoopGuard::default();
let mut goal_continuations_this_turn = 0u32;
// Transparent stream-retry counter: when the chunked-transfer
// connection dies mid-stream and we got nothing useful out of it
// (no tool calls, no completed text), we silently re-issue the
// SAME request up to MAX_STREAM_RETRIES times before surfacing
// the failure to the user. This is the #103 Phase 3 retry that
// keeps long V4 thinking turns from being killed by transient
// proxy disconnects.
const MAX_STREAM_RETRIES: u32 = 3;
// Outer stream-retry counter: when the chunked-transfer connection
// dies mid-stream and either nothing useful was streamed (#103
// Phase 3) or the host slept mid-turn (#2990), we silently re-issue
// the SAME request up to MAX_STREAM_RETRIES times before surfacing
// the failure to the user.
let mut stream_retry_attempts: u32 = 0;
'turn_loop: loop {
@@ -468,6 +465,13 @@ impl Engine {
// `stream_start` is reset on a transparent retry so the wall-clock
// budget restarts with the fresh stream.
let mut stream_start = Instant::now();
// #2990 sleep-resume bookkeeping: monotonic and wall-clock stamps
// of the last stream progress. `Instant` pauses across a host
// suspend while `SystemTime` does not, so a large divergence on
// the next error tells "machine slept" apart from "network died".
let mut last_progress_mono = Instant::now();
let mut last_progress_wall = std::time::SystemTime::now();
let mut sleep_resume_pending = false;
let mut stream_content_bytes: usize = 0;
let (chunk_timeout_secs, chunk_timeout) = stream_chunk_timeout_budget(&self.config);
let max_duration = Duration::from_secs(STREAM_MAX_DURATION_SECS);
@@ -541,6 +545,8 @@ impl Engine {
let event = match event_result {
Ok(e) => {
last_progress_mono = Instant::now();
last_progress_wall = std::time::SystemTime::now();
// Flip on the first non-MessageStart event — that's
// the moment we cross from "stream not yet productive"
// (eligible for transparent retry) into "DeepSeek has
@@ -553,6 +559,27 @@ impl Engine {
Err(e) => {
stream_errors = stream_errors.saturating_add(1);
let message = self.decorate_auth_error_message(e.to_string());
// #2990: wall-clock far ahead of the monotonic clock
// since the last chunk means the host slept mid-stream.
// The partial output predates the sleep and the user
// was not watching — schedule a full request retry in
// the post-loop block instead of failing the turn.
let wall_elapsed = last_progress_wall
.elapsed()
.unwrap_or_else(|_| last_progress_mono.elapsed());
if should_resume_after_sleep(
sleep_gap_detected(last_progress_mono.elapsed(), wall_elapsed),
stream_retry_attempts,
self.cancel_token.is_cancelled(),
) {
crate::logging::warn(format!(
"Stream error after suspected system sleep ({:?} monotonic vs {:?} wall since last chunk); scheduling request retry: {message}",
last_progress_mono.elapsed(),
wall_elapsed,
));
sleep_resume_pending = true;
break;
}
// #103: when the stream errors before any content was
// streamed AND we still have retry budget, transparently
// resend the request. DeepSeek has not billed for any
@@ -851,18 +878,37 @@ impl Engine {
&& current_text_visible.trim().is_empty()
&& current_thinking.trim().is_empty()
&& !pending_message_complete;
if stream_died_with_nothing {
if stream_died_with_nothing || sleep_resume_pending {
if stream_retry_attempts < MAX_STREAM_RETRIES {
stream_retry_attempts = stream_retry_attempts.saturating_add(1);
crate::logging::warn(format!(
"Stream died with no content (attempt {stream_retry_attempts}/{MAX_STREAM_RETRIES}); retrying request"
));
let _ = self
.tx_event
.send(Event::status(format!(
"Connection interrupted; retrying ({stream_retry_attempts}/{MAX_STREAM_RETRIES})"
)))
.await;
if sleep_resume_pending {
crate::logging::warn(format!(
"Resuming after system sleep (attempt {stream_retry_attempts}/{MAX_STREAM_RETRIES}); discarding partial output and retrying request"
));
let _ = self
.tx_event
.send(Event::status(format!(
"System sleep detected; connection lost — retrying request ({stream_retry_attempts}/{MAX_STREAM_RETRIES})"
)))
.await;
// Finalize any partially-rendered assistant cell so
// the retried stream renders fresh instead of
// appending to the pre-sleep fragment.
if pending_message_complete {
let index = last_text_index.unwrap_or(0);
let _ = self.tx_event.send(Event::MessageComplete { index }).await;
}
} else {
crate::logging::warn(format!(
"Stream died with no content (attempt {stream_retry_attempts}/{MAX_STREAM_RETRIES}); retrying request"
));
let _ = self
.tx_event
.send(Event::status(format!(
"Connection interrupted; retrying ({stream_retry_attempts}/{MAX_STREAM_RETRIES})"
)))
.await;
}
// Don't preserve the per-stream `turn_error` — we're
// about to retry, and a successful retry should not
// surface the transient error as the turn outcome.