feat: survive system sleep mid-turn — detect the suspend gap and retry the request (#2990)
When the host sleeps while a model response is streaming, the connection dies on wake with 'Stream read error: error decoding response body' and the turn was lost. The engine now stamps every stream chunk with both monotonic and wall-clock time; Instant pauses across a suspend while SystemTime does not, so a >10s divergence on a stream error identifies a sleep/wake cycle. In that case the partial output is discarded and the identical request is re-issued (sharing the existing MAX_STREAM_RETRIES=3 budget) instead of failing the turn. Ordinary network flakes keep the deliberate no-retry-after-content policy from #103.
This commit is contained in:
@@ -2798,10 +2798,10 @@ use self::lsp_hooks::edited_paths_for_tool;
|
||||
#[cfg(test)]
|
||||
use self::streaming::TOOL_CALL_START_MARKERS;
|
||||
use self::streaming::{
|
||||
ContentBlockKind, FAKE_WRAPPER_NOTICE, MAX_STREAM_ERRORS_BEFORE_FAIL,
|
||||
ContentBlockKind, FAKE_WRAPPER_NOTICE, MAX_STREAM_ERRORS_BEFORE_FAIL, MAX_STREAM_RETRIES,
|
||||
MAX_TRANSPARENT_STREAM_RETRIES, STREAM_MAX_CONTENT_BYTES, STREAM_MAX_DURATION_SECS,
|
||||
ToolUseState, contains_fake_tool_wrapper, filter_tool_call_delta,
|
||||
should_transparently_retry_stream,
|
||||
ToolUseState, contains_fake_tool_wrapper, filter_tool_call_delta, should_resume_after_sleep,
|
||||
should_transparently_retry_stream, sleep_gap_detected,
|
||||
};
|
||||
use self::tool_catalog::{
|
||||
CODE_EXECUTION_TOOL_NAME, JS_EXECUTION_TOOL_NAME, MULTI_TOOL_PARALLEL_NAME,
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
//! policy, and scrubbers for text that looks like a forged tool-call wrapper.
|
||||
|
||||
use crate::models::ToolCaller;
|
||||
use std::time::Duration;
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub(super) enum ContentBlockKind {
|
||||
@@ -66,6 +67,45 @@ pub(super) fn should_transparently_retry_stream(
|
||||
!any_content_received && transparent_attempts < MAX_TRANSPARENT_STREAM_RETRIES && !cancelled
|
||||
}
|
||||
|
||||
/// Budget for re-issuing the whole request after a dead stream. Shared by the
|
||||
/// nothing-streamed outer retry (#103 Phase 3) and the sleep-resume retry
|
||||
/// (#2990).
|
||||
pub(super) const MAX_STREAM_RETRIES: u32 = 3;
|
||||
|
||||
/// Wall-clock vs monotonic divergence above which we conclude the host slept
|
||||
/// mid-stream (#2990). `Instant` pauses during system sleep (CLOCK_UPTIME_RAW
|
||||
/// on macOS, CLOCK_MONOTONIC on Linux) while `SystemTime` keeps advancing, so
|
||||
/// a large positive gap can only come from a suspend/resume cycle — ordinary
|
||||
/// network flakes never produce one. Windows `Instant` may keep ticking
|
||||
/// through sleep, in which case this simply never fires (no behavior change).
|
||||
pub(super) const SLEEP_GAP_THRESHOLD: Duration = Duration::from_secs(10);
|
||||
|
||||
/// True when the gap between wall-clock and monotonic elapsed time since the
|
||||
/// last stream progress says the host was suspended.
|
||||
pub(super) fn sleep_gap_detected(
|
||||
monotonic_elapsed: Duration,
|
||||
wallclock_elapsed: Duration,
|
||||
) -> bool {
|
||||
wallclock_elapsed.saturating_sub(monotonic_elapsed) > SLEEP_GAP_THRESHOLD
|
||||
}
|
||||
|
||||
/// Decide whether a failed stream should be silently re-issued because the
|
||||
/// host slept mid-turn (#2990).
|
||||
///
|
||||
/// Unlike the transparent retry (#103), this fires even after content has
|
||||
/// streamed: the partial output predates the sleep, the user was not
|
||||
/// watching, and re-running the identical request is the correct
|
||||
/// user-visible behavior. The double-billing concern that blocks ordinary
|
||||
/// post-content retries is accepted here because the alternative is a dead
|
||||
/// turn the user must re-prompt (and pay for) anyway.
|
||||
pub(super) fn should_resume_after_sleep(
|
||||
sleep_detected: bool,
|
||||
retry_attempts: u32,
|
||||
cancelled: bool,
|
||||
) -> bool {
|
||||
sleep_detected && retry_attempts < MAX_STREAM_RETRIES && !cancelled
|
||||
}
|
||||
|
||||
pub(crate) const TOOL_CALL_START_MARKERS: [&str; 5] = [
|
||||
"[TOOL_CALL]",
|
||||
"<codewhale:tool_call",
|
||||
|
||||
@@ -3701,6 +3701,71 @@ fn stream_retry_respects_cancellation() {
|
||||
);
|
||||
}
|
||||
|
||||
// === #2990 sleep-resume policy ================================================
|
||||
|
||||
#[test]
|
||||
fn sleep_gap_requires_wallclock_to_outrun_monotonic_clock() {
|
||||
use std::time::Duration;
|
||||
// No divergence: ordinary network failure, clocks agree.
|
||||
assert!(
|
||||
!super::sleep_gap_detected(Duration::from_secs(30), Duration::from_secs(30)),
|
||||
"equal elapsed times must not register as a sleep gap"
|
||||
);
|
||||
// Divergence below the threshold: NTP slew / scheduling jitter.
|
||||
assert!(
|
||||
!super::sleep_gap_detected(Duration::from_secs(5), Duration::from_secs(14)),
|
||||
"9s of divergence is below the 10s threshold"
|
||||
);
|
||||
// Divergence above the threshold: the host was suspended.
|
||||
assert!(
|
||||
super::sleep_gap_detected(Duration::from_secs(5), Duration::from_secs(16)),
|
||||
"11s of divergence must register as a sleep gap"
|
||||
);
|
||||
// Wall clock went backwards (NTP step): saturating_sub → zero gap.
|
||||
assert!(
|
||||
!super::sleep_gap_detected(Duration::from_secs(60), Duration::from_secs(5)),
|
||||
"wall clock behind monotonic must never register as a sleep gap"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sleep_resume_retries_even_after_content_streamed() {
|
||||
// The whole point of #2990: unlike the #103 transparent retry, a
|
||||
// detected sleep gap retries regardless of streamed content — the
|
||||
// partial output predates the sleep and the user was not watching.
|
||||
assert!(
|
||||
super::should_resume_after_sleep(true, 0, false),
|
||||
"detected sleep with full budget must resume"
|
||||
);
|
||||
assert!(
|
||||
super::should_resume_after_sleep(true, super::MAX_STREAM_RETRIES - 1, false),
|
||||
"detected sleep one short of the budget must still resume"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sleep_resume_requires_a_detected_gap() {
|
||||
// Without a sleep gap this layer stays out of the way entirely, so the
|
||||
// deliberate no-retry-after-content policy for ordinary flakes (#103)
|
||||
// is preserved.
|
||||
assert!(
|
||||
!super::should_resume_after_sleep(false, 0, false),
|
||||
"no sleep gap → never resume via this layer"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sleep_resume_respects_budget_and_cancellation() {
|
||||
assert!(
|
||||
!super::should_resume_after_sleep(true, super::MAX_STREAM_RETRIES, false),
|
||||
"budget exhausted → surface the failure instead of looping"
|
||||
);
|
||||
assert!(
|
||||
!super::should_resume_after_sleep(true, 0, true),
|
||||
"cancelled turn must not be resumed behind the user's back"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn stream_retry_threshold_relaxed_to_five() {
|
||||
// Case 1+4 from issue #103: the consecutive-error threshold for marking
|
||||
|
||||
@@ -69,14 +69,11 @@ impl Engine {
|
||||
let mut loop_guard = LoopGuard::default();
|
||||
let mut goal_continuations_this_turn = 0u32;
|
||||
|
||||
// Transparent stream-retry counter: when the chunked-transfer
|
||||
// connection dies mid-stream and we got nothing useful out of it
|
||||
// (no tool calls, no completed text), we silently re-issue the
|
||||
// SAME request up to MAX_STREAM_RETRIES times before surfacing
|
||||
// the failure to the user. This is the #103 Phase 3 retry that
|
||||
// keeps long V4 thinking turns from being killed by transient
|
||||
// proxy disconnects.
|
||||
const MAX_STREAM_RETRIES: u32 = 3;
|
||||
// Outer stream-retry counter: when the chunked-transfer connection
|
||||
// dies mid-stream and either nothing useful was streamed (#103
|
||||
// Phase 3) or the host slept mid-turn (#2990), we silently re-issue
|
||||
// the SAME request up to MAX_STREAM_RETRIES times before surfacing
|
||||
// the failure to the user.
|
||||
let mut stream_retry_attempts: u32 = 0;
|
||||
|
||||
'turn_loop: loop {
|
||||
@@ -468,6 +465,13 @@ impl Engine {
|
||||
// `stream_start` is reset on a transparent retry so the wall-clock
|
||||
// budget restarts with the fresh stream.
|
||||
let mut stream_start = Instant::now();
|
||||
// #2990 sleep-resume bookkeeping: monotonic and wall-clock stamps
|
||||
// of the last stream progress. `Instant` pauses across a host
|
||||
// suspend while `SystemTime` does not, so a large divergence on
|
||||
// the next error tells "machine slept" apart from "network died".
|
||||
let mut last_progress_mono = Instant::now();
|
||||
let mut last_progress_wall = std::time::SystemTime::now();
|
||||
let mut sleep_resume_pending = false;
|
||||
let mut stream_content_bytes: usize = 0;
|
||||
let (chunk_timeout_secs, chunk_timeout) = stream_chunk_timeout_budget(&self.config);
|
||||
let max_duration = Duration::from_secs(STREAM_MAX_DURATION_SECS);
|
||||
@@ -541,6 +545,8 @@ impl Engine {
|
||||
|
||||
let event = match event_result {
|
||||
Ok(e) => {
|
||||
last_progress_mono = Instant::now();
|
||||
last_progress_wall = std::time::SystemTime::now();
|
||||
// Flip on the first non-MessageStart event — that's
|
||||
// the moment we cross from "stream not yet productive"
|
||||
// (eligible for transparent retry) into "DeepSeek has
|
||||
@@ -553,6 +559,27 @@ impl Engine {
|
||||
Err(e) => {
|
||||
stream_errors = stream_errors.saturating_add(1);
|
||||
let message = self.decorate_auth_error_message(e.to_string());
|
||||
// #2990: wall-clock far ahead of the monotonic clock
|
||||
// since the last chunk means the host slept mid-stream.
|
||||
// The partial output predates the sleep and the user
|
||||
// was not watching — schedule a full request retry in
|
||||
// the post-loop block instead of failing the turn.
|
||||
let wall_elapsed = last_progress_wall
|
||||
.elapsed()
|
||||
.unwrap_or_else(|_| last_progress_mono.elapsed());
|
||||
if should_resume_after_sleep(
|
||||
sleep_gap_detected(last_progress_mono.elapsed(), wall_elapsed),
|
||||
stream_retry_attempts,
|
||||
self.cancel_token.is_cancelled(),
|
||||
) {
|
||||
crate::logging::warn(format!(
|
||||
"Stream error after suspected system sleep ({:?} monotonic vs {:?} wall since last chunk); scheduling request retry: {message}",
|
||||
last_progress_mono.elapsed(),
|
||||
wall_elapsed,
|
||||
));
|
||||
sleep_resume_pending = true;
|
||||
break;
|
||||
}
|
||||
// #103: when the stream errors before any content was
|
||||
// streamed AND we still have retry budget, transparently
|
||||
// resend the request. DeepSeek has not billed for any
|
||||
@@ -851,18 +878,37 @@ impl Engine {
|
||||
&& current_text_visible.trim().is_empty()
|
||||
&& current_thinking.trim().is_empty()
|
||||
&& !pending_message_complete;
|
||||
if stream_died_with_nothing {
|
||||
if stream_died_with_nothing || sleep_resume_pending {
|
||||
if stream_retry_attempts < MAX_STREAM_RETRIES {
|
||||
stream_retry_attempts = stream_retry_attempts.saturating_add(1);
|
||||
crate::logging::warn(format!(
|
||||
"Stream died with no content (attempt {stream_retry_attempts}/{MAX_STREAM_RETRIES}); retrying request"
|
||||
));
|
||||
let _ = self
|
||||
.tx_event
|
||||
.send(Event::status(format!(
|
||||
"Connection interrupted; retrying ({stream_retry_attempts}/{MAX_STREAM_RETRIES})"
|
||||
)))
|
||||
.await;
|
||||
if sleep_resume_pending {
|
||||
crate::logging::warn(format!(
|
||||
"Resuming after system sleep (attempt {stream_retry_attempts}/{MAX_STREAM_RETRIES}); discarding partial output and retrying request"
|
||||
));
|
||||
let _ = self
|
||||
.tx_event
|
||||
.send(Event::status(format!(
|
||||
"System sleep detected; connection lost — retrying request ({stream_retry_attempts}/{MAX_STREAM_RETRIES})"
|
||||
)))
|
||||
.await;
|
||||
// Finalize any partially-rendered assistant cell so
|
||||
// the retried stream renders fresh instead of
|
||||
// appending to the pre-sleep fragment.
|
||||
if pending_message_complete {
|
||||
let index = last_text_index.unwrap_or(0);
|
||||
let _ = self.tx_event.send(Event::MessageComplete { index }).await;
|
||||
}
|
||||
} else {
|
||||
crate::logging::warn(format!(
|
||||
"Stream died with no content (attempt {stream_retry_attempts}/{MAX_STREAM_RETRIES}); retrying request"
|
||||
));
|
||||
let _ = self
|
||||
.tx_event
|
||||
.send(Event::status(format!(
|
||||
"Connection interrupted; retrying ({stream_retry_attempts}/{MAX_STREAM_RETRIES})"
|
||||
)))
|
||||
.await;
|
||||
}
|
||||
// Don't preserve the per-stream `turn_error` — we're
|
||||
// about to retry, and a successful retry should not
|
||||
// surface the transient error as the turn outcome.
|
||||
|
||||
Reference in New Issue
Block a user