From 717d728163d561bacac2149cfc29f1168c439905 Mon Sep 17 00:00:00 2001 From: Hunter B Date: Tue, 9 Jun 2026 23:40:42 -0700 Subject: [PATCH] =?UTF-8?q?feat:=20survive=20system=20sleep=20mid-turn=20?= =?UTF-8?q?=E2=80=94=20detect=20the=20suspend=20gap=20and=20retry=20the=20?= =?UTF-8?q?request=20(#2990)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the host sleeps while a model response is streaming, the connection dies on wake with 'Stream read error: error decoding response body' and the turn was lost. The engine now stamps every stream chunk with both monotonic and wall-clock time; Instant pauses across a suspend while SystemTime does not, so a >10s divergence on a stream error identifies a sleep/wake cycle. In that case the partial output is discarded and the identical request is re-issued (sharing the existing MAX_STREAM_RETRIES=3 budget) instead of failing the turn. Ordinary network flakes keep the deliberate no-retry-after-content policy from #103. --- crates/tui/src/core/engine.rs | 6 +- crates/tui/src/core/engine/streaming.rs | 40 ++++++++++++ crates/tui/src/core/engine/tests.rs | 65 ++++++++++++++++++++ crates/tui/src/core/engine/turn_loop.rs | 82 +++++++++++++++++++------ 4 files changed, 172 insertions(+), 21 deletions(-) diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index d771bac3..6f89fe19 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -2798,10 +2798,10 @@ use self::lsp_hooks::edited_paths_for_tool; #[cfg(test)] use self::streaming::TOOL_CALL_START_MARKERS; use self::streaming::{ - ContentBlockKind, FAKE_WRAPPER_NOTICE, MAX_STREAM_ERRORS_BEFORE_FAIL, + ContentBlockKind, FAKE_WRAPPER_NOTICE, MAX_STREAM_ERRORS_BEFORE_FAIL, MAX_STREAM_RETRIES, MAX_TRANSPARENT_STREAM_RETRIES, STREAM_MAX_CONTENT_BYTES, STREAM_MAX_DURATION_SECS, - ToolUseState, contains_fake_tool_wrapper, filter_tool_call_delta, - should_transparently_retry_stream, + ToolUseState, contains_fake_tool_wrapper, filter_tool_call_delta, should_resume_after_sleep, + should_transparently_retry_stream, sleep_gap_detected, }; use self::tool_catalog::{ CODE_EXECUTION_TOOL_NAME, JS_EXECUTION_TOOL_NAME, MULTI_TOOL_PARALLEL_NAME, diff --git a/crates/tui/src/core/engine/streaming.rs b/crates/tui/src/core/engine/streaming.rs index 692b3c2a..4f2fde3f 100644 --- a/crates/tui/src/core/engine/streaming.rs +++ b/crates/tui/src/core/engine/streaming.rs @@ -5,6 +5,7 @@ //! policy, and scrubbers for text that looks like a forged tool-call wrapper. use crate::models::ToolCaller; +use std::time::Duration; #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub(super) enum ContentBlockKind { @@ -66,6 +67,45 @@ pub(super) fn should_transparently_retry_stream( !any_content_received && transparent_attempts < MAX_TRANSPARENT_STREAM_RETRIES && !cancelled } +/// Budget for re-issuing the whole request after a dead stream. Shared by the +/// nothing-streamed outer retry (#103 Phase 3) and the sleep-resume retry +/// (#2990). +pub(super) const MAX_STREAM_RETRIES: u32 = 3; + +/// Wall-clock vs monotonic divergence above which we conclude the host slept +/// mid-stream (#2990). `Instant` pauses during system sleep (CLOCK_UPTIME_RAW +/// on macOS, CLOCK_MONOTONIC on Linux) while `SystemTime` keeps advancing, so +/// a large positive gap can only come from a suspend/resume cycle — ordinary +/// network flakes never produce one. Windows `Instant` may keep ticking +/// through sleep, in which case this simply never fires (no behavior change). +pub(super) const SLEEP_GAP_THRESHOLD: Duration = Duration::from_secs(10); + +/// True when the gap between wall-clock and monotonic elapsed time since the +/// last stream progress says the host was suspended. +pub(super) fn sleep_gap_detected( + monotonic_elapsed: Duration, + wallclock_elapsed: Duration, +) -> bool { + wallclock_elapsed.saturating_sub(monotonic_elapsed) > SLEEP_GAP_THRESHOLD +} + +/// Decide whether a failed stream should be silently re-issued because the +/// host slept mid-turn (#2990). +/// +/// Unlike the transparent retry (#103), this fires even after content has +/// streamed: the partial output predates the sleep, the user was not +/// watching, and re-running the identical request is the correct +/// user-visible behavior. The double-billing concern that blocks ordinary +/// post-content retries is accepted here because the alternative is a dead +/// turn the user must re-prompt (and pay for) anyway. +pub(super) fn should_resume_after_sleep( + sleep_detected: bool, + retry_attempts: u32, + cancelled: bool, +) -> bool { + sleep_detected && retry_attempts < MAX_STREAM_RETRIES && !cancelled +} + pub(crate) const TOOL_CALL_START_MARKERS: [&str; 5] = [ "[TOOL_CALL]", " { + last_progress_mono = Instant::now(); + last_progress_wall = std::time::SystemTime::now(); // Flip on the first non-MessageStart event — that's // the moment we cross from "stream not yet productive" // (eligible for transparent retry) into "DeepSeek has @@ -553,6 +559,27 @@ impl Engine { Err(e) => { stream_errors = stream_errors.saturating_add(1); let message = self.decorate_auth_error_message(e.to_string()); + // #2990: wall-clock far ahead of the monotonic clock + // since the last chunk means the host slept mid-stream. + // The partial output predates the sleep and the user + // was not watching — schedule a full request retry in + // the post-loop block instead of failing the turn. + let wall_elapsed = last_progress_wall + .elapsed() + .unwrap_or_else(|_| last_progress_mono.elapsed()); + if should_resume_after_sleep( + sleep_gap_detected(last_progress_mono.elapsed(), wall_elapsed), + stream_retry_attempts, + self.cancel_token.is_cancelled(), + ) { + crate::logging::warn(format!( + "Stream error after suspected system sleep ({:?} monotonic vs {:?} wall since last chunk); scheduling request retry: {message}", + last_progress_mono.elapsed(), + wall_elapsed, + )); + sleep_resume_pending = true; + break; + } // #103: when the stream errors before any content was // streamed AND we still have retry budget, transparently // resend the request. DeepSeek has not billed for any @@ -851,18 +878,37 @@ impl Engine { && current_text_visible.trim().is_empty() && current_thinking.trim().is_empty() && !pending_message_complete; - if stream_died_with_nothing { + if stream_died_with_nothing || sleep_resume_pending { if stream_retry_attempts < MAX_STREAM_RETRIES { stream_retry_attempts = stream_retry_attempts.saturating_add(1); - crate::logging::warn(format!( - "Stream died with no content (attempt {stream_retry_attempts}/{MAX_STREAM_RETRIES}); retrying request" - )); - let _ = self - .tx_event - .send(Event::status(format!( - "Connection interrupted; retrying ({stream_retry_attempts}/{MAX_STREAM_RETRIES})" - ))) - .await; + if sleep_resume_pending { + crate::logging::warn(format!( + "Resuming after system sleep (attempt {stream_retry_attempts}/{MAX_STREAM_RETRIES}); discarding partial output and retrying request" + )); + let _ = self + .tx_event + .send(Event::status(format!( + "System sleep detected; connection lost — retrying request ({stream_retry_attempts}/{MAX_STREAM_RETRIES})" + ))) + .await; + // Finalize any partially-rendered assistant cell so + // the retried stream renders fresh instead of + // appending to the pre-sleep fragment. + if pending_message_complete { + let index = last_text_index.unwrap_or(0); + let _ = self.tx_event.send(Event::MessageComplete { index }).await; + } + } else { + crate::logging::warn(format!( + "Stream died with no content (attempt {stream_retry_attempts}/{MAX_STREAM_RETRIES}); retrying request" + )); + let _ = self + .tx_event + .send(Event::status(format!( + "Connection interrupted; retrying ({stream_retry_attempts}/{MAX_STREAM_RETRIES})" + ))) + .await; + } // Don't preserve the per-stream `turn_error` — we're // about to retry, and a successful retry should not // surface the transient error as the turn outcome.