Merge pull request #2283 from HUQIANTAO/fix/turn-stall-watchdog

fix(engine): recover from stalled in-progress turns
This commit is contained in:
Hunter Bown
2026-05-30 23:40:01 -07:00
committed by GitHub
2 changed files with 68 additions and 3 deletions
+39
View File
@@ -149,6 +149,11 @@ const UI_IDLE_POLL_MS: u64 = 48;
const UI_ACTIVE_POLL_MS: u64 = 24;
const WEB_CONFIG_POLL_MS: u64 = 16;
const DISPATCH_WATCHDOG_TIMEOUT: Duration = Duration::from_secs(30);
/// Upper bound on how long a turn may sit in the `"in_progress"` state before
/// the UI gives up on it and treats the engine as stalled (for example a hung
/// sub-agent, a dropped completion event, or an engine panic).
///
/// Intended to mirror [`DEFAULT_STREAM_IDLE_TIMEOUT`] so that legitimately
/// long-running tool chains are not cut off early.
///
/// NOTE(review): the liveness check compares this against the time elapsed
/// since `turn_started_at`, i.e. total turn age rather than time since the
/// last activity — confirm that is the intended meaning of "stall".
const TURN_STALL_WATCHDOG_TIMEOUT: Duration = Duration::from_secs(5 * 60);
// Forced repaint cadence while a turn is live (model loading, compacting,
// sub-agents running). Drives the footer water-spout animation as well as
// the per-tool spinner pulse — keep this fast enough that the spout reads as
@@ -4008,6 +4013,40 @@ fn reconcile_turn_liveness(app: &mut App, now: Instant, has_running_agents: bool
return true;
}
// Branch 3: turn started but never completed — engine may have
// panicked, sub-agent may be stuck, or the completion event was lost.
if app.is_loading
&& matches!(app.runtime_turn_status.as_deref(), Some("in_progress"))
&& !has_running_agents
&& !app.is_compacting
&& app.turn_started_at.is_some_and(|started| {
now.saturating_duration_since(started) > TURN_STALL_WATCHDOG_TIMEOUT
})
{
// Finalize in-flight thinking / assistant / tool cells so the
// transcript doesn't show permanent spinners after recovery.
streaming_thinking::finalize_current(app);
app.finalize_streaming_assistant_as_interrupted();
app.finalize_active_cell_as_interrupted();
app.streaming_state.reset();
app.streaming_message_index = None;
app.streaming_thinking_active_entry = None;
app.is_loading = false;
app.turn_started_at = None;
app.runtime_turn_status = None;
app.runtime_turn_id = None;
app.dispatch_started_at = None;
// Per-turn scroll lock — clear so the next turn auto-scrolls.
app.user_scrolled_during_stream = false;
app.push_status_toast(
"Turn stalled — no completion signal received. Please try again.",
StatusToastLevel::Error,
None,
);
return true;
}
false
}
+29 -3
View File
@@ -2111,17 +2111,43 @@ fn turn_liveness_leaves_active_turn_running() {
let mut app = create_test_app();
app.is_loading = true;
app.runtime_turn_status = Some("in_progress".to_string());
app.dispatch_started_at =
Some(Instant::now() - DISPATCH_WATCHDOG_TIMEOUT - Duration::from_secs(10));
app.turn_started_at = Some(Instant::now() - Duration::from_secs(60));
let recovered = reconcile_turn_liveness(&mut app, Instant::now(), false);
assert!(!recovered);
assert!(app.is_loading);
assert!(app.dispatch_started_at.is_some());
assert!(app.turn_started_at.is_some());
assert!(app.status_toasts.is_empty());
}
// Verifies the stall watchdog: a turn that has been `"in_progress"` for longer
// than `TURN_STALL_WATCHDOG_TIMEOUT`, with no running agents, is torn down and
// the user is told via an error toast.
#[test]
fn turn_liveness_recovers_stalled_in_progress_turn() {
    let mut app = create_test_app();

    // Anchor on a single instant and move `now` *forward* past the timeout.
    // Building a stale start with `Instant::now() - timeout` can panic when
    // the result is not representable (e.g. a monotonic clock whose origin is
    // more recent than the timeout), and sampling the clock twice makes the
    // elapsed time depend on scheduling.
    let started = Instant::now();
    let now = started + TURN_STALL_WATCHDOG_TIMEOUT + Duration::from_millis(1);

    app.is_loading = true;
    app.runtime_turn_status = Some("in_progress".to_string());
    app.runtime_turn_id = Some("stale-turn-id".to_string());
    app.turn_started_at = Some(started);
    app.streaming_message_index = Some(0);
    app.user_scrolled_during_stream = true;

    let recovered = reconcile_turn_liveness(&mut app, now, false);

    // The watchdog must report that it intervened...
    assert!(recovered);

    // ...and every piece of per-turn state must be cleared.
    assert!(!app.is_loading);
    assert!(app.turn_started_at.is_none());
    assert!(app.runtime_turn_status.is_none());
    assert!(app.runtime_turn_id.is_none());
    assert!(app.dispatch_started_at.is_none());
    assert!(app.streaming_message_index.is_none());
    assert!(app.streaming_thinking_active_entry.is_none());
    assert!(!app.user_scrolled_during_stream);

    // The most recent toast is the user-facing explanation.
    let toast = app.status_toasts.back().expect("stall toast");
    assert_eq!(toast.level, StatusToastLevel::Error);
    assert!(toast.text.contains("Turn stalled"));
}
#[test]
fn fixed_model_auto_thinking_skips_auto_model_router() {
let mut app = create_test_app();