From 8ed924f3d52d2cb31065215bb8d63332b42e8a25 Mon Sep 17 00:00:00 2001 From: Hu Qiantao Date: Wed, 27 May 2026 21:26:41 +0800 Subject: [PATCH 1/3] fix(engine): recover from stalled in-progress turns reconcile_turn_liveness() had a blind spot: when TurnStarted arrived (setting runtime_turn_status to "in_progress") but TurnComplete never came (sub-agent hang, engine panic, lost event), neither existing watchdog branch fired. is_loading stayed true permanently, queuing all subsequent messages. Add Branch 3 with a 5-minute timeout (matched to stream idle timeout) that checks turn_started_at for staleness when the turn is stuck in "in_progress" with no running sub-agents. --- crates/tui/src/tui/ui.rs | 27 +++++++++++++++++++++++++++ crates/tui/src/tui/ui/tests.rs | 25 ++++++++++++++++++++++--- 2 files changed, 49 insertions(+), 3 deletions(-) diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs index fb89de61..e7fecc72 100644 --- a/crates/tui/src/tui/ui.rs +++ b/crates/tui/src/tui/ui.rs @@ -146,6 +146,11 @@ const UI_IDLE_POLL_MS: u64 = 48; const UI_ACTIVE_POLL_MS: u64 = 24; const WEB_CONFIG_POLL_MS: u64 = 16; const DISPATCH_WATCHDOG_TIMEOUT: Duration = Duration::from_secs(30); +/// Maximum wall-clock time a turn may stay in `"in_progress"` before the UI +/// assumes the engine stalled (e.g. sub-agent hang, lost completion event, +/// engine panic). Matched to [`DEFAULT_STREAM_IDLE_TIMEOUT`] so legitimate +/// long-running tool chains are not interrupted prematurely. +const TURN_STALL_WATCHDOG_TIMEOUT: Duration = Duration::from_secs(300); // Forced repaint cadence while a turn is live (model loading, compacting, // sub-agents running). Drives the footer water-spout animation as well as // the per-tool spinner pulse — keep this fast enough that the spout reads as @@ -3901,6 +3906,28 @@ fn reconcile_turn_liveness(app: &mut App, now: Instant, has_running_agents: bool return true; } + // Branch 3: turn started but never completed — engine may have + // panicked, sub-agent may be stuck, or the completion event was lost. + if app.is_loading + && matches!(app.runtime_turn_status.as_deref(), Some("in_progress")) + && !has_running_agents + && !app.is_compacting + && app.turn_started_at.is_some_and(|started| { + now.saturating_duration_since(started) > TURN_STALL_WATCHDOG_TIMEOUT + }) + { + app.is_loading = false; + app.turn_started_at = None; + app.runtime_turn_status = None; + app.dispatch_started_at = None; + app.push_status_toast( + "Turn stalled — no completion signal received. Please try again.", + StatusToastLevel::Error, + None, + ); + return true; + } + false } diff --git a/crates/tui/src/tui/ui/tests.rs b/crates/tui/src/tui/ui/tests.rs index 4f0baa5b..97f09750 100644 --- a/crates/tui/src/tui/ui/tests.rs +++ b/crates/tui/src/tui/ui/tests.rs @@ -2021,17 +2021,36 @@ fn turn_liveness_leaves_active_turn_running() { let mut app = create_test_app(); app.is_loading = true; app.runtime_turn_status = Some("in_progress".to_string()); - app.dispatch_started_at = - Some(Instant::now() - DISPATCH_WATCHDOG_TIMEOUT - Duration::from_secs(10)); + app.turn_started_at = Some(Instant::now() - Duration::from_secs(60)); let recovered = reconcile_turn_liveness(&mut app, Instant::now(), false); assert!(!recovered); assert!(app.is_loading); - assert!(app.dispatch_started_at.is_some()); + assert!(app.turn_started_at.is_some()); assert!(app.status_toasts.is_empty()); } +#[test] +fn turn_liveness_recovers_stalled_in_progress_turn() { + let mut app = create_test_app(); + app.is_loading = true; + app.runtime_turn_status = Some("in_progress".to_string()); + app.turn_started_at = + Some(Instant::now() - TURN_STALL_WATCHDOG_TIMEOUT - Duration::from_millis(1)); + + let recovered = reconcile_turn_liveness(&mut app, Instant::now(), false); + + assert!(recovered); + assert!(!app.is_loading); + assert!(app.turn_started_at.is_none()); + assert!(app.runtime_turn_status.is_none()); + assert!(app.dispatch_started_at.is_none()); + let toast = app.status_toasts.back().expect("stall toast"); + assert_eq!(toast.level, StatusToastLevel::Error); + assert!(toast.text.contains("Turn stalled")); +} + #[test] fn fixed_model_auto_thinking_skips_auto_model_router() { let mut app = create_test_app(); From 6399d560be3ed81a585f402d61a44bfe541681a1 Mon Sep 17 00:00:00 2001 From: Hu Qiantao Date: Wed, 27 May 2026 21:39:03 +0800 Subject: [PATCH 2/3] fix(tui): finalize transcript cells on stall recovery The watchdog Branch 3 recovery left in-flight tool cells and streaming assistant messages in a running state, causing permanent spinners in the transcript. Also left runtime_turn_id stale, showing "(in progress)" for a turn that had already been recovered. Align the cleanup with apply_engine_error_to_app: finalize thinking, streaming assistant, and active cells as interrupted; reset streaming state; clear runtime_turn_id and streaming indices. --- crates/tui/src/tui/ui.rs | 10 ++++++++++ crates/tui/src/tui/ui/tests.rs | 5 +++++ 2 files changed, 15 insertions(+) diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs index e7fecc72..e73ec537 100644 --- a/crates/tui/src/tui/ui.rs +++ b/crates/tui/src/tui/ui.rs @@ -3916,9 +3916,19 @@ fn reconcile_turn_liveness(app: &mut App, now: Instant, has_running_agents: bool now.saturating_duration_since(started) > TURN_STALL_WATCHDOG_TIMEOUT }) { + // Finalize in-flight thinking / assistant / tool cells so the + // transcript doesn't show permanent spinners after recovery. + streaming_thinking::finalize_current(app); + app.finalize_streaming_assistant_as_interrupted(); + app.finalize_active_cell_as_interrupted(); + app.streaming_state.reset(); + app.streaming_message_index = None; + app.streaming_thinking_active_entry = None; + app.is_loading = false; app.turn_started_at = None; app.runtime_turn_status = None; + app.runtime_turn_id = None; app.dispatch_started_at = None; app.push_status_toast( "Turn stalled — no completion signal received. Please try again.", diff --git a/crates/tui/src/tui/ui/tests.rs b/crates/tui/src/tui/ui/tests.rs index 97f09750..d41cf68b 100644 --- a/crates/tui/src/tui/ui/tests.rs +++ b/crates/tui/src/tui/ui/tests.rs @@ -2036,8 +2036,10 @@ fn turn_liveness_recovers_stalled_in_progress_turn() { let mut app = create_test_app(); app.is_loading = true; app.runtime_turn_status = Some("in_progress".to_string()); + app.runtime_turn_id = Some("stale-turn-id".to_string()); app.turn_started_at = Some(Instant::now() - TURN_STALL_WATCHDOG_TIMEOUT - Duration::from_millis(1)); + app.streaming_message_index = Some(0); let recovered = reconcile_turn_liveness(&mut app, Instant::now(), false); @@ -2045,7 +2047,10 @@ fn turn_liveness_recovers_stalled_in_progress_turn() { assert!(!app.is_loading); assert!(app.turn_started_at.is_none()); assert!(app.runtime_turn_status.is_none()); + assert!(app.runtime_turn_id.is_none()); assert!(app.dispatch_started_at.is_none()); + assert!(app.streaming_message_index.is_none()); + assert!(app.streaming_thinking_active_entry.is_none()); let toast = app.status_toasts.back().expect("stall toast"); assert_eq!(toast.level, StatusToastLevel::Error); assert!(toast.text.contains("Turn stalled")); From a1e92cd6c2abe60a9752977921b365b9b88938ff Mon Sep 17 00:00:00 2001 From: Hu Qiantao Date: Wed, 27 May 2026 22:37:46 +0800 Subject: [PATCH 3/3] fix(tui): reset user_scrolled_during_stream on stall recovery Without this, the turn immediately after a stall recovery would inherit the scroll-lock from the stalled turn and silently skip auto-scroll, leaving the user staring at stale content. --- crates/tui/src/tui/ui.rs | 2 ++ crates/tui/src/tui/ui/tests.rs | 2 ++ 2 files changed, 4 insertions(+) diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs index e73ec537..c53039e7 100644 --- a/crates/tui/src/tui/ui.rs +++ b/crates/tui/src/tui/ui.rs @@ -3930,6 +3930,8 @@ fn reconcile_turn_liveness(app: &mut App, now: Instant, has_running_agents: bool app.runtime_turn_status = None; app.runtime_turn_id = None; app.dispatch_started_at = None; + // Per-turn scroll lock — clear so the next turn auto-scrolls. + app.user_scrolled_during_stream = false; app.push_status_toast( "Turn stalled — no completion signal received. Please try again.", StatusToastLevel::Error, diff --git a/crates/tui/src/tui/ui/tests.rs b/crates/tui/src/tui/ui/tests.rs index d41cf68b..5bc6fc8a 100644 --- a/crates/tui/src/tui/ui/tests.rs +++ b/crates/tui/src/tui/ui/tests.rs @@ -2040,6 +2040,7 @@ fn turn_liveness_recovers_stalled_in_progress_turn() { app.turn_started_at = Some(Instant::now() - TURN_STALL_WATCHDOG_TIMEOUT - Duration::from_millis(1)); app.streaming_message_index = Some(0); + app.user_scrolled_during_stream = true; let recovered = reconcile_turn_liveness(&mut app, Instant::now(), false); @@ -2051,6 +2052,7 @@ fn turn_liveness_recovers_stalled_in_progress_turn() { assert!(app.dispatch_started_at.is_none()); assert!(app.streaming_message_index.is_none()); assert!(app.streaming_thinking_active_entry.is_none()); + assert!(!app.user_scrolled_during_stream); let toast = app.status_toasts.back().expect("stall toast"); assert_eq!(toast.level, StatusToastLevel::Error); assert!(toast.text.contains("Turn stalled"));