Merge pull request #2283 from HUQIANTAO/fix/turn-stall-watchdog
fix(engine): recover from stalled in-progress turns
This commit is contained in:
@@ -149,6 +149,11 @@ const UI_IDLE_POLL_MS: u64 = 48;
|
||||
const UI_ACTIVE_POLL_MS: u64 = 24;
|
||||
const WEB_CONFIG_POLL_MS: u64 = 16;
|
||||
const DISPATCH_WATCHDOG_TIMEOUT: Duration = Duration::from_secs(30);
|
||||
/// Maximum wall-clock time a turn may stay in `"in_progress"` before the UI
|
||||
/// assumes the engine stalled (e.g. sub-agent hang, lost completion event,
|
||||
/// engine panic). Matched to [`DEFAULT_STREAM_IDLE_TIMEOUT`] so legitimate
|
||||
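/// long-running tool chains are not interrupted prematurely. The clock starts
/// at `turn_started_at`, and the watchdog only fires while no agents are
/// reported running and no compaction is in progress.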
|
||||
const TURN_STALL_WATCHDOG_TIMEOUT: Duration = Duration::from_secs(300);
|
||||
// Forced repaint cadence while a turn is live (model loading, compacting,
|
||||
// sub-agents running). Drives the footer water-spout animation as well as
|
||||
// the per-tool spinner pulse — keep this fast enough that the spout reads as
|
||||
@@ -4008,6 +4013,40 @@ fn reconcile_turn_liveness(app: &mut App, now: Instant, has_running_agents: bool
|
||||
return true;
|
||||
}
|
||||
|
||||
// Branch 3: turn started but never completed — engine may have
|
||||
// panicked, sub-agent may be stuck, or the completion event was lost.
|
||||
if app.is_loading
|
||||
&& matches!(app.runtime_turn_status.as_deref(), Some("in_progress"))
|
||||
&& !has_running_agents
|
||||
&& !app.is_compacting
|
||||
&& app.turn_started_at.is_some_and(|started| {
|
||||
now.saturating_duration_since(started) > TURN_STALL_WATCHDOG_TIMEOUT
|
||||
})
|
||||
{
|
||||
// Finalize in-flight thinking / assistant / tool cells so the
|
||||
// transcript doesn't show permanent spinners after recovery.
|
||||
streaming_thinking::finalize_current(app);
|
||||
app.finalize_streaming_assistant_as_interrupted();
|
||||
app.finalize_active_cell_as_interrupted();
|
||||
app.streaming_state.reset();
|
||||
app.streaming_message_index = None;
|
||||
app.streaming_thinking_active_entry = None;
|
||||
|
||||
app.is_loading = false;
|
||||
app.turn_started_at = None;
|
||||
app.runtime_turn_status = None;
|
||||
app.runtime_turn_id = None;
|
||||
app.dispatch_started_at = None;
|
||||
// Per-turn scroll lock — clear so the next turn auto-scrolls.
|
||||
app.user_scrolled_during_stream = false;
|
||||
app.push_status_toast(
|
||||
"Turn stalled — no completion signal received. Please try again.",
|
||||
StatusToastLevel::Error,
|
||||
None,
|
||||
);
|
||||
return true;
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
|
||||
@@ -2111,17 +2111,43 @@ fn turn_liveness_leaves_active_turn_running() {
|
||||
let mut app = create_test_app();
|
||||
app.is_loading = true;
|
||||
app.runtime_turn_status = Some("in_progress".to_string());
|
||||
app.dispatch_started_at =
|
||||
Some(Instant::now() - DISPATCH_WATCHDOG_TIMEOUT - Duration::from_secs(10));
|
||||
app.turn_started_at = Some(Instant::now() - Duration::from_secs(60));
|
||||
|
||||
let recovered = reconcile_turn_liveness(&mut app, Instant::now(), false);
|
||||
|
||||
assert!(!recovered);
|
||||
assert!(app.is_loading);
|
||||
assert!(app.dispatch_started_at.is_some());
|
||||
assert!(app.turn_started_at.is_some());
|
||||
assert!(app.status_toasts.is_empty());
|
||||
}
|
||||
|
||||
/// Verifies the stall watchdog in `reconcile_turn_liveness`: a turn that has
/// been `"in_progress"` for longer than [`TURN_STALL_WATCHDOG_TIMEOUT`] with no
/// running agents is torn down, every piece of per-turn state is cleared, and
/// an error toast is surfaced to the user.
#[test]
fn turn_liveness_recovers_stalled_in_progress_turn() {
    let mut app = create_test_app();

    // Sample the clock once (after the app is built, so `now` is never earlier
    // than anything the app recorded) and derive both timestamps from it.
    let observed = Instant::now();
    let stall_age = TURN_STALL_WATCHDOG_TIMEOUT + Duration::from_millis(1);
    // `Instant - Duration` panics when the result is not representable, which
    // can happen on platforms whose monotonic clock starts near boot. Back-date
    // the turn start when possible; otherwise forward-date `now` by the same
    // amount so the elapsed time seen by the watchdog is identical.
    let (started, now) = match observed.checked_sub(stall_age) {
        Some(started) => (started, observed),
        None => (observed, observed + stall_age),
    };

    // Simulate a turn that began just past the watchdog window and never
    // reported completion.
    app.is_loading = true;
    app.runtime_turn_status = Some("in_progress".to_string());
    app.runtime_turn_id = Some("stale-turn-id".to_string());
    app.turn_started_at = Some(started);
    app.streaming_message_index = Some(0);
    app.user_scrolled_during_stream = true;

    // `false`: no agents are running, so the watchdog is allowed to fire.
    let recovered = reconcile_turn_liveness(&mut app, now, false);

    assert!(recovered);

    // Turn bookkeeping is fully reset.
    assert!(!app.is_loading);
    assert!(app.turn_started_at.is_none());
    assert!(app.runtime_turn_status.is_none());
    assert!(app.runtime_turn_id.is_none());
    assert!(app.dispatch_started_at.is_none());

    // Streaming state and the per-turn scroll lock are released.
    assert!(app.streaming_message_index.is_none());
    assert!(app.streaming_thinking_active_entry.is_none());
    assert!(!app.user_scrolled_during_stream);

    // The user is told what happened.
    let toast = app.status_toasts.back().expect("stall toast");
    assert_eq!(toast.level, StatusToastLevel::Error);
    assert!(toast.text.contains("Turn stalled"));
}
|
||||
|
||||
#[test]
|
||||
fn fixed_model_auto_thinking_skips_auto_model_router() {
|
||||
let mut app = create_test_app();
|
||||
|
||||
Reference in New Issue
Block a user