From e617e3e618d2a8a5c00cd3968600db75712f7951 Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Fri, 12 Jun 2026 00:18:55 +0000 Subject: [PATCH] feat(tui): make provider wait observable with route, idle budget, and fanout preflight state The >30s stall reason now reports provider/model and idle-vs-budget seconds instead of a bare 'waiting for model', explicitly marks a planned-but-unlaunched fanout as 0 running, and logs a once-per-turn structured provider-wait incident when a fanout plan is pending. Part of #3095 Co-Authored-By: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> --- crates/tui/src/tui/app.rs | 4 ++ crates/tui/src/tui/footer_ui.rs | 83 ++++++++++++++++++++++++++--- crates/tui/src/tui/ui.rs | 1 + crates/tui/src/tui/ui/tests.rs | 94 +++++++++++++++++++++++++++++++++ 4 files changed, 175 insertions(+), 7 deletions(-) diff --git a/crates/tui/src/tui/app.rs b/crates/tui/src/tui/app.rs index 920584cc..a09b8157 100644 --- a/crates/tui/src/tui/app.rs +++ b/crates/tui/src/tui/app.rs @@ -1275,6 +1275,9 @@ pub struct App { pub next_history_revision: u64, pub api_messages: Vec, pub is_loading: bool, + /// Whether the once-per-turn provider-wait incident (#3095) has already + /// been logged for the current turn. + pub provider_wait_incident_logged: bool, /// Ghost-text follow-up suggestion shown in the composer when empty. /// Generated asynchronously after each completed turn; cleared on new input. pub prompt_suggestion: Option, @@ -2135,6 +2138,7 @@ impl App { next_history_revision: 1, api_messages: Vec::new(), is_loading: false, + provider_wait_incident_logged: false, prompt_suggestion: None, prompt_suggestion_gen: std::sync::atomic::AtomicU64::new(0), offline_mode: false, diff --git a/crates/tui/src/tui/footer_ui.rs b/crates/tui/src/tui/footer_ui.rs index 02dc8ce5..9f55c6f7 100644 --- a/crates/tui/src/tui/footer_ui.rs +++ b/crates/tui/src/tui/footer_ui.rs @@ -93,6 +93,7 @@ pub(crate) fn render_footer(f: &mut Frame, area: Rect, app: &mut App) { if let Some(reason) = stall_reason(app) { label = format!("{label} ({reason})"); } + maybe_log_provider_wait_incident(app); props.state_label = label; props.state_color = palette::DEEPSEEK_SKY; @@ -120,22 +121,22 @@ pub(crate) fn render_footer(f: &mut Frame, area: Rect, app: &mut App) { /// Classify why a turn that has been running for > 30 s might appear stalled. /// Returns a short human-readable reason string, or `None` when the turn has /// not been running long enough to classify as stalled. -pub(crate) fn stall_reason(app: &App) -> Option<&'static str> { +pub(crate) fn stall_reason(app: &App) -> Option { let elapsed = app.turn_started_at?.elapsed(); if elapsed.as_secs() < 30 { return None; } if app.is_compacting { - return Some("compacting context"); + return Some("compacting context".to_string()); } if app.is_loading { - return Some("waiting for model"); + return Some(provider_wait_reason(app)); } if running_agent_count(app) > 0 { - return Some("sub-agents working"); + return Some("sub-agents working".to_string()); } if app.task_panel.iter().any(|task| task.status == "running") { - return Some("background jobs running"); + return Some("background jobs running".to_string()); } let active = app.active_cell.as_ref()?; if active.entries().iter().any(|cell| match cell { @@ -151,14 +152,82 @@ pub(crate) fn stall_reason(app: &App) -> Option<&'static str> { }, _ => false, }) { - return Some("tools executing"); + return Some("tools executing".to_string()); } if app.runtime_turn_status.as_deref() == Some("in_progress") { - return Some("waiting - no recent activity"); + return Some("waiting - no recent activity".to_string()); } None } +/// Seconds the current turn has gone without observable stream activity. +pub(crate) fn provider_wait_idle_secs(app: &App) -> u64 { + app.turn_last_activity_at + .or(app.turn_started_at) + .map(|at| at.elapsed().as_secs()) + .unwrap_or(0) +} + +/// Detailed `waiting for model` reason: provider/model route, elapsed idle +/// time against the stream-idle budget, and — when a sub-agent fanout is +/// planned but nothing has launched — an explicit `0 running` marker so a +/// pre-launch provider wait is never mistaken for active sub-agent work. +fn provider_wait_reason(app: &App) -> String { + let idle = provider_wait_idle_secs(app); + let budget = app.stream_chunk_timeout_secs; + let mut reason = format!( + "waiting for {} {}, {idle}s/{budget}s idle timeout", + app.api_provider.as_str(), + app.model + ); + if running_agent_count(app) == 0 { + if let Some((0, total)) = active_fanout_counts(app) { + reason.push_str(&format!("; fanout 0/{total} running")); + } else if app.pending_subagent_dispatch.is_some() { + reason.push_str("; sub-agent dispatch pending, 0 running"); + } + } + reason +} + +/// Threshold after which a provider wait with a planned fanout is logged as +/// a structured incident (once per turn). +const PROVIDER_WAIT_INCIDENT_SECS: u64 = 120; + +/// Log a compact structured incident when the parent turn has spent a long +/// time in provider wait while a sub-agent fanout plan is present (#3095). +pub(crate) fn maybe_log_provider_wait_incident(app: &mut App) { + if app.provider_wait_incident_logged || !app.is_loading { + return; + } + let elapsed = match app.turn_started_at { + Some(at) => at.elapsed().as_secs(), + None => return, + }; + if elapsed < PROVIDER_WAIT_INCIDENT_SECS { + return; + } + let fanout = active_fanout_counts(app); + let pending_dispatch = app.pending_subagent_dispatch.is_some(); + if fanout.is_none() && !pending_dispatch { + return; + } + let (fanout_running, fanout_total) = fanout.unwrap_or((0, 0)); + app.provider_wait_incident_logged = true; + crate::logging::warn(format!( + "provider-wait incident: provider={} model={} elapsed_secs={elapsed} \ + idle_secs={} stream_idle_budget_secs={} max_subagents={} \ + fanout_running={fanout_running} fanout_total={fanout_total} \ + running_agents={} pending_dispatch={pending_dispatch}", + app.api_provider.as_str(), + app.model, + provider_wait_idle_secs(app), + app.stream_chunk_timeout_secs, + app.max_subagents, + running_agent_count(app), + )); +} + /// Whether the footer should animate the water-spout strip. Driven by the /// underlying live-work flags so the strip stays visible for the *entire* /// turn — not just the moments where bytes are streaming. `is_loading` can diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs index 1c0061fd..166e5714 100644 --- a/crates/tui/src/tui/ui.rs +++ b/crates/tui/src/tui/ui.rs @@ -1788,6 +1788,7 @@ async fn run_event_loop( let now = Instant::now(); app.turn_started_at = Some(now); app.turn_last_activity_at = Some(now); + app.provider_wait_incident_logged = false; // Discoverability hint for users who don't know how // to interrupt a long-running turn (#1367). Only // surface when the status_message slot is empty so diff --git a/crates/tui/src/tui/ui/tests.rs b/crates/tui/src/tui/ui/tests.rs index 10887f32..82973333 100644 --- a/crates/tui/src/tui/ui/tests.rs +++ b/crates/tui/src/tui/ui/tests.rs @@ -3563,6 +3563,28 @@ fn fanout_interrupted_mailbox_drops_running_count() { ); } +#[test] +fn stall_reason_provider_wait_includes_route_and_idle_budget() { + let mut app = create_test_app(); + app.is_loading = true; + app.turn_started_at = Some(Instant::now() - Duration::from_secs(45)); + app.turn_last_activity_at = Some(Instant::now() - Duration::from_secs(40)); + + let reason = crate::tui::footer_ui::stall_reason(&app).expect("stalled turn has a reason"); + assert!( + reason.contains(&format!( + "waiting for {} {}", + app.api_provider.as_str(), + app.model + )), + "{reason}" + ); + assert!( + reason.contains(&format!("/{}s idle timeout", app.stream_chunk_timeout_secs)), + "{reason}" + ); +} + #[test] fn reconcile_syncs_stale_running_cards_with_interrupted_snapshots() { let mut app = create_test_app(); @@ -3604,6 +3626,78 @@ fn reconcile_syncs_stale_running_cards_with_interrupted_snapshots() { assert_eq!(running_agent_count(&app), 1); } +#[test] +fn stall_reason_provider_wait_reports_zero_running_for_planned_fanout() { + let mut app = create_test_app(); + app.is_loading = true; + app.turn_started_at = Some(Instant::now() - Duration::from_secs(45)); + + // A fanout plan exists (card seeded with pending workers) but no child + // agent has launched yet: the reason must say 0 running explicitly. + let card = crate::tui::widgets::agent_card::FanoutCard::new("rlm", app.ui_locale) + .with_workers(["task:a", "task:b", "task:c", "task:d"]); + app.history + .push(HistoryCell::SubAgent(SubAgentCell::Fanout(card))); + app.history_revisions.push(0); + app.last_fanout_card_index = Some(app.history.len() - 1); + + let reason = crate::tui::footer_ui::stall_reason(&app).expect("stalled turn has a reason"); + assert!(reason.contains("fanout 0/4 running"), "{reason}"); + + // Once a worker is actually running the marker disappears. + if let Some(HistoryCell::SubAgent(SubAgentCell::Fanout(card))) = app + .last_fanout_card_index + .and_then(|idx| app.history.get_mut(idx)) + { + card.upsert_worker( + "task:a", + crate::tui::widgets::agent_card::AgentLifecycle::Running, + ); + } + let reason = crate::tui::footer_ui::stall_reason(&app).expect("stalled turn has a reason"); + assert!(!reason.contains("0/4 running"), "{reason}"); +} + +#[test] +fn stall_reason_provider_wait_flags_pending_dispatch() { + let mut app = create_test_app(); + app.is_loading = true; + app.turn_started_at = Some(Instant::now() - Duration::from_secs(31)); + app.pending_subagent_dispatch = Some("agent_spawn".to_string()); + + let reason = crate::tui::footer_ui::stall_reason(&app).expect("stalled turn has a reason"); + assert!( + reason.contains("sub-agent dispatch pending, 0 running"), + "{reason}" + ); +} + +#[test] +fn provider_wait_incident_logs_once_per_turn() { + let mut app = create_test_app(); + app.is_loading = true; + app.turn_started_at = Some(Instant::now() - Duration::from_secs(150)); + app.pending_subagent_dispatch = Some("rlm".to_string()); + + assert!(!app.provider_wait_incident_logged); + crate::tui::footer_ui::maybe_log_provider_wait_incident(&mut app); + assert!(app.provider_wait_incident_logged, "incident logged once"); + + // Below threshold or without a fanout plan, nothing is logged. + let mut quiet = create_test_app(); + quiet.is_loading = true; + quiet.turn_started_at = Some(Instant::now() - Duration::from_secs(150)); + crate::tui::footer_ui::maybe_log_provider_wait_incident(&mut quiet); + assert!(!quiet.provider_wait_incident_logged); + + let mut early = create_test_app(); + early.is_loading = true; + early.turn_started_at = Some(Instant::now() - Duration::from_secs(60)); + early.pending_subagent_dispatch = Some("rlm".to_string()); + crate::tui::footer_ui::maybe_log_provider_wait_incident(&mut early); + assert!(!early.provider_wait_incident_logged); +} + #[test] fn format_token_count_compact_formats_units() { assert_eq!(format_token_count_compact(999), "999");