feat(tui): make provider wait observable with route, idle budget, and fanout preflight state
The >30s stall reason now reports provider/model and idle-vs-budget seconds instead of a bare 'waiting for model', explicitly marks a planned-but-unlaunched fanout as 0 running, and logs a once-per-turn structured provider-wait incident when a fanout plan is pending. Part of #3095 Co-Authored-By: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
This commit is contained in:
@@ -1275,6 +1275,9 @@ pub struct App {
|
||||
pub next_history_revision: u64,
|
||||
pub api_messages: Vec<Message>,
|
||||
pub is_loading: bool,
|
||||
/// Whether the once-per-turn provider-wait incident (#3095) has already
|
||||
/// been logged for the current turn.
|
||||
pub provider_wait_incident_logged: bool,
|
||||
/// Ghost-text follow-up suggestion shown in the composer when empty.
|
||||
/// Generated asynchronously after each completed turn; cleared on new input.
|
||||
pub prompt_suggestion: Option<String>,
|
||||
@@ -2135,6 +2138,7 @@ impl App {
|
||||
next_history_revision: 1,
|
||||
api_messages: Vec::new(),
|
||||
is_loading: false,
|
||||
provider_wait_incident_logged: false,
|
||||
prompt_suggestion: None,
|
||||
prompt_suggestion_gen: std::sync::atomic::AtomicU64::new(0),
|
||||
offline_mode: false,
|
||||
|
||||
@@ -93,6 +93,7 @@ pub(crate) fn render_footer(f: &mut Frame, area: Rect, app: &mut App) {
|
||||
if let Some(reason) = stall_reason(app) {
|
||||
label = format!("{label} ({reason})");
|
||||
}
|
||||
maybe_log_provider_wait_incident(app);
|
||||
props.state_label = label;
|
||||
props.state_color = palette::DEEPSEEK_SKY;
|
||||
|
||||
@@ -120,22 +121,22 @@ pub(crate) fn render_footer(f: &mut Frame, area: Rect, app: &mut App) {
|
||||
/// Classify why a turn that has been running for > 30 s might appear stalled.
|
||||
/// Returns a short human-readable reason string, or `None` when the turn has
|
||||
/// not been running long enough to classify as stalled.
|
||||
pub(crate) fn stall_reason(app: &App) -> Option<&'static str> {
|
||||
pub(crate) fn stall_reason(app: &App) -> Option<String> {
|
||||
let elapsed = app.turn_started_at?.elapsed();
|
||||
if elapsed.as_secs() < 30 {
|
||||
return None;
|
||||
}
|
||||
if app.is_compacting {
|
||||
return Some("compacting context");
|
||||
return Some("compacting context".to_string());
|
||||
}
|
||||
if app.is_loading {
|
||||
return Some("waiting for model");
|
||||
return Some(provider_wait_reason(app));
|
||||
}
|
||||
if running_agent_count(app) > 0 {
|
||||
return Some("sub-agents working");
|
||||
return Some("sub-agents working".to_string());
|
||||
}
|
||||
if app.task_panel.iter().any(|task| task.status == "running") {
|
||||
return Some("background jobs running");
|
||||
return Some("background jobs running".to_string());
|
||||
}
|
||||
let active = app.active_cell.as_ref()?;
|
||||
if active.entries().iter().any(|cell| match cell {
|
||||
@@ -151,14 +152,82 @@ pub(crate) fn stall_reason(app: &App) -> Option<&'static str> {
|
||||
},
|
||||
_ => false,
|
||||
}) {
|
||||
return Some("tools executing");
|
||||
return Some("tools executing".to_string());
|
||||
}
|
||||
if app.runtime_turn_status.as_deref() == Some("in_progress") {
|
||||
return Some("waiting - no recent activity");
|
||||
return Some("waiting - no recent activity".to_string());
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Seconds the current turn has gone without observable stream activity.
|
||||
pub(crate) fn provider_wait_idle_secs(app: &App) -> u64 {
|
||||
app.turn_last_activity_at
|
||||
.or(app.turn_started_at)
|
||||
.map(|at| at.elapsed().as_secs())
|
||||
.unwrap_or(0)
|
||||
}
|
||||
|
||||
/// Detailed `waiting for model` reason: provider/model route, elapsed idle
|
||||
/// time against the stream-idle budget, and — when a sub-agent fanout is
|
||||
/// planned but nothing has launched — an explicit `0 running` marker so a
|
||||
/// pre-launch provider wait is never mistaken for active sub-agent work.
|
||||
fn provider_wait_reason(app: &App) -> String {
|
||||
let idle = provider_wait_idle_secs(app);
|
||||
let budget = app.stream_chunk_timeout_secs;
|
||||
let mut reason = format!(
|
||||
"waiting for {} {}, {idle}s/{budget}s idle timeout",
|
||||
app.api_provider.as_str(),
|
||||
app.model
|
||||
);
|
||||
if running_agent_count(app) == 0 {
|
||||
if let Some((0, total)) = active_fanout_counts(app) {
|
||||
reason.push_str(&format!("; fanout 0/{total} running"));
|
||||
} else if app.pending_subagent_dispatch.is_some() {
|
||||
reason.push_str("; sub-agent dispatch pending, 0 running");
|
||||
}
|
||||
}
|
||||
reason
|
||||
}
|
||||
|
||||
/// Threshold after which a provider wait with a planned fanout is logged as
|
||||
/// a structured incident (once per turn).
|
||||
const PROVIDER_WAIT_INCIDENT_SECS: u64 = 120;
|
||||
|
||||
/// Log a compact structured incident when the parent turn has spent a long
|
||||
/// time in provider wait while a sub-agent fanout plan is present (#3095).
|
||||
pub(crate) fn maybe_log_provider_wait_incident(app: &mut App) {
|
||||
if app.provider_wait_incident_logged || !app.is_loading {
|
||||
return;
|
||||
}
|
||||
let elapsed = match app.turn_started_at {
|
||||
Some(at) => at.elapsed().as_secs(),
|
||||
None => return,
|
||||
};
|
||||
if elapsed < PROVIDER_WAIT_INCIDENT_SECS {
|
||||
return;
|
||||
}
|
||||
let fanout = active_fanout_counts(app);
|
||||
let pending_dispatch = app.pending_subagent_dispatch.is_some();
|
||||
if fanout.is_none() && !pending_dispatch {
|
||||
return;
|
||||
}
|
||||
let (fanout_running, fanout_total) = fanout.unwrap_or((0, 0));
|
||||
app.provider_wait_incident_logged = true;
|
||||
crate::logging::warn(format!(
|
||||
"provider-wait incident: provider={} model={} elapsed_secs={elapsed} \
|
||||
idle_secs={} stream_idle_budget_secs={} max_subagents={} \
|
||||
fanout_running={fanout_running} fanout_total={fanout_total} \
|
||||
running_agents={} pending_dispatch={pending_dispatch}",
|
||||
app.api_provider.as_str(),
|
||||
app.model,
|
||||
provider_wait_idle_secs(app),
|
||||
app.stream_chunk_timeout_secs,
|
||||
app.max_subagents,
|
||||
running_agent_count(app),
|
||||
));
|
||||
}
|
||||
|
||||
/// Whether the footer should animate the water-spout strip. Driven by the
|
||||
/// underlying live-work flags so the strip stays visible for the *entire*
|
||||
/// turn — not just the moments where bytes are streaming. `is_loading` can
|
||||
|
||||
@@ -1788,6 +1788,7 @@ async fn run_event_loop(
|
||||
let now = Instant::now();
|
||||
app.turn_started_at = Some(now);
|
||||
app.turn_last_activity_at = Some(now);
|
||||
app.provider_wait_incident_logged = false;
|
||||
// Discoverability hint for users who don't know how
|
||||
// to interrupt a long-running turn (#1367). Only
|
||||
// surface when the status_message slot is empty so
|
||||
|
||||
@@ -3563,6 +3563,28 @@ fn fanout_interrupted_mailbox_drops_running_count() {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn stall_reason_provider_wait_includes_route_and_idle_budget() {
|
||||
let mut app = create_test_app();
|
||||
app.is_loading = true;
|
||||
app.turn_started_at = Some(Instant::now() - Duration::from_secs(45));
|
||||
app.turn_last_activity_at = Some(Instant::now() - Duration::from_secs(40));
|
||||
|
||||
let reason = crate::tui::footer_ui::stall_reason(&app).expect("stalled turn has a reason");
|
||||
assert!(
|
||||
reason.contains(&format!(
|
||||
"waiting for {} {}",
|
||||
app.api_provider.as_str(),
|
||||
app.model
|
||||
)),
|
||||
"{reason}"
|
||||
);
|
||||
assert!(
|
||||
reason.contains(&format!("/{}s idle timeout", app.stream_chunk_timeout_secs)),
|
||||
"{reason}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn reconcile_syncs_stale_running_cards_with_interrupted_snapshots() {
|
||||
let mut app = create_test_app();
|
||||
@@ -3604,6 +3626,78 @@ fn reconcile_syncs_stale_running_cards_with_interrupted_snapshots() {
|
||||
assert_eq!(running_agent_count(&app), 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn stall_reason_provider_wait_reports_zero_running_for_planned_fanout() {
|
||||
let mut app = create_test_app();
|
||||
app.is_loading = true;
|
||||
app.turn_started_at = Some(Instant::now() - Duration::from_secs(45));
|
||||
|
||||
// A fanout plan exists (card seeded with pending workers) but no child
|
||||
// agent has launched yet: the reason must say 0 running explicitly.
|
||||
let card = crate::tui::widgets::agent_card::FanoutCard::new("rlm", app.ui_locale)
|
||||
.with_workers(["task:a", "task:b", "task:c", "task:d"]);
|
||||
app.history
|
||||
.push(HistoryCell::SubAgent(SubAgentCell::Fanout(card)));
|
||||
app.history_revisions.push(0);
|
||||
app.last_fanout_card_index = Some(app.history.len() - 1);
|
||||
|
||||
let reason = crate::tui::footer_ui::stall_reason(&app).expect("stalled turn has a reason");
|
||||
assert!(reason.contains("fanout 0/4 running"), "{reason}");
|
||||
|
||||
// Once a worker is actually running the marker disappears.
|
||||
if let Some(HistoryCell::SubAgent(SubAgentCell::Fanout(card))) = app
|
||||
.last_fanout_card_index
|
||||
.and_then(|idx| app.history.get_mut(idx))
|
||||
{
|
||||
card.upsert_worker(
|
||||
"task:a",
|
||||
crate::tui::widgets::agent_card::AgentLifecycle::Running,
|
||||
);
|
||||
}
|
||||
let reason = crate::tui::footer_ui::stall_reason(&app).expect("stalled turn has a reason");
|
||||
assert!(!reason.contains("0/4 running"), "{reason}");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn stall_reason_provider_wait_flags_pending_dispatch() {
|
||||
let mut app = create_test_app();
|
||||
app.is_loading = true;
|
||||
app.turn_started_at = Some(Instant::now() - Duration::from_secs(31));
|
||||
app.pending_subagent_dispatch = Some("agent_spawn".to_string());
|
||||
|
||||
let reason = crate::tui::footer_ui::stall_reason(&app).expect("stalled turn has a reason");
|
||||
assert!(
|
||||
reason.contains("sub-agent dispatch pending, 0 running"),
|
||||
"{reason}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn provider_wait_incident_logs_once_per_turn() {
|
||||
let mut app = create_test_app();
|
||||
app.is_loading = true;
|
||||
app.turn_started_at = Some(Instant::now() - Duration::from_secs(150));
|
||||
app.pending_subagent_dispatch = Some("rlm".to_string());
|
||||
|
||||
assert!(!app.provider_wait_incident_logged);
|
||||
crate::tui::footer_ui::maybe_log_provider_wait_incident(&mut app);
|
||||
assert!(app.provider_wait_incident_logged, "incident logged once");
|
||||
|
||||
// Below threshold or without a fanout plan, nothing is logged.
|
||||
let mut quiet = create_test_app();
|
||||
quiet.is_loading = true;
|
||||
quiet.turn_started_at = Some(Instant::now() - Duration::from_secs(150));
|
||||
crate::tui::footer_ui::maybe_log_provider_wait_incident(&mut quiet);
|
||||
assert!(!quiet.provider_wait_incident_logged);
|
||||
|
||||
let mut early = create_test_app();
|
||||
early.is_loading = true;
|
||||
early.turn_started_at = Some(Instant::now() - Duration::from_secs(60));
|
||||
early.pending_subagent_dispatch = Some("rlm".to_string());
|
||||
crate::tui::footer_ui::maybe_log_provider_wait_incident(&mut early);
|
||||
assert!(!early.provider_wait_incident_logged);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn format_token_count_compact_formats_units() {
|
||||
assert_eq!(format_token_count_compact(999), "999");
|
||||
|
||||
Reference in New Issue
Block a user