feat(tui): show live background shell output (#2826)

2026-06-05 21:51:40 -07:00
parent 1bacaf763e
commit 787383591a
9 changed files with 293 additions and 1 deletions
@@ -66,6 +66,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
  visibility trail, and @BigBenLabs, @lzx1545642258, @yangdaowan,
  @mangdehuang, @VerrPower, @hejia-v, @nasus9527, and @ygzhang-cn for the
  GUI/VS Code demand and validation trail.
+- Added inline live-output refresh for background shell Exec cards keyed by the
+  exact shell task id, so long-running commands can show bounded stdout/stderr
+  tails without consuming deltas or matching by command text. Thanks
+  @donglovejava for the live shell-output direction in #2048.
 - Added a static prompt composer override for embedders that need to replace
  the byte-stable base/personality prompt segment while leaving mode metadata,
  approval policy, tool taxonomy, Context Management, and the Compaction Relay
@@ -66,6 +66,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
  visibility trail, and @BigBenLabs, @lzx1545642258, @yangdaowan,
  @mangdehuang, @VerrPower, @hejia-v, @nasus9527, and @ygzhang-cn for the
  GUI/VS Code demand and validation trail.
+- Added inline live-output refresh for background shell Exec cards keyed by the
+  exact shell task id, so long-running commands can show bounded stdout/stderr
+  tails without consuming deltas or matching by command text. Thanks
+  @donglovejava for the live shell-output direction in #2048.
 - Added a static prompt composer override for embedders that need to replace
  the byte-stable base/personality prompt segment while leaving mode metadata,
  approval policy, tool taxonomy, Context Management, and the Compaction Relay
@@ -331,6 +331,8 @@ mod tests {
            command: command.to_string(),
            status: ToolStatus::Running,
            output: None,
+            live_output: None,
+            shell_task_id: None,
            started_at: Some(Instant::now()),
            duration_ms: None,
            source: ExecSource::Assistant,
@@ -890,6 +890,8 @@ pub struct ExecCell {
    pub command: String,
    pub status: ToolStatus,
    pub output: Option<String>,
+    pub live_output: Option<String>,
+    pub shell_task_id: Option<String>,
    pub started_at: Option<Instant>,
    pub duration_ms: Option<u64>,
    pub source: ExecSource,
@@ -946,7 +948,7 @@ impl ExecCell {
        }

        if self.interaction.is_none() {
-            if let Some(output) = self.output.as_ref() {
+            if let Some(output) = self.output.as_ref().or(self.live_output.as_ref()) {
                lines.extend(render_exec_output_mode(
                    output,
                    width,
@@ -4306,6 +4308,8 @@ mod tests {
            command: "echo hi".to_string(),
            status: ToolStatus::Running,
            output: None,
+            live_output: None,
+            shell_task_id: None,
            started_at,
            duration_ms: None,
            source: ExecSource::Assistant,
@@ -4645,6 +4649,8 @@ mod tests {
            command: "ls".to_string(),
            status: ToolStatus::Success,
            output: Some("a\nb\n".to_string()),
+            live_output: None,
+            shell_task_id: None,
            started_at: None,
            duration_ms: Some(10),
            source: ExecSource::Assistant,
@@ -4675,6 +4681,8 @@ mod tests {
            command: "cargo test --workspace --all-features".to_string(),
            status: ToolStatus::Running,
            output: None,
+            live_output: None,
+            shell_task_id: None,
            started_at: None,
            duration_ms: None,
            source: ExecSource::Assistant,
@@ -4991,6 +4999,8 @@ mod tests {
            command: "false".to_string(),
            status: ToolStatus::Failed,
            output: Some("boom".to_string()),
+            live_output: None,
+            shell_task_id: None,
            started_at: None,
            duration_ms: Some(42),
            source: ExecSource::Assistant,
@@ -5046,6 +5056,49 @@ mod tests {
        lines.iter().map(line_text).collect::<Vec<_>>().join("\n")
    }

+    // A running Exec cell that has no final `output` yet must render the text
+    // held in `live_output` instead.
+    #[test]
+    fn exec_cell_renders_live_shell_output_before_final_output() {
+        let cell = ExecCell {
+            command: "cargo test".to_string(),
+            status: ToolStatus::Running,
+            output: None,
+            live_output: Some("running line 1\nrunning line 2".to_string()),
+            shell_task_id: Some("shell_live".to_string()),
+            started_at: None,
+            duration_ms: None,
+            source: ExecSource::Assistant,
+            interaction: None,
+            output_summary: None,
+        };
+
+        let text = lines_text(&cell.lines_with_motion(80, true));
+
+        // Both live lines are present in the rendered text.
+        assert!(text.contains("running line 1"));
+        assert!(text.contains("running line 2"));
+        // The shell-controls hint string must not appear in this rendering.
+        assert!(!text.contains("Ctrl+B opens shell controls"));
+    }
+
+    // When both `output` and `live_output` are set, only the final `output`
+    // may show up in the rendered lines.
+    #[test]
+    fn exec_cell_prefers_final_output_over_live_shell_tail() {
+        let cell = ExecCell {
+            command: "cargo test".to_string(),
+            status: ToolStatus::Success,
+            output: Some("final output".to_string()),
+            live_output: Some("stale live tail".to_string()),
+            shell_task_id: Some("shell_live".to_string()),
+            started_at: None,
+            duration_ms: None,
+            source: ExecSource::Assistant,
+            interaction: None,
+            output_summary: None,
+        };
+
+        let text = lines_text(&cell.lines_with_motion(80, true));
+
+        assert!(text.contains("final output"));
+        // The leftover live tail must be hidden once final output exists.
+        assert!(!text.contains("stale live tail"));
+    }
+
    #[test]
    fn long_thinking_display_is_shorter_than_transcript() {
        // Build a multi-paragraph thinking body so the live view has
@@ -5162,6 +5215,8 @@ mod tests {
            command: "noisy_script.sh".to_string(),
            status: ToolStatus::Success,
            output: Some(output),
+            live_output: None,
+            shell_task_id: None,
            started_at: None,
            duration_ms: Some(120),
            source: ExecSource::Assistant,
@@ -5613,6 +5668,8 @@ mod tests {
            command: command.to_string(),
            status: ToolStatus::Success,
            output: Some("ok".to_string()),
+            live_output: None,
+            shell_task_id: None,
            started_at: None,
            duration_ms: None,
            source: ExecSource::Assistant,
@@ -2874,6 +2874,8 @@ mod tests {
                    command: command.to_string(),
                    status: ToolStatus::Running,
                    output: None,
+                    live_output: None,
+                    shell_task_id: None,
                    started_at: None,
                    duration_ms: Some(ACTIVE_TOOL_STALE_RUNNING_ROW_TTL.as_millis() as u64 + 1),
                    source: ExecSource::Assistant,
@@ -2906,6 +2908,8 @@ mod tests {
                command: "cargo test --workspace".to_string(),
                status: ToolStatus::Running,
                output: None,
+                live_output: None,
+                shell_task_id: None,
                started_at: Some(std::time::Instant::now()),
                duration_ms: None,
                source: ExecSource::Assistant,
@@ -3040,6 +3044,8 @@ mod tests {
                    .to_string(),
                status: ToolStatus::Failed,
                output: Some("Lint pending\nTest pending".to_string()),
+                live_output: None,
+                shell_task_id: None,
                started_at: None,
                duration_ms: Some(15_000),
                source: ExecSource::Assistant,
@@ -3080,6 +3086,8 @@ mod tests {
            command: "cargo test -p codewhale-tui".to_string(),
            status: ToolStatus::Failed,
            output: Some("test failed".to_string()),
+            live_output: None,
+            shell_task_id: None,
            started_at: None,
            duration_ms: Some(1_250),
            source: ExecSource::Assistant,
@@ -3109,6 +3117,8 @@ mod tests {
            command: "cargo check".to_string(),
            status: ToolStatus::Success,
            output: Some("Finished".to_string()),
+            live_output: None,
+            shell_task_id: None,
            started_at: None,
            duration_ms: Some(1_250),
            source: ExecSource::Assistant,
@@ -101,6 +101,8 @@ pub(super) fn handle_tool_call_started(
                    command,
                    status: ToolStatus::Running,
                    output: None,
+                    live_output: None,
+                    shell_task_id: None,
                    started_at: Some(Instant::now()),
                    duration_ms: None,
                    source,
@@ -133,6 +135,8 @@ pub(super) fn handle_tool_call_started(
                command,
                status: ToolStatus::Running,
                output: None,
+                live_output: None,
+                shell_task_id: None,
                started_at: Some(Instant::now()),
                duration_ms: None,
                source,
@@ -506,6 +510,16 @@ pub(super) fn handle_tool_call_complete(
            HistoryCell::Tool(ToolCell::Exec(exec)) => {
                exec.status = status;
                if let Ok(tool_result) = result.as_ref() {
+                    let shell_task_id = tool_result
+                        .metadata
+                        .as_ref()
+                        .and_then(|m| m.get("task_id"))
+                        .and_then(serde_json::Value::as_str)
+                        .filter(|task_id| !task_id.trim().is_empty())
+                        .map(str::to_string);
+                    if shell_task_id.is_some() {
+                        exec.shell_task_id = shell_task_id;
+                    }
                    if let Some(meta_command) = tool_result
                        .metadata
                        .as_ref()
@@ -538,6 +552,12 @@ pub(super) fn handle_tool_call_complete(
                        exec.output = Some(tool_result.content.clone());
                        exec.output_summary =
                            Some(super::history::summarize_tool_output(&tool_result.content));
+                        exec.live_output = None;
+                    } else if status == ToolStatus::Running
+                        && exec.interaction.is_none()
+                        && !tool_result.content.is_empty()
+                    {
+                        exec.live_output = Some(tool_result.content.clone());
                    }
                } else if let Err(err) = result.as_ref()
                    && exec.interaction.is_none()
@@ -616,6 +616,8 @@ mod tests {
            command: command.to_string(),
            status: ToolStatus::Running,
            output: None,
+            live_output: None,
+            shell_task_id: None,
            started_at: None,
            duration_ms: None,
            source: ExecSource::Assistant,
@@ -65,6 +65,7 @@ use crate::settings::Settings;
 use crate::task_manager::{
    NewTaskRequest, SharedTaskManager, TaskManager, TaskManagerConfig, TaskStatus, TaskSummary,
 };
+use crate::tools::shell::{ShellJobSnapshot, ShellStatus};
 use crate::tools::spec::{RuntimeToolServices, ToolResult};
 use crate::tools::subagent::SubAgentStatus;
 use crate::tui::app::HuntVerdict;
@@ -1014,6 +1015,89 @@ async fn refresh_active_task_panel(app: &mut App, task_manager: &SharedTaskManag
    app.task_panel = entries;
 }

+/// Refreshes shell-backed Exec cells from the shell manager's current job list.
+///
+/// Returns `true` when at least one cell was modified. Returns `false` when no
+/// shell manager is configured, when its lock cannot be acquired, when it
+/// reports no jobs, or when no cell needed an update.
+fn refresh_shell_exec_live_output(app: &mut App) -> bool {
+    let Some(shell_mgr) = app.runtime_services.shell_manager.as_ref().cloned() else {
+        return false;
+    };
+    // Build an id -> job map inside its own scope so the lock guard is dropped
+    // before any history cell is touched.
+    let jobs = {
+        let Ok(mut mgr) = shell_mgr.lock() else {
+            return false;
+        };
+        mgr.list_jobs()
+            .into_iter()
+            .map(|job| (job.id.clone(), job))
+            .collect::<std::collections::HashMap<_, _>>()
+    };
+    if jobs.is_empty() {
+        return false;
+    }
+
+    let mut changed = false;
+    for index in 0..app.virtual_cell_count() {
+        // Compute the pending update through a shared borrow of `app` first;
+        // `None` means the cell is not eligible or nothing differs.
+        let Some((task_id, next_status, next_live, next_duration)) =
+            shell_exec_live_update(app, index, &jobs)
+        else {
+            continue;
+        };
+        // Re-borrow the same cell mutably to apply the update.
+        let Some(HistoryCell::Tool(ToolCell::Exec(exec))) = app.cell_at_virtual_index_mut(index)
+        else {
+            continue;
+        };
+        // Re-check on the mutable cell: never overwrite a cell that already has
+        // final output, and only update the cell carrying this exact task id.
+        if exec.output.is_some() || exec.shell_task_id.as_deref() != Some(task_id.as_str()) {
+            continue;
+        }
+        exec.status = next_status;
+        exec.live_output = next_live;
+        exec.duration_ms = Some(next_duration);
+        changed = true;
+    }
+    changed
+}
+
+/// Computes the pending live update for the Exec cell at `index`, if any.
+///
+/// Yields `None` when the cell is missing or is not an Exec tool cell, when it
+/// already has final output, when it carries no shell task id, when `jobs` has
+/// no entry for that id, or when status, live output, and duration already
+/// match the job. Otherwise yields `(task_id, status, live_output, elapsed_ms)`.
+fn shell_exec_live_update(
+    app: &App,
+    index: usize,
+    jobs: &std::collections::HashMap<String, ShellJobSnapshot>,
+) -> Option<(String, ToolStatus, Option<String>, u64)> {
+    let exec = match app.cell_at_virtual_index(index)? {
+        HistoryCell::Tool(ToolCell::Exec(exec)) => exec,
+        _ => return None,
+    };
+    // A cell with final output is never refreshed.
+    if exec.output.is_some() {
+        return None;
+    }
+
+    let task_id = exec.shell_task_id.as_deref()?;
+    let job = jobs.get(task_id)?;
+
+    let status = shell_job_tool_status(&job.status);
+    // Keep the previously shown tail when the job currently reports no output.
+    let live = shell_job_live_output(job).or_else(|| exec.live_output.clone());
+    let elapsed = job.elapsed_ms;
+
+    let unchanged =
+        exec.status == status && exec.live_output == live && exec.duration_ms == Some(elapsed);
+    if unchanged {
+        None
+    } else {
+        Some((task_id.to_string(), status, live, elapsed))
+    }
+}
+
+/// Translates a shell job status into the tool status shown on an Exec cell.
+///
+/// `Running` stays `Running`, `Completed` becomes `Success`, and `Failed`,
+/// `Killed`, and `TimedOut` all become `Failed`.
+fn shell_job_tool_status(status: &ShellStatus) -> ToolStatus {
+    match status {
+        ShellStatus::Running => ToolStatus::Running,
+        ShellStatus::Completed => ToolStatus::Success,
+        ShellStatus::Failed => ToolStatus::Failed,
+        ShellStatus::Killed => ToolStatus::Failed,
+        ShellStatus::TimedOut => ToolStatus::Failed,
+    }
+}
+
+/// Builds the text shown as live output from a job's stdout and stderr tails.
+///
+/// Yields `None` when both tails are empty. Stdout alone is returned as-is;
+/// stderr is introduced by a `STDERR:` header, separated from any stdout by a
+/// blank line.
+fn shell_job_live_output(job: &ShellJobSnapshot) -> Option<String> {
+    let stdout = job.stdout_tail.as_str();
+    let stderr = job.stderr_tail.as_str();
+
+    if stderr.is_empty() {
+        // Only stdout (or nothing at all) is available.
+        return (!stdout.is_empty()).then(|| stdout.to_string());
+    }
+    if stdout.is_empty() {
+        return Some(format!("STDERR:\n{stderr}"));
+    }
+    Some(format!("{stdout}\n\nSTDERR:\n{stderr}"))
+}
+
 fn active_reasoning_task_entries(app: &App) -> Vec<TaskPanelEntry> {
    let Some(active) = app.active_cell.as_ref() else {
        return Vec::new();
@@ -1301,6 +1385,9 @@ async fn run_event_loop(

        if last_task_refresh.elapsed() >= Duration::from_millis(2500) {
            refresh_active_task_panel(app, &task_manager).await;
+            if refresh_shell_exec_live_output(app) {
+                app.needs_redraw = true;
+            }
            last_task_refresh = Instant::now();
            app.needs_redraw = true;
        }
@@ -1838,6 +1838,8 @@ fn active_tool_status_label_summarizes_live_tool_group() {
            command: "cargo test --workspace --all-features".to_string(),
            status: ToolStatus::Running,
            output: None,
+            live_output: None,
+            shell_task_id: None,
            started_at: app.turn_started_at,
            duration_ms: None,
            source: ExecSource::Assistant,
@@ -1868,6 +1870,106 @@ fn active_tool_status_label_summarizes_live_tool_group() {
    assert!(label.contains(crate::tui::key_shortcuts::tool_details_shortcut_label()));
 }

+// Two Exec cells share the same command text but carry different shell task
+// ids; only the cell whose id appears in `jobs` may receive an update.
+#[test]
+fn shell_live_output_update_matches_exact_task_id_only() {
+    let mut app = create_test_app();
+    app.push_history_cell(HistoryCell::Tool(ToolCell::Exec(ExecCell {
+        command: "cargo test --workspace".to_string(),
+        status: ToolStatus::Running,
+        output: None,
+        live_output: None,
+        shell_task_id: Some("shell_a".to_string()),
+        started_at: None,
+        duration_ms: None,
+        source: ExecSource::Assistant,
+        interaction: None,
+        output_summary: None,
+    })));
+    app.push_history_cell(HistoryCell::Tool(ToolCell::Exec(ExecCell {
+        command: "cargo test --workspace".to_string(),
+        status: ToolStatus::Running,
+        output: None,
+        live_output: Some("previous".to_string()),
+        shell_task_id: Some("shell_b".to_string()),
+        started_at: None,
+        duration_ms: None,
+        source: ExecSource::Assistant,
+        interaction: None,
+        output_summary: None,
+    })));
+
+    // Only "shell_b" has a job snapshot; "shell_a" has none.
+    let mut jobs = std::collections::HashMap::new();
+    jobs.insert(
+        "shell_b".to_string(),
+        ShellJobSnapshot {
+            id: "shell_b".to_string(),
+            job_id: "shell_b".to_string(),
+            command: "cargo test --workspace".to_string(),
+            cwd: PathBuf::from("/tmp/repo"),
+            status: ShellStatus::Running,
+            exit_code: None,
+            elapsed_ms: 777,
+            stdout_tail: "stdout tail\n".to_string(),
+            stderr_tail: "stderr tail\n".to_string(),
+            stdout_len: 12,
+            stderr_len: 12,
+            stdin_available: false,
+            stale: false,
+            linked_task_id: None,
+        },
+    );
+
+    // Identical command text is not enough: the "shell_a" cell gets no update.
+    assert!(shell_exec_live_update(&app, 0, &jobs).is_none());
+    let (_task_id, status, output, duration) =
+        shell_exec_live_update(&app, 1, &jobs).expect("matching task id updates");
+
+    assert_eq!(status, ToolStatus::Running);
+    assert_eq!(duration, 777);
+    // The stdout tail already ends with a newline, so joining it to the
+    // "\n\nSTDERR:\n" separator produces three consecutive newlines.
+    assert_eq!(
+        output.as_deref(),
+        Some("stdout tail\n\n\nSTDERR:\nstderr tail\n")
+    );
+}
+
+// A cell that already holds final `output` must not produce a live update,
+// even when a job with the same task id reports different data.
+#[test]
+fn shell_live_output_update_skips_finalized_exec_cell() {
+    let mut app = create_test_app();
+    app.push_history_cell(HistoryCell::Tool(ToolCell::Exec(ExecCell {
+        command: "cargo test --workspace".to_string(),
+        status: ToolStatus::Success,
+        output: Some("final output".to_string()),
+        live_output: Some("old live output".to_string()),
+        shell_task_id: Some("shell_a".to_string()),
+        started_at: None,
+        duration_ms: Some(10),
+        source: ExecSource::Assistant,
+        interaction: None,
+        output_summary: None,
+    })));
+    // The matching job differs from the cell in both output and elapsed time.
+    let mut jobs = std::collections::HashMap::new();
+    jobs.insert(
+        "shell_a".to_string(),
+        ShellJobSnapshot {
+            id: "shell_a".to_string(),
+            job_id: "shell_a".to_string(),
+            command: "cargo test --workspace".to_string(),
+            cwd: PathBuf::from("/tmp/repo"),
+            status: ShellStatus::Completed,
+            exit_code: Some(0),
+            elapsed_ms: 999,
+            stdout_tail: "new live output".to_string(),
+            stderr_tail: String::new(),
+            stdout_len: 15,
+            stderr_len: 0,
+            stdin_available: false,
+            stale: false,
+            linked_task_id: None,
+        },
+    );
+
+    assert!(shell_exec_live_update(&app, 0, &jobs).is_none());
+}
+
 #[test]
 fn active_tool_status_label_strips_shell_wrappers_from_ci_polling() {
    let mut app = create_test_app();
@@ -1880,6 +1982,8 @@ fn active_tool_status_label_strips_shell_wrappers_from_ci_polling() {
                .to_string(),
            status: ToolStatus::Running,
            output: None,
+            live_output: None,
+            shell_task_id: None,
            started_at: app.turn_started_at,
            duration_ms: None,
            source: ExecSource::Assistant,
@@ -5435,6 +5539,8 @@ fn terminal_pause_has_live_owner_only_for_running_exec_cells() {
            command: "python3 -i".to_string(),
            status: ToolStatus::Running,
            output: None,
+            live_output: None,
+            shell_task_id: None,
            started_at: Some(Instant::now()),
            duration_ms: None,
            source: ExecSource::Assistant,