From 41590edfd834f46b69823ef38671b4a87b23c26f Mon Sep 17 00:00:00 2001
From: Hunter Bown <hmbown@gmail.com>
Date: Sat, 23 May 2026 16:48:56 -0500
Subject: [PATCH] feat(tools): github_close_pr, handle_read redirection,
 shell/sidebar polish

- New github_close_pr tool distinct from github_close_issue; proper PR wording
  in tool output, audit records, and gh pr close (not issue close)
- handle_read detects art_/call_/SHA refs and points to retrieve_tool_result
  with copy-pasteable hints; error messages show correct tool for each ref type
- Shell delta tool results include the command field so the UI can resolve
  task_id-only exec cells when the completion metadata arrives
- Sidebar background shell tasks show the actual command on the primary row
  instead of just the task ID; task ID stays available as dim detail
- Tool routing falls back to task_id when exec_shell_wait has no command,
  then updates when the completion carries command metadata
- Plan mode prompt explains update_plan as the handoff signal; model waits
  for user action instead of continuing to tool around
- Base prompt clarifies handle_read scope (var_handles only) vs
  retrieve_tool_result (artifacts/tool-result refs)
- New tests: close_pr_schema, close distinction wording, handle_read artifact
  detection, shell_wait task_id fallback, sidebar background task labels
---
 .github/ISSUE_TEMPLATE/bug_report.md |   2 +
 .github/PULL_REQUEST_TEMPLATE.md     |   4 +-
 CONTRIBUTING.md                      |  12 +-
 crates/tui/src/prompts.rs            |  12 ++
 crates/tui/src/prompts/base.txt      |   2 +-
 crates/tui/src/prompts/modes/plan.md |   6 +-
 crates/tui/src/tools/github.rs       | 242 +++++++++++++++++++--------
 crates/tui/src/tools/handle.rs       |  41 ++++-
 crates/tui/src/tools/registry.rs     |   4 +-
 crates/tui/src/tools/shell.rs        |   1 +
 crates/tui/src/tui/sidebar.rs        |  57 +++++--
 crates/tui/src/tui/tool_routing.rs   |  46 ++++-
 crates/tui/src/tui/ui/tests.rs       |  66 ++++++++
 docs/TOOL_SURFACE.md                 |   3 +-
 14 files changed, 403 insertions(+), 95 deletions(-)

diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
index 4b4da719..26072b92 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -29,6 +29,7 @@ labels: bug
 - OS:
 - codewhale version:
 - Install method:
+- `codewhale doctor` summary:
 - Model/provider:
 - Terminal app:
 - Shell:
@@ -37,6 +38,7 @@ labels: bug
 OS: Windows 11 / Ubuntu 22.04 / macOS 14
 codewhale version: run `codewhale --version`
 Install method: cargo install / release binary / source build
+codewhale doctor summary: paste only the relevant lines, and redact secrets
 Model/provider: deepseek-v4-pro / DeepSeek, or qwen2.5-coder / Ollama
 Terminal app: iTerm2 / Windows Terminal / GNOME Terminal / VS Code terminal
 Shell: bash / zsh / fish / PowerShell
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 36d57b12..db22692b 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -2,9 +2,9 @@
 
 ## Testing
 
-- [ ] `cargo test --all-features`
 - [ ] `cargo fmt --all -- --check`
-- [ ] `cargo clippy --all-targets --all-features`
+- [ ] `cargo clippy --workspace --all-targets --all-features`
+- [ ] `cargo test --workspace --all-features`
 
 ## Checklist
 
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index f1afa9ed..255bc94e 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -14,7 +14,7 @@ Thank you for your interest in contributing to codewhale! This document provides
 
 1. Fork and clone the repository:
    ```bash
-   git clone https://github.com/YOUR_USERNAME/DeepSeek-TUI.git
+   git clone https://github.com/YOUR_USERNAME/CodeWhale.git
    cd CodeWhale
    ```
 
@@ -25,12 +25,12 @@ Thank you for your interest in contributing to codewhale! This document provides
 
 3. Run tests:
    ```bash
-   cargo test
+   cargo test --workspace --all-features
    ```
 
 4. Run with development settings:
    ```bash
-   cargo run
+   cargo run --bin codewhale
    ```
 
 ## Development Workflow
@@ -153,9 +153,9 @@ these crates, including the bottom-up build order.
 
 3. Ensure CI passes:
    ```bash
-   cargo fmt --check
-   cargo clippy
-   cargo test
+   cargo fmt --all -- --check
+   cargo clippy --workspace --all-targets --all-features
+   cargo test --workspace --all-features
    ```
 
 4. Push your branch and create a Pull Request
diff --git a/crates/tui/src/prompts.rs b/crates/tui/src/prompts.rs
index 912ab283..f6eb8be6 100644
--- a/crates/tui/src/prompts.rs
+++ b/crates/tui/src/prompts.rs
@@ -803,6 +803,18 @@ mod tests {
         );
     }
 
+    #[test]
+    fn plan_mode_prompt_uses_update_plan_as_confirmation_handoff() {
+        assert!(
+            PLAN_MODE.contains("call `update_plan`"),
+            "Plan mode must tell the model to finish plans through update_plan"
+        );
+        assert!(
+            PLAN_MODE.contains("accept / revise / exit prompt"),
+            "Plan mode must explain why update_plan is the UI handoff signal"
+        );
+    }
+
     #[test]
     fn render_environment_block_lists_supplied_locale_and_workspace() {
         let tmp = tempdir().expect("tempdir");
diff --git a/crates/tui/src/prompts/base.txt b/crates/tui/src/prompts/base.txt
index 58f03268..8862148f 100644
--- a/crates/tui/src/prompts/base.txt
+++ b/crates/tui/src/prompts/base.txt
@@ -45,7 +45,7 @@ Model notes: DeepSeek V4 models emit *thinking tokens* (`ContentBlock::Thinking`
 - **Git / diag / tests**: `git_status`, `git_diff`, `git_show`, `git_log`, `git_blame`, `diagnostics`, `run_tests`, `review`.
 - **Sub-agents**: `agent_open`, `agent_eval`, `agent_close`. Fresh sessions are the default; use `fork_context: true` when multiple perspectives need the current parent context and byte-identical prefill/prompt prefix for DeepSeek prefix-cache reuse. Use `tool_agent` for experimental Fin fast-lane execution: simple tool-bound OCR/search/fetch/probe work on Flash V4 with thinking off.
 - **Recursive LM (long inputs / parallel reasoning)**: `rlm_open`, `rlm_eval`, `rlm_configure`, `rlm_close` — open a named Python REPL over a file/string/URL, run deterministic and semantic analysis, return compact results or `var_handle`s, then close when done.
-- **Large symbolic outputs**: `handle_read` — read bounded slices, counts, ranges, or JSONPath projections from returned `var_handle`s.
+- **Large symbolic outputs**: `handle_read` — read bounded slices, counts, ranges, or JSONPath projections from returned `var_handle`s only. For `art_...`, `call_...`, SHA, or spilled tool-output refs, use `retrieve_tool_result`.
 - **Other**: `code_execution` (Python sandbox), `validate_data` (JSON/TOML), `request_user_input`, `finance` (market quotes), `tool_search_tool_regex`, `tool_search_tool_bm25` (deferred tool discovery).
 
 Multiple `tool_calls` in one turn run in parallel. `web_search` returns `ref_id`s — cite as `(ref_id)`.
diff --git a/crates/tui/src/prompts/modes/plan.md b/crates/tui/src/prompts/modes/plan.md
index cc59277a..bcbfb229 100644
--- a/crates/tui/src/prompts/modes/plan.md
+++ b/crates/tui/src/prompts/modes/plan.md
@@ -3,9 +3,11 @@
 You are running in Plan mode — design before implementing.
 
 Investigate first, act later. Use `checklist_write` for visible, granular progress on multi-step
-investigations. Add `update_plan` only when high-level strategy adds value beyond the checklist.
+investigations. When you are ready to present the implementation plan, call `update_plan` with
+the final plan; that is the handoff signal that lets the UI show the accept / revise / exit prompt.
 All writes and patches are blocked — you can read the world but you
 can't change it. Shell and code execution are unavailable.
 
 Use this mode to build a thorough plan. Spawn read-only sub-agents for parallel investigation.
-When the plan is solid, the user will switch modes so you can execute.
+After `update_plan` presents the plan, wait for the user's next action instead of continuing to
+tool around in Plan mode.
diff --git a/crates/tui/src/tools/github.rs b/crates/tui/src/tools/github.rs
index 86444642..521724af 100644
--- a/crates/tui/src/tools/github.rs
+++ b/crates/tui/src/tools/github.rs
@@ -28,6 +28,7 @@ pub struct GithubIssueContextTool;
 pub struct GithubPrContextTool;
 pub struct GithubCommentTool;
 pub struct GithubCloseIssueTool;
+pub struct GithubClosePrTool;
 
 #[async_trait]
 impl ToolSpec for GithubIssueContextTool {
@@ -221,32 +222,11 @@ impl ToolSpec for GithubCloseIssueTool {
     }
 
     fn description(&self) -> &'static str {
-        "Close a GitHub issue only when structured acceptance evidence is present and approved. Never close merely because the agent is stopping."
+        "Close a GitHub issue only when structured acceptance evidence is present and approved. For pull requests use github_close_pr; do not call PRs issues in user-facing output. Never close merely because the agent is stopping."
     }
 
     fn input_schema(&self) -> Value {
-        json!({
-            "type": "object",
-            "properties": {
-                "number": { "type": "integer", "minimum": 1 },
-                "acceptance_criteria": { "type": "array", "items": { "type": "string" }, "minItems": 1 },
-                "evidence": {
-                    "type": "object",
-                    "properties": {
-                        "files_changed": { "type": "array", "items": { "type": "string" } },
-                        "tests_run": { "type": "array", "items": { "type": "string" } },
-                        "commits": { "type": "array", "items": { "type": "string" } },
-                        "final_status": { "type": "string" }
-                    },
-                    "required": ["files_changed", "tests_run", "final_status"]
-                },
-                "comment": { "type": "string" },
-                "allow_dirty": { "type": "boolean", "default": false },
-                "dry_run": { "type": "boolean", "default": false }
-            },
-            "required": ["number", "acceptance_criteria", "evidence"],
-            "additionalProperties": false
-        })
+        close_input_schema()
     }
 
     fn capabilities(&self) -> Vec<ToolCapability> {
@@ -258,53 +238,158 @@ impl ToolSpec for GithubCloseIssueTool {
     }
 
     async fn execute(&self, input: Value, context: &ToolContext) -> Result<ToolResult, ToolError> {
-        validate_evidence(&input, true)?;
-        if !optional_bool(&input, "allow_dirty", false) {
-            let status = git_status_porcelain(context)?;
-            if !status.trim().is_empty() {
-                return Ok(ToolResult::error(
-                    "Refusing to close issue: worktree is dirty and allow_dirty was false.",
-                )
-                .with_metadata(json!({ "dirty_status": status })));
-            }
-        }
-        let number = required_u64(&input, "number")?;
-        if optional_bool(&input, "dry_run", false) {
-            return Ok(ToolResult::success(format!(
-                "Dry run: would close issue #{number}."
-            )));
-        }
-        if let Some(comment) = optional_str(&input, "comment") {
-            let number_s = number.to_string();
-            run_gh_text(context, &["issue", "comment", &number_s, "--body", comment])?;
-        }
-        let number_s = number.to_string();
-        run_gh_text(
-            context,
-            &["issue", "close", &number_s, "--reason", "completed"],
-        )?;
-        let metadata = github_event_metadata(
-            "close",
-            "issue",
-            number,
-            "Issue closed as completed with structured evidence".to_string(),
-            None,
-            optional_str(&input, "comment")
-                .and_then(|comment| {
-                    write_artifact_if_needed(
-                        context,
-                        "github_close_comment",
-                        comment,
-                        BODY_ARTIFACT_THRESHOLD,
-                    )
-                    .ok()
-                })
-                .flatten(),
-        );
-        Ok(ToolResult::success(format!("Closed issue #{number}.")).with_metadata(metadata))
+        close_github_thread(input, context, GithubCloseTarget::Issue)
     }
 }
 
+#[async_trait]
+impl ToolSpec for GithubClosePrTool {
+    fn name(&self) -> &'static str {
+        "github_close_pr"
+    }
+
+    fn description(&self) -> &'static str {
+        "Close a GitHub pull request only when structured acceptance evidence is present and approved. Use this for PRs instead of github_close_issue so the UI, audit trail, and comments keep PR wording clear."
+    }
+
+    fn input_schema(&self) -> Value {
+        close_input_schema()
+    }
+
+    fn capabilities(&self) -> Vec<ToolCapability> {
+        vec![ToolCapability::Network, ToolCapability::RequiresApproval]
+    }
+
+    fn approval_requirement(&self) -> ApprovalRequirement {
+        ApprovalRequirement::Required
+    }
+
+    async fn execute(&self, input: Value, context: &ToolContext) -> Result<ToolResult, ToolError> {
+        close_github_thread(input, context, GithubCloseTarget::Pr)
+    }
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+enum GithubCloseTarget {
+    Issue,
+    Pr,
+}
+
+impl GithubCloseTarget {
+    fn cli_subcommand(self) -> &'static str {
+        match self {
+            Self::Issue => "issue",
+            Self::Pr => "pr",
+        }
+    }
+
+    fn metadata_target(self) -> &'static str {
+        match self {
+            Self::Issue => "issue",
+            Self::Pr => "pr",
+        }
+    }
+
+    fn display(self) -> &'static str {
+        match self {
+            Self::Issue => "issue",
+            Self::Pr => "PR",
+        }
+    }
+
+    fn summary_subject(self) -> &'static str {
+        match self {
+            Self::Issue => "Issue",
+            Self::Pr => "PR",
+        }
+    }
+}
+
+fn close_input_schema() -> Value {
+    json!({
+        "type": "object",
+        "properties": {
+            "number": { "type": "integer", "minimum": 1 },
+            "acceptance_criteria": { "type": "array", "items": { "type": "string" }, "minItems": 1 },
+            "evidence": {
+                "type": "object",
+                "properties": {
+                    "files_changed": { "type": "array", "items": { "type": "string" } },
+                    "tests_run": { "type": "array", "items": { "type": "string" } },
+                    "commits": { "type": "array", "items": { "type": "string" } },
+                    "final_status": { "type": "string" }
+                },
+                "required": ["files_changed", "tests_run", "final_status"]
+            },
+            "comment": { "type": "string" },
+            "allow_dirty": { "type": "boolean", "default": false },
+            "dry_run": { "type": "boolean", "default": false }
+        },
+        "required": ["number", "acceptance_criteria", "evidence"],
+        "additionalProperties": false
+    })
+}
+
+fn close_github_thread(
+    input: Value,
+    context: &ToolContext,
+    target: GithubCloseTarget,
+) -> Result<ToolResult, ToolError> {
+    validate_evidence(&input, true)?;
+    if !optional_bool(&input, "allow_dirty", false) {
+        let status = git_status_porcelain(context)?;
+        if !status.trim().is_empty() {
+            return Ok(ToolResult::error(format!(
+                "Refusing to close {}: worktree is dirty and allow_dirty was false.",
+                target.display()
+            ))
+            .with_metadata(json!({ "dirty_status": status })));
+        }
+    }
+    let number = required_u64(&input, "number")?;
+    if optional_bool(&input, "dry_run", false) {
+        return Ok(ToolResult::success(format!(
+            "Dry run: would close {} #{number}.",
+            target.display()
+        )));
+    }
+    let subcmd = target.cli_subcommand();
+    let number_s = number.to_string();
+    if let Some(comment) = optional_str(&input, "comment") {
+        run_gh_text(context, &[subcmd, "comment", &number_s, "--body", comment])?;
+    }
+    let close_args: Vec<&str> = match target {
+        GithubCloseTarget::Issue => vec!["issue", "close", &number_s, "--reason", "completed"],
+        GithubCloseTarget::Pr => vec!["pr", "close", &number_s],
+    };
+    run_gh_text(context, &close_args)?;
+    let metadata = github_event_metadata(
+        "close",
+        target.metadata_target(),
+        number,
+        format!(
+            "{} closed as completed with structured evidence",
+            target.summary_subject()
+        ),
+        None,
+        optional_str(&input, "comment")
+            .and_then(|comment| {
+                write_artifact_if_needed(
+                    context,
+                    "github_close_comment",
+                    comment,
+                    BODY_ARTIFACT_THRESHOLD,
+                )
+                .ok()
+            })
+            .flatten(),
+    );
+    Ok(
+        ToolResult::success(format!("Closed {} #{number}.", target.display()))
+            .with_metadata(metadata),
+    )
+}
+
 fn gh_bin() -> String {
     if let Ok(bin) = std::env::var("DEEPSEEK_GH_BIN") {
         return bin;
@@ -588,6 +673,29 @@ mod tests {
         );
     }
 
+    #[test]
+    fn close_pr_schema_requires_structured_evidence() {
+        let schema = GithubClosePrTool.input_schema();
+        assert!(
+            schema["properties"]["evidence"]["required"]
+                .as_array()
+                .expect("required")
+                .contains(&json!("tests_run"))
+        );
+    }
+
+    #[test]
+    fn close_tools_distinguish_issue_and_pr_wording() {
+        assert_eq!(GithubCloseTarget::Issue.display(), "issue");
+        assert_eq!(GithubCloseTarget::Pr.display(), "PR");
+        assert!(
+            GithubCloseIssueTool
+                .description()
+                .contains("github_close_pr")
+        );
+        assert!(GithubClosePrTool.description().contains("pull request"));
+    }
+
     #[test]
     fn missing_close_evidence_refuses() {
         let input = json!({
diff --git a/crates/tui/src/tools/handle.rs b/crates/tui/src/tools/handle.rs
index a6406748..a811640e 100644
--- a/crates/tui/src/tools/handle.rs
+++ b/crates/tui/src/tools/handle.rs
@@ -180,7 +180,10 @@ impl ToolSpec for HandleReadTool {
 
     fn description(&self) -> &'static str {
         "Read a bounded projection from a var_handle returned by tools such \
-         as RLM sessions, sub-agents, or large artifact producers. Provide \
+         as RLM sessions or sub-agents. This does not read artifact ids \
+         (`art_...`), tool-call ids (`call_...`), SHA refs, or files; use \
+         retrieve_tool_result for spilled tool results/artifacts and \
+         read_file for workspace files. Provide \
          exactly one projection: `slice` for char/line slices, `range` for \
          one-based line ranges, `count` for metadata counts, or `jsonpath` \
          for a small JSON-path projection. This retrieves from the handle's \
@@ -194,7 +197,7 @@ impl ToolSpec for HandleReadTool {
             "required": ["handle"],
             "properties": {
                 "handle": {
-                    "description": "A var_handle object, or a compact `session_id/name` string.",
+                    "description": "A var_handle object, or a compact `session_id/name` string. Not an `art_...`, `call_...`, SHA, or file path ref.",
                     "oneOf": [
                         {
                             "type": "object",
@@ -321,9 +324,16 @@ enum Projection {
 
 fn parse_handle(value: &Value) -> Result<VarHandle, ToolError> {
     if let Some(raw) = value.as_str() {
+        if looks_like_tool_result_ref(raw) {
+            return Err(ToolError::invalid_input(
+                "handle_read only accepts var_handle objects or `session_id/name` strings. \
+                 This looks like an artifact/tool-result ref; use `retrieve_tool_result` instead.",
+            ));
+        }
         let Some((session_id, name)) = raw.rsplit_once('/') else {
             return Err(ToolError::invalid_input(
-                "handle_read: string handle must use `session_id/name`",
+                "handle_read: string handles must use `session_id/name`. \
+                 For `art_...`, `call_...`, SHA, or file refs, use `retrieve_tool_result`.",
             ));
         };
         return Ok(VarHandle {
@@ -353,6 +363,19 @@ fn parse_handle(value: &Value) -> Result<VarHandle, ToolError> {
     Ok(handle)
 }
 
+fn looks_like_tool_result_ref(raw: &str) -> bool {
+    let trimmed = raw.trim();
+    let sha_candidate = trimmed
+        .strip_prefix("sha:")
+        .or_else(|| trimmed.strip_prefix("sha_"))
+        .unwrap_or(trimmed);
+    trimmed.starts_with("art_")
+        || trimmed.starts_with("call_")
+        || trimmed.starts_with("tool_result:")
+        || trimmed.ends_with(".txt")
+        || crate::tools::truncate::is_valid_sha256(&sha_candidate.to_ascii_lowercase())
+}
+
 fn parse_projection(input: &Value) -> Result<Projection, ToolError> {
     let mut count = 0usize;
     count += usize::from(input.get("slice").is_some());
@@ -809,4 +832,16 @@ mod tests {
             .expect_err("projection required");
         assert!(err.to_string().contains("exactly one"));
     }
+
+    #[tokio::test]
+    async fn handle_read_points_artifact_refs_to_tool_result_retrieval() {
+        let ctx = ctx();
+        let err = HandleReadTool
+            .execute(json!({"handle": "art_call_abc123", "count": true}), &ctx)
+            .await
+            .expect_err("artifact refs are not var handles");
+        let message = err.to_string();
+        assert!(message.contains("retrieve_tool_result"));
+        assert!(message.contains("artifact/tool-result ref"));
+    }
 }
diff --git a/crates/tui/src/tools/registry.rs b/crates/tui/src/tools/registry.rs
index 6ad9f355..f84a4927 100644
--- a/crates/tui/src/tools/registry.rs
+++ b/crates/tui/src/tools/registry.rs
@@ -549,7 +549,8 @@ impl ToolRegistryBuilder {
             AutomationReadTool, AutomationResumeTool, AutomationRunTool, AutomationUpdateTool,
         };
         use super::github::{
-            GithubCloseIssueTool, GithubCommentTool, GithubIssueContextTool, GithubPrContextTool,
+            GithubCloseIssueTool, GithubClosePrTool, GithubCommentTool, GithubIssueContextTool,
+            GithubPrContextTool,
         };
         use super::tasks::{
             PrAttemptListTool, PrAttemptPreflightTool, PrAttemptReadTool, PrAttemptRecordTool,
@@ -580,6 +581,7 @@ impl ToolRegistryBuilder {
             .with_tool(Arc::new(AutomationRunTool))
             .with_tool(Arc::new(GithubCommentTool))
             .with_tool(Arc::new(GithubCloseIssueTool))
+            .with_tool(Arc::new(GithubClosePrTool))
     }
 
     /// Include only read-only durable task, PR-attempt, GitHub, and automation
diff --git a/crates/tui/src/tools/shell.rs b/crates/tui/src/tools/shell.rs
index a86dbbfc..70a45973 100644
--- a/crates/tui/src/tools/shell.rs
+++ b/crates/tui/src/tools/shell.rs
@@ -2252,6 +2252,7 @@ fn build_shell_delta_tool_result(delta: ShellDeltaResult, context: &ToolContext)
             "summary": summary,
             "stdout_summary": stdout_summary,
             "stderr_summary": stderr_summary,
+            "command": delta.command,
             "stream_delta": true,
         })),
     };
diff --git a/crates/tui/src/tui/sidebar.rs b/crates/tui/src/tui/sidebar.rs
index 3ae6b525..e228b320 100644
--- a/crates/tui/src/tui/sidebar.rs
+++ b/crates/tui/src/tui/sidebar.rs
@@ -628,12 +628,7 @@ fn task_panel_lines(app: &App, content_width: usize, max_rows: usize) -> Vec<Lin
                 .duration_ms
                 .map(format_duration_ms)
                 .unwrap_or_else(|| "-".to_string());
-            let label = format!(
-                "{} {} {}",
-                truncate_line_to_width(&task.id, 10),
-                task.status,
-                duration
-            );
+            let (label, detail) = background_task_labels(task, &duration);
             lines.push(Line::from(Span::styled(
                 truncate_line_to_width(&label, content_width.max(1)),
                 Style::default().fg(color),
@@ -641,10 +636,7 @@ fn task_panel_lines(app: &App, content_width: usize, max_rows: usize) -> Vec<Lin
             lines.push(Line::from(Span::styled(
                 format!(
                     "  {}",
-                    truncate_line_to_width(
-                        &task.prompt_summary,
-                        content_width.saturating_sub(2).max(1)
-                    )
+                    truncate_line_to_width(&detail, content_width.saturating_sub(2).max(1))
                 ),
                 Style::default().fg(palette::TEXT_DIM),
             )));
@@ -694,6 +686,26 @@ fn push_sidebar_label(lines: &mut Vec<Line<'static>>, label: &str, color: ratatu
     )));
 }
 
+fn background_task_labels(task: &TaskPanelEntry, duration: &str) -> (String, String) {
+    if let Some(command) = task.prompt_summary.strip_prefix("shell: ") {
+        let command = concise_shell_command_label(command, 96);
+        return (
+            format!("{} {} {}", task.status, command, duration),
+            format!("{} \u{00B7} shell job", task.id),
+        );
+    }
+
+    (
+        format!(
+            "{} {} {}",
+            truncate_line_to_width(&task.id, 10),
+            task.status,
+            duration
+        ),
+        task.prompt_summary.clone(),
+    )
+}
+
 fn active_tool_rows(app: &App) -> Vec<SidebarToolRow> {
     let Some(active) = app.active_cell.as_ref() else {
         return Vec::new();
@@ -2216,6 +2228,31 @@ mod tests {
         );
     }
 
+    #[test]
+    fn tasks_panel_puts_background_shell_command_on_primary_row() {
+        let mut app = create_test_app();
+        app.task_panel.push(TaskPanelEntry {
+            id: "shell_33a08c3c".to_string(),
+            status: "running".to_string(),
+            prompt_summary: "shell: cd /tmp/repo && cargo test --workspace --all-features"
+                .to_string(),
+            duration_ms: Some(178_000),
+        });
+
+        let text = lines_to_text(&task_panel_lines(&app, 96, 8));
+
+        assert!(
+            text.iter()
+                .any(|line| line.contains("running cargo test --workspace --all-features")),
+            "background shell headline should show the command, not only the shell id: {text:?}"
+        );
+        assert!(
+            text.iter()
+                .any(|line| line.contains("shell_33a08c3c") && line.contains("shell job")),
+            "shell id should remain available as detail: {text:?}"
+        );
+    }
+
     #[test]
     fn tasks_panel_collapses_repeated_low_value_recent_tools_after_failures() {
         let mut app = create_test_app();
diff --git a/crates/tui/src/tui/tool_routing.rs b/crates/tui/src/tui/tool_routing.rs
index d11f0a97..e688d624 100644
--- a/crates/tui/src/tui/tool_routing.rs
+++ b/crates/tui/src/tui/tool_routing.rs
@@ -78,7 +78,7 @@ pub(super) fn handle_tool_call_started(
     // simultaneously, which is exactly the case CX#7 fixes.
 
     if is_exec_tool(name) {
-        let command = exec_command_from_input(input).unwrap_or_else(|| "<command>".to_string());
+        let command = exec_target_from_input(input);
         let source = exec_source_from_input(input);
         let interaction = exec_interaction_summary(name, input);
         let mut is_wait = false;
@@ -535,6 +535,29 @@ pub(super) fn handle_tool_call_complete(
             HistoryCell::Tool(ToolCell::Exec(exec)) => {
                 exec.status = status;
                 if let Ok(tool_result) = result.as_ref() {
+                    if let Some(meta_command) = tool_result
+                        .metadata
+                        .as_ref()
+                        .and_then(|m| m.get("command"))
+                        .and_then(serde_json::Value::as_str)
+                        && !meta_command.trim().is_empty()
+                        && (exec.command == "shell job" || exec.command.starts_with("shell job "))
+                    {
+                        exec.command = meta_command.to_string();
+                        if exec.interaction.as_deref().is_some_and(|interaction| {
+                            interaction.starts_with("Waiting for shell job")
+                        }) {
+                            let task_suffix = tool_result
+                                .metadata
+                                .as_ref()
+                                .and_then(|m| m.get("task_id"))
+                                .and_then(serde_json::Value::as_str)
+                                .map(|task_id| format!(" ({task_id})"))
+                                .unwrap_or_default();
+                            exec.interaction =
+                                Some(format!("Waiting for \"{meta_command}\"{task_suffix}"));
+                        }
+                    }
                     exec.duration_ms = tool_result
                         .metadata
                         .as_ref()
@@ -1078,6 +1101,17 @@ fn exec_command_from_input(input: &serde_json::Value) -> Option<String> {
         .map(std::string::ToString::to_string)
 }
 
+fn exec_target_from_input(input: &serde_json::Value) -> String {
+    exec_command_from_input(input).unwrap_or_else(|| {
+        input
+            .get("task_id")
+            .or_else(|| input.get("id"))
+            .and_then(|v| v.as_str())
+            .map(|task_id| format!("shell job {task_id}"))
+            .unwrap_or_else(|| "shell job".to_string())
+    })
+}
+
 fn exec_source_from_input(input: &serde_json::Value) -> ExecSource {
     match input.get("source").and_then(|v| v.as_str()) {
         Some(source) if source.eq_ignore_ascii_case("user") => ExecSource::User,
@@ -1086,7 +1120,7 @@ fn exec_source_from_input(input: &serde_json::Value) -> ExecSource {
 }
 
 fn exec_interaction_summary(name: &str, input: &serde_json::Value) -> Option<(String, bool)> {
-    let command = exec_command_from_input(input).unwrap_or_else(|| "<command>".to_string());
+    let command = exec_target_from_input(input);
     let command_display = format!("\"{command}\"");
     let interaction_input = input
         .get("input")
@@ -1108,6 +1142,14 @@ fn exec_interaction_summary(name: &str, input: &serde_json::Value) -> Option<(St
     }
 
     if is_wait_tool || input.get("wait").and_then(serde_json::Value::as_bool) == Some(true) {
+        if exec_command_from_input(input).is_none()
+            && let Some(task_id) = input
+                .get("task_id")
+                .or_else(|| input.get("id"))
+                .and_then(|v| v.as_str())
+        {
+            return Some((format!("Waiting for shell job {task_id}"), true));
+        }
         return Some((format!("Waited for {command_display}"), true));
     }
 
diff --git a/crates/tui/src/tui/ui/tests.rs b/crates/tui/src/tui/ui/tests.rs
index 4bd00a10..ae00d54c 100644
--- a/crates/tui/src/tui/ui/tests.rs
+++ b/crates/tui/src/tui/ui/tests.rs
@@ -3867,6 +3867,72 @@ fn ok_result(
     Ok(crate::tools::spec::ToolResult::success(content))
 }
 
+#[test]
+fn shell_wait_without_command_uses_task_id_until_command_metadata_arrives() {
+    let mut app = create_test_app();
+    handle_tool_call_started(
+        &mut app,
+        "shell-wait",
+        "exec_shell_wait",
+        &serde_json::json!({"task_id": "shell_33a08c3c"}),
+    );
+
+    let exec = app
+        .active_cell
+        .as_ref()
+        .expect("active cell")
+        .entries()
+        .iter()
+        .find_map(|cell| match cell {
+            HistoryCell::Tool(ToolCell::Exec(exec)) => Some(exec),
+            _ => None,
+        })
+        .expect("exec cell");
+    assert_eq!(exec.command, "shell job shell_33a08c3c");
+    assert!(
+        exec.interaction
+            .as_deref()
+            .is_some_and(|text| text.contains("shell_33a08c3c"))
+    );
+    assert!(
+        !exec.command.contains("<command>")
+            && !exec
+                .interaction
+                .as_deref()
+                .unwrap_or_default()
+                .contains("<command>")
+    );
+
+    let result = Ok(crate::tools::spec::ToolResult::success(
+        "Background task running (no new output).",
+    )
+    .with_metadata(serde_json::json!({
+        "status": "Running",
+        "duration_ms": 178_000_u64,
+        "task_id": "shell_33a08c3c",
+        "command": "cargo test --workspace --all-features",
+    })));
+    handle_tool_call_complete(&mut app, "shell-wait", "exec_shell_wait", &result);
+
+    let exec = app
+        .active_cell
+        .as_ref()
+        .expect("active cell")
+        .entries()
+        .iter()
+        .find_map(|cell| match cell {
+            HistoryCell::Tool(ToolCell::Exec(exec)) => Some(exec),
+            _ => None,
+        })
+        .expect("exec cell");
+    assert_eq!(exec.command, "cargo test --workspace --all-features");
+    assert!(
+        exec.interaction
+            .as_deref()
+            .is_some_and(|text| text.contains("cargo test --workspace"))
+    );
+}
+
 #[test]
 fn tool_child_usage_metadata_updates_live_cost_counter() {
     let mut app = create_test_app();
diff --git a/docs/TOOL_SURFACE.md b/docs/TOOL_SURFACE.md
index d1243ca1..09a17534 100644
--- a/docs/TOOL_SURFACE.md
+++ b/docs/TOOL_SURFACE.md
@@ -115,7 +115,8 @@ Large logs and command outputs should be artifacts with compact summaries in the
 | `github_issue_context` | Read-only issue context via `gh issue view`; large bodies become task artifacts when possible. |
 | `github_pr_context` | Read-only PR context via `gh pr view`; optional diff capture via `gh pr diff --patch`; large bodies/diffs become task artifacts when possible. |
 | `github_comment` | Approval-required issue/PR comment with structured evidence. |
-| `github_close_issue` | Approval-required issue closure. Requires non-empty acceptance criteria and evidence; refuses dirty worktrees unless explicitly allowed. Never close an issue merely because an agent is stopping. |
+| `github_close_issue` | Approval-required issue closure. Requires non-empty acceptance criteria and evidence; refuses dirty worktrees unless explicitly allowed. Never use for PRs. |
+| `github_close_pr` | Approval-required PR closure. Requires the same structured evidence as issue closure and keeps PR wording in tool output/audit records. |
 
 ### PR attempts