From 41590edfd834f46b69823ef38671b4a87b23c26f Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Sat, 23 May 2026 16:48:56 -0500 Subject: [PATCH] feat(tools): github_close_pr, handle_read redirection, shell/sidebar polish - New github_close_pr tool distinct from github_close_issue; proper PR wording in tool output, audit records, and gh pr close (not issue close) - handle_read detects art_/call_/SHA refs and points to retrieve_tool_result with copy-pasteable hints; error messages show correct tool for each ref type - Shell delta tool results include the command field so the UI can resolve task_id-only exec cells when the completion metadata arrives - Sidebar background shell tasks show the actual command on the primary row instead of just the task ID; task ID stays available as dim detail - Tool routing falls back to task_id when exec_shell_wait has no command, then updates when the completion carries command metadata - Plan mode prompt explains update_plan as the handoff signal; model waits for user action instead of continuing to tool around - Base prompt clarifies handle_read scope (var_handles only) vs retrieve_tool_result (artifacts/tool-result refs) - New tests: close_pr_schema, close distinction wording, handle_read artifact detection, shell_wait task_id fallback, sidebar background task labels --- .github/ISSUE_TEMPLATE/bug_report.md | 2 + .github/PULL_REQUEST_TEMPLATE.md | 4 +- CONTRIBUTING.md | 12 +- crates/tui/src/prompts.rs | 12 ++ crates/tui/src/prompts/base.txt | 2 +- crates/tui/src/prompts/modes/plan.md | 6 +- crates/tui/src/tools/github.rs | 242 +++++++++++++++++++-------- crates/tui/src/tools/handle.rs | 41 ++++- crates/tui/src/tools/registry.rs | 4 +- crates/tui/src/tools/shell.rs | 1 + crates/tui/src/tui/sidebar.rs | 57 +++++-- crates/tui/src/tui/tool_routing.rs | 46 ++++- crates/tui/src/tui/ui/tests.rs | 66 ++++++++ docs/TOOL_SURFACE.md | 3 +- 14 files changed, 403 insertions(+), 95 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 4b4da719..26072b92 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -29,6 +29,7 @@ labels: bug - OS: - codewhale version: - Install method: +- `codewhale doctor` summary: - Model/provider: - Terminal app: - Shell: @@ -37,6 +38,7 @@ labels: bug OS: Windows 11 / Ubuntu 22.04 / macOS 14 codewhale version: run `codewhale --version` Install method: cargo install / release binary / source build +codewhale doctor summary: paste only the relevant lines, and redact secrets Model/provider: deepseek-v4-pro / DeepSeek, or qwen2.5-coder / Ollama Terminal app: iTerm2 / Windows Terminal / GNOME Terminal / VS Code terminal Shell: bash / zsh / fish / PowerShell diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 36d57b12..db22692b 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -2,9 +2,9 @@ ## Testing -- [ ] `cargo test --all-features` - [ ] `cargo fmt --all -- --check` -- [ ] `cargo clippy --all-targets --all-features` +- [ ] `cargo clippy --workspace --all-targets --all-features` +- [ ] `cargo test --workspace --all-features` ## Checklist diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f1afa9ed..255bc94e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -14,7 +14,7 @@ Thank you for your interest in contributing to codewhale! This document provides 1. Fork and clone the repository: ```bash - git clone https://github.com/YOUR_USERNAME/DeepSeek-TUI.git + git clone https://github.com/YOUR_USERNAME/CodeWhale.git cd CodeWhale ``` @@ -25,12 +25,12 @@ Thank you for your interest in contributing to codewhale! This document provides 3. Run tests: ```bash - cargo test + cargo test --workspace --all-features ``` 4. Run with development settings: ```bash - cargo run + cargo run --bin codewhale ``` ## Development Workflow @@ -153,9 +153,9 @@ these crates, including the bottom-up build order. 3. Ensure CI passes: ```bash - cargo fmt --check - cargo clippy - cargo test + cargo fmt --all -- --check + cargo clippy --workspace --all-targets --all-features + cargo test --workspace --all-features ``` 4. Push your branch and create a Pull Request diff --git a/crates/tui/src/prompts.rs b/crates/tui/src/prompts.rs index 912ab283..f6eb8be6 100644 --- a/crates/tui/src/prompts.rs +++ b/crates/tui/src/prompts.rs @@ -803,6 +803,18 @@ mod tests { ); } + #[test] + fn plan_mode_prompt_uses_update_plan_as_confirmation_handoff() { + assert!( + PLAN_MODE.contains("call `update_plan`"), + "Plan mode must tell the model to finish plans through update_plan" + ); + assert!( + PLAN_MODE.contains("accept / revise / exit prompt"), + "Plan mode must explain why update_plan is the UI handoff signal" + ); + } + #[test] fn render_environment_block_lists_supplied_locale_and_workspace() { let tmp = tempdir().expect("tempdir"); diff --git a/crates/tui/src/prompts/base.txt b/crates/tui/src/prompts/base.txt index 58f03268..8862148f 100644 --- a/crates/tui/src/prompts/base.txt +++ b/crates/tui/src/prompts/base.txt @@ -45,7 +45,7 @@ Model notes: DeepSeek V4 models emit *thinking tokens* (`ContentBlock::Thinking` - **Git / diag / tests**: `git_status`, `git_diff`, `git_show`, `git_log`, `git_blame`, `diagnostics`, `run_tests`, `review`. - **Sub-agents**: `agent_open`, `agent_eval`, `agent_close`. Fresh sessions are the default; use `fork_context: true` when multiple perspectives need the current parent context and byte-identical prefill/prompt prefix for DeepSeek prefix-cache reuse. Use `tool_agent` for experimental Fin fast-lane execution: simple tool-bound OCR/search/fetch/probe work on Flash V4 with thinking off. - **Recursive LM (long inputs / parallel reasoning)**: `rlm_open`, `rlm_eval`, `rlm_configure`, `rlm_close` — open a named Python REPL over a file/string/URL, run deterministic and semantic analysis, return compact results or `var_handle`s, then close when done. -- **Large symbolic outputs**: `handle_read` — read bounded slices, counts, ranges, or JSONPath projections from returned `var_handle`s. +- **Large symbolic outputs**: `handle_read` — read bounded slices, counts, ranges, or JSONPath projections from returned `var_handle`s only. For `art_...`, `call_...`, SHA, or spilled tool-output refs, use `retrieve_tool_result`. - **Other**: `code_execution` (Python sandbox), `validate_data` (JSON/TOML), `request_user_input`, `finance` (market quotes), `tool_search_tool_regex`, `tool_search_tool_bm25` (deferred tool discovery). Multiple `tool_calls` in one turn run in parallel. `web_search` returns `ref_id`s — cite as `(ref_id)`. diff --git a/crates/tui/src/prompts/modes/plan.md b/crates/tui/src/prompts/modes/plan.md index cc59277a..bcbfb229 100644 --- a/crates/tui/src/prompts/modes/plan.md +++ b/crates/tui/src/prompts/modes/plan.md @@ -3,9 +3,11 @@ You are running in Plan mode — design before implementing. Investigate first, act later. Use `checklist_write` for visible, granular progress on multi-step -investigations. Add `update_plan` only when high-level strategy adds value beyond the checklist. +investigations. When you are ready to present the implementation plan, call `update_plan` with +the final plan; that is the handoff signal that lets the UI show the accept / revise / exit prompt. All writes and patches are blocked — you can read the world but you can't change it. Shell and code execution are unavailable. Use this mode to build a thorough plan. Spawn read-only sub-agents for parallel investigation. -When the plan is solid, the user will switch modes so you can execute. +After `update_plan` presents the plan, wait for the user's next action instead of continuing to +tool around in Plan mode. diff --git a/crates/tui/src/tools/github.rs b/crates/tui/src/tools/github.rs index 86444642..521724af 100644 --- a/crates/tui/src/tools/github.rs +++ b/crates/tui/src/tools/github.rs @@ -28,6 +28,7 @@ pub struct GithubIssueContextTool; pub struct GithubPrContextTool; pub struct GithubCommentTool; pub struct GithubCloseIssueTool; +pub struct GithubClosePrTool; #[async_trait] impl ToolSpec for GithubIssueContextTool { @@ -221,32 +222,11 @@ impl ToolSpec for GithubCloseIssueTool { } fn description(&self) -> &'static str { - "Close a GitHub issue only when structured acceptance evidence is present and approved. Never close merely because the agent is stopping." + "Close a GitHub issue only when structured acceptance evidence is present and approved. For pull requests use github_close_pr; do not call PRs issues in user-facing output. Never close merely because the agent is stopping." } fn input_schema(&self) -> Value { - json!({ - "type": "object", - "properties": { - "number": { "type": "integer", "minimum": 1 }, - "acceptance_criteria": { "type": "array", "items": { "type": "string" }, "minItems": 1 }, - "evidence": { - "type": "object", - "properties": { - "files_changed": { "type": "array", "items": { "type": "string" } }, - "tests_run": { "type": "array", "items": { "type": "string" } }, - "commits": { "type": "array", "items": { "type": "string" } }, - "final_status": { "type": "string" } - }, - "required": ["files_changed", "tests_run", "final_status"] - }, - "comment": { "type": "string" }, - "allow_dirty": { "type": "boolean", "default": false }, - "dry_run": { "type": "boolean", "default": false } - }, - "required": ["number", "acceptance_criteria", "evidence"], - "additionalProperties": false - }) + close_input_schema() } fn capabilities(&self) -> Vec { @@ -258,53 +238,158 @@ impl ToolSpec for GithubCloseIssueTool { } async fn execute(&self, input: Value, context: &ToolContext) -> Result { - validate_evidence(&input, true)?; - if !optional_bool(&input, "allow_dirty", false) { - let status = git_status_porcelain(context)?; - if !status.trim().is_empty() { - return Ok(ToolResult::error( - "Refusing to close issue: worktree is dirty and allow_dirty was false.", - ) - .with_metadata(json!({ "dirty_status": status }))); - } - } - let number = required_u64(&input, "number")?; - if optional_bool(&input, "dry_run", false) { - return Ok(ToolResult::success(format!( - "Dry run: would close issue #{number}." - ))); - } - if let Some(comment) = optional_str(&input, "comment") { - let number_s = number.to_string(); - run_gh_text(context, &["issue", "comment", &number_s, "--body", comment])?; - } - let number_s = number.to_string(); - run_gh_text( - context, - &["issue", "close", &number_s, "--reason", "completed"], - )?; - let metadata = github_event_metadata( - "close", - "issue", - number, - "Issue closed as completed with structured evidence".to_string(), - None, - optional_str(&input, "comment") - .and_then(|comment| { - write_artifact_if_needed( - context, - "github_close_comment", - comment, - BODY_ARTIFACT_THRESHOLD, - ) - .ok() - }) - .flatten(), - ); - Ok(ToolResult::success(format!("Closed issue #{number}.")).with_metadata(metadata)) + close_github_thread(input, context, GithubCloseTarget::Issue) } } +#[async_trait] +impl ToolSpec for GithubClosePrTool { + fn name(&self) -> &'static str { + "github_close_pr" + } + + fn description(&self) -> &'static str { + "Close a GitHub pull request only when structured acceptance evidence is present and approved. Use this for PRs instead of github_close_issue so the UI, audit trail, and comments keep PR wording clear." + } + + fn input_schema(&self) -> Value { + close_input_schema() + } + + fn capabilities(&self) -> Vec { + vec![ToolCapability::Network, ToolCapability::RequiresApproval] + } + + fn approval_requirement(&self) -> ApprovalRequirement { + ApprovalRequirement::Required + } + + async fn execute(&self, input: Value, context: &ToolContext) -> Result { + close_github_thread(input, context, GithubCloseTarget::Pr) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum GithubCloseTarget { + Issue, + Pr, +} + +impl GithubCloseTarget { + fn cli_subcommand(self) -> &'static str { + match self { + Self::Issue => "issue", + Self::Pr => "pr", + } + } + + fn metadata_target(self) -> &'static str { + match self { + Self::Issue => "issue", + Self::Pr => "pr", + } + } + + fn display(self) -> &'static str { + match self { + Self::Issue => "issue", + Self::Pr => "PR", + } + } + + fn summary_subject(self) -> &'static str { + match self { + Self::Issue => "Issue", + Self::Pr => "PR", + } + } +} + +fn close_input_schema() -> Value { + json!({ + "type": "object", + "properties": { + "number": { "type": "integer", "minimum": 1 }, + "acceptance_criteria": { "type": "array", "items": { "type": "string" }, "minItems": 1 }, + "evidence": { + "type": "object", + "properties": { + "files_changed": { "type": "array", "items": { "type": "string" } }, + "tests_run": { "type": "array", "items": { "type": "string" } }, + "commits": { "type": "array", "items": { "type": "string" } }, + "final_status": { "type": "string" } + }, + "required": ["files_changed", "tests_run", "final_status"] + }, + "comment": { "type": "string" }, + "allow_dirty": { "type": "boolean", "default": false }, + "dry_run": { "type": "boolean", "default": false } + }, + "required": ["number", "acceptance_criteria", "evidence"], + "additionalProperties": false + }) +} + +fn close_github_thread( + input: Value, + context: &ToolContext, + target: GithubCloseTarget, +) -> Result { + validate_evidence(&input, true)?; + if !optional_bool(&input, "allow_dirty", false) { + let status = git_status_porcelain(context)?; + if !status.trim().is_empty() { + return Ok(ToolResult::error(format!( + "Refusing to close {}: worktree is dirty and allow_dirty was false.", + target.display() + )) + .with_metadata(json!({ "dirty_status": status }))); + } + } + let number = required_u64(&input, "number")?; + if optional_bool(&input, "dry_run", false) { + return Ok(ToolResult::success(format!( + "Dry run: would close {} #{number}.", + target.display() + ))); + } + let subcmd = target.cli_subcommand(); + let number_s = number.to_string(); + if let Some(comment) = optional_str(&input, "comment") { + run_gh_text(context, &[subcmd, "comment", &number_s, "--body", comment])?; + } + let close_args: Vec<&str> = match target { + GithubCloseTarget::Issue => vec!["issue", "close", &number_s, "--reason", "completed"], + GithubCloseTarget::Pr => vec!["pr", "close", &number_s], + }; + run_gh_text(context, &close_args)?; + let metadata = github_event_metadata( + "close", + target.metadata_target(), + number, + format!( + "{} closed as completed with structured evidence", + target.summary_subject() + ), + None, + optional_str(&input, "comment") + .and_then(|comment| { + write_artifact_if_needed( + context, + "github_close_comment", + comment, + BODY_ARTIFACT_THRESHOLD, + ) + .ok() + }) + .flatten(), + ); + Ok( + ToolResult::success(format!("Closed {} #{number}.", target.display())) + .with_metadata(metadata), + ) +} + fn gh_bin() -> String { if let Ok(bin) = std::env::var("DEEPSEEK_GH_BIN") { return bin; @@ -588,6 +673,29 @@ mod tests { ); } + #[test] + fn close_pr_schema_requires_structured_evidence() { + let schema = GithubClosePrTool.input_schema(); + assert!( + schema["properties"]["evidence"]["required"] + .as_array() + .expect("required") + .contains(&json!("tests_run")) + ); + } + + #[test] + fn close_tools_distinguish_issue_and_pr_wording() { + assert_eq!(GithubCloseTarget::Issue.display(), "issue"); + assert_eq!(GithubCloseTarget::Pr.display(), "PR"); + assert!( + GithubCloseIssueTool + .description() + .contains("github_close_pr") + ); + assert!(GithubClosePrTool.description().contains("pull request")); + } + #[test] fn missing_close_evidence_refuses() { let input = json!({ diff --git a/crates/tui/src/tools/handle.rs b/crates/tui/src/tools/handle.rs index a6406748..a811640e 100644 --- a/crates/tui/src/tools/handle.rs +++ b/crates/tui/src/tools/handle.rs @@ -180,7 +180,10 @@ impl ToolSpec for HandleReadTool { fn description(&self) -> &'static str { "Read a bounded projection from a var_handle returned by tools such \ - as RLM sessions, sub-agents, or large artifact producers. Provide \ + as RLM sessions or sub-agents. This does not read artifact ids \ + (`art_...`), tool-call ids (`call_...`), SHA refs, or files; use \ + retrieve_tool_result for spilled tool results/artifacts and \ + read_file for workspace files. Provide \ exactly one projection: `slice` for char/line slices, `range` for \ one-based line ranges, `count` for metadata counts, or `jsonpath` \ for a small JSON-path projection. This retrieves from the handle's \ @@ -194,7 +197,7 @@ impl ToolSpec for HandleReadTool { "required": ["handle"], "properties": { "handle": { - "description": "A var_handle object, or a compact `session_id/name` string.", + "description": "A var_handle object, or a compact `session_id/name` string. Not an `art_...`, `call_...`, SHA, or file path ref.", "oneOf": [ { "type": "object", @@ -321,9 +324,16 @@ enum Projection { fn parse_handle(value: &Value) -> Result { if let Some(raw) = value.as_str() { + if looks_like_tool_result_ref(raw) { + return Err(ToolError::invalid_input( + "handle_read only accepts var_handle objects or `session_id/name` strings. \ + This looks like an artifact/tool-result ref; use `retrieve_tool_result` instead.", + )); + } let Some((session_id, name)) = raw.rsplit_once('/') else { return Err(ToolError::invalid_input( - "handle_read: string handle must use `session_id/name`", + "handle_read: string handles must use `session_id/name`. \ + For `art_...`, `call_...`, SHA, or file refs, use `retrieve_tool_result`.", )); }; return Ok(VarHandle { @@ -353,6 +363,19 @@ fn parse_handle(value: &Value) -> Result { Ok(handle) } +fn looks_like_tool_result_ref(raw: &str) -> bool { + let trimmed = raw.trim(); + let sha_candidate = trimmed + .strip_prefix("sha:") + .or_else(|| trimmed.strip_prefix("sha_")) + .unwrap_or(trimmed); + trimmed.starts_with("art_") + || trimmed.starts_with("call_") + || trimmed.starts_with("tool_result:") + || trimmed.ends_with(".txt") + || crate::tools::truncate::is_valid_sha256(&sha_candidate.to_ascii_lowercase()) +} + fn parse_projection(input: &Value) -> Result { let mut count = 0usize; count += usize::from(input.get("slice").is_some()); @@ -809,4 +832,16 @@ mod tests { .expect_err("projection required"); assert!(err.to_string().contains("exactly one")); } + + #[tokio::test] + async fn handle_read_points_artifact_refs_to_tool_result_retrieval() { + let ctx = ctx(); + let err = HandleReadTool + .execute(json!({"handle": "art_call_abc123", "count": true}), &ctx) + .await + .expect_err("artifact refs are not var handles"); + let message = err.to_string(); + assert!(message.contains("retrieve_tool_result")); + assert!(message.contains("artifact/tool-result ref")); + } } diff --git a/crates/tui/src/tools/registry.rs b/crates/tui/src/tools/registry.rs index 6ad9f355..f84a4927 100644 --- a/crates/tui/src/tools/registry.rs +++ b/crates/tui/src/tools/registry.rs @@ -549,7 +549,8 @@ impl ToolRegistryBuilder { AutomationReadTool, AutomationResumeTool, AutomationRunTool, AutomationUpdateTool, }; use super::github::{ - GithubCloseIssueTool, GithubCommentTool, GithubIssueContextTool, GithubPrContextTool, + GithubCloseIssueTool, GithubClosePrTool, GithubCommentTool, GithubIssueContextTool, + GithubPrContextTool, }; use super::tasks::{ PrAttemptListTool, PrAttemptPreflightTool, PrAttemptReadTool, PrAttemptRecordTool, @@ -580,6 +581,7 @@ impl ToolRegistryBuilder { .with_tool(Arc::new(AutomationRunTool)) .with_tool(Arc::new(GithubCommentTool)) .with_tool(Arc::new(GithubCloseIssueTool)) + .with_tool(Arc::new(GithubClosePrTool)) } /// Include only read-only durable task, PR-attempt, GitHub, and automation diff --git a/crates/tui/src/tools/shell.rs b/crates/tui/src/tools/shell.rs index a86dbbfc..70a45973 100644 --- a/crates/tui/src/tools/shell.rs +++ b/crates/tui/src/tools/shell.rs @@ -2252,6 +2252,7 @@ fn build_shell_delta_tool_result(delta: ShellDeltaResult, context: &ToolContext) "summary": summary, "stdout_summary": stdout_summary, "stderr_summary": stderr_summary, + "command": delta.command, "stream_delta": true, })), }; diff --git a/crates/tui/src/tui/sidebar.rs b/crates/tui/src/tui/sidebar.rs index 3ae6b525..e228b320 100644 --- a/crates/tui/src/tui/sidebar.rs +++ b/crates/tui/src/tui/sidebar.rs @@ -628,12 +628,7 @@ fn task_panel_lines(app: &App, content_width: usize, max_rows: usize) -> Vec Vec>, label: &str, color: ratatu ))); } +fn background_task_labels(task: &TaskPanelEntry, duration: &str) -> (String, String) { + if let Some(command) = task.prompt_summary.strip_prefix("shell: ") { + let command = concise_shell_command_label(command, 96); + return ( + format!("{} {} {}", task.status, command, duration), + format!("{} \u{00B7} shell job", task.id), + ); + } + + ( + format!( + "{} {} {}", + truncate_line_to_width(&task.id, 10), + task.status, + duration + ), + task.prompt_summary.clone(), + ) +} + fn active_tool_rows(app: &App) -> Vec { let Some(active) = app.active_cell.as_ref() else { return Vec::new(); @@ -2216,6 +2228,31 @@ mod tests { ); } + #[test] + fn tasks_panel_puts_background_shell_command_on_primary_row() { + let mut app = create_test_app(); + app.task_panel.push(TaskPanelEntry { + id: "shell_33a08c3c".to_string(), + status: "running".to_string(), + prompt_summary: "shell: cd /tmp/repo && cargo test --workspace --all-features" + .to_string(), + duration_ms: Some(178_000), + }); + + let text = lines_to_text(&task_panel_lines(&app, 96, 8)); + + assert!( + text.iter() + .any(|line| line.contains("running cargo test --workspace --all-features")), + "background shell headline should show the command, not only the shell id: {text:?}" + ); + assert!( + text.iter() + .any(|line| line.contains("shell_33a08c3c") && line.contains("shell job")), + "shell id should remain available as detail: {text:?}" + ); + } + #[test] fn tasks_panel_collapses_repeated_low_value_recent_tools_after_failures() { let mut app = create_test_app(); diff --git a/crates/tui/src/tui/tool_routing.rs b/crates/tui/src/tui/tool_routing.rs index d11f0a97..e688d624 100644 --- a/crates/tui/src/tui/tool_routing.rs +++ b/crates/tui/src/tui/tool_routing.rs @@ -78,7 +78,7 @@ pub(super) fn handle_tool_call_started( // simultaneously, which is exactly the case CX#7 fixes. if is_exec_tool(name) { - let command = exec_command_from_input(input).unwrap_or_else(|| "".to_string()); + let command = exec_target_from_input(input); let source = exec_source_from_input(input); let interaction = exec_interaction_summary(name, input); let mut is_wait = false; @@ -535,6 +535,29 @@ pub(super) fn handle_tool_call_complete( HistoryCell::Tool(ToolCell::Exec(exec)) => { exec.status = status; if let Ok(tool_result) = result.as_ref() { + if let Some(meta_command) = tool_result + .metadata + .as_ref() + .and_then(|m| m.get("command")) + .and_then(serde_json::Value::as_str) + && !meta_command.trim().is_empty() + && (exec.command == "shell job" || exec.command.starts_with("shell job ")) + { + exec.command = meta_command.to_string(); + if exec.interaction.as_deref().is_some_and(|interaction| { + interaction.starts_with("Waiting for shell job") + }) { + let task_suffix = tool_result + .metadata + .as_ref() + .and_then(|m| m.get("task_id")) + .and_then(serde_json::Value::as_str) + .map(|task_id| format!(" ({task_id})")) + .unwrap_or_default(); + exec.interaction = + Some(format!("Waiting for \"{meta_command}\"{task_suffix}")); + } + } exec.duration_ms = tool_result .metadata .as_ref() @@ -1078,6 +1101,17 @@ fn exec_command_from_input(input: &serde_json::Value) -> Option { .map(std::string::ToString::to_string) } +fn exec_target_from_input(input: &serde_json::Value) -> String { + exec_command_from_input(input).unwrap_or_else(|| { + input + .get("task_id") + .or_else(|| input.get("id")) + .and_then(|v| v.as_str()) + .map(|task_id| format!("shell job {task_id}")) + .unwrap_or_else(|| "shell job".to_string()) + }) +} + fn exec_source_from_input(input: &serde_json::Value) -> ExecSource { match input.get("source").and_then(|v| v.as_str()) { Some(source) if source.eq_ignore_ascii_case("user") => ExecSource::User, @@ -1086,7 +1120,7 @@ fn exec_source_from_input(input: &serde_json::Value) -> ExecSource { } fn exec_interaction_summary(name: &str, input: &serde_json::Value) -> Option<(String, bool)> { - let command = exec_command_from_input(input).unwrap_or_else(|| "".to_string()); + let command = exec_target_from_input(input); let command_display = format!("\"{command}\""); let interaction_input = input .get("input") @@ -1108,6 +1142,14 @@ fn exec_interaction_summary(name: &str, input: &serde_json::Value) -> Option<(St } if is_wait_tool || input.get("wait").and_then(serde_json::Value::as_bool) == Some(true) { + if exec_command_from_input(input).is_none() + && let Some(task_id) = input + .get("task_id") + .or_else(|| input.get("id")) + .and_then(|v| v.as_str()) + { + return Some((format!("Waiting for shell job {task_id}"), true)); + } return Some((format!("Waited for {command_display}"), true)); } diff --git a/crates/tui/src/tui/ui/tests.rs b/crates/tui/src/tui/ui/tests.rs index 4bd00a10..ae00d54c 100644 --- a/crates/tui/src/tui/ui/tests.rs +++ b/crates/tui/src/tui/ui/tests.rs @@ -3867,6 +3867,72 @@ fn ok_result( Ok(crate::tools::spec::ToolResult::success(content)) } +#[test] +fn shell_wait_without_command_uses_task_id_until_command_metadata_arrives() { + let mut app = create_test_app(); + handle_tool_call_started( + &mut app, + "shell-wait", + "exec_shell_wait", + &serde_json::json!({"task_id": "shell_33a08c3c"}), + ); + + let exec = app + .active_cell + .as_ref() + .expect("active cell") + .entries() + .iter() + .find_map(|cell| match cell { + HistoryCell::Tool(ToolCell::Exec(exec)) => Some(exec), + _ => None, + }) + .expect("exec cell"); + assert_eq!(exec.command, "shell job shell_33a08c3c"); + assert!( + exec.interaction + .as_deref() + .is_some_and(|text| text.contains("shell_33a08c3c")) + ); + assert!( + !exec.command.contains("") + && !exec + .interaction + .as_deref() + .unwrap_or_default() + .contains("") + ); + + let result = Ok(crate::tools::spec::ToolResult::success( + "Background task running (no new output).", + ) + .with_metadata(serde_json::json!({ + "status": "Running", + "duration_ms": 178_000_u64, + "task_id": "shell_33a08c3c", + "command": "cargo test --workspace --all-features", + }))); + handle_tool_call_complete(&mut app, "shell-wait", "exec_shell_wait", &result); + + let exec = app + .active_cell + .as_ref() + .expect("active cell") + .entries() + .iter() + .find_map(|cell| match cell { + HistoryCell::Tool(ToolCell::Exec(exec)) => Some(exec), + _ => None, + }) + .expect("exec cell"); + assert_eq!(exec.command, "cargo test --workspace --all-features"); + assert!( + exec.interaction + .as_deref() + .is_some_and(|text| text.contains("cargo test --workspace")) + ); +} + #[test] fn tool_child_usage_metadata_updates_live_cost_counter() { let mut app = create_test_app(); diff --git a/docs/TOOL_SURFACE.md b/docs/TOOL_SURFACE.md index d1243ca1..09a17534 100644 --- a/docs/TOOL_SURFACE.md +++ b/docs/TOOL_SURFACE.md @@ -115,7 +115,8 @@ Large logs and command outputs should be artifacts with compact summaries in the | `github_issue_context` | Read-only issue context via `gh issue view`; large bodies become task artifacts when possible. | | `github_pr_context` | Read-only PR context via `gh pr view`; optional diff capture via `gh pr diff --patch`; large bodies/diffs become task artifacts when possible. | | `github_comment` | Approval-required issue/PR comment with structured evidence. | -| `github_close_issue` | Approval-required issue closure. Requires non-empty acceptance criteria and evidence; refuses dirty worktrees unless explicitly allowed. Never close an issue merely because an agent is stopping. | +| `github_close_issue` | Approval-required issue closure. Requires non-empty acceptance criteria and evidence; refuses dirty worktrees unless explicitly allowed. Never use for PRs. | +| `github_close_pr` | Approval-required PR closure. Requires the same structured evidence as issue closure and keeps PR wording in tool output/audit records. | ### PR attempts