diff --git a/crates/tui/src/prompts.rs b/crates/tui/src/prompts.rs index dab8bb30..a143121f 100644 --- a/crates/tui/src/prompts.rs +++ b/crates/tui/src/prompts.rs @@ -1533,6 +1533,7 @@ mod tests { assert!(prompt.contains("- **Shell**:")); assert!(prompt.contains("### `exec_shell`")); assert!(prompt.contains("`task_shell_start`")); + assert!(prompt.contains(">5 seconds")); assert!(prompt.contains("Arithmetic, math, calculations → `exec_shell`")); } diff --git a/crates/tui/src/prompts/base.md b/crates/tui/src/prompts/base.md index fd194628..e3ab8575 100644 --- a/crates/tui/src/prompts/base.md +++ b/crates/tui/src/prompts/base.md @@ -244,7 +244,7 @@ When context is deep (past a soft seam): cache reasoning conclusions in concise - **Planning / tracking**: `checklist_write` (primary Work progress under the active task/thread), `checklist_add` / `checklist_update` / `checklist_list`, `update_plan` (optional high-level strategy metadata for complex initiatives), `task_create` / `task_list` / `task_read` / `task_cancel` (durable work objects), `note` (persistent memory). - **File I/O**: `read_file` (PDFs auto-extracted), `list_dir`, `write_file`, `edit_file`, `apply_patch`, `retrieve_tool_result` for prior spilled large tool outputs. -- **Shell**: `task_shell_start` + `task_shell_wait` for long-running commands, diagnostics, tests, searches, and servers; `exec_shell` for bounded cancellable foreground commands; `exec_shell_wait`, `exec_shell_interact`. If foreground `exec_shell` times out, the process was killed; rerun long work with `task_shell_start` or `exec_shell` using `background: true`, then poll/wait. +- **Shell**: `task_shell_start` + `task_shell_wait` for commands expected to take >5 seconds, diagnostics, tests, searches, polling, sleeps, and servers; `exec_shell` for bounded cancellable foreground commands; `exec_shell_wait`, `exec_shell_interact`. If foreground `exec_shell` times out, the process was killed; rerun long work with `task_shell_start` or `exec_shell` using `background: true`, then poll/wait. - **Task evidence**: `task_gate_run` for verification gates; `pr_attempt_record` / `pr_attempt_list` / `pr_attempt_read` / `pr_attempt_preflight`; for GitHub issue/PR/release triage, prefer the native `gh ... --json` CLI through shell because it is authenticated, structured, and reproducible; `github_issue_context` / `github_pr_context` are read-only fallbacks when the CLI route is unavailable; `github_comment` / `github_close_issue` require approval + evidence; `automation_*` scheduling tools. - **Structured search**: `grep_files`, `file_search`, `web_search`, `fetch_url`, `web.run` (browse). - **Git / diag / tests**: `git_status`, `git_diff`, `git_show`, `git_log`, `git_blame`, `diagnostics`, `run_tests`, `run_verifiers`, `review`. @@ -265,7 +265,7 @@ Use `apply_patch` for structural edits, coordinated changes, or cases where line Use `edit_file` for one clear replacement in one file. Do not use it for multi-block deletions, cross-cutting refactors, or changes that touch more than one logical unit; use `apply_patch` or `write_file` for those. ### `exec_shell` -Use `exec_shell` for shell-native diagnostics, pipelines, and bounded commands. Use structured tools for structured operations when they map directly (`grep_files`, `git_diff`, `read_file`). For long commands, servers, full test suites, or release computations, start background work with `task_shell_start` or `exec_shell` using `background: true`, then poll with `task_shell_wait` or `exec_shell_wait`. +Use `exec_shell` for shell-native diagnostics, pipelines, and bounded commands. Use structured tools for structured operations when they map directly (`grep_files`, `git_diff`, `read_file`). For commands expected to take >5 seconds, including long commands, servers, full test suites, polling, sleeps, or release computations, start background work with `task_shell_start` or `exec_shell` using `background: true`, then poll with `task_shell_wait` or `exec_shell_wait`. ### `agent_open` / `agent_eval` / `agent_close` / `tool_agent` Use `agent_open` for independent investigations or implementation slices that can run while you continue coordinating. Fresh sessions are the default and are best when the child only needs the assignment you pass. Use `fork_context: true` when multiple perspectives should share the same parent context: the runtime preserves the parent prefill/prompt prefix byte-identically where available so DeepSeek prefix-cache reuse stays high, then appends the child instructions and task at the tail. diff --git a/crates/tui/src/tools/shell.rs b/crates/tui/src/tools/shell.rs index 278134ec..c949dd40 100644 --- a/crates/tui/src/tools/shell.rs +++ b/crates/tui/src/tools/shell.rs @@ -2005,7 +2005,7 @@ impl ToolSpec for ExecShellTool { } fn description(&self) -> &'static str { - "Execute a shell command in the workspace directory. Foreground mode is for bounded commands; use background=true or task_shell_start for long-running work, then poll/wait." + "Execute a shell command in the workspace directory. Foreground mode is for bounded commands; use background=true or task_shell_start for work expected to take >5 seconds, then poll/wait." } fn input_schema(&self) -> serde_json::Value { @@ -2022,7 +2022,7 @@ impl ToolSpec for ExecShellTool { }, "background": { "type": "boolean", - "description": "Run in background and return task_id (default: false). Prefer true for commands that may run for minutes; poll with exec_shell_wait or task_shell_wait." + "description": "Run in background and return task_id (default: false). Prefer task_shell_start or background=true for commands expected to take >5 seconds, including builds, test suites, servers, CI polling, sleep, or other long-running work; poll with exec_shell_wait or task_shell_wait." }, "interactive": { "type": "boolean", diff --git a/crates/tui/src/tools/shell/tests.rs b/crates/tui/src/tools/shell/tests.rs index 9708ea56..8da55eb8 100644 --- a/crates/tui/src/tools/shell/tests.rs +++ b/crates/tui/src/tools/shell/tests.rs @@ -643,6 +643,17 @@ async fn test_exec_shell_foreground_timeout_guides_background_rerun() { ); } +#[test] +fn test_exec_shell_schema_guides_gt_five_second_work_to_background() { + let schema = ExecShellTool.input_schema(); + let description = schema["properties"]["background"]["description"] + .as_str() + .expect("background description"); + assert!(description.contains(">5 seconds"), "{description}"); + assert!(description.contains("task_shell_start"), "{description}"); + assert!(description.contains("exec_shell_wait"), "{description}"); +} + #[tokio::test] async fn test_exec_shell_foreground_cancel_kills_process() { let tmp = tempdir().expect("tempdir");