fix: gate task_shell_start behind allow_shell like exec_shell

task_shell_start delegates to ExecShellTool, providing the same shell
execution capability as exec_shell. Previously, task_shell_start was
registered unconditionally in with_runtime_task_tools while exec_shell
was gated behind allow_shell, creating an inconsistent security gate.

This caused the model to try exec_shell first, fail, then fall back to
task_shell_start — wasting tokens and bypassing the intended security
boundary.

Split TaskShellStartTool and TaskShellWaitTool out of
with_runtime_task_tools into a new with_runtime_task_shell_tools method,
and gate both behind the allow_shell check in with_agent_tools.

Closes #2303
This commit is contained in:
Hu Qiantao
2026-05-31 11:51:14 +08:00
committed by Hunter Bown
parent fc9a32be1a
commit ed81d13782
3 changed files with 47 additions and 6 deletions
+1 -1
View File
@@ -85,7 +85,7 @@ impl Engine {
&& self.config.features.enabled(Feature::ShellTool)
&& self.session.allow_shell
{
builder = builder.with_shell_tools();
builder = builder.with_shell_tools().with_runtime_task_shell_tools();
}
// Register the `remember` tool only when the user has opted in to
+45 -4
View File
@@ -542,6 +542,10 @@ impl ToolRegistryBuilder {
}
/// Include durable task, gate, PR-attempt, GitHub, and automation tools.
///
/// Shell-related task tools (`task_shell_start`, `task_shell_wait`) are
/// *not* included here — use [`with_runtime_task_shell_tools`] to register
/// them when `allow_shell` is true.
#[must_use]
pub fn with_runtime_task_tools(self) -> Self {
use super::automation::{
@@ -555,7 +559,6 @@ impl ToolRegistryBuilder {
use super::tasks::{
PrAttemptListTool, PrAttemptPreflightTool, PrAttemptReadTool, PrAttemptRecordTool,
TaskCancelTool, TaskCreateTool, TaskGateRunTool, TaskListTool, TaskReadTool,
TaskShellStartTool, TaskShellWaitTool,
};
self.with_tool(Arc::new(TaskCreateTool))
@@ -563,8 +566,6 @@ impl ToolRegistryBuilder {
.with_tool(Arc::new(TaskReadTool))
.with_tool(Arc::new(TaskCancelTool))
.with_tool(Arc::new(TaskGateRunTool))
.with_tool(Arc::new(TaskShellStartTool))
.with_tool(Arc::new(TaskShellWaitTool))
.with_tool(Arc::new(GithubIssueContextTool))
.with_tool(Arc::new(GithubPrContextTool))
.with_tool(Arc::new(PrAttemptRecordTool))
@@ -584,6 +585,18 @@ impl ToolRegistryBuilder {
.with_tool(Arc::new(GithubClosePrTool))
}
/// Include shell-related task tools (`task_shell_start`, `task_shell_wait`).
///
/// These are gated behind `allow_shell` because `task_shell_start`
/// delegates directly to `ExecShellTool`, providing the same shell
/// execution capability as `exec_shell`.
#[must_use]
pub fn with_runtime_task_shell_tools(self) -> Self {
use super::tasks::{TaskShellStartTool, TaskShellWaitTool};
self.with_tool(Arc::new(TaskShellStartTool))
.with_tool(Arc::new(TaskShellWaitTool))
}
/// Include only read-only durable task, PR-attempt, GitHub, and automation
/// inspection tools. Plan mode uses this surface so it can observe state
/// without starting work, changing remotes, or mutating automation config.
@@ -786,7 +799,7 @@ impl ToolRegistryBuilder {
.with_image_ocr_tools();
if allow_shell {
builder.with_shell_tools()
builder.with_shell_tools().with_runtime_task_shell_tools()
} else {
builder
}
@@ -1379,4 +1392,32 @@ mod tests {
assert!(registry.contains("finance"));
}
#[test]
fn agent_tools_with_allow_shell_false_excludes_shell_tools() {
let tmp = tempdir().expect("tempdir");
let ctx = ToolContext::new(tmp.path().to_path_buf());
let registry = ToolRegistryBuilder::new()
.with_agent_tools(false)
.build(ctx);
assert!(!registry.contains("exec_shell"), "exec_shell should be excluded when allow_shell is false");
assert!(!registry.contains("task_shell_start"), "task_shell_start should be excluded when allow_shell is false");
assert!(!registry.contains("task_shell_wait"), "task_shell_wait should be excluded when allow_shell is false");
}
#[test]
fn agent_tools_with_allow_shell_true_includes_shell_tools() {
let tmp = tempdir().expect("tempdir");
let ctx = ToolContext::new(tmp.path().to_path_buf());
let registry = ToolRegistryBuilder::new()
.with_agent_tools(true)
.build(ctx);
assert!(registry.contains("exec_shell"), "exec_shell should be included when allow_shell is true");
assert!(registry.contains("task_shell_start"), "task_shell_start should be included when allow_shell is true");
assert!(registry.contains("task_shell_wait"), "task_shell_wait should be included when allow_shell is true");
}
}
+1 -1
View File
@@ -19,7 +19,7 @@ Run `/mode` to open the mode picker, or switch directly with `/mode agent`,
`/mode plan`, `/mode yolo`, `/mode 1`, `/mode 2`, or `/mode 3`.
- **Plan**: design-first prompting. Read-only investigation tools stay available; shell and patch execution stay off. Use this when you want to think out loud and produce a plan to hand to a human (yourself later, or a reviewer).
- **Agent**: multi-step tool use. Approvals for shell and paid tools (file writes are allowed without a prompt).
- **Agent**: multi-step tool use. Shell execution (`exec_shell`, `task_shell_start`) requires `allow_shell = true` in config; approval prompts gate each call. File writes are allowed without a prompt.
- **YOLO**: enables shell + trust mode and auto-approves all tools. Use only in trusted repos.
All action-capable modes have access to persistent RLM sessions through `rlm_open`, `rlm_eval`, `rlm_configure`, and `rlm_close`. Inside an RLM Python REPL, `sub_query_batch` fans out 1-16 cheap parallel child calls pinned to `deepseek-v4-flash`. The model reaches for it when work is too large or repetitive for the parent transcript.