fix: gate task_shell_start behind allow_shell like exec_shell
task_shell_start delegates to ExecShellTool, providing the same shell execution capability as exec_shell. Previously, task_shell_start was registered unconditionally in with_runtime_task_tools while exec_shell was gated behind allow_shell, creating an inconsistent security gate. This caused the model to try exec_shell first, fail, then fall back to task_shell_start — wasting tokens and bypassing the intended security boundary. Split TaskShellStartTool and TaskShellWaitTool out of with_runtime_task_tools into a new with_runtime_task_shell_tools method, and gate both behind the allow_shell check in with_agent_tools. Closes #2303
This commit is contained in:
@@ -85,7 +85,7 @@ impl Engine {
|
||||
&& self.config.features.enabled(Feature::ShellTool)
|
||||
&& self.session.allow_shell
|
||||
{
|
||||
builder = builder.with_shell_tools();
|
||||
builder = builder.with_shell_tools().with_runtime_task_shell_tools();
|
||||
}
|
||||
|
||||
// Register the `remember` tool only when the user has opted in to
|
||||
|
||||
@@ -542,6 +542,10 @@ impl ToolRegistryBuilder {
|
||||
}
|
||||
|
||||
/// Include durable task, gate, PR-attempt, GitHub, and automation tools.
|
||||
///
|
||||
/// Shell-related task tools (`task_shell_start`, `task_shell_wait`) are
|
||||
/// *not* included here — use [`with_runtime_task_shell_tools`] to register
|
||||
/// them when `allow_shell` is true.
|
||||
#[must_use]
|
||||
pub fn with_runtime_task_tools(self) -> Self {
|
||||
use super::automation::{
|
||||
@@ -555,7 +559,6 @@ impl ToolRegistryBuilder {
|
||||
use super::tasks::{
|
||||
PrAttemptListTool, PrAttemptPreflightTool, PrAttemptReadTool, PrAttemptRecordTool,
|
||||
TaskCancelTool, TaskCreateTool, TaskGateRunTool, TaskListTool, TaskReadTool,
|
||||
TaskShellStartTool, TaskShellWaitTool,
|
||||
};
|
||||
|
||||
self.with_tool(Arc::new(TaskCreateTool))
|
||||
@@ -563,8 +566,6 @@ impl ToolRegistryBuilder {
|
||||
.with_tool(Arc::new(TaskReadTool))
|
||||
.with_tool(Arc::new(TaskCancelTool))
|
||||
.with_tool(Arc::new(TaskGateRunTool))
|
||||
.with_tool(Arc::new(TaskShellStartTool))
|
||||
.with_tool(Arc::new(TaskShellWaitTool))
|
||||
.with_tool(Arc::new(GithubIssueContextTool))
|
||||
.with_tool(Arc::new(GithubPrContextTool))
|
||||
.with_tool(Arc::new(PrAttemptRecordTool))
|
||||
@@ -584,6 +585,18 @@ impl ToolRegistryBuilder {
|
||||
.with_tool(Arc::new(GithubClosePrTool))
|
||||
}
|
||||
|
||||
/// Include shell-related task tools (`task_shell_start`, `task_shell_wait`).
|
||||
///
|
||||
/// These are gated behind `allow_shell` because `task_shell_start`
|
||||
/// delegates directly to `ExecShellTool`, providing the same shell
|
||||
/// execution capability as `exec_shell`.
|
||||
#[must_use]
|
||||
pub fn with_runtime_task_shell_tools(self) -> Self {
|
||||
use super::tasks::{TaskShellStartTool, TaskShellWaitTool};
|
||||
self.with_tool(Arc::new(TaskShellStartTool))
|
||||
.with_tool(Arc::new(TaskShellWaitTool))
|
||||
}
|
||||
|
||||
/// Include only read-only durable task, PR-attempt, GitHub, and automation
|
||||
/// inspection tools. Plan mode uses this surface so it can observe state
|
||||
/// without starting work, changing remotes, or mutating automation config.
|
||||
@@ -786,7 +799,7 @@ impl ToolRegistryBuilder {
|
||||
.with_image_ocr_tools();
|
||||
|
||||
if allow_shell {
|
||||
builder.with_shell_tools()
|
||||
builder.with_shell_tools().with_runtime_task_shell_tools()
|
||||
} else {
|
||||
builder
|
||||
}
|
||||
@@ -1379,4 +1392,32 @@ mod tests {
|
||||
|
||||
assert!(registry.contains("finance"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn agent_tools_with_allow_shell_false_excludes_shell_tools() {
|
||||
let tmp = tempdir().expect("tempdir");
|
||||
let ctx = ToolContext::new(tmp.path().to_path_buf());
|
||||
|
||||
let registry = ToolRegistryBuilder::new()
|
||||
.with_agent_tools(false)
|
||||
.build(ctx);
|
||||
|
||||
assert!(!registry.contains("exec_shell"), "exec_shell should be excluded when allow_shell is false");
|
||||
assert!(!registry.contains("task_shell_start"), "task_shell_start should be excluded when allow_shell is false");
|
||||
assert!(!registry.contains("task_shell_wait"), "task_shell_wait should be excluded when allow_shell is false");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn agent_tools_with_allow_shell_true_includes_shell_tools() {
|
||||
let tmp = tempdir().expect("tempdir");
|
||||
let ctx = ToolContext::new(tmp.path().to_path_buf());
|
||||
|
||||
let registry = ToolRegistryBuilder::new()
|
||||
.with_agent_tools(true)
|
||||
.build(ctx);
|
||||
|
||||
assert!(registry.contains("exec_shell"), "exec_shell should be included when allow_shell is true");
|
||||
assert!(registry.contains("task_shell_start"), "task_shell_start should be included when allow_shell is true");
|
||||
assert!(registry.contains("task_shell_wait"), "task_shell_wait should be included when allow_shell is true");
|
||||
}
|
||||
}
|
||||
|
||||
+1
-1
@@ -19,7 +19,7 @@ Run `/mode` to open the mode picker, or switch directly with `/mode agent`,
|
||||
`/mode plan`, `/mode yolo`, `/mode 1`, `/mode 2`, or `/mode 3`.
|
||||
|
||||
- **Plan**: design-first prompting. Read-only investigation tools stay available; shell and patch execution stay off. Use this when you want to think out loud and produce a plan to hand to a human (yourself later, or a reviewer).
|
||||
- **Agent**: multi-step tool use. Approvals for shell and paid tools (file writes are allowed without a prompt).
|
||||
- **Agent**: multi-step tool use. Shell execution (`exec_shell`, `task_shell_start`) requires `allow_shell = true` in config; approval prompts gate each call. File writes are allowed without a prompt.
|
||||
- **YOLO**: enables shell + trust mode and auto-approves all tools. Use only in trusted repos.
|
||||
|
||||
All action-capable modes have access to persistent RLM sessions through `rlm_open`, `rlm_eval`, `rlm_configure`, and `rlm_close`. Inside an RLM Python REPL, `sub_query_batch` fans out 1-16 cheap parallel child calls pinned to `deepseek-v4-flash`. The model reaches for it when work is too large or repetitive for the parent transcript.
|
||||
|
||||
Reference in New Issue
Block a user