fix(shell): explain Plan mode network sandbox failures

Closes #929

Squashed from #974 after local verification and green CI.
This commit is contained in:
Hunter Bown
2026-05-07 01:51:59 -05:00
committed by GitHub
parent 4f77c625fd
commit b6509b10de
6 changed files with 261 additions and 23 deletions
+14 -4
View File
@@ -1448,10 +1448,20 @@ impl Engine {
}
match mode {
// Plan mode is read-only investigation; the shell tool is not
// registered, so leaving the sandbox policy at the seatbelt-strict
// default is fine.
AppMode::Plan => ctx,
// Plan mode is read-only investigation. Shell tools can still be
// available for read-only local inspection, but outbound network
// remains sandbox-blocked; attach an explicit hint so network
// command failures do not look like DNS/proxy problems.
AppMode::Plan => ctx
.with_elevated_sandbox_policy(crate::sandbox::SandboxPolicy::WorkspaceWrite {
writable_roots: vec![self.session.workspace.clone()],
network_access: false,
exclude_tmpdir: false,
exclude_slash_tmp: false,
})
.with_shell_network_denied_hint(
"Shell command blocked: Plan mode runs shell commands in a network-restricted sandbox. Use fetch_url or code_execution for network access, or switch to Agent mode (`/agent`) before retrying shell network commands.",
),
// Agent registers the shell tool and runs each command through
// the per-mode sandbox + per-tool approval flow. The sandbox
// default would deny all outbound network — including DNS —
+13 -6
View File
@@ -500,13 +500,20 @@ fn agent_and_yolo_modes_elevate_shell_sandbox_to_allow_network() {
"Yolo mode must use DangerFullAccess (no sandbox); got {yolo_policy:?}",
);
// Plan mode is read-only investigation and does not register the shell
// tool, so it intentionally leaves the policy at the strict default.
// Plan mode can still expose shell tools for local read-only inspection,
// but it keeps outbound network blocked and attaches an explicit hint so
// network command failures are not mistaken for DNS/proxy issues.
let plan_ctx = engine.build_tool_context(AppMode::Plan, false);
let plan_policy = plan_ctx
.elevated_sandbox_policy
.as_ref()
.expect("Plan mode should make the shell sandbox policy explicit");
assert!(!plan_policy.has_network_access());
assert!(
engine
.build_tool_context(AppMode::Plan, false)
.elevated_sandbox_policy
.is_none(),
plan_ctx
.shell_network_denied_hint
.as_deref()
.is_some_and(|hint| hint.contains("Plan mode")),
);
}
+4 -3
View File
@@ -33,9 +33,10 @@ pub(super) fn should_default_defer_tool(name: &str, mode: AppMode) -> bool {
return false;
}
// Shell tools are kept active in Agent so the model can run verification
// commands (build/test/git/cargo) without first having to discover the
// tool through ToolSearch. Plan mode never registers shell tools.
// Shell exec tools are kept active in Agent so the model can run
// verification commands (build/test/git/cargo) without first having to
// discover them through ToolSearch. Plan mode may register shell tools,
// but keeps most shell execution deferred and network-restricted.
let always_loaded_in_action_modes = matches!(mode, AppMode::Agent)
&& matches!(
name,
+133 -10
View File
@@ -109,6 +109,7 @@ pub struct ShellJobDetail {
}
pub struct ShellDeltaResult {
pub command: String,
pub result: ShellResult,
pub stdout_total_len: usize,
pub stderr_total_len: usize,
@@ -1206,6 +1207,7 @@ impl ShellManager {
let (stderr, stderr_meta) = truncate_with_meta(&stderr_delta);
let sandboxed = !matches!(shell.sandbox_type, SandboxType::None);
let command = shell.command.clone();
let result = ShellResult {
task_id: Some(shell.id.clone()),
status: shell.status.clone(),
@@ -1229,6 +1231,7 @@ impl ShellManager {
};
Ok(ShellDeltaResult {
command,
result,
stdout_total_len: stdout_total,
stderr_total_len: stderr_total,
@@ -1409,7 +1412,7 @@ pub fn new_shared_shell_manager(workspace: PathBuf) -> SharedShellManager {
// === ToolSpec Implementations ===
use crate::command_safety::{SafetyLevel, analyze_command};
use crate::command_safety::{SafetyLevel, analyze_command, extract_primary_command};
use crate::execpolicy::{ExecPolicyDecision, load_default_policy};
use crate::features::Feature;
use crate::tools::spec::{
@@ -1423,6 +1426,99 @@ const FOREGROUND_TIMEOUT_RECOVERY_HINT: &str = "Foreground exec_shell is for bou
The timed-out process was killed; rerun long work with task_shell_start or exec_shell with \
background: true, then poll with task_shell_wait or exec_shell_wait.";
fn command_likely_needs_network(command: &str) -> bool {
let normalized = command.to_ascii_lowercase();
let Some(primary) = extract_primary_command(&normalized) else {
return false;
};
let primary = primary.rsplit(['/', '\\']).next().unwrap_or(primary);
match primary {
"curl" | "wget" | "fetch" | "nc" | "netcat" | "ncat" | "ssh" | "scp" | "sftp" | "rsync"
| "ftp" | "ping" | "traceroute" | "nslookup" | "dig" | "host" | "nmap" | "gh" | "hub" => {
true
}
"git" => [
" fetch",
" pull",
" clone",
" ls-remote",
" submodule",
" push",
]
.iter()
.any(|needle| normalized.contains(needle)),
"cargo" => [" install", " fetch", " update", " publish", " search"]
.iter()
.any(|needle| normalized.contains(needle)),
"npm" | "pnpm" | "yarn" => [" install", " i", " add", " update", " publish"]
.iter()
.any(|needle| normalized.contains(needle)),
"pip" | "pip3" | "uv" | "poetry" => [" install", " add", " sync", " update"]
.iter()
.any(|needle| normalized.contains(needle)),
"brew" | "apt" | "apt-get" | "yum" | "dnf" | "pacman" => true,
"go" => [" get", " install", " mod download"]
.iter()
.any(|needle| normalized.contains(needle)),
_ => false,
}
}
fn looks_like_network_blocked_failure(result: &ShellResult) -> bool {
if matches!(result.status, ShellStatus::Completed | ShellStatus::Running)
|| result.exit_code == Some(0)
{
return false;
}
if result.stdout.trim() == "000" {
return true;
}
if result.sandboxed && result.stdout.is_empty() && result.stderr.is_empty() {
return true;
}
let output = format!("{}\n{}", result.stdout, result.stderr).to_ascii_lowercase();
[
"operation not permitted",
"network is unreachable",
"could not resolve host",
"couldn't resolve host",
"failed to resolve",
"temporary failure in name resolution",
"name or service not known",
"nodename nor servname provided",
"no address associated",
"failed to connect",
"couldn't connect",
"connection timed out",
"connection reset",
]
.iter()
.any(|pattern| output.contains(pattern))
}
fn shell_network_restricted_hint<'a>(
context: &'a ToolContext,
command: &str,
result: &ShellResult,
) -> Option<&'a str> {
let hint = context.shell_network_denied_hint.as_deref()?;
let policy_blocks_network = context
.elevated_sandbox_policy
.as_ref()
.is_some_and(|policy| !policy.has_network_access());
if !policy_blocks_network || !command_likely_needs_network(command) {
return None;
}
if result.sandbox_denied || looks_like_network_blocked_failure(result) {
Some(hint)
} else {
None
}
}
async fn execute_foreground_via_background(
context: &ToolContext,
command: &str,
@@ -1843,7 +1939,9 @@ impl ToolSpec for ExecShellTool {
} else {
stdout_summary.clone()
};
let output = if interactive {
let network_restricted_hint =
shell_network_restricted_hint(context, command, &result).map(str::to_string);
let mut output = if interactive {
format!(
"Interactive command completed (exit code: {:?})",
result.exit_code
@@ -1880,6 +1978,9 @@ impl ToolSpec for ExecShellTool {
result.exit_code, result.stdout, result.stderr
)
};
if let Some(hint) = network_restricted_hint.as_deref() {
output = format!("{hint}\n\n{output}");
}
let mut metadata = json!({
"exit_code": result.exit_code,
@@ -1930,6 +2031,10 @@ impl ToolSpec for ExecShellTool {
"poll_with": ["task_shell_wait", "exec_shell_wait"]
});
}
if let Some(hint) = network_restricted_hint {
metadata["sandbox_network_restricted"] = json!(true);
metadata["sandbox_network_denied_hint"] = json!(hint);
}
Ok(ToolResult {
content: output,
@@ -1971,8 +2076,10 @@ fn required_task_id(input: &serde_json::Value) -> Result<&str, ToolError> {
.ok_or_else(|| ToolError::missing_field("task_id"))
}
fn build_shell_delta_tool_result(delta: ShellDeltaResult) -> ToolResult {
fn build_shell_delta_tool_result(delta: ShellDeltaResult, context: &ToolContext) -> ToolResult {
let result = delta.result;
let network_restricted_hint =
shell_network_restricted_hint(context, &delta.command, &result).map(str::to_string);
let stdout_summary = summarize_output(&result.stdout);
let stderr_summary = summarize_output(&result.stderr);
let summary = if !stderr_summary.is_empty() {
@@ -1981,7 +2088,7 @@ fn build_shell_delta_tool_result(delta: ShellDeltaResult) -> ToolResult {
stdout_summary.clone()
};
let output = if result.stdout.is_empty() && result.stderr.is_empty() {
let mut output = if result.stdout.is_empty() && result.stderr.is_empty() {
match result.status {
ShellStatus::Running => "Background task running (no new output).".to_string(),
ShellStatus::Completed => "(no new output)".to_string(),
@@ -1994,8 +2101,11 @@ fn build_shell_delta_tool_result(delta: ShellDeltaResult) -> ToolResult {
} else {
format!("{}\n\nSTDERR:\n{}", result.stdout, result.stderr)
};
if let Some(hint) = network_restricted_hint.as_deref() {
output = format!("{hint}\n\n{output}");
}
ToolResult {
let mut tool_result = ToolResult {
content: output,
success: matches!(result.status, ShellStatus::Completed | ShellStatus::Running),
metadata: Some(json!({
@@ -2019,7 +2129,15 @@ fn build_shell_delta_tool_result(delta: ShellDeltaResult) -> ToolResult {
"stderr_summary": stderr_summary,
"stream_delta": true,
})),
};
if let Some(hint) = network_restricted_hint
&& let Some(metadata) = tool_result.metadata.as_mut()
&& let Some(object) = metadata.as_object_mut()
{
object.insert("sandbox_network_restricted".to_string(), json!(true));
object.insert("sandbox_network_denied_hint".to_string(), json!(hint));
}
tool_result
}
async fn wait_for_shell_delta_cancellable(
@@ -2032,7 +2150,7 @@ async fn wait_for_shell_delta_cancellable(
let mut stdout_accum = String::new();
let mut stderr_accum = String::new();
let (result, stdout_total_len, stderr_total_len) = loop {
let (command, result, stdout_total_len, stderr_total_len) = loop {
if context
.cancel_token
.as_ref()
@@ -2048,6 +2166,7 @@ async fn wait_for_shell_delta_cancellable(
append_shell_delta_output(&mut stdout_accum, &mut stderr_accum, &delta.result);
return Ok((
shell_delta_with_accumulated_output(
delta.command,
delta.result,
&stdout_accum,
&stderr_accum,
@@ -2070,11 +2189,12 @@ async fn wait_for_shell_delta_cancellable(
let stdout_total_len = delta.stdout_total_len;
let stderr_total_len = delta.stderr_total_len;
let command = delta.command.clone();
append_shell_delta_output(&mut stdout_accum, &mut stderr_accum, &delta.result);
let status = delta.result.status.clone();
if status != ShellStatus::Running || Instant::now() >= deadline {
break (delta.result, stdout_total_len, stderr_total_len);
break (command, delta.result, stdout_total_len, stderr_total_len);
}
tokio::time::sleep(Duration::from_millis(100)).await;
@@ -2082,6 +2202,7 @@ async fn wait_for_shell_delta_cancellable(
Ok((
shell_delta_with_accumulated_output(
command,
result,
&stdout_accum,
&stderr_accum,
@@ -2106,6 +2227,7 @@ fn append_shell_delta_output(
}
fn shell_delta_with_accumulated_output(
command: String,
mut result: ShellResult,
stdout_accum: &str,
stderr_accum: &str,
@@ -2124,6 +2246,7 @@ fn shell_delta_with_accumulated_output(
result.stderr_truncated = stderr_meta.truncated;
ShellDeltaResult {
command,
result,
stdout_total_len,
stderr_total_len,
@@ -2300,7 +2423,7 @@ impl ToolSpec for ShellWaitTool {
};
let status = delta.result.status.clone();
let mut result = build_shell_delta_tool_result(delta);
let mut result = build_shell_delta_tool_result(delta, context);
if wait_canceled {
if matches!(status, ShellStatus::Running) {
result.content = format!(
@@ -2411,7 +2534,7 @@ impl ToolSpec for ShellInteractTool {
let delta = manager
.get_output_delta(task_id, false, 0)
.map_err(|err| ToolError::execution_failed(err.to_string()))?;
let mut result = build_shell_delta_tool_result(delta);
let mut result = build_shell_delta_tool_result(delta, context);
if let Some(metadata) = result.metadata.as_mut()
&& let Some(object) = metadata.as_object_mut()
{
@@ -2435,7 +2558,7 @@ impl ToolSpec for ShellInteractTool {
|| delta.result.status != ShellStatus::Running
|| elapsed >= timeout_ms
{
return Ok(build_shell_delta_tool_result(delta));
return Ok(build_shell_delta_tool_result(delta, context));
}
tokio::time::sleep(Duration::from_millis(50)).await;
+85
View File
@@ -49,6 +49,39 @@ fn echo_stdin_command() -> String {
}
}
fn network_restricted_context(tmp: &std::path::Path) -> ToolContext {
ToolContext::new(tmp)
.with_elevated_sandbox_policy(ExecutionSandboxPolicy::WorkspaceWrite {
writable_roots: vec![tmp.to_path_buf()],
network_access: false,
exclude_tmpdir: false,
exclude_slash_tmp: false,
})
.with_shell_network_denied_hint(
"Shell command blocked: Plan mode runs shell commands in a network-restricted sandbox.",
)
}
fn failed_network_shell_result(stdout: &str, stderr: &str) -> ShellResult {
ShellResult {
task_id: None,
status: ShellStatus::Failed,
exit_code: Some(6),
stdout: stdout.to_string(),
stderr: stderr.to_string(),
duration_ms: 25,
stdout_len: stdout.len(),
stderr_len: stderr.len(),
stdout_omitted: 0,
stderr_omitted: 0,
stdout_truncated: false,
stderr_truncated: false,
sandboxed: true,
sandbox_type: Some("seatbelt".to_string()),
sandbox_denied: false,
}
}
#[test]
fn test_sync_execution() {
let tmp = tempdir().expect("tempdir");
@@ -232,6 +265,58 @@ fn test_truncate_with_meta_reports_omission_counts() {
assert!(truncated.contains("bytes omitted"));
}
#[test]
fn network_restricted_hint_detects_silent_curl_failure() {
let tmp = tempdir().expect("tempdir");
let ctx = network_restricted_context(tmp.path());
let result = failed_network_shell_result("000", "");
let hint = shell_network_restricted_hint(
&ctx,
"curl -s -o /dev/null -w '%{http_code}' https://api.github.com",
&result,
)
.expect("network-restricted hint");
assert!(hint.contains("Plan mode"));
}
#[test]
fn network_restricted_hint_ignores_local_failures() {
let tmp = tempdir().expect("tempdir");
let ctx = network_restricted_context(tmp.path());
let result = failed_network_shell_result("", "No such file or directory");
assert!(shell_network_restricted_hint(&ctx, "cat missing.txt", &result).is_none());
}
#[test]
fn shell_delta_result_surfaces_network_restricted_hint() {
let tmp = tempdir().expect("tempdir");
let ctx = network_restricted_context(tmp.path());
let result = failed_network_shell_result("000", "");
let tool_result = build_shell_delta_tool_result(
ShellDeltaResult {
command: "gh issue list".to_string(),
result,
stdout_total_len: 3,
stderr_total_len: 0,
},
&ctx,
);
assert!(!tool_result.success);
assert!(tool_result.content.starts_with("Shell command blocked"));
let metadata = tool_result.metadata.expect("metadata");
assert_eq!(
metadata
.get("sandbox_network_restricted")
.and_then(Value::as_bool),
Some(true)
);
}
#[test]
fn test_summarize_output_strips_truncation_note() {
let long_output = "x".repeat(60_000);
+12
View File
@@ -86,6 +86,9 @@ pub struct ToolContext {
/// Elevated sandbox policy override (used when retrying after sandbox denial).
/// This overrides the default sandbox behavior for shell commands.
pub elevated_sandbox_policy: Option<crate::sandbox::SandboxPolicy>,
/// Optional user-facing hint for shell commands that fail because the
/// active sandbox policy intentionally denies outbound network access.
pub shell_network_denied_hint: Option<String>,
/// Whether tools should auto-approve without safety checks (YOLO mode).
/// When true, command safety analysis is skipped for shell execution.
pub auto_approve: bool,
@@ -153,6 +156,7 @@ impl ToolContext {
notes_path,
mcp_config_path,
elevated_sandbox_policy: None,
shell_network_denied_hint: None,
auto_approve: false,
features: Features::with_defaults(),
state_namespace: "workspace".to_string(),
@@ -186,6 +190,7 @@ impl ToolContext {
notes_path: notes_path.into(),
mcp_config_path: mcp_config_path.into(),
elevated_sandbox_policy: None,
shell_network_denied_hint: None,
auto_approve: false,
features: Features::with_defaults(),
state_namespace: "workspace".to_string(),
@@ -219,6 +224,7 @@ impl ToolContext {
notes_path: notes_path.into(),
mcp_config_path: mcp_config_path.into(),
elevated_sandbox_policy: None,
shell_network_denied_hint: None,
auto_approve,
features: Features::with_defaults(),
state_namespace: "workspace".to_string(),
@@ -448,6 +454,12 @@ impl ToolContext {
self
}
/// Set the shell network-denial hint used by network-restricted modes.
pub fn with_shell_network_denied_hint(mut self, hint: impl Into<String>) -> Self {
self.shell_network_denied_hint = Some(hint.into());
self
}
/// Set the namespace used for session-scoped tool state.
pub fn with_state_namespace(mut self, namespace: impl Into<String>) -> Self {
self.state_namespace = namespace.into();