From b6509b10de07983ba80b885e84c1bb1ae9467fd2 Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Thu, 7 May 2026 01:51:59 -0500 Subject: [PATCH] fix(shell): explain Plan mode network sandbox failures Closes #929 Squashed from #974 after local verification and green CI. --- crates/tui/src/core/engine.rs | 18 ++- crates/tui/src/core/engine/tests.rs | 19 ++- crates/tui/src/core/engine/tool_catalog.rs | 7 +- crates/tui/src/tools/shell.rs | 143 +++++++++++++++++++-- crates/tui/src/tools/shell/tests.rs | 85 ++++++++++++ crates/tui/src/tools/spec.rs | 12 ++ 6 files changed, 261 insertions(+), 23 deletions(-) diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index 5d5b4981..dc1cb2fe 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -1448,10 +1448,20 @@ impl Engine { } match mode { - // Plan mode is read-only investigation; the shell tool is not - // registered, so leaving the sandbox policy at the seatbelt-strict - // default is fine. - AppMode::Plan => ctx, + // Plan mode is read-only investigation. Shell tools can still be + // available for read-only local inspection, but outbound network + // remains sandbox-blocked; attach an explicit hint so network + // command failures do not look like DNS/proxy problems. + AppMode::Plan => ctx + .with_elevated_sandbox_policy(crate::sandbox::SandboxPolicy::WorkspaceWrite { + writable_roots: vec![self.session.workspace.clone()], + network_access: false, + exclude_tmpdir: false, + exclude_slash_tmp: false, + }) + .with_shell_network_denied_hint( + "Shell command blocked: Plan mode runs shell commands in a network-restricted sandbox. Use fetch_url or code_execution for network access, or switch to Agent mode (`/agent`) before retrying shell network commands.", + ), // Agent registers the shell tool and runs each command through // the per-mode sandbox + per-tool approval flow. The sandbox // default would deny all outbound network — including DNS — diff --git a/crates/tui/src/core/engine/tests.rs b/crates/tui/src/core/engine/tests.rs index 03f5edd0..1971875c 100644 --- a/crates/tui/src/core/engine/tests.rs +++ b/crates/tui/src/core/engine/tests.rs @@ -500,13 +500,20 @@ fn agent_and_yolo_modes_elevate_shell_sandbox_to_allow_network() { "Yolo mode must use DangerFullAccess (no sandbox); got {yolo_policy:?}", ); - // Plan mode is read-only investigation and does not register the shell - // tool, so it intentionally leaves the policy at the strict default. + // Plan mode can still expose shell tools for local read-only inspection, + // but it keeps outbound network blocked and attaches an explicit hint so + // network command failures are not mistaken for DNS/proxy issues. + let plan_ctx = engine.build_tool_context(AppMode::Plan, false); + let plan_policy = plan_ctx + .elevated_sandbox_policy + .as_ref() + .expect("Plan mode should make the shell sandbox policy explicit"); + assert!(!plan_policy.has_network_access()); assert!( - engine - .build_tool_context(AppMode::Plan, false) - .elevated_sandbox_policy - .is_none(), + plan_ctx + .shell_network_denied_hint + .as_deref() + .is_some_and(|hint| hint.contains("Plan mode")), ); } diff --git a/crates/tui/src/core/engine/tool_catalog.rs b/crates/tui/src/core/engine/tool_catalog.rs index c731a794..2212e5ef 100644 --- a/crates/tui/src/core/engine/tool_catalog.rs +++ b/crates/tui/src/core/engine/tool_catalog.rs @@ -33,9 +33,10 @@ pub(super) fn should_default_defer_tool(name: &str, mode: AppMode) -> bool { return false; } - // Shell tools are kept active in Agent so the model can run verification - // commands (build/test/git/cargo) without first having to discover the - // tool through ToolSearch. Plan mode never registers shell tools. + // Shell exec tools are kept active in Agent so the model can run + // verification commands (build/test/git/cargo) without first having to + // discover them through ToolSearch. Plan mode may register shell tools, + // but keeps most shell execution deferred and network-restricted. let always_loaded_in_action_modes = matches!(mode, AppMode::Agent) && matches!( name, diff --git a/crates/tui/src/tools/shell.rs b/crates/tui/src/tools/shell.rs index f9ac5276..e9f0cc3f 100644 --- a/crates/tui/src/tools/shell.rs +++ b/crates/tui/src/tools/shell.rs @@ -109,6 +109,7 @@ pub struct ShellJobDetail { } pub struct ShellDeltaResult { + pub command: String, pub result: ShellResult, pub stdout_total_len: usize, pub stderr_total_len: usize, @@ -1206,6 +1207,7 @@ impl ShellManager { let (stderr, stderr_meta) = truncate_with_meta(&stderr_delta); let sandboxed = !matches!(shell.sandbox_type, SandboxType::None); + let command = shell.command.clone(); let result = ShellResult { task_id: Some(shell.id.clone()), status: shell.status.clone(), @@ -1229,6 +1231,7 @@ impl ShellManager { }; Ok(ShellDeltaResult { + command, result, stdout_total_len: stdout_total, stderr_total_len: stderr_total, @@ -1409,7 +1412,7 @@ pub fn new_shared_shell_manager(workspace: PathBuf) -> SharedShellManager { // === ToolSpec Implementations === -use crate::command_safety::{SafetyLevel, analyze_command}; +use crate::command_safety::{SafetyLevel, analyze_command, extract_primary_command}; use crate::execpolicy::{ExecPolicyDecision, load_default_policy}; use crate::features::Feature; use crate::tools::spec::{ @@ -1423,6 +1426,99 @@ const FOREGROUND_TIMEOUT_RECOVERY_HINT: &str = "Foreground exec_shell is for bou The timed-out process was killed; rerun long work with task_shell_start or exec_shell with \ background: true, then poll with task_shell_wait or exec_shell_wait."; +fn command_likely_needs_network(command: &str) -> bool { + let normalized = command.to_ascii_lowercase(); + let Some(primary) = extract_primary_command(&normalized) else { + return false; + }; + let primary = primary.rsplit(['/', '\\']).next().unwrap_or(primary); + + match primary { + "curl" | "wget" | "fetch" | "nc" | "netcat" | "ncat" | "ssh" | "scp" | "sftp" | "rsync" + | "ftp" | "ping" | "traceroute" | "nslookup" | "dig" | "host" | "nmap" | "gh" | "hub" => { + true + } + "git" => [ + " fetch", + " pull", + " clone", + " ls-remote", + " submodule", + " push", + ] + .iter() + .any(|needle| normalized.contains(needle)), + "cargo" => [" install", " fetch", " update", " publish", " search"] + .iter() + .any(|needle| normalized.contains(needle)), + "npm" | "pnpm" | "yarn" => [" install", " i", " add", " update", " publish"] + .iter() + .any(|needle| normalized.contains(needle)), + "pip" | "pip3" | "uv" | "poetry" => [" install", " add", " sync", " update"] + .iter() + .any(|needle| normalized.contains(needle)), + "brew" | "apt" | "apt-get" | "yum" | "dnf" | "pacman" => true, + "go" => [" get", " install", " mod download"] + .iter() + .any(|needle| normalized.contains(needle)), + _ => false, + } +} + +fn looks_like_network_blocked_failure(result: &ShellResult) -> bool { + if matches!(result.status, ShellStatus::Completed | ShellStatus::Running) + || result.exit_code == Some(0) + { + return false; + } + + if result.stdout.trim() == "000" { + return true; + } + if result.sandboxed && result.stdout.is_empty() && result.stderr.is_empty() { + return true; + } + + let output = format!("{}\n{}", result.stdout, result.stderr).to_ascii_lowercase(); + [ + "operation not permitted", + "network is unreachable", + "could not resolve host", + "couldn't resolve host", + "failed to resolve", + "temporary failure in name resolution", + "name or service not known", + "nodename nor servname provided", + "no address associated", + "failed to connect", + "couldn't connect", + "connection timed out", + "connection reset", + ] + .iter() + .any(|pattern| output.contains(pattern)) +} + +fn shell_network_restricted_hint<'a>( + context: &'a ToolContext, + command: &str, + result: &ShellResult, +) -> Option<&'a str> { + let hint = context.shell_network_denied_hint.as_deref()?; + let policy_blocks_network = context + .elevated_sandbox_policy + .as_ref() + .is_some_and(|policy| !policy.has_network_access()); + if !policy_blocks_network || !command_likely_needs_network(command) { + return None; + } + if result.sandbox_denied || looks_like_network_blocked_failure(result) { + Some(hint) + } else { + None + } +} + async fn execute_foreground_via_background( context: &ToolContext, command: &str, @@ -1843,7 +1939,9 @@ impl ToolSpec for ExecShellTool { } else { stdout_summary.clone() }; - let output = if interactive { + let network_restricted_hint = + shell_network_restricted_hint(context, command, &result).map(str::to_string); + let mut output = if interactive { format!( "Interactive command completed (exit code: {:?})", result.exit_code @@ -1880,6 +1978,9 @@ impl ToolSpec for ExecShellTool { result.exit_code, result.stdout, result.stderr ) }; + if let Some(hint) = network_restricted_hint.as_deref() { + output = format!("{hint}\n\n{output}"); + } let mut metadata = json!({ "exit_code": result.exit_code, @@ -1930,6 +2031,10 @@ impl ToolSpec for ExecShellTool { "poll_with": ["task_shell_wait", "exec_shell_wait"] }); } + if let Some(hint) = network_restricted_hint { + metadata["sandbox_network_restricted"] = json!(true); + metadata["sandbox_network_denied_hint"] = json!(hint); + } Ok(ToolResult { content: output, @@ -1971,8 +2076,10 @@ fn required_task_id(input: &serde_json::Value) -> Result<&str, ToolError> { .ok_or_else(|| ToolError::missing_field("task_id")) } -fn build_shell_delta_tool_result(delta: ShellDeltaResult) -> ToolResult { +fn build_shell_delta_tool_result(delta: ShellDeltaResult, context: &ToolContext) -> ToolResult { let result = delta.result; + let network_restricted_hint = + shell_network_restricted_hint(context, &delta.command, &result).map(str::to_string); let stdout_summary = summarize_output(&result.stdout); let stderr_summary = summarize_output(&result.stderr); let summary = if !stderr_summary.is_empty() { @@ -1981,7 +2088,7 @@ fn build_shell_delta_tool_result(delta: ShellDeltaResult) -> ToolResult { stdout_summary.clone() }; - let output = if result.stdout.is_empty() && result.stderr.is_empty() { + let mut output = if result.stdout.is_empty() && result.stderr.is_empty() { match result.status { ShellStatus::Running => "Background task running (no new output).".to_string(), ShellStatus::Completed => "(no new output)".to_string(), @@ -1994,8 +2101,11 @@ fn build_shell_delta_tool_result(delta: ShellDeltaResult) -> ToolResult { } else { format!("{}\n\nSTDERR:\n{}", result.stdout, result.stderr) }; + if let Some(hint) = network_restricted_hint.as_deref() { + output = format!("{hint}\n\n{output}"); + } - ToolResult { + let mut tool_result = ToolResult { content: output, success: matches!(result.status, ShellStatus::Completed | ShellStatus::Running), metadata: Some(json!({ @@ -2019,7 +2129,15 @@ fn build_shell_delta_tool_result(delta: ShellDeltaResult) -> ToolResult { "stderr_summary": stderr_summary, "stream_delta": true, })), + }; + if let Some(hint) = network_restricted_hint + && let Some(metadata) = tool_result.metadata.as_mut() + && let Some(object) = metadata.as_object_mut() + { + object.insert("sandbox_network_restricted".to_string(), json!(true)); + object.insert("sandbox_network_denied_hint".to_string(), json!(hint)); } + tool_result } async fn wait_for_shell_delta_cancellable( @@ -2032,7 +2150,7 @@ async fn wait_for_shell_delta_cancellable( let mut stdout_accum = String::new(); let mut stderr_accum = String::new(); - let (result, stdout_total_len, stderr_total_len) = loop { + let (command, result, stdout_total_len, stderr_total_len) = loop { if context .cancel_token .as_ref() @@ -2048,6 +2166,7 @@ async fn wait_for_shell_delta_cancellable( append_shell_delta_output(&mut stdout_accum, &mut stderr_accum, &delta.result); return Ok(( shell_delta_with_accumulated_output( + delta.command, delta.result, &stdout_accum, &stderr_accum, @@ -2070,11 +2189,12 @@ async fn wait_for_shell_delta_cancellable( let stdout_total_len = delta.stdout_total_len; let stderr_total_len = delta.stderr_total_len; + let command = delta.command.clone(); append_shell_delta_output(&mut stdout_accum, &mut stderr_accum, &delta.result); let status = delta.result.status.clone(); if status != ShellStatus::Running || Instant::now() >= deadline { - break (delta.result, stdout_total_len, stderr_total_len); + break (command, delta.result, stdout_total_len, stderr_total_len); } tokio::time::sleep(Duration::from_millis(100)).await; @@ -2082,6 +2202,7 @@ async fn wait_for_shell_delta_cancellable( Ok(( shell_delta_with_accumulated_output( + command, result, &stdout_accum, &stderr_accum, @@ -2106,6 +2227,7 @@ fn append_shell_delta_output( } fn shell_delta_with_accumulated_output( + command: String, mut result: ShellResult, stdout_accum: &str, stderr_accum: &str, @@ -2124,6 +2246,7 @@ fn shell_delta_with_accumulated_output( result.stderr_truncated = stderr_meta.truncated; ShellDeltaResult { + command, result, stdout_total_len, stderr_total_len, @@ -2300,7 +2423,7 @@ impl ToolSpec for ShellWaitTool { }; let status = delta.result.status.clone(); - let mut result = build_shell_delta_tool_result(delta); + let mut result = build_shell_delta_tool_result(delta, context); if wait_canceled { if matches!(status, ShellStatus::Running) { result.content = format!( @@ -2411,7 +2534,7 @@ impl ToolSpec for ShellInteractTool { let delta = manager .get_output_delta(task_id, false, 0) .map_err(|err| ToolError::execution_failed(err.to_string()))?; - let mut result = build_shell_delta_tool_result(delta); + let mut result = build_shell_delta_tool_result(delta, context); if let Some(metadata) = result.metadata.as_mut() && let Some(object) = metadata.as_object_mut() { @@ -2435,7 +2558,7 @@ impl ToolSpec for ShellInteractTool { || delta.result.status != ShellStatus::Running || elapsed >= timeout_ms { - return Ok(build_shell_delta_tool_result(delta)); + return Ok(build_shell_delta_tool_result(delta, context)); } tokio::time::sleep(Duration::from_millis(50)).await; diff --git a/crates/tui/src/tools/shell/tests.rs b/crates/tui/src/tools/shell/tests.rs index 740e5cd7..028e0055 100644 --- a/crates/tui/src/tools/shell/tests.rs +++ b/crates/tui/src/tools/shell/tests.rs @@ -49,6 +49,39 @@ fn echo_stdin_command() -> String { } } +fn network_restricted_context(tmp: &std::path::Path) -> ToolContext { + ToolContext::new(tmp) + .with_elevated_sandbox_policy(ExecutionSandboxPolicy::WorkspaceWrite { + writable_roots: vec![tmp.to_path_buf()], + network_access: false, + exclude_tmpdir: false, + exclude_slash_tmp: false, + }) + .with_shell_network_denied_hint( + "Shell command blocked: Plan mode runs shell commands in a network-restricted sandbox.", + ) +} + +fn failed_network_shell_result(stdout: &str, stderr: &str) -> ShellResult { + ShellResult { + task_id: None, + status: ShellStatus::Failed, + exit_code: Some(6), + stdout: stdout.to_string(), + stderr: stderr.to_string(), + duration_ms: 25, + stdout_len: stdout.len(), + stderr_len: stderr.len(), + stdout_omitted: 0, + stderr_omitted: 0, + stdout_truncated: false, + stderr_truncated: false, + sandboxed: true, + sandbox_type: Some("seatbelt".to_string()), + sandbox_denied: false, + } +} + #[test] fn test_sync_execution() { let tmp = tempdir().expect("tempdir"); @@ -232,6 +265,58 @@ fn test_truncate_with_meta_reports_omission_counts() { assert!(truncated.contains("bytes omitted")); } +#[test] +fn network_restricted_hint_detects_silent_curl_failure() { + let tmp = tempdir().expect("tempdir"); + let ctx = network_restricted_context(tmp.path()); + let result = failed_network_shell_result("000", ""); + + let hint = shell_network_restricted_hint( + &ctx, + "curl -s -o /dev/null -w '%{http_code}' https://api.github.com", + &result, + ) + .expect("network-restricted hint"); + + assert!(hint.contains("Plan mode")); +} + +#[test] +fn network_restricted_hint_ignores_local_failures() { + let tmp = tempdir().expect("tempdir"); + let ctx = network_restricted_context(tmp.path()); + let result = failed_network_shell_result("", "No such file or directory"); + + assert!(shell_network_restricted_hint(&ctx, "cat missing.txt", &result).is_none()); +} + +#[test] +fn shell_delta_result_surfaces_network_restricted_hint() { + let tmp = tempdir().expect("tempdir"); + let ctx = network_restricted_context(tmp.path()); + let result = failed_network_shell_result("000", ""); + + let tool_result = build_shell_delta_tool_result( + ShellDeltaResult { + command: "gh issue list".to_string(), + result, + stdout_total_len: 3, + stderr_total_len: 0, + }, + &ctx, + ); + + assert!(!tool_result.success); + assert!(tool_result.content.starts_with("Shell command blocked")); + let metadata = tool_result.metadata.expect("metadata"); + assert_eq!( + metadata + .get("sandbox_network_restricted") + .and_then(Value::as_bool), + Some(true) + ); +} + #[test] fn test_summarize_output_strips_truncation_note() { let long_output = "x".repeat(60_000); diff --git a/crates/tui/src/tools/spec.rs b/crates/tui/src/tools/spec.rs index 91f9ff8e..a9dcf360 100644 --- a/crates/tui/src/tools/spec.rs +++ b/crates/tui/src/tools/spec.rs @@ -86,6 +86,9 @@ pub struct ToolContext { /// Elevated sandbox policy override (used when retrying after sandbox denial). /// This overrides the default sandbox behavior for shell commands. pub elevated_sandbox_policy: Option, + /// Optional user-facing hint for shell commands that fail because the + /// active sandbox policy intentionally denies outbound network access. + pub shell_network_denied_hint: Option, /// Whether tools should auto-approve without safety checks (YOLO mode). /// When true, command safety analysis is skipped for shell execution. pub auto_approve: bool, @@ -153,6 +156,7 @@ impl ToolContext { notes_path, mcp_config_path, elevated_sandbox_policy: None, + shell_network_denied_hint: None, auto_approve: false, features: Features::with_defaults(), state_namespace: "workspace".to_string(), @@ -186,6 +190,7 @@ impl ToolContext { notes_path: notes_path.into(), mcp_config_path: mcp_config_path.into(), elevated_sandbox_policy: None, + shell_network_denied_hint: None, auto_approve: false, features: Features::with_defaults(), state_namespace: "workspace".to_string(), @@ -219,6 +224,7 @@ impl ToolContext { notes_path: notes_path.into(), mcp_config_path: mcp_config_path.into(), elevated_sandbox_policy: None, + shell_network_denied_hint: None, auto_approve, features: Features::with_defaults(), state_namespace: "workspace".to_string(), @@ -448,6 +454,12 @@ impl ToolContext { self } + /// Set the shell network-denial hint used by network-restricted modes. + pub fn with_shell_network_denied_hint(mut self, hint: impl Into) -> Self { + self.shell_network_denied_hint = Some(hint.into()); + self + } + /// Set the namespace used for session-scoped tool state. pub fn with_state_namespace(mut self, namespace: impl Into) -> Self { self.state_namespace = namespace.into();