fix(shell): explain Plan mode network sandbox failures

Closes #929 Squashed from #974 after local verification and green CI.
2026-05-07 01:51:59 -05:00
parent 4f77c625fd
commit b6509b10de
6 changed files with 261 additions and 23 deletions
@@ -1448,10 +1448,20 @@ impl Engine {
        }

        match mode {
-            // Plan mode is read-only investigation; the shell tool is not
-            // registered, so leaving the sandbox policy at the seatbelt-strict
-            // default is fine.
-            AppMode::Plan => ctx,
+            // Plan mode is read-only investigation. Shell tools can still be
+            // available for read-only local inspection, but outbound network
+            // remains sandbox-blocked; attach an explicit hint so network
+            // command failures do not look like DNS/proxy problems.
+            AppMode::Plan => ctx
+                .with_elevated_sandbox_policy(crate::sandbox::SandboxPolicy::WorkspaceWrite {
+                    writable_roots: vec![self.session.workspace.clone()],
+                    network_access: false,
+                    exclude_tmpdir: false,
+                    exclude_slash_tmp: false,
+                })
+                .with_shell_network_denied_hint(
+                    "Shell command blocked: Plan mode runs shell commands in a network-restricted sandbox. Use fetch_url or code_execution for network access, or switch to Agent mode (`/agent`) before retrying shell network commands.",
+                ),
            // Agent registers the shell tool and runs each command through
            // the per-mode sandbox + per-tool approval flow. The sandbox
            // default would deny all outbound network — including DNS —
@@ -500,13 +500,20 @@ fn agent_and_yolo_modes_elevate_shell_sandbox_to_allow_network() {
        "Yolo mode must use DangerFullAccess (no sandbox); got {yolo_policy:?}",
    );

-    // Plan mode is read-only investigation and does not register the shell
-    // tool, so it intentionally leaves the policy at the strict default.
+    // Plan mode can still expose shell tools for local read-only inspection,
+    // but it keeps outbound network blocked and attaches an explicit hint so
+    // network command failures are not mistaken for DNS/proxy issues.
+    let plan_ctx = engine.build_tool_context(AppMode::Plan, false);
+    let plan_policy = plan_ctx
+        .elevated_sandbox_policy
+        .as_ref()
+        .expect("Plan mode should make the shell sandbox policy explicit");
+    assert!(!plan_policy.has_network_access());
    assert!(
-        engine
-            .build_tool_context(AppMode::Plan, false)
-            .elevated_sandbox_policy
-            .is_none(),
+        plan_ctx
+            .shell_network_denied_hint
+            .as_deref()
+            .is_some_and(|hint| hint.contains("Plan mode")),
    );
 }

@@ -33,9 +33,10 @@ pub(super) fn should_default_defer_tool(name: &str, mode: AppMode) -> bool {
        return false;
    }

-    // Shell tools are kept active in Agent so the model can run verification
-    // commands (build/test/git/cargo) without first having to discover the
-    // tool through ToolSearch. Plan mode never registers shell tools.
+    // Shell exec tools are kept active in Agent so the model can run
+    // verification commands (build/test/git/cargo) without first having to
+    // discover them through ToolSearch. Plan mode may register shell tools,
+    // but keeps most shell execution deferred and network-restricted.
    let always_loaded_in_action_modes = matches!(mode, AppMode::Agent)
        && matches!(
            name,
@@ -109,6 +109,7 @@ pub struct ShellJobDetail {
 }

 pub struct ShellDeltaResult {
+    pub command: String,
    pub result: ShellResult,
    pub stdout_total_len: usize,
    pub stderr_total_len: usize,
@@ -1206,6 +1207,7 @@ impl ShellManager {
        let (stderr, stderr_meta) = truncate_with_meta(&stderr_delta);
        let sandboxed = !matches!(shell.sandbox_type, SandboxType::None);

+        let command = shell.command.clone();
        let result = ShellResult {
            task_id: Some(shell.id.clone()),
            status: shell.status.clone(),
@@ -1229,6 +1231,7 @@ impl ShellManager {
        };

        Ok(ShellDeltaResult {
+            command,
            result,
            stdout_total_len: stdout_total,
            stderr_total_len: stderr_total,
@@ -1409,7 +1412,7 @@ pub fn new_shared_shell_manager(workspace: PathBuf) -> SharedShellManager {

 // === ToolSpec Implementations ===

-use crate::command_safety::{SafetyLevel, analyze_command};
+use crate::command_safety::{SafetyLevel, analyze_command, extract_primary_command};
 use crate::execpolicy::{ExecPolicyDecision, load_default_policy};
 use crate::features::Feature;
 use crate::tools::spec::{
@@ -1423,6 +1426,99 @@ const FOREGROUND_TIMEOUT_RECOVERY_HINT: &str = "Foreground exec_shell is for bou
 The timed-out process was killed; rerun long work with task_shell_start or exec_shell with \
 background: true, then poll with task_shell_wait or exec_shell_wait.";

+fn command_likely_needs_network(command: &str) -> bool {
+    let normalized = command.to_ascii_lowercase();
+    let Some(primary) = extract_primary_command(&normalized) else {
+        return false;
+    };
+    let primary = primary.rsplit(['/', '\\']).next().unwrap_or(primary);
+
+    match primary {
+        "curl" | "wget" | "fetch" | "nc" | "netcat" | "ncat" | "ssh" | "scp" | "sftp" | "rsync"
+        | "ftp" | "ping" | "traceroute" | "nslookup" | "dig" | "host" | "nmap" | "gh" | "hub" => {
+            true
+        }
+        "git" => [
+            " fetch",
+            " pull",
+            " clone",
+            " ls-remote",
+            " submodule",
+            " push",
+        ]
+        .iter()
+        .any(|needle| normalized.contains(needle)),
+        "cargo" => [" install", " fetch", " update", " publish", " search"]
+            .iter()
+            .any(|needle| normalized.contains(needle)),
+        "npm" | "pnpm" | "yarn" => [" install", " i", " add", " update", " publish"]
+            .iter()
+            .any(|needle| normalized.contains(needle)),
+        "pip" | "pip3" | "uv" | "poetry" => [" install", " add", " sync", " update"]
+            .iter()
+            .any(|needle| normalized.contains(needle)),
+        "brew" | "apt" | "apt-get" | "yum" | "dnf" | "pacman" => true,
+        "go" => [" get", " install", " mod download"]
+            .iter()
+            .any(|needle| normalized.contains(needle)),
+        _ => false,
+    }
+}
+
+fn looks_like_network_blocked_failure(result: &ShellResult) -> bool {
+    if matches!(result.status, ShellStatus::Completed | ShellStatus::Running)
+        || result.exit_code == Some(0)
+    {
+        return false;
+    }
+
+    if result.stdout.trim() == "000" {
+        return true;
+    }
+    if result.sandboxed && result.stdout.is_empty() && result.stderr.is_empty() {
+        return true;
+    }
+
+    let output = format!("{}\n{}", result.stdout, result.stderr).to_ascii_lowercase();
+    [
+        "operation not permitted",
+        "network is unreachable",
+        "could not resolve host",
+        "couldn't resolve host",
+        "failed to resolve",
+        "temporary failure in name resolution",
+        "name or service not known",
+        "nodename nor servname provided",
+        "no address associated",
+        "failed to connect",
+        "couldn't connect",
+        "connection timed out",
+        "connection reset",
+    ]
+    .iter()
+    .any(|pattern| output.contains(pattern))
+}
+
+fn shell_network_restricted_hint<'a>(
+    context: &'a ToolContext,
+    command: &str,
+    result: &ShellResult,
+) -> Option<&'a str> {
+    let hint = context.shell_network_denied_hint.as_deref()?;
+    let policy_blocks_network = context
+        .elevated_sandbox_policy
+        .as_ref()
+        .is_some_and(|policy| !policy.has_network_access());
+    if !policy_blocks_network || !command_likely_needs_network(command) {
+        return None;
+    }
+    if result.sandbox_denied || looks_like_network_blocked_failure(result) {
+        Some(hint)
+    } else {
+        None
+    }
+}
+
 async fn execute_foreground_via_background(
    context: &ToolContext,
    command: &str,
@@ -1843,7 +1939,9 @@ impl ToolSpec for ExecShellTool {
                } else {
                    stdout_summary.clone()
                };
-                let output = if interactive {
+                let network_restricted_hint =
+                    shell_network_restricted_hint(context, command, &result).map(str::to_string);
+                let mut output = if interactive {
                    format!(
                        "Interactive command completed (exit code: {:?})",
                        result.exit_code
@@ -1880,6 +1978,9 @@ impl ToolSpec for ExecShellTool {
                        result.exit_code, result.stdout, result.stderr
                    )
                };
+                if let Some(hint) = network_restricted_hint.as_deref() {
+                    output = format!("{hint}\n\n{output}");
+                }

                let mut metadata = json!({
                    "exit_code": result.exit_code,
@@ -1930,6 +2031,10 @@ impl ToolSpec for ExecShellTool {
                        "poll_with": ["task_shell_wait", "exec_shell_wait"]
                    });
                }
+                if let Some(hint) = network_restricted_hint {
+                    metadata["sandbox_network_restricted"] = json!(true);
+                    metadata["sandbox_network_denied_hint"] = json!(hint);
+                }

                Ok(ToolResult {
                    content: output,
@@ -1971,8 +2076,10 @@ fn required_task_id(input: &serde_json::Value) -> Result<&str, ToolError> {
        .ok_or_else(|| ToolError::missing_field("task_id"))
 }

-fn build_shell_delta_tool_result(delta: ShellDeltaResult) -> ToolResult {
+fn build_shell_delta_tool_result(delta: ShellDeltaResult, context: &ToolContext) -> ToolResult {
    let result = delta.result;
+    let network_restricted_hint =
+        shell_network_restricted_hint(context, &delta.command, &result).map(str::to_string);
    let stdout_summary = summarize_output(&result.stdout);
    let stderr_summary = summarize_output(&result.stderr);
    let summary = if !stderr_summary.is_empty() {
@@ -1981,7 +2088,7 @@ fn build_shell_delta_tool_result(delta: ShellDeltaResult) -> ToolResult {
        stdout_summary.clone()
    };

-    let output = if result.stdout.is_empty() && result.stderr.is_empty() {
+    let mut output = if result.stdout.is_empty() && result.stderr.is_empty() {
        match result.status {
            ShellStatus::Running => "Background task running (no new output).".to_string(),
            ShellStatus::Completed => "(no new output)".to_string(),
@@ -1994,8 +2101,11 @@ fn build_shell_delta_tool_result(delta: ShellDeltaResult) -> ToolResult {
    } else {
        format!("{}\n\nSTDERR:\n{}", result.stdout, result.stderr)
    };
+    if let Some(hint) = network_restricted_hint.as_deref() {
+        output = format!("{hint}\n\n{output}");
+    }

-    ToolResult {
+    let mut tool_result = ToolResult {
        content: output,
        success: matches!(result.status, ShellStatus::Completed | ShellStatus::Running),
        metadata: Some(json!({
@@ -2019,7 +2129,15 @@ fn build_shell_delta_tool_result(delta: ShellDeltaResult) -> ToolResult {
            "stderr_summary": stderr_summary,
            "stream_delta": true,
        })),
+    };
+    if let Some(hint) = network_restricted_hint
+        && let Some(metadata) = tool_result.metadata.as_mut()
+        && let Some(object) = metadata.as_object_mut()
+    {
+        object.insert("sandbox_network_restricted".to_string(), json!(true));
+        object.insert("sandbox_network_denied_hint".to_string(), json!(hint));
    }
+    tool_result
 }

 async fn wait_for_shell_delta_cancellable(
@@ -2032,7 +2150,7 @@ async fn wait_for_shell_delta_cancellable(
    let mut stdout_accum = String::new();
    let mut stderr_accum = String::new();

-    let (result, stdout_total_len, stderr_total_len) = loop {
+    let (command, result, stdout_total_len, stderr_total_len) = loop {
        if context
            .cancel_token
            .as_ref()
@@ -2048,6 +2166,7 @@ async fn wait_for_shell_delta_cancellable(
            append_shell_delta_output(&mut stdout_accum, &mut stderr_accum, &delta.result);
            return Ok((
                shell_delta_with_accumulated_output(
+                    delta.command,
                    delta.result,
                    &stdout_accum,
                    &stderr_accum,
@@ -2070,11 +2189,12 @@ async fn wait_for_shell_delta_cancellable(

        let stdout_total_len = delta.stdout_total_len;
        let stderr_total_len = delta.stderr_total_len;
+        let command = delta.command.clone();
        append_shell_delta_output(&mut stdout_accum, &mut stderr_accum, &delta.result);

        let status = delta.result.status.clone();
        if status != ShellStatus::Running || Instant::now() >= deadline {
-            break (delta.result, stdout_total_len, stderr_total_len);
+            break (command, delta.result, stdout_total_len, stderr_total_len);
        }

        tokio::time::sleep(Duration::from_millis(100)).await;
@@ -2082,6 +2202,7 @@ async fn wait_for_shell_delta_cancellable(

    Ok((
        shell_delta_with_accumulated_output(
+            command,
            result,
            &stdout_accum,
            &stderr_accum,
@@ -2106,6 +2227,7 @@ fn append_shell_delta_output(
 }

 fn shell_delta_with_accumulated_output(
+    command: String,
    mut result: ShellResult,
    stdout_accum: &str,
    stderr_accum: &str,
@@ -2124,6 +2246,7 @@ fn shell_delta_with_accumulated_output(
    result.stderr_truncated = stderr_meta.truncated;

    ShellDeltaResult {
+        command,
        result,
        stdout_total_len,
        stderr_total_len,
@@ -2300,7 +2423,7 @@ impl ToolSpec for ShellWaitTool {
        };

        let status = delta.result.status.clone();
-        let mut result = build_shell_delta_tool_result(delta);
+        let mut result = build_shell_delta_tool_result(delta, context);
        if wait_canceled {
            if matches!(status, ShellStatus::Running) {
                result.content = format!(
@@ -2411,7 +2534,7 @@ impl ToolSpec for ShellInteractTool {
                let delta = manager
                    .get_output_delta(task_id, false, 0)
                    .map_err(|err| ToolError::execution_failed(err.to_string()))?;
-                let mut result = build_shell_delta_tool_result(delta);
+                let mut result = build_shell_delta_tool_result(delta, context);
                if let Some(metadata) = result.metadata.as_mut()
                    && let Some(object) = metadata.as_object_mut()
                {
@@ -2435,7 +2558,7 @@ impl ToolSpec for ShellInteractTool {
                || delta.result.status != ShellStatus::Running
                || elapsed >= timeout_ms
            {
-                return Ok(build_shell_delta_tool_result(delta));
+                return Ok(build_shell_delta_tool_result(delta, context));
            }

            tokio::time::sleep(Duration::from_millis(50)).await;
@@ -49,6 +49,39 @@ fn echo_stdin_command() -> String {
    }
 }

+fn network_restricted_context(tmp: &std::path::Path) -> ToolContext {
+    ToolContext::new(tmp)
+        .with_elevated_sandbox_policy(ExecutionSandboxPolicy::WorkspaceWrite {
+            writable_roots: vec![tmp.to_path_buf()],
+            network_access: false,
+            exclude_tmpdir: false,
+            exclude_slash_tmp: false,
+        })
+        .with_shell_network_denied_hint(
+            "Shell command blocked: Plan mode runs shell commands in a network-restricted sandbox.",
+        )
+}
+
+fn failed_network_shell_result(stdout: &str, stderr: &str) -> ShellResult {
+    ShellResult {
+        task_id: None,
+        status: ShellStatus::Failed,
+        exit_code: Some(6),
+        stdout: stdout.to_string(),
+        stderr: stderr.to_string(),
+        duration_ms: 25,
+        stdout_len: stdout.len(),
+        stderr_len: stderr.len(),
+        stdout_omitted: 0,
+        stderr_omitted: 0,
+        stdout_truncated: false,
+        stderr_truncated: false,
+        sandboxed: true,
+        sandbox_type: Some("seatbelt".to_string()),
+        sandbox_denied: false,
+    }
+}
+
 #[test]
 fn test_sync_execution() {
    let tmp = tempdir().expect("tempdir");
@@ -232,6 +265,58 @@ fn test_truncate_with_meta_reports_omission_counts() {
    assert!(truncated.contains("bytes omitted"));
 }

+#[test]
+fn network_restricted_hint_detects_silent_curl_failure() {
+    let tmp = tempdir().expect("tempdir");
+    let ctx = network_restricted_context(tmp.path());
+    let result = failed_network_shell_result("000", "");
+
+    let hint = shell_network_restricted_hint(
+        &ctx,
+        "curl -s -o /dev/null -w '%{http_code}' https://api.github.com",
+        &result,
+    )
+    .expect("network-restricted hint");
+
+    assert!(hint.contains("Plan mode"));
+}
+
+#[test]
+fn network_restricted_hint_ignores_local_failures() {
+    let tmp = tempdir().expect("tempdir");
+    let ctx = network_restricted_context(tmp.path());
+    let result = failed_network_shell_result("", "No such file or directory");
+
+    assert!(shell_network_restricted_hint(&ctx, "cat missing.txt", &result).is_none());
+}
+
+#[test]
+fn shell_delta_result_surfaces_network_restricted_hint() {
+    let tmp = tempdir().expect("tempdir");
+    let ctx = network_restricted_context(tmp.path());
+    let result = failed_network_shell_result("000", "");
+
+    let tool_result = build_shell_delta_tool_result(
+        ShellDeltaResult {
+            command: "gh issue list".to_string(),
+            result,
+            stdout_total_len: 3,
+            stderr_total_len: 0,
+        },
+        &ctx,
+    );
+
+    assert!(!tool_result.success);
+    assert!(tool_result.content.starts_with("Shell command blocked"));
+    let metadata = tool_result.metadata.expect("metadata");
+    assert_eq!(
+        metadata
+            .get("sandbox_network_restricted")
+            .and_then(Value::as_bool),
+        Some(true)
+    );
+}
+
 #[test]
 fn test_summarize_output_strips_truncation_note() {
    let long_output = "x".repeat(60_000);
@@ -86,6 +86,9 @@ pub struct ToolContext {
    /// Elevated sandbox policy override (used when retrying after sandbox denial).
    /// This overrides the default sandbox behavior for shell commands.
    pub elevated_sandbox_policy: Option<crate::sandbox::SandboxPolicy>,
+    /// Optional user-facing hint for shell commands that fail because the
+    /// active sandbox policy intentionally denies outbound network access.
+    pub shell_network_denied_hint: Option<String>,
    /// Whether tools should auto-approve without safety checks (YOLO mode).
    /// When true, command safety analysis is skipped for shell execution.
    pub auto_approve: bool,
@@ -153,6 +156,7 @@ impl ToolContext {
            notes_path,
            mcp_config_path,
            elevated_sandbox_policy: None,
+            shell_network_denied_hint: None,
            auto_approve: false,
            features: Features::with_defaults(),
            state_namespace: "workspace".to_string(),
@@ -186,6 +190,7 @@ impl ToolContext {
            notes_path: notes_path.into(),
            mcp_config_path: mcp_config_path.into(),
            elevated_sandbox_policy: None,
+            shell_network_denied_hint: None,
            auto_approve: false,
            features: Features::with_defaults(),
            state_namespace: "workspace".to_string(),
@@ -219,6 +224,7 @@ impl ToolContext {
            notes_path: notes_path.into(),
            mcp_config_path: mcp_config_path.into(),
            elevated_sandbox_policy: None,
+            shell_network_denied_hint: None,
            auto_approve,
            features: Features::with_defaults(),
            state_namespace: "workspace".to_string(),
@@ -448,6 +454,12 @@ impl ToolContext {
        self
    }

+    /// Set the shell network-denial hint used by network-restricted modes.
+    pub fn with_shell_network_denied_hint(mut self, hint: impl Into<String>) -> Self {
+        self.shell_network_denied_hint = Some(hint.into());
+        self
+    }
+
    /// Set the namespace used for session-scoped tool state.
    pub fn with_state_namespace(mut self, namespace: impl Into<String>) -> Self {
        self.state_namespace = namespace.into();