fix(shell): explain Plan mode network sandbox failures
Closes #929 Squashed from #974 after local verification and green CI.
This commit is contained in:
@@ -1448,10 +1448,20 @@ impl Engine {
|
||||
}
|
||||
|
||||
match mode {
|
||||
// Plan mode is read-only investigation; the shell tool is not
|
||||
// registered, so leaving the sandbox policy at the seatbelt-strict
|
||||
// default is fine.
|
||||
AppMode::Plan => ctx,
|
||||
// Plan mode is read-only investigation. Shell tools can still be
|
||||
// available for read-only local inspection, but outbound network
|
||||
// remains sandbox-blocked; attach an explicit hint so network
|
||||
// command failures do not look like DNS/proxy problems.
|
||||
AppMode::Plan => ctx
|
||||
.with_elevated_sandbox_policy(crate::sandbox::SandboxPolicy::WorkspaceWrite {
|
||||
writable_roots: vec![self.session.workspace.clone()],
|
||||
network_access: false,
|
||||
exclude_tmpdir: false,
|
||||
exclude_slash_tmp: false,
|
||||
})
|
||||
.with_shell_network_denied_hint(
|
||||
"Shell command blocked: Plan mode runs shell commands in a network-restricted sandbox. Use fetch_url or code_execution for network access, or switch to Agent mode (`/agent`) before retrying shell network commands.",
|
||||
),
|
||||
// Agent registers the shell tool and runs each command through
|
||||
// the per-mode sandbox + per-tool approval flow. The sandbox
|
||||
// default would deny all outbound network — including DNS —
|
||||
|
||||
@@ -500,13 +500,20 @@ fn agent_and_yolo_modes_elevate_shell_sandbox_to_allow_network() {
|
||||
"Yolo mode must use DangerFullAccess (no sandbox); got {yolo_policy:?}",
|
||||
);
|
||||
|
||||
// Plan mode is read-only investigation and does not register the shell
|
||||
// tool, so it intentionally leaves the policy at the strict default.
|
||||
// Plan mode can still expose shell tools for local read-only inspection,
|
||||
// but it keeps outbound network blocked and attaches an explicit hint so
|
||||
// network command failures are not mistaken for DNS/proxy issues.
|
||||
let plan_ctx = engine.build_tool_context(AppMode::Plan, false);
|
||||
let plan_policy = plan_ctx
|
||||
.elevated_sandbox_policy
|
||||
.as_ref()
|
||||
.expect("Plan mode should make the shell sandbox policy explicit");
|
||||
assert!(!plan_policy.has_network_access());
|
||||
assert!(
|
||||
engine
|
||||
.build_tool_context(AppMode::Plan, false)
|
||||
.elevated_sandbox_policy
|
||||
.is_none(),
|
||||
plan_ctx
|
||||
.shell_network_denied_hint
|
||||
.as_deref()
|
||||
.is_some_and(|hint| hint.contains("Plan mode")),
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -33,9 +33,10 @@ pub(super) fn should_default_defer_tool(name: &str, mode: AppMode) -> bool {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Shell tools are kept active in Agent so the model can run verification
|
||||
// commands (build/test/git/cargo) without first having to discover the
|
||||
// tool through ToolSearch. Plan mode never registers shell tools.
|
||||
// Shell exec tools are kept active in Agent so the model can run
|
||||
// verification commands (build/test/git/cargo) without first having to
|
||||
// discover them through ToolSearch. Plan mode may register shell tools,
|
||||
// but keeps most shell execution deferred and network-restricted.
|
||||
let always_loaded_in_action_modes = matches!(mode, AppMode::Agent)
|
||||
&& matches!(
|
||||
name,
|
||||
|
||||
+133
-10
@@ -109,6 +109,7 @@ pub struct ShellJobDetail {
|
||||
}
|
||||
|
||||
pub struct ShellDeltaResult {
|
||||
pub command: String,
|
||||
pub result: ShellResult,
|
||||
pub stdout_total_len: usize,
|
||||
pub stderr_total_len: usize,
|
||||
@@ -1206,6 +1207,7 @@ impl ShellManager {
|
||||
let (stderr, stderr_meta) = truncate_with_meta(&stderr_delta);
|
||||
let sandboxed = !matches!(shell.sandbox_type, SandboxType::None);
|
||||
|
||||
let command = shell.command.clone();
|
||||
let result = ShellResult {
|
||||
task_id: Some(shell.id.clone()),
|
||||
status: shell.status.clone(),
|
||||
@@ -1229,6 +1231,7 @@ impl ShellManager {
|
||||
};
|
||||
|
||||
Ok(ShellDeltaResult {
|
||||
command,
|
||||
result,
|
||||
stdout_total_len: stdout_total,
|
||||
stderr_total_len: stderr_total,
|
||||
@@ -1409,7 +1412,7 @@ pub fn new_shared_shell_manager(workspace: PathBuf) -> SharedShellManager {
|
||||
|
||||
// === ToolSpec Implementations ===
|
||||
|
||||
use crate::command_safety::{SafetyLevel, analyze_command};
|
||||
use crate::command_safety::{SafetyLevel, analyze_command, extract_primary_command};
|
||||
use crate::execpolicy::{ExecPolicyDecision, load_default_policy};
|
||||
use crate::features::Feature;
|
||||
use crate::tools::spec::{
|
||||
@@ -1423,6 +1426,99 @@ const FOREGROUND_TIMEOUT_RECOVERY_HINT: &str = "Foreground exec_shell is for bou
|
||||
The timed-out process was killed; rerun long work with task_shell_start or exec_shell with \
|
||||
background: true, then poll with task_shell_wait or exec_shell_wait.";
|
||||
|
||||
fn command_likely_needs_network(command: &str) -> bool {
|
||||
let normalized = command.to_ascii_lowercase();
|
||||
let Some(primary) = extract_primary_command(&normalized) else {
|
||||
return false;
|
||||
};
|
||||
let primary = primary.rsplit(['/', '\\']).next().unwrap_or(primary);
|
||||
|
||||
match primary {
|
||||
"curl" | "wget" | "fetch" | "nc" | "netcat" | "ncat" | "ssh" | "scp" | "sftp" | "rsync"
|
||||
| "ftp" | "ping" | "traceroute" | "nslookup" | "dig" | "host" | "nmap" | "gh" | "hub" => {
|
||||
true
|
||||
}
|
||||
"git" => [
|
||||
" fetch",
|
||||
" pull",
|
||||
" clone",
|
||||
" ls-remote",
|
||||
" submodule",
|
||||
" push",
|
||||
]
|
||||
.iter()
|
||||
.any(|needle| normalized.contains(needle)),
|
||||
"cargo" => [" install", " fetch", " update", " publish", " search"]
|
||||
.iter()
|
||||
.any(|needle| normalized.contains(needle)),
|
||||
"npm" | "pnpm" | "yarn" => [" install", " i", " add", " update", " publish"]
|
||||
.iter()
|
||||
.any(|needle| normalized.contains(needle)),
|
||||
"pip" | "pip3" | "uv" | "poetry" => [" install", " add", " sync", " update"]
|
||||
.iter()
|
||||
.any(|needle| normalized.contains(needle)),
|
||||
"brew" | "apt" | "apt-get" | "yum" | "dnf" | "pacman" => true,
|
||||
"go" => [" get", " install", " mod download"]
|
||||
.iter()
|
||||
.any(|needle| normalized.contains(needle)),
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn looks_like_network_blocked_failure(result: &ShellResult) -> bool {
|
||||
if matches!(result.status, ShellStatus::Completed | ShellStatus::Running)
|
||||
|| result.exit_code == Some(0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if result.stdout.trim() == "000" {
|
||||
return true;
|
||||
}
|
||||
if result.sandboxed && result.stdout.is_empty() && result.stderr.is_empty() {
|
||||
return true;
|
||||
}
|
||||
|
||||
let output = format!("{}\n{}", result.stdout, result.stderr).to_ascii_lowercase();
|
||||
[
|
||||
"operation not permitted",
|
||||
"network is unreachable",
|
||||
"could not resolve host",
|
||||
"couldn't resolve host",
|
||||
"failed to resolve",
|
||||
"temporary failure in name resolution",
|
||||
"name or service not known",
|
||||
"nodename nor servname provided",
|
||||
"no address associated",
|
||||
"failed to connect",
|
||||
"couldn't connect",
|
||||
"connection timed out",
|
||||
"connection reset",
|
||||
]
|
||||
.iter()
|
||||
.any(|pattern| output.contains(pattern))
|
||||
}
|
||||
|
||||
fn shell_network_restricted_hint<'a>(
|
||||
context: &'a ToolContext,
|
||||
command: &str,
|
||||
result: &ShellResult,
|
||||
) -> Option<&'a str> {
|
||||
let hint = context.shell_network_denied_hint.as_deref()?;
|
||||
let policy_blocks_network = context
|
||||
.elevated_sandbox_policy
|
||||
.as_ref()
|
||||
.is_some_and(|policy| !policy.has_network_access());
|
||||
if !policy_blocks_network || !command_likely_needs_network(command) {
|
||||
return None;
|
||||
}
|
||||
if result.sandbox_denied || looks_like_network_blocked_failure(result) {
|
||||
Some(hint)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
async fn execute_foreground_via_background(
|
||||
context: &ToolContext,
|
||||
command: &str,
|
||||
@@ -1843,7 +1939,9 @@ impl ToolSpec for ExecShellTool {
|
||||
} else {
|
||||
stdout_summary.clone()
|
||||
};
|
||||
let output = if interactive {
|
||||
let network_restricted_hint =
|
||||
shell_network_restricted_hint(context, command, &result).map(str::to_string);
|
||||
let mut output = if interactive {
|
||||
format!(
|
||||
"Interactive command completed (exit code: {:?})",
|
||||
result.exit_code
|
||||
@@ -1880,6 +1978,9 @@ impl ToolSpec for ExecShellTool {
|
||||
result.exit_code, result.stdout, result.stderr
|
||||
)
|
||||
};
|
||||
if let Some(hint) = network_restricted_hint.as_deref() {
|
||||
output = format!("{hint}\n\n{output}");
|
||||
}
|
||||
|
||||
let mut metadata = json!({
|
||||
"exit_code": result.exit_code,
|
||||
@@ -1930,6 +2031,10 @@ impl ToolSpec for ExecShellTool {
|
||||
"poll_with": ["task_shell_wait", "exec_shell_wait"]
|
||||
});
|
||||
}
|
||||
if let Some(hint) = network_restricted_hint {
|
||||
metadata["sandbox_network_restricted"] = json!(true);
|
||||
metadata["sandbox_network_denied_hint"] = json!(hint);
|
||||
}
|
||||
|
||||
Ok(ToolResult {
|
||||
content: output,
|
||||
@@ -1971,8 +2076,10 @@ fn required_task_id(input: &serde_json::Value) -> Result<&str, ToolError> {
|
||||
.ok_or_else(|| ToolError::missing_field("task_id"))
|
||||
}
|
||||
|
||||
fn build_shell_delta_tool_result(delta: ShellDeltaResult) -> ToolResult {
|
||||
fn build_shell_delta_tool_result(delta: ShellDeltaResult, context: &ToolContext) -> ToolResult {
|
||||
let result = delta.result;
|
||||
let network_restricted_hint =
|
||||
shell_network_restricted_hint(context, &delta.command, &result).map(str::to_string);
|
||||
let stdout_summary = summarize_output(&result.stdout);
|
||||
let stderr_summary = summarize_output(&result.stderr);
|
||||
let summary = if !stderr_summary.is_empty() {
|
||||
@@ -1981,7 +2088,7 @@ fn build_shell_delta_tool_result(delta: ShellDeltaResult) -> ToolResult {
|
||||
stdout_summary.clone()
|
||||
};
|
||||
|
||||
let output = if result.stdout.is_empty() && result.stderr.is_empty() {
|
||||
let mut output = if result.stdout.is_empty() && result.stderr.is_empty() {
|
||||
match result.status {
|
||||
ShellStatus::Running => "Background task running (no new output).".to_string(),
|
||||
ShellStatus::Completed => "(no new output)".to_string(),
|
||||
@@ -1994,8 +2101,11 @@ fn build_shell_delta_tool_result(delta: ShellDeltaResult) -> ToolResult {
|
||||
} else {
|
||||
format!("{}\n\nSTDERR:\n{}", result.stdout, result.stderr)
|
||||
};
|
||||
if let Some(hint) = network_restricted_hint.as_deref() {
|
||||
output = format!("{hint}\n\n{output}");
|
||||
}
|
||||
|
||||
ToolResult {
|
||||
let mut tool_result = ToolResult {
|
||||
content: output,
|
||||
success: matches!(result.status, ShellStatus::Completed | ShellStatus::Running),
|
||||
metadata: Some(json!({
|
||||
@@ -2019,7 +2129,15 @@ fn build_shell_delta_tool_result(delta: ShellDeltaResult) -> ToolResult {
|
||||
"stderr_summary": stderr_summary,
|
||||
"stream_delta": true,
|
||||
})),
|
||||
};
|
||||
if let Some(hint) = network_restricted_hint
|
||||
&& let Some(metadata) = tool_result.metadata.as_mut()
|
||||
&& let Some(object) = metadata.as_object_mut()
|
||||
{
|
||||
object.insert("sandbox_network_restricted".to_string(), json!(true));
|
||||
object.insert("sandbox_network_denied_hint".to_string(), json!(hint));
|
||||
}
|
||||
tool_result
|
||||
}
|
||||
|
||||
async fn wait_for_shell_delta_cancellable(
|
||||
@@ -2032,7 +2150,7 @@ async fn wait_for_shell_delta_cancellable(
|
||||
let mut stdout_accum = String::new();
|
||||
let mut stderr_accum = String::new();
|
||||
|
||||
let (result, stdout_total_len, stderr_total_len) = loop {
|
||||
let (command, result, stdout_total_len, stderr_total_len) = loop {
|
||||
if context
|
||||
.cancel_token
|
||||
.as_ref()
|
||||
@@ -2048,6 +2166,7 @@ async fn wait_for_shell_delta_cancellable(
|
||||
append_shell_delta_output(&mut stdout_accum, &mut stderr_accum, &delta.result);
|
||||
return Ok((
|
||||
shell_delta_with_accumulated_output(
|
||||
delta.command,
|
||||
delta.result,
|
||||
&stdout_accum,
|
||||
&stderr_accum,
|
||||
@@ -2070,11 +2189,12 @@ async fn wait_for_shell_delta_cancellable(
|
||||
|
||||
let stdout_total_len = delta.stdout_total_len;
|
||||
let stderr_total_len = delta.stderr_total_len;
|
||||
let command = delta.command.clone();
|
||||
append_shell_delta_output(&mut stdout_accum, &mut stderr_accum, &delta.result);
|
||||
|
||||
let status = delta.result.status.clone();
|
||||
if status != ShellStatus::Running || Instant::now() >= deadline {
|
||||
break (delta.result, stdout_total_len, stderr_total_len);
|
||||
break (command, delta.result, stdout_total_len, stderr_total_len);
|
||||
}
|
||||
|
||||
tokio::time::sleep(Duration::from_millis(100)).await;
|
||||
@@ -2082,6 +2202,7 @@ async fn wait_for_shell_delta_cancellable(
|
||||
|
||||
Ok((
|
||||
shell_delta_with_accumulated_output(
|
||||
command,
|
||||
result,
|
||||
&stdout_accum,
|
||||
&stderr_accum,
|
||||
@@ -2106,6 +2227,7 @@ fn append_shell_delta_output(
|
||||
}
|
||||
|
||||
fn shell_delta_with_accumulated_output(
|
||||
command: String,
|
||||
mut result: ShellResult,
|
||||
stdout_accum: &str,
|
||||
stderr_accum: &str,
|
||||
@@ -2124,6 +2246,7 @@ fn shell_delta_with_accumulated_output(
|
||||
result.stderr_truncated = stderr_meta.truncated;
|
||||
|
||||
ShellDeltaResult {
|
||||
command,
|
||||
result,
|
||||
stdout_total_len,
|
||||
stderr_total_len,
|
||||
@@ -2300,7 +2423,7 @@ impl ToolSpec for ShellWaitTool {
|
||||
};
|
||||
|
||||
let status = delta.result.status.clone();
|
||||
let mut result = build_shell_delta_tool_result(delta);
|
||||
let mut result = build_shell_delta_tool_result(delta, context);
|
||||
if wait_canceled {
|
||||
if matches!(status, ShellStatus::Running) {
|
||||
result.content = format!(
|
||||
@@ -2411,7 +2534,7 @@ impl ToolSpec for ShellInteractTool {
|
||||
let delta = manager
|
||||
.get_output_delta(task_id, false, 0)
|
||||
.map_err(|err| ToolError::execution_failed(err.to_string()))?;
|
||||
let mut result = build_shell_delta_tool_result(delta);
|
||||
let mut result = build_shell_delta_tool_result(delta, context);
|
||||
if let Some(metadata) = result.metadata.as_mut()
|
||||
&& let Some(object) = metadata.as_object_mut()
|
||||
{
|
||||
@@ -2435,7 +2558,7 @@ impl ToolSpec for ShellInteractTool {
|
||||
|| delta.result.status != ShellStatus::Running
|
||||
|| elapsed >= timeout_ms
|
||||
{
|
||||
return Ok(build_shell_delta_tool_result(delta));
|
||||
return Ok(build_shell_delta_tool_result(delta, context));
|
||||
}
|
||||
|
||||
tokio::time::sleep(Duration::from_millis(50)).await;
|
||||
|
||||
@@ -49,6 +49,39 @@ fn echo_stdin_command() -> String {
|
||||
}
|
||||
}
|
||||
|
||||
fn network_restricted_context(tmp: &std::path::Path) -> ToolContext {
|
||||
ToolContext::new(tmp)
|
||||
.with_elevated_sandbox_policy(ExecutionSandboxPolicy::WorkspaceWrite {
|
||||
writable_roots: vec![tmp.to_path_buf()],
|
||||
network_access: false,
|
||||
exclude_tmpdir: false,
|
||||
exclude_slash_tmp: false,
|
||||
})
|
||||
.with_shell_network_denied_hint(
|
||||
"Shell command blocked: Plan mode runs shell commands in a network-restricted sandbox.",
|
||||
)
|
||||
}
|
||||
|
||||
fn failed_network_shell_result(stdout: &str, stderr: &str) -> ShellResult {
|
||||
ShellResult {
|
||||
task_id: None,
|
||||
status: ShellStatus::Failed,
|
||||
exit_code: Some(6),
|
||||
stdout: stdout.to_string(),
|
||||
stderr: stderr.to_string(),
|
||||
duration_ms: 25,
|
||||
stdout_len: stdout.len(),
|
||||
stderr_len: stderr.len(),
|
||||
stdout_omitted: 0,
|
||||
stderr_omitted: 0,
|
||||
stdout_truncated: false,
|
||||
stderr_truncated: false,
|
||||
sandboxed: true,
|
||||
sandbox_type: Some("seatbelt".to_string()),
|
||||
sandbox_denied: false,
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_sync_execution() {
|
||||
let tmp = tempdir().expect("tempdir");
|
||||
@@ -232,6 +265,58 @@ fn test_truncate_with_meta_reports_omission_counts() {
|
||||
assert!(truncated.contains("bytes omitted"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn network_restricted_hint_detects_silent_curl_failure() {
|
||||
let tmp = tempdir().expect("tempdir");
|
||||
let ctx = network_restricted_context(tmp.path());
|
||||
let result = failed_network_shell_result("000", "");
|
||||
|
||||
let hint = shell_network_restricted_hint(
|
||||
&ctx,
|
||||
"curl -s -o /dev/null -w '%{http_code}' https://api.github.com",
|
||||
&result,
|
||||
)
|
||||
.expect("network-restricted hint");
|
||||
|
||||
assert!(hint.contains("Plan mode"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn network_restricted_hint_ignores_local_failures() {
|
||||
let tmp = tempdir().expect("tempdir");
|
||||
let ctx = network_restricted_context(tmp.path());
|
||||
let result = failed_network_shell_result("", "No such file or directory");
|
||||
|
||||
assert!(shell_network_restricted_hint(&ctx, "cat missing.txt", &result).is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn shell_delta_result_surfaces_network_restricted_hint() {
|
||||
let tmp = tempdir().expect("tempdir");
|
||||
let ctx = network_restricted_context(tmp.path());
|
||||
let result = failed_network_shell_result("000", "");
|
||||
|
||||
let tool_result = build_shell_delta_tool_result(
|
||||
ShellDeltaResult {
|
||||
command: "gh issue list".to_string(),
|
||||
result,
|
||||
stdout_total_len: 3,
|
||||
stderr_total_len: 0,
|
||||
},
|
||||
&ctx,
|
||||
);
|
||||
|
||||
assert!(!tool_result.success);
|
||||
assert!(tool_result.content.starts_with("Shell command blocked"));
|
||||
let metadata = tool_result.metadata.expect("metadata");
|
||||
assert_eq!(
|
||||
metadata
|
||||
.get("sandbox_network_restricted")
|
||||
.and_then(Value::as_bool),
|
||||
Some(true)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_summarize_output_strips_truncation_note() {
|
||||
let long_output = "x".repeat(60_000);
|
||||
|
||||
@@ -86,6 +86,9 @@ pub struct ToolContext {
|
||||
/// Elevated sandbox policy override (used when retrying after sandbox denial).
|
||||
/// This overrides the default sandbox behavior for shell commands.
|
||||
pub elevated_sandbox_policy: Option<crate::sandbox::SandboxPolicy>,
|
||||
/// Optional user-facing hint for shell commands that fail because the
|
||||
/// active sandbox policy intentionally denies outbound network access.
|
||||
pub shell_network_denied_hint: Option<String>,
|
||||
/// Whether tools should auto-approve without safety checks (YOLO mode).
|
||||
/// When true, command safety analysis is skipped for shell execution.
|
||||
pub auto_approve: bool,
|
||||
@@ -153,6 +156,7 @@ impl ToolContext {
|
||||
notes_path,
|
||||
mcp_config_path,
|
||||
elevated_sandbox_policy: None,
|
||||
shell_network_denied_hint: None,
|
||||
auto_approve: false,
|
||||
features: Features::with_defaults(),
|
||||
state_namespace: "workspace".to_string(),
|
||||
@@ -186,6 +190,7 @@ impl ToolContext {
|
||||
notes_path: notes_path.into(),
|
||||
mcp_config_path: mcp_config_path.into(),
|
||||
elevated_sandbox_policy: None,
|
||||
shell_network_denied_hint: None,
|
||||
auto_approve: false,
|
||||
features: Features::with_defaults(),
|
||||
state_namespace: "workspace".to_string(),
|
||||
@@ -219,6 +224,7 @@ impl ToolContext {
|
||||
notes_path: notes_path.into(),
|
||||
mcp_config_path: mcp_config_path.into(),
|
||||
elevated_sandbox_policy: None,
|
||||
shell_network_denied_hint: None,
|
||||
auto_approve,
|
||||
features: Features::with_defaults(),
|
||||
state_namespace: "workspace".to_string(),
|
||||
@@ -448,6 +454,12 @@ impl ToolContext {
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the shell network-denial hint used by network-restricted modes.
|
||||
pub fn with_shell_network_denied_hint(mut self, hint: impl Into<String>) -> Self {
|
||||
self.shell_network_denied_hint = Some(hint.into());
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the namespace used for session-scoped tool state.
|
||||
pub fn with_state_namespace(mut self, namespace: impl Into<String>) -> Self {
|
||||
self.state_namespace = namespace.into();
|
||||
|
||||
Reference in New Issue
Block a user