fix(engine): Plan mode uses ReadOnly sandbox so shell can't mutate workspace (#1077)
Plan mode's tool context elevated the shell sandbox to `WorkspaceWrite`
with the workspace itself as a writable root. Reads were correct, but a
shell call like `python -c "open('foo','w').write('x')"` succeeded
because the sandbox literally permitted writes inside the workspace —
which is exactly what the user reported in #1077: "Plan mode … uses
shell to run python to change files."
Plan mode is investigation, not modification. Switch to
`SandboxPolicy::ReadOnly`: no writes anywhere on the filesystem, no
network. The model-facing tool surface still includes shell so
read-only commands (`ls`, `git log`, `grep`, `cat`, `cargo metadata`,
…) keep working; the per-platform sandbox (seatbelt on macOS, landlock
on Linux) blocks the rest.
Refactored the per-mode policy decision into
`tool_setup::sandbox_policy_for_mode(mode, workspace)` so it has one
unit-testable home. Updated the network-denied hint to surface the
actual contract ("read-only sandbox — no writes, no network") instead
of the previous wording that only mentioned network.
Tests:
- New `sandbox_policy_for_mode_returns_correct_policy_per_mode` asserts
the helper returns `ReadOnly` / `WorkspaceWrite{network: true}` /
`DangerFullAccess` for Plan / Agent / YOLO respectively.
- Existing `engine_elevates_sandbox_policy_for_mode` test extended to
confirm Plan mode is `ReadOnly` (not just network-denied),
`has_full_disk_write_access()` is false, and `get_writable_roots`
enumerates zero entries.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1500,48 +1500,14 @@ impl Engine {
|
||||
ctx = ctx.with_sandbox_backend(std::sync::Arc::clone(backend));
|
||||
}
|
||||
|
||||
match mode {
|
||||
// Plan mode is read-only investigation. Shell tools can still be
|
||||
// available for read-only local inspection, but outbound network
|
||||
// remains sandbox-blocked; attach an explicit hint so network
|
||||
// command failures do not look like DNS/proxy problems.
|
||||
AppMode::Plan => ctx
|
||||
.with_elevated_sandbox_policy(crate::sandbox::SandboxPolicy::WorkspaceWrite {
|
||||
writable_roots: vec![self.session.workspace.clone()],
|
||||
network_access: false,
|
||||
exclude_tmpdir: false,
|
||||
exclude_slash_tmp: false,
|
||||
})
|
||||
.with_shell_network_denied_hint(
|
||||
"Shell command blocked: Plan mode runs shell commands in a network-restricted sandbox. Use fetch_url or code_execution for network access, or switch to Agent mode (`/agent`) before retrying shell network commands.",
|
||||
),
|
||||
// Agent registers the shell tool and runs each command through
|
||||
// the per-mode sandbox + per-tool approval flow. The sandbox
|
||||
// default would deny all outbound network — including DNS —
|
||||
// which breaks ordinary developer commands (cargo fetch, npm
|
||||
// install, curl, yt-dlp, …) without buying the user any safety
|
||||
// the approval flow doesn't already provide. Elevate to
|
||||
// workspace-write + network. (#273)
|
||||
AppMode::Agent => {
|
||||
ctx.with_elevated_sandbox_policy(crate::sandbox::SandboxPolicy::WorkspaceWrite {
|
||||
writable_roots: vec![self.session.workspace.clone()],
|
||||
network_access: true,
|
||||
exclude_tmpdir: false,
|
||||
exclude_slash_tmp: false,
|
||||
})
|
||||
}
|
||||
// YOLO is the explicit "no guardrails" mode — auto-approve all
|
||||
// tools, trust mode on, no sandbox. Workspace-write was still
|
||||
// intercepting commands that wanted to write outside the
|
||||
// workspace (rare but legitimate: pipx install, npm install
|
||||
// -g, brew, package-manager state under ~/.cache, sub-agent
|
||||
// workspaces, …) which forced approval round-trips and
|
||||
// contradicts the YOLO contract. The user opted into YOLO
|
||||
// deliberately; trust them.
|
||||
AppMode::Yolo => {
|
||||
ctx.with_elevated_sandbox_policy(crate::sandbox::SandboxPolicy::DangerFullAccess)
|
||||
}
|
||||
let policy = sandbox_policy_for_mode(mode, &self.session.workspace);
|
||||
let mut ctx = ctx.with_elevated_sandbox_policy(policy);
|
||||
if matches!(mode, AppMode::Plan) {
|
||||
ctx = ctx.with_shell_network_denied_hint(
|
||||
"Shell command blocked: Plan mode runs shell commands in a read-only sandbox — no writes, no network. Use Agent mode (`/agent`) for any command that creates or modifies files, or that needs network access.",
|
||||
);
|
||||
}
|
||||
ctx
|
||||
}
|
||||
|
||||
async fn ensure_mcp_pool(&mut self) -> Result<Arc<AsyncMutex<McpPool>>, ToolError> {
|
||||
@@ -2075,6 +2041,7 @@ use self::tool_catalog::{
|
||||
#[cfg(test)]
|
||||
use self::tool_catalog::{TOOL_SEARCH_BM25_NAME, should_default_defer_tool};
|
||||
use self::tool_execution::emit_tool_audit;
|
||||
use self::tool_setup::sandbox_policy_for_mode;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
@@ -501,23 +501,71 @@ fn agent_and_yolo_modes_elevate_shell_sandbox_to_allow_network() {
|
||||
"Yolo mode must use DangerFullAccess (no sandbox); got {yolo_policy:?}",
|
||||
);
|
||||
|
||||
// Plan mode can still expose shell tools for local read-only inspection,
|
||||
// but it keeps outbound network blocked and attaches an explicit hint so
|
||||
// network command failures are not mistaken for DNS/proxy issues.
|
||||
// Plan mode (#1077): the sandbox must actually deny workspace writes.
|
||||
// The previous WorkspaceWrite-with-empty-network policy whitelisted the
|
||||
// workspace as writable, so `python -c "open('f','w').write('x')"`
|
||||
// mutated files inside the workspace despite Plan-mode's intent. Lock
|
||||
// it to ReadOnly: no writes anywhere, no network. The shell tool stays
|
||||
// exposed for read-only inspection (`ls`, `git log`, `grep`, …) and
|
||||
// the per-platform sandbox enforces the rest.
|
||||
let plan_ctx = engine.build_tool_context(AppMode::Plan, false);
|
||||
let plan_policy = plan_ctx
|
||||
.elevated_sandbox_policy
|
||||
.as_ref()
|
||||
.expect("Plan mode should make the shell sandbox policy explicit");
|
||||
assert!(
|
||||
matches!(plan_policy, crate::sandbox::SandboxPolicy::ReadOnly),
|
||||
"Plan mode must use ReadOnly sandbox to deny workspace writes (#1077); got {plan_policy:?}",
|
||||
);
|
||||
assert!(!plan_policy.has_network_access());
|
||||
assert!(!plan_policy.has_full_disk_write_access());
|
||||
assert!(
|
||||
plan_policy
|
||||
.get_writable_roots(&std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")))
|
||||
.is_empty(),
|
||||
"ReadOnly policy must enumerate zero writable roots; got {plan_policy:?}",
|
||||
);
|
||||
assert!(
|
||||
plan_ctx
|
||||
.shell_network_denied_hint
|
||||
.as_deref()
|
||||
.is_some_and(|hint| hint.contains("Plan mode")),
|
||||
.is_some_and(|hint| hint.contains("Plan mode") && hint.contains("read-only")),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sandbox_policy_for_mode_returns_correct_policy_per_mode() {
|
||||
use super::tool_setup::sandbox_policy_for_mode;
|
||||
use crate::sandbox::SandboxPolicy;
|
||||
|
||||
let workspace = PathBuf::from("/tmp/example-workspace");
|
||||
|
||||
// Plan: ReadOnly. The whole point of #1077.
|
||||
assert!(matches!(
|
||||
sandbox_policy_for_mode(AppMode::Plan, &workspace),
|
||||
SandboxPolicy::ReadOnly
|
||||
));
|
||||
|
||||
// Agent: WorkspaceWrite with workspace as writable root, network on.
|
||||
match sandbox_policy_for_mode(AppMode::Agent, &workspace) {
|
||||
SandboxPolicy::WorkspaceWrite {
|
||||
writable_roots,
|
||||
network_access,
|
||||
..
|
||||
} => {
|
||||
assert_eq!(writable_roots, vec![workspace.clone()]);
|
||||
assert!(network_access, "Agent mode must allow shell network access");
|
||||
}
|
||||
other => panic!("Agent mode should be WorkspaceWrite; got {other:?}"),
|
||||
}
|
||||
|
||||
// YOLO: DangerFullAccess.
|
||||
assert!(matches!(
|
||||
sandbox_policy_for_mode(AppMode::Yolo, &workspace),
|
||||
SandboxPolicy::DangerFullAccess
|
||||
));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn session_update_preserves_reasoning_tool_only_turn() {
|
||||
let (mut engine, handle) = Engine::new(EngineConfig::default(), &Config::default());
|
||||
|
||||
@@ -2,7 +2,35 @@
|
||||
//!
|
||||
//! This keeps mode/feature-specific registry construction out of the send path.
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
use super::*;
|
||||
use crate::sandbox::SandboxPolicy;
|
||||
|
||||
/// Pick the sandbox policy that gates shell commands for a given UI mode.
|
||||
///
|
||||
/// - **Plan** (#1077): `ReadOnly` — no writes, no network. The previous
|
||||
/// `WorkspaceWrite` policy let `python -c "open('f','w').write('x')"` mutate
|
||||
/// files inside the workspace because it whitelisted the workspace as
|
||||
/// writable. Plan mode is investigation only; if the user wants to change
|
||||
/// files they should switch to Agent.
|
||||
/// - **Agent**: `WorkspaceWrite` with workspace as writable root and network
|
||||
/// on. Approval flow gates risky individual commands; the sandbox handles
|
||||
/// the rest. Network is allowed because cargo / npm / curl-style commands
|
||||
/// are normal during agent work and DNS-deny breaks them silently.
|
||||
/// - **YOLO**: `DangerFullAccess` — explicit no-guardrails contract.
|
||||
pub(crate) fn sandbox_policy_for_mode(mode: AppMode, workspace: &Path) -> SandboxPolicy {
|
||||
match mode {
|
||||
AppMode::Plan => SandboxPolicy::ReadOnly,
|
||||
AppMode::Agent => SandboxPolicy::WorkspaceWrite {
|
||||
writable_roots: vec![workspace.to_path_buf()],
|
||||
network_access: true,
|
||||
exclude_tmpdir: false,
|
||||
exclude_slash_tmp: false,
|
||||
},
|
||||
AppMode::Yolo => SandboxPolicy::DangerFullAccess,
|
||||
}
|
||||
}
|
||||
|
||||
impl Engine {
|
||||
pub(super) fn build_turn_tool_registry_builder(
|
||||
|
||||
Reference in New Issue
Block a user