From fa32e7ac5334be9bcef1c35232bff2653e683110 Mon Sep 17 00:00:00 2001 From: jiaren wang <33421508+JiarenWang@users.noreply.github.com> Date: Fri, 8 May 2026 15:49:31 +0900 Subject: [PATCH] fix(plan): enforce read-only tool boundaries in Plan mode (#1114) --- crates/tui/src/core/engine/tests.rs | 26 +++++++++++++++++++++- crates/tui/src/core/engine/tool_catalog.rs | 4 ++-- crates/tui/src/core/engine/tool_setup.rs | 9 +++++--- crates/tui/src/core/engine/turn_loop.rs | 19 +++++++++++++++- crates/tui/src/prompts/modes/plan.md | 2 +- crates/tui/src/prompts/plan.txt | 2 +- 6 files changed, 53 insertions(+), 9 deletions(-) diff --git a/crates/tui/src/core/engine/tests.rs b/crates/tui/src/core/engine/tests.rs index bb578137..e1aa09b4 100644 --- a/crates/tui/src/core/engine/tests.rs +++ b/crates/tui/src/core/engine/tests.rs @@ -448,6 +448,9 @@ fn turn_tool_registry_builder_keeps_plan_mode_read_only_for_files() { assert!(registry.contains("list_dir")); assert!(!registry.contains("write_file")); assert!(!registry.contains("edit_file")); + assert!(!registry.contains("exec_shell")); + assert!(!registry.contains("exec_shell_wait")); + assert!(!registry.contains("exec_shell_interact")); assert!(registry.contains("update_plan")); assert!(registry.contains("task_create")); } @@ -1350,7 +1353,7 @@ fn tool_search_activates_discovered_deferred_tools() { cache_control: None, }, ]; - ensure_advanced_tooling(&mut catalog); + ensure_advanced_tooling(&mut catalog, AppMode::Agent); let mut active = initial_active_tools(&catalog); let result = execute_tool_search( TOOL_SEARCH_BM25_NAME, @@ -1374,6 +1377,27 @@ async fn code_execution_runs_python_and_returns_result_payload() { assert!(result.content.contains("return_code")); } +#[test] +fn plan_mode_catalog_skips_code_execution_tool() { + let mut plan_catalog = vec![api_tool("read_file")]; + ensure_advanced_tooling(&mut plan_catalog, AppMode::Plan); + assert!( + !plan_catalog + .iter() + .any(|tool| tool.name == CODE_EXECUTION_TOOL_NAME), + "Plan mode must not expose code_execution" + ); + + let mut agent_catalog = vec![api_tool("read_file")]; + ensure_advanced_tooling(&mut agent_catalog, AppMode::Agent); + assert!( + agent_catalog + .iter() + .any(|tool| tool.name == CODE_EXECUTION_TOOL_NAME), + "Agent mode should still expose code_execution" + ); +} + #[test] fn deferred_tool_requests_are_auto_activated() { use std::collections::HashSet; diff --git a/crates/tui/src/core/engine/tool_catalog.rs b/crates/tui/src/core/engine/tool_catalog.rs index 2212e5ef..9fdc3c6d 100644 --- a/crates/tui/src/core/engine/tool_catalog.rs +++ b/crates/tui/src/core/engine/tool_catalog.rs @@ -118,8 +118,8 @@ pub(super) fn build_model_tool_catalog( native_tools } -pub(super) fn ensure_advanced_tooling(catalog: &mut Vec) { - if !catalog.iter().any(|t| t.name == CODE_EXECUTION_TOOL_NAME) { +pub(super) fn ensure_advanced_tooling(catalog: &mut Vec, mode: AppMode) { + if mode != AppMode::Plan && !catalog.iter().any(|t| t.name == CODE_EXECUTION_TOOL_NAME) { catalog.push(Tool { tool_type: Some(CODE_EXECUTION_TOOL_TYPE.to_string()), name: CODE_EXECUTION_TOOL_NAME.to_string(), diff --git a/crates/tui/src/core/engine/tool_setup.rs b/crates/tui/src/core/engine/tool_setup.rs index 5abe7614..1508a469 100644 --- a/crates/tui/src/core/engine/tool_setup.rs +++ b/crates/tui/src/core/engine/tool_setup.rs @@ -71,9 +71,12 @@ impl Engine { if self.config.features.enabled(Feature::WebSearch) { builder = builder.with_web_tools(); } - // Plan mode keeps shell available when the session allows it; command - // safety and approval checks still gate risky commands. - if self.config.features.enabled(Feature::ShellTool) && self.session.allow_shell { + // Plan mode is strictly read-only: do not expose shell execution at + // all, even if the session would otherwise allow it. + if mode != AppMode::Plan + && self.config.features.enabled(Feature::ShellTool) + && self.session.allow_shell + { builder = builder.with_shell_tools(); } diff --git a/crates/tui/src/core/engine/turn_loop.rs b/crates/tui/src/core/engine/turn_loop.rs index 7aa9bff7..a8ec0047 100644 --- a/crates/tui/src/core/engine/turn_loop.rs +++ b/crates/tui/src/core/engine/turn_loop.rs @@ -26,7 +26,7 @@ impl Engine { let mut context_recovery_attempts = 0u8; let mut tool_catalog = tools.unwrap_or_default(); if !tool_catalog.is_empty() { - ensure_advanced_tooling(&mut tool_catalog); + ensure_advanced_tooling(&mut tool_catalog, mode); } let mut active_tool_names = initial_active_tools(&tool_catalog); let mut loop_guard = LoopGuard::default(); @@ -1045,6 +1045,23 @@ impl Engine { let mut read_only = false; let mut blocked_error: Option = None; let mut guard_result: Option = None; + + if mode == AppMode::Plan + && matches!( + tool_name.as_str(), + "exec_shell" + | "exec_shell_wait" + | "exec_shell_interact" + | "exec_wait" + | "exec_interact" + | CODE_EXECUTION_TOOL_NAME + ) + { + blocked_error = Some(ToolError::permission_denied(format!( + "Tool '{tool_name}' is unavailable in Plan mode" + ))); + } + if maybe_activate_requested_deferred_tool( &tool_name, &tool_catalog, diff --git a/crates/tui/src/prompts/modes/plan.md b/crates/tui/src/prompts/modes/plan.md index 92798b23..8b854a4f 100644 --- a/crates/tui/src/prompts/modes/plan.md +++ b/crates/tui/src/prompts/modes/plan.md @@ -4,7 +4,7 @@ You are running in Plan mode — design before implementing. Investigate first, act later. Use `update_plan` to lay out high-level strategy and `checklist_write` for granular, verifiable steps. All writes and patches are blocked — you can read the world but you -can't change it. Shell commands go through approval. +can't change it. Shell and code execution are unavailable. Use this mode to build a thorough plan. Spawn read-only sub-agents for parallel investigation. When the plan is solid, the user will switch modes so you can execute. diff --git a/crates/tui/src/prompts/plan.txt b/crates/tui/src/prompts/plan.txt index eec55cc8..af9c8670 100644 --- a/crates/tui/src/prompts/plan.txt +++ b/crates/tui/src/prompts/plan.txt @@ -2,7 +2,7 @@ Investigate first, act later. Use `update_plan` to lay out high-level strategy and `checklist_write` for granular, verifiable steps. All writes and patches are blocked — you can read the world but you -can't change it. Shell commands go through approval. +can't change it. Shell and code execution are unavailable. Use this mode to build a thorough plan. Spawn read-only sub-agents for parallel investigation. When the plan is solid, the user will switch modes so you can execute.