fix(plan): enforce read-only tool boundaries in Plan mode (#1114)

2026-05-08 15:49:31 +09:00
parent f283e56bd1
commit fa32e7ac53
6 changed files with 53 additions and 9 deletions
@@ -448,6 +448,9 @@ fn turn_tool_registry_builder_keeps_plan_mode_read_only_for_files() {
    assert!(registry.contains("list_dir"));
    assert!(!registry.contains("write_file"));
    assert!(!registry.contains("edit_file"));
+    assert!(!registry.contains("exec_shell"));
+    assert!(!registry.contains("exec_shell_wait"));
+    assert!(!registry.contains("exec_shell_interact"));
    assert!(registry.contains("update_plan"));
    assert!(registry.contains("task_create"));
 }
@@ -1350,7 +1353,7 @@ fn tool_search_activates_discovered_deferred_tools() {
            cache_control: None,
        },
    ];
-    ensure_advanced_tooling(&mut catalog);
+    ensure_advanced_tooling(&mut catalog, AppMode::Agent);
    let mut active = initial_active_tools(&catalog);
    let result = execute_tool_search(
        TOOL_SEARCH_BM25_NAME,
@@ -1374,6 +1377,27 @@ async fn code_execution_runs_python_and_returns_result_payload() {
    assert!(result.content.contains("return_code"));
 }

+#[test]
+fn plan_mode_catalog_skips_code_execution_tool() {
+    let mut plan_catalog = vec![api_tool("read_file")];
+    ensure_advanced_tooling(&mut plan_catalog, AppMode::Plan);
+    assert!(
+        !plan_catalog
+            .iter()
+            .any(|tool| tool.name == CODE_EXECUTION_TOOL_NAME),
+        "Plan mode must not expose code_execution"
+    );
+
+    let mut agent_catalog = vec![api_tool("read_file")];
+    ensure_advanced_tooling(&mut agent_catalog, AppMode::Agent);
+    assert!(
+        agent_catalog
+            .iter()
+            .any(|tool| tool.name == CODE_EXECUTION_TOOL_NAME),
+        "Agent mode should still expose code_execution"
+    );
+}
+
 #[test]
 fn deferred_tool_requests_are_auto_activated() {
    use std::collections::HashSet;
@@ -118,8 +118,8 @@ pub(super) fn build_model_tool_catalog(
    native_tools
 }

-pub(super) fn ensure_advanced_tooling(catalog: &mut Vec<Tool>) {
-    if !catalog.iter().any(|t| t.name == CODE_EXECUTION_TOOL_NAME) {
+pub(super) fn ensure_advanced_tooling(catalog: &mut Vec<Tool>, mode: AppMode) {
+    if mode != AppMode::Plan && !catalog.iter().any(|t| t.name == CODE_EXECUTION_TOOL_NAME) {
        catalog.push(Tool {
            tool_type: Some(CODE_EXECUTION_TOOL_TYPE.to_string()),
            name: CODE_EXECUTION_TOOL_NAME.to_string(),
@@ -71,9 +71,12 @@ impl Engine {
        if self.config.features.enabled(Feature::WebSearch) {
            builder = builder.with_web_tools();
        }
-        // Plan mode keeps shell available when the session allows it; command
-        // safety and approval checks still gate risky commands.
-        if self.config.features.enabled(Feature::ShellTool) && self.session.allow_shell {
+        // Plan mode is strictly read-only: do not expose shell execution at
+        // all, even if the session would otherwise allow it.
+        if mode != AppMode::Plan
+            && self.config.features.enabled(Feature::ShellTool)
+            && self.session.allow_shell
+        {
            builder = builder.with_shell_tools();
        }

@@ -26,7 +26,7 @@ impl Engine {
        let mut context_recovery_attempts = 0u8;
        let mut tool_catalog = tools.unwrap_or_default();
        if !tool_catalog.is_empty() {
-            ensure_advanced_tooling(&mut tool_catalog);
+            ensure_advanced_tooling(&mut tool_catalog, mode);
        }
        let mut active_tool_names = initial_active_tools(&tool_catalog);
        let mut loop_guard = LoopGuard::default();
@@ -1045,6 +1045,23 @@ impl Engine {
                let mut read_only = false;
                let mut blocked_error: Option<ToolError> = None;
                let mut guard_result: Option<ToolResult> = None;
+
+                if mode == AppMode::Plan
+                    && matches!(
+                        tool_name.as_str(),
+                        "exec_shell"
+                            | "exec_shell_wait"
+                            | "exec_shell_interact"
+                            | "exec_wait"
+                            | "exec_interact"
+                            | CODE_EXECUTION_TOOL_NAME
+                    )
+                {
+                    blocked_error = Some(ToolError::permission_denied(format!(
+                        "Tool '{tool_name}' is unavailable in Plan mode"
+                    )));
+                }
+
                if maybe_activate_requested_deferred_tool(
                    &tool_name,
                    &tool_catalog,
@@ -4,7 +4,7 @@ You are running in Plan mode — design before implementing.

 Investigate first, act later. Use `update_plan` to lay out high-level strategy and `checklist_write` for
 granular, verifiable steps. All writes and patches are blocked — you can read the world but you
-can't change it. Shell commands go through approval.
+can't change it. Shell and code execution are unavailable.

 Use this mode to build a thorough plan. Spawn read-only sub-agents for parallel investigation.
 When the plan is solid, the user will switch modes so you can execute.
@@ -2,7 +2,7 @@

 Investigate first, act later. Use `update_plan` to lay out high-level strategy and `checklist_write` for
 granular, verifiable steps. All writes and patches are blocked — you can read the world but you
-can't change it. Shell commands go through approval.
+can't change it. Shell and code execution are unavailable.

 Use this mode to build a thorough plan. Spawn read-only sub-agents for parallel investigation.
 When the plan is solid, the user will switch modes so you can execute.