fix(plan): enforce read-only tool boundaries in Plan mode (#1114)

This commit is contained in:
jiaren wang
2026-05-08 15:49:31 +09:00
committed by GitHub
parent f283e56bd1
commit fa32e7ac53
6 changed files with 53 additions and 9 deletions
+25 -1
View File
@@ -448,6 +448,9 @@ fn turn_tool_registry_builder_keeps_plan_mode_read_only_for_files() {
assert!(registry.contains("list_dir"));
assert!(!registry.contains("write_file"));
assert!(!registry.contains("edit_file"));
assert!(!registry.contains("exec_shell"));
assert!(!registry.contains("exec_shell_wait"));
assert!(!registry.contains("exec_shell_interact"));
assert!(registry.contains("update_plan"));
assert!(registry.contains("task_create"));
}
@@ -1350,7 +1353,7 @@ fn tool_search_activates_discovered_deferred_tools() {
cache_control: None,
},
];
ensure_advanced_tooling(&mut catalog);
ensure_advanced_tooling(&mut catalog, AppMode::Agent);
let mut active = initial_active_tools(&catalog);
let result = execute_tool_search(
TOOL_SEARCH_BM25_NAME,
@@ -1374,6 +1377,27 @@ async fn code_execution_runs_python_and_returns_result_payload() {
assert!(result.content.contains("return_code"));
}
#[test]
fn plan_mode_catalog_skips_code_execution_tool() {
let mut plan_catalog = vec![api_tool("read_file")];
ensure_advanced_tooling(&mut plan_catalog, AppMode::Plan);
assert!(
!plan_catalog
.iter()
.any(|tool| tool.name == CODE_EXECUTION_TOOL_NAME),
"Plan mode must not expose code_execution"
);
let mut agent_catalog = vec![api_tool("read_file")];
ensure_advanced_tooling(&mut agent_catalog, AppMode::Agent);
assert!(
agent_catalog
.iter()
.any(|tool| tool.name == CODE_EXECUTION_TOOL_NAME),
"Agent mode should still expose code_execution"
);
}
#[test]
fn deferred_tool_requests_are_auto_activated() {
use std::collections::HashSet;
+2 -2
View File
@@ -118,8 +118,8 @@ pub(super) fn build_model_tool_catalog(
native_tools
}
pub(super) fn ensure_advanced_tooling(catalog: &mut Vec<Tool>) {
if !catalog.iter().any(|t| t.name == CODE_EXECUTION_TOOL_NAME) {
pub(super) fn ensure_advanced_tooling(catalog: &mut Vec<Tool>, mode: AppMode) {
if mode != AppMode::Plan && !catalog.iter().any(|t| t.name == CODE_EXECUTION_TOOL_NAME) {
catalog.push(Tool {
tool_type: Some(CODE_EXECUTION_TOOL_TYPE.to_string()),
name: CODE_EXECUTION_TOOL_NAME.to_string(),
+6 -3
View File
@@ -71,9 +71,12 @@ impl Engine {
if self.config.features.enabled(Feature::WebSearch) {
builder = builder.with_web_tools();
}
// Plan mode keeps shell available when the session allows it; command
// safety and approval checks still gate risky commands.
if self.config.features.enabled(Feature::ShellTool) && self.session.allow_shell {
// Plan mode is strictly read-only: do not expose shell execution at
// all, even if the session would otherwise allow it.
if mode != AppMode::Plan
&& self.config.features.enabled(Feature::ShellTool)
&& self.session.allow_shell
{
builder = builder.with_shell_tools();
}
+18 -1
View File
@@ -26,7 +26,7 @@ impl Engine {
let mut context_recovery_attempts = 0u8;
let mut tool_catalog = tools.unwrap_or_default();
if !tool_catalog.is_empty() {
ensure_advanced_tooling(&mut tool_catalog);
ensure_advanced_tooling(&mut tool_catalog, mode);
}
let mut active_tool_names = initial_active_tools(&tool_catalog);
let mut loop_guard = LoopGuard::default();
@@ -1045,6 +1045,23 @@ impl Engine {
let mut read_only = false;
let mut blocked_error: Option<ToolError> = None;
let mut guard_result: Option<ToolResult> = None;
if mode == AppMode::Plan
&& matches!(
tool_name.as_str(),
"exec_shell"
| "exec_shell_wait"
| "exec_shell_interact"
| "exec_wait"
| "exec_interact"
| CODE_EXECUTION_TOOL_NAME
)
{
blocked_error = Some(ToolError::permission_denied(format!(
"Tool '{tool_name}' is unavailable in Plan mode"
)));
}
if maybe_activate_requested_deferred_tool(
&tool_name,
&tool_catalog,
+1 -1
View File
@@ -4,7 +4,7 @@ You are running in Plan mode — design before implementing.
Investigate first, act later. Use `update_plan` to lay out high-level strategy and `checklist_write` for
granular, verifiable steps. All writes and patches are blocked — you can read the world but you
can't change it. Shell commands go through approval.
can't change it. Shell and code execution are unavailable.
Use this mode to build a thorough plan. Spawn read-only sub-agents for parallel investigation.
When the plan is solid, the user will switch modes so you can execute.
+1 -1
View File
@@ -2,7 +2,7 @@
Investigate first, act later. Use `update_plan` to lay out high-level strategy and `checklist_write` for
granular, verifiable steps. All writes and patches are blocked — you can read the world but you
can't change it. Shell commands go through approval.
can't change it. Shell and code execution are unavailable.
Use this mode to build a thorough plan. Spawn read-only sub-agents for parallel investigation.
When the plan is solid, the user will switch modes so you can execute.