fix(plan): enforce read-only tool boundaries in Plan mode (#1114)
This commit is contained in:
@@ -448,6 +448,9 @@ fn turn_tool_registry_builder_keeps_plan_mode_read_only_for_files() {
|
||||
assert!(registry.contains("list_dir"));
|
||||
assert!(!registry.contains("write_file"));
|
||||
assert!(!registry.contains("edit_file"));
|
||||
assert!(!registry.contains("exec_shell"));
|
||||
assert!(!registry.contains("exec_shell_wait"));
|
||||
assert!(!registry.contains("exec_shell_interact"));
|
||||
assert!(registry.contains("update_plan"));
|
||||
assert!(registry.contains("task_create"));
|
||||
}
|
||||
@@ -1350,7 +1353,7 @@ fn tool_search_activates_discovered_deferred_tools() {
|
||||
cache_control: None,
|
||||
},
|
||||
];
|
||||
ensure_advanced_tooling(&mut catalog);
|
||||
ensure_advanced_tooling(&mut catalog, AppMode::Agent);
|
||||
let mut active = initial_active_tools(&catalog);
|
||||
let result = execute_tool_search(
|
||||
TOOL_SEARCH_BM25_NAME,
|
||||
@@ -1374,6 +1377,27 @@ async fn code_execution_runs_python_and_returns_result_payload() {
|
||||
assert!(result.content.contains("return_code"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn plan_mode_catalog_skips_code_execution_tool() {
|
||||
let mut plan_catalog = vec![api_tool("read_file")];
|
||||
ensure_advanced_tooling(&mut plan_catalog, AppMode::Plan);
|
||||
assert!(
|
||||
!plan_catalog
|
||||
.iter()
|
||||
.any(|tool| tool.name == CODE_EXECUTION_TOOL_NAME),
|
||||
"Plan mode must not expose code_execution"
|
||||
);
|
||||
|
||||
let mut agent_catalog = vec![api_tool("read_file")];
|
||||
ensure_advanced_tooling(&mut agent_catalog, AppMode::Agent);
|
||||
assert!(
|
||||
agent_catalog
|
||||
.iter()
|
||||
.any(|tool| tool.name == CODE_EXECUTION_TOOL_NAME),
|
||||
"Agent mode should still expose code_execution"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn deferred_tool_requests_are_auto_activated() {
|
||||
use std::collections::HashSet;
|
||||
|
||||
@@ -118,8 +118,8 @@ pub(super) fn build_model_tool_catalog(
|
||||
native_tools
|
||||
}
|
||||
|
||||
pub(super) fn ensure_advanced_tooling(catalog: &mut Vec<Tool>) {
|
||||
if !catalog.iter().any(|t| t.name == CODE_EXECUTION_TOOL_NAME) {
|
||||
pub(super) fn ensure_advanced_tooling(catalog: &mut Vec<Tool>, mode: AppMode) {
|
||||
if mode != AppMode::Plan && !catalog.iter().any(|t| t.name == CODE_EXECUTION_TOOL_NAME) {
|
||||
catalog.push(Tool {
|
||||
tool_type: Some(CODE_EXECUTION_TOOL_TYPE.to_string()),
|
||||
name: CODE_EXECUTION_TOOL_NAME.to_string(),
|
||||
|
||||
@@ -71,9 +71,12 @@ impl Engine {
|
||||
if self.config.features.enabled(Feature::WebSearch) {
|
||||
builder = builder.with_web_tools();
|
||||
}
|
||||
// Plan mode keeps shell available when the session allows it; command
|
||||
// safety and approval checks still gate risky commands.
|
||||
if self.config.features.enabled(Feature::ShellTool) && self.session.allow_shell {
|
||||
// Plan mode is strictly read-only: do not expose shell execution at
|
||||
// all, even if the session would otherwise allow it.
|
||||
if mode != AppMode::Plan
|
||||
&& self.config.features.enabled(Feature::ShellTool)
|
||||
&& self.session.allow_shell
|
||||
{
|
||||
builder = builder.with_shell_tools();
|
||||
}
|
||||
|
||||
|
||||
@@ -26,7 +26,7 @@ impl Engine {
|
||||
let mut context_recovery_attempts = 0u8;
|
||||
let mut tool_catalog = tools.unwrap_or_default();
|
||||
if !tool_catalog.is_empty() {
|
||||
ensure_advanced_tooling(&mut tool_catalog);
|
||||
ensure_advanced_tooling(&mut tool_catalog, mode);
|
||||
}
|
||||
let mut active_tool_names = initial_active_tools(&tool_catalog);
|
||||
let mut loop_guard = LoopGuard::default();
|
||||
@@ -1045,6 +1045,23 @@ impl Engine {
|
||||
let mut read_only = false;
|
||||
let mut blocked_error: Option<ToolError> = None;
|
||||
let mut guard_result: Option<ToolResult> = None;
|
||||
|
||||
if mode == AppMode::Plan
|
||||
&& matches!(
|
||||
tool_name.as_str(),
|
||||
"exec_shell"
|
||||
| "exec_shell_wait"
|
||||
| "exec_shell_interact"
|
||||
| "exec_wait"
|
||||
| "exec_interact"
|
||||
| CODE_EXECUTION_TOOL_NAME
|
||||
)
|
||||
{
|
||||
blocked_error = Some(ToolError::permission_denied(format!(
|
||||
"Tool '{tool_name}' is unavailable in Plan mode"
|
||||
)));
|
||||
}
|
||||
|
||||
if maybe_activate_requested_deferred_tool(
|
||||
&tool_name,
|
||||
&tool_catalog,
|
||||
|
||||
@@ -4,7 +4,7 @@ You are running in Plan mode — design before implementing.
|
||||
|
||||
Investigate first, act later. Use `update_plan` to lay out high-level strategy and `checklist_write` for
|
||||
granular, verifiable steps. All writes and patches are blocked — you can read the world but you
|
||||
can't change it. Shell commands go through approval.
|
||||
can't change it. Shell and code execution are unavailable.
|
||||
|
||||
Use this mode to build a thorough plan. Spawn read-only sub-agents for parallel investigation.
|
||||
When the plan is solid, the user will switch modes so you can execute.
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
Investigate first, act later. Use `update_plan` to lay out high-level strategy and `checklist_write` for
|
||||
granular, verifiable steps. All writes and patches are blocked — you can read the world but you
|
||||
can't change it. Shell commands go through approval.
|
||||
can't change it. Shell and code execution are unavailable.
|
||||
|
||||
Use this mode to build a thorough plan. Spawn read-only sub-agents for parallel investigation.
|
||||
When the plan is solid, the user will switch modes so you can execute.
|
||||
|
||||
Reference in New Issue
Block a user