From dbd9b9670da23000cdfe3439bcbac412b94f8f56 Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Wed, 10 Jun 2026 16:17:33 -0700 Subject: [PATCH 1/3] feat(exec): add --allowed-tools, --disallowed-tools, --max-turns, --append-system-prompt (#3027) Headless exec hardening for benchmark/CI/droplet use: - New CLI flags: --allowed-tools, --disallowed-tools, --max-turns, --append-system-prompt - Add disallowed_tools to EngineConfig + command_denies_tool() helper - run_exec_agent threads all four flags into EngineConfig and Op::SendMessage - needs_engine now includes flag presence for standalone exec use --- crates/tui/src/core/engine.rs | 4 ++ crates/tui/src/core/engine/turn_loop.rs | 9 ++++ crates/tui/src/main.rs | 63 +++++++++++++++++++++---- crates/tui/src/runtime_threads.rs | 1 + crates/tui/src/tui/ui.rs | 1 + 5 files changed, 69 insertions(+), 9 deletions(-) diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index 6f89fe19..23dfe719 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -325,6 +325,9 @@ pub struct EngineConfig { /// Tool restriction from custom slash command frontmatter. /// `None` means the current turn may use the normal tool set. pub allowed_tools: Option>, + /// Tool deny-list. Deny always wins over allow (#3027). + /// `None` means no tools are explicitly denied. + pub disallowed_tools: Option>, /// Hook executor for control-plane hooks. /// `ToolCallBefore` hooks may deny a tool call with exit code 2. pub hook_executor: Option>, @@ -409,6 +412,7 @@ impl Default for EngineConfig { strict_tool_mode: false, goal_objective: None, allowed_tools: None, + disallowed_tools: None, hook_executor: None, locale_tag: "en".to_string(), workshop: None, diff --git a/crates/tui/src/core/engine/turn_loop.rs b/crates/tui/src/core/engine/turn_loop.rs index 53cdf927..631c3a7b 100644 --- a/crates/tui/src/core/engine/turn_loop.rs +++ b/crates/tui/src/core/engine/turn_loop.rs @@ -2378,6 +2378,15 @@ fn command_allows_tool(allowed_tools: Option<&[String]>, tool_name: &str) -> boo allowed_tools.contains(&tool_name.to_ascii_lowercase()) } +/// Check whether `tool_name` is explicitly denied (#3027). +/// Deny always wins over allow. +fn command_denies_tool(disallowed_tools: Option<&[String]>, tool_name: &str) -> bool { + let Some(disallowed_tools) = disallowed_tools else { + return false; + }; + disallowed_tools.contains(&tool_name.to_ascii_lowercase()) +} + fn resolve_tool_definition<'a>( tool_name: &mut String, tool_catalog: &'a [Tool], diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs index 8ac5f8f6..6b0b526e 100644 --- a/crates/tui/src/main.rs +++ b/crates/tui/src/main.rs @@ -336,6 +336,19 @@ struct ExecArgs { /// Output format for exec mode #[arg(long, value_enum, default_value_t = ExecOutputFormat::Text)] output_format: ExecOutputFormat, + /// Comma-separated list of tools to allow (all others denied). + /// Lowercase catalog names: read_file, write_file, exec_shell, grep_files, etc. + #[arg(long, value_delimiter = ',')] + allowed_tools: Option>, + /// Comma-separated list of tools to deny (deny wins over allow). + #[arg(long, value_delimiter = ',')] + disallowed_tools: Option>, + /// Maximum number of model steps (tool calls) before the run ends. + #[arg(long, value_parser = clap::value_parser!(u32).range(1..))] + max_turns: Option, + /// Extra text appended to the system prompt for this run. + #[arg(long)] + append_system_prompt: Option, } #[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)] @@ -975,13 +988,23 @@ async fn main() -> Result<()> { let needs_engine = args.auto || yolo || resume_session_id.is_some() - || args.output_format == ExecOutputFormat::StreamJson; + || args.output_format == ExecOutputFormat::StreamJson + || args.max_turns.is_some() + || args.allowed_tools.is_some() + || args.disallowed_tools.is_some(); if needs_engine { let max_subagents = cli.max_subagents.map_or_else( || config.max_subagents(), |value| value.clamp(1, MAX_SUBAGENTS), ); let auto_mode = args.auto || yolo; + let max_turns = args.max_turns.unwrap_or(100); + let allowed_tools = args.allowed_tools.as_ref().map(|v| { + v.iter().map(|s| s.to_ascii_lowercase().trim().to_string()).collect::>() + }); + let disallowed_tools = args.disallowed_tools.as_ref().map(|v| { + v.iter().map(|s| s.to_ascii_lowercase().trim().to_string()).collect::>() + }); run_exec_agent( &config, &model, @@ -993,6 +1016,10 @@ async fn main() -> Result<()> { args.json, resume_session_id, args.output_format, + max_turns, + allowed_tools, + disallowed_tools, + args.append_system_prompt.clone(), ) .await } else if args.json { @@ -1249,6 +1276,10 @@ async fn run_swebench_command( false, None, args.output_format, + 100, + None, + None, + None, ) .await?; @@ -5748,6 +5779,10 @@ async fn run_exec_agent( json_output: bool, resume_session_id: Option, output_format: ExecOutputFormat, + max_turns: u32, + allowed_tools: Option>, + disallowed_tools: Option>, + append_system_prompt: Option, ) -> Result<()> { use crate::compaction::CompactionConfig; use crate::core::engine::{EngineConfig, spawn_engine}; @@ -5799,15 +5834,24 @@ async fn run_exec_agent( notes_path: config.notes_path(), mcp_config_path: config.mcp_config_path(), skills_dir: config.skills_dir(), - instructions: config - .instructions_paths() - .into_iter() - .map(Into::into) - .collect(), + instructions: { + let mut instrs: Vec = config + .instructions_paths() + .into_iter() + .map(Into::into) + .collect(); + if let Some(ref extra) = append_system_prompt { + instrs.push(crate::prompts::InstructionSource::Inline { + name: "cli:append-system-prompt".into(), + content: extra.clone(), + }); + } + instrs + }, project_context_pack_enabled: config.project_context_pack_enabled(), translation_enabled: false, show_thinking: settings.show_thinking, - max_steps: 100, + max_steps: max_turns, max_subagents, features: config.features(), compaction, @@ -5837,7 +5881,8 @@ async fn run_exec_agent( vision_config: config.vision_model_config(), strict_tool_mode: config.strict_tool_mode.unwrap_or(false), goal_objective: None, - allowed_tools: None, + allowed_tools: allowed_tools.clone(), + disallowed_tools: disallowed_tools.clone(), hook_executor: None, locale_tag: crate::localization::resolve_locale(&settings.locale) .tag() @@ -5895,7 +5940,7 @@ async fn run_exec_agent( mode, model: effective_model.clone(), goal_objective: None, - allowed_tools: None, + allowed_tools: allowed_tools.clone(), hook_executor: None, reasoning_effort: effective_reasoning_effort, reasoning_effort_auto: auto_model, diff --git a/crates/tui/src/runtime_threads.rs b/crates/tui/src/runtime_threads.rs index 862d82e5..724c9d35 100644 --- a/crates/tui/src/runtime_threads.rs +++ b/crates/tui/src/runtime_threads.rs @@ -2082,6 +2082,7 @@ impl RuntimeThreadManager { strict_tool_mode: self.config.strict_tool_mode.unwrap_or(false), goal_objective: None, allowed_tools: None, + disallowed_tools: None, hook_executor: None, locale_tag: crate::localization::resolve_locale(&settings.locale) .tag() diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs index 3481d43e..1887dbe3 100644 --- a/crates/tui/src/tui/ui.rs +++ b/crates/tui/src/tui/ui.rs @@ -894,6 +894,7 @@ fn build_engine_config(app: &App, config: &Config) -> EngineConfig { ), max_spawn_depth: crate::tools::subagent::DEFAULT_MAX_SPAWN_DEPTH, allowed_tools: app.active_allowed_tools.clone(), + disallowed_tools: None, hook_executor: app.runtime_services.hook_executor.clone(), network_policy: config.network.clone().map(|toml_cfg| { crate::network_policy::NetworkPolicyDecider::with_default_audit(toml_cfg.into_runtime()) From b433989cc31cd34d52535b3dab0ec6e7ab78e123 Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Wed, 10 Jun 2026 16:40:27 -0700 Subject: [PATCH 2/3] style: cargo fmt Co-Authored-By: Claude Fable 5 --- crates/tui/src/main.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs index 6b0b526e..74a20280 100644 --- a/crates/tui/src/main.rs +++ b/crates/tui/src/main.rs @@ -1000,10 +1000,14 @@ async fn main() -> Result<()> { let auto_mode = args.auto || yolo; let max_turns = args.max_turns.unwrap_or(100); let allowed_tools = args.allowed_tools.as_ref().map(|v| { - v.iter().map(|s| s.to_ascii_lowercase().trim().to_string()).collect::>() + v.iter() + .map(|s| s.to_ascii_lowercase().trim().to_string()) + .collect::>() }); let disallowed_tools = args.disallowed_tools.as_ref().map(|v| { - v.iter().map(|s| s.to_ascii_lowercase().trim().to_string()).collect::>() + v.iter() + .map(|s| s.to_ascii_lowercase().trim().to_string()) + .collect::>() }); run_exec_agent( &config, From c15e93709618dd2d1fa18b81e1b44db8665a0904 Mon Sep 17 00:00:00 2001 From: Claude Date: Thu, 11 Jun 2026 00:31:05 +0000 Subject: [PATCH 3/3] fix(exec): wire --disallowed-tools into the gate chain (deny wins over allow), filter the advertised tool catalog, honor --append-system-prompt in needs_engine, surface max-steps notice in text mode; add clap/gate/catalog tests (#3027) Co-Authored-By: Claude https://claude.ai/code/session_018zaP8vUfTAsrE38L6h6fw5 --- crates/tui/src/core/engine.rs | 18 +++++++++ crates/tui/src/core/engine/tests.rs | 39 +++++++++++++++++++ crates/tui/src/core/engine/turn_loop.rs | 44 ++++++++++++++++++++- crates/tui/src/main.rs | 51 ++++++++++++++++++++++++- 4 files changed, 149 insertions(+), 3 deletions(-) diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index 23dfe719..599478fe 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -1813,6 +1813,11 @@ impl Engine { tool.defer_loading = Some(false); } } + filter_tool_catalog_for_gates( + &mut catalog, + self.config.allowed_tools.as_deref(), + self.config.disallowed_tools.as_deref(), + ); catalog }); let tool_catalog_for_event = tools.clone(); @@ -2786,6 +2791,19 @@ pub(crate) fn default_active_native_tool_names() -> &'static [&'static str] { tool_catalog::DEFAULT_ACTIVE_NATIVE_TOOLS } +/// Drop catalog entries the execution gates would reject (#3027): the model +/// should never be advertised a tool it cannot call. Deny wins over allow. +fn filter_tool_catalog_for_gates( + catalog: &mut Vec, + allowed_tools: Option<&[String]>, + disallowed_tools: Option<&[String]>, +) { + catalog.retain(|tool| { + !turn_loop::command_denies_tool(disallowed_tools, &tool.name) + && turn_loop::command_allows_tool(allowed_tools, &tool.name) + }); +} + use self::approval::{ApprovalDecision, ApprovalResult, UserInputDecision}; #[cfg(test)] use self::dispatch::should_parallelize_tool_batch; diff --git a/crates/tui/src/core/engine/tests.rs b/crates/tui/src/core/engine/tests.rs index 212042c4..4f90e91d 100644 --- a/crates/tui/src/core/engine/tests.rs +++ b/crates/tui/src/core/engine/tests.rs @@ -85,6 +85,45 @@ fn build_engine_with_capacity(capacity: CapacityControllerConfig) -> Engine { engine } +fn catalog_tool(name: &str) -> Tool { + Tool { + tool_type: None, + name: name.to_string(), + description: String::new(), + input_schema: json!({"type": "object"}), + allowed_callers: None, + defer_loading: None, + input_examples: None, + strict: None, + cache_control: None, + } +} + +#[test] +fn tool_catalog_filter_applies_allow_and_deny_gates() { + // #3027 AC1: the advertised catalog must not contain tools the execution + // gates would deny; deny wins over allow. + let mut catalog = vec![ + catalog_tool("read_file"), + catalog_tool("exec_shell"), + catalog_tool("grep_files"), + ]; + filter_tool_catalog_for_gates( + &mut catalog, + Some(&["read_file".to_string(), "exec_shell".to_string()][..]), + Some(&["exec_shell".to_string()][..]), + ); + let names: Vec<&str> = catalog.iter().map(|t| t.name.as_str()).collect(); + assert_eq!(names, ["read_file"]); +} + +#[test] +fn tool_catalog_filter_is_inert_without_gates() { + let mut catalog = vec![catalog_tool("read_file"), catalog_tool("exec_shell")]; + filter_tool_catalog_for_gates(&mut catalog, None, None); + assert_eq!(catalog.len(), 2); +} + #[test] fn structured_state_block_includes_rich_plan_artifact() { let state = StructuredState { diff --git a/crates/tui/src/core/engine/turn_loop.rs b/crates/tui/src/core/engine/turn_loop.rs index 631c3a7b..a209d967 100644 --- a/crates/tui/src/core/engine/turn_loop.rs +++ b/crates/tui/src/core/engine/turn_loop.rs @@ -1380,6 +1380,16 @@ impl Engine { ))); } + // #3027: deny wins over allow — check the deny-list first so a + // tool present in both lists is still blocked. + if blocked_error.is_none() + && command_denies_tool(self.config.disallowed_tools.as_deref(), &tool_name) + { + blocked_error = Some(ToolError::permission_denied(format!( + "Tool '{tool_name}' is in the disallowed-tools list" + ))); + } + if blocked_error.is_none() && !command_allows_tool(self.config.allowed_tools.as_deref(), &tool_name) { @@ -2371,7 +2381,7 @@ mod stream_timeout_tests { } } -fn command_allows_tool(allowed_tools: Option<&[String]>, tool_name: &str) -> bool { +pub(super) fn command_allows_tool(allowed_tools: Option<&[String]>, tool_name: &str) -> bool { let Some(allowed_tools) = allowed_tools else { return true; }; @@ -2380,7 +2390,7 @@ fn command_allows_tool(allowed_tools: Option<&[String]>, tool_name: &str) -> boo /// Check whether `tool_name` is explicitly denied (#3027). /// Deny always wins over allow. -fn command_denies_tool(disallowed_tools: Option<&[String]>, tool_name: &str) -> bool { +pub(super) fn command_denies_tool(disallowed_tools: Option<&[String]>, tool_name: &str) -> bool { let Some(disallowed_tools) = disallowed_tools else { return false; }; @@ -2722,6 +2732,36 @@ mod tests { assert!(!command_allows_tool(Some(&allowed), "bash")); } + #[test] + fn disallowed_tools_gate_blocks_listed_tool() { + let disallowed = vec!["exec_shell".to_string()]; + assert!(command_denies_tool(Some(&disallowed), "exec_shell")); + assert!(!command_denies_tool(Some(&disallowed), "read_file")); + } + + #[test] + fn disallowed_tools_gate_blocks_case_insensitively() { + let disallowed = vec!["exec_shell".to_string()]; + assert!(command_denies_tool(Some(&disallowed), "Exec_Shell")); + } + + #[test] + fn disallowed_tools_gate_is_inert_when_not_set() { + assert!(!command_denies_tool(None, "exec_shell")); + let empty: Vec = Vec::new(); + assert!(!command_denies_tool(Some(&empty), "exec_shell")); + } + + #[test] + fn deny_wins_over_allow_for_same_tool() { + // The turn-loop gate chain checks the deny-list before the allow-list, + // so a tool present in both must still be blocked. + let allowed = vec!["exec_shell".to_string()]; + let disallowed = vec!["exec_shell".to_string()]; + assert!(command_allows_tool(Some(&allowed), "exec_shell")); + assert!(command_denies_tool(Some(&disallowed), "exec_shell")); + } + #[test] fn review_regression_allowed_tools_gate_checks_canonical_tool_name() { let tmp = tempfile::tempdir().expect("tempdir"); diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs index 74a20280..a56ffab1 100644 --- a/crates/tui/src/main.rs +++ b/crates/tui/src/main.rs @@ -991,7 +991,8 @@ async fn main() -> Result<()> { || args.output_format == ExecOutputFormat::StreamJson || args.max_turns.is_some() || args.allowed_tools.is_some() - || args.disallowed_tools.is_some(); + || args.disallowed_tools.is_some() + || args.append_system_prompt.is_some(); if needs_engine { let max_subagents = cli.max_subagents.map_or_else( || config.max_subagents(), @@ -6233,6 +6234,15 @@ async fn run_exec_agent( latest_model = model; latest_workspace = workspace; } + // #3027: surface the engine's max-steps notice in text mode so a + // --max-turns run that stops early says why instead of going quiet. + Event::Status { message } + if output_format == ExecOutputFormat::Text + && !json_output + && message.contains("Reached maximum steps") => + { + eprintln!("{message}"); + } _ => {} } } @@ -6693,6 +6703,45 @@ mod terminal_mode_tests { assert_eq!(args.output_format, ExecOutputFormat::Text); } + #[test] + fn exec_parses_tool_gate_and_hardening_flags() { + let cli = parse_cli(&[ + "codewhale", + "exec", + "--allowed-tools", + "read_file,grep_files", + "--disallowed-tools", + "exec_shell", + "--max-turns", + "7", + "--append-system-prompt", + "extra rules", + "do the thing", + ]); + let Some(Commands::Exec(args)) = cli.command else { + panic!("expected exec command"); + }; + + assert_eq!( + args.allowed_tools.as_deref(), + Some(&["read_file".to_string(), "grep_files".to_string()][..]) + ); + assert_eq!( + args.disallowed_tools.as_deref(), + Some(&["exec_shell".to_string()][..]) + ); + assert_eq!(args.max_turns, Some(7)); + assert_eq!(args.append_system_prompt.as_deref(), Some("extra rules")); + assert_eq!(args.prompt, vec!["do the thing"]); + } + + #[test] + fn exec_rejects_zero_max_turns() { + let err = Cli::try_parse_from(["codewhale", "exec", "--max-turns", "0", "hello"]) + .expect_err("max-turns must be >= 1"); + assert_eq!(err.kind(), clap::error::ErrorKind::ValueValidation); + } + #[test] fn exec_accepts_continue_for_latest_workspace_session() { let cli = parse_cli(&["codewhale", "exec", "--continue", "follow up"]);