fix(verifier): detach background verifier runs
Teach run_verifiers to accept an explicit background=true mode that starts each verifier gate as a managed shell job and returns task_ids immediately. The normal verifier path remains blocking by default, while detached verifier starts can batch with read-only inspection work under auto-approve. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -571,6 +571,37 @@ fn background_shell_starts_batch_with_readonly_tools_when_auto_approved() {
|
||||
}
|
||||
}
|
||||
|
||||
/// A `run_verifiers` call flagged as a detached start, placed between two
/// `exec_shell` plans, must be scheduled together with them in one parallel batch.
#[test]
fn background_verifier_starts_batch_with_readonly_tools_when_auto_approved() {
    let mut status_probe = make_plan_at(0, true, true, false, false);
    status_probe.name = "exec_shell".to_string();
    status_probe.input = json!({"command": "git status -s"});

    let mut detached_verifier = make_plan_at(1, false, false, false, false);
    detached_verifier.name = "run_verifiers".to_string();
    detached_verifier.input = json!({"profile": "rust", "level": "full", "background": true});
    detached_verifier.detached_start = true;

    let mut todo_search = make_plan_at(2, true, true, false, false);
    todo_search.name = "exec_shell".to_string();
    todo_search.input = json!({"command": "rg TODO crates/tui/src/core"});

    let batches =
        plan_tool_execution_batches(vec![status_probe, detached_verifier, todo_search]);
    assert_eq!(batches.len(), 1);

    // Anything other than a parallel batch means the verifier start was serialized.
    let ToolExecutionBatch::Parallel(plans) = &batches[0] else {
        panic!("background verifier start should join parallel batch")
    };

    // The batch must keep all three plans in their original order.
    let order: Vec<_> = plans.iter().map(|plan| plan.index).collect();
    assert_eq!(order, vec![0, 1, 2]);
}
|
||||
|
||||
#[test]
|
||||
fn successful_update_plan_ends_plan_mode_turn_immediately() {
|
||||
assert!(should_stop_after_plan_tool(
|
||||
|
||||
@@ -944,6 +944,12 @@ pub(crate) fn render_core_tool_taxonomy_body(mode: AppMode) -> String {
|
||||
if let Some(verification) = render_core_tool_group(TOOL_TAXONOMY_VERIFICATION, &core_tools) {
|
||||
sentences.push(format!("Use {verification} for verification."));
|
||||
}
|
||||
if core_tools.contains(&"run_verifiers") {
|
||||
sentences.push(
|
||||
"For long build/test/lint verifier suites, call `run_verifiers` with `background: true` or use `task_shell_start`, then poll while continuing independent inspection."
|
||||
.to_string(),
|
||||
);
|
||||
}
|
||||
|
||||
debug_assert!(
|
||||
!sentences.is_empty(),
|
||||
|
||||
@@ -13,6 +13,7 @@ use std::time::Instant;
|
||||
use async_trait::async_trait;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::{Value, json};
|
||||
use shlex::try_join;
|
||||
|
||||
use crate::dependencies::ExternalTool;
|
||||
|
||||
@@ -23,6 +24,7 @@ use super::spec::{
|
||||
const MAX_GATE_OUTPUT_CHARS: usize = 16_000;
|
||||
const DEFAULT_MAX_PYTHON_FILES: usize = 200;
|
||||
const MAX_CUSTOM_GATES: usize = 12;
|
||||
/// Value (600_000 ms = 10 minutes) passed to the shell manager's
/// `execute_with_options_env` for every gate started in background mode.
const BACKGROUND_GATE_TIMEOUT_MS: u64 = 600_000;
|
||||
|
||||
/// Tool for running independent verifier gates concurrently.
|
||||
pub struct RunVerifiersTool;
|
||||
@@ -95,6 +97,7 @@ struct RunVerifiersInput {
|
||||
level: String,
|
||||
max_python_files: usize,
|
||||
commands: Vec<CustomVerifierInput>,
|
||||
background: bool,
|
||||
}
|
||||
|
||||
impl Default for RunVerifiersInput {
|
||||
@@ -104,6 +107,7 @@ impl Default for RunVerifiersInput {
|
||||
level: "quick".to_string(),
|
||||
max_python_files: DEFAULT_MAX_PYTHON_FILES,
|
||||
commands: Vec::new(),
|
||||
background: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -166,6 +170,33 @@ struct RunVerifiersOutput {
|
||||
gates: Vec<GateResult>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
struct BackgroundGateJob {
|
||||
name: String,
|
||||
ecosystem: String,
|
||||
status: String,
|
||||
command: String,
|
||||
cwd: String,
|
||||
task_id: Option<String>,
|
||||
skipped_reason: Option<String>,
|
||||
error: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
struct RunVerifiersBackgroundOutput {
|
||||
success: bool,
|
||||
profile: String,
|
||||
level: String,
|
||||
workspace: String,
|
||||
background: bool,
|
||||
gate_count: usize,
|
||||
started: usize,
|
||||
skipped: usize,
|
||||
failed_to_start: usize,
|
||||
summary: String,
|
||||
jobs: Vec<BackgroundGateJob>,
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl ToolSpec for RunVerifiersTool {
|
||||
fn name(&self) -> &'static str {
|
||||
@@ -228,6 +259,11 @@ impl ToolSpec for RunVerifiersTool {
|
||||
"additionalProperties": false
|
||||
},
|
||||
"default": []
|
||||
},
|
||||
"background": {
|
||||
"type": "boolean",
|
||||
"default": false,
|
||||
"description": "Start verifier gates as background shell jobs and return task_ids immediately. Use for long build/test/lint gates, then poll with exec_shell_wait or task_shell_wait while continuing independent inspection."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false
|
||||
@@ -242,6 +278,10 @@ impl ToolSpec for RunVerifiersTool {
|
||||
ApprovalRequirement::Required
|
||||
}
|
||||
|
||||
fn starts_detached_for(&self, input: &Value) -> bool {
|
||||
input.get("background").and_then(Value::as_bool) == Some(true)
|
||||
}
|
||||
|
||||
async fn execute(&self, input: Value, context: &ToolContext) -> Result<ToolResult, ToolError> {
|
||||
let input: RunVerifiersInput = serde_json::from_value(input)
|
||||
.map_err(|err| ToolError::invalid_input(err.to_string()))?;
|
||||
@@ -282,6 +322,10 @@ impl ToolSpec for RunVerifiersTool {
|
||||
.map_err(|err| ToolError::execution_failed(err.to_string()));
|
||||
}
|
||||
|
||||
if input.background {
|
||||
return start_background_gates(context, profile, level, gates);
|
||||
}
|
||||
|
||||
let mut handles = Vec::with_capacity(gates.len());
|
||||
for gate in gates {
|
||||
handles.push(tokio::task::spawn_blocking(move || run_gate(gate)));
|
||||
@@ -345,6 +389,129 @@ impl ToolSpec for RunVerifiersTool {
|
||||
}
|
||||
}
|
||||
|
||||
fn start_background_gates(
|
||||
context: &ToolContext,
|
||||
profile: VerifierProfile,
|
||||
level: VerifierLevel,
|
||||
gates: Vec<VerifierGate>,
|
||||
) -> Result<ToolResult, ToolError> {
|
||||
let mut jobs = Vec::with_capacity(gates.len());
|
||||
let mut started = 0usize;
|
||||
let mut skipped = 0usize;
|
||||
let mut failed_to_start = 0usize;
|
||||
|
||||
for gate in gates {
|
||||
let cwd = gate.cwd.display().to_string();
|
||||
let Some(program) = gate.program.as_deref() else {
|
||||
skipped += 1;
|
||||
jobs.push(BackgroundGateJob {
|
||||
name: gate.name,
|
||||
ecosystem: gate.ecosystem,
|
||||
status: "skipped".to_string(),
|
||||
command: String::new(),
|
||||
cwd,
|
||||
task_id: None,
|
||||
skipped_reason: gate.skipped_reason,
|
||||
error: None,
|
||||
});
|
||||
continue;
|
||||
};
|
||||
|
||||
let command = render_gate_command(program, &gate.args)?;
|
||||
let env: HashMap<String, String> = gate.env.into_iter().collect();
|
||||
let spawn_result = {
|
||||
let mut manager = context
|
||||
.shell_manager
|
||||
.lock()
|
||||
.map_err(|_| ToolError::execution_failed("shell manager lock poisoned"))?;
|
||||
manager.execute_with_options_env(
|
||||
&command,
|
||||
Some(&cwd),
|
||||
BACKGROUND_GATE_TIMEOUT_MS,
|
||||
true,
|
||||
None,
|
||||
false,
|
||||
context.elevated_sandbox_policy.clone(),
|
||||
env,
|
||||
)
|
||||
};
|
||||
|
||||
match spawn_result {
|
||||
Ok(result) => {
|
||||
started += 1;
|
||||
jobs.push(BackgroundGateJob {
|
||||
name: gate.name,
|
||||
ecosystem: gate.ecosystem,
|
||||
status: "running".to_string(),
|
||||
command,
|
||||
cwd,
|
||||
task_id: result.task_id,
|
||||
skipped_reason: None,
|
||||
error: None,
|
||||
});
|
||||
}
|
||||
Err(err) => {
|
||||
failed_to_start += 1;
|
||||
jobs.push(BackgroundGateJob {
|
||||
name: gate.name,
|
||||
ecosystem: gate.ecosystem,
|
||||
status: "failed_to_start".to_string(),
|
||||
command,
|
||||
cwd,
|
||||
task_id: None,
|
||||
skipped_reason: None,
|
||||
error: Some(err.to_string()),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
jobs.sort_by(|a, b| a.name.cmp(&b.name));
|
||||
let success = failed_to_start == 0 && started > 0;
|
||||
let summary = if failed_to_start == 0 {
|
||||
format!(
|
||||
"Started {started} verifier gate(s) in the background; {skipped} skipped. Poll task_ids with exec_shell_wait or task_shell_wait."
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
"Started {started} verifier gate(s), failed to start {failed_to_start}, and skipped {skipped}. Poll task_ids with exec_shell_wait or task_shell_wait."
|
||||
)
|
||||
};
|
||||
let task_ids = jobs
|
||||
.iter()
|
||||
.filter_map(|job| job.task_id.clone())
|
||||
.collect::<Vec<_>>();
|
||||
let output = RunVerifiersBackgroundOutput {
|
||||
success,
|
||||
profile: profile.as_str().to_string(),
|
||||
level: level.as_str().to_string(),
|
||||
workspace: context.workspace.display().to_string(),
|
||||
background: true,
|
||||
gate_count: jobs.len(),
|
||||
started,
|
||||
skipped,
|
||||
failed_to_start,
|
||||
summary,
|
||||
jobs,
|
||||
};
|
||||
|
||||
let mut result =
|
||||
ToolResult::json(&output).map_err(|err| ToolError::execution_failed(err.to_string()))?;
|
||||
result.success = success;
|
||||
Ok(result.with_metadata(json!({
|
||||
"backgrounded": true,
|
||||
"detached_start": true,
|
||||
"verifier_background": true,
|
||||
"task_ids": task_ids,
|
||||
"poll_with": ["exec_shell_wait", "task_shell_wait"]
|
||||
})))
|
||||
}
|
||||
|
||||
fn render_gate_command(program: &str, args: &[String]) -> Result<String, ToolError> {
|
||||
try_join(std::iter::once(program).chain(args.iter().map(String::as_str)))
|
||||
.map_err(|err| ToolError::execution_failed(format!("failed to render gate command: {err}")))
|
||||
}
|
||||
|
||||
fn build_gate_plan(
|
||||
context: &ToolContext,
|
||||
profile: VerifierProfile,
|
||||
@@ -951,6 +1118,7 @@ fn char_boundary_index(text: &str, max_chars: usize) -> usize {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::tools::shell::ShellStatus;
|
||||
use tempfile::tempdir;
|
||||
|
||||
#[test]
|
||||
@@ -963,6 +1131,20 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
/// The schema text for `background` must name both polling tools, and only an
/// explicit `background: true` input may be reported as a detached start.
#[test]
fn run_verifiers_background_advertises_detached_start() {
    let tool = RunVerifiersTool;

    let schema = tool.input_schema();
    let description = schema["properties"]["background"]["description"]
        .as_str()
        .expect("background description");
    assert!(description.contains("exec_shell_wait"));
    assert!(description.contains("task_shell_wait"));

    let explicit_background = json!({"background": true});
    let no_background_key = json!({"profile": "auto"});
    assert!(tool.starts_detached_for(&explicit_background));
    assert!(!tool.starts_detached_for(&no_background_key));
}
|
||||
|
||||
#[test]
|
||||
fn auto_profile_detects_multiple_ecosystems_without_bash() {
|
||||
let tmp = tempdir().expect("tempdir");
|
||||
@@ -1064,4 +1246,64 @@ mod tests {
|
||||
parsed.gates[0].stdout
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn run_verifiers_background_starts_shell_jobs_and_returns_task_ids() {
|
||||
if !crate::dependencies::RustC::available() {
|
||||
return;
|
||||
}
|
||||
let tmp = tempdir().expect("tempdir");
|
||||
let ctx = ToolContext::new(tmp.path());
|
||||
let tool = RunVerifiersTool;
|
||||
let result = tool
|
||||
.execute(
|
||||
json!({
|
||||
"profile": "auto",
|
||||
"background": true,
|
||||
"commands": [
|
||||
{
|
||||
"name": "rustc-version",
|
||||
"program": crate::dependencies::RustC::resolve().expect("rustc"),
|
||||
"args": ["--version"]
|
||||
}
|
||||
]
|
||||
}),
|
||||
&ctx,
|
||||
)
|
||||
.await
|
||||
.expect("execute");
|
||||
|
||||
let parsed: RunVerifiersBackgroundOutput =
|
||||
serde_json::from_str(&result.content).expect("background verifier output json");
|
||||
assert!(parsed.success, "result: {}", result.content);
|
||||
assert!(parsed.background);
|
||||
assert_eq!(parsed.started, 1);
|
||||
assert_eq!(parsed.failed_to_start, 0);
|
||||
let task_id = parsed.jobs[0]
|
||||
.task_id
|
||||
.as_deref()
|
||||
.expect("background task id");
|
||||
assert!(
|
||||
result
|
||||
.metadata
|
||||
.as_ref()
|
||||
.and_then(|metadata| metadata.get("verifier_background"))
|
||||
.and_then(Value::as_bool)
|
||||
.unwrap_or(false),
|
||||
"metadata should mark verifier background start"
|
||||
);
|
||||
|
||||
let output = ctx
|
||||
.shell_manager
|
||||
.lock()
|
||||
.expect("shell manager")
|
||||
.get_output(task_id, true, 10_000)
|
||||
.expect("background output");
|
||||
assert_eq!(output.status, ShellStatus::Completed);
|
||||
assert!(
|
||||
output.stdout.contains("rustc"),
|
||||
"stdout should include rustc version: {:?}",
|
||||
output.stdout
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user