test(fleet): CI-safe multi-worker dogfood smoke with injected failure (#3166)
Adds a verifiable dogfood smoke that drives several concurrent exec-style workers (three healthy + one injected-failure that emits an error event and exits non-zero) through the real host adapter, asserting distinct terminal pass/fail outcomes — no external services, no model calls, no codewhale binary. Documents the automated CI smoke vs the manual `codewhale fleet run` path in the dogfood spec, and is honest that the manager run-loop cutover to drive real FleetExecutor workers is still in progress. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -421,4 +421,66 @@ mod tests {
|
||||
);
|
||||
assert!(exec.all_terminal());
|
||||
}
|
||||
|
||||
/// Dogfood smoke (#3166): several concurrent exec-style workers with one
/// injected failure. Proves the executor drives a small fleet to terminal
/// outcomes and that a failing worker is classified distinctly from the
/// passing ones — all without the codewhale binary.
///
/// Three `sh -c` workers print a `tool_use` event followed by `done`; a
/// fourth prints an `error` event and exits with status 7. The test polls
/// every worker until each has reported a terminal payload, then asserts
/// that the healthy workers ended as `Completed` and the injected-failure
/// worker ended as `Failed`.
///
/// # Panics
///
/// Panics if a worker fails to start, if any worker has not reported a
/// terminal payload within 8 seconds, or if a terminal payload has the
/// wrong variant.
#[cfg(unix)]
#[test]
fn executor_drives_concurrent_workers_with_injected_failure() {
    use std::collections::BTreeMap;
    use std::time::{Duration, Instant};

    // Single source of truth for the worker ids used throughout the test.
    const HEALTHY_IDS: [&str; 3] = ["w1", "w2", "w3"];
    const FAILING_ID: &str = "w-fail";
    const TIMEOUT: Duration = Duration::from_secs(8);
    const POLL_INTERVAL: Duration = Duration::from_millis(20);

    let tmp = tempfile::TempDir::new().unwrap();
    let mut exec = FleetExecutor::new(tmp.path());

    // Three healthy workers emit a tool_use + done; one injected-failure
    // worker emits an error event and exits non-zero.
    let ok = r#"printf '{"type":"tool_use","name":"grep_files","id":"c","input":{}}\n{"type":"done"}\n'"#;
    let bad = r#"printf '{"type":"error","error":"injected failure"}\n'; exit 7"#;

    // Every worker is a plain `sh -c <script>` subprocess.
    let sh = |script: &str| {
        FleetWorkerCommand::new("sh", vec!["-c".to_string(), script.to_string()])
    };

    for id in HEALTHY_IDS {
        exec.start_worker(id, sh(ok), None).unwrap();
    }
    exec.start_worker(FAILING_ID, sh(bad), None).unwrap();

    let all_ids: Vec<&str> = HEALTHY_IDS
        .iter()
        .copied()
        .chain(std::iter::once(FAILING_ID))
        .collect();

    let mut terminals: BTreeMap<&str, FleetWorkerEventPayload> = BTreeMap::new();
    let deadline = Instant::now() + TIMEOUT;
    loop {
        for &id in &all_ids {
            // Intermediate events are not asserted on here; only the
            // terminal payload of each worker matters.
            let _ = exec.drain_events(id);
            if let Some(term) = exec.poll_terminal(id) {
                terminals.insert(id, term);
            }
        }

        // Check for completion *before* the deadline: a sweep that collects
        // the last terminal payload must not be failed (or delayed by another
        // sleep) just because it finished slightly past the deadline.
        if terminals.len() >= all_ids.len() {
            break;
        }

        assert!(
            Instant::now() < deadline,
            "not all workers terminated: {terminals:?}"
        );
        std::thread::sleep(POLL_INTERVAL);
    }

    assert!(exec.all_terminal());
    for id in HEALTHY_IDS {
        assert!(
            matches!(terminals[id], FleetWorkerEventPayload::Completed { .. }),
            "{id} should pass, got {:?}",
            terminals[id]
        );
    }
    assert!(
        matches!(terminals[FAILING_ID], FleetWorkerEventPayload::Failed { .. }),
        "injected-failure worker should fail, got {:?}",
        terminals[FAILING_ID]
    );
}
|
||||
}
|
||||
|
||||
@@ -1,16 +1,27 @@
|
||||
# Agent Fleet dogfood smoke spec (#3166)
|
||||
#
|
||||
# This spec exercises the fleet end-to-end: create a run with two local
|
||||
# workers, run a lint task and a review task, verify the ledger records
|
||||
# receipts, and confirm the status surfaces work.
|
||||
# workers, run a workspace-check task and a protocol-review task, verify the
|
||||
# ledger records receipts, and confirm the status surfaces work. Each worker is
|
||||
# a headless `codewhale exec` run (see docs/AGENT_RUNTIME.md).
|
||||
#
|
||||
# Run:
|
||||
# Automated CI-safe smoke (no external services, no model calls):
|
||||
# cargo test -p codewhale-tui --bins fleet::executor
|
||||
# It drives several concurrent exec-style workers (with one injected failure)
|
||||
# through the real host adapter and asserts terminal pass/fail outcomes.
|
||||
#
|
||||
# Manual run (intended to drive real `codewhale exec` workers, which need provider creds; see the NOTE below for the current status):
|
||||
# codewhale fleet run docs/examples/fleet-dogfood.toml --max-workers 2 --once
|
||||
#
|
||||
# Then check:
|
||||
# codewhale fleet status
|
||||
# codewhale fleet inspect <worker-id-from-status>
|
||||
# codewhale fleet logs <worker-id-from-status>
|
||||
#
|
||||
# NOTE: wiring the manager run loop to drive FleetExecutor for real workers is
|
||||
# the in-progress cutover; until then the manual run path uses the local
|
||||
# simulation harness. The automated smoke above already proves the real
|
||||
# exec-subprocess -> ledger-event path.
|
||||
|
||||
name = "dogfood smoke"
|
||||
labels = { milestone = "v0.8.60", class = "smoke" }
|
||||
|
||||
Reference in New Issue
Block a user