test(fleet): CI-safe multi-worker dogfood smoke with injected failure (#3166)

Adds a verifiable dogfood smoke that drives several concurrent exec-style
workers (three healthy plus one injected-failure worker that emits an error
event and exits non-zero) through the real host adapter, asserting distinct
terminal pass/fail outcomes — no external services, no model calls, no
codewhale binary. Documents the automated CI smoke vs the manual
`codewhale fleet run` path in the dogfood spec, and states explicitly that the
manager run-loop cutover to drive real FleetExecutor workers is still in
progress.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hunter B
2026-06-13 01:30:06 -07:00
parent 22980b8788
commit 5106ecfb2d
2 changed files with 76 additions and 3 deletions
+62
View File
@@ -421,4 +421,66 @@ mod tests {
);
assert!(exec.all_terminal());
}
/// Dogfood smoke (#3166): several concurrent exec-style workers with one
/// injected failure.
///
/// Starts three `sh -c` workers that print a `tool_use` event followed by
/// `done`, plus one worker that prints an `error` event and exits with
/// status 7, then polls the executor until every worker has reported a
/// terminal payload. Asserts that the healthy workers end as `Completed` and
/// the injected-failure worker ends as `Failed` — all without the codewhale
/// binary.
///
/// # Panics
///
/// Panics if some worker still has no terminal payload once the 8 second
/// deadline has passed, or if any worker's terminal payload is not the
/// expected variant.
#[cfg(unix)]
#[test]
fn executor_drives_concurrent_workers_with_injected_failure() {
    let tmp = tempfile::TempDir::new().unwrap();
    let mut exec = FleetExecutor::new(tmp.path());

    // Three healthy workers emit a tool_use + done; one injected-failure
    // worker emits an error event and exits non-zero.
    let ok = r#"printf '{"type":"tool_use","name":"grep_files","id":"c","input":{}}\n{"type":"done"}\n'"#;
    let bad = r#"printf '{"type":"error","error":"injected failure"}\n'; exit 7"#;

    // Start order is preserved: the healthy workers first, then the failing one.
    let launches = [("w1", ok), ("w2", ok), ("w3", ok), ("w-fail", bad)];
    for (id, script) in launches {
        exec.start_worker(
            id,
            FleetWorkerCommand::new("sh", vec!["-c".to_string(), script.to_string()]),
            None,
        )
        .unwrap();
    }

    let ids = ["w1", "w2", "w3", "w-fail"];
    let mut terminals: std::collections::BTreeMap<&str, FleetWorkerEventPayload> =
        std::collections::BTreeMap::new();
    let deadline = std::time::Instant::now() + std::time::Duration::from_secs(8);

    loop {
        for id in ids {
            // Drained events are discarded: this test only inspects the
            // terminal payload of each worker.
            let _ = exec.drain_events(id);
            if let Some(term) = exec.poll_terminal(id) {
                terminals.insert(id, term);
            }
        }

        // Stop as soon as every worker is accounted for. Checking this before
        // the deadline assertion means a pass that collects the last terminal
        // cannot fail just because it finished after the deadline, and a
        // successful run does not pay a trailing sleep.
        if terminals.len() == ids.len() {
            break;
        }

        assert!(
            std::time::Instant::now() < deadline,
            "not all workers terminated: {terminals:?}"
        );
        std::thread::sleep(std::time::Duration::from_millis(20));
    }

    assert!(exec.all_terminal());

    for id in ["w1", "w2", "w3"] {
        assert!(
            matches!(terminals[id], FleetWorkerEventPayload::Completed { .. }),
            "{id} should pass, got {:?}",
            terminals[id]
        );
    }
    assert!(
        matches!(terminals["w-fail"], FleetWorkerEventPayload::Failed { .. }),
        "injected-failure worker should fail, got {:?}",
        terminals["w-fail"]
    );
}
}
+14 -3
View File
@@ -1,16 +1,27 @@
# Agent Fleet dogfood smoke spec (#3166)
#
# This spec exercises the fleet end-to-end: create a run with two local
# workers, run a lint task and a review task, verify the ledger records
# receipts, and confirm the status surfaces work.
# workers, run a workspace-check task and a protocol-review task, verify the
# ledger records receipts, and confirm the status surfaces work. Each worker is
# a headless `codewhale exec` run (see docs/AGENT_RUNTIME.md).
#
# Run:
# Automated CI-safe smoke (no external services, no model calls):
# cargo test -p codewhale-tui --bins fleet::executor
# It drives several concurrent exec-style workers (with one injected failure)
# through the real host adapter and asserts terminal pass/fail outcomes.
#
# Manual run (targets real `codewhale exec` workers, which need provider creds;
# see the NOTE below for the current status of this path):
# codewhale fleet run docs/examples/fleet-dogfood.toml --max-workers 2 --once
#
# Then check:
# codewhale fleet status
# codewhale fleet inspect <worker-id-from-status>
# codewhale fleet logs <worker-id-from-status>
#
# NOTE: the manager run loop does not yet drive FleetExecutor for real workers;
# that cutover is in progress. Until it lands, the manual run path uses the
# local simulation harness. The automated smoke above already proves the real
# exec-subprocess -> ledger-event path.
name = "dogfood smoke"
labels = { milestone = "v0.8.60", class = "smoke" }