e8b52ac57a
Lands the Agent Fleet security/trust boundary and the headless-worker bridge on the v0.8.60 line, and collapses the sub-agent and fleet recursion model into a single shared axis (Hunter steer: "not two moving targets"). Security & trust (#3165): - FleetTrustLevel, FleetSecurityPolicy, FleetSecretRef (redacted), FleetWorkerAuth, FleetCapabilityGrant, FleetAlertEndpoint (redacted) in protocol. - secrets: resolve_direct(key, source_hint) — fleet secret resolution, never logged. - Host adapters refuse secret-bearing env keys; SSH uses SendEnv (no argv secrets). Roles & delegation (#3167): - fleet role -> SubAgentType mapping; reviewer/verifier default read-only. Headless worker bridge (#3096/#3154, partial — still simulation, real spawn next): - worker_runtime: FleetTaskSpec -> AgentWorkerSpec, status -> ledger events, exec hardening (mirrors #3027), parallel-safe read-only tool set (#2983). - FleetManager carries an optional SharedSubAgentManager + exec config. Recursion depth — ONE axis: - codewhale_config now owns DEFAULT_SPAWN_DEPTH (3) + MAX_SPAWN_DEPTH_CEILING (3). - sub-agent DEFAULT_MAX_SPAWN_DEPTH and the fleet clamp both source these consts. - fleet default raised 1 -> 3 to match standalone sub-agents; root runs at depth 0, budget gates child delegation. End-to-end test proves a depth-0 fleet worker reaches 3 nested levels (afford >= 3). Dogfood scaffolding (#3166, partial): docs/examples/fleet-dogfood.toml. Tests green: codewhale-config fleet, codewhale-tui fleet (58), subagent max_depth; cargo fmt + git diff --check clean; cargo check --workspace ok. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
53 lines
3.3 KiB
TOML
53 lines
3.3 KiB
TOML
# Agent Fleet dogfood smoke spec (#3166)
#
# This spec exercises the fleet end-to-end: create a run with two local
# workers, run a `cargo check` task and a review task, verify the ledger
# records receipts, and confirm the status surfaces work.
#
# Run:
#   codewhale fleet run docs/examples/fleet-dogfood.toml --max-workers 2 --once
#
# Then check:
#   codewhale fleet status
#   codewhale fleet inspect <worker-id-from-status>
#   codewhale fleet logs <worker-id-from-status>

# Run-level settings. These root-table keys must stay above the first
# [[tasks]] header, otherwise they would be parsed as part of a task.
name = "dogfood smoke"
labels = { milestone = "v0.8.60", class = "smoke" }

# Smoke-test policy: "local" default trust level, an empty secret allow-list,
# and identity verification switched off.
security_policy = { default_trust_level = "local", allowed_secrets = [], require_identity_verification = false }

# Task 1: compile-check the workspace. The worker is told to report results
# only and not to attempt any fixes.
[[tasks]]
id = "cargo-check"
name = "Workspace check"
description = "Run `cargo check --workspace` and report any compilation errors."
objective = "Verify the workspace compiles cleanly with zero errors."
instructions = "Run `cargo check --workspace` in the repo root. If it compiles cleanly, report success. If there are errors, list each file:line and the error message. Do NOT attempt to fix anything — just report what you found."
worker = { role = "release-checker", tool_profile = "read-only", tools = ["cargo"], capabilities = ["rust"] }
# Dotted keys instead of a nested inline table; the parsed data is identical.
workspace.required_files = ["Cargo.toml"]
workspace.writable_paths = [".codewhale/fleet"]
workspace.environment.required = ["PATH"]
input_files = ["Cargo.toml"]
context = ["You are running in a fleet smoke test. Be concise. Only report the pass/fail and any specific errors."]
# budget.max_seconds and timeout_seconds are both set to 300 here.
budget = { max_tokens = 8000, max_tool_calls = 12, max_seconds = 300 }
expected_artifacts = ["log", "report", "receipt"]
scorer = { kind = "exit_code" }
retry_policy = { max_attempts = 2, initial_backoff_seconds = 5, max_backoff_seconds = 30 }
timeout_seconds = 300
tags = ["smoke", "check"]

# Task 2: review of the fleet protocol and secrets crates. The worker writes
# a report with file:line findings, or 'all clear' when there are none.
[[tasks]]
id = "protocol-review"
name = "Protocol review"
description = "Review fleet protocol types for security and correctness."
objective = "Inspect crates/protocol/src/fleet.rs and crates/secrets/src/lib.rs. Report any missing serde defaults, unsafe wire changes, or security-sensitive fields lacking SecretRef."
instructions = "Read crates/protocol/src/fleet.rs and crates/secrets/src/lib.rs. Check for: (1) new fields without serde(default) or skip_serializing_if, (2) raw secrets in struct fields instead of FleetSecretRef, (3) missing Clone/Debug/PartialEq derives on new types. Write a concise report with file:line references for each finding. If there are no findings, report 'all clear'."
worker = { role = "reviewer", tool_profile = "read-only", tools = ["rg", "git", "cargo"], capabilities = ["rust"] }
# Dotted keys instead of a nested inline table; the parsed data is identical.
workspace.required_files = ["crates/protocol/src/fleet.rs", "crates/secrets/src/lib.rs"]
workspace.writable_paths = [".codewhale/fleet"]
workspace.environment.required = ["PATH"]
input_files = ["crates/protocol/src/fleet.rs", "crates/secrets/src/lib.rs"]
context = ["You are a fleet protocol reviewer. Be thorough but concise. Reference specific lines."]
# budget.max_seconds and timeout_seconds are both set to 600 here.
budget = { max_tokens = 10000, max_tool_calls = 16, max_seconds = 600 }
expected_artifacts = ["log", "report", "receipt"]
# Scored by a verifier prompt rather than an exit code.
scorer = { kind = "code_whale_verifier_prompt", prompt = "Verify the review includes at least one concrete file:line finding or explicitly says 'all clear'." }
# No max_backoff_seconds is given for this task (the first task sets one).
retry_policy = { max_attempts = 1, initial_backoff_seconds = 10 }
timeout_seconds = 600
tags = ["smoke", "review", "protocol"]