From fb1a27b4f2f7ed44d8a3214f068c36a1673c3159 Mon Sep 17 00:00:00 2001 From: Hunter B Date: Fri, 12 Jun 2026 22:26:35 -0700 Subject: [PATCH] feat(fleet): add manager runbook skill --- .../tui/assets/skills/fleet-manager/SKILL.md | 106 ++++++++++++++++++ crates/tui/src/skills/system.rs | 10 +- docs/FLEET.md | 61 ++++++++++ 3 files changed, 175 insertions(+), 2 deletions(-) create mode 100644 crates/tui/assets/skills/fleet-manager/SKILL.md diff --git a/crates/tui/assets/skills/fleet-manager/SKILL.md b/crates/tui/assets/skills/fleet-manager/SKILL.md new file mode 100644 index 00000000..ad2b0f67 --- /dev/null +++ b/crates/tui/assets/skills/fleet-manager/SKILL.md @@ -0,0 +1,106 @@ +--- +name: fleet-manager +description: Use when managing, triaging, restarting, escalating, or summarizing CodeWhale Agent Fleet runs and workers. +metadata: + short-description: Triage CodeWhale Agent Fleet runs +--- + +# Fleet Manager + +Use this skill when acting as a manager agent for CodeWhale Agent Fleet runs. +Your job is to classify worker state, choose the narrowest safe typed action, +and leave a ledgered receipt or a safe escalation draft. + +## Authority Boundary + +- Prefer typed fleet surfaces over shell spelunking: `codewhale fleet status`, + `inspect`, `logs`, `artifacts`, `interrupt`, `restart`, `stop`, and the + Runtime API fleet endpoints. +- Do not read `.codewhale/fleet.jsonl`, host logs, or remote files directly + unless the typed command or API is missing required evidence. +- Do not send Slack, webhook, PagerDuty, email, or chat messages unless the + user or run config explicitly authorizes sending. Draft the message instead. +- Never include secrets, tokens, webhook URLs, routing keys, full prompts, or + oversized logs in a summary or escalation. + +## Triage Loop + +1. Identify the run and worker from the user request, run receipt, or fleet + status output. If no worker is named, start with `codewhale fleet status`. +2. 
Inspect the worker with `codewhale fleet inspect <worker>` or the matching + Runtime API worker endpoint. +3. Review bounded evidence with `codewhale fleet logs <worker>` and + `codewhale fleet artifacts <worker>`. Summarize artifact refs, not full + payloads. +4. Classify the state before acting: + - `transient failure`: transport error, timeout, stale heartbeat, host + unavailable, or retryable provider/network failure. + - `task failure`: worker completed the task but the result is wrong, + missing required artifacts, or reports a domain error. + - `verifier failure`: scorer/verifier failed or disagrees with the worker + result. + - `needs-human`: missing authority, unsafe secret boundary, destructive + action, repeated restart exhaustion, ambiguous product decision, or + conflict between artifacts and verifier. +5. Choose one typed action: + - transient and retry budget remains: `codewhale fleet restart <worker>`. + - transient but unsafe to retry: draft escalation and mark needs-human. + - task failure: preserve artifacts, summarize the failure, and avoid restart + unless the task spec says retrying can produce new evidence. + - verifier failure: inspect scorer inputs and artifacts, then escalate if the + verifier cannot be corrected through a typed action. + - needs-human: do not restart automatically; draft a concise escalation. +6. Record the result in the response: classification, action taken or drafted, + evidence commands, artifact refs, and next owner. + +## Restart vs Escalate + +Restart only when all of these are true: + +- the failure is likely transient, +- the task is idempotent or the run policy allows retry, +- retry budget remains, +- no secret, permission, or destructive action boundary is involved, and +- the previous attempt produced enough receipt data to explain the restart. 
+ +Escalate when any of these are true: + +- restart budget is exhausted, +- the worker requests secrets or new authority, +- artifacts indicate data loss, corruption, or destructive side effects, +- the verifier and task result conflict in a way you cannot resolve from typed + evidence, +- the same failure repeats after a restart, or +- a human product or release decision is required. + +## Safe Escalation Draft + +Use this shape for Slack/PagerDuty drafts. Keep logs to three short lines or an +artifact ref. + +```text +CodeWhale fleet needs attention +Run: <run_id> +Worker: <worker> +Task: <task> +Classification: <classification> +Reason: <reason> +Latest typed evidence: codewhale fleet inspect <worker>; codewhale fleet artifacts <worker> +Safe log excerpt: <3 lines max or "see artifact <ref>"> +Requested decision: <decision> +``` + +## Post-Run Receipt + +End every fleet-manager response with a compact receipt: + +```text +Fleet receipt +Run: <run_id> +Workers checked: <workers> +Classification: <classification> +Action: <action taken or drafted> +Ledger expectation: <expected ledger effect> +Artifacts reviewed: <artifact refs> +Follow-up owner: <next owner> +``` diff --git a/crates/tui/src/skills/system.rs b/crates/tui/src/skills/system.rs index 9c37969d..a1ef12e0 100644 --- a/crates/tui/src/skills/system.rs +++ b/crates/tui/src/skills/system.rs @@ -4,13 +4,14 @@ use std::fs; use std::path::Path; -const BUNDLED_SKILL_VERSION: &str = "3"; +const BUNDLED_SKILL_VERSION: &str = "4"; const SKILL_CREATOR_BODY: &str = include_str!("../../assets/skills/skill-creator/SKILL.md"); const DELEGATE_BODY: &str = include_str!("../../assets/skills/delegate/SKILL.md"); const V4_BEST_PRACTICES_BODY: &str = include_str!("../../assets/skills/v4-best-practices/SKILL.md"); const PLUGIN_CREATOR_BODY: &str = include_str!("../../assets/skills/plugin-creator/SKILL.md"); const SKILL_INSTALLER_BODY: &str = include_str!("../../assets/skills/skill-installer/SKILL.md"); const MCP_BUILDER_BODY: &str = include_str!("../../assets/skills/mcp-builder/SKILL.md"); +const FLEET_MANAGER_BODY: &str = include_str!("../../assets/skills/fleet-manager/SKILL.md"); const DOCUMENTS_BODY: &str = 
include_str!("../../assets/skills/documents/SKILL.md"); const PRESENTATIONS_BODY: &str = include_str!("../../assets/skills/presentations/SKILL.md"); const SPREADSHEETS_BODY: &str = include_str!("../../assets/skills/spreadsheets/SKILL.md"); @@ -54,6 +55,11 @@ const BUNDLED_SKILLS: &[BundledSkill] = &[ body: MCP_BUILDER_BODY, introduced_in: 3, }, + BundledSkill { + name: "fleet-manager", + body: FLEET_MANAGER_BODY, + introduced_in: 4, + }, BundledSkill { name: "documents", body: DOCUMENTS_BODY, @@ -370,7 +376,7 @@ mod tests { let tmp = TempDir::new().unwrap(); // Simulate v2 where older bundled skills had been deliberately removed - // before v3 introduced more system skills. + // before later versions introduced more system skills. fs::write(marker_file(&tmp), "2").unwrap(); install_system_skills(tmp.path()).unwrap(); diff --git a/docs/FLEET.md b/docs/FLEET.md index 98c089a6..bc15ca76 100644 --- a/docs/FLEET.md +++ b/docs/FLEET.md @@ -269,6 +269,67 @@ POST /v1/fleet/runs/{run_id}/stop Action endpoints call the same manager controls as the CLI and record their decisions in the fleet ledger. +## Manager-Agent Runbook + +Manager agents should treat Fleet operations as typed, ledgered control-plane +work. Start with `codewhale fleet status`, then inspect one run or worker with +`codewhale fleet inspect <run-or-worker>`, `logs`, and `artifacts`. Use direct +reads of `.codewhale/fleet.jsonl`, host logs, or remote files only when the +typed CLI/API surface cannot provide the required evidence. + +Classify the worker before taking action: + +- `transient failure`: stale heartbeat, host timeout, interrupted transport, + retryable provider/network error, or an adapter status that can plausibly + recover without changing the task. +- `task failure`: the worker completed but produced an incorrect result, + domain failure, missing required artifact, or explicit task-level error. 
+- `verifier failure`: the worker result exists, but the scorer/verifier failed, + timed out, or disagrees with the receipt. +- `needs-human`: missing authority, secret request, destructive operation, + repeated restart exhaustion, ambiguous product decision, or conflicting + evidence that the manager cannot resolve from typed artifacts. + +Choose one typed action: + +- Restart a worker only when the failure is transient, retry budget remains, + the task is idempotent or retry-safe, and no permission or secret boundary is + involved: `codewhale fleet restart <worker>`. +- Interrupt or stop only when the current task is unsafe to continue or the + operator explicitly asks for cancellation: `codewhale fleet interrupt + <worker>` or `codewhale fleet stop --all`. +- Do not restart pure task failures by default; preserve artifacts and hand the + receipt to the task owner unless the task spec says retrying can produce new + evidence. +- For verifier failures, inspect scorer inputs and artifact refs first. If the + verifier cannot be corrected through typed fleet actions, escalate for human + review. +- For `needs-human`, draft an escalation instead of sending it unless alert + config explicitly authorizes sending. + +Safe Slack or PagerDuty draft: + +```text +CodeWhale fleet needs attention +Run: <run_id> +Worker: <worker> +Task: <task> +Classification: <classification> +Reason: <reason> +Latest typed evidence: codewhale fleet inspect <worker>; codewhale fleet artifacts <worker> +Safe log excerpt: <3 lines max or "see artifact <ref>"> +Requested decision: <decision> +``` + +Post-run summaries should include the run id, workers checked, classification, +typed action taken or drafted, expected ledger effect, artifact refs reviewed, +and next owner. Keep summaries bounded; link artifact refs instead of copying +full logs or transcripts. + +The bundled `fleet-manager` skill mirrors this runbook for manager agents. It +is a first-party system skill and should be discoverable through the normal +skill registry after system skills are installed or refreshed. 
+ ## Host Adapters The host adapter boundary supports local child processes and explicit SSH