merge: fleet security/trust + headless-worker foundation + depth unification

2026-06-13 01:11:50 -07:00
parent 7c52d6ae11 e8b52ac57a
commit b1cc6ecf5d
21 changed files with 2223 additions and 123 deletions
@@ -936,6 +936,57 @@ default_text_model = "deepseek-ai/deepseek-v4-pro"
 # printf '%s\n' '{"content":"audit wrapper placeholder: configure an executor","success":false}'
 # ```
 # ─────────────────────────────────────────────────────────────────────────────────
 # Agent Fleet trust, security, and role registry (#3165, #3167)
 # ─────────────────────────────────────────────────────────────────────────────────
 # [fleet]
 # # Default trust level for fleet workers: "sandbox" | "local" | "remote-verified" | "operator"
 # default_trust_level = "sandbox"
 # # Require SSH host-key verification before granting remote-verified trust
 # require_identity_verification = true
 # # Maximum trust level any worker may have
 # max_trust_level = "operator"
 #
 # # Headless worker execution hardening (#3027)
 # [fleet.exec]
 # # Tools always allowed regardless of role
 # allowed_tools = []
 # # Tools always disallowed (overrides role and task spec)
 # disallowed_tools = ["exec_shell"]
 # # Hard ceiling on worker steps (tool calls + model turns)
 # max_turns = 500
 # # Recursive child-agent depth for fleet workers. Shares ONE recursion axis
 # # with standalone sub-agents (a fleet worker IS a headless sub-agent).
 # # 0 blocks child agents (the root worker still runs); 3 is both the default
 # # and the hard cap (larger values are clamped), affording up to three nested
 # # delegation levels below the root worker.
 # max_spawn_depth = 3
 # # Extra system prompt injected into every headless worker
 # append_system_prompt = "Never modify .git/config or change remotes."
 # # Output format: "text" (default) or "stream-json" for ndjson events
 # output_format = "text"
 #
 # # Built-in role presets are always available: smoke-runner, reviewer, builder, read-only.
 # # User-defined roles here override or extend the built-in set. Any key under
 # # [fleet.roles] becomes a valid role name that task specs can reference.
 # [fleet.roles.ci-linter]
 # description = "Runs linters and formatters"
 # tool_profile = "read-only"
 # tools = ["cargo", "cargo-clippy", "cargo-fmt"]
 # capabilities = ["rust"]
 # # Token and tool-call budgets are task-level settings, not role-preset
 # # fields; timeout_seconds is the safety bound a role carries.
 # timeout_seconds = 600
 #
 # [fleet.roles.pr-reviewer]
 # description = "Reviews PRs with GitHub access"
 # tool_profile = "read-only"
 # tools = ["git", "gh", "rg"]
 # capabilities = ["git", "github"]
 # # (No max_tokens / max_tool_calls here: role presets do not define those
 # # keys; set such budgets on the task spec instead.)
 # timeout_seconds = 900
 # trust_level = "local"
 # ─────────────────────────────────────────────────────────────────────────────────
 # Requirements (admin constraints) example file
 # ─────────────────────────────────────────────────────────────────────────────────
@@ -22,7 +22,7 @@ use codewhale_mcp::{McpServerDefinition, run_stdio_server};
 use codewhale_secrets::Secrets;
 use codewhale_state::{StateStore, ThreadListFilters};
-#[derive(Debug, Clone, Copy, ValueEnum)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
 enum ProviderArg {
    Deepseek,
    NvidiaNim,
@@ -43,6 +43,10 @@ enum ProviderArg {
    Huggingface,
    Together,
    OpenaiCodex,
    Anthropic,
    Zai,
    Stepfun,
    Minimax,
 }
 impl From<ProviderArg> for ProviderKind {
@@ -67,6 +71,10 @@ impl From<ProviderArg> for ProviderKind {
            ProviderArg::Huggingface => ProviderKind::Huggingface,
            ProviderArg::Together => ProviderKind::Together,
            ProviderArg::OpenaiCodex => ProviderKind::OpenaiCodex,
            ProviderArg::Anthropic => ProviderKind::Anthropic,
            ProviderArg::Zai => ProviderKind::Zai,
            ProviderArg::Stepfun => ProviderKind::Stepfun,
            ProviderArg::Minimax => ProviderKind::Minimax,
        }
    }
 }
@@ -787,7 +795,7 @@ fn provider_slot(provider: ProviderKind) -> &'static str {
 }
 /// Provider order used by the `auth list` and `auth status` outputs.
-const PROVIDER_LIST: [ProviderKind; 20] = [
+const PROVIDER_LIST: [ProviderKind; 24] = [
    ProviderKind::Deepseek,
    ProviderKind::NvidiaNim,
    ProviderKind::Openai,
@@ -808,6 +816,10 @@ const PROVIDER_LIST: [ProviderKind; 20] = [
    ProviderKind::Huggingface,
    ProviderKind::Together,
    ProviderKind::OpenaiCodex,
    ProviderKind::Anthropic,
    ProviderKind::Zai,
    ProviderKind::Stepfun,
    ProviderKind::Minimax,
 ];
 fn provider_is_supported_by_tui(provider: ProviderKind) -> bool {
@@ -833,6 +845,10 @@ fn provider_is_supported_by_tui(provider: ProviderKind) -> bool {
            | ProviderKind::Huggingface
            | ProviderKind::Together
            | ProviderKind::OpenaiCodex
            | ProviderKind::Anthropic
            | ProviderKind::Zai
            | ProviderKind::Stepfun
            | ProviderKind::Minimax
    )
 }
@@ -2606,6 +2622,32 @@ mod tests {
            }))
        ));
        for (provider, expected) in [
            ("anthropic", ProviderArg::Anthropic),
            ("zai", ProviderArg::Zai),
            ("stepfun", ProviderArg::Stepfun),
            ("minimax", ProviderArg::Minimax),
        ] {
            let cli = parse_ok(&[
                "deepseek",
                "auth",
                "set",
                "--provider",
                provider,
                "--api-key-stdin",
            ]);
            assert!(matches!(
                cli.command,
                Some(Commands::Auth(AuthArgs {
                    command: AuthCommand::Set {
                        provider,
                        api_key: None,
                        api_key_stdin: true,
                    }
                })) if provider == expected
            ));
        }
        let cli = parse_ok(&["deepseek", "auth", "list"]);
        assert!(matches!(
            cli.command,
@@ -663,6 +663,10 @@ pub struct ConfigToml {
    /// lifecycle `[hooks]` table so config rewrites preserve existing hooks.
    #[serde(default)]
    pub hook_sinks: Option<HookSinksToml>,
    /// Agent Fleet trust and security policy (#3165). When absent, fleet
    /// workers inherit conservative Sandbox defaults.
    #[serde(default)]
    pub fleet: Option<FleetConfigToml>,
    #[serde(flatten)]
    pub extras: BTreeMap<String, toml::Value>,
 }
@@ -1059,6 +1063,236 @@ impl Default for SnapshotsToml {
    }
 }
 /// On-disk schema for the `[fleet]` table (#3165). See `config.example.toml`
 /// and `docs/FLEET.md` for documentation.
 ///
 /// Every field carries a serde default, so an empty or partially filled
 /// `[fleet]` table still deserializes.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct FleetConfigToml {
    /// Default trust level for fleet workers. One of `"sandbox"`, `"local"`,
    /// `"remote-verified"`, or `"operator"`. Defaults to `"sandbox"`.
    ///
    /// Held as a raw string: this struct does not validate the value.
    #[serde(default = "default_fleet_trust_level_str")]
    pub default_trust_level: String,
    /// Require identity verification for remote (SSH) workers before
    /// granting them `remote-verified` trust. Defaults to true.
    #[serde(default = "default_fleet_require_identity")]
    pub require_identity_verification: bool,
    /// Maximum trust level any worker may have (`"sandbox"`, `"local"`,
    /// `"remote-verified"`, or `"operator"`). Defaults to `"operator"`.
    ///
    /// Held as a raw string: this struct does not validate the value.
    #[serde(default = "default_fleet_max_trust_level_str")]
    pub max_trust_level: String,
    /// User-defined role presets, keyed by role name.
    ///
    /// Each role defines default tool profiles, capabilities, a timeout, and
    /// trust settings that task specs can reference by name. The built-in
    /// roles (`smoke-runner`, `reviewer`, `builder`, `read-only`) are NOT
    /// stored in this map; [`FleetConfigToml::resolve_role`] consults this
    /// map first and falls back to the built-ins, so an entry here with a
    /// built-in name replaces that built-in wholesale.
    #[serde(default)]
    pub roles: BTreeMap<String, FleetRolePreset>,
    /// Headless worker execution hardening (#3027). An absent `[fleet.exec]`
    /// table yields `FleetExecConfig::default()`.
    #[serde(default)]
    pub exec: FleetExecConfig,
 }
 /// Canonical recursion-depth policy for the headless worker runtime.
 ///
 /// Single source of truth shared by BOTH standalone sub-agents and fleet
 /// workers so the two cannot drift into "two moving targets":
 /// - [`DEFAULT_SPAWN_DEPTH`] is the default recursion budget (the sub-agent
 ///   runtime's `DEFAULT_MAX_SPAWN_DEPTH` is defined as this value).
 /// - [`MAX_SPAWN_DEPTH_CEILING`] is the hard safety cap; every configured
 ///   value (fleet `max_spawn_depth`, `agent_open`'s `max_depth`) clamps to it.
 ///
 /// A worker runs at `spawn_depth = 0` and may spawn while
 /// `spawn_depth + 1 <= max_spawn_depth`, so a depth of N affords N nested
 /// delegation levels below the root worker. The default of 3 therefore
 /// affords three nested delegation levels out of the box; the root worker
 /// still runs at depth 0 even when the budget is 0.
 pub const DEFAULT_SPAWN_DEPTH: u32 = 3;
 /// Hard ceiling on recursion depth for any worker/sub-agent. See
 /// [`DEFAULT_SPAWN_DEPTH`]. Raising this single constant lifts the limit
 /// everywhere (the fleet clamp and `agent_open` validation both read it).
 ///
 /// Currently equal to the default, so the default budget is also the
 /// largest budget that can be configured.
 pub const MAX_SPAWN_DEPTH_CEILING: u32 = 3;
 /// Headless worker execution constraints (#3027).
 ///
 /// These limits apply to all fleet workers and sub-agents spawned through
 /// the headless worker runtime. Task specs can tighten but not loosen them.
 ///
 /// Every field has a serde default. The two tool lists and
 /// `append_system_prompt` are left out of serialized output while empty.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct FleetExecConfig {
    /// Tools that are always allowed regardless of role or task spec.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub allowed_tools: Vec<String>,
    /// Tools that are always disallowed, overriding role and task spec.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub disallowed_tools: Vec<String>,
    /// Hard ceiling on sub-agent steps (tool calls + model turns).
    /// Workers that exceed this are terminated. Default: unbounded (u32::MAX).
    #[serde(default = "default_fleet_max_turns")]
    pub max_turns: u32,
    /// Recursive child-agent budget for headless fleet workers.
    /// Defaults to [`DEFAULT_SPAWN_DEPTH`] (3) so a fleet worker has the SAME
    /// recursion budget as a standalone sub-agent — fleet and sub-agents are one
    /// substrate, not two. Set 0 to block child `agent_open` (the root worker
    /// still runs); the value is clamped to [`MAX_SPAWN_DEPTH_CEILING`].
    ///
    /// NOTE(review): this struct stores the value as parsed; the clamp is
    /// presumably applied where the value is consumed — confirm.
    #[serde(default = "default_fleet_max_spawn_depth")]
    pub max_spawn_depth: u32,
    /// Extra system prompt text appended to every headless worker.
    /// Useful for injecting org-wide policy or behavior constraints.
    #[serde(default, skip_serializing_if = "String::is_empty")]
    pub append_system_prompt: String,
    /// Output format for fleet worker results.
    /// `"text"` (default) or `"stream-json"` for newline-delimited JSON events.
    ///
    /// Held as a raw string: this struct does not validate the value.
    #[serde(default = "default_fleet_output_format")]
    pub output_format: String,
 }
 /// Serde default for `FleetExecConfig::max_turns`: the largest `u32`, which
 /// in practice means "no turn limit".
 fn default_fleet_max_turns() -> u32 {
    const UNBOUNDED_TURNS: u32 = u32::MAX;
    UNBOUNDED_TURNS
 }
 /// Serde default for `FleetExecConfig::max_spawn_depth`: the shared
 /// [`DEFAULT_SPAWN_DEPTH`] recursion budget.
 fn default_fleet_max_spawn_depth() -> u32 {
    DEFAULT_SPAWN_DEPTH
 }
 /// Serde default for `FleetExecConfig::output_format`: plain `"text"` output.
 fn default_fleet_output_format() -> String {
    String::from("text")
 }
 impl Default for FleetExecConfig {
    /// Builds the value used when `[fleet.exec]` is absent: no tool
    /// overrides, no extra prompt, and the same per-field defaults serde
    /// applies to a partially specified table.
    fn default() -> Self {
        let max_turns = default_fleet_max_turns();
        let max_spawn_depth = default_fleet_max_spawn_depth();
        let output_format = default_fleet_output_format();
        Self {
            allowed_tools: vec![],
            disallowed_tools: vec![],
            max_turns,
            max_spawn_depth,
            append_system_prompt: String::default(),
            output_format,
        }
    }
 }
 /// A named role preset that bundles common worker settings.
 ///
 /// Task specs reference a role name (e.g. `"role": "reviewer"`), and the
 /// fleet manager fills in any missing fields from the preset. User-defined
 /// roles in `[fleet.roles]` override built-in defaults with the same name.
 ///
 /// Token budgets and tool-call limits are task-level decisions — they don't
 /// belong on role presets. Use `timeout_seconds` as the safety bound.
 ///
 /// NOTE(review): no `deny_unknown_fields` attribute is set, so keys other
 /// than the ones below (e.g. a stray `max_tokens`) are presumably ignored
 /// rather than rejected — confirm against serde's default behavior.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct FleetRolePreset {
    /// Short description of what this role is for.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,
    /// Default tool profile (`"read-only"`, `"read-write"`, or `"custom"`).
    /// Held as a raw string: this struct does not validate the value.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_profile: Option<String>,
    /// Default set of tool names available to this role.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub tools: Vec<String>,
    /// Default capability tags (e.g. `"rust"`, `"git"`, `"gh"`).
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub capabilities: Vec<String>,
    /// Default timeout in seconds for tasks using this role.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub timeout_seconds: Option<u64>,
    /// Default trust level override for this role. `None` means the role
    /// does not set one itself.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub trust_level: Option<String>,
 }
 /// Serde default for `FleetConfigToml::default_trust_level`: `"sandbox"`.
 fn default_fleet_trust_level_str() -> String {
    String::from("sandbox")
 }
 /// Serde default for `FleetConfigToml::require_identity_verification`:
 /// verification is required unless the config turns it off.
 fn default_fleet_require_identity() -> bool {
    true
 }
 /// Serde default for `FleetConfigToml::max_trust_level`: `"operator"`.
 fn default_fleet_max_trust_level_str() -> String {
    String::from("operator")
 }
 impl Default for FleetConfigToml {
    /// Builds the configuration equivalent to an empty `[fleet]` table: the
    /// per-field serde defaults, no user-defined roles, and default
    /// execution limits.
    fn default() -> Self {
        let default_trust_level = default_fleet_trust_level_str();
        let max_trust_level = default_fleet_max_trust_level_str();
        let require_identity_verification = default_fleet_require_identity();
        Self {
            default_trust_level,
            require_identity_verification,
            max_trust_level,
            roles: BTreeMap::default(),
            exec: Default::default(),
        }
    }
 }
 impl FleetConfigToml {
    /// Resolve a role preset by name.
    ///
    /// User-defined roles from `[fleet.roles]` are checked first; a match is
    /// returned as-is (it replaces a built-in of the same name rather than
    /// being merged with it). Otherwise the built-in presets are consulted.
    ///
    /// Returns `None` when `name` matches neither a user-defined nor a
    /// built-in role.
    #[must_use]
    pub fn resolve_role(&self, name: &str) -> Option<FleetRolePreset> {
        match self.roles.get(name) {
            Some(user_defined) => Some(user_defined.clone()),
            // The built-in table is a freshly built temporary, so move the
            // entry out of it instead of deep-cloning it a second time.
            None => built_in_role_presets().remove(name),
        }
    }
 }
 /// Returns the role presets that exist without any configuration, keyed by
 /// role name: `smoke-runner`, `reviewer`, `builder`, and `read-only`.
 ///
 /// A fresh map is built on every call.
 #[must_use]
 pub fn built_in_role_presets() -> BTreeMap<String, FleetRolePreset> {
    /// Assembles one built-in preset. Built-ins never list explicit tools or
    /// capabilities, so only the fields that vary are parameters.
    fn preset(
        description: &str,
        tool_profile: &str,
        timeout_seconds: u64,
        trust_level: Option<&str>,
    ) -> FleetRolePreset {
        FleetRolePreset {
            description: Some(description.to_owned()),
            tool_profile: Some(tool_profile.to_owned()),
            tools: Vec::new(),
            capabilities: Vec::new(),
            timeout_seconds: Some(timeout_seconds),
            trust_level: trust_level.map(str::to_owned),
        }
    }

    BTreeMap::from([
        (
            String::from("smoke-runner"),
            preset(
                "Lightweight read-only smoke check worker",
                "read-only",
                300,
                Some("local"),
            ),
        ),
        (
            String::from("reviewer"),
            // The reviewer role deliberately carries no trust override.
            preset(
                "Read-only code and documentation review",
                "read-only",
                600,
                None,
            ),
        ),
        (
            String::from("builder"),
            preset(
                "Read-write builder with compilation and test access",
                "read-write",
                1800,
                Some("local"),
            ),
        ),
        (
            String::from("read-only"),
            preset(
                "Minimal read-only observer with no writes or secrets",
                "read-only",
                300,
                Some("sandbox"),
            ),
        ),
    ])
 }
 /// On-disk schema for the `[network]` table (#135). See `config.example.toml`
 /// for documentation.
 #[derive(Debug, Clone, Serialize, Deserialize)]
@@ -7179,6 +7413,32 @@ fallback_providers = ["deepseek", "openrouter"]
        assert!(!serialized.contains("fallback_providers"));
    }
    #[test]
    fn fleet_exec_config_default_matches_subagent_spawn_depth() {
        // One recursion axis for fleet workers and standalone sub-agents:
        // the fleet default must be DEFAULT_SPAWN_DEPTH, that value must be
        // 3, and it must never exceed the hard ceiling.
        let default_depth = FleetExecConfig::default().max_spawn_depth;
        assert_eq!(default_depth, DEFAULT_SPAWN_DEPTH);
        assert_eq!(default_depth, 3);
        assert!(MAX_SPAWN_DEPTH_CEILING >= DEFAULT_SPAWN_DEPTH);
    }
    #[test]
    fn fleet_exec_config_parses_max_spawn_depth() {
        // An explicit `[fleet.exec]` depth must come through parsing as written.
        let raw = r#"
 [fleet.exec]
 max_spawn_depth = 2
 "#;
        let config: ConfigToml = toml::from_str(raw).expect("fleet exec config should parse");
        let fleet = config.fleet.expect("fleet config");
        assert_eq!(fleet.exec.max_spawn_depth, 2);
    }
    #[test]
    fn fallback_providers_do_not_change_runtime_resolution() {
        let _lock = env_lock();
@@ -12,7 +12,7 @@
 use std::collections::BTreeMap;
 use std::path::PathBuf;
-use serde::{Deserialize, Deserializer, Serialize, Serializer};
+use serde::{Deserialize, Deserializer, Serialize, Serializer, de};
 use serde_json::Value;
 pub const FLEET_PROTOCOL_VERSION: &str = "0.1.0";
@@ -45,6 +45,8 @@ pub struct FleetRun {
    pub worker_specs: Vec<FleetWorkerSpec>,
    #[serde(default)]
    pub labels: BTreeMap<String, String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub security_policy: Option<FleetSecurityPolicy>,
    pub created_at: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub updated_at: Option<String>,
@@ -260,6 +262,9 @@ pub struct FleetWorkerSpec {
    pub name: String,
    pub host: FleetHostSpec,
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub trust_level: Option<FleetTrustLevel>,
    #[serde(default)]
    pub labels: BTreeMap<String, String>,
    #[serde(default)]
    pub capabilities: Vec<String>,
@@ -280,6 +285,14 @@ pub enum FleetHostSpec {
        user: Option<String>,
        #[serde(skip_serializing_if = "Option::is_none")]
        identity: Option<PathBuf>,
        /// Known hosts file for host-key verification.
        #[serde(skip_serializing_if = "Option::is_none")]
        known_hosts: Option<PathBuf>,
        /// Expected host key fingerprint (SHA256:...) for key pinning.
        /// When set, the connection is only trusted if the server's
        /// host key matches this fingerprint exactly.
        #[serde(skip_serializing_if = "Option::is_none")]
        host_key_fingerprint: Option<String>,
        #[serde(skip_serializing_if = "Option::is_none")]
        working_directory: Option<PathBuf>,
        #[serde(default)]
@@ -288,6 +301,8 @@ pub enum FleetHostSpec {
        #[serde(skip_serializing_if = "Option::is_none")]
        codewhale_binary: Option<String>,
    },
    #[serde(alias = "container")]
    #[serde(alias = "Container")]
    Docker {
        image: String,
        #[serde(default)]
@@ -295,6 +310,264 @@ pub enum FleetHostSpec {
    },
 }
 // ── Security and trust types ────────────────────────────────────────────────
 /// Trust classification assigned to a worker host.
 ///
 /// The trust level determines what a worker is allowed to do and what
 /// secrets it may access. The default for new workers is [`FleetTrustLevel::Sandbox`];
 /// operators must explicitly raise trust for SSH or container workers.
 ///
 /// Variants are declared from least to most trusted, so the derived
 /// ordering (`Sandbox < Local < RemoteVerified < Operator`) can be used to
 /// compare levels. On the wire the names are snake_case (`sandbox`, `local`,
 /// `remote_verified`, `operator`).
 #[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
 #[serde(rename_all = "snake_case")]
 pub enum FleetTrustLevel {
    /// Fully isolated: no network, no secrets, no writes outside `.codewhale/fleet/`.
    /// Suitable for untrusted code review, community PR checks, or third-party tool runs.
    ///
    /// This is the `Default` value.
    #[default]
    Sandbox = 0,
    /// Local-only worker with access to the workspace and configured secrets.
    /// Default for local workers. May read repo files but writes are gated.
    Local = 1,
    /// Worker on a known remote host with verified identity and a bounded
    /// set of explicitly granted capabilities. Requires SSH host-key
    /// verification or equivalent attestation.
    ///
    /// Also accepted on input as `remote-verified` or `remoteVerified`.
    #[serde(alias = "remote-verified", alias = "remoteVerified")]
    RemoteVerified = 2,
    /// Fully trusted worker (e.g. operator's own machine, CI runner).
    /// Has access to all configured secrets and may perform any action the
    /// operator can. Reserved for dogfood smoke and operator-owned machines.
    Operator = 3,
 }
 impl FleetTrustLevel {
    /// Reports whether a worker at this level may resolve provider secrets.
    /// Every level except `Sandbox` may.
    #[must_use]
    pub fn may_access_secrets(&self) -> bool {
        match self {
            Self::Sandbox => false,
            Self::Local | Self::RemoteVerified | Self::Operator => true,
        }
    }
    /// Reports whether a worker at this level may write outside
    /// `.codewhale/fleet/`. Only `Local` and `Operator` may; note that
    /// `RemoteVerified` is excluded even though it orders above `Local`.
    #[must_use]
    pub fn may_write_workspace(&self) -> bool {
        match self {
            Self::Local | Self::Operator => true,
            Self::Sandbox | Self::RemoteVerified => false,
        }
    }
    /// Reports whether a worker at this level may use the network.
    /// Every level except `Sandbox` may.
    #[must_use]
    pub fn may_access_network(&self) -> bool {
        match self {
            Self::Sandbox => false,
            Self::Local | Self::RemoteVerified | Self::Operator => true,
        }
    }
 }
 /// Security policy applied to a fleet run.
 ///
 /// A policy defines the default trust level for workers, which secrets
 /// may be resolved, and what capabilities are granted. When a run has no
 /// explicit policy, workers inherit conservative defaults.
 ///
 /// Every field has a serde default, so a policy object may omit any of them.
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
 pub struct FleetSecurityPolicy {
    /// Default trust level for workers that don't declare one explicitly.
    /// Falls back to `FleetTrustLevel::default()` when omitted.
    #[serde(default)]
    pub default_trust_level: FleetTrustLevel,
    /// Secret refs that workers may resolve. An empty list means no secrets
    /// are available. Each entry is a key name, not a value.
    #[serde(default)]
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub allowed_secrets: Vec<FleetSecretRef>,
    /// Capability grants for workers in this run.
    #[serde(default)]
    #[serde(skip_serializing_if = "Vec::is_empty")]
    pub capability_grants: Vec<FleetCapabilityGrant>,
    /// Maximum trust level any worker in this run may have, even if the
    /// worker spec requests higher. Defaults to Operator (no ceiling).
    #[serde(default = "default_max_trust_level")]
    pub max_trust_level: FleetTrustLevel,
    /// Require identity verification for remote workers. When true, SSH
    /// workers must pass host-key verification before being trusted at
    /// RemoteVerified level; unverified remotes stay at Sandbox.
    ///
    /// Defaults to `false` when omitted.
    #[serde(default)]
    pub require_identity_verification: bool,
    /// Allow conservative parallel execution of read-only tools (#2983).
    /// When true, workers may batch independent read-only tool calls
    /// (reads, searches, greps) into concurrent turns. Disabled by default
    /// to avoid overwhelming providers or hitting rate limits.
    #[serde(default)]
    pub allow_parallel_reads: bool,
 }
 /// Serde default for `FleetSecurityPolicy::max_trust_level`: the highest
 /// level, i.e. no ceiling.
 fn default_max_trust_level() -> FleetTrustLevel {
    FleetTrustLevel::Operator
 }
 impl Default for FleetSecurityPolicy {
    fn default() -> Self {
        Self {
            default_trust_level: FleetTrustLevel::Sandbox,
            allowed_secrets: Vec::new(),
            capability_grants: Vec::new(),
            max_trust_level: FleetTrustLevel::Operator,
            require_identity_verification: false,
            allow_parallel_reads: false,
        }
    }
 }
 /// A reference to a secret that should be resolved at runtime, never
 /// serialized as a plaintext value.
 ///
 /// Secret refs appear in task specs, alert configs, and worker definitions.
 /// The actual secret value is resolved by the fleet manager from the
 /// secrets backend (OS keyring, environment, or file store) just before
 /// the worker starts.
 ///
 /// `Serialize` is derived, but `Deserialize` is implemented by hand for
 /// this type (it is not in the derive list below).
 #[derive(Debug, Clone, Serialize, PartialEq, Eq, Hash, PartialOrd, Ord)]
 pub struct FleetSecretRef {
    /// The secret key name (e.g. `"CODEWHALE_API_KEY"`, `"GH_TOKEN"`).
    pub key: String,
    /// Optional source hint for resolution order.
    /// - `"env"` — resolve from environment variable
    /// - `"keyring"` — resolve from OS keyring
    /// - `"file"` — resolve from `~/.codewhale/secrets/`
    /// - absent / null — try all sources in default order
    ///
    /// Held as a raw string: this struct does not validate the value.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub source: Option<String>,
 }
 impl FleetSecretRef {
    /// Builds a ref for `key` with no source hint, i.e. default resolution.
    ///
    /// The key is stored as given; no emptiness check happens here.
    #[must_use]
    pub fn new(key: impl Into<String>) -> Self {
        let key = key.into();
        Self { key, source: None }
    }
    /// Builds a ref for `key` that carries an explicit `source` hint.
    #[must_use]
    pub fn with_source(key: impl Into<String>, source: impl Into<String>) -> Self {
        let key = key.into();
        let source = Some(source.into());
        Self { key, source }
    }
    /// Log-safe rendering of this ref: `<secret:SOURCE.KEY>` when a source
    /// hint is set, `<secret:KEY>` otherwise. Only the name and source are
    /// shown; this type never holds the resolved value.
    #[must_use]
    pub fn redacted(&self) -> String {
        let key = &self.key;
        if let Some(source) = &self.source {
            format!("<secret:{source}.{key}>")
        } else {
            format!("<secret:{key}>")
        }
    }
 }
 impl std::fmt::Display for FleetSecretRef {
    /// Displays the ref in its redacted form, so printing it is log-safe.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let rendered = self.redacted();
        f.write_str(&rendered)
    }
 }
 impl From<&str> for FleetSecretRef {
    fn from(key: &str) -> Self {
        Self::new(key)
    }
 }
 impl From<String> for FleetSecretRef {
    fn from(key: String) -> Self {
        Self::new(key)
    }
 }
 impl<'de> Deserialize<'de> for FleetSecretRef {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        #[derive(Deserialize)]
        #[serde(untagged)]
        enum SecretRefWire {
            Key(String),
            Structured {
                key: String,
                #[serde(default)]
                source: Option<String>,
            },
        }
        match SecretRefWire::deserialize(deserializer)? {
            SecretRefWire::Key(key) if !key.trim().is_empty() => Ok(FleetSecretRef::new(key)),
            SecretRefWire::Key(_) => Err(de::Error::custom("secret ref key cannot be empty")),
            SecretRefWire::Structured { key, source } if !key.trim().is_empty() => {
                Ok(FleetSecretRef { key, source })
            }
            SecretRefWire::Structured { .. } => {
                Err(de::Error::custom("secret ref key cannot be empty"))
            }
        }
    }
 }
 /// How a worker authenticates to the fleet manager.
 ///
 /// Serialized as an internally tagged object: the `method` field selects
 /// the variant using its snake_case name (`none`, `ssh_key`, `token`,
 /// `mtls`), and the variant's fields sit alongside it.
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
 #[serde(tag = "method", rename_all = "snake_case")]
 pub enum FleetWorkerAuth {
    /// No authentication (local workers share the same uid).
    None,
    /// SSH key-based authentication with host-key verification.
    SshKey {
        /// Path to the SSH identity file. This field is a plain `PathBuf`,
        /// not a [`FleetSecretRef`].
        /// NOTE(review): confirm whether a secret-ref object such as
        /// `{"key": "...", "source": "file"}` was meant to be accepted here;
        /// the current type only holds a path.
        identity: PathBuf,
        /// Known hosts file for host-key verification.
        #[serde(skip_serializing_if = "Option::is_none")]
        known_hosts: Option<PathBuf>,
        /// Expected host key fingerprint for pinning.
        #[serde(skip_serializing_if = "Option::is_none")]
        host_key_fingerprint: Option<String>,
        /// SSH user for the connection.
        #[serde(skip_serializing_if = "Option::is_none")]
        user: Option<String>,
    },
    /// Token-based authentication for remote workers behind a fleet proxy.
    Token {
        /// Reference to the token secret.
        token_ref: FleetSecretRef,
    },
    /// mTLS certificate-based authentication.
    Mtls {
        /// Path to the client certificate.
        cert_path: PathBuf,
        /// Reference to the private key secret.
        key_ref: FleetSecretRef,
    },
 }
 /// A capability grant that explicitly authorizes a worker to perform
 /// a specific class of action.
 ///
 /// By default, new workers get no grants (least privilege). Grants are
 /// additive: a worker's effective capabilities are the union of its
 /// trust-level defaults plus any explicit grants.
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
 pub struct FleetCapabilityGrant {
    /// The capability being granted (e.g. `"network"`, `"git-push"`,
    /// `"provider-secrets"`, `"release"`). Required; held as a raw string
    /// that this struct does not validate.
    pub capability: String,
    /// Optional scope limiting the grant (e.g. `"github.com"` for network,
    /// `"crates/tui/**"` for file writes). Omitted from output when unset.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub scope: Option<String>,
    /// Optional justification for the grant (audit trail). Omitted from
    /// output when unset.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub reason: Option<String>,
 }
 /// Runtime status of a worker.
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
 #[serde(rename_all = "snake_case")]
@@ -469,18 +742,84 @@ pub enum FleetAlertEventClass {
 #[serde(tag = "kind", rename_all = "snake_case")]
 pub enum FleetAlertChannel {
    Slack {
-        webhook_url: String,
+        /// Webhook URL, resolved from a secret ref or inline.
        #[serde(flatten)]
        webhook: FleetAlertEndpoint,
    },
    Webhook {
-        url: String,
+        #[serde(flatten)]
-        secret: Option<String>,
+        endpoint: FleetAlertEndpoint,
    },
    #[serde(alias = "pager_duty")]
    #[serde(alias = "pagerduty")]
    PagerDuty {
        routing_key: String,
        severity: String,
    },
 }
 /// An alert channel endpoint, supporting both inline URLs and secret refs.
 ///
 /// For Slack and generic webhook channels, the URL may be provided directly
 /// or as a secret reference resolved at send time. When both `url` and
 /// `url_ref` are present, `url_ref` takes precedence after resolution.
 ///
 /// All three fields are optional and are omitted from output when unset;
 /// nothing in this struct requires that at least one URL form be present.
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
 pub struct FleetAlertEndpoint {
    /// Inline URL (plaintext; only for non-sensitive endpoints).
    ///
    /// Also accepted on input as `webhook_url` or `endpoint_url`.
    #[serde(
        alias = "webhook_url",
        alias = "endpoint_url",
        skip_serializing_if = "Option::is_none"
    )]
    pub url: Option<String>,
    /// Reference to a secret containing the webhook URL.
    ///
    /// Also accepted on input as `webhook_url_ref`, `webhook_ref`, or
    /// `url_secret_ref`.
    #[serde(
        alias = "webhook_url_ref",
        alias = "webhook_ref",
        alias = "url_secret_ref",
        skip_serializing_if = "Option::is_none"
    )]
    pub url_ref: Option<FleetSecretRef>,
    /// Optional HMAC secret for webhook payload signing, as a secret ref.
    ///
    /// Also accepted on input as `secret`, `webhook_secret`, or
    /// `signing_secret`. Because a [`FleetSecretRef`] also deserializes from
    /// a bare string, a plain string under any of these keys is read as the
    /// secret's *key name*, not as the secret value itself.
    #[serde(
        alias = "secret",
        alias = "webhook_secret",
        alias = "signing_secret",
        skip_serializing_if = "Option::is_none"
    )]
    pub secret_ref: Option<FleetSecretRef>,
 }
 impl FleetAlertEndpoint {
    /// Builds an endpoint from a plaintext URL (for non-sensitive use), with
    /// no URL ref and no signing secret.
    #[must_use]
    pub fn inline(url: impl Into<String>) -> Self {
        let url = Some(url.into());
        Self {
            url,
            url_ref: None,
            secret_ref: None,
        }
    }
    /// Builds an endpoint whose URL is supplied by a secret ref, with no
    /// inline URL and no signing secret.
    #[must_use]
    pub fn from_secret(url_ref: FleetSecretRef) -> Self {
        let url_ref = Some(url_ref);
        Self {
            url: None,
            url_ref,
            secret_ref: None,
        }
    }
    /// Log-safe rendering of the endpoint: the redacted URL ref when one is
    /// set, otherwise the fixed placeholder `<inline-url>` (also used when
    /// no inline URL is present either). The inline URL is never printed.
    #[must_use]
    pub fn redacted(&self) -> String {
        match &self.url_ref {
            Some(secret) => secret.redacted(),
            None => String::from("<inline-url>"),
        }
    }
 }
 /// Receipt produced when a task completes verification.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct FleetReceipt {
@@ -573,6 +912,7 @@ mod tests {
            }],
            worker_specs: vec![],
            labels: BTreeMap::new(),
            security_policy: None,
            created_at: "2026-06-12T17:00:00Z".to_string(),
            updated_at: None,
            completed_at: None,
@@ -648,7 +988,7 @@ mod tests {
        let policy = FleetAlertPolicy {
            events: vec![FleetAlertEventClass::Stale],
            channels: vec![FleetAlertChannel::Slack {
-                webhook_url: "https://hooks.slack.com/test".to_string(),
+                webhook: FleetAlertEndpoint::inline("https://hooks.slack.com/test"),
            }],
            after_attempts: Some(2),
            after_minutes_stale: Some(10),
@@ -687,6 +1027,8 @@ mod tests {
                port,
                user,
                identity,
                known_hosts,
                host_key_fingerprint,
                working_directory,
                env_allowlist,
                codewhale_binary,
@@ -695,6 +1037,8 @@ mod tests {
                assert_eq!(port, None);
                assert_eq!(user, None);
                assert_eq!(identity, None);
                assert_eq!(known_hosts, None);
                assert_eq!(host_key_fingerprint, None);
                assert_eq!(working_directory, None);
                assert!(env_allowlist.is_empty());
                assert_eq!(codewhale_binary, None);
@@ -801,4 +1145,131 @@ mod tests {
        assert_eq!(back.result, FleetTaskResult::Partial);
        assert_eq!(back.failure_kind, Some(FleetTaskFailureKind::Verifier));
    }
    /// An SSH host spec carrying `known_hosts` and a pinned host-key
    /// fingerprint survives a JSON serialize/deserialize cycle.
    #[test]
    fn ssh_host_spec_with_key_pinning_round_trip() {
        let fingerprint = "SHA256:aLGqZo1M6c...";
        let original = FleetHostSpec::Ssh {
            host: String::from("builder.trusted.example.com"),
            port: Some(22),
            user: Some(String::from("codewhale")),
            identity: Some(PathBuf::from("~/.ssh/codewhale_fleet")),
            known_hosts: Some(PathBuf::from("~/.ssh/known_hosts")),
            host_key_fingerprint: Some(String::from(fingerprint)),
            working_directory: Some(PathBuf::from("/srv/codewhale/work")),
            env_allowlist: vec![String::from("CODEWHALE_PROFILE")],
            codewhale_binary: Some(String::from("/usr/local/bin/codewhale")),
        };

        // Both pinning fields, and the fingerprint value, must be on the wire.
        let encoded = serde_json::to_string_pretty(&original).unwrap();
        for needle in ["\"known_hosts\"", "\"host_key_fingerprint\"", fingerprint] {
            assert!(encoded.contains(needle));
        }

        let decoded: FleetHostSpec = serde_json::from_str(&encoded).unwrap();
        match decoded {
            FleetHostSpec::Ssh {
                host,
                known_hosts,
                host_key_fingerprint,
                ..
            } => {
                assert_eq!(host, "builder.trusted.example.com");
                assert_eq!(known_hosts, Some(PathBuf::from("~/.ssh/known_hosts")));
                assert_eq!(host_key_fingerprint, Some(String::from(fingerprint)));
            }
            other => panic!("expected ssh host spec, got {other:?}"),
        }
    }
    /// The redacted form names the secret (and its source, when given) but
    /// contains nothing resembling a secret value.
    #[test]
    fn secret_ref_redacted_never_exposes_value() {
        // Key-only reference.
        let key_only = FleetSecretRef::new("DEEPSEEK_API_KEY").redacted();
        assert!(key_only.contains("DEEPSEEK_API_KEY"));
        assert!(!key_only.contains("sk-"));
        assert!(key_only.contains("<secret:"));

        // Reference qualified with a source.
        let with_source = FleetSecretRef::with_source("GH_TOKEN", "env").redacted();
        assert!(with_source.contains("env.GH_TOKEN"));
        assert!(!with_source.contains("ghp_"));
    }
    /// A secret-backed endpoint serializes only the reference (never a URL)
    /// and deserializes back to the same reference with no inline URL.
    #[test]
    fn alert_endpoint_from_secret_round_trip() {
        let original = FleetAlertEndpoint::from_secret(FleetSecretRef::new("SLACK_WEBHOOK"));

        let encoded = serde_json::to_string(&original).unwrap();
        assert!(encoded.contains("SLACK_WEBHOOK"));
        assert!(!encoded.contains("hooks.slack.com"));

        let decoded: FleetAlertEndpoint = serde_json::from_str(&encoded).unwrap();
        assert_eq!(decoded.url_ref.as_ref().unwrap().key, "SLACK_WEBHOOK");
        assert_eq!(decoded.url, None);
    }
    /// A secret ref can be read from a bare JSON string (legacy shape) as well
    /// as from the structured `{key, source}` object.
    #[test]
    fn secret_ref_accepts_legacy_string_wire_shape() {
        let cases = [
            (
                r#""CODEWHALE_FLEET_TOKEN""#,
                FleetSecretRef::new("CODEWHALE_FLEET_TOKEN"),
            ),
            (
                r#"{"key":"GH_TOKEN","source":"env"}"#,
                FleetSecretRef::with_source("GH_TOKEN", "env"),
            ),
        ];
        for (wire, expected) in cases {
            let parsed: FleetSecretRef = serde_json::from_str(wire).unwrap();
            assert_eq!(parsed, expected);
        }
    }
    /// The hyphenated spelling is accepted on input, while output always uses
    /// the underscore spelling.
    #[test]
    fn trust_level_accepts_hyphenated_remote_verified() {
        let parsed = serde_json::from_str::<FleetTrustLevel>(r#""remote-verified""#).unwrap();
        assert_eq!(parsed, FleetTrustLevel::RemoteVerified);

        let reserialized = serde_json::to_string(&parsed).unwrap();
        assert_eq!(reserialized, r#""remote_verified""#);
    }
    /// A Slack channel written with the older flat field names
    /// (`webhook_url`, `secret`) still deserializes into the endpoint shape.
    #[test]
    fn alert_channel_accepts_legacy_webhook_fields() {
        let legacy_wire = r#"{
                "kind": "slack",
                "webhook_url": "https://hooks.slack.com/test",
                "secret": "SLACK_SIGNING_SECRET"
            }"#;
        let parsed: FleetAlertChannel = serde_json::from_str(legacy_wire).unwrap();

        match parsed {
            FleetAlertChannel::Slack { webhook } => {
                let expected_secret = Some(FleetSecretRef::new("SLACK_SIGNING_SECRET"));
                assert_eq!(webhook.url.as_deref(), Some("https://hooks.slack.com/test"));
                assert_eq!(webhook.secret_ref, expected_secret);
            }
            other => panic!("expected slack channel, got {other:?}"),
        }
    }
    /// Pins the values produced by `FleetSecurityPolicy::default()`.
    #[test]
    fn security_policy_defaults_are_conservative() {
        let defaults = FleetSecurityPolicy::default();

        // Trust bounds: start in the sandbox, ceiling at operator.
        assert_eq!(defaults.default_trust_level, FleetTrustLevel::Sandbox);
        assert_eq!(defaults.max_trust_level, FleetTrustLevel::Operator);

        // Nothing is granted up front.
        assert!(defaults.allowed_secrets.is_empty());
        assert!(defaults.capability_grants.is_empty());

        // NOTE(review): identity verification is pinned as *not* required by
        // default here — confirm this is the intended "conservative" default.
        assert!(!defaults.require_identity_verification);
    }
    /// Trust levels compare in order of increasing privilege, and only the
    /// extremes are checked for secret/workspace permissions.
    #[test]
    fn trust_level_ordinal_reflects_privilege() {
        // Listed from least to most privileged; each neighbour must outrank
        // the one before it.
        let ascending = [
            FleetTrustLevel::Sandbox,
            FleetTrustLevel::Local,
            FleetTrustLevel::RemoteVerified,
            FleetTrustLevel::Operator,
        ];
        for pair in ascending.windows(2).rev() {
            assert!(pair[1] > pair[0]);
        }

        assert!(FleetTrustLevel::Operator.may_access_secrets());
        assert!(!FleetTrustLevel::Sandbox.may_access_secrets());
        assert!(!FleetTrustLevel::Sandbox.may_write_workspace());
        assert!(FleetTrustLevel::Operator.may_write_workspace());
    }
 }
@@ -737,6 +737,44 @@ impl Secrets {
    pub fn get(&self, name: &str) -> Result<Option<String>, SecretsError> {
        self.store.get(name)
    }
    /// Resolve a secret by key name with an optional source constraint.
    ///
    /// This is the fleet-worker secret resolution path. Unlike
    /// [`resolve`](Secrets::resolve), this does NOT map provider names
    /// to their canonical env vars — the caller controls the exact key
    /// and resolution order.
    ///
    /// `source_hint` controls the resolution order:
    /// - `Some("env")` — only check environment variables
    /// - `Some("keyring")` — only check the keyring/file store
    /// - `None` — try the store first, then fall back to environment
    #[must_use]
    pub fn resolve_direct(&self, key: &str, source_hint: Option<&str>) -> Option<String> {
        match source_hint {
            Some("env") => {
                // Only check process environment — skip the store entirely.
                std::env::var(key).ok().filter(|v| !v.trim().is_empty())
            }
            Some("keyring") | Some("file") => {
                // Only check the store backend.
                self.store
                    .get(key)
                    .ok()
                    .flatten()
                    .filter(|v| !v.trim().is_empty())
            }
            Some(_) | None => {
                // Default: store first, then env fallback.
                if let Ok(Some(v)) = self.store.get(key)
                    && !v.trim().is_empty()
                {
                    return Some(v);
                }
                std::env::var(key).ok().filter(|v| !v.trim().is_empty())
            }
        }
    }
 }
 /// Map a canonical provider name to its environment variable(s), returning
@@ -1883,6 +1883,10 @@ pub struct Config {
    #[serde(default)]
    pub context: ContextConfig,
    /// Agent Fleet trust/security/role/exec config.
    #[serde(default)]
    pub fleet: Option<codewhale_config::FleetConfigToml>,
    /// Sub-agent model overrides.
    #[serde(default)]
    pub subagents: Option<SubagentsConfig>,
@@ -5012,6 +5016,7 @@ fn merge_config(base: Config, override_cfg: Config) -> Config {
                .or(base.context.l3_threshold),
            seam_model: override_cfg.context.seam_model.or(base.context.seam_model),
        },
        fleet: override_cfg.fleet.or(base.fleet),
        subagents: override_cfg.subagents.or(base.subagents),
        strict_tool_mode: override_cfg.strict_tool_mode.or(base.strict_tool_mode),
        runtime_api: override_cfg.runtime_api.or(base.runtime_api),
@@ -373,6 +373,8 @@ pub struct SshFleetHostConfig {
    pub user: Option<String>,
    pub port: Option<u16>,
    pub identity: Option<PathBuf>,
    pub known_hosts: Option<PathBuf>,
    pub host_key_fingerprint: Option<String>,
    pub working_directory: PathBuf,
    pub env_allowlist: BTreeSet<String>,
    pub codewhale_binary: String,
@@ -387,6 +389,8 @@ impl SshFleetHostConfig {
            user: None,
            port: None,
            identity: None,
            known_hosts: None,
            host_key_fingerprint: None,
            working_directory: working_directory.into(),
            env_allowlist: BTreeSet::new(),
            codewhale_binary: "codewhale".to_string(),
@@ -401,6 +405,8 @@ impl SshFleetHostConfig {
            port,
            user,
            identity,
            known_hosts,
            host_key_fingerprint,
            working_directory,
            env_allowlist,
            codewhale_binary,
@@ -420,6 +426,8 @@ impl SshFleetHostConfig {
        config.port = *port;
        config.user = user.clone();
        config.identity = identity.clone();
        config.known_hosts = known_hosts.clone();
        config.host_key_fingerprint = host_key_fingerprint.clone();
        config.env_allowlist = env_allowlist.iter().cloned().collect();
        config.codewhale_binary = codewhale_binary;
        config.validate()?;
@@ -918,6 +926,8 @@ mod tests {
            port: Some(2222),
            user: Some("fleet".to_string()),
            identity: Some(PathBuf::from("/tmp/fleet_id")),
            known_hosts: None,
            host_key_fingerprint: None,
            working_directory: Some(PathBuf::from("/srv/codewhale")),
            env_allowlist: vec!["FLEET_PROFILE".to_string()],
            codewhale_binary: Some("/usr/local/bin/codewhale".to_string()),
@@ -661,12 +661,21 @@ fn sanitize_run_for_ledger(run: &FleetRun) -> FleetRun {
        if let Some(policy) = &mut task.alert_policy {
            for channel in &mut policy.channels {
                match channel {
-                    FleetAlertChannel::Slack { webhook_url } => {
+                    FleetAlertChannel::Slack { webhook } => {
-                        *webhook_url = "<redacted>".to_string();
+                        webhook.url = webhook.url.as_ref().map(|_| "<redacted>".to_string());
                    }
-                    FleetAlertChannel::Webhook { url, secret } => {
+                    FleetAlertChannel::Webhook { endpoint } => {
-                        *url = "<redacted>".to_string();
+                        *endpoint = FleetAlertEndpoint {
-                        *secret = secret.as_ref().map(|_| "<redacted>".to_string());
+                            url: endpoint.url.as_ref().map(|_| "<redacted>".to_string()),
                            url_ref: endpoint
                                .url_ref
                                .as_ref()
                                .map(|_| FleetSecretRef::new("<redacted>")),
                            secret_ref: endpoint
                                .secret_ref
                                .as_ref()
                                .map(|_| FleetSecretRef::new("<redacted>")),
                        };
                    }
                    FleetAlertChannel::PagerDuty { routing_key, .. } => {
                        *routing_key = "<redacted>".to_string();
@@ -691,6 +700,7 @@ mod tests {
            task_specs: vec![],
            worker_specs: vec![],
            labels: BTreeMap::new(),
            security_policy: None,
            created_at: "2026-06-12T17:00:00Z".to_string(),
            updated_at: None,
            completed_at: None,
@@ -21,14 +21,38 @@ use super::task_spec::{
    FleetTaskSpecDocument, FleetTaskVerificationInput, load_task_spec_document,
    record_verification_receipt, validate_task_spec_document, verify_task_result,
 };
 use super::worker_runtime;
 use crate::tools::subagent::SharedSubAgentManager;
 const DEFAULT_STALE_AFTER_SECONDS: u64 = 300;
 /// Entry point for fleet operations on one workspace.
 ///
 /// Holds the workspace root, the fleet ledger, the staleness threshold, the
 /// headless-worker execution policy, and an optional sub-agent manager.
 ///
 /// `Debug` is deliberately NOT derived: this type has a hand-written
 /// `impl std::fmt::Debug` that prints the sub-agent manager as a placeholder,
 /// and deriving it as well would produce a conflicting implementation.
 pub struct FleetManager {
    workspace: PathBuf,
    ledger: FleetLedger,
    stale_after: Duration,
    exec_config: codewhale_config::FleetExecConfig,
    /// Optional sub-agent manager for headless worker execution.
    /// When set, fleet workers spawn real sub-agents; when None,
    /// the manager falls back to local simulation.
    sub_agent_manager: Option<SharedSubAgentManager>,
 }
 impl std::fmt::Debug for FleetManager {
    /// Formats every field by name. The sub-agent manager itself is never
    /// printed: when attached it appears as the fixed placeholder
    /// `"SharedSubAgentManager"`, otherwise as `None`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let runtime_marker = self
            .sub_agent_manager
            .as_ref()
            .map(|_| "SharedSubAgentManager");

        let mut out = f.debug_struct("FleetManager");
        out.field("workspace", &self.workspace);
        out.field("ledger", &self.ledger);
        out.field("stale_after", &self.stale_after);
        out.field("exec_config", &self.exec_config);
        out.field("sub_agent_manager", &runtime_marker);
        out.finish()
    }
 }
 #[derive(Debug, Clone)]
@@ -78,6 +102,28 @@ pub struct FleetWorkerInspection {
    pub artifacts: Vec<FleetArtifactRef>,
    pub last_error: Option<String>,
    pub alert_state: Option<String>,
    /// Lightweight projection from the sub-agent worker runtime.
    /// Populated when a sub-agent manager is attached.
    pub runtime_state: Option<FleetWorkerRuntimeProjection>,
 }
 /// Lightweight TUI projection of a headless sub-agent worker's current state.
 ///
 /// Derived from the sub-agent manager's `AgentWorkerRecord`. Worker
 /// inspection fills this in only when a sub-agent manager is attached, its
 /// lock can be taken without blocking, and a record exists for the worker.
 #[derive(Debug, Clone)]
 pub struct FleetWorkerRuntimeProjection {
    /// Sub-agent lifecycle status, as the lower-cased `Debug` rendering of the
    /// record's status (e.g. "queued", "running", "completed").
    pub agent_status: String,
    /// Steps taken so far (tool calls + model turns)
    pub steps_taken: u32,
    /// Latest human-readable message from the worker
    pub latest_message: Option<String>,
    /// Error message if the worker failed
    pub error: Option<String>,
    /// Result summary if the worker completed
    pub result_summary: Option<String>,
    /// Whether the worker still counts as having a session: false once the
    /// status is Completed, Failed, or Cancelled, true for any other status.
    pub has_session: bool,
 }
 impl FleetManager {
@@ -88,6 +134,8 @@ impl FleetManager {
            workspace,
            ledger,
            stale_after: Duration::from_secs(DEFAULT_STALE_AFTER_SECONDS),
            exec_config: codewhale_config::FleetExecConfig::default(),
            sub_agent_manager: None,
        })
    }
@@ -96,6 +144,23 @@ impl FleetManager {
        self
    }
    /// Apply fleet headless-worker execution policy from config.
    ///
    /// Builder-style: consumes the manager, replaces its stored execution
    /// config with `exec_config`, and returns the manager.
    pub fn with_exec_config(mut self, exec_config: codewhale_config::FleetExecConfig) -> Self {
        self.exec_config = exec_config;
        self
    }
    /// Attach a sub-agent manager so fleet workers can spawn real headless agents.
    ///
    /// Builder-style: consumes the manager, stores `mgr` (replacing any
    /// previously attached one), and returns the manager.
    pub fn with_sub_agent_manager(mut self, mgr: SharedSubAgentManager) -> Self {
        self.sub_agent_manager = Some(mgr);
        self
    }
    /// True when the manager has a sub-agent runtime for headless worker execution.
    ///
    /// This only reports whether a sub-agent manager has been attached; it
    /// does not inspect that manager's state.
    pub fn has_worker_runtime(&self) -> bool {
        self.sub_agent_manager.is_some()
    }
    /// Path reported by the underlying fleet ledger.
    pub fn ledger_path(&self) -> &Path {
        self.ledger.path()
    }
@@ -139,6 +204,7 @@ impl FleetManager {
            task_specs: doc.tasks.clone(),
            worker_specs: doc.workers.clone(),
            labels: doc.labels,
            security_policy: doc.security_policy.clone(),
            created_at: now.clone(),
            updated_at: Some(now.clone()),
            completed_at: None,
@@ -280,6 +346,27 @@ impl FleetManager {
            .get(worker_id)
            .map(|heartbeat| heartbeat.timestamp.clone());
        let alert_state = latest_alert_for_worker(&state, worker_id);
        // Enrich with sub-agent worker runtime state when available.
        let runtime_state = self.sub_agent_manager.as_ref().and_then(|mgr| {
            mgr.try_read()
                .ok()
                .and_then(|guard| guard.get_worker_record(worker_id))
                .map(|record| FleetWorkerRuntimeProjection {
                    agent_status: format!("{:?}", record.status).to_lowercase(),
                    steps_taken: record.steps_taken,
                    latest_message: record.latest_message,
                    error: record.error,
                    result_summary: record.result_summary,
                    has_session: !matches!(
                        record.status,
                        crate::tools::subagent::AgentWorkerStatus::Completed
                            | crate::tools::subagent::AgentWorkerStatus::Failed
                            | crate::tools::subagent::AgentWorkerStatus::Cancelled
                    ),
                })
        });
        Ok(FleetWorkerInspection {
            worker_id: worker_id.to_string(),
            status,
@@ -293,6 +380,7 @@ impl FleetManager {
            artifacts,
            last_error,
            alert_state,
            runtime_state,
        })
    }
@@ -475,6 +563,45 @@ impl FleetManager {
            FleetWorkerEventPayload::Running,
        )?;
        self.ledger.heartbeat(worker_id, &timestamp(), None, None)?;
        // Register with the sub-agent manager for headless worker tracking.
        // The engine's agent_open path handles actual sub-agent spawning.
        if let Some(ref mgr) = self.sub_agent_manager {
            if let Ok(guard) = mgr.try_write() {
                let run = self
                    .ledger
                    .rebuild_state()
                    .ok()
                    .and_then(|state| state.runs.get(&entry.run_id.0).cloned());
                let worker_spec = run
                    .as_ref()
                    .and_then(|r| r.worker_specs.iter().find(|w| w.id == worker_id).cloned())
                    .unwrap_or_else(|| FleetWorkerSpec {
                        id: worker_id.to_string(),
                        name: worker_id.to_string(),
                        host: FleetHostSpec::Local,
                        trust_level: Some(FleetTrustLevel::Local),
                        labels: BTreeMap::new(),
                        capabilities: vec![],
                        max_concurrent_tasks: Some(1),
                    });
                let worker = worker_runtime::fleet_task_to_worker_spec(
                    worker_id,
                    &entry.run_id.0,
                    task_spec,
                    &worker_spec,
                    "auto",
                    &self.workspace,
                );
                let worker = worker_runtime::apply_exec_hardening(worker, &self.exec_config);
                // drop guard after registering so we don't hold the write lock
                drop(guard);
                if let Ok(mut guard) = mgr.try_write() {
                    guard.register_worker(worker);
                }
            }
        }
        self.maybe_complete_local_simulation(entry, worker_id, task_spec, log_artifact)
    }
@@ -746,6 +873,7 @@ fn default_local_workers(run_id: &FleetRunId, max_workers: usize) -> Vec<FleetWo
            id: format!("{}-local-{}", run_id.0, index),
            name: format!("Local worker {index}"),
            host: FleetHostSpec::Local,
            trust_level: Some(FleetTrustLevel::Local),
            labels: BTreeMap::new(),
            capabilities: vec!["local".to_string()],
            max_concurrent_tasks: Some(1),
@@ -1267,4 +1395,159 @@ mod tests {
            Some("escalated via pagerduty alert_id=alert-1")
        );
    }
    /// Smoke test: a two-task document with no explicit workers, created with
    /// a worker limit of 2, yields two workers and two tasks that are either
    /// queued or running.
    #[test]
    fn fleet_dogfood_smoke_run_two_local_workers_two_tasks() {
        let tmp = TempDir::new().unwrap();
        let workspace = tmp.path().join("repo");

        // Lay out a minimal crate (Cargo.toml + src/lib.rs) so the cargo-check
        // task has something to work on. Creating `src` also creates the
        // workspace directory itself.
        let src_dir = workspace.join("src");
        std::fs::create_dir_all(&src_dir).unwrap();
        std::fs::write(
            workspace.join("Cargo.toml"),
            "[package]\nname = \"smoke\"\nversion = \"0.1.0\"\nedition = \"2021\"\n",
        )
        .unwrap();
        std::fs::write(src_dir.join("lib.rs"), "pub fn answer() -> u8 { 42 }\n").unwrap();

        // Pieces shared by both task specs; each call builds a fresh value.
        let read_only_rust_worker = |role: &str| {
            Some(FleetTaskWorkerProfile {
                role: Some(role.to_string()),
                tool_profile: Some("read-only".to_string()),
                tools: vec!["cargo".to_string()],
                capabilities: vec!["rust".to_string()],
            })
        };
        let path_only_environment = || {
            Some(FleetEnvironmentRequirements {
                required: vec!["PATH".to_string()],
                allowlist: vec![],
            })
        };
        let single_attempt = || {
            Some(FleetRetryPolicy {
                max_attempts: 1,
                ..Default::default()
            })
        };
        let log_and_receipt = || vec![FleetArtifactKind::Log, FleetArtifactKind::Receipt];

        let check_task = FleetTaskSpec {
            id: "check".to_string(),
            name: "check".to_string(),
            description: None,
            objective: Some("cargo check".to_string()),
            instructions: "run cargo check and report result".to_string(),
            worker: read_only_rust_worker("release-checker"),
            workspace: Some(FleetWorkspaceRequirements {
                root: None,
                required_files: vec![PathBuf::from("Cargo.toml")],
                writable_paths: vec![PathBuf::from(".codewhale/fleet")],
                environment: path_only_environment(),
            }),
            input_files: vec![],
            context: vec![],
            budget: None,
            tags: vec!["smoke".to_string()],
            expected_artifacts: log_and_receipt(),
            scorer: Some(FleetScorerSpec::ExitCode),
            retry_policy: single_attempt(),
            alert_policy: None,
            timeout_seconds: Some(60),
            metadata: BTreeMap::new(),
        };
        let review_task = FleetTaskSpec {
            id: "review".to_string(),
            name: "review".to_string(),
            description: None,
            objective: Some("review source".to_string()),
            instructions: "read src/lib.rs and report findings".to_string(),
            worker: read_only_rust_worker("reviewer"),
            workspace: Some(FleetWorkspaceRequirements {
                root: None,
                required_files: vec![],
                writable_paths: vec![],
                environment: path_only_environment(),
            }),
            input_files: vec![],
            context: vec![],
            budget: None,
            tags: vec!["smoke".to_string()],
            expected_artifacts: log_and_receipt(),
            scorer: None,
            retry_policy: single_attempt(),
            alert_policy: None,
            timeout_seconds: Some(60),
            metadata: BTreeMap::new(),
        };

        let document = FleetTaskSpecDocument {
            name: Some("dogfood smoke".to_string()),
            labels: BTreeMap::new(),
            security_policy: Some(FleetSecurityPolicy {
                default_trust_level: FleetTrustLevel::Local,
                ..Default::default()
            }),
            workers: vec![],
            tasks: vec![check_task, review_task],
        };

        let manager = FleetManager::open(&workspace).unwrap();
        let report = manager.create_run(document, 2).unwrap();
        assert_eq!(report.task_count, 2);
        assert!(!report.worker_ids.is_empty());
        assert_eq!(report.worker_ids.len(), 2);

        // After immediate scheduling, tasks may already be leased,
        // so queued+running should total 2.
        let status = manager.run_status(&report.run_id).unwrap();
        assert_eq!(status.queued + status.running, 2);
    }
    /// A `security_policy` block in a task-spec file is carried onto the run
    /// that the ledger state reports for it.
    #[test]
    fn fleet_security_policy_propagates_from_task_spec_document_to_run() {
        let tmp = TempDir::new().unwrap();
        let manager = FleetManager::open(tmp.path()).unwrap();

        // Write a dedicated spec file that carries a security_policy block.
        let doc = serde_json::json!({
            "name": "secure smoke",
            "tasks": [{
                "id": "task-a",
                "name": "task-a",
                "instructions": "report ok",
                "expected_artifacts": ["log"]
            }],
            "security_policy": {
                "default_trust_level": "local",
                "allowed_secrets": [{"key": "GH_TOKEN", "source": "env"}],
                "max_trust_level": "remote_verified",
                "require_identity_verification": true
            }
        });
        let spec_path = tmp.path().join("secure-tasks.json");
        std::fs::write(&spec_path, serde_json::to_string_pretty(&doc).unwrap()).unwrap();

        let report = manager
            .create_run_from_task_spec_path(&spec_path, 1)
            .unwrap();

        // Read the run back from the ledger and check every policy field.
        let state = manager.ledger.rebuild_state().unwrap();
        let run = state.runs.get(&report.run_id.0).unwrap();
        let policy = run.security_policy.as_ref().unwrap();
        assert_eq!(policy.default_trust_level, FleetTrustLevel::Local);
        assert_eq!(policy.allowed_secrets.len(), 1);
        assert_eq!(policy.allowed_secrets[0].key, "GH_TOKEN");
        assert_eq!(policy.max_trust_level, FleetTrustLevel::RemoteVerified);
        assert!(policy.require_identity_verification);
    }
 }
@@ -6,3 +6,4 @@ pub mod ledger;
 pub mod manager;
 pub mod scheduler;
 pub mod task_spec;
 pub mod worker_runtime;
@@ -573,6 +573,7 @@ mod tests {
            id: id.to_string(),
            name: id.to_string(),
            host: FleetHostSpec::Local,
            trust_level: Some(FleetTrustLevel::Local),
            labels: BTreeMap::new(),
            capabilities: vec!["local".to_string()],
            max_concurrent_tasks: Some(1),
@@ -622,6 +623,7 @@ mod tests {
                    .map(|idx| worker(&format!("worker-{idx}")))
                    .collect(),
                labels: BTreeMap::new(),
                security_policy: None,
                created_at: scheduler.timestamp(),
                updated_at: None,
                completed_at: None,
@@ -704,7 +706,7 @@ mod tests {
        failing.alert_policy = Some(FleetAlertPolicy {
            events: vec![FleetAlertEventClass::RestartExhausted],
            channels: vec![FleetAlertChannel::Slack {
-                webhook_url: "https://hooks.slack.invalid/secret".to_string(),
+                webhook: FleetAlertEndpoint::inline("https://hooks.slack.invalid/secret"),
            }],
            after_attempts: Some(1),
            after_minutes_stale: Some(1),
@@ -23,6 +23,9 @@ pub struct FleetTaskSpecDocument {
    pub name: Option<String>,
    #[serde(default)]
    pub labels: BTreeMap<String, String>,
    #[serde(default)]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub security_policy: Option<FleetSecurityPolicy>,
    #[serde(default, alias = "worker_specs")]
    pub workers: Vec<FleetWorkerSpec>,
    #[serde(default)]
@@ -49,12 +52,14 @@ impl FleetTaskSpecFile {
            Self::Tasks(tasks) => FleetTaskSpecDocument {
                name: Some(fallback_name),
                labels: BTreeMap::new(),
                security_policy: None,
                workers: Vec::new(),
                tasks,
            },
            Self::Single(task) => FleetTaskSpecDocument {
                name: Some(fallback_name),
                labels: BTreeMap::new(),
                security_policy: None,
                workers: Vec::new(),
                tasks: vec![task],
            },
@@ -0,0 +1,626 @@
 //! Fleet worker runtime — bridges fleet task specs to headless sub-agent execution.
 //!
 //! This module makes fleet workers real: instead of simulating task completion,
 //! each fleet worker spawns a headless sub-agent that runs the task instructions
 //! and streams progress back into the fleet ledger.
 //!
 //! Architecture:
 //! - `FleetTaskSpec` + `FleetWorkerSpec` → `AgentWorkerSpec`
 //! - `SubAgentManager::register_worker()` tracks the worker
 //! - Sub-agent spawn happens through the existing `agent_open` machinery
 //! - Mailbox events stream into fleet ledger as `FleetWorkerEventPayload`
 //! - `FleetWorkerInspection` reads both ledger state and sub-agent worker records
 #![allow(dead_code)]
 use codewhale_protocol::fleet::{
    FleetHostSpec, FleetTaskSpec, FleetTaskWorkerProfile, FleetWorkerEventPayload, FleetWorkerSpec,
 };
 use super::host::FleetHostKind;
 use crate::tools::subagent::{
    AgentWorkerSpec, AgentWorkerStatus, AgentWorkerToolProfile, SubAgentType,
 };
 /// Classify a fleet worker spec's host as a `FleetHostKind`.
 ///
 /// SSH hosts map to `FleetHostKind::Ssh`; local and Docker hosts both map to
 /// `FleetHostKind::LocalProcess`.
 pub fn fleet_host_kind_for_spec(spec: &FleetWorkerSpec) -> FleetHostKind {
    match spec.host {
        FleetHostSpec::Ssh { .. } => FleetHostKind::Ssh,
        // Docker has no dedicated kind here; it is treated as local-ish.
        FleetHostSpec::Local | FleetHostSpec::Docker { .. } => FleetHostKind::LocalProcess,
    }
 }
 /// Return the short static label for a `FleetHostKind`.
 ///
 /// `Ssh` yields `"ssh"` and `LocalProcess` yields `"local"`.
 pub fn fleet_host_kind_label(kind: FleetHostKind) -> &'static str {
    // Kept exhaustive (no wildcard) so a new host kind must choose a label.
    match kind {
        FleetHostKind::Ssh => "ssh",
        FleetHostKind::LocalProcess => "local",
    }
 }
 /// Translate a fleet task into the `AgentWorkerSpec` for its sub-agent.
 ///
 /// - `objective` is the prompt assembled by `fleet_task_prompt`.
 /// - The task's worker `role`, when present, is kept verbatim and also mapped
 ///   to a `SubAgentType` through `fleet_role_to_agent_type`.
 /// - The worker profile's tool list becomes the `AgentWorkerToolProfile`.
 /// - `max_steps` is the budget's `max_tool_calls`, or `u32::MAX` when the task
 ///   declares none.
 /// - The worker starts at `spawn_depth` 0 with the `max_spawn_depth` of a
 ///   default `FleetExecConfig`.
 ///
 /// `_worker_spec` is accepted but not consulted here.
 pub fn fleet_task_to_worker_spec(
    worker_id: &str,
    run_id: &str,
    task_spec: &FleetTaskSpec,
    _worker_spec: &FleetWorkerSpec,
    model: &str,
    workspace: &std::path::Path,
 ) -> AgentWorkerSpec {
    // The optional worker profile feeds the role, the agent type and the tools.
    let profile = task_spec.worker.as_ref();
    let role = profile.and_then(|p| p.role.clone());
    let agent_type = fleet_role_to_agent_type(role.as_deref());
    let step_limit = task_spec
        .budget
        .as_ref()
        .and_then(|budget| budget.max_tool_calls)
        .unwrap_or(u32::MAX);
    let default_exec = codewhale_config::FleetExecConfig::default();
    AgentWorkerSpec {
        worker_id: worker_id.to_owned(),
        run_id: run_id.to_owned(),
        parent_run_id: None,
        session_name: Some(format!("fleet-{}-{}", worker_id, task_spec.id)),
        objective: fleet_task_prompt(task_spec),
        role,
        agent_type,
        model: model.to_owned(),
        workspace: workspace.to_owned(),
        git_branch: None,
        context_mode: String::from("fresh"),
        fork_context: false,
        tool_profile: fleet_tool_profile(profile),
        max_steps: step_limit,
        spawn_depth: 0,
        max_spawn_depth: default_exec.max_spawn_depth,
    }
 }
 /// Assemble the prompt text for a fleet task.
 ///
 /// Sections, in order: the task name, an `Objective` (the task's `objective`,
 /// falling back to its `description`; omitted when neither is set), the
 /// `Instructions`, then optional `Context` and `Input files` bullet lists.
 /// Every section heading is preceded by a blank line.
 fn fleet_task_prompt(task_spec: &FleetTaskSpec) -> String {
    let mut prompt = String::from("Fleet task: ");
    prompt.push_str(&task_spec.name);
    let goal = task_spec
        .objective
        .as_deref()
        .or(task_spec.description.as_deref());
    if let Some(goal) = goal {
        prompt.push_str("\n\nObjective:\n");
        prompt.push_str(goal);
    }
    prompt.push_str("\n\nInstructions:\n");
    prompt.push_str(&task_spec.instructions);
    if !task_spec.context.is_empty() {
        prompt.push_str("\n\nContext:\n");
        for item in &task_spec.context {
            prompt.push_str("- ");
            prompt.push_str(item);
            prompt.push('\n');
        }
    }
    if !task_spec.input_files.is_empty() {
        // The context list ends in a newline but the instructions usually do
        // not; without this the heading would be glued to their last line
        // whenever the task has no context entries.
        if !prompt.ends_with('\n') {
            prompt.push('\n');
        }
        prompt.push_str("\nInput files:\n");
        for path in &task_spec.input_files {
            prompt.push_str("- ");
            prompt.push_str(&path.display().to_string());
            prompt.push('\n');
        }
    }
    prompt
 }
 /// Resolve a fleet role name to the `SubAgentType` that should run it.
 ///
 /// A missing role is treated like `"general"`. The fleet role aliases are
 /// matched first; any other name is handed to `SubAgentType::from_str`, and
 /// `General` is used when that does not produce a type.
 fn fleet_role_to_agent_type(role: Option<&str>) -> SubAgentType {
    match role.unwrap_or("general") {
        "general" => SubAgentType::General,
        "smoke-runner" | "read-only" => SubAgentType::ToolAgent,
        "reviewer" => SubAgentType::Review,
        "builder" => SubAgentType::Implementer,
        "verifier" | "tester" => SubAgentType::Verifier,
        "planner" => SubAgentType::Plan,
        "explorer" => SubAgentType::Explore,
        // Not a fleet alias: it may still be a native sub-agent type name.
        other => SubAgentType::from_str(other).unwrap_or(SubAgentType::General),
    }
 }
 /// Derive the sub-agent tool profile from an optional fleet worker profile.
 ///
 /// A profile with a non-empty `tools` list yields `Explicit` with a copy of
 /// that list; a missing profile or an empty list yields `Inherited`.
 fn fleet_tool_profile(profile: Option<&FleetTaskWorkerProfile>) -> AgentWorkerToolProfile {
    profile
        .filter(|p| !p.tools.is_empty())
        .map_or(AgentWorkerToolProfile::Inherited, |p| {
            AgentWorkerToolProfile::Explicit(p.tools.clone())
        })
 }
 /// Wrap a worker output path in a `FleetArtifactRef` of the given kind.
 ///
 /// The path is stored exactly as supplied; `checksum`, `mime_type` and
 /// `size_bytes` are left unset. `_run_id` and `_worker_id` are accepted but
 /// not used here.
 ///
 /// NOTE(review): fleet conventions place logs under `.codewhale/fleet/` and
 /// reports under `.codewhale/fleet/reports/`. This function does not enforce
 /// that layout, so presumably callers pass a conforming path — confirm.
 pub fn fleet_artifact_ref(
    _run_id: &str,
    _worker_id: &str,
    kind: codewhale_protocol::fleet::FleetArtifactKind,
    path: std::path::PathBuf,
 ) -> codewhale_protocol::fleet::FleetArtifactRef {
    codewhale_protocol::fleet::FleetArtifactRef {
        path,
        kind,
        size_bytes: None,
        mime_type: None,
        checksum: None,
    }
 }
 /// Convert a sub-agent worker status into the matching fleet ledger payload.
 ///
 /// `message` supplies the summary for `Completed`, the reason for `Failed`
 /// (defaulting to `"unknown error"`) and the signal for `Interrupted`.
 /// `tool_name` is used only for `RunningTool` (defaulting to `"unknown"`).
 /// `Completed` always reports exit code 0 and `Failed` is always marked
 /// non-recoverable.
 pub fn agent_status_to_fleet_event(
    status: AgentWorkerStatus,
    message: Option<&str>,
    tool_name: Option<&str>,
 ) -> FleetWorkerEventPayload {
    // Only some payloads carry the message, so convert it on demand.
    let owned_message = || message.map(str::to_owned);
    match status {
        AgentWorkerStatus::Queued => FleetWorkerEventPayload::Queued,
        AgentWorkerStatus::Starting => FleetWorkerEventPayload::Starting,
        AgentWorkerStatus::Running => FleetWorkerEventPayload::Running,
        // NOTE(review): waiting on the user is reported as a model wait;
        // presumably the fleet payload has no dedicated variant — confirm.
        AgentWorkerStatus::WaitingForUser | AgentWorkerStatus::ModelWait => {
            FleetWorkerEventPayload::ModelWait { model: None }
        }
        AgentWorkerStatus::RunningTool => FleetWorkerEventPayload::RunningTool {
            tool: tool_name.unwrap_or("unknown").to_owned(),
            call_id: None,
        },
        AgentWorkerStatus::Completed => FleetWorkerEventPayload::Completed {
            exit_code: Some(0),
            summary: owned_message(),
        },
        AgentWorkerStatus::Failed => FleetWorkerEventPayload::Failed {
            reason: message.unwrap_or("unknown error").to_owned(),
            recoverable: false,
        },
        AgentWorkerStatus::Cancelled => FleetWorkerEventPayload::Cancelled { cancelled_by: None },
        AgentWorkerStatus::Interrupted => FleetWorkerEventPayload::Interrupted {
            signal: owned_message(),
        },
    }
 }
 /// Tighten a worker spec according to the fleet exec configuration (#3027).
 ///
 /// In order:
 /// 1. caps `max_steps` at `exec.max_turns` (a `max_turns` of 0 leaves the
 ///    budget untouched);
 /// 2. replaces `max_spawn_depth` with the configured value, clamped to
 ///    `codewhale_config::MAX_SPAWN_DEPTH_CEILING`;
 /// 3. filters the tool profile when any allow/deny rule is configured;
 /// 4. appends `exec.append_system_prompt` to the objective under a
 ///    `[Policy]` heading when it is non-empty.
 ///
 /// Returns the adjusted spec.
 pub fn apply_exec_hardening(
    mut spec: AgentWorkerSpec,
    exec: &codewhale_config::FleetExecConfig,
 ) -> AgentWorkerSpec {
    // `u32::MAX` needs no special case: taking the minimum with it is a no-op.
    if exec.max_turns > 0 {
        spec.max_steps = std::cmp::min(spec.max_steps, exec.max_turns);
    }
    spec.max_spawn_depth = std::cmp::min(
        exec.max_spawn_depth,
        codewhale_config::MAX_SPAWN_DEPTH_CEILING,
    );
    let has_tool_rules = !(exec.allowed_tools.is_empty() && exec.disallowed_tools.is_empty());
    if has_tool_rules {
        spec.tool_profile = filter_tool_profile(&spec.tool_profile, exec);
    }
    if !exec.append_system_prompt.is_empty() {
        spec.objective.push_str("\n\n[Policy]\n");
        spec.objective.push_str(&exec.append_system_prompt);
    }
    spec
 }
 /// Restrict a tool profile to what the exec configuration permits.
 ///
 /// For an `Explicit` list, a tool is kept when it passes the allowlist (a
 /// non-empty `allowed_tools` must contain it) and is absent from
 /// `disallowed_tools`; the denylist therefore wins over the allowlist.
 /// An `Inherited` profile is returned unchanged.
 fn filter_tool_profile(
    profile: &AgentWorkerToolProfile,
    exec: &codewhale_config::FleetExecConfig,
 ) -> AgentWorkerToolProfile {
    match profile {
        // Inherited profiles can't be filtered at spec time;
        // the sub-agent spawn path applies tool filtering.
        AgentWorkerToolProfile::Inherited => AgentWorkerToolProfile::Inherited,
        AgentWorkerToolProfile::Explicit(tools) => {
            let allowlist = &exec.allowed_tools;
            let denylist = &exec.disallowed_tools;
            let kept: Vec<String> = tools
                .iter()
                // An empty allowlist means "no allowlist restriction".
                .filter(|tool| allowlist.is_empty() || allowlist.contains(tool))
                .filter(|tool| !denylist.contains(tool))
                .cloned()
                .collect();
            AgentWorkerToolProfile::Explicit(kept)
        }
    }
 }
 /// Report whether a tool is on the read-only list eligible for conservative
 /// parallel batching (#2983).
 ///
 /// The comparison is an exact, case-sensitive match on the tool name; any
 /// name not on the list returns `false`.
 pub fn is_parallel_safe_read_only_tool(tool_name: &str) -> bool {
    const READ_ONLY_TOOLS: &[&str] = &[
        "read_file",
        "grep_files",
        "file_search",
        "list_dir",
        "git_status",
        "git_diff",
        "git_log",
        "git_show",
        "git_blame",
        "fetch_url",
        "web_search",
        "tool_search_tool_regex",
        "tool_search_tool_bm25",
    ];
    READ_ONLY_TOOLS.iter().any(|&known| known == tool_name)
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    /// Minimal task with every optional field unset; tests override what they
    /// need with struct-update syntax.
    fn sample_task(id: &str, name: &str, instructions: &str) -> FleetTaskSpec {
        FleetTaskSpec {
            id: id.to_string(),
            name: name.to_string(),
            description: None,
            objective: None,
            instructions: instructions.to_string(),
            worker: None,
            workspace: None,
            input_files: vec![],
            context: vec![],
            budget: None,
            tags: vec![],
            expected_artifacts: vec![],
            scorer: None,
            retry_policy: None,
            alert_policy: None,
            timeout_seconds: None,
            metadata: Default::default(),
        }
    }
    /// Baseline worker spec for the hardening tests: inherited tools, depth 0
    /// and a zero spawn-depth budget, so any change comes from hardening.
    fn sample_agent_spec(objective: &str, max_steps: u32) -> AgentWorkerSpec {
        AgentWorkerSpec {
            worker_id: "w1".to_string(),
            run_id: "r1".to_string(),
            parent_run_id: None,
            session_name: None,
            objective: objective.to_string(),
            role: None,
            agent_type: SubAgentType::General,
            model: "auto".to_string(),
            workspace: std::path::PathBuf::from("/tmp"),
            git_branch: None,
            context_mode: "fresh".to_string(),
            fork_context: false,
            tool_profile: AgentWorkerToolProfile::Inherited,
            max_steps,
            spawn_depth: 0,
            max_spawn_depth: 0,
        }
    }
    /// Owned tool-name list from string literals.
    fn tool_list(names: &[&str]) -> Vec<String> {
        names.iter().map(|name| name.to_string()).collect()
    }
    /// Worker profile that only sets the tool list.
    fn profile_with_tools(tools: Vec<String>) -> FleetTaskWorkerProfile {
        FleetTaskWorkerProfile {
            role: None,
            tool_profile: None,
            tools,
            capabilities: vec![],
        }
    }
    #[test]
    fn fleet_role_smoke_runner_maps_to_tool_agent() {
        assert_eq!(
            fleet_role_to_agent_type(Some("smoke-runner")),
            SubAgentType::ToolAgent
        );
    }
    #[test]
    fn fleet_role_reviewer_maps_to_review() {
        assert_eq!(
            fleet_role_to_agent_type(Some("reviewer")),
            SubAgentType::Review
        );
    }
    #[test]
    fn fleet_role_builder_maps_to_implementer() {
        assert_eq!(
            fleet_role_to_agent_type(Some("builder")),
            SubAgentType::Implementer
        );
    }
    #[test]
    fn fleet_role_none_maps_to_general() {
        assert_eq!(fleet_role_to_agent_type(None), SubAgentType::General);
    }
    #[test]
    fn unknown_role_maps_to_general() {
        assert_eq!(
            fleet_role_to_agent_type(Some("nonexistent-role")),
            SubAgentType::General
        );
    }
    #[test]
    fn fleet_tool_profile_empty_uses_inherited() {
        let profile = profile_with_tools(vec![]);
        assert_eq!(
            fleet_tool_profile(Some(&profile)),
            AgentWorkerToolProfile::Inherited
        );
    }
    #[test]
    fn fleet_tool_profile_explicit_passes_tools() {
        let profile = profile_with_tools(tool_list(&["cargo", "git"]));
        assert_eq!(
            fleet_tool_profile(Some(&profile)),
            AgentWorkerToolProfile::Explicit(tool_list(&["cargo", "git"]))
        );
    }
    #[test]
    fn fleet_task_prompt_includes_instructions_context_and_input_files() {
        let task = FleetTaskSpec {
            objective: Some("Find protocol regressions".to_string()),
            input_files: vec![std::path::PathBuf::from("crates/protocol/src/fleet.rs")],
            context: vec!["Keep the report concise.".to_string()],
            ..sample_task(
                "review",
                "Review protocol",
                "Read the fleet protocol and report issues.",
            )
        };
        let prompt = fleet_task_prompt(&task);
        assert!(prompt.contains("Review protocol"));
        assert!(prompt.contains("Find protocol regressions"));
        assert!(prompt.contains("Read the fleet protocol and report issues."));
        assert!(prompt.contains("Keep the report concise."));
        assert!(prompt.contains("crates/protocol/src/fleet.rs"));
    }
    #[test]
    fn fleet_worker_spec_defaults_to_shared_subagent_spawn_depth() {
        let task = sample_task("task-1", "Task", "Do the task.");
        let worker = FleetWorkerSpec {
            id: "worker-1".to_string(),
            name: "Worker".to_string(),
            host: FleetHostSpec::Local,
            trust_level: None,
            labels: Default::default(),
            capabilities: vec![],
            max_concurrent_tasks: None,
        };
        let spec = fleet_task_to_worker_spec(
            "worker-1",
            "run-1",
            &task,
            &worker,
            "auto",
            std::path::Path::new("/tmp"),
        );
        // Root fleet worker runs at depth 0; its budget equals the shared
        // sub-agent default (3) so fleet and sub-agents are one substrate and
        // at least 3 nested delegation levels are afforded.
        assert_eq!(spec.spawn_depth, 0);
        assert_eq!(spec.max_spawn_depth, codewhale_config::DEFAULT_SPAWN_DEPTH);
        assert_eq!(spec.max_spawn_depth, 3);
        // End-to-end reachability: walk the SAME gate the SubAgentRuntime
        // enforces (`would_exceed_depth` = `spawn_depth + 1 > max_spawn_depth`).
        // A depth-0 root must reach 3 nested levels, then stop. This fails if
        // anyone lowers the shared default below 3 (Hunter: afford >= 3).
        let hardened = apply_exec_hardening(spec, &codewhale_config::FleetExecConfig::default());
        let would_exceed = |spawn_depth: u32| spawn_depth + 1 > hardened.max_spawn_depth;
        assert!(
            !would_exceed(0),
            "root (depth 0) must spawn a child at depth 1"
        );
        assert!(!would_exceed(1), "depth-1 child must spawn to depth 2");
        assert!(!would_exceed(2), "depth-2 child must spawn to depth 3");
        assert!(
            would_exceed(3),
            "depth 3 is the afforded ceiling; depth 4 is blocked"
        );
    }
    #[test]
    fn exec_hardening_caps_max_steps_to_max_turns() {
        let exec = codewhale_config::FleetExecConfig {
            max_turns: 50,
            ..Default::default()
        };
        let hardened = apply_exec_hardening(sample_agent_spec("test", 1000), &exec);
        assert_eq!(hardened.max_steps, 50);
    }
    #[test]
    fn exec_hardening_applies_and_clamps_spawn_depth() {
        // (configured depth, depth expected after clamping to the ceiling)
        let cases: [(u32, u32); 3] = [(2, 2), (99, 3), (0, 0)];
        for (configured, expected) in cases {
            let exec = codewhale_config::FleetExecConfig {
                max_spawn_depth: configured,
                ..Default::default()
            };
            let hardened = apply_exec_hardening(sample_agent_spec("test", 1000), &exec);
            assert_eq!(
                hardened.max_spawn_depth, expected,
                "configured max_spawn_depth = {configured}"
            );
        }
    }
    #[test]
    fn exec_hardening_filters_disallowed_tools() {
        let profile =
            AgentWorkerToolProfile::Explicit(tool_list(&["read_file", "exec_shell", "git_diff"]));
        let exec = codewhale_config::FleetExecConfig {
            disallowed_tools: tool_list(&["exec_shell"]),
            ..Default::default()
        };
        let filtered = filter_tool_profile(&profile, &exec);
        assert_eq!(
            filtered,
            AgentWorkerToolProfile::Explicit(tool_list(&["read_file", "git_diff"]))
        );
    }
    #[test]
    fn exec_hardening_allowed_tools_acts_as_allowlist() {
        let profile =
            AgentWorkerToolProfile::Explicit(tool_list(&["read_file", "exec_shell", "git_diff"]));
        let exec = codewhale_config::FleetExecConfig {
            allowed_tools: tool_list(&["read_file", "git_diff"]),
            ..Default::default()
        };
        let filtered = filter_tool_profile(&profile, &exec);
        assert_eq!(
            filtered,
            AgentWorkerToolProfile::Explicit(tool_list(&["read_file", "git_diff"]))
        );
    }
    #[test]
    fn exec_hardening_allowed_plus_disallowed_disallowed_wins() {
        let profile = AgentWorkerToolProfile::Explicit(tool_list(&["read_file", "exec_shell"]));
        let exec = codewhale_config::FleetExecConfig {
            allowed_tools: tool_list(&["read_file", "exec_shell"]),
            disallowed_tools: tool_list(&["exec_shell"]),
            ..Default::default()
        };
        let filtered = filter_tool_profile(&profile, &exec);
        assert_eq!(
            filtered,
            AgentWorkerToolProfile::Explicit(tool_list(&["read_file"]))
        );
    }
    #[test]
    fn parallel_safe_read_only_tools_includes_grep_and_read() {
        assert!(is_parallel_safe_read_only_tool("read_file"));
        assert!(is_parallel_safe_read_only_tool("grep_files"));
        assert!(is_parallel_safe_read_only_tool("git_status"));
        assert!(is_parallel_safe_read_only_tool("web_search"));
    }
    #[test]
    fn destructive_tools_not_parallel_safe() {
        assert!(!is_parallel_safe_read_only_tool("exec_shell"));
        assert!(!is_parallel_safe_read_only_tool("write_file"));
        assert!(!is_parallel_safe_read_only_tool("edit_file"));
        assert!(!is_parallel_safe_read_only_tool("apply_patch"));
        assert!(!is_parallel_safe_read_only_tool("agent_open"));
    }
    #[test]
    fn exec_hardening_appends_system_prompt() {
        let exec = codewhale_config::FleetExecConfig {
            append_system_prompt: "never push to main".to_string(),
            ..Default::default()
        };
        let hardened = apply_exec_hardening(sample_agent_spec("do the thing", 100), &exec);
        assert!(hardened.objective.contains("do the thing"));
        assert!(hardened.objective.contains("[Policy]"));
        assert!(hardened.objective.contains("never push to main"));
    }
 }
@@ -255,6 +255,7 @@ pub enum MessageId {
    ConfigSectionSidebar,
    ConfigSectionHistory,
    ConfigSectionMcp,
    ConfigSectionFleet,
    ConfigSectionExperimental,
    ConfigScopeSession,
    ConfigScopeSaved,
@@ -699,6 +700,7 @@ pub const ALL_MESSAGE_IDS: &[MessageId] = &[
    MessageId::ConfigSectionSidebar,
    MessageId::ConfigSectionHistory,
    MessageId::ConfigSectionMcp,
    MessageId::ConfigSectionFleet,
    MessageId::ConfigSectionExperimental,
    MessageId::ConfigScopeSession,
    MessageId::ConfigScopeSaved,
@@ -1315,6 +1317,7 @@ fn english(id: MessageId) -> &'static str {
        MessageId::ConfigSectionSidebar => "Sidebar",
        MessageId::ConfigSectionHistory => "History",
        MessageId::ConfigSectionMcp => "MCP",
        MessageId::ConfigSectionFleet => "Fleet",
        MessageId::ConfigSectionExperimental => "Experimental",
        MessageId::ConfigScopeSession => "SESSION",
        MessageId::ConfigScopeSaved => "SAVED",
@@ -1915,6 +1918,7 @@ fn vietnamese(id: MessageId) -> Option<&'static str> {
        MessageId::ConfigSectionSidebar => "Thanh bên",
        MessageId::ConfigSectionHistory => "Lịch sử",
        MessageId::ConfigSectionMcp => "MCP",
        MessageId::ConfigSectionFleet => "Fleet",
        MessageId::ConfigSectionExperimental => "Thử nghiệm",
        MessageId::ConfigScopeSession => "PHIÊN",
        MessageId::ConfigScopeSaved => "ĐÃ LƯU",
@@ -2624,6 +2628,7 @@ fn traditional_chinese(id: MessageId) -> Option<&'static str> {
        MessageId::ConfigSectionSidebar => "側邊欄",
        MessageId::ConfigSectionHistory => "歷史",
        MessageId::ConfigSectionMcp => "MCP",
        MessageId::ConfigSectionFleet => "艦隊",
        MessageId::ConfigSectionExperimental => "實驗",
        MessageId::ConfigScopeSession => "會話",
        MessageId::ConfigScopeSaved => "已儲存",
@@ -2724,6 +2729,7 @@ fn japanese(id: MessageId) -> Option<&'static str> {
        MessageId::ConfigSectionSidebar => "サイドバー",
        MessageId::ConfigSectionHistory => "履歴",
        MessageId::ConfigSectionMcp => "MCP",
        MessageId::ConfigSectionFleet => "Fleet",
        MessageId::ConfigSectionExperimental => "実験",
        MessageId::ConfigScopeSession => "セッション",
        MessageId::ConfigScopeSaved => "保存済み",
@@ -3315,6 +3321,7 @@ fn chinese_simplified(id: MessageId) -> Option<&'static str> {
        MessageId::ConfigSectionSidebar => "侧边栏",
        MessageId::ConfigSectionHistory => "历史",
        MessageId::ConfigSectionMcp => "MCP",
        MessageId::ConfigSectionFleet => "舰队",
        MessageId::ConfigSectionExperimental => "实验",
        MessageId::ConfigScopeSession => "会话",
        MessageId::ConfigScopeSaved => "已保存",
@@ -3842,6 +3849,7 @@ fn portuguese_brazil(id: MessageId) -> Option<&'static str> {
        MessageId::ConfigSectionSidebar => "Barra lateral",
        MessageId::ConfigSectionHistory => "Histórico",
        MessageId::ConfigSectionMcp => "MCP",
        MessageId::ConfigSectionFleet => "Fleet",
        MessageId::ConfigSectionExperimental => "Experimental",
        MessageId::ConfigScopeSession => "SESSÃO",
        MessageId::ConfigScopeSaved => "SALVO",
@@ -4461,6 +4469,7 @@ fn spanish_latin_america(id: MessageId) -> Option<&'static str> {
        MessageId::ConfigSectionSidebar => "Barra lateral",
        MessageId::ConfigSectionHistory => "Historial",
        MessageId::ConfigSectionMcp => "MCP",
        MessageId::ConfigSectionFleet => "Fleet",
        MessageId::ConfigSectionExperimental => "Experimental",
        MessageId::ConfigScopeSession => "SESIÓN",
        MessageId::ConfigScopeSaved => "GUARDADO",
@@ -1193,8 +1193,9 @@ async fn main() -> Result<()> {
                run_swebench_command(&config, &model, workspace, max_subagents, args).await
            }
            Commands::Fleet(args) => {
                let config = load_config_from_cli(&cli)?;
                let workspace = resolve_workspace(&cli);
-                run_fleet_command(&workspace, args).await
+                run_fleet_command(&workspace, &config, args).await
            }
            Commands::Review(args) => {
                let config = load_config_from_cli(&cli)?;
@@ -1458,7 +1459,7 @@ async fn run_swebench_command(
    }
 }
-async fn run_fleet_command(workspace: &Path, args: FleetArgs) -> Result<()> {
+async fn run_fleet_command(workspace: &Path, config: &Config, args: FleetArgs) -> Result<()> {
    use crate::fleet::alerts::{
        FleetAlertAdapterConfig, FleetAlertConfig, FleetAlertDispatcher, FleetAlertEvent,
        FleetEnvSecretResolver,
@@ -1712,7 +1713,12 @@ async fn run_fleet_command(workspace: &Path, args: FleetArgs) -> Result<()> {
        }
    }
-    let manager = FleetManager::open(workspace)?;
+    let exec_config = config
        .fleet
        .as_ref()
        .map(|fleet| fleet.exec.clone())
        .unwrap_or_default();
    let manager = FleetManager::open(workspace)?.with_exec_config(exec_config);
    match args.command {
        FleetCommand::Init => {
            println!("fleet ledger: {}", manager.ledger_path().display());
@@ -3980,6 +3980,7 @@ mod tests {
            crate::fleet::task_spec::FleetTaskSpecDocument {
                name: Some("api smoke".to_string()),
                labels: std::collections::BTreeMap::new(),
                security_policy: None,
                workers: Vec::new(),
                tasks: vec![task],
            },
@@ -1161,8 +1161,13 @@ impl Default for PersistedSubAgentState {
 }
 /// Default cap on sub-agent recursion depth. Override via
-/// `[runtime] max_spawn_depth = N` in `~/.deepseek/config.toml`.
+/// `[runtime] max_spawn_depth = N` in config.
-pub const DEFAULT_MAX_SPAWN_DEPTH: u32 = 3;
+///
 /// Sourced from [`codewhale_config::DEFAULT_SPAWN_DEPTH`] so standalone
 /// sub-agents and fleet workers share ONE recursion axis (no "two moving
 /// targets"). Configured/requested depths clamp to
 /// [`codewhale_config::MAX_SPAWN_DEPTH_CEILING`].
 pub const DEFAULT_MAX_SPAWN_DEPTH: u32 = codewhale_config::DEFAULT_SPAWN_DEPTH;
 /// Terminal-state notification emitted to the engine's parent turn loop
 /// when one of its direct children finishes (issue #756). Carries the
@@ -1794,7 +1799,7 @@ impl SubAgentManager {
            .retain(|worker_id, _| keep_ids.contains(worker_id));
    }
-    fn register_worker(&mut self, spec: AgentWorkerSpec) {
+    pub fn register_worker(&mut self, spec: AgentWorkerSpec) {
        let worker_id = spec.worker_id.clone();
        let now_ms = epoch_millis_now();
        let mut record = AgentWorkerRecord::new(normalize_worker_spec(spec), now_ms);
@@ -5927,15 +5932,18 @@ fn parse_spawn_request(input: &Value) -> Result<SpawnRequest, ToolError> {
        .or_else(|| input.get("max_spawn_depth"))
        .and_then(Value::as_u64)
        .map(|depth| {
            let ceiling = codewhale_config::MAX_SPAWN_DEPTH_CEILING;
            u32::try_from(depth)
-                .map_err(|_| ToolError::invalid_input("max_depth must be between 0 and 3"))
+                .map_err(|_| {
                    ToolError::invalid_input(format!("max_depth must be between 0 and {ceiling}"))
                })
                .and_then(|depth| {
-                    if depth <= 3 {
+                    if depth <= ceiling {
                        Ok(depth)
                    } else {
-                        Err(ToolError::invalid_input(
+                        Err(ToolError::invalid_input(format!(
-                            "max_depth must be between 0 and 3",
+                            "max_depth must be between 0 and {ceiling}"
-                        ))
+                        )))
                    }
                })
        })
@@ -412,6 +412,7 @@ enum ConfigSection {
    Sidebar,
    History,
    Mcp,
    Fleet,
    Experimental,
 }
@@ -429,6 +430,7 @@ impl ConfigSection {
                ConfigSection::Sidebar => MessageId::ConfigSectionSidebar,
                ConfigSection::History => MessageId::ConfigSectionHistory,
                ConfigSection::Mcp => MessageId::ConfigSectionMcp,
                ConfigSection::Fleet => MessageId::ConfigSectionFleet,
                ConfigSection::Experimental => MessageId::ConfigSectionExperimental,
            },
        )
@@ -759,6 +761,18 @@ impl ConfigView {
                editable: true,
                scope: ConfigScope::Saved,
            },
            ConfigRow {
                section: ConfigSection::Fleet,
                key: "fleet.exec.max_spawn_depth".to_string(),
                value: config
                    .fleet
                    .as_ref()
                    .map(|fleet| fleet.exec.max_spawn_depth)
                    .unwrap_or_else(|| codewhale_config::FleetExecConfig::default().max_spawn_depth)
                    .to_string(),
                editable: false,
                scope: ConfigScope::Saved,
            },
        ];
        rows.extend(experimental_config_rows(&config));
@@ -1180,7 +1194,7 @@ fn experimental_config_rows(config: &Config) -> Vec<ConfigRow> {
    rows.push(ConfigRow {
        section: ConfigSection::Experimental,
        key: "whaleflow".to_string(),
-        value: "preview placeholder (not stable; see #2981/#2974)".to_string(),
+        value: "preview overlay for workflow/fleet runs (not stable; see #3154/#3178)".to_string(),
        editable: false,
        scope: ConfigScope::Saved,
    });
@@ -1235,6 +1249,9 @@ fn config_hint_for_key(key: &str) -> &'static str {
            "DeepSeek: auto/off/high/max; Codex: low/medium/high/xhigh; default clears saved value"
        }
        "mcp_config_path" => "path to mcp.json",
        "fleet.exec.max_spawn_depth" => {
            "0 blocks child agents; 3 default (same axis as sub-agents); capped at 3"
        }
        _ => "",
    }
 }
@@ -2393,6 +2410,7 @@ mod tests {
                "Sidebar",
                "History",
                "MCP",
                "Fleet",
                "Experimental",
            ]
        );
@@ -2429,6 +2447,7 @@ mod tests {
        assert!(keys.contains(&"cost_currency"));
        assert!(keys.contains(&"prefer_external_pdftotext"));
        assert!(keys.contains(&"mcp_config_path"));
        assert!(keys.contains(&"fleet.exec.max_spawn_depth"));
        assert!(keys.contains(&"features.subagents"));
        assert!(keys.contains(&"features.web_search"));
        assert!(keys.contains(&"features.apply_patch"));
@@ -2440,13 +2459,23 @@ mod tests {
        assert!(
            view.rows
                .iter()
-                .filter(|row| row.section != super::ConfigSection::Experimental)
+                .filter(|row| {
                    !matches!(
                        row.section,
                        super::ConfigSection::Experimental | super::ConfigSection::Fleet
                    )
                })
                .all(|row| row.editable)
        );
        assert!(
            view.rows
                .iter()
-                .filter(|row| row.section == super::ConfigSection::Experimental)
+                .filter(|row| {
                    matches!(
                        row.section,
                        super::ConfigSection::Experimental | super::ConfigSection::Fleet
                    )
                })
                .all(|row| !row.editable)
        );
    }
@@ -2497,6 +2526,36 @@ vision_model = true
        assert_eq!(subagents.value, "enabled (default enabled)");
    }
    /// Checks that the config view lists `fleet.exec.max_spawn_depth` with the
    /// value written to a temporary `config.toml`, and that the row is read-only.
    ///
    /// The temporary directory is removed before the assertions run so a failing
    /// assertion does not leave test residue behind in the system temp dir.
    #[test]
    fn config_view_shows_fleet_max_spawn_depth_from_config() {
        // The process id keeps concurrent test binaries from sharing a directory.
        let temp_root = std::env::temp_dir().join(format!(
            "codewhale-fleet-config-view-test-{}",
            std::process::id()
        ));
        fs::create_dir_all(&temp_root).unwrap();
        let config_path = temp_root.join("config.toml");
        fs::write(
            &config_path,
            r#"
 [fleet.exec]
 max_spawn_depth = 2
 "#,
        )
        .unwrap();

        let mut app = create_test_app();
        app.config_path = Some(config_path);
        let view = ConfigView::new_for_app(&app);

        // Copy the fields under test out of the view so the fixture can be
        // deleted before any assertion has a chance to panic.
        let observed = view
            .rows
            .iter()
            .find(|row| row.key == "fleet.exec.max_spawn_depth")
            .map(|row| (row.value.clone(), row.editable));

        // Best-effort cleanup: a failure to delete must not mask the real result.
        let _ = fs::remove_dir_all(&temp_root);

        let (value, editable) = observed.expect("fleet spawn depth row");
        assert_eq!(value, "2");
        assert!(!editable);
    }
    #[test]
    fn config_view_experimental_section_is_searchable() {
        let mut view = create_config_view(Locale::En);
@@ -19,6 +19,29 @@ Fleet state is stored under the workspace in `.codewhale/fleet.jsonl`. Worker
 logs and adapter logs are stored under `.codewhale/fleet/` and
 `.codewhale/fleet-host/`.
 ## Naming: Modes, WhaleFlow, Fleet, and Swarm
 These names describe different layers, not competing systems. Agent, Plan, and
 YOLO stay the permission/work modes. WhaleFlow is an orchestration overlay that
 can run on top of those modes when the task needs a continuous workflow.
 - **WhaleFlow** is the repeatable workflow plan and user-facing orchestration
  overlay: a script/IR that decides which phases and agents run next, keeps
  intermediate results out of the main conversation, and can be inspected or
  rerun. A WhaleFlow run should have a visible progress view and a clear active
  header state instead of feeling like a hidden background task.
 - **Fleet** is the execution substrate: headless workers, local/SSH hosts,
  trust policy, leases, heartbeats, logs, receipts, and status APIs.
 - **Swarm** is the high-fanout behavior inside WhaleFlow. It should compile into
  a WhaleFlow-backed fleet run instead of reviving the old `agent_swarm` tool
  surface.
 UI guidance: keep the main transcript calm. A WhaleFlow run should appear as a
 compact progress card plus Work/Agents sidebar rows with phase names, worker
 counts, receipts, and nested indentation for child workers. Use the whale mark
 sparingly as an active header/status signal; avoid repeating emoji-heavy rows
 for every worker.
 ## Task Spec
 `codewhale fleet run` accepts JSON or TOML. A minimal JSON spec:
@@ -60,118 +83,79 @@ and `json_path`. Specs may also declare `command`,
 `code_whale_verifier_prompt`, or `manual`; those record a partial receipt until
 an explicit verifier pass completes.
-### Release Triage Example
+### Using Role Presets
 Tasks can reference a role name, and the fleet manager fills in defaults
 from the role registry. Built-in roles (`smoke-runner`, `reviewer`, `builder`,
 `read-only`) are always available; define your own in `[fleet.roles]`.
 ```json
 {
-  "name": "v0.8.60 release triage",
+  "name": "smoke check",
  "labels": {
    "milestone": "v0.8.60"
  },
  "tasks": [
    {
-      "id": "release-issue-sweep",
+      "id": "lint",
-      "name": "Release issue sweep",
+      "name": "Lint check",
-      "objective": "Find open v0.8.60 blockers and credit-sensitive PRs.",
+      "instructions": "Run lint and report failures.",
-      "instructions": "Review the v0.8.60 milestone, linked PRs, changelog entries, and contributor-credit requirements. Write a concise blocker report.",
+      "worker": { "role": "smoke-runner" },
-      "worker": {
+      "expected_artifacts": ["log"]
        "role": "release-triage",
        "tool_profile": "read-only",
        "tools": ["gh", "git"],
        "capabilities": ["github", "release"]
      },
      "workspace": {
        "required_files": ["Cargo.toml", "CHANGELOG.md", ".github/AUTHOR_MAP"],
        "writable_paths": [".codewhale/fleet"],
        "environment": {
          "required": ["PATH"]
        }
      },
      "input_files": ["CHANGELOG.md", ".github/AUTHOR_MAP"],
      "context": ["Treat community PRs as maintainer evidence."],
      "budget": {
        "max_tokens": 12000,
        "max_tool_calls": 24,
        "max_seconds": 900
      },
      "timeout_seconds": 900,
      "expected_artifacts": ["log", "report", "receipt"],
      "scorer": {
        "kind": "exit_code"
      },
      "retry_policy": {
        "max_attempts": 2,
        "initial_backoff_seconds": 10,
        "max_backoff_seconds": 60,
        "backoff_multiplier": 2
      },
      "tags": ["release", "triage"],
      "metadata": {
        "class": "release"
      }
    }
  ]
 }
 ```
-### Code Review Swarm Example
+The task inherits the role's tool profile, budget, and timeout. You can
 override any field in the task spec:
 ```json
 {
-  "name": "code review swarm",
+  "id": "deep-review",
  "name": "Deep review",
  "instructions": "Review the entire crate for soundness issues.",
  "worker": {
    "role": "reviewer",
    "tools": ["cargo", "rg", "git"],
    "capabilities": ["rust"]
  },
  "input_files": ["crates/**/*.rs"],
  "budget": { "max_tokens": 32000 },
  "expected_artifacts": ["log", "report"],
  "scorer": { "kind": "regex_match", "path": ".codewhale/fleet/report.md", "pattern": "finding|all clear" }
 }
 ```
 ### Multi-Task Run Example
 A single fleet run can dispatch several independent tasks in parallel:
 ```json
 {
  "name": "CI gate",
  "tasks": [
    {
-      "id": "protocol-review",
+      "id": "check",
-      "name": "Protocol review",
+      "name": "Compile check",
-      "objective": "Review fleet protocol changes for compatibility and sparse JSON behavior.",
+      "instructions": "Run cargo check --workspace and report errors.",
-      "instructions": "Inspect crates/protocol/src/fleet.rs and report behavior regressions, missing serde defaults, or unsafe wire changes.",
+      "worker": { "role": "builder" },
-      "worker": {
+      "expected_artifacts": ["log"],
-        "role": "reviewer",
+      "scorer": { "kind": "exit_code" }
        "tool_profile": "read-only",
        "tools": ["git", "rg", "cargo"],
        "capabilities": ["rust"]
      },
      "input_files": ["crates/protocol/src/fleet.rs"],
      "budget": {
        "max_tokens": 8000,
        "max_tool_calls": 16,
        "max_seconds": 600
      },
      "expected_artifacts": ["log", "report", "receipt"],
      "scorer": {
        "kind": "code_whale_verifier_prompt",
        "prompt": "Verify the review includes at least one concrete file:line finding or explicitly says no issues were found."
      },
      "tags": ["review", "protocol"],
      "metadata": {
        "class": "code-review"
      }
    },
    {
-      "id": "tui-review",
+      "id": "clippy",
-      "name": "TUI review",
+      "name": "Clippy lint",
-      "objective": "Review fleet CLI and manager behavior for operator-visible regressions.",
+      "instructions": "Run cargo clippy --workspace and report warnings.",
-      "instructions": "Inspect crates/tui/src/fleet and crates/tui/src/main.rs. Focus on status output, receipt recording, and failure classification.",
+      "worker": { "role": "reviewer", "tools": ["cargo", "cargo-clippy"] },
-      "worker": {
+      "expected_artifacts": ["log"],
-        "role": "reviewer",
+      "scorer": { "kind": "exit_code" }
-        "tool_profile": "read-only",
+    },
-        "tools": ["git", "rg", "cargo"],
+    {
-        "capabilities": ["rust", "cli"]
+      "id": "security",
-      },
+      "name": "Secret audit",
-      "input_files": ["crates/tui/src/fleet", "crates/tui/src/main.rs"],
+      "instructions": "Search for plaintext secrets and report any matches.",
-      "budget": {
+      "worker": { "role": "read-only", "tools": ["rg"] },
-        "max_tokens": 10000,
+      "input_files": ["crates/**/*.rs"],
-        "max_tool_calls": 20,
+      "expected_artifacts": ["log", "report"],
-        "max_seconds": 600
+      "retry_policy": { "max_attempts": 1 }
      },
      "expected_artifacts": ["log", "report", "receipt"],
      "scorer": {
        "kind": "manual"
      },
      "tags": ["review", "tui"],
      "metadata": {
        "class": "code-review"
      }
    }
  ]
 }
@@ -373,3 +357,168 @@ Defaults are intentionally conservative:
  `API_KEY`, and `PRIVATE_KEY` are rejected from adapter allowlists;
 - secrets should remain in CodeWhale config providers or remote host config,
  not in task instructions, argv, or fleet logs.
 ## Security and Trust Boundaries
 Agent Fleet enforces a trust-level model that separates workers into four tiers.
 The trust level determines what a worker can access (secrets, network, workspace
 writes) and how it must prove its identity before being granted those privileges.
 ### Trust Levels
 | Level | Access | Requires |
 |-------|--------|----------|
 | `sandbox` | No network, no secrets, writes only to `.codewhale/fleet/` | Nothing — default for new workers |
 | `local` | Workspace reads, gated writes, configured secrets | Local process (same uid) |
 | `remote-verified` | Network access, bounded capability grants, configured secrets | SSH host-key verification or equivalent attestation |
 | `operator` | Full access to all secrets, unrestricted writes, any action | Operator-owned machine |
 The default trust level is `sandbox`. Operators must explicitly raise trust for
 SSH or container workers through the security policy.
 ### Security Policy
 A fleet run may carry an optional `security_policy` block that defines the
 default trust level, which secrets workers may resolve, what capabilities are
 granted, and a ceiling on the maximum trust level:
 ```json
 {
  "security_policy": {
    "default_trust_level": "sandbox",
    "allowed_secrets": [
      {"key": "GH_TOKEN", "source": "env"},
      {"key": "CODEWHALE_API_KEY", "source": "keyring"}
    ],
    "capability_grants": [
      {
        "capability": "network",
        "scope": "github.com",
        "reason": "PR review needs GitHub API access"
      }
    ],
    "max_trust_level": "remote_verified",
    "require_identity_verification": true
  }
 }
 ```
 When a run has no explicit `security_policy` and `config.toml` sets no `[fleet]`
 defaults, workers inherit the built-in conservative defaults: `sandbox` trust,
 no secrets, no capability grants, and no identity verification requirement.
 ### Secret References
 Secrets are never stored as plaintext in task specs, alert configs, or worker
 definitions. Instead, every secret is a `FleetSecretRef` — a key name plus an
 optional source hint that tells the fleet manager where to resolve the value:
 ```json
 {"key": "GH_TOKEN", "source": "env"}
 ```
 Supported sources:
 - `"env"` — resolve from a process environment variable
 - `"keyring"` — resolve from the OS keyring (macOS Keychain, Windows Credential Manager, Linux Secret Service)
 - `"file"` — resolve from `~/.codewhale/secrets/`
 - absent — try all sources in default order (store first, then env)
 Secret refs are redacted in logs and ledger entries: `<secret:env.GH_TOKEN>`.
 ### Worker Authentication
 Workers authenticate to the fleet manager using one of four methods:
 - **None** — local workers sharing the same uid (default)
 - **SSH key** — with optional host-key fingerprint pinning and known-hosts
  verification. The `host_key_fingerprint` field (SHA256:...) pins the expected
  server key, preventing MITM attacks on first connection.
 - **Token** — a bearer token resolved from a `FleetSecretRef`, useful for remote
  workers behind a fleet proxy.
 - **mTLS** — mutual TLS with a client certificate and a secret-backed private key.
 SSH workers should always set `host_key_fingerprint` in production:
 ```json
 {
  "id": "builder-1",
  "name": "Builder 1",
  "trust_level": "remote_verified",
  "host": {
    "kind": "ssh",
    "host": "builder.example.com",
    "user": "codewhale",
    "port": 22,
    "identity": "~/.ssh/codewhale_fleet",
    "host_key_fingerprint": "SHA256:aLGqZo1M6c...",
    "known_hosts": "~/.ssh/known_hosts",
    "working_directory": "/srv/codewhale/work",
    "env_allowlist": ["CODEWHALE_PROFILE"],
    "codewhale_binary": "/usr/local/bin/codewhale"
  },
  "capabilities": ["local", "linux", "tests"],
  "max_concurrent_tasks": 1
 }
 ```
 ### Alert Channel Secrets
 Alert channels (Slack, generic webhook, PagerDuty) use `FleetAlertEndpoint`
 instead of raw URLs. The webhook URL can be provided inline for non-sensitive
 endpoints, or as a secret reference:
 ```json
 {
  "kind": "slack",
  "webhook": {
    "url_ref": {"key": "CODEWHALE_FLEET_SLACK_WEBHOOK", "source": "env"},
    "secret_ref": {"key": "CODEWHALE_FLEET_SLACK_SIGNING_SECRET", "source": "keyring"}
  }
 }
 ```
 The optional `secret_ref` field references an HMAC secret used to sign webhook
 payloads; the secret itself is never stored in plaintext.
 ### Config File
 The `[fleet]` table in `config.toml` sets global trust policy defaults:
 ```toml
 [fleet]
 default_trust_level = "sandbox"
 require_identity_verification = true
 max_trust_level = "operator"
 [fleet.exec]
 # Recursion depth shares ONE axis with standalone sub-agents — a fleet worker
 # IS a headless sub-agent. 0 blocks child agents (the root worker still runs);
 # 3 is the default and the ceiling, affording at least three nested levels.
 max_spawn_depth = 3
 ```
 These defaults apply to fleet runs that don't carry their own `security_policy`.
 Per-run policies always override the config defaults.
 ### Capability Grants
 Capability grants are additive, scoped permissions that authorize specific
 actions. By default, workers get no grants (least privilege). Common grants:
 - `"network"` with scope `"github.com"` — allow outbound HTTP to GitHub
 - `"git-push"` — allow `git push` to remotes
 - `"provider-secrets"` — allow accessing provider API keys
 - `"release"` — allow release-related operations (tagging, publishing)
 - `"workspace-write"` with scope `"crates/tui/**"` — allow writes within a path
 ### Environment Sanitization
 The host adapter layer enforces environment sanitization at worker start:
 - Only `HOME`, `PATH`, and platform-specific vars (`SYSTEMROOT`, `COMSPEC`) are
  injected into worker processes by default
 - Environment allowlists reject any key containing `SECRET`, `TOKEN`, `PASSWORD`,
  `PASSWD`, `API_KEY`, `CREDENTIAL`, or `PRIVATE_KEY`
 - SSH workers only send explicitly allowlisted variables via OpenSSH `SendEnv`
 - Secret values are never embedded in worker argv, task instructions, or fleet
  logs — only secret refs appear, and they are always redacted
@@ -4,11 +4,18 @@ codewhale has two related concepts:
 - **TUI mode**: what kind of visible interaction you're in (Plan/Agent/YOLO).
 - **Approval mode**: how aggressively the UI asks before executing tools.
 - **WhaleFlow overlay**: optional long-running workflow orchestration that can
  run on top of any TUI mode when a task needs many coordinated workers.
 Model selection is separate. `--model auto` and `/model auto` route each turn to
 a concrete model and thinking level; they are not TUI modes and are not part of
 the `Tab` cycle.
 WhaleFlow is also separate from the `Tab` mode cycle. It is the visible
 continuous-work layer for repeatable workflows, fleet workers, and swarm-style
 fanout. The active mode still controls permissions; WhaleFlow controls whether a
 large task is planned into a resumable workflow with its own progress view.
 Each user turn includes a small `<turn_meta>` block with the current local date
 and the concrete model sent to the provider. When `--model auto` is active, the
 same block also records that the model was auto-routed.
@@ -56,6 +63,11 @@ the turn, `/goal complete` marks it done, `/goal blocked` marks it blocked, and
 approval mode, or model route. This remains distinct from `--model auto`, which
 only controls model and thinking selection.
 WhaleFlow builds on the same separation: a goal can ask the agent to keep
 working, while WhaleFlow supplies the repeatable workflow/progress surface for
 large fanout. In the UI, a WhaleFlow run should be shown as an overlay on the
 main screen, not as a fourth mode next to Agent, Plan, and YOLO.
 App-server clients can persist a thread-scoped goal with `thread/goal/set`, read
 it with `thread/goal/get`, and clear it with `thread/goal/clear`. That persisted
 record carries `active`, `paused`, `blocked`, `usage_limited`, `budget_limited`,
@@ -0,0 +1,52 @@
 # Agent Fleet dogfood smoke spec (#3166)
 #
 # This spec exercises the fleet end-to-end: create a run with two local
 # workers, run a workspace compile-check task and a protocol review task,
 # verify the ledger records receipts, and confirm the status surfaces work.
 #
 # Run:
 #   codewhale fleet run docs/examples/fleet-dogfood.toml --max-workers 2 --once
 #
 # Then check:
 #   codewhale fleet status
 #   codewhale fleet inspect <worker-id-from-status>
 #   codewhale fleet logs <worker-id-from-status>
 name = "dogfood smoke"
 labels = { milestone = "v0.8.60", class = "smoke" }
 security_policy = { default_trust_level = "local", allowed_secrets = [], require_identity_verification = false }
 [[tasks]]
 id = "cargo-check"
 name = "Workspace check"
 description = "Run `cargo check --workspace` and report any compilation errors."
 objective = "Verify the workspace compiles cleanly with zero errors."
 instructions = "Run `cargo check --workspace` in the repo root. If it compiles cleanly, report success. If there are errors, list each file:line and the error message. Do NOT attempt to fix anything — just report what you found."
 worker = { role = "release-checker", tool_profile = "read-only", tools = ["cargo"], capabilities = ["rust"] }
 workspace = { required_files = ["Cargo.toml"], writable_paths = [".codewhale/fleet"], environment = { required = ["PATH"] } }
 input_files = ["Cargo.toml"]
 context = ["You are running in a fleet smoke test. Be concise. Only report the pass/fail and any specific errors."]
 budget = { max_tokens = 8000, max_tool_calls = 12, max_seconds = 300 }
 expected_artifacts = ["log", "report", "receipt"]
 scorer = { kind = "exit_code" }
 retry_policy = { max_attempts = 2, initial_backoff_seconds = 5, max_backoff_seconds = 30 }
 timeout_seconds = 300
 tags = ["smoke", "check"]
 [[tasks]]
 id = "protocol-review"
 name = "Protocol review"
 description = "Review fleet protocol types for security and correctness."
 objective = "Inspect crates/protocol/src/fleet.rs and crates/secrets/src/lib.rs. Report any missing serde defaults, unsafe wire changes, or security-sensitive fields lacking SecretRef."
 instructions = "Read crates/protocol/src/fleet.rs and crates/secrets/src/lib.rs. Check for: (1) new fields without serde(default) or skip_serializing_if, (2) raw secrets in struct fields instead of FleetSecretRef, (3) missing Clone/Debug/PartialEq derives on new types. Write a concise report with file:line references for each finding. If there are no findings, report 'all clear'."
 worker = { role = "reviewer", tool_profile = "read-only", tools = ["rg", "git", "cargo"], capabilities = ["rust"] }
 workspace = { required_files = ["crates/protocol/src/fleet.rs", "crates/secrets/src/lib.rs"], writable_paths = [".codewhale/fleet"], environment = { required = ["PATH"] } }
 input_files = ["crates/protocol/src/fleet.rs", "crates/secrets/src/lib.rs"]
 context = ["You are a fleet protocol reviewer. Be thorough but concise. Reference specific lines."]
 budget = { max_tokens = 10000, max_tool_calls = 16, max_seconds = 600 }
 expected_artifacts = ["log", "report", "receipt"]
 scorer = { kind = "code_whale_verifier_prompt", prompt = "Verify the review includes at least one concrete file:line finding or explicitly says 'all clear'." }
 retry_policy = { max_attempts = 1, initial_backoff_seconds = 10 }
 timeout_seconds = 600
 tags = ["smoke", "review", "protocol"]