diff --git a/config.example.toml b/config.example.toml index 03e56bdf..fde5d9c1 100644 --- a/config.example.toml +++ b/config.example.toml @@ -915,6 +915,57 @@ default_text_model = "deepseek-ai/deepseek-v4-pro" # printf '%s\n' '{"content":"audit wrapper placeholder: configure an executor","success":false}' # ``` +# ───────────────────────────────────────────────────────────────────────────────── +# Agent Fleet trust, security, and role registry (#3165, #3167) +# ───────────────────────────────────────────────────────────────────────────────── +# [fleet] +# # Default trust level for fleet workers: "sandbox" | "local" | "remote-verified" | "operator" +# default_trust_level = "sandbox" +# # Require SSH host-key verification before granting remote-verified trust +# require_identity_verification = true +# # Maximum trust level any worker may have +# max_trust_level = "operator" +# +# # Headless worker execution hardening (#3027) +# [fleet.exec] +# # Tools always allowed regardless of role +# allowed_tools = [] +# # Tools always disallowed (overrides role and task spec) +# disallowed_tools = ["exec_shell"] +# # Hard ceiling on worker steps (tool calls + model turns) +# max_turns = 500 +# # Recursive child-agent depth for fleet workers. Shares ONE recursion axis +# # with standalone sub-agents (a fleet worker IS a headless sub-agent). +# # 0 blocks child agents (the root worker still runs); 3 is the default and the +# # cap, affording up to three nested delegation levels. +# max_spawn_depth = 3 +# # Extra system prompt injected into every headless worker +# append_system_prompt = "Never modify .git/config or change remotes." +# # Output format: "text" (default) or "stream-json" for ndjson events +# output_format = "text" +# +# # Built-in role presets are always available: smoke-runner, reviewer, builder, read-only. +# # User-defined roles here override or extend the built-in set. Any key under +# # [fleet.roles] becomes a valid role name that task specs can reference. 
+# [fleet.roles.ci-linter] +# description = "Runs linters and formatters" +# tool_profile = "read-only" +# tools = ["cargo", "cargo-clippy", "cargo-fmt"] +# capabilities = ["rust"] +# # Token and tool-call budgets are task-level settings, not role fields; +# # use timeout_seconds as the role's safety bound. +# timeout_seconds = 600 +# +# [fleet.roles.pr-reviewer] +# description = "Reviews PRs with GitHub access" +# tool_profile = "read-only" +# tools = ["git", "gh", "rg"] +# capabilities = ["git", "github"] +# # Budgets (tokens, tool calls) belong on the task spec, not the role; +# # timeout_seconds is the role-level safety bound. +# timeout_seconds = 900 +# trust_level = "local" + # ───────────────────────────────────────────────────────────────────────────────── # Requirements (admin constraints) example file # ───────────────────────────────────────────────────────────────────────────────── diff --git a/crates/cli/src/lib.rs b/crates/cli/src/lib.rs index dcd149b2..fd0f7e8f 100644 --- a/crates/cli/src/lib.rs +++ b/crates/cli/src/lib.rs @@ -22,7 +22,7 @@ use codewhale_mcp::{McpServerDefinition, run_stdio_server}; use codewhale_secrets::Secrets; use codewhale_state::{StateStore, ThreadListFilters}; -#[derive(Debug, Clone, Copy, ValueEnum)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)] enum ProviderArg { Deepseek, NvidiaNim, @@ -43,6 +43,10 @@ enum ProviderArg { Huggingface, Together, OpenaiCodex, + Anthropic, + Zai, + Stepfun, + Minimax, } impl From for ProviderKind { @@ -67,6 +71,10 @@ impl From for ProviderKind { ProviderArg::Huggingface => ProviderKind::Huggingface, ProviderArg::Together => ProviderKind::Together, ProviderArg::OpenaiCodex => ProviderKind::OpenaiCodex, + ProviderArg::Anthropic => ProviderKind::Anthropic, + ProviderArg::Zai => ProviderKind::Zai, + ProviderArg::Stepfun => ProviderKind::Stepfun, + ProviderArg::Minimax => ProviderKind::Minimax, } } } @@ -787,7 +795,7 @@ fn provider_slot(provider: ProviderKind) -> &'static str { } /// Provider order used by the `auth list` and `auth status` outputs. 
-const PROVIDER_LIST: [ProviderKind; 20] = [ +const PROVIDER_LIST: [ProviderKind; 24] = [ ProviderKind::Deepseek, ProviderKind::NvidiaNim, ProviderKind::Openai, @@ -808,6 +816,10 @@ const PROVIDER_LIST: [ProviderKind; 20] = [ ProviderKind::Huggingface, ProviderKind::Together, ProviderKind::OpenaiCodex, + ProviderKind::Anthropic, + ProviderKind::Zai, + ProviderKind::Stepfun, + ProviderKind::Minimax, ]; fn provider_is_supported_by_tui(provider: ProviderKind) -> bool { @@ -833,6 +845,10 @@ fn provider_is_supported_by_tui(provider: ProviderKind) -> bool { | ProviderKind::Huggingface | ProviderKind::Together | ProviderKind::OpenaiCodex + | ProviderKind::Anthropic + | ProviderKind::Zai + | ProviderKind::Stepfun + | ProviderKind::Minimax ) } @@ -2606,6 +2622,32 @@ mod tests { })) )); + for (provider, expected) in [ + ("anthropic", ProviderArg::Anthropic), + ("zai", ProviderArg::Zai), + ("stepfun", ProviderArg::Stepfun), + ("minimax", ProviderArg::Minimax), + ] { + let cli = parse_ok(&[ + "deepseek", + "auth", + "set", + "--provider", + provider, + "--api-key-stdin", + ]); + assert!(matches!( + cli.command, + Some(Commands::Auth(AuthArgs { + command: AuthCommand::Set { + provider, + api_key: None, + api_key_stdin: true, + } + })) if provider == expected + )); + } + let cli = parse_ok(&["deepseek", "auth", "list"]); assert!(matches!( cli.command, diff --git a/crates/config/src/lib.rs b/crates/config/src/lib.rs index 19ad3698..4adb69d7 100644 --- a/crates/config/src/lib.rs +++ b/crates/config/src/lib.rs @@ -659,6 +659,10 @@ pub struct ConfigToml { /// lifecycle `[hooks]` table so config rewrites preserve existing hooks. #[serde(default)] pub hook_sinks: Option, + /// Agent Fleet trust and security policy (#3165). When absent, fleet + /// workers inherit conservative Sandbox defaults. 
+ #[serde(default)] + pub fleet: Option, #[serde(flatten)] pub extras: BTreeMap, } @@ -1055,6 +1059,236 @@ impl Default for SnapshotsToml { } } +/// On-disk schema for the `[fleet]` table (#3165). See `config.example.toml` +/// and `docs/FLEET.md` for documentation. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct FleetConfigToml { + /// Default trust level for fleet workers. One of `"sandbox"`, `"local"`, + /// `"remote-verified"`, or `"operator"`. Defaults to `"sandbox"`. + #[serde(default = "default_fleet_trust_level_str")] + pub default_trust_level: String, + /// Require identity verification for remote (SSH) workers before + /// granting them `remote-verified` trust. Defaults to true. + #[serde(default = "default_fleet_require_identity")] + pub require_identity_verification: bool, + /// Maximum trust level any worker may have (`"sandbox"`, `"local"`, + /// `"remote-verified"`, or `"operator"`). Defaults to `"operator"`. + #[serde(default = "default_fleet_max_trust_level_str")] + pub max_trust_level: String, + /// User-defined and built-in role presets. + /// + /// Each role defines default tool profiles, capabilities, budgets, and + /// trust settings that task specs can reference by name. Built-in roles + /// (`smoke-runner`, `reviewer`, `builder`, `read-only`) are always + /// available; user-defined roles in config override or extend them. + #[serde(default)] + pub roles: BTreeMap, + /// Headless worker execution hardening (#3027). + #[serde(default)] + pub exec: FleetExecConfig, +} + +/// Canonical recursion-depth policy for the headless worker runtime. +/// +/// Single source of truth shared by BOTH standalone sub-agents and fleet +/// workers so the two cannot drift into "two moving targets": +/// - [`DEFAULT_SPAWN_DEPTH`] is the default recursion budget (the sub-agent +/// runtime's `DEFAULT_MAX_SPAWN_DEPTH` is defined as this value). 
+/// - [`MAX_SPAWN_DEPTH_CEILING`] is the hard safety cap; every configured +/// value (fleet `max_spawn_depth`, `agent_open`'s `max_depth`) clamps to it. +/// +/// A worker runs at `spawn_depth = 0` and may spawn while +/// `spawn_depth + 1 <= max_spawn_depth`, so a depth of N affords N nested +/// delegation levels below the root worker. The default of 3 therefore affords +/// three nested delegation levels out of the box; the root worker still runs at +/// depth 0 even when the budget is 0. +pub const DEFAULT_SPAWN_DEPTH: u32 = 3; + +/// Hard ceiling on recursion depth for any worker/sub-agent. See +/// [`DEFAULT_SPAWN_DEPTH`]. Raising this single constant lifts the limit +/// everywhere (the fleet clamp and `agent_open` validation both read it). +pub const MAX_SPAWN_DEPTH_CEILING: u32 = 3; + +/// Headless worker execution constraints (#3027). +/// +/// These limits apply to all fleet workers and sub-agents spawned through +/// the headless worker runtime. Task specs can tighten but not loosen them. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct FleetExecConfig { + /// Tools that are always allowed regardless of role or task spec. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub allowed_tools: Vec, + /// Tools that are always disallowed, overriding role and task spec. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub disallowed_tools: Vec, + /// Hard ceiling on sub-agent steps (tool calls + model turns). + /// Workers that exceed this are terminated. Default: unbounded (u32::MAX). + #[serde(default = "default_fleet_max_turns")] + pub max_turns: u32, + /// Recursive child-agent budget for headless fleet workers. + /// Defaults to [`DEFAULT_SPAWN_DEPTH`] (3) so a fleet worker has the SAME + /// recursion budget as a standalone sub-agent — fleet and sub-agents are one + /// substrate, not two. Set 0 to block child `agent_open` (the root worker + /// still runs); the value is clamped to [`MAX_SPAWN_DEPTH_CEILING`]. 
+ #[serde(default = "default_fleet_max_spawn_depth")] + pub max_spawn_depth: u32, + /// Extra system prompt text appended to every headless worker. + /// Useful for injecting org-wide policy or behavior constraints. + #[serde(default, skip_serializing_if = "String::is_empty")] + pub append_system_prompt: String, + /// Output format for fleet worker results. + /// `"text"` (default) or `"stream-json"` for newline-delimited JSON events. + #[serde(default = "default_fleet_output_format")] + pub output_format: String, +} + +fn default_fleet_max_turns() -> u32 { + u32::MAX +} + +fn default_fleet_max_spawn_depth() -> u32 { + DEFAULT_SPAWN_DEPTH +} + +fn default_fleet_output_format() -> String { + "text".to_string() +} + +impl Default for FleetExecConfig { + fn default() -> Self { + Self { + allowed_tools: Vec::new(), + disallowed_tools: Vec::new(), + max_turns: default_fleet_max_turns(), + max_spawn_depth: default_fleet_max_spawn_depth(), + append_system_prompt: String::new(), + output_format: default_fleet_output_format(), + } + } +} + +/// A named role preset that bundles common worker settings. +/// +/// Task specs reference a role name (e.g. `"role": "reviewer"`), and the +/// fleet manager fills in any missing fields from the preset. User-defined +/// roles in `[fleet.roles]` override built-in defaults with the same name. +/// +/// Token budgets and tool-call limits are task-level decisions — they don't +/// belong on role presets. Use `timeout_seconds` as the safety bound. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct FleetRolePreset { + /// Short description of what this role is for. + #[serde(skip_serializing_if = "Option::is_none")] + pub description: Option, + /// Default tool profile (`"read-only"`, `"read-write"`, or `"custom"`). + #[serde(skip_serializing_if = "Option::is_none")] + pub tool_profile: Option, + /// Default set of tool names available to this role. 
+ #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub tools: Vec, + /// Default capability tags (e.g. `"rust"`, `"git"`, `"gh"`). + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub capabilities: Vec, + /// Default timeout in seconds for tasks using this role. + #[serde(skip_serializing_if = "Option::is_none")] + pub timeout_seconds: Option, + /// Default trust level override for this role. + #[serde(skip_serializing_if = "Option::is_none")] + pub trust_level: Option, +} + +fn default_fleet_trust_level_str() -> String { + "sandbox".to_string() +} + +fn default_fleet_require_identity() -> bool { + true +} + +fn default_fleet_max_trust_level_str() -> String { + "operator".to_string() +} + +impl Default for FleetConfigToml { + fn default() -> Self { + Self { + default_trust_level: default_fleet_trust_level_str(), + require_identity_verification: default_fleet_require_identity(), + max_trust_level: default_fleet_max_trust_level_str(), + roles: BTreeMap::new(), + exec: FleetExecConfig::default(), + } + } +} + +impl FleetConfigToml { + /// Resolve a role preset by name. Checks user-defined roles first, + /// then falls back to built-in role defaults. + #[must_use] + pub fn resolve_role(&self, name: &str) -> Option { + self.roles + .get(name) + .cloned() + .or_else(|| built_in_role_presets().get(name).cloned()) + } +} + +/// Built-in role presets that are always available without config. 
+#[must_use] +pub fn built_in_role_presets() -> BTreeMap { + [ + ( + "smoke-runner".to_string(), + FleetRolePreset { + description: Some("Lightweight read-only smoke check worker".to_string()), + tool_profile: Some("read-only".to_string()), + tools: vec![], + capabilities: vec![], + timeout_seconds: Some(300), + trust_level: Some("local".to_string()), + }, + ), + ( + "reviewer".to_string(), + FleetRolePreset { + description: Some("Read-only code and documentation review".to_string()), + tool_profile: Some("read-only".to_string()), + tools: vec![], + capabilities: vec![], + timeout_seconds: Some(600), + trust_level: None, + }, + ), + ( + "builder".to_string(), + FleetRolePreset { + description: Some( + "Read-write builder with compilation and test access".to_string(), + ), + tool_profile: Some("read-write".to_string()), + tools: vec![], + capabilities: vec![], + timeout_seconds: Some(1800), + trust_level: Some("local".to_string()), + }, + ), + ( + "read-only".to_string(), + FleetRolePreset { + description: Some( + "Minimal read-only observer with no writes or secrets".to_string(), + ), + tool_profile: Some("read-only".to_string()), + tools: vec![], + capabilities: vec![], + timeout_seconds: Some(300), + trust_level: Some("sandbox".to_string()), + }, + ), + ] + .into() +} + /// On-disk schema for the `[network]` table (#135). See `config.example.toml` /// for documentation. #[derive(Debug, Clone, Serialize, Deserialize)] @@ -7128,6 +7362,32 @@ fallback_providers = ["deepseek", "openrouter"] assert!(!serialized.contains("fallback_providers")); } + #[test] + fn fleet_exec_config_default_matches_subagent_spawn_depth() { + // Fleet workers and standalone sub-agents share one recursion axis: + // the fleet default equals DEFAULT_SPAWN_DEPTH (3) and affords >=3 + // nested delegation levels out of the box. 
+ assert_eq!( + FleetExecConfig::default().max_spawn_depth, + DEFAULT_SPAWN_DEPTH + ); + assert_eq!(FleetExecConfig::default().max_spawn_depth, 3); + assert!(DEFAULT_SPAWN_DEPTH <= MAX_SPAWN_DEPTH_CEILING); + } + + #[test] + fn fleet_exec_config_parses_max_spawn_depth() { + let config: ConfigToml = toml::from_str( + r#" +[fleet.exec] +max_spawn_depth = 2 +"#, + ) + .expect("fleet exec config should parse"); + + assert_eq!(config.fleet.expect("fleet config").exec.max_spawn_depth, 2); + } + #[test] fn fallback_providers_do_not_change_runtime_resolution() { let _lock = env_lock(); diff --git a/crates/protocol/src/fleet.rs b/crates/protocol/src/fleet.rs index 40deb957..11ef671e 100644 --- a/crates/protocol/src/fleet.rs +++ b/crates/protocol/src/fleet.rs @@ -12,7 +12,7 @@ use std::collections::BTreeMap; use std::path::PathBuf; -use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use serde::{Deserialize, Deserializer, Serialize, Serializer, de}; use serde_json::Value; pub const FLEET_PROTOCOL_VERSION: &str = "0.1.0"; @@ -45,6 +45,8 @@ pub struct FleetRun { pub worker_specs: Vec, #[serde(default)] pub labels: BTreeMap, + #[serde(skip_serializing_if = "Option::is_none")] + pub security_policy: Option, pub created_at: String, #[serde(skip_serializing_if = "Option::is_none")] pub updated_at: Option, @@ -260,6 +262,9 @@ pub struct FleetWorkerSpec { pub name: String, pub host: FleetHostSpec, #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub trust_level: Option, + #[serde(default)] pub labels: BTreeMap, #[serde(default)] pub capabilities: Vec, @@ -280,6 +285,14 @@ pub enum FleetHostSpec { user: Option, #[serde(skip_serializing_if = "Option::is_none")] identity: Option, + /// Known hosts file for host-key verification. + #[serde(skip_serializing_if = "Option::is_none")] + known_hosts: Option, + /// Expected host key fingerprint (SHA256:...) for key pinning. 
+ /// When set, the connection is only trusted if the server's + /// host key matches this fingerprint exactly. + #[serde(skip_serializing_if = "Option::is_none")] + host_key_fingerprint: Option, #[serde(skip_serializing_if = "Option::is_none")] working_directory: Option, #[serde(default)] @@ -288,6 +301,8 @@ pub enum FleetHostSpec { #[serde(skip_serializing_if = "Option::is_none")] codewhale_binary: Option, }, + #[serde(alias = "container")] + #[serde(alias = "Container")] Docker { image: String, #[serde(default)] @@ -295,6 +310,264 @@ pub enum FleetHostSpec { }, } +// ── Security and trust types ──────────────────────────────────────────────── + +/// Trust classification assigned to a worker host. +/// +/// The trust level determines what a worker is allowed to do and what +/// secrets it may access. The default for new workers is [`FleetTrustLevel::Sandbox`]; +/// operators must explicitly raise trust for SSH or container workers. +#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)] +#[serde(rename_all = "snake_case")] +pub enum FleetTrustLevel { + /// Fully isolated: no network, no secrets, no writes outside `.codewhale/fleet/`. + /// Suitable for untrusted code review, community PR checks, or third-party tool runs. + Sandbox = 0, + /// Local-only worker with access to the workspace and configured secrets. + /// Default for local workers. May read repo files but writes are gated. + Local = 1, + /// Worker on a known remote host with verified identity and a bounded + /// set of explicitly granted capabilities. Requires SSH host-key + /// verification or equivalent attestation. + #[serde(alias = "remote-verified", alias = "remoteVerified")] + RemoteVerified = 2, + /// Fully trusted worker (e.g. operator's own machine, CI runner). + /// Has access to all configured secrets and may perform any action the + /// operator can. Reserved for dogfood smoke and operator-owned machines. 
+ Operator = 3, +} + +impl Default for FleetTrustLevel { + fn default() -> Self { + Self::Sandbox + } +} + +impl FleetTrustLevel { + /// Whether this trust level is allowed to access provider secrets. + #[must_use] + pub fn may_access_secrets(&self) -> bool { + matches!(self, Self::Operator | Self::RemoteVerified | Self::Local) + } + + /// Whether this trust level is allowed to write outside `.codewhale/fleet/`. + #[must_use] + pub fn may_write_workspace(&self) -> bool { + matches!(self, Self::Operator | Self::Local) + } + + /// Whether this trust level is allowed network access. + #[must_use] + pub fn may_access_network(&self) -> bool { + matches!(self, Self::Operator | Self::RemoteVerified | Self::Local) + } +} + +/// Security policy applied to a fleet run. +/// +/// A policy defines the default trust level for workers, which secrets +/// may be resolved, and what capabilities are granted. When a run has no +/// explicit policy, workers inherit conservative defaults. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct FleetSecurityPolicy { + /// Default trust level for workers that don't declare one explicitly. + #[serde(default)] + pub default_trust_level: FleetTrustLevel, + /// Secret refs that workers may resolve. An empty list means no secrets + /// are available. Each entry is a key name, not a value. + #[serde(default)] + #[serde(skip_serializing_if = "Vec::is_empty")] + pub allowed_secrets: Vec, + /// Capability grants for workers in this run. + #[serde(default)] + #[serde(skip_serializing_if = "Vec::is_empty")] + pub capability_grants: Vec, + /// Maximum trust level any worker in this run may have, even if the + /// worker spec requests higher. Defaults to Operator (no ceiling). + #[serde(default = "default_max_trust_level")] + pub max_trust_level: FleetTrustLevel, + /// Require identity verification for remote workers. 
When true, SSH + /// workers must pass host-key verification before being trusted at + /// RemoteVerified level; unverified remotes stay at Sandbox. + #[serde(default)] + pub require_identity_verification: bool, + /// Allow conservative parallel execution of read-only tools (#2983). + /// When true, workers may batch independent read-only tool calls + /// (reads, searches, greps) into concurrent turns. Disabled by default + /// to avoid overwhelming providers or hitting rate limits. + #[serde(default)] + pub allow_parallel_reads: bool, +} + +fn default_max_trust_level() -> FleetTrustLevel { + FleetTrustLevel::Operator +} + +impl Default for FleetSecurityPolicy { + fn default() -> Self { + Self { + default_trust_level: FleetTrustLevel::Sandbox, + allowed_secrets: Vec::new(), + capability_grants: Vec::new(), + max_trust_level: FleetTrustLevel::Operator, + require_identity_verification: false, + allow_parallel_reads: false, + } + } +} + +/// A reference to a secret that should be resolved at runtime, never +/// serialized as a plaintext value. +/// +/// Secret refs appear in task specs, alert configs, and worker definitions. +/// The actual secret value is resolved by the fleet manager from the +/// secrets backend (OS keyring, environment, or file store) just before +/// the worker starts. +#[derive(Debug, Clone, Serialize, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct FleetSecretRef { + /// The secret key name (e.g. `"CODEWHALE_API_KEY"`, `"GH_TOKEN"`). + pub key: String, + /// Optional source hint for resolution order. + /// - `"env"` — resolve from environment variable + /// - `"keyring"` — resolve from OS keyring + /// - `"file"` — resolve from `~/.codewhale/secrets/` + /// - absent / null — try all sources in default order + #[serde(skip_serializing_if = "Option::is_none")] + pub source: Option, +} + +impl FleetSecretRef { + /// Create a secret ref from a key name with default resolution. 
+ #[must_use] + pub fn new(key: impl Into) -> Self { + Self { + key: key.into(), + source: None, + } + } + + /// Create a secret ref with an explicit source. + #[must_use] + pub fn with_source(key: impl Into, source: impl Into) -> Self { + Self { + key: key.into(), + source: Some(source.into()), + } + } + + /// Redacted display form for logging. Shows the key name and source + /// but never the resolved value. + #[must_use] + pub fn redacted(&self) -> String { + match &self.source { + Some(src) => format!("", src, self.key), + None => format!("", self.key), + } + } +} + +impl std::fmt::Display for FleetSecretRef { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.redacted()) + } +} + +impl From<&str> for FleetSecretRef { + fn from(key: &str) -> Self { + Self::new(key) + } +} + +impl From for FleetSecretRef { + fn from(key: String) -> Self { + Self::new(key) + } +} + +impl<'de> Deserialize<'de> for FleetSecretRef { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + #[derive(Deserialize)] + #[serde(untagged)] + enum SecretRefWire { + Key(String), + Structured { + key: String, + #[serde(default)] + source: Option, + }, + } + + match SecretRefWire::deserialize(deserializer)? { + SecretRefWire::Key(key) if !key.trim().is_empty() => Ok(FleetSecretRef::new(key)), + SecretRefWire::Key(_) => Err(de::Error::custom("secret ref key cannot be empty")), + SecretRefWire::Structured { key, source } if !key.trim().is_empty() => { + Ok(FleetSecretRef { key, source }) + } + SecretRefWire::Structured { .. } => { + Err(de::Error::custom("secret ref key cannot be empty")) + } + } + } +} + +/// How a worker authenticates to the fleet manager. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(tag = "method", rename_all = "snake_case")] +pub enum FleetWorkerAuth { + /// No authentication (local workers share the same uid). + None, + /// SSH key-based authentication with host-key verification. 
+ SshKey { + /// Path to the SSH identity file (may be a FleetSecretRef in JSON + /// as `{"key": "...", "source": "file"}`). + identity: PathBuf, + /// Known hosts file for host-key verification. + #[serde(skip_serializing_if = "Option::is_none")] + known_hosts: Option, + /// Expected host key fingerprint for pinning. + #[serde(skip_serializing_if = "Option::is_none")] + host_key_fingerprint: Option, + /// SSH user for the connection. + #[serde(skip_serializing_if = "Option::is_none")] + user: Option, + }, + /// Token-based authentication for remote workers behind a fleet proxy. + Token { + /// Reference to the token secret. + token_ref: FleetSecretRef, + }, + /// mTLS certificate-based authentication. + Mtls { + /// Path to the client certificate. + cert_path: PathBuf, + /// Reference to the private key secret. + key_ref: FleetSecretRef, + }, +} + +/// A capability grant that explicitly authorizes a worker to perform +/// a specific class of action. +/// +/// By default, new workers get no grants (least privilege). Grants are +/// additive: a worker's effective capabilities are the union of its +/// trust-level defaults plus any explicit grants. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct FleetCapabilityGrant { + /// The capability being granted (e.g. `"network"`, `"git-push"`, + /// `"provider-secrets"`, `"release"`). + pub capability: String, + /// Optional scope limiting the grant (e.g. `"github.com"` for network, + /// `"crates/tui/**"` for file writes). + #[serde(skip_serializing_if = "Option::is_none")] + pub scope: Option, + /// Optional justification for the grant (audit trail). + #[serde(skip_serializing_if = "Option::is_none")] + pub reason: Option, +} + /// Runtime status of a worker. 
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] #[serde(rename_all = "snake_case")] @@ -469,18 +742,84 @@ pub enum FleetAlertEventClass { #[serde(tag = "kind", rename_all = "snake_case")] pub enum FleetAlertChannel { Slack { - webhook_url: String, + /// Webhook URL, resolved from a secret ref or inline. + #[serde(flatten)] + webhook: FleetAlertEndpoint, }, Webhook { - url: String, - secret: Option, + #[serde(flatten)] + endpoint: FleetAlertEndpoint, }, + #[serde(alias = "pager_duty")] + #[serde(alias = "pagerduty")] PagerDuty { routing_key: String, severity: String, }, } +/// An alert channel endpoint, supporting both inline URLs and secret refs. +/// +/// For Slack and generic webhook channels, the URL may be provided directly +/// or as a secret reference resolved at send time. When both `url` and +/// `url_ref` are present, `url_ref` takes precedence after resolution. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct FleetAlertEndpoint { + /// Inline URL (plaintext; only for non-sensitive endpoints). + #[serde( + alias = "webhook_url", + alias = "endpoint_url", + skip_serializing_if = "Option::is_none" + )] + pub url: Option, + /// Reference to a secret containing the webhook URL. + #[serde( + alias = "webhook_url_ref", + alias = "webhook_ref", + alias = "url_secret_ref", + skip_serializing_if = "Option::is_none" + )] + pub url_ref: Option, + /// Optional HMAC secret for webhook payload signing, as a secret ref. + #[serde( + alias = "secret", + alias = "webhook_secret", + alias = "signing_secret", + skip_serializing_if = "Option::is_none" + )] + pub secret_ref: Option, +} + +impl FleetAlertEndpoint { + /// Create an inline URL endpoint (for non-sensitive use). + #[must_use] + pub fn inline(url: impl Into) -> Self { + Self { + url: Some(url.into()), + url_ref: None, + secret_ref: None, + } + } + + /// Create a secret-backed URL endpoint. 
+ #[must_use] + pub fn from_secret(url_ref: FleetSecretRef) -> Self { + Self { + url: None, + url_ref: Some(url_ref), + secret_ref: None, + } + } + + /// Redacted display form for logging. + #[must_use] + pub fn redacted(&self) -> String { + self.url_ref + .as_ref() + .map_or_else(|| "".to_string(), |r| r.redacted()) + } +} + /// Receipt produced when a task completes verification. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct FleetReceipt { @@ -573,6 +912,7 @@ mod tests { }], worker_specs: vec![], labels: BTreeMap::new(), + security_policy: None, created_at: "2026-06-12T17:00:00Z".to_string(), updated_at: None, completed_at: None, @@ -648,7 +988,7 @@ mod tests { let policy = FleetAlertPolicy { events: vec![FleetAlertEventClass::Stale], channels: vec![FleetAlertChannel::Slack { - webhook_url: "https://hooks.slack.com/test".to_string(), + webhook: FleetAlertEndpoint::inline("https://hooks.slack.com/test"), }], after_attempts: Some(2), after_minutes_stale: Some(10), @@ -687,6 +1027,8 @@ mod tests { port, user, identity, + known_hosts, + host_key_fingerprint, working_directory, env_allowlist, codewhale_binary, @@ -695,6 +1037,8 @@ mod tests { assert_eq!(port, None); assert_eq!(user, None); assert_eq!(identity, None); + assert_eq!(known_hosts, None); + assert_eq!(host_key_fingerprint, None); assert_eq!(working_directory, None); assert!(env_allowlist.is_empty()); assert_eq!(codewhale_binary, None); @@ -801,4 +1145,131 @@ mod tests { assert_eq!(back.result, FleetTaskResult::Partial); assert_eq!(back.failure_kind, Some(FleetTaskFailureKind::Verifier)); } + + #[test] + fn ssh_host_spec_with_key_pinning_round_trip() { + let spec = FleetHostSpec::Ssh { + host: "builder.trusted.example.com".to_string(), + port: Some(22), + user: Some("codewhale".to_string()), + identity: Some(PathBuf::from("~/.ssh/codewhale_fleet")), + known_hosts: Some(PathBuf::from("~/.ssh/known_hosts")), + host_key_fingerprint: Some("SHA256:aLGqZo1M6c...".to_string()), + working_directory: 
Some(PathBuf::from("/srv/codewhale/work")), + env_allowlist: vec!["CODEWHALE_PROFILE".to_string()], + codewhale_binary: Some("/usr/local/bin/codewhale".to_string()), + }; + let json = serde_json::to_string_pretty(&spec).unwrap(); + assert!(json.contains("\"known_hosts\"")); + assert!(json.contains("\"host_key_fingerprint\"")); + assert!(json.contains("SHA256:aLGqZo1M6c...")); + + let back: FleetHostSpec = serde_json::from_str(&json).unwrap(); + match back { + FleetHostSpec::Ssh { + host, + known_hosts, + host_key_fingerprint, + .. + } => { + assert_eq!(host, "builder.trusted.example.com"); + assert_eq!(known_hosts, Some(PathBuf::from("~/.ssh/known_hosts"))); + assert_eq!( + host_key_fingerprint, + Some("SHA256:aLGqZo1M6c...".to_string()) + ); + } + other => panic!("expected ssh host spec, got {other:?}"), + } + } + + #[test] + fn secret_ref_redacted_never_exposes_value() { + let ref_ = FleetSecretRef::new("DEEPSEEK_API_KEY"); + let redacted = ref_.redacted(); + assert!(redacted.contains("DEEPSEEK_API_KEY")); + assert!(!redacted.contains("sk-")); + assert!(redacted.contains(" { + assert_eq!(webhook.url.as_deref(), Some("https://hooks.slack.com/test")); + assert_eq!( + webhook.secret_ref, + Some(FleetSecretRef::new("SLACK_SIGNING_SECRET")) + ); + } + other => panic!("expected slack channel, got {other:?}"), + } + } + + #[test] + fn security_policy_defaults_are_conservative() { + let policy = FleetSecurityPolicy::default(); + assert_eq!(policy.default_trust_level, FleetTrustLevel::Sandbox); + assert!(policy.allowed_secrets.is_empty()); + assert!(policy.capability_grants.is_empty()); + assert_eq!(policy.max_trust_level, FleetTrustLevel::Operator); + assert!(!policy.require_identity_verification); + } + + #[test] + fn trust_level_ordinal_reflects_privilege() { + assert!(FleetTrustLevel::Operator > FleetTrustLevel::RemoteVerified); + assert!(FleetTrustLevel::RemoteVerified > FleetTrustLevel::Local); + assert!(FleetTrustLevel::Local > FleetTrustLevel::Sandbox); + + 
assert!(FleetTrustLevel::Operator.may_access_secrets()); + assert!(!FleetTrustLevel::Sandbox.may_access_secrets()); + assert!(!FleetTrustLevel::Sandbox.may_write_workspace()); + assert!(FleetTrustLevel::Operator.may_write_workspace()); + } } diff --git a/crates/secrets/src/lib.rs b/crates/secrets/src/lib.rs index 9e97286f..15506df3 100644 --- a/crates/secrets/src/lib.rs +++ b/crates/secrets/src/lib.rs @@ -737,6 +737,44 @@ impl Secrets { pub fn get(&self, name: &str) -> Result, SecretsError> { self.store.get(name) } + + /// Resolve a secret by key name with an optional source constraint. + /// + /// This is the fleet-worker secret resolution path. Unlike + /// [`resolve`](Secrets::resolve), this does NOT map provider names + /// to their canonical env vars — the caller controls the exact key + /// and resolution order. + /// + /// `source_hint` controls the resolution order: + /// - `Some("env")` — only check environment variables + /// - `Some("keyring")` — only check the keyring/file store + /// - `None` — try the store first, then fall back to environment + #[must_use] + pub fn resolve_direct(&self, key: &str, source_hint: Option<&str>) -> Option { + match source_hint { + Some("env") => { + // Only check process environment — skip the store entirely. + std::env::var(key).ok().filter(|v| !v.trim().is_empty()) + } + Some("keyring") | Some("file") => { + // Only check the store backend. + self.store + .get(key) + .ok() + .flatten() + .filter(|v| !v.trim().is_empty()) + } + Some(_) | None => { + // Default: store first, then env fallback. 
+ if let Ok(Some(v)) = self.store.get(key) + && !v.trim().is_empty() + { + return Some(v); + } + std::env::var(key).ok().filter(|v| !v.trim().is_empty()) + } + } + } } /// Map a canonical provider name to its environment variable(s), returning diff --git a/crates/tui/src/config.rs b/crates/tui/src/config.rs index 3b60c00c..4dbb0081 100644 --- a/crates/tui/src/config.rs +++ b/crates/tui/src/config.rs @@ -1854,6 +1854,10 @@ pub struct Config { #[serde(default)] pub context: ContextConfig, + /// Agent Fleet trust/security/role/exec config. + #[serde(default)] + pub fleet: Option, + /// Sub-agent model overrides. #[serde(default)] pub subagents: Option, @@ -4983,6 +4987,7 @@ fn merge_config(base: Config, override_cfg: Config) -> Config { .or(base.context.l3_threshold), seam_model: override_cfg.context.seam_model.or(base.context.seam_model), }, + fleet: override_cfg.fleet.or(base.fleet), subagents: override_cfg.subagents.or(base.subagents), strict_tool_mode: override_cfg.strict_tool_mode.or(base.strict_tool_mode), runtime_api: override_cfg.runtime_api.or(base.runtime_api), diff --git a/crates/tui/src/fleet/host.rs b/crates/tui/src/fleet/host.rs index 8a4502d9..28c74c9a 100644 --- a/crates/tui/src/fleet/host.rs +++ b/crates/tui/src/fleet/host.rs @@ -373,6 +373,8 @@ pub struct SshFleetHostConfig { pub user: Option, pub port: Option, pub identity: Option, + pub known_hosts: Option, + pub host_key_fingerprint: Option, pub working_directory: PathBuf, pub env_allowlist: BTreeSet, pub codewhale_binary: String, @@ -387,6 +389,8 @@ impl SshFleetHostConfig { user: None, port: None, identity: None, + known_hosts: None, + host_key_fingerprint: None, working_directory: working_directory.into(), env_allowlist: BTreeSet::new(), codewhale_binary: "codewhale".to_string(), @@ -401,6 +405,8 @@ impl SshFleetHostConfig { port, user, identity, + known_hosts, + host_key_fingerprint, working_directory, env_allowlist, codewhale_binary, @@ -420,6 +426,8 @@ impl SshFleetHostConfig { config.port = 
*port; config.user = user.clone(); config.identity = identity.clone(); + config.known_hosts = known_hosts.clone(); + config.host_key_fingerprint = host_key_fingerprint.clone(); config.env_allowlist = env_allowlist.iter().cloned().collect(); config.codewhale_binary = codewhale_binary; config.validate()?; @@ -918,6 +926,8 @@ mod tests { port: Some(2222), user: Some("fleet".to_string()), identity: Some(PathBuf::from("/tmp/fleet_id")), + known_hosts: None, + host_key_fingerprint: None, working_directory: Some(PathBuf::from("/srv/codewhale")), env_allowlist: vec!["FLEET_PROFILE".to_string()], codewhale_binary: Some("/usr/local/bin/codewhale".to_string()), diff --git a/crates/tui/src/fleet/ledger.rs b/crates/tui/src/fleet/ledger.rs index 37d2cfdd..7b863bf9 100644 --- a/crates/tui/src/fleet/ledger.rs +++ b/crates/tui/src/fleet/ledger.rs @@ -661,12 +661,21 @@ fn sanitize_run_for_ledger(run: &FleetRun) -> FleetRun { if let Some(policy) = &mut task.alert_policy { for channel in &mut policy.channels { match channel { - FleetAlertChannel::Slack { webhook_url } => { - *webhook_url = "".to_string(); + FleetAlertChannel::Slack { webhook } => { + webhook.url = webhook.url.as_ref().map(|_| "".to_string()); } - FleetAlertChannel::Webhook { url, secret } => { - *url = "".to_string(); - *secret = secret.as_ref().map(|_| "".to_string()); + FleetAlertChannel::Webhook { endpoint } => { + *endpoint = FleetAlertEndpoint { + url: endpoint.url.as_ref().map(|_| "".to_string()), + url_ref: endpoint + .url_ref + .as_ref() + .map(|_| FleetSecretRef::new("")), + secret_ref: endpoint + .secret_ref + .as_ref() + .map(|_| FleetSecretRef::new("")), + }; } FleetAlertChannel::PagerDuty { routing_key, .. 
} => { *routing_key = "".to_string(); @@ -691,6 +700,7 @@ mod tests { task_specs: vec![], worker_specs: vec![], labels: BTreeMap::new(), + security_policy: None, created_at: "2026-06-12T17:00:00Z".to_string(), updated_at: None, completed_at: None, diff --git a/crates/tui/src/fleet/manager.rs b/crates/tui/src/fleet/manager.rs index cf2fdb2a..9f8ff6fb 100644 --- a/crates/tui/src/fleet/manager.rs +++ b/crates/tui/src/fleet/manager.rs @@ -21,14 +21,38 @@ use super::task_spec::{ FleetTaskSpecDocument, FleetTaskVerificationInput, load_task_spec_document, record_verification_receipt, validate_task_spec_document, verify_task_result, }; +use super::worker_runtime; +use crate::tools::subagent::SharedSubAgentManager; const DEFAULT_STALE_AFTER_SECONDS: u64 = 300; -#[derive(Debug)] pub struct FleetManager { workspace: PathBuf, ledger: FleetLedger, stale_after: Duration, + exec_config: codewhale_config::FleetExecConfig, + /// Optional sub-agent manager for headless worker execution. + /// When set, fleet workers spawn real sub-agents; when None, + /// the manager falls back to local simulation. + sub_agent_manager: Option, +} + +impl std::fmt::Debug for FleetManager { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("FleetManager") + .field("workspace", &self.workspace) + .field("ledger", &self.ledger) + .field("stale_after", &self.stale_after) + .field("exec_config", &self.exec_config) + .field( + "sub_agent_manager", + &self + .sub_agent_manager + .as_ref() + .map(|_| "SharedSubAgentManager"), + ) + .finish() + } } #[derive(Debug, Clone)] @@ -78,6 +102,28 @@ pub struct FleetWorkerInspection { pub artifacts: Vec, pub last_error: Option, pub alert_state: Option, + /// Lightweight projection from the sub-agent worker runtime. + /// Populated when a sub-agent manager is attached. + pub runtime_state: Option, +} + +/// Lightweight TUI projection of a headless sub-agent worker's current state. 
+/// +/// Derived from the sub-agent manager's `AgentWorkerRecord`. +#[derive(Debug, Clone)] +pub struct FleetWorkerRuntimeProjection { + /// Sub-agent lifecycle status (Queued, Starting, Running, Completed, etc.) + pub agent_status: String, + /// Steps taken so far (tool calls + model turns) + pub steps_taken: u32, + /// Latest human-readable message from the worker + pub latest_message: Option, + /// Error message if the worker failed + pub error: Option, + /// Result summary if the worker completed + pub result_summary: Option, + /// Whether the worker has a sub-agent session running + pub has_session: bool, } impl FleetManager { @@ -88,6 +134,8 @@ impl FleetManager { workspace, ledger, stale_after: Duration::from_secs(DEFAULT_STALE_AFTER_SECONDS), + exec_config: codewhale_config::FleetExecConfig::default(), + sub_agent_manager: None, }) } @@ -96,6 +144,23 @@ impl FleetManager { self } + /// Apply fleet headless-worker execution policy from config. + pub fn with_exec_config(mut self, exec_config: codewhale_config::FleetExecConfig) -> Self { + self.exec_config = exec_config; + self + } + + /// Attach a sub-agent manager so fleet workers can spawn real headless agents. + pub fn with_sub_agent_manager(mut self, mgr: SharedSubAgentManager) -> Self { + self.sub_agent_manager = Some(mgr); + self + } + + /// True when the manager has a sub-agent runtime for headless worker execution. 
+ pub fn has_worker_runtime(&self) -> bool { + self.sub_agent_manager.is_some() + } + pub fn ledger_path(&self) -> &Path { self.ledger.path() } @@ -139,6 +204,7 @@ impl FleetManager { task_specs: doc.tasks.clone(), worker_specs: doc.workers.clone(), labels: doc.labels, + security_policy: doc.security_policy.clone(), created_at: now.clone(), updated_at: Some(now.clone()), completed_at: None, @@ -280,6 +346,27 @@ impl FleetManager { .get(worker_id) .map(|heartbeat| heartbeat.timestamp.clone()); let alert_state = latest_alert_for_worker(&state, worker_id); + + // Enrich with sub-agent worker runtime state when available. + let runtime_state = self.sub_agent_manager.as_ref().and_then(|mgr| { + mgr.try_read() + .ok() + .and_then(|guard| guard.get_worker_record(worker_id)) + .map(|record| FleetWorkerRuntimeProjection { + agent_status: format!("{:?}", record.status).to_lowercase(), + steps_taken: record.steps_taken, + latest_message: record.latest_message, + error: record.error, + result_summary: record.result_summary, + has_session: !matches!( + record.status, + crate::tools::subagent::AgentWorkerStatus::Completed + | crate::tools::subagent::AgentWorkerStatus::Failed + | crate::tools::subagent::AgentWorkerStatus::Cancelled + ), + }) + }); + Ok(FleetWorkerInspection { worker_id: worker_id.to_string(), status, @@ -293,6 +380,7 @@ impl FleetManager { artifacts, last_error, alert_state, + runtime_state, }) } @@ -475,6 +563,45 @@ impl FleetManager { FleetWorkerEventPayload::Running, )?; self.ledger.heartbeat(worker_id, ×tamp(), None, None)?; + + // Register with the sub-agent manager for headless worker tracking. + // The engine's agent_open path handles actual sub-agent spawning. 
+ if let Some(ref mgr) = self.sub_agent_manager { + if let Ok(guard) = mgr.try_write() { + let run = self + .ledger + .rebuild_state() + .ok() + .and_then(|state| state.runs.get(&entry.run_id.0).cloned()); + let worker_spec = run + .as_ref() + .and_then(|r| r.worker_specs.iter().find(|w| w.id == worker_id).cloned()) + .unwrap_or_else(|| FleetWorkerSpec { + id: worker_id.to_string(), + name: worker_id.to_string(), + host: FleetHostSpec::Local, + trust_level: Some(FleetTrustLevel::Local), + labels: BTreeMap::new(), + capabilities: vec![], + max_concurrent_tasks: Some(1), + }); + let worker = worker_runtime::fleet_task_to_worker_spec( + worker_id, + &entry.run_id.0, + task_spec, + &worker_spec, + "auto", + &self.workspace, + ); + let worker = worker_runtime::apply_exec_hardening(worker, &self.exec_config); + // drop guard after registering so we don't hold the write lock + drop(guard); + if let Ok(mut guard) = mgr.try_write() { + guard.register_worker(worker); + } + } + } + self.maybe_complete_local_simulation(entry, worker_id, task_spec, log_artifact) } @@ -746,6 +873,7 @@ fn default_local_workers(run_id: &FleetRunId, max_workers: usize) -> Vec u8 { 42 }\n", + ) + .unwrap(); + + let tasks = vec![ + FleetTaskSpec { + id: "check".to_string(), + name: "check".to_string(), + description: None, + objective: Some("cargo check".to_string()), + instructions: "run cargo check and report result".to_string(), + worker: Some(FleetTaskWorkerProfile { + role: Some("release-checker".to_string()), + tool_profile: Some("read-only".to_string()), + tools: vec!["cargo".to_string()], + capabilities: vec!["rust".to_string()], + }), + workspace: Some(FleetWorkspaceRequirements { + root: None, + required_files: vec![PathBuf::from("Cargo.toml")], + writable_paths: vec![PathBuf::from(".codewhale/fleet")], + environment: Some(FleetEnvironmentRequirements { + required: vec!["PATH".to_string()], + allowlist: vec![], + }), + }), + input_files: vec![], + context: vec![], + budget: None, + tags: 
vec!["smoke".to_string()], + expected_artifacts: vec![FleetArtifactKind::Log, FleetArtifactKind::Receipt], + scorer: Some(FleetScorerSpec::ExitCode), + retry_policy: Some(FleetRetryPolicy { + max_attempts: 1, + ..Default::default() + }), + alert_policy: None, + timeout_seconds: Some(60), + metadata: BTreeMap::new(), + }, + FleetTaskSpec { + id: "review".to_string(), + name: "review".to_string(), + description: None, + objective: Some("review source".to_string()), + instructions: "read src/lib.rs and report findings".to_string(), + worker: Some(FleetTaskWorkerProfile { + role: Some("reviewer".to_string()), + tool_profile: Some("read-only".to_string()), + tools: vec!["cargo".to_string()], + capabilities: vec!["rust".to_string()], + }), + workspace: Some(FleetWorkspaceRequirements { + root: None, + required_files: vec![], + writable_paths: vec![], + environment: Some(FleetEnvironmentRequirements { + required: vec!["PATH".to_string()], + allowlist: vec![], + }), + }), + input_files: vec![], + context: vec![], + budget: None, + tags: vec!["smoke".to_string()], + expected_artifacts: vec![FleetArtifactKind::Log, FleetArtifactKind::Receipt], + scorer: None, + retry_policy: Some(FleetRetryPolicy { + max_attempts: 1, + ..Default::default() + }), + alert_policy: None, + timeout_seconds: Some(60), + metadata: BTreeMap::new(), + }, + ]; + + let manager = FleetManager::open(&workspace).unwrap(); + let report = manager + .create_run( + FleetTaskSpecDocument { + name: Some("dogfood smoke".to_string()), + labels: BTreeMap::new(), + security_policy: Some(FleetSecurityPolicy { + default_trust_level: FleetTrustLevel::Local, + ..Default::default() + }), + workers: vec![], + tasks, + }, + 2, + ) + .unwrap(); + + assert_eq!(report.task_count, 2); + assert!(!report.worker_ids.is_empty()); + assert_eq!(report.worker_ids.len(), 2); + // After immediate scheduling, tasks may already be leased, + // so queued+running should total 2. 
+ let status = manager.run_status(&report.run_id).unwrap(); + assert_eq!(status.queued + status.running, 2); + } + + #[test] + fn fleet_security_policy_propagates_from_task_spec_document_to_run() { + let tmp = TempDir::new().unwrap(); + let manager = FleetManager::open(tmp.path()).unwrap(); + let path = task_spec_file(&tmp, vec![task("task-a")]); + // Rewrite the spec file with a security_policy block. + let doc = serde_json::json!({ + "name": "secure smoke", + "tasks": [{ + "id": "task-a", + "name": "task-a", + "instructions": "report ok", + "expected_artifacts": ["log"] + }], + "security_policy": { + "default_trust_level": "local", + "allowed_secrets": [{"key": "GH_TOKEN", "source": "env"}], + "max_trust_level": "remote_verified", + "require_identity_verification": true + } + }); + let spec_path = tmp.path().join("secure-tasks.json"); + std::fs::write(&spec_path, serde_json::to_string_pretty(&doc).unwrap()).unwrap(); + + let report = manager + .create_run_from_task_spec_path(&spec_path, 1) + .unwrap(); + + let state = manager.ledger.rebuild_state().unwrap(); + let run = state.runs.get(&report.run_id.0).unwrap(); + let policy = run.security_policy.as_ref().unwrap(); + assert_eq!(policy.default_trust_level, FleetTrustLevel::Local); + assert_eq!(policy.allowed_secrets.len(), 1); + assert_eq!(policy.allowed_secrets[0].key, "GH_TOKEN"); + assert_eq!(policy.max_trust_level, FleetTrustLevel::RemoteVerified); + assert!(policy.require_identity_verification); + } } diff --git a/crates/tui/src/fleet/mod.rs b/crates/tui/src/fleet/mod.rs index 6a57779c..7258e893 100644 --- a/crates/tui/src/fleet/mod.rs +++ b/crates/tui/src/fleet/mod.rs @@ -6,3 +6,4 @@ pub mod ledger; pub mod manager; pub mod scheduler; pub mod task_spec; +pub mod worker_runtime; diff --git a/crates/tui/src/fleet/scheduler.rs b/crates/tui/src/fleet/scheduler.rs index dc15a770..b62a33a8 100644 --- a/crates/tui/src/fleet/scheduler.rs +++ b/crates/tui/src/fleet/scheduler.rs @@ -573,6 +573,7 @@ mod tests { id: 
id.to_string(), name: id.to_string(), host: FleetHostSpec::Local, + trust_level: Some(FleetTrustLevel::Local), labels: BTreeMap::new(), capabilities: vec!["local".to_string()], max_concurrent_tasks: Some(1), @@ -622,6 +623,7 @@ mod tests { .map(|idx| worker(&format!("worker-{idx}"))) .collect(), labels: BTreeMap::new(), + security_policy: None, created_at: scheduler.timestamp(), updated_at: None, completed_at: None, @@ -704,7 +706,7 @@ mod tests { failing.alert_policy = Some(FleetAlertPolicy { events: vec![FleetAlertEventClass::RestartExhausted], channels: vec![FleetAlertChannel::Slack { - webhook_url: "https://hooks.slack.invalid/secret".to_string(), + webhook: FleetAlertEndpoint::inline("https://hooks.slack.invalid/secret"), }], after_attempts: Some(1), after_minutes_stale: Some(1), diff --git a/crates/tui/src/fleet/task_spec.rs b/crates/tui/src/fleet/task_spec.rs index 1dfcdadc..f0425d7f 100644 --- a/crates/tui/src/fleet/task_spec.rs +++ b/crates/tui/src/fleet/task_spec.rs @@ -23,6 +23,9 @@ pub struct FleetTaskSpecDocument { pub name: Option, #[serde(default)] pub labels: BTreeMap, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub security_policy: Option, #[serde(default, alias = "worker_specs")] pub workers: Vec, #[serde(default)] @@ -49,12 +52,14 @@ impl FleetTaskSpecFile { Self::Tasks(tasks) => FleetTaskSpecDocument { name: Some(fallback_name), labels: BTreeMap::new(), + security_policy: None, workers: Vec::new(), tasks, }, Self::Single(task) => FleetTaskSpecDocument { name: Some(fallback_name), labels: BTreeMap::new(), + security_policy: None, workers: Vec::new(), tasks: vec![task], }, diff --git a/crates/tui/src/fleet/worker_runtime.rs b/crates/tui/src/fleet/worker_runtime.rs new file mode 100644 index 00000000..9b9f2137 --- /dev/null +++ b/crates/tui/src/fleet/worker_runtime.rs @@ -0,0 +1,626 @@ +//! Fleet worker runtime — bridges fleet task specs to headless sub-agent execution. +//! +//! 
This module makes fleet workers real: instead of simulating task completion, +//! each fleet worker spawns a headless sub-agent that runs the task instructions +//! and streams progress back into the fleet ledger. +//! +//! Architecture: +//! - `FleetTaskSpec` + `FleetWorkerSpec` → `AgentWorkerSpec` +//! - `SubAgentManager::register_worker()` tracks the worker +//! - Sub-agent spawn happens through the existing `agent_open` machinery +//! - Mailbox events stream into fleet ledger as `FleetWorkerEventPayload` +//! - `FleetWorkerInspection` reads both ledger state and sub-agent worker records + +#![allow(dead_code)] + +use codewhale_protocol::fleet::{ + FleetHostSpec, FleetTaskSpec, FleetTaskWorkerProfile, FleetWorkerEventPayload, FleetWorkerSpec, +}; + +use super::host::FleetHostKind; +use crate::tools::subagent::{ + AgentWorkerSpec, AgentWorkerStatus, AgentWorkerToolProfile, SubAgentType, +}; + +/// Map a fleet worker spec's host kind to a display string. +pub fn fleet_host_kind_for_spec(spec: &FleetWorkerSpec) -> FleetHostKind { + match &spec.host { + FleetHostSpec::Local => FleetHostKind::LocalProcess, + FleetHostSpec::Ssh { .. } => FleetHostKind::Ssh, + FleetHostSpec::Docker { .. } => FleetHostKind::LocalProcess, // Docker runs local-ish + } +} + +/// Map a fleet host kind to a compact display label. +pub fn fleet_host_kind_label(kind: FleetHostKind) -> &'static str { + match kind { + FleetHostKind::LocalProcess => "local", + FleetHostKind::Ssh => "ssh", + } +} + +/// Build a sub-agent `AgentWorkerSpec` from a fleet task spec and worker spec. +/// +/// The fleet task's `instructions` become the sub-agent's `objective`, the +/// `worker.role` maps to a `SubAgentType`, and tool/capability restrictions +/// become an `AgentWorkerToolProfile`. 
+pub fn fleet_task_to_worker_spec( + worker_id: &str, + run_id: &str, + task_spec: &FleetTaskSpec, + _worker_spec: &FleetWorkerSpec, + model: &str, + workspace: &std::path::Path, +) -> AgentWorkerSpec { + let agent_type = + fleet_role_to_agent_type(task_spec.worker.as_ref().and_then(|w| w.role.as_deref())); + + let tool_profile = fleet_tool_profile(task_spec.worker.as_ref()); + + let objective = fleet_task_prompt(task_spec); + + AgentWorkerSpec { + worker_id: worker_id.to_string(), + run_id: run_id.to_string(), + parent_run_id: None, + session_name: Some(format!("fleet-{}-{}", worker_id, task_spec.id)), + objective, + role: task_spec.worker.as_ref().and_then(|w| w.role.clone()), + agent_type, + model: model.to_string(), + workspace: workspace.to_path_buf(), + git_branch: None, + context_mode: "fresh".to_string(), + fork_context: false, + tool_profile, + max_steps: task_spec + .budget + .as_ref() + .and_then(|b| b.max_tool_calls) + .unwrap_or(u32::MAX), + spawn_depth: 0, + max_spawn_depth: codewhale_config::FleetExecConfig::default().max_spawn_depth, + } +} + +fn fleet_task_prompt(task_spec: &FleetTaskSpec) -> String { + let mut prompt = String::new(); + prompt.push_str("Fleet task: "); + prompt.push_str(&task_spec.name); + + if let Some(objective) = task_spec.objective.as_deref() { + prompt.push_str("\n\nObjective:\n"); + prompt.push_str(objective); + } else if let Some(description) = task_spec.description.as_deref() { + prompt.push_str("\n\nObjective:\n"); + prompt.push_str(description); + } + + prompt.push_str("\n\nInstructions:\n"); + prompt.push_str(&task_spec.instructions); + + if !task_spec.context.is_empty() { + prompt.push_str("\n\nContext:\n"); + for item in &task_spec.context { + prompt.push_str("- "); + prompt.push_str(item); + prompt.push('\n'); + } + } + + if !task_spec.input_files.is_empty() { + prompt.push_str("\nInput files:\n"); + for path in &task_spec.input_files { + prompt.push_str("- "); + prompt.push_str(&path.display().to_string()); + 
prompt.push('\n'); + } + } + + prompt +} + +/// Map a fleet role name to a `SubAgentType`. Unknown roles default to `General`. +fn fleet_role_to_agent_type(role: Option<&str>) -> SubAgentType { + match role { + Some("smoke-runner") | Some("read-only") => SubAgentType::ToolAgent, + Some("reviewer") => SubAgentType::Review, + Some("builder") => SubAgentType::Implementer, + Some("verifier") | Some("tester") => SubAgentType::Verifier, + Some("planner") => SubAgentType::Plan, + Some("explorer") => SubAgentType::Explore, + Some("general") | None => SubAgentType::General, + Some(other) => { + // Try parsing as a SubAgentType directly + SubAgentType::from_str(other).unwrap_or(SubAgentType::General) + } + } +} + +/// Convert a fleet worker profile's tool list into an `AgentWorkerToolProfile`. +fn fleet_tool_profile(profile: Option<&FleetTaskWorkerProfile>) -> AgentWorkerToolProfile { + match profile { + Some(p) if !p.tools.is_empty() => AgentWorkerToolProfile::Explicit(p.tools.clone()), + _ => AgentWorkerToolProfile::Inherited, + } +} + +/// Create a fleet artifact ref from a worker output. +/// +/// Uses the fleet artifact conventions: logs go under `.codewhale/fleet/`, +/// reports under `.codewhale/fleet/reports/`. +pub fn fleet_artifact_ref( + _run_id: &str, + _worker_id: &str, + kind: codewhale_protocol::fleet::FleetArtifactKind, + path: std::path::PathBuf, +) -> codewhale_protocol::fleet::FleetArtifactRef { + codewhale_protocol::fleet::FleetArtifactRef { + kind, + path, + checksum: None, + mime_type: None, + size_bytes: None, + } +} + +/// Map a sub-agent `AgentWorkerStatus` to a fleet `FleetWorkerEventPayload`. +/// +/// This is the streaming bridge: as the sub-agent runs, each status transition +/// produces a corresponding fleet ledger event so the TUI surfaces stay in sync. 
+pub fn agent_status_to_fleet_event( + status: AgentWorkerStatus, + message: Option<&str>, + tool_name: Option<&str>, +) -> FleetWorkerEventPayload { + match status { + AgentWorkerStatus::Queued => FleetWorkerEventPayload::Queued, + AgentWorkerStatus::Starting => FleetWorkerEventPayload::Starting, + AgentWorkerStatus::Running => FleetWorkerEventPayload::Running, + AgentWorkerStatus::WaitingForUser => FleetWorkerEventPayload::ModelWait { model: None }, + AgentWorkerStatus::ModelWait => FleetWorkerEventPayload::ModelWait { model: None }, + AgentWorkerStatus::RunningTool => FleetWorkerEventPayload::RunningTool { + tool: tool_name.unwrap_or("unknown").to_string(), + call_id: None, + }, + AgentWorkerStatus::Completed => FleetWorkerEventPayload::Completed { + exit_code: Some(0), + summary: message.map(|s| s.to_string()), + }, + AgentWorkerStatus::Failed => FleetWorkerEventPayload::Failed { + reason: message.unwrap_or("unknown error").to_string(), + recoverable: false, + }, + AgentWorkerStatus::Cancelled => FleetWorkerEventPayload::Cancelled { cancelled_by: None }, + AgentWorkerStatus::Interrupted => FleetWorkerEventPayload::Interrupted { + signal: message.map(|s| s.to_string()), + }, + } +} + +/// Apply exec hardening to a worker spec from fleet config (#3027). +/// +/// Filters tools against allowed/disallowed lists, caps max_steps to +/// config's max_turns, and returns the objective with system prompt +/// appended when configured. 
+pub fn apply_exec_hardening( + mut spec: AgentWorkerSpec, + exec: &codewhale_config::FleetExecConfig, +) -> AgentWorkerSpec { + // Cap max_steps to config max_turns + if exec.max_turns > 0 && exec.max_turns != u32::MAX { + spec.max_steps = spec.max_steps.min(exec.max_turns); + } + spec.max_spawn_depth = exec + .max_spawn_depth + .min(codewhale_config::MAX_SPAWN_DEPTH_CEILING); + + // Apply tool filtering + if !exec.allowed_tools.is_empty() || !exec.disallowed_tools.is_empty() { + spec.tool_profile = filter_tool_profile(&spec.tool_profile, exec); + } + + // Append system prompt + if !exec.append_system_prompt.is_empty() { + spec.objective = format!( + "{}\n\n[Policy]\n{}", + spec.objective, exec.append_system_prompt + ); + } + + spec +} + +/// Filter a tool profile against allowed/disallowed lists. +fn filter_tool_profile( + profile: &AgentWorkerToolProfile, + exec: &codewhale_config::FleetExecConfig, +) -> AgentWorkerToolProfile { + match profile { + AgentWorkerToolProfile::Explicit(tools) => { + let filtered: Vec = tools + .iter() + .filter(|t| { + // If allowed_tools is non-empty, only keep tools in the list + if !exec.allowed_tools.is_empty() && !exec.allowed_tools.contains(t) { + return false; + } + // Disallowed tools always win + !exec.disallowed_tools.contains(t) + }) + .cloned() + .collect(); + AgentWorkerToolProfile::Explicit(filtered) + } + AgentWorkerToolProfile::Inherited => { + // Inherited profiles can't be filtered at spec time; + // the sub-agent spawn path applies tool filtering. + AgentWorkerToolProfile::Inherited + } + } +} + +/// Determine whether a tool is safe for parallel execution (#2983). +/// +/// Read-only tools that don't mutate state and have no side effects +/// are candidates for conservative parallel batching. 
+pub fn is_parallel_safe_read_only_tool(tool_name: &str) -> bool { + matches!( + tool_name, + "read_file" + | "grep_files" + | "file_search" + | "list_dir" + | "git_status" + | "git_diff" + | "git_log" + | "git_show" + | "git_blame" + | "fetch_url" + | "web_search" + | "tool_search_tool_regex" + | "tool_search_tool_bm25" + ) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn fleet_role_smoke_runner_maps_to_tool_agent() { + assert_eq!( + fleet_role_to_agent_type(Some("smoke-runner")), + SubAgentType::ToolAgent + ); + } + + #[test] + fn fleet_role_reviewer_maps_to_review() { + assert_eq!( + fleet_role_to_agent_type(Some("reviewer")), + SubAgentType::Review + ); + } + + #[test] + fn fleet_role_builder_maps_to_implementer() { + assert_eq!( + fleet_role_to_agent_type(Some("builder")), + SubAgentType::Implementer + ); + } + + #[test] + fn fleet_role_none_maps_to_general() { + assert_eq!(fleet_role_to_agent_type(None), SubAgentType::General); + } + + #[test] + fn unknown_role_maps_to_general() { + assert_eq!( + fleet_role_to_agent_type(Some("nonexistent-role")), + SubAgentType::General + ); + } + + #[test] + fn fleet_tool_profile_empty_uses_inherited() { + let profile = FleetTaskWorkerProfile { + role: None, + tool_profile: None, + tools: vec![], + capabilities: vec![], + }; + assert_eq!( + fleet_tool_profile(Some(&profile)), + AgentWorkerToolProfile::Inherited + ); + } + + #[test] + fn fleet_tool_profile_explicit_passes_tools() { + let profile = FleetTaskWorkerProfile { + role: None, + tool_profile: None, + tools: vec!["cargo".to_string(), "git".to_string()], + capabilities: vec![], + }; + assert_eq!( + fleet_tool_profile(Some(&profile)), + AgentWorkerToolProfile::Explicit(vec!["cargo".to_string(), "git".to_string()]) + ); + } + + #[test] + fn fleet_task_prompt_includes_instructions_context_and_input_files() { + let task = FleetTaskSpec { + id: "review".to_string(), + name: "Review protocol".to_string(), + description: None, + objective: Some("Find protocol 
regressions".to_string()), + instructions: "Read the fleet protocol and report issues.".to_string(), + worker: None, + workspace: None, + input_files: vec![std::path::PathBuf::from("crates/protocol/src/fleet.rs")], + context: vec!["Keep the report concise.".to_string()], + budget: None, + tags: vec![], + expected_artifacts: vec![], + scorer: None, + retry_policy: None, + alert_policy: None, + timeout_seconds: None, + metadata: Default::default(), + }; + + let prompt = fleet_task_prompt(&task); + + assert!(prompt.contains("Review protocol")); + assert!(prompt.contains("Find protocol regressions")); + assert!(prompt.contains("Read the fleet protocol and report issues.")); + assert!(prompt.contains("Keep the report concise.")); + assert!(prompt.contains("crates/protocol/src/fleet.rs")); + } + + #[test] + fn fleet_worker_spec_defaults_to_shared_subagent_spawn_depth() { + let task = FleetTaskSpec { + id: "task-1".to_string(), + name: "Task".to_string(), + description: None, + objective: None, + instructions: "Do the task.".to_string(), + worker: None, + workspace: None, + input_files: vec![], + context: vec![], + budget: None, + tags: vec![], + expected_artifacts: vec![], + scorer: None, + retry_policy: None, + alert_policy: None, + timeout_seconds: None, + metadata: Default::default(), + }; + let worker = FleetWorkerSpec { + id: "worker-1".to_string(), + name: "Worker".to_string(), + host: FleetHostSpec::Local, + trust_level: None, + labels: Default::default(), + capabilities: vec![], + max_concurrent_tasks: None, + }; + + let spec = fleet_task_to_worker_spec( + "worker-1", + "run-1", + &task, + &worker, + "auto", + std::path::Path::new("/tmp"), + ); + + // Root fleet worker runs at depth 0; its budget equals the shared + // sub-agent default (3) so fleet and sub-agents are one substrate and + // at least 3 nested delegation levels are afforded. 
+ assert_eq!(spec.spawn_depth, 0); + assert_eq!(spec.max_spawn_depth, codewhale_config::DEFAULT_SPAWN_DEPTH); + assert_eq!(spec.max_spawn_depth, 3); + + // End-to-end reachability: walk the SAME gate the SubAgentRuntime + // enforces (`would_exceed_depth` = `spawn_depth + 1 > max_spawn_depth`). + // A depth-0 root must reach 3 nested levels, then stop. This fails if + // anyone lowers the shared default below 3 (Hunter: afford >= 3). + let hardened = apply_exec_hardening(spec, &codewhale_config::FleetExecConfig::default()); + let would_exceed = |spawn_depth: u32| spawn_depth + 1 > hardened.max_spawn_depth; + assert!( + !would_exceed(0), + "root (depth 0) must spawn a child at depth 1" + ); + assert!(!would_exceed(1), "depth-1 child must spawn to depth 2"); + assert!(!would_exceed(2), "depth-2 child must spawn to depth 3"); + assert!( + would_exceed(3), + "depth 3 is the afforded ceiling; depth 4 is blocked" + ); + } + + #[test] + fn exec_hardening_caps_max_steps_to_max_turns() { + let spec = AgentWorkerSpec { + worker_id: "w1".to_string(), + run_id: "r1".to_string(), + parent_run_id: None, + session_name: None, + objective: "test".to_string(), + role: None, + agent_type: SubAgentType::General, + model: "auto".to_string(), + workspace: std::path::PathBuf::from("/tmp"), + git_branch: None, + context_mode: "fresh".to_string(), + fork_context: false, + tool_profile: AgentWorkerToolProfile::Inherited, + max_steps: 1000, + spawn_depth: 0, + max_spawn_depth: 0, + }; + let exec = codewhale_config::FleetExecConfig { + max_turns: 50, + ..Default::default() + }; + let hardened = apply_exec_hardening(spec, &exec); + assert_eq!(hardened.max_steps, 50); + } + + #[test] + fn exec_hardening_applies_and_clamps_spawn_depth() { + let spec = AgentWorkerSpec { + worker_id: "w1".to_string(), + run_id: "r1".to_string(), + parent_run_id: None, + session_name: None, + objective: "test".to_string(), + role: None, + agent_type: SubAgentType::General, + model: "auto".to_string(), + workspace: 
std::path::PathBuf::from("/tmp"), + git_branch: None, + context_mode: "fresh".to_string(), + fork_context: false, + tool_profile: AgentWorkerToolProfile::Inherited, + max_steps: 1000, + spawn_depth: 0, + max_spawn_depth: 0, + }; + + let exec = codewhale_config::FleetExecConfig { + max_spawn_depth: 2, + ..Default::default() + }; + let hardened = apply_exec_hardening(spec.clone(), &exec); + assert_eq!(hardened.max_spawn_depth, 2); + + let exec = codewhale_config::FleetExecConfig { + max_spawn_depth: 99, + ..Default::default() + }; + let hardened = apply_exec_hardening(spec.clone(), &exec); + assert_eq!(hardened.max_spawn_depth, 3); + + let exec = codewhale_config::FleetExecConfig { + max_spawn_depth: 0, + ..Default::default() + }; + let hardened = apply_exec_hardening(spec, &exec); + assert_eq!(hardened.max_spawn_depth, 0); + } + + #[test] + fn exec_hardening_filters_disallowed_tools() { + let profile = AgentWorkerToolProfile::Explicit(vec![ + "read_file".to_string(), + "exec_shell".to_string(), + "git_diff".to_string(), + ]); + let exec = codewhale_config::FleetExecConfig { + disallowed_tools: vec!["exec_shell".to_string()], + ..Default::default() + }; + let filtered = filter_tool_profile(&profile, &exec); + assert_eq!( + filtered, + AgentWorkerToolProfile::Explicit( + vec!["read_file".to_string(), "git_diff".to_string(),] + ) + ); + } + + #[test] + fn exec_hardening_allowed_tools_acts_as_allowlist() { + let profile = AgentWorkerToolProfile::Explicit(vec![ + "read_file".to_string(), + "exec_shell".to_string(), + "git_diff".to_string(), + ]); + let exec = codewhale_config::FleetExecConfig { + allowed_tools: vec!["read_file".to_string(), "git_diff".to_string()], + ..Default::default() + }; + let filtered = filter_tool_profile(&profile, &exec); + assert_eq!( + filtered, + AgentWorkerToolProfile::Explicit( + vec!["read_file".to_string(), "git_diff".to_string(),] + ) + ); + } + + #[test] + fn exec_hardening_allowed_plus_disallowed_disallowed_wins() { + let profile = 
AgentWorkerToolProfile::Explicit(vec![ + "read_file".to_string(), + "exec_shell".to_string(), + ]); + let exec = codewhale_config::FleetExecConfig { + allowed_tools: vec!["read_file".to_string(), "exec_shell".to_string()], + disallowed_tools: vec!["exec_shell".to_string()], + ..Default::default() + }; + let filtered = filter_tool_profile(&profile, &exec); + assert_eq!( + filtered, + AgentWorkerToolProfile::Explicit(vec!["read_file".to_string(),]) + ); + } + + #[test] + fn parallel_safe_read_only_tools_includes_grep_and_read() { + assert!(is_parallel_safe_read_only_tool("read_file")); + assert!(is_parallel_safe_read_only_tool("grep_files")); + assert!(is_parallel_safe_read_only_tool("git_status")); + assert!(is_parallel_safe_read_only_tool("web_search")); + } + + #[test] + fn destructive_tools_not_parallel_safe() { + assert!(!is_parallel_safe_read_only_tool("exec_shell")); + assert!(!is_parallel_safe_read_only_tool("write_file")); + assert!(!is_parallel_safe_read_only_tool("edit_file")); + assert!(!is_parallel_safe_read_only_tool("apply_patch")); + assert!(!is_parallel_safe_read_only_tool("agent_open")); + } + + #[test] + fn exec_hardening_appends_system_prompt() { + let spec = AgentWorkerSpec { + worker_id: "w1".to_string(), + run_id: "r1".to_string(), + parent_run_id: None, + session_name: None, + objective: "do the thing".to_string(), + role: None, + agent_type: SubAgentType::General, + model: "auto".to_string(), + workspace: std::path::PathBuf::from("/tmp"), + git_branch: None, + context_mode: "fresh".to_string(), + fork_context: false, + tool_profile: AgentWorkerToolProfile::Inherited, + max_steps: 100, + spawn_depth: 0, + max_spawn_depth: 0, + }; + let exec = codewhale_config::FleetExecConfig { + append_system_prompt: "never push to main".to_string(), + ..Default::default() + }; + let hardened = apply_exec_hardening(spec, &exec); + assert!(hardened.objective.contains("do the thing")); + assert!(hardened.objective.contains("[Policy]")); + 
assert!(hardened.objective.contains("never push to main")); + } +} diff --git a/crates/tui/src/localization.rs b/crates/tui/src/localization.rs index aad101df..9d203577 100644 --- a/crates/tui/src/localization.rs +++ b/crates/tui/src/localization.rs @@ -255,6 +255,7 @@ pub enum MessageId { ConfigSectionSidebar, ConfigSectionHistory, ConfigSectionMcp, + ConfigSectionFleet, ConfigSectionExperimental, ConfigScopeSession, ConfigScopeSaved, @@ -699,6 +700,7 @@ pub const ALL_MESSAGE_IDS: &[MessageId] = &[ MessageId::ConfigSectionSidebar, MessageId::ConfigSectionHistory, MessageId::ConfigSectionMcp, + MessageId::ConfigSectionFleet, MessageId::ConfigSectionExperimental, MessageId::ConfigScopeSession, MessageId::ConfigScopeSaved, @@ -1315,6 +1317,7 @@ fn english(id: MessageId) -> &'static str { MessageId::ConfigSectionSidebar => "Sidebar", MessageId::ConfigSectionHistory => "History", MessageId::ConfigSectionMcp => "MCP", + MessageId::ConfigSectionFleet => "Fleet", MessageId::ConfigSectionExperimental => "Experimental", MessageId::ConfigScopeSession => "SESSION", MessageId::ConfigScopeSaved => "SAVED", @@ -1915,6 +1918,7 @@ fn vietnamese(id: MessageId) -> Option<&'static str> { MessageId::ConfigSectionSidebar => "Thanh bên", MessageId::ConfigSectionHistory => "Lịch sử", MessageId::ConfigSectionMcp => "MCP", + MessageId::ConfigSectionFleet => "Fleet", MessageId::ConfigSectionExperimental => "Thử nghiệm", MessageId::ConfigScopeSession => "PHIÊN", MessageId::ConfigScopeSaved => "ĐÃ LƯU", @@ -2624,6 +2628,7 @@ fn traditional_chinese(id: MessageId) -> Option<&'static str> { MessageId::ConfigSectionSidebar => "側邊欄", MessageId::ConfigSectionHistory => "歷史", MessageId::ConfigSectionMcp => "MCP", + MessageId::ConfigSectionFleet => "艦隊", MessageId::ConfigSectionExperimental => "實驗", MessageId::ConfigScopeSession => "會話", MessageId::ConfigScopeSaved => "已儲存", @@ -2724,6 +2729,7 @@ fn japanese(id: MessageId) -> Option<&'static str> { MessageId::ConfigSectionSidebar => "サイドバー", 
MessageId::ConfigSectionHistory => "履歴", MessageId::ConfigSectionMcp => "MCP", + MessageId::ConfigSectionFleet => "Fleet", MessageId::ConfigSectionExperimental => "実験", MessageId::ConfigScopeSession => "セッション", MessageId::ConfigScopeSaved => "保存済み", @@ -3315,6 +3321,7 @@ fn chinese_simplified(id: MessageId) -> Option<&'static str> { MessageId::ConfigSectionSidebar => "侧边栏", MessageId::ConfigSectionHistory => "历史", MessageId::ConfigSectionMcp => "MCP", + MessageId::ConfigSectionFleet => "舰队", MessageId::ConfigSectionExperimental => "实验", MessageId::ConfigScopeSession => "会话", MessageId::ConfigScopeSaved => "已保存", @@ -3842,6 +3849,7 @@ fn portuguese_brazil(id: MessageId) -> Option<&'static str> { MessageId::ConfigSectionSidebar => "Barra lateral", MessageId::ConfigSectionHistory => "Histórico", MessageId::ConfigSectionMcp => "MCP", + MessageId::ConfigSectionFleet => "Fleet", MessageId::ConfigSectionExperimental => "Experimental", MessageId::ConfigScopeSession => "SESSÃO", MessageId::ConfigScopeSaved => "SALVO", @@ -4461,6 +4469,7 @@ fn spanish_latin_america(id: MessageId) -> Option<&'static str> { MessageId::ConfigSectionSidebar => "Barra lateral", MessageId::ConfigSectionHistory => "Historial", MessageId::ConfigSectionMcp => "MCP", + MessageId::ConfigSectionFleet => "Fleet", MessageId::ConfigSectionExperimental => "Experimental", MessageId::ConfigScopeSession => "SESIÓN", MessageId::ConfigScopeSaved => "GUARDADO", diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs index 4ba997b2..c7985e5a 100644 --- a/crates/tui/src/main.rs +++ b/crates/tui/src/main.rs @@ -1193,8 +1193,9 @@ async fn main() -> Result<()> { run_swebench_command(&config, &model, workspace, max_subagents, args).await } Commands::Fleet(args) => { + let config = load_config_from_cli(&cli)?; let workspace = resolve_workspace(&cli); - run_fleet_command(&workspace, args).await + run_fleet_command(&workspace, &config, args).await } Commands::Review(args) => { let config = 
load_config_from_cli(&cli)?; @@ -1458,7 +1459,7 @@ async fn run_swebench_command( } } -async fn run_fleet_command(workspace: &Path, args: FleetArgs) -> Result<()> { +async fn run_fleet_command(workspace: &Path, config: &Config, args: FleetArgs) -> Result<()> { use crate::fleet::alerts::{ FleetAlertAdapterConfig, FleetAlertConfig, FleetAlertDispatcher, FleetAlertEvent, FleetEnvSecretResolver, @@ -1712,7 +1713,12 @@ async fn run_fleet_command(workspace: &Path, args: FleetArgs) -> Result<()> { } } - let manager = FleetManager::open(workspace)?; + let exec_config = config + .fleet + .as_ref() + .map(|fleet| fleet.exec.clone()) + .unwrap_or_default(); + let manager = FleetManager::open(workspace)?.with_exec_config(exec_config); match args.command { FleetCommand::Init => { println!("fleet ledger: {}", manager.ledger_path().display()); diff --git a/crates/tui/src/runtime_api.rs b/crates/tui/src/runtime_api.rs index 0d927099..d133bf6e 100644 --- a/crates/tui/src/runtime_api.rs +++ b/crates/tui/src/runtime_api.rs @@ -3980,6 +3980,7 @@ mod tests { crate::fleet::task_spec::FleetTaskSpecDocument { name: Some("api smoke".to_string()), labels: std::collections::BTreeMap::new(), + security_policy: None, workers: Vec::new(), tasks: vec![task], }, diff --git a/crates/tui/src/tools/subagent/mod.rs b/crates/tui/src/tools/subagent/mod.rs index c1411fb8..c103fc88 100644 --- a/crates/tui/src/tools/subagent/mod.rs +++ b/crates/tui/src/tools/subagent/mod.rs @@ -1161,8 +1161,13 @@ impl Default for PersistedSubAgentState { } /// Default cap on sub-agent recursion depth. Override via -/// `[runtime] max_spawn_depth = N` in `~/.deepseek/config.toml`. -pub const DEFAULT_MAX_SPAWN_DEPTH: u32 = 3; +/// `[runtime] max_spawn_depth = N` in config. +/// +/// Sourced from [`codewhale_config::DEFAULT_SPAWN_DEPTH`] so standalone +/// sub-agents and fleet workers share ONE recursion axis (no "two moving +/// targets"). 
Configured/requested depths clamp to +/// [`codewhale_config::MAX_SPAWN_DEPTH_CEILING`]. +pub const DEFAULT_MAX_SPAWN_DEPTH: u32 = codewhale_config::DEFAULT_SPAWN_DEPTH; /// Terminal-state notification emitted to the engine's parent turn loop /// when one of its direct children finishes (issue #756). Carries the @@ -1794,7 +1799,7 @@ impl SubAgentManager { .retain(|worker_id, _| keep_ids.contains(worker_id)); } - fn register_worker(&mut self, spec: AgentWorkerSpec) { + pub fn register_worker(&mut self, spec: AgentWorkerSpec) { let worker_id = spec.worker_id.clone(); let now_ms = epoch_millis_now(); let mut record = AgentWorkerRecord::new(normalize_worker_spec(spec), now_ms); @@ -5927,15 +5932,18 @@ fn parse_spawn_request(input: &Value) -> Result { .or_else(|| input.get("max_spawn_depth")) .and_then(Value::as_u64) .map(|depth| { + let ceiling = codewhale_config::MAX_SPAWN_DEPTH_CEILING; u32::try_from(depth) - .map_err(|_| ToolError::invalid_input("max_depth must be between 0 and 3")) + .map_err(|_| { + ToolError::invalid_input(format!("max_depth must be between 0 and {ceiling}")) + }) .and_then(|depth| { - if depth <= 3 { + if depth <= ceiling { Ok(depth) } else { - Err(ToolError::invalid_input( - "max_depth must be between 0 and 3", - )) + Err(ToolError::invalid_input(format!( + "max_depth must be between 0 and {ceiling}" + ))) } }) }) diff --git a/crates/tui/src/tui/views/mod.rs b/crates/tui/src/tui/views/mod.rs index 1a4ff137..77b71083 100644 --- a/crates/tui/src/tui/views/mod.rs +++ b/crates/tui/src/tui/views/mod.rs @@ -412,6 +412,7 @@ enum ConfigSection { Sidebar, History, Mcp, + Fleet, Experimental, } @@ -429,6 +430,7 @@ impl ConfigSection { ConfigSection::Sidebar => MessageId::ConfigSectionSidebar, ConfigSection::History => MessageId::ConfigSectionHistory, ConfigSection::Mcp => MessageId::ConfigSectionMcp, + ConfigSection::Fleet => MessageId::ConfigSectionFleet, ConfigSection::Experimental => MessageId::ConfigSectionExperimental, }, ) @@ -759,6 +761,18 @@ 
impl ConfigView { editable: true, scope: ConfigScope::Saved, }, + ConfigRow { + section: ConfigSection::Fleet, + key: "fleet.exec.max_spawn_depth".to_string(), + value: config + .fleet + .as_ref() + .map(|fleet| fleet.exec.max_spawn_depth) + .unwrap_or_else(|| codewhale_config::FleetExecConfig::default().max_spawn_depth) + .to_string(), + editable: false, + scope: ConfigScope::Saved, + }, ]; rows.extend(experimental_config_rows(&config)); @@ -1180,7 +1194,7 @@ fn experimental_config_rows(config: &Config) -> Vec { rows.push(ConfigRow { section: ConfigSection::Experimental, key: "whaleflow".to_string(), - value: "preview placeholder (not stable; see #2981/#2974)".to_string(), + value: "preview overlay for workflow/fleet runs (not stable; see #3154/#3178)".to_string(), editable: false, scope: ConfigScope::Saved, }); @@ -1235,6 +1249,9 @@ fn config_hint_for_key(key: &str) -> &'static str { "DeepSeek: auto/off/high/max; Codex: low/medium/high/xhigh; default clears saved value" } "mcp_config_path" => "path to mcp.json", + "fleet.exec.max_spawn_depth" => { + "0 blocks child agents; 3 default (same axis as sub-agents); capped at 3" + } _ => "", } } @@ -2393,6 +2410,7 @@ mod tests { "Sidebar", "History", "MCP", + "Fleet", "Experimental", ] ); @@ -2429,6 +2447,7 @@ mod tests { assert!(keys.contains(&"cost_currency")); assert!(keys.contains(&"prefer_external_pdftotext")); assert!(keys.contains(&"mcp_config_path")); + assert!(keys.contains(&"fleet.exec.max_spawn_depth")); assert!(keys.contains(&"features.subagents")); assert!(keys.contains(&"features.web_search")); assert!(keys.contains(&"features.apply_patch")); @@ -2440,13 +2459,23 @@ mod tests { assert!( view.rows .iter() - .filter(|row| row.section != super::ConfigSection::Experimental) + .filter(|row| { + !matches!( + row.section, + super::ConfigSection::Experimental | super::ConfigSection::Fleet + ) + }) .all(|row| row.editable) ); assert!( view.rows .iter() - .filter(|row| row.section == 
super::ConfigSection::Experimental) + .filter(|row| { + matches!( + row.section, + super::ConfigSection::Experimental | super::ConfigSection::Fleet + ) + }) .all(|row| !row.editable) ); } @@ -2497,6 +2526,36 @@ vision_model = true assert_eq!(subagents.value, "enabled (default enabled)"); } + #[test] + fn config_view_shows_fleet_max_spawn_depth_from_config() { + let temp_root = std::env::temp_dir().join(format!( + "codewhale-fleet-config-view-test-{}", + std::process::id() + )); + fs::create_dir_all(&temp_root).unwrap(); + let config_path = temp_root.join("config.toml"); + fs::write( + &config_path, + r#" +[fleet.exec] +max_spawn_depth = 2 +"#, + ) + .unwrap(); + + let mut app = create_test_app(); + app.config_path = Some(config_path); + let view = ConfigView::new_for_app(&app); + + let row = view + .rows + .iter() + .find(|row| row.key == "fleet.exec.max_spawn_depth") + .expect("fleet spawn depth row"); + assert_eq!(row.value, "2"); + assert!(!row.editable); + } + #[test] fn config_view_experimental_section_is_searchable() { let mut view = create_config_view(Locale::En); diff --git a/docs/FLEET.md b/docs/FLEET.md index bc15ca76..0e33f454 100644 --- a/docs/FLEET.md +++ b/docs/FLEET.md @@ -19,6 +19,29 @@ Fleet state is stored under the workspace in `.codewhale/fleet.jsonl`. Worker logs and adapter logs are stored under `.codewhale/fleet/` and `.codewhale/fleet-host/`. +## Naming: Modes, WhaleFlow, Fleet, and Swarm + +These names describe different layers, not competing systems. Agent, Plan, and +YOLO stay the permission/work modes. WhaleFlow is an orchestration overlay that +can run on top of those modes when the task needs a continuous workflow. + +- **WhaleFlow** is the repeatable workflow plan and user-facing orchestration + overlay: a script/IR that decides which phases and agents run next, keeps + intermediate results out of the main conversation, and can be inspected or + rerun. 
A WhaleFlow run should have a visible progress view and a clear active + header state instead of feeling like a hidden background task. +- **Fleet** is the execution substrate: headless workers, local/SSH hosts, + trust policy, leases, heartbeats, logs, receipts, and status APIs. +- **Swarm** is the high-fanout behavior inside WhaleFlow. It should compile into + a WhaleFlow-backed fleet run instead of reviving the old `agent_swarm` tool + surface. + +UI guidance: keep the main transcript calm. A WhaleFlow run should appear as a +compact progress card plus Work/Agents sidebar rows with phase names, worker +counts, receipts, and nested indentation for child workers. Use the whale mark +sparingly as an active header/status signal; avoid repeating emoji-heavy rows +for every worker. + ## Task Spec `codewhale fleet run` accepts JSON or TOML. A minimal JSON spec: @@ -60,118 +83,79 @@ and `json_path`. Specs may also declare `command`, `code_whale_verifier_prompt`, or `manual`; those record a partial receipt until an explicit verifier pass completes. -### Release Triage Example +### Using Role Presets + +Tasks can reference a role name, and the fleet manager fills in defaults +from the role registry. Built-in roles (`smoke-runner`, `reviewer`, `builder`, +`read-only`) are always available; define your own in `[fleet.roles]`. ```json { - "name": "v0.8.60 release triage", - "labels": { - "milestone": "v0.8.60" - }, + "name": "smoke check", "tasks": [ { - "id": "release-issue-sweep", - "name": "Release issue sweep", - "objective": "Find open v0.8.60 blockers and credit-sensitive PRs.", - "instructions": "Review the v0.8.60 milestone, linked PRs, changelog entries, and contributor-credit requirements. 
Write a concise blocker report.", - "worker": { - "role": "release-triage", - "tool_profile": "read-only", - "tools": ["gh", "git"], - "capabilities": ["github", "release"] - }, - "workspace": { - "required_files": ["Cargo.toml", "CHANGELOG.md", ".github/AUTHOR_MAP"], - "writable_paths": [".codewhale/fleet"], - "environment": { - "required": ["PATH"] - } - }, - "input_files": ["CHANGELOG.md", ".github/AUTHOR_MAP"], - "context": ["Treat community PRs as maintainer evidence."], - "budget": { - "max_tokens": 12000, - "max_tool_calls": 24, - "max_seconds": 900 - }, - "timeout_seconds": 900, - "expected_artifacts": ["log", "report", "receipt"], - "scorer": { - "kind": "exit_code" - }, - "retry_policy": { - "max_attempts": 2, - "initial_backoff_seconds": 10, - "max_backoff_seconds": 60, - "backoff_multiplier": 2 - }, - "tags": ["release", "triage"], - "metadata": { - "class": "release" - } + "id": "lint", + "name": "Lint check", + "instructions": "Run lint and report failures.", + "worker": { "role": "smoke-runner" }, + "expected_artifacts": ["log"] } ] } ``` -### Code Review Swarm Example +The task inherits the role's tool profile, budget, and timeout. 
You can +override any field in the task spec: ```json { - "name": "code review swarm", + "id": "deep-review", + "name": "Deep review", + "instructions": "Review the entire crate for soundness issues.", + "worker": { + "role": "reviewer", + "tools": ["cargo", "rg", "git"], + "capabilities": ["rust"] + }, + "input_files": ["crates/**/*.rs"], + "budget": { "max_tokens": 32000 }, + "expected_artifacts": ["log", "report"], + "scorer": { "kind": "regex_match", "path": ".codewhale/fleet/report.md", "pattern": "finding|all clear" } +} +``` + +### Multi-Task Run Example + +A single fleet run can dispatch several independent tasks in parallel: + +```json +{ + "name": "CI gate", "tasks": [ { - "id": "protocol-review", - "name": "Protocol review", - "objective": "Review fleet protocol changes for compatibility and sparse JSON behavior.", - "instructions": "Inspect crates/protocol/src/fleet.rs and report behavior regressions, missing serde defaults, or unsafe wire changes.", - "worker": { - "role": "reviewer", - "tool_profile": "read-only", - "tools": ["git", "rg", "cargo"], - "capabilities": ["rust"] - }, - "input_files": ["crates/protocol/src/fleet.rs"], - "budget": { - "max_tokens": 8000, - "max_tool_calls": 16, - "max_seconds": 600 - }, - "expected_artifacts": ["log", "report", "receipt"], - "scorer": { - "kind": "code_whale_verifier_prompt", - "prompt": "Verify the review includes at least one concrete file:line finding or explicitly says no issues were found." - }, - "tags": ["review", "protocol"], - "metadata": { - "class": "code-review" - } + "id": "check", + "name": "Compile check", + "instructions": "Run cargo check --workspace and report errors.", + "worker": { "role": "builder" }, + "expected_artifacts": ["log"], + "scorer": { "kind": "exit_code" } }, { - "id": "tui-review", - "name": "TUI review", - "objective": "Review fleet CLI and manager behavior for operator-visible regressions.", - "instructions": "Inspect crates/tui/src/fleet and crates/tui/src/main.rs. 
Focus on status output, receipt recording, and failure classification.", - "worker": { - "role": "reviewer", - "tool_profile": "read-only", - "tools": ["git", "rg", "cargo"], - "capabilities": ["rust", "cli"] - }, - "input_files": ["crates/tui/src/fleet", "crates/tui/src/main.rs"], - "budget": { - "max_tokens": 10000, - "max_tool_calls": 20, - "max_seconds": 600 - }, - "expected_artifacts": ["log", "report", "receipt"], - "scorer": { - "kind": "manual" - }, - "tags": ["review", "tui"], - "metadata": { - "class": "code-review" - } + "id": "clippy", + "name": "Clippy lint", + "instructions": "Run cargo clippy --workspace and report warnings.", + "worker": { "role": "reviewer", "tools": ["cargo", "cargo-clippy"] }, + "expected_artifacts": ["log"], + "scorer": { "kind": "exit_code" } + }, + { + "id": "security", + "name": "Secret audit", + "instructions": "Search for plaintext secrets and report any matches.", + "worker": { "role": "read-only", "tools": ["rg"] }, + "input_files": ["crates/**/*.rs"], + "expected_artifacts": ["log", "report"], + "retry_policy": { "max_attempts": 1 } } ] } @@ -373,3 +357,168 @@ Defaults are intentionally conservative: `API_KEY`, and `PRIVATE_KEY` are rejected from adapter allowlists; - secrets should remain in CodeWhale config providers or remote host config, not in task instructions, argv, or fleet logs. + +## Security and Trust Boundaries + +Agent Fleet enforces a trust-level model that separates workers into four tiers. +The trust level determines what a worker can access (secrets, network, workspace +writes) and how it must prove its identity before being granted those privileges. 
+ +### Trust Levels + +| Level | Access | Requires | +|-------|--------|----------| +| `sandbox` | No network, no secrets, writes only to `.codewhale/fleet/` | Nothing — default for new workers | +| `local` | Workspace reads, gated writes, configured secrets | Local process (same uid) | +| `remote-verified` | Network access, bounded capability grants, configured secrets | SSH host-key verification or equivalent attestation | +| `operator` | Full access to all secrets, unrestricted writes, any action | Operator-owned machine | + +The default trust level is `sandbox`. Operators must explicitly raise trust for +SSH or container workers through the security policy. + +### Security Policy + +A fleet run may carry an optional `security_policy` block that defines the +default trust level, which secrets workers may resolve, what capabilities are +granted, and a ceiling on the maximum trust level: + +```json +{ + "security_policy": { + "default_trust_level": "sandbox", + "allowed_secrets": [ + {"key": "GH_TOKEN", "source": "env"}, + {"key": "CODEWHALE_API_KEY", "source": "keyring"} + ], + "capability_grants": [ + { + "capability": "network", + "scope": "github.com", + "reason": "PR review needs GitHub API access" + } + ], + "max_trust_level": "remote_verified", + "require_identity_verification": true + } +} +``` + +When a run has no explicit `security_policy`, workers inherit conservative +defaults: `sandbox` trust, no secrets, no capability grants, and no identity +verification requirement. + +### Secret References + +Secrets are never stored as plaintext in task specs, alert configs, or worker +definitions. 
Instead, every secret is a `FleetSecretRef` — a key name plus an +optional source hint that tells the fleet manager where to resolve the value: + +```json +{"key": "GH_TOKEN", "source": "env"} +``` + +Supported sources: +- `"env"` — resolve from a process environment variable +- `"keyring"` — resolve from the OS keyring (macOS Keychain, Windows Credential Manager, Linux Secret Service) +- `"file"` — resolve from `~/.codewhale/secrets/` +- absent — try all sources in default order (store first, then env) + +Secret refs are redacted in logs and ledger entries: ``. + +### Worker Authentication + +Workers authenticate to the fleet manager using one of four methods: + +- **None** — local workers sharing the same uid (default) +- **SSH key** — with optional host-key fingerprint pinning and known-hosts + verification. The `host_key_fingerprint` field (SHA256:...) pins the expected + server key, preventing MITM attacks on first connection. +- **Token** — a bearer token resolved from a `FleetSecretRef`, useful for remote + workers behind a fleet proxy. +- **mTLS** — mutual TLS with a client certificate and a secret-backed private key. + +SSH workers should always set `host_key_fingerprint` in production: + +```json +{ + "id": "builder-1", + "name": "Builder 1", + "trust_level": "remote_verified", + "host": { + "kind": "ssh", + "host": "builder.example.com", + "user": "codewhale", + "port": 22, + "identity": "~/.ssh/codewhale_fleet", + "host_key_fingerprint": "SHA256:aLGqZo1M6c...", + "known_hosts": "~/.ssh/known_hosts", + "working_directory": "/srv/codewhale/work", + "env_allowlist": ["CODEWHALE_PROFILE"], + "codewhale_binary": "/usr/local/bin/codewhale" + }, + "capabilities": ["local", "linux", "tests"], + "max_concurrent_tasks": 1 +} +``` + +### Alert Channel Secrets + +Alert channels (Slack, generic webhook, PagerDuty) use `FleetAlertEndpoint` +instead of raw URLs. 
The webhook URL can be provided inline for non-sensitive +endpoints, or as a secret reference: + +```json +{ + "kind": "slack", + "webhook": { + "url_ref": {"key": "CODEWHALE_FLEET_SLACK_WEBHOOK", "source": "env"}, + "secret_ref": {"key": "CODEWHALE_FLEET_SLACK_SIGNING_SECRET", "source": "keyring"} + } +} +``` + +The `secret_ref` field provides an optional HMAC secret for webhook payload +signing, never stored in plaintext. + +### Config File + +The `[fleet]` table in `config.toml` sets global trust policy defaults: + +```toml +[fleet] +default_trust_level = "sandbox" +require_identity_verification = true +max_trust_level = "operator" + +[fleet.exec] +# Recursion depth shares ONE axis with standalone sub-agents — a fleet worker +# IS a headless sub-agent. 0 blocks child agents (the root worker still runs); +# 3 is the default and the ceiling, affording at least three nested levels. +max_spawn_depth = 3 +``` + +These defaults apply to fleet runs that don't carry their own `security_policy`. +Per-run policies always override the config defaults. + +### Capability Grants + +Capability grants are additive, scoped permissions that authorize specific +actions. By default, workers get no grants (least privilege). 
Common grants: + +- `"network"` with scope `"github.com"` — allow outbound HTTP to GitHub +- `"git-push"` — allow `git push` to remotes +- `"provider-secrets"` — allow accessing provider API keys +- `"release"` — allow release-related operations (tagging, publishing) +- `"workspace-write"` with scope `"crates/tui/**"` — allow writes within a path + +### Environment Sanitization + +The host adapter layer enforces environment sanitization at worker start: + +- Only `HOME`, `PATH`, and platform-specific vars (`SYSTEMROOT`, `COMSPEC`) are + injected into worker processes by default +- Environment allowlists reject any key containing `SECRET`, `TOKEN`, `PASSWORD`, + `PASSWD`, `API_KEY`, `CREDENTIAL`, or `PRIVATE_KEY` +- SSH workers only send explicitly allowlisted variables via OpenSSH `SendEnv` +- Secret values are never embedded in worker argv, task instructions, or fleet + logs — only secret refs appear, and they are always redacted diff --git a/docs/MODES.md b/docs/MODES.md index c5e823e9..d4381dd1 100644 --- a/docs/MODES.md +++ b/docs/MODES.md @@ -4,11 +4,18 @@ codewhale has two related concepts: - **TUI mode**: what kind of visible interaction you're in (Plan/Agent/YOLO). - **Approval mode**: how aggressively the UI asks before executing tools. +- **WhaleFlow overlay**: optional long-running workflow orchestration that can + run on top of any TUI mode when a task needs many coordinated workers. Model selection is separate. `--model auto` and `/model auto` route each turn to a concrete model and thinking level; they are not TUI modes and are not part of the `Tab` cycle. +WhaleFlow is also separate from the `Tab` mode cycle. It is the visible +continuous-work layer for repeatable workflows, fleet workers, and swarm-style +fanout. The active mode still controls permissions; WhaleFlow controls whether a +large task is planned into a resumable workflow with its own progress view. 
+ Each user turn includes a small `` block with the current local date and the concrete model sent to the provider. When `--model auto` is active, the same block also records that the model was auto-routed. @@ -56,6 +63,11 @@ the turn, `/goal complete` marks it done, `/goal blocked` marks it blocked, and approval mode, or model route. This remains distinct from `--model auto`, which only controls model and thinking selection. +WhaleFlow builds on the same separation: a goal can ask the agent to keep +working, while WhaleFlow supplies the repeatable workflow/progress surface for +large fanout. In the UI, a WhaleFlow run should be shown as an overlay on the +main screen, not as a fourth mode next to Agent, Plan, and YOLO. + App-server clients can persist a thread-scoped goal with `thread/goal/set`, read it with `thread/goal/get`, and clear it with `thread/goal/clear`. That persisted record carries `active`, `paused`, `blocked`, `usage_limited`, `budget_limited`, diff --git a/docs/examples/fleet-dogfood.toml b/docs/examples/fleet-dogfood.toml new file mode 100644 index 00000000..14e9e047 --- /dev/null +++ b/docs/examples/fleet-dogfood.toml @@ -0,0 +1,52 @@ +# Agent Fleet dogfood smoke spec (#3166) +# +# This spec exercises the fleet end-to-end: create a run with two local +# workers, run a compile-check task and a review task, verify the ledger records +# receipts, and confirm the status surfaces work. +# +# Run: +# codewhale fleet run docs/examples/fleet-dogfood.toml --max-workers 2 --once +# +# Then check: +# codewhale fleet status +# codewhale fleet inspect +# codewhale fleet logs + +name = "dogfood smoke" +labels = { milestone = "v0.8.60", class = "smoke" } + +security_policy = { default_trust_level = "local", allowed_secrets = [], require_identity_verification = false } + +[[tasks]] +id = "cargo-check" +name = "Workspace check" +description = "Run `cargo check --workspace` and report any compilation errors." 
+objective = "Verify the workspace compiles cleanly with zero errors." +instructions = "Run `cargo check --workspace` in the repo root. If it compiles cleanly, report success. If there are errors, list each file:line and the error message. Do NOT attempt to fix anything — just report what you found." +worker = { role = "release-checker", tool_profile = "read-only", tools = ["cargo"], capabilities = ["rust"] } +workspace = { required_files = ["Cargo.toml"], writable_paths = [".codewhale/fleet"], environment = { required = ["PATH"] } } +input_files = ["Cargo.toml"] +context = ["You are running in a fleet smoke test. Be concise. Only report the pass/fail and any specific errors."] +budget = { max_tokens = 8000, max_tool_calls = 12, max_seconds = 300 } +expected_artifacts = ["log", "report", "receipt"] +scorer = { kind = "exit_code" } +retry_policy = { max_attempts = 2, initial_backoff_seconds = 5, max_backoff_seconds = 30 } +timeout_seconds = 300 +tags = ["smoke", "check"] + +[[tasks]] +id = "protocol-review" +name = "Protocol review" +description = "Review fleet protocol types for security and correctness." +objective = "Inspect crates/protocol/src/fleet.rs and crates/secrets/src/lib.rs. Report any missing serde defaults, unsafe wire changes, or security-sensitive fields lacking SecretRef." +instructions = "Read crates/protocol/src/fleet.rs and crates/secrets/src/lib.rs. Check for: (1) new fields without serde(default) or skip_serializing_if, (2) raw secrets in struct fields instead of FleetSecretRef, (3) missing Clone/Debug/PartialEq derives on new types. Write a concise report with file:line references for each finding. If there are no findings, report 'all clear'." 
+worker = { role = "reviewer", tool_profile = "read-only", tools = ["rg", "git", "cargo"], capabilities = ["rust"] } +workspace = { required_files = ["crates/protocol/src/fleet.rs", "crates/secrets/src/lib.rs"], writable_paths = [".codewhale/fleet"], environment = { required = ["PATH"] } } +input_files = ["crates/protocol/src/fleet.rs", "crates/secrets/src/lib.rs"] +context = ["You are a fleet protocol reviewer. Be thorough but concise. Reference specific lines."] +budget = { max_tokens = 10000, max_tool_calls = 16, max_seconds = 600 } +expected_artifacts = ["log", "report", "receipt"] +scorer = { kind = "code_whale_verifier_prompt", prompt = "Verify the review includes at least one concrete file:line finding or explicitly says 'all clear'." } +retry_policy = { max_attempts = 1, initial_backoff_seconds = 10 } +timeout_seconds = 600 +tags = ["smoke", "review", "protocol"]