merge: fleet security/trust + headless-worker foundation + depth unification

This commit is contained in:
Hunter B
2026-06-13 01:11:50 -07:00
21 changed files with 2223 additions and 123 deletions
+51
View File
@@ -936,6 +936,57 @@ default_text_model = "deepseek-ai/deepseek-v4-pro"
# printf '%s\n' '{"content":"audit wrapper placeholder: configure an executor","success":false}'
# ```
# ─────────────────────────────────────────────────────────────────────────────────
# Agent Fleet trust, security, and role registry (#3165, #3167)
# ─────────────────────────────────────────────────────────────────────────────────
# [fleet]
# # Default trust level for fleet workers: "sandbox" | "local" | "remote-verified" | "operator"
# default_trust_level = "sandbox"
# # Require SSH host-key verification before granting remote-verified trust
# require_identity_verification = true
# # Maximum trust level any worker may have
# max_trust_level = "operator"
#
# # Headless worker execution hardening (#3027)
# [fleet.exec]
# # Tools always allowed regardless of role
# allowed_tools = []
# # Tools always disallowed (overrides role and task spec)
# disallowed_tools = ["exec_shell"]
# # Hard ceiling on worker steps (tool calls + model turns)
# max_turns = 500
# # Recursive child-agent depth for fleet workers. Shares ONE recursion axis
# # with standalone sub-agents (a fleet worker IS a headless sub-agent).
# # 0 blocks child agents (the root worker still runs); 3 is both the default
# # and the cap, allowing up to three nested delegation levels.
# max_spawn_depth = 3
# # Extra system prompt injected into every headless worker
# append_system_prompt = "Never modify .git/config or change remotes."
# # Output format: "text" (default) or "stream-json" for ndjson events
# output_format = "text"
#
# # Built-in role presets are always available: smoke-runner, reviewer, builder, read-only.
# # User-defined roles here override or extend the built-in set. Any key under
# # [fleet.roles] becomes a valid role name that task specs can reference.
# [fleet.roles.ci-linter]
# description = "Runs linters and formatters"
# tool_profile = "read-only"
# tools = ["cargo", "cargo-clippy", "cargo-fmt"]
# capabilities = ["rust"]
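# # Token and tool-call budgets are set per task, not per role: role tables
# # have no max_tokens / max_tool_calls fields. timeout_seconds is the bound.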
# timeout_seconds = 600
#
# [fleet.roles.pr-reviewer]
# description = "Reviews PRs with GitHub access"
# tool_profile = "read-only"
# tools = ["git", "gh", "rg"]
# capabilities = ["git", "github"]
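# # Token and tool-call budgets are set per task, not per role: role tables
# # have no max_tokens / max_tool_calls fields. timeout_seconds is the bound.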
# timeout_seconds = 900
# trust_level = "local"
# ─────────────────────────────────────────────────────────────────────────────────
# Requirements (admin constraints) example file
# ─────────────────────────────────────────────────────────────────────────────────
+44 -2
View File
@@ -22,7 +22,7 @@ use codewhale_mcp::{McpServerDefinition, run_stdio_server};
use codewhale_secrets::Secrets;
use codewhale_state::{StateStore, ThreadListFilters};
#[derive(Debug, Clone, Copy, ValueEnum)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
enum ProviderArg {
Deepseek,
NvidiaNim,
@@ -43,6 +43,10 @@ enum ProviderArg {
Huggingface,
Together,
OpenaiCodex,
Anthropic,
Zai,
Stepfun,
Minimax,
}
impl From<ProviderArg> for ProviderKind {
@@ -67,6 +71,10 @@ impl From<ProviderArg> for ProviderKind {
ProviderArg::Huggingface => ProviderKind::Huggingface,
ProviderArg::Together => ProviderKind::Together,
ProviderArg::OpenaiCodex => ProviderKind::OpenaiCodex,
ProviderArg::Anthropic => ProviderKind::Anthropic,
ProviderArg::Zai => ProviderKind::Zai,
ProviderArg::Stepfun => ProviderKind::Stepfun,
ProviderArg::Minimax => ProviderKind::Minimax,
}
}
}
@@ -787,7 +795,7 @@ fn provider_slot(provider: ProviderKind) -> &'static str {
}
/// Provider order used by the `auth list` and `auth status` outputs.
const PROVIDER_LIST: [ProviderKind; 20] = [
const PROVIDER_LIST: [ProviderKind; 24] = [
ProviderKind::Deepseek,
ProviderKind::NvidiaNim,
ProviderKind::Openai,
@@ -808,6 +816,10 @@ const PROVIDER_LIST: [ProviderKind; 20] = [
ProviderKind::Huggingface,
ProviderKind::Together,
ProviderKind::OpenaiCodex,
ProviderKind::Anthropic,
ProviderKind::Zai,
ProviderKind::Stepfun,
ProviderKind::Minimax,
];
fn provider_is_supported_by_tui(provider: ProviderKind) -> bool {
@@ -833,6 +845,10 @@ fn provider_is_supported_by_tui(provider: ProviderKind) -> bool {
| ProviderKind::Huggingface
| ProviderKind::Together
| ProviderKind::OpenaiCodex
| ProviderKind::Anthropic
| ProviderKind::Zai
| ProviderKind::Stepfun
| ProviderKind::Minimax
)
}
@@ -2606,6 +2622,32 @@ mod tests {
}))
));
for (provider, expected) in [
("anthropic", ProviderArg::Anthropic),
("zai", ProviderArg::Zai),
("stepfun", ProviderArg::Stepfun),
("minimax", ProviderArg::Minimax),
] {
let cli = parse_ok(&[
"deepseek",
"auth",
"set",
"--provider",
provider,
"--api-key-stdin",
]);
assert!(matches!(
cli.command,
Some(Commands::Auth(AuthArgs {
command: AuthCommand::Set {
provider,
api_key: None,
api_key_stdin: true,
}
})) if provider == expected
));
}
let cli = parse_ok(&["deepseek", "auth", "list"]);
assert!(matches!(
cli.command,
+260
View File
@@ -663,6 +663,10 @@ pub struct ConfigToml {
/// lifecycle `[hooks]` table so config rewrites preserve existing hooks.
#[serde(default)]
pub hook_sinks: Option<HookSinksToml>,
/// Agent Fleet trust and security policy (#3165). When absent, fleet
/// workers inherit conservative Sandbox defaults.
#[serde(default)]
pub fleet: Option<FleetConfigToml>,
#[serde(flatten)]
pub extras: BTreeMap<String, toml::Value>,
}
@@ -1059,6 +1063,236 @@ impl Default for SnapshotsToml {
}
}
/// On-disk schema for the `[fleet]` table (#3165). See `config.example.toml`
/// and `docs/FLEET.md` for documentation.
///
/// Every field has a serde default, so an empty `[fleet]` table deserializes
/// to the same values as [`FleetConfigToml::default`]. Trust levels are kept
/// as plain strings here; this type does not validate them.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FleetConfigToml {
/// Default trust level for fleet workers. One of `"sandbox"`, `"local"`,
/// `"remote-verified"`, or `"operator"`. Defaults to `"sandbox"`.
#[serde(default = "default_fleet_trust_level_str")]
pub default_trust_level: String,
/// Require identity verification for remote (SSH) workers before
/// granting them `remote-verified` trust. Defaults to true.
#[serde(default = "default_fleet_require_identity")]
pub require_identity_verification: bool,
/// Maximum trust level any worker may have (`"sandbox"`, `"local"`,
/// `"remote-verified"`, or `"operator"`). Defaults to `"operator"`.
#[serde(default = "default_fleet_max_trust_level_str")]
pub max_trust_level: String,
/// User-defined role presets, keyed by role name. Empty when the config
/// declares no `[fleet.roles.*]` tables.
///
/// Each role defines default tool profiles, capabilities, and trust
/// settings that task specs can reference by name. Built-in roles
/// (`smoke-runner`, `reviewer`, `builder`, `read-only`) are not stored in
/// this map; `resolve_role` consults it first and then falls back to
/// `built_in_role_presets`, so an entry here with a built-in name
/// replaces that built-in.
#[serde(default)]
pub roles: BTreeMap<String, FleetRolePreset>,
/// Headless worker execution hardening (#3027). Falls back to
/// `FleetExecConfig::default()` when `[fleet.exec]` is absent.
#[serde(default)]
pub exec: FleetExecConfig,
}
/// Canonical recursion-depth policy for the headless worker runtime.
///
/// Single source of truth shared by BOTH standalone sub-agents and fleet
/// workers so the two cannot drift into "two moving targets":
/// - [`DEFAULT_SPAWN_DEPTH`] is the default recursion budget (the sub-agent
///   runtime's `DEFAULT_MAX_SPAWN_DEPTH` is defined as this value).
/// - [`MAX_SPAWN_DEPTH_CEILING`] is the hard safety cap; every configured
///   value (fleet `max_spawn_depth`, `agent_open`'s `max_depth`) clamps to it.
///
/// A worker runs at `spawn_depth = 0` and may spawn while
/// `spawn_depth + 1 <= max_spawn_depth`, so a depth of N affords N nested
/// delegation levels below the root worker. The default of 3 therefore
/// affords three nested levels out of the box; the root worker still runs at
/// depth 0 even when the budget is 0.
pub const DEFAULT_SPAWN_DEPTH: u32 = 3;
/// Hard ceiling on recursion depth for any worker/sub-agent. See
/// [`DEFAULT_SPAWN_DEPTH`]. Raising this single constant lifts the limit
/// everywhere (the fleet clamp and `agent_open` validation both read it).
pub const MAX_SPAWN_DEPTH_CEILING: u32 = 3;
// Compile-time guard: the default budget must never exceed the hard ceiling.
// Editing one constant without the other then fails the build instead of
// relying on a unit test to notice.
const _: () = assert!(
    DEFAULT_SPAWN_DEPTH <= MAX_SPAWN_DEPTH_CEILING,
    "DEFAULT_SPAWN_DEPTH must not exceed MAX_SPAWN_DEPTH_CEILING"
);
/// Headless worker execution constraints (#3027).
///
/// These limits apply to all fleet workers and sub-agents spawned through
/// the headless worker runtime. Task specs can tighten but not loosen them.
///
/// Every field has a serde default, so `[fleet.exec]` may be partial or
/// absent. Empty lists and an empty `append_system_prompt` are omitted when
/// the struct is serialized.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FleetExecConfig {
/// Tools that are always allowed regardless of role or task spec.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub allowed_tools: Vec<String>,
/// Tools that are always disallowed, overriding role and task spec.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub disallowed_tools: Vec<String>,
/// Hard ceiling on sub-agent steps (tool calls + model turns).
/// Workers that exceed this are terminated. Default: unbounded (u32::MAX).
#[serde(default = "default_fleet_max_turns")]
pub max_turns: u32,
/// Recursive child-agent budget for headless fleet workers.
/// Defaults to [`DEFAULT_SPAWN_DEPTH`] (3) so a fleet worker has the SAME
/// recursion budget as a standalone sub-agent — fleet and sub-agents are one
/// substrate, not two. Set 0 to block child `agent_open` (the root worker
/// still runs); the value is clamped to [`MAX_SPAWN_DEPTH_CEILING`].
///
/// NOTE(review): deserialization stores the configured number as-is; the
/// clamp is presumably applied where this value is consumed — confirm.
#[serde(default = "default_fleet_max_spawn_depth")]
pub max_spawn_depth: u32,
/// Extra system prompt text appended to every headless worker.
/// Useful for injecting org-wide policy or behavior constraints.
#[serde(default, skip_serializing_if = "String::is_empty")]
pub append_system_prompt: String,
/// Output format for fleet worker results.
/// `"text"` (default) or `"stream-json"` for newline-delimited JSON events.
/// Stored as a free-form string; this type does not validate the value.
#[serde(default = "default_fleet_output_format")]
pub output_format: String,
}
/// Serde default for `FleetExecConfig::max_turns`: effectively unbounded.
fn default_fleet_max_turns() -> u32 {
    u32::MAX
}

/// Serde default for `FleetExecConfig::max_spawn_depth`: the shared
/// recursion budget constant.
fn default_fleet_max_spawn_depth() -> u32 {
    DEFAULT_SPAWN_DEPTH
}

/// Serde default for `FleetExecConfig::output_format`: plain text.
fn default_fleet_output_format() -> String {
    String::from("text")
}
impl Default for FleetExecConfig {
    /// Builds the same values serde falls back to when `[fleet.exec]` is
    /// missing or partial: no tool overrides, unbounded turns, the shared
    /// spawn-depth budget, no extra prompt, and text output.
    fn default() -> Self {
        let max_turns = default_fleet_max_turns();
        let max_spawn_depth = default_fleet_max_spawn_depth();
        let output_format = default_fleet_output_format();
        Self {
            allowed_tools: vec![],
            disallowed_tools: vec![],
            max_turns,
            max_spawn_depth,
            append_system_prompt: String::default(),
            output_format,
        }
    }
}
/// A named role preset that bundles common worker settings.
///
/// Task specs reference a role name (e.g. `"role": "reviewer"`), and the
/// fleet manager fills in any missing fields from the preset. User-defined
/// roles in `[fleet.roles]` override built-in defaults with the same name.
///
/// Token budgets and tool-call limits are task-level decisions — they don't
/// belong on role presets. Use `timeout_seconds` as the safety bound.
///
/// This struct does not set `deny_unknown_fields`, so keys it does not model
/// (for example `max_tokens` or `max_tool_calls` in a role table) are not
/// captured here; with serde's default behavior they are ignored rather than
/// rejected.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FleetRolePreset {
/// Short description of what this role is for.
#[serde(skip_serializing_if = "Option::is_none")]
pub description: Option<String>,
/// Default tool profile (`"read-only"`, `"read-write"`, or `"custom"`).
/// Stored as a free-form string; not validated by this type.
#[serde(skip_serializing_if = "Option::is_none")]
pub tool_profile: Option<String>,
/// Default set of tool names available to this role.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub tools: Vec<String>,
/// Default capability tags (e.g. `"rust"`, `"git"`, `"gh"`).
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub capabilities: Vec<String>,
/// Default timeout in seconds for tasks using this role.
#[serde(skip_serializing_if = "Option::is_none")]
pub timeout_seconds: Option<u64>,
/// Default trust level override for this role, as a string such as
/// `"sandbox"` or `"local"`. `None` means the role sets no override.
#[serde(skip_serializing_if = "Option::is_none")]
pub trust_level: Option<String>,
}
/// Serde default for `FleetConfigToml::default_trust_level`.
fn default_fleet_trust_level_str() -> String {
    String::from("sandbox")
}

/// Serde default for `FleetConfigToml::require_identity_verification`.
fn default_fleet_require_identity() -> bool {
    true
}

/// Serde default for `FleetConfigToml::max_trust_level`.
fn default_fleet_max_trust_level_str() -> String {
    String::from("operator")
}
impl Default for FleetConfigToml {
    /// Mirrors the per-field serde defaults: sandbox default trust, identity
    /// verification required, operator ceiling, no user roles, and default
    /// exec constraints.
    fn default() -> Self {
        let default_trust_level = default_fleet_trust_level_str();
        let require_identity_verification = default_fleet_require_identity();
        let max_trust_level = default_fleet_max_trust_level_str();
        Self {
            default_trust_level,
            require_identity_verification,
            max_trust_level,
            roles: BTreeMap::default(),
            exec: FleetExecConfig::default(),
        }
    }
}
impl FleetConfigToml {
    /// Look up a role preset by name.
    ///
    /// A role declared in `self.roles` wins; otherwise the name is looked up
    /// in [`built_in_role_presets`]. Returns `None` when neither knows it.
    /// The result is an owned copy, so callers may modify it freely.
    #[must_use]
    pub fn resolve_role(&self, name: &str) -> Option<FleetRolePreset> {
        if let Some(user_defined) = self.roles.get(name) {
            return Some(user_defined.clone());
        }
        // The built-in table is freshly built and owned here, so the matching
        // entry can be moved out of it directly.
        built_in_role_presets().remove(name)
    }
}
/// Returns the role presets that exist without any configuration:
/// `smoke-runner`, `reviewer`, `builder`, and `read-only`.
///
/// A fresh map is built on every call. No built-in declares explicit tools
/// or capability tags.
#[must_use]
pub fn built_in_role_presets() -> BTreeMap<String, FleetRolePreset> {
    // All built-ins share one shape; only these four values vary.
    let preset = |description: &str,
                  tool_profile: &str,
                  timeout_seconds: u64,
                  trust_level: Option<&str>| FleetRolePreset {
        description: Some(description.to_owned()),
        tool_profile: Some(tool_profile.to_owned()),
        tools: Vec::new(),
        capabilities: Vec::new(),
        timeout_seconds: Some(timeout_seconds),
        trust_level: trust_level.map(str::to_owned),
    };

    let mut presets = BTreeMap::new();
    presets.insert(
        "smoke-runner".to_owned(),
        preset(
            "Lightweight read-only smoke check worker",
            "read-only",
            300,
            Some("local"),
        ),
    );
    presets.insert(
        "reviewer".to_owned(),
        preset(
            "Read-only code and documentation review",
            "read-only",
            600,
            None,
        ),
    );
    presets.insert(
        "builder".to_owned(),
        preset(
            "Read-write builder with compilation and test access",
            "read-write",
            1800,
            Some("local"),
        ),
    );
    presets.insert(
        "read-only".to_owned(),
        preset(
            "Minimal read-only observer with no writes or secrets",
            "read-only",
            300,
            Some("sandbox"),
        ),
    );
    presets
}
/// On-disk schema for the `[network]` table (#135). See `config.example.toml`
/// for documentation.
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -7179,6 +7413,32 @@ fallback_providers = ["deepseek", "openrouter"]
assert!(!serialized.contains("fallback_providers"));
}
#[test]
fn fleet_exec_config_default_matches_subagent_spawn_depth() {
    // One recursion axis for fleet workers and standalone sub-agents: the
    // fleet default is DEFAULT_SPAWN_DEPTH (3), which stays within the cap.
    let default_depth = FleetExecConfig::default().max_spawn_depth;
    assert_eq!(default_depth, DEFAULT_SPAWN_DEPTH);
    assert_eq!(default_depth, 3);
    assert!(DEFAULT_SPAWN_DEPTH <= MAX_SPAWN_DEPTH_CEILING);
}
#[test]
fn fleet_exec_config_parses_max_spawn_depth() {
    // An explicit `[fleet.exec]` value must survive parsing untouched.
    let raw = r#"
[fleet.exec]
max_spawn_depth = 2
"#;
    let parsed: ConfigToml = toml::from_str(raw).expect("fleet exec config should parse");
    let fleet = parsed.fleet.expect("fleet config");
    assert_eq!(fleet.exec.max_spawn_depth, 2);
}
#[test]
fn fallback_providers_do_not_change_runtime_resolution() {
let _lock = env_lock();
+476 -5
View File
@@ -12,7 +12,7 @@
use std::collections::BTreeMap;
use std::path::PathBuf;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use serde::{Deserialize, Deserializer, Serialize, Serializer, de};
use serde_json::Value;
pub const FLEET_PROTOCOL_VERSION: &str = "0.1.0";
@@ -45,6 +45,8 @@ pub struct FleetRun {
pub worker_specs: Vec<FleetWorkerSpec>,
#[serde(default)]
pub labels: BTreeMap<String, String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub security_policy: Option<FleetSecurityPolicy>,
pub created_at: String,
#[serde(skip_serializing_if = "Option::is_none")]
pub updated_at: Option<String>,
@@ -260,6 +262,9 @@ pub struct FleetWorkerSpec {
pub name: String,
pub host: FleetHostSpec,
#[serde(default)]
#[serde(skip_serializing_if = "Option::is_none")]
pub trust_level: Option<FleetTrustLevel>,
#[serde(default)]
pub labels: BTreeMap<String, String>,
#[serde(default)]
pub capabilities: Vec<String>,
@@ -280,6 +285,14 @@ pub enum FleetHostSpec {
user: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
identity: Option<PathBuf>,
/// Known hosts file for host-key verification.
#[serde(skip_serializing_if = "Option::is_none")]
known_hosts: Option<PathBuf>,
/// Expected host key fingerprint (SHA256:...) for key pinning.
/// When set, the connection is only trusted if the server's
/// host key matches this fingerprint exactly.
#[serde(skip_serializing_if = "Option::is_none")]
host_key_fingerprint: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
working_directory: Option<PathBuf>,
#[serde(default)]
@@ -288,6 +301,8 @@ pub enum FleetHostSpec {
#[serde(skip_serializing_if = "Option::is_none")]
codewhale_binary: Option<String>,
},
#[serde(alias = "container")]
#[serde(alias = "Container")]
Docker {
image: String,
#[serde(default)]
@@ -295,6 +310,264 @@ pub enum FleetHostSpec {
},
}
// ── Security and trust types ────────────────────────────────────────────────
/// Trust classification assigned to a worker host.
///
/// The trust level determines what a worker is allowed to do and what
/// secrets it may access. The default for new workers is [`FleetTrustLevel::Sandbox`];
/// operators must explicitly raise trust for SSH or container workers.
///
/// Variants serialize in `snake_case` (`"sandbox"`, `"local"`,
/// `"remote_verified"`, `"operator"`). The derived ordering follows the
/// explicit discriminants, so a more privileged level compares greater.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
#[serde(rename_all = "snake_case")]
pub enum FleetTrustLevel {
/// Fully isolated: no network, no secrets, no writes outside `.codewhale/fleet/`.
/// Suitable for untrusted code review, community PR checks, or third-party tool runs.
Sandbox = 0,
/// Local-only worker with access to the workspace and configured secrets.
/// Default for local workers. May read repo files but writes are gated.
Local = 1,
/// Worker on a known remote host with verified identity and a bounded
/// set of explicitly granted capabilities. Requires SSH host-key
/// verification or equivalent attestation.
///
/// Also accepted on input as `"remote-verified"` or `"remoteVerified"`;
/// the serialized form is always `"remote_verified"`.
#[serde(alias = "remote-verified", alias = "remoteVerified")]
RemoteVerified = 2,
/// Fully trusted worker (e.g. operator's own machine, CI runner).
/// Has access to all configured secrets and may perform any action the
/// operator can. Reserved for dogfood smoke and operator-owned machines.
Operator = 3,
}
impl Default for FleetTrustLevel {
fn default() -> Self {
Self::Sandbox
}
}
impl FleetTrustLevel {
    /// Reports whether workers at this level may access provider secrets.
    /// Every level except `Sandbox` may.
    #[must_use]
    pub fn may_access_secrets(&self) -> bool {
        match self {
            Self::Sandbox => false,
            Self::Local | Self::RemoteVerified | Self::Operator => true,
        }
    }

    /// Reports whether workers at this level may write outside
    /// `.codewhale/fleet/`. Only `Local` and `Operator` may; `RemoteVerified`
    /// does not get this by trust level alone.
    #[must_use]
    pub fn may_write_workspace(&self) -> bool {
        match self {
            Self::Local | Self::Operator => true,
            Self::Sandbox | Self::RemoteVerified => false,
        }
    }

    /// Reports whether workers at this level may use the network.
    /// Every level except `Sandbox` may.
    #[must_use]
    pub fn may_access_network(&self) -> bool {
        match self {
            Self::Sandbox => false,
            Self::Local | Self::RemoteVerified | Self::Operator => true,
        }
    }
}
/// Security policy applied to a fleet run.
///
/// A policy defines the default trust level for workers, which secrets
/// may be resolved, and what capabilities are granted. When a run has no
/// explicit policy, workers inherit conservative defaults.
///
/// All fields are optional on the wire; omitted fields take the same values
/// as `FleetSecurityPolicy::default()`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct FleetSecurityPolicy {
/// Default trust level for workers that don't declare one explicitly.
/// Falls back to `FleetTrustLevel::default()` (Sandbox) when omitted.
#[serde(default)]
pub default_trust_level: FleetTrustLevel,
/// Secret refs that workers may resolve. An empty list means no secrets
/// are available. Each entry is a key name, not a value.
#[serde(default)]
#[serde(skip_serializing_if = "Vec::is_empty")]
pub allowed_secrets: Vec<FleetSecretRef>,
/// Capability grants for workers in this run.
#[serde(default)]
#[serde(skip_serializing_if = "Vec::is_empty")]
pub capability_grants: Vec<FleetCapabilityGrant>,
/// Maximum trust level any worker in this run may have, even if the
/// worker spec requests higher. Defaults to Operator (no ceiling).
#[serde(default = "default_max_trust_level")]
pub max_trust_level: FleetTrustLevel,
/// Require identity verification for remote workers. When true, SSH
/// workers must pass host-key verification before being trusted at
/// RemoteVerified level; unverified remotes stay at Sandbox.
///
/// Defaults to `false` when omitted.
/// NOTE(review): the `[fleet]` config table defaults its field of the same
/// name to `true`; confirm the differing defaults are intentional.
#[serde(default)]
pub require_identity_verification: bool,
/// Allow conservative parallel execution of read-only tools (#2983).
/// When true, workers may batch independent read-only tool calls
/// (reads, searches, greps) into concurrent turns. Disabled by default
/// to avoid overwhelming providers or hitting rate limits.
#[serde(default)]
pub allow_parallel_reads: bool,
}
/// Serde default for `FleetSecurityPolicy::max_trust_level`: the highest
/// level, i.e. no ceiling below Operator.
fn default_max_trust_level() -> FleetTrustLevel {
FleetTrustLevel::Operator
}
impl Default for FleetSecurityPolicy {
fn default() -> Self {
Self {
default_trust_level: FleetTrustLevel::Sandbox,
allowed_secrets: Vec::new(),
capability_grants: Vec::new(),
max_trust_level: FleetTrustLevel::Operator,
require_identity_verification: false,
allow_parallel_reads: false,
}
}
}
/// A reference to a secret that should be resolved at runtime, never
/// serialized as a plaintext value.
///
/// Secret refs appear in task specs, alert configs, and worker definitions.
/// The actual secret value is resolved by the fleet manager from the
/// secrets backend (OS keyring, environment, or file store) just before
/// the worker starts.
///
/// Only `Serialize` is derived. `Deserialize` is implemented by hand for this
/// type so that both a bare string key and a `{ "key", "source" }` object are
/// accepted on input.
#[derive(Debug, Clone, Serialize, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct FleetSecretRef {
/// The secret key name (e.g. `"CODEWHALE_API_KEY"`, `"GH_TOKEN"`).
pub key: String,
/// Optional source hint for resolution order.
/// - `"env"` — resolve from environment variable
/// - `"keyring"` — resolve from OS keyring
/// - `"file"` — resolve from `~/.codewhale/secrets/`
/// - absent / null — try all sources in default order
///
/// Stored as a free-form string; this type does not validate the value.
#[serde(skip_serializing_if = "Option::is_none")]
pub source: Option<String>,
}
impl FleetSecretRef {
    /// Builds a ref for `key` with no source hint.
    #[must_use]
    pub fn new(key: impl Into<String>) -> Self {
        let key = key.into();
        Self { key, source: None }
    }

    /// Builds a ref for `key` pinned to `source`.
    #[must_use]
    pub fn with_source(key: impl Into<String>, source: impl Into<String>) -> Self {
        let mut secret = Self::new(key);
        secret.source = Some(source.into());
        secret
    }

    /// Log-safe rendering: `<secret:SOURCE.KEY>` when a source is set,
    /// otherwise `<secret:KEY>`. Only the reference is shown; this type never
    /// holds the resolved value.
    #[must_use]
    pub fn redacted(&self) -> String {
        let key = &self.key;
        if let Some(source) = &self.source {
            format!("<secret:{source}.{key}>")
        } else {
            format!("<secret:{key}>")
        }
    }
}
impl std::fmt::Display for FleetSecretRef {
    /// Displays the redacted form, so printing a ref is always log-safe.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let rendered = self.redacted();
        f.write_str(&rendered)
    }
}
impl From<&str> for FleetSecretRef {
    /// Treats the string as a key name with no source hint.
    fn from(key: &str) -> Self {
        Self {
            key: key.to_owned(),
            source: None,
        }
    }
}
impl From<String> for FleetSecretRef {
    /// Takes ownership of the string as a key name with no source hint.
    fn from(key: String) -> Self {
        Self { key, source: None }
    }
}
impl<'de> Deserialize<'de> for FleetSecretRef {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
#[derive(Deserialize)]
#[serde(untagged)]
enum SecretRefWire {
Key(String),
Structured {
key: String,
#[serde(default)]
source: Option<String>,
},
}
match SecretRefWire::deserialize(deserializer)? {
SecretRefWire::Key(key) if !key.trim().is_empty() => Ok(FleetSecretRef::new(key)),
SecretRefWire::Key(_) => Err(de::Error::custom("secret ref key cannot be empty")),
SecretRefWire::Structured { key, source } if !key.trim().is_empty() => {
Ok(FleetSecretRef { key, source })
}
SecretRefWire::Structured { .. } => {
Err(de::Error::custom("secret ref key cannot be empty"))
}
}
}
}
/// How a worker authenticates to the fleet manager.
///
/// Serialized as an internally tagged object: the `method` field carries the
/// variant name in `snake_case` and the variant's fields sit alongside it.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(tag = "method", rename_all = "snake_case")]
pub enum FleetWorkerAuth {
/// No authentication (local workers share the same uid).
None,
/// SSH key-based authentication with host-key verification.
SshKey {
/// Path to the SSH identity file.
///
/// NOTE(review): this field is a plain `PathBuf`, so a secret-ref shaped
/// JSON object (`{"key": "...", "source": "file"}`) would presumably not
/// deserialize here — confirm before documenting that form to users.
identity: PathBuf,
/// Known hosts file for host-key verification.
#[serde(skip_serializing_if = "Option::is_none")]
known_hosts: Option<PathBuf>,
/// Expected host key fingerprint for pinning.
#[serde(skip_serializing_if = "Option::is_none")]
host_key_fingerprint: Option<String>,
/// SSH user for the connection.
#[serde(skip_serializing_if = "Option::is_none")]
user: Option<String>,
},
/// Token-based authentication for remote workers behind a fleet proxy.
Token {
/// Reference to the token secret.
token_ref: FleetSecretRef,
},
/// mTLS certificate-based authentication.
Mtls {
/// Path to the client certificate.
cert_path: PathBuf,
/// Reference to the private key secret.
key_ref: FleetSecretRef,
},
}
/// A capability grant that explicitly authorizes a worker to perform
/// a specific class of action.
///
/// By default, new workers get no grants (least privilege). Grants are
/// additive: a worker's effective capabilities are the union of its
/// trust-level defaults plus any explicit grants.
///
/// `capability` is required on the wire; `scope` and `reason` are omitted
/// from serialized output when unset.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct FleetCapabilityGrant {
/// The capability being granted (e.g. `"network"`, `"git-push"`,
/// `"provider-secrets"`, `"release"`). Free-form string; not validated by
/// this type.
pub capability: String,
/// Optional scope limiting the grant (e.g. `"github.com"` for network,
/// `"crates/tui/**"` for file writes).
#[serde(skip_serializing_if = "Option::is_none")]
pub scope: Option<String>,
/// Optional justification for the grant (audit trail).
#[serde(skip_serializing_if = "Option::is_none")]
pub reason: Option<String>,
}
/// Runtime status of a worker.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(rename_all = "snake_case")]
@@ -469,18 +742,84 @@ pub enum FleetAlertEventClass {
#[serde(tag = "kind", rename_all = "snake_case")]
pub enum FleetAlertChannel {
Slack {
webhook_url: String,
/// Webhook URL, resolved from a secret ref or inline.
#[serde(flatten)]
webhook: FleetAlertEndpoint,
},
Webhook {
url: String,
secret: Option<String>,
#[serde(flatten)]
endpoint: FleetAlertEndpoint,
},
#[serde(alias = "pager_duty")]
#[serde(alias = "pagerduty")]
PagerDuty {
routing_key: String,
severity: String,
},
}
/// An alert channel endpoint, supporting both inline URLs and secret refs.
///
/// For Slack and generic webhook channels, the URL may be provided directly
/// or as a secret reference resolved at send time. When both `url` and
/// `url_ref` are present, `url_ref` takes precedence after resolution.
///
/// All three fields are optional and are omitted from serialized output when
/// unset. The serde aliases let older field names be read on input; output
/// always uses the canonical names `url`, `url_ref`, and `secret_ref`.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct FleetAlertEndpoint {
/// Inline URL (plaintext; only for non-sensitive endpoints).
/// Also read from `webhook_url` or `endpoint_url`.
#[serde(
alias = "webhook_url",
alias = "endpoint_url",
skip_serializing_if = "Option::is_none"
)]
pub url: Option<String>,
/// Reference to a secret containing the webhook URL.
/// Also read from `webhook_url_ref`, `webhook_ref`, or `url_secret_ref`.
#[serde(
alias = "webhook_url_ref",
alias = "webhook_ref",
alias = "url_secret_ref",
skip_serializing_if = "Option::is_none"
)]
pub url_ref: Option<FleetSecretRef>,
/// Optional HMAC secret for webhook payload signing, as a secret ref.
/// Also read from `secret`, `webhook_secret`, or `signing_secret`.
///
/// NOTE(review): a string under the `secret` alias is read as a secret
/// *key name*, not as a secret value. If older configs stored a plaintext
/// secret in `secret`, that text would be treated as a key and could show
/// up in redacted log output — confirm the legacy meaning.
#[serde(
alias = "secret",
alias = "webhook_secret",
alias = "signing_secret",
skip_serializing_if = "Option::is_none"
)]
pub secret_ref: Option<FleetSecretRef>,
}
impl FleetAlertEndpoint {
    /// Builds an endpoint from a plaintext URL (for non-sensitive use).
    /// No URL ref and no signing secret are set.
    #[must_use]
    pub fn inline(url: impl Into<String>) -> Self {
        let url = Some(url.into());
        Self {
            url,
            url_ref: None,
            secret_ref: None,
        }
    }

    /// Builds an endpoint whose URL is resolved from `url_ref`.
    /// No inline URL and no signing secret are set.
    #[must_use]
    pub fn from_secret(url_ref: FleetSecretRef) -> Self {
        let url_ref = Some(url_ref);
        Self {
            url: None,
            url_ref,
            secret_ref: None,
        }
    }

    /// Log-safe rendering of the endpoint.
    ///
    /// Returns the redacted form of `url_ref` when one is set. In every other
    /// case, including when no URL is configured at all, it returns the
    /// fixed placeholder `<inline-url>`; the inline URL itself is never
    /// included.
    #[must_use]
    pub fn redacted(&self) -> String {
        match &self.url_ref {
            Some(secret) => secret.redacted(),
            None => "<inline-url>".to_string(),
        }
    }
}
/// Receipt produced when a task completes verification.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FleetReceipt {
@@ -573,6 +912,7 @@ mod tests {
}],
worker_specs: vec![],
labels: BTreeMap::new(),
security_policy: None,
created_at: "2026-06-12T17:00:00Z".to_string(),
updated_at: None,
completed_at: None,
@@ -648,7 +988,7 @@ mod tests {
let policy = FleetAlertPolicy {
events: vec![FleetAlertEventClass::Stale],
channels: vec![FleetAlertChannel::Slack {
webhook_url: "https://hooks.slack.com/test".to_string(),
webhook: FleetAlertEndpoint::inline("https://hooks.slack.com/test"),
}],
after_attempts: Some(2),
after_minutes_stale: Some(10),
@@ -687,6 +1027,8 @@ mod tests {
port,
user,
identity,
known_hosts,
host_key_fingerprint,
working_directory,
env_allowlist,
codewhale_binary,
@@ -695,6 +1037,8 @@ mod tests {
assert_eq!(port, None);
assert_eq!(user, None);
assert_eq!(identity, None);
assert_eq!(known_hosts, None);
assert_eq!(host_key_fingerprint, None);
assert_eq!(working_directory, None);
assert!(env_allowlist.is_empty());
assert_eq!(codewhale_binary, None);
@@ -801,4 +1145,131 @@ mod tests {
assert_eq!(back.result, FleetTaskResult::Partial);
assert_eq!(back.failure_kind, Some(FleetTaskFailureKind::Verifier));
}
#[test]
fn ssh_host_spec_with_key_pinning_round_trip() {
    // Serialize a fully populated SSH host spec and check that the host-key
    // pinning fields survive a JSON round trip.
    let original = FleetHostSpec::Ssh {
        host: "builder.trusted.example.com".to_string(),
        port: Some(22),
        user: Some("codewhale".to_string()),
        identity: Some(PathBuf::from("~/.ssh/codewhale_fleet")),
        known_hosts: Some(PathBuf::from("~/.ssh/known_hosts")),
        host_key_fingerprint: Some("SHA256:aLGqZo1M6c...".to_string()),
        working_directory: Some(PathBuf::from("/srv/codewhale/work")),
        env_allowlist: vec!["CODEWHALE_PROFILE".to_string()],
        codewhale_binary: Some("/usr/local/bin/codewhale".to_string()),
    };

    let wire = serde_json::to_string_pretty(&original).unwrap();
    for needle in [
        "\"known_hosts\"",
        "\"host_key_fingerprint\"",
        "SHA256:aLGqZo1M6c...",
    ] {
        assert!(wire.contains(needle));
    }

    let decoded: FleetHostSpec = serde_json::from_str(&wire).unwrap();
    match decoded {
        FleetHostSpec::Ssh {
            host,
            known_hosts,
            host_key_fingerprint,
            ..
        } => {
            assert_eq!(host, "builder.trusted.example.com");
            assert_eq!(known_hosts, Some(PathBuf::from("~/.ssh/known_hosts")));
            assert_eq!(
                host_key_fingerprint,
                Some("SHA256:aLGqZo1M6c...".to_string())
            );
        }
        other => panic!("expected ssh host spec, got {other:?}"),
    }
}
#[test]
fn secret_ref_redacted_never_exposes_value() {
    // Without a source hint: the key is shown, no value-like prefix appears.
    let bare = FleetSecretRef::new("DEEPSEEK_API_KEY").redacted();
    assert!(bare.contains("DEEPSEEK_API_KEY"));
    assert!(!bare.contains("sk-"));
    assert!(bare.contains("<secret:"));

    // With a source hint: rendered as `source.key`.
    let sourced = FleetSecretRef::with_source("GH_TOKEN", "env").redacted();
    assert!(sourced.contains("env.GH_TOKEN"));
    assert!(!sourced.contains("ghp_"));
}
#[test]
fn alert_endpoint_from_secret_round_trip() {
    // A secret-backed endpoint serializes only the reference, never a URL.
    let original = FleetAlertEndpoint::from_secret(FleetSecretRef::new("SLACK_WEBHOOK"));
    let wire = serde_json::to_string(&original).unwrap();
    assert!(wire.contains("SLACK_WEBHOOK"));
    assert!(!wire.contains("hooks.slack.com"));

    let decoded: FleetAlertEndpoint = serde_json::from_str(&wire).unwrap();
    let url_ref = decoded.url_ref.as_ref().unwrap();
    assert_eq!(url_ref.key, "SLACK_WEBHOOK");
    assert_eq!(decoded.url, None);
}
#[test]
fn secret_ref_accepts_legacy_string_wire_shape() {
    // Both the bare-string form and the structured form must parse.
    let cases = [
        (
            r#""CODEWHALE_FLEET_TOKEN""#,
            FleetSecretRef::new("CODEWHALE_FLEET_TOKEN"),
        ),
        (
            r#"{"key":"GH_TOKEN","source":"env"}"#,
            FleetSecretRef::with_source("GH_TOKEN", "env"),
        ),
    ];
    for (wire, expected) in cases {
        let parsed: FleetSecretRef = serde_json::from_str(wire).unwrap();
        assert_eq!(parsed, expected);
    }
}
#[test]
fn trust_level_accepts_hyphenated_remote_verified() {
    // The hyphenated spelling is accepted on input; output stays snake_case.
    let parsed = serde_json::from_str::<FleetTrustLevel>(r#""remote-verified""#).unwrap();
    assert_eq!(parsed, FleetTrustLevel::RemoteVerified);

    let reserialized = serde_json::to_string(&parsed).unwrap();
    assert_eq!(reserialized, r#""remote_verified""#);
}
#[test]
fn alert_channel_accepts_legacy_webhook_fields() {
    // Legacy `webhook_url` / `secret` keys map onto the flattened endpoint.
    let legacy_wire = r#"{
"kind": "slack",
"webhook_url": "https://hooks.slack.com/test",
"secret": "SLACK_SIGNING_SECRET"
}"#;
    let parsed: FleetAlertChannel = serde_json::from_str(legacy_wire).unwrap();
    match parsed {
        FleetAlertChannel::Slack { webhook } => {
            let expected_secret = Some(FleetSecretRef::new("SLACK_SIGNING_SECRET"));
            assert_eq!(webhook.url.as_deref(), Some("https://hooks.slack.com/test"));
            assert_eq!(webhook.secret_ref, expected_secret);
        }
        other => panic!("expected slack channel, got {other:?}"),
    }
}
#[test]
fn security_policy_defaults_are_conservative() {
    // Pin the out-of-the-box policy field by field.
    let FleetSecurityPolicy {
        default_trust_level,
        allowed_secrets,
        capability_grants,
        max_trust_level,
        require_identity_verification,
        ..
    } = FleetSecurityPolicy::default();

    assert_eq!(default_trust_level, FleetTrustLevel::Sandbox);
    assert!(allowed_secrets.is_empty());
    assert!(capability_grants.is_empty());
    assert_eq!(max_trust_level, FleetTrustLevel::Operator);
    assert!(!require_identity_verification);
}
#[test]
fn trust_level_ordinal_reflects_privilege() {
assert!(FleetTrustLevel::Operator > FleetTrustLevel::RemoteVerified);
assert!(FleetTrustLevel::RemoteVerified > FleetTrustLevel::Local);
assert!(FleetTrustLevel::Local > FleetTrustLevel::Sandbox);
assert!(FleetTrustLevel::Operator.may_access_secrets());
assert!(!FleetTrustLevel::Sandbox.may_access_secrets());
assert!(!FleetTrustLevel::Sandbox.may_write_workspace());
assert!(FleetTrustLevel::Operator.may_write_workspace());
}
}
+38
View File
@@ -737,6 +737,44 @@ impl Secrets {
pub fn get(&self, name: &str) -> Result<Option<String>, SecretsError> {
self.store.get(name)
}
/// Resolve a secret by key name with an optional source constraint.
///
/// This is the fleet-worker secret resolution path. Unlike
/// [`resolve`](Secrets::resolve), this does NOT map provider names
/// to their canonical env vars — the caller controls the exact key
/// and resolution order.
///
/// `source_hint` controls the resolution order:
/// - `Some("env")` — only check environment variables
/// - `Some("keyring")` — only check the keyring/file store
/// - `None` — try the store first, then fall back to environment
#[must_use]
pub fn resolve_direct(&self, key: &str, source_hint: Option<&str>) -> Option<String> {
match source_hint {
Some("env") => {
// Only check process environment — skip the store entirely.
std::env::var(key).ok().filter(|v| !v.trim().is_empty())
}
Some("keyring") | Some("file") => {
// Only check the store backend.
self.store
.get(key)
.ok()
.flatten()
.filter(|v| !v.trim().is_empty())
}
Some(_) | None => {
// Default: store first, then env fallback.
if let Ok(Some(v)) = self.store.get(key)
&& !v.trim().is_empty()
{
return Some(v);
}
std::env::var(key).ok().filter(|v| !v.trim().is_empty())
}
}
}
}
/// Map a canonical provider name to its environment variable(s), returning
+5
View File
@@ -1883,6 +1883,10 @@ pub struct Config {
#[serde(default)]
pub context: ContextConfig,
/// Agent Fleet trust/security/role/exec config.
#[serde(default)]
pub fleet: Option<codewhale_config::FleetConfigToml>,
/// Sub-agent model overrides.
#[serde(default)]
pub subagents: Option<SubagentsConfig>,
@@ -5012,6 +5016,7 @@ fn merge_config(base: Config, override_cfg: Config) -> Config {
.or(base.context.l3_threshold),
seam_model: override_cfg.context.seam_model.or(base.context.seam_model),
},
fleet: override_cfg.fleet.or(base.fleet),
subagents: override_cfg.subagents.or(base.subagents),
strict_tool_mode: override_cfg.strict_tool_mode.or(base.strict_tool_mode),
runtime_api: override_cfg.runtime_api.or(base.runtime_api),
+10
View File
@@ -373,6 +373,8 @@ pub struct SshFleetHostConfig {
pub user: Option<String>,
pub port: Option<u16>,
pub identity: Option<PathBuf>,
pub known_hosts: Option<PathBuf>,
pub host_key_fingerprint: Option<String>,
pub working_directory: PathBuf,
pub env_allowlist: BTreeSet<String>,
pub codewhale_binary: String,
@@ -387,6 +389,8 @@ impl SshFleetHostConfig {
user: None,
port: None,
identity: None,
known_hosts: None,
host_key_fingerprint: None,
working_directory: working_directory.into(),
env_allowlist: BTreeSet::new(),
codewhale_binary: "codewhale".to_string(),
@@ -401,6 +405,8 @@ impl SshFleetHostConfig {
port,
user,
identity,
known_hosts,
host_key_fingerprint,
working_directory,
env_allowlist,
codewhale_binary,
@@ -420,6 +426,8 @@ impl SshFleetHostConfig {
config.port = *port;
config.user = user.clone();
config.identity = identity.clone();
config.known_hosts = known_hosts.clone();
config.host_key_fingerprint = host_key_fingerprint.clone();
config.env_allowlist = env_allowlist.iter().cloned().collect();
config.codewhale_binary = codewhale_binary;
config.validate()?;
@@ -918,6 +926,8 @@ mod tests {
port: Some(2222),
user: Some("fleet".to_string()),
identity: Some(PathBuf::from("/tmp/fleet_id")),
known_hosts: None,
host_key_fingerprint: None,
working_directory: Some(PathBuf::from("/srv/codewhale")),
env_allowlist: vec!["FLEET_PROFILE".to_string()],
codewhale_binary: Some("/usr/local/bin/codewhale".to_string()),
+15 -5
View File
@@ -661,12 +661,21 @@ fn sanitize_run_for_ledger(run: &FleetRun) -> FleetRun {
if let Some(policy) = &mut task.alert_policy {
for channel in &mut policy.channels {
match channel {
FleetAlertChannel::Slack { webhook_url } => {
*webhook_url = "<redacted>".to_string();
FleetAlertChannel::Slack { webhook } => {
webhook.url = webhook.url.as_ref().map(|_| "<redacted>".to_string());
}
FleetAlertChannel::Webhook { url, secret } => {
*url = "<redacted>".to_string();
*secret = secret.as_ref().map(|_| "<redacted>".to_string());
FleetAlertChannel::Webhook { endpoint } => {
*endpoint = FleetAlertEndpoint {
url: endpoint.url.as_ref().map(|_| "<redacted>".to_string()),
url_ref: endpoint
.url_ref
.as_ref()
.map(|_| FleetSecretRef::new("<redacted>")),
secret_ref: endpoint
.secret_ref
.as_ref()
.map(|_| FleetSecretRef::new("<redacted>")),
};
}
FleetAlertChannel::PagerDuty { routing_key, .. } => {
*routing_key = "<redacted>".to_string();
@@ -691,6 +700,7 @@ mod tests {
task_specs: vec![],
worker_specs: vec![],
labels: BTreeMap::new(),
security_policy: None,
created_at: "2026-06-12T17:00:00Z".to_string(),
updated_at: None,
completed_at: None,
+284 -1
View File
@@ -21,14 +21,38 @@ use super::task_spec::{
FleetTaskSpecDocument, FleetTaskVerificationInput, load_task_spec_document,
record_verification_receipt, validate_task_spec_document, verify_task_result,
};
use super::worker_runtime;
use crate::tools::subagent::SharedSubAgentManager;
const DEFAULT_STALE_AFTER_SECONDS: u64 = 300;
#[derive(Debug)]
pub struct FleetManager {
workspace: PathBuf,
ledger: FleetLedger,
stale_after: Duration,
exec_config: codewhale_config::FleetExecConfig,
/// Optional sub-agent manager for headless worker execution.
/// When set, fleet workers spawn real sub-agents; when None,
/// the manager falls back to local simulation.
sub_agent_manager: Option<SharedSubAgentManager>,
}
impl std::fmt::Debug for FleetManager {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("FleetManager")
.field("workspace", &self.workspace)
.field("ledger", &self.ledger)
.field("stale_after", &self.stale_after)
.field("exec_config", &self.exec_config)
.field(
"sub_agent_manager",
&self
.sub_agent_manager
.as_ref()
.map(|_| "SharedSubAgentManager"),
)
.finish()
}
}
#[derive(Debug, Clone)]
@@ -78,6 +102,28 @@ pub struct FleetWorkerInspection {
pub artifacts: Vec<FleetArtifactRef>,
pub last_error: Option<String>,
pub alert_state: Option<String>,
/// Lightweight projection from the sub-agent worker runtime.
/// Populated when a sub-agent manager is attached.
pub runtime_state: Option<FleetWorkerRuntimeProjection>,
}
/// Lightweight TUI projection of a headless sub-agent worker's current state.
///
/// Derived from the sub-agent manager's `AgentWorkerRecord`. All fields are
/// owned copies, so a projection stays valid after the manager lock is released.
#[derive(Debug, Clone)]
pub struct FleetWorkerRuntimeProjection {
/// Sub-agent lifecycle status (Queued, Starting, Running, Completed, etc.),
/// stored as the lowercased `Debug` rendering of the worker status.
pub agent_status: String,
/// Steps taken so far (tool calls + model turns)
pub steps_taken: u32,
/// Latest human-readable message from the worker
pub latest_message: Option<String>,
/// Error message if the worker failed
pub error: Option<String>,
/// Result summary if the worker completed
pub result_summary: Option<String>,
/// Whether the worker has a sub-agent session running.
/// The manager's inspection path sets this to `false` only for the
/// `Completed`, `Failed`, and `Cancelled` statuses.
pub has_session: bool,
}
impl FleetManager {
@@ -88,6 +134,8 @@ impl FleetManager {
workspace,
ledger,
stale_after: Duration::from_secs(DEFAULT_STALE_AFTER_SECONDS),
exec_config: codewhale_config::FleetExecConfig::default(),
sub_agent_manager: None,
})
}
@@ -96,6 +144,23 @@ impl FleetManager {
self
}
/// Apply fleet headless-worker execution policy from config.
pub fn with_exec_config(mut self, exec_config: codewhale_config::FleetExecConfig) -> Self {
self.exec_config = exec_config;
self
}
/// Attach a sub-agent manager so fleet workers can spawn real headless agents.
pub fn with_sub_agent_manager(mut self, mgr: SharedSubAgentManager) -> Self {
self.sub_agent_manager = Some(mgr);
self
}
/// True when the manager has a sub-agent runtime for headless worker execution.
pub fn has_worker_runtime(&self) -> bool {
self.sub_agent_manager.is_some()
}
pub fn ledger_path(&self) -> &Path {
self.ledger.path()
}
@@ -139,6 +204,7 @@ impl FleetManager {
task_specs: doc.tasks.clone(),
worker_specs: doc.workers.clone(),
labels: doc.labels,
security_policy: doc.security_policy.clone(),
created_at: now.clone(),
updated_at: Some(now.clone()),
completed_at: None,
@@ -280,6 +346,27 @@ impl FleetManager {
.get(worker_id)
.map(|heartbeat| heartbeat.timestamp.clone());
let alert_state = latest_alert_for_worker(&state, worker_id);
// Enrich with sub-agent worker runtime state when available.
let runtime_state = self.sub_agent_manager.as_ref().and_then(|mgr| {
mgr.try_read()
.ok()
.and_then(|guard| guard.get_worker_record(worker_id))
.map(|record| FleetWorkerRuntimeProjection {
agent_status: format!("{:?}", record.status).to_lowercase(),
steps_taken: record.steps_taken,
latest_message: record.latest_message,
error: record.error,
result_summary: record.result_summary,
has_session: !matches!(
record.status,
crate::tools::subagent::AgentWorkerStatus::Completed
| crate::tools::subagent::AgentWorkerStatus::Failed
| crate::tools::subagent::AgentWorkerStatus::Cancelled
),
})
});
Ok(FleetWorkerInspection {
worker_id: worker_id.to_string(),
status,
@@ -293,6 +380,7 @@ impl FleetManager {
artifacts,
last_error,
alert_state,
runtime_state,
})
}
@@ -475,6 +563,45 @@ impl FleetManager {
FleetWorkerEventPayload::Running,
)?;
self.ledger.heartbeat(worker_id, &timestamp(), None, None)?;
// Register with the sub-agent manager for headless worker tracking.
// The engine's agent_open path handles actual sub-agent spawning.
if let Some(mgr) = self.sub_agent_manager.as_ref() {
    // Build the worker spec first: none of this needs the manager, so it
    // is done without holding the manager's write lock.
    let worker_spec = self
        .ledger
        .rebuild_state()
        .ok()
        .and_then(|state| {
            state.runs.get(&entry.run_id.0).and_then(|run| {
                run.worker_specs
                    .iter()
                    .find(|w| w.id == worker_id)
                    .cloned()
            })
        })
        // No recorded spec for this worker: fall back to a local,
        // single-slot worker description.
        .unwrap_or_else(|| FleetWorkerSpec {
            id: worker_id.to_string(),
            name: worker_id.to_string(),
            host: FleetHostSpec::Local,
            trust_level: Some(FleetTrustLevel::Local),
            labels: BTreeMap::new(),
            capabilities: vec![],
            max_concurrent_tasks: Some(1),
        });
    let worker = worker_runtime::fleet_task_to_worker_spec(
        worker_id,
        &entry.run_id.0,
        task_spec,
        &worker_spec,
        "auto",
        &self.workspace,
    );
    let worker = worker_runtime::apply_exec_hardening(worker, &self.exec_config);

    // Best-effort, non-blocking registration: take the write lock once,
    // only for the registration itself. If the lock is contended the
    // registration is skipped rather than stalling the scheduler.
    if let Ok(mut guard) = mgr.try_write() {
        guard.register_worker(worker);
    }
}
self.maybe_complete_local_simulation(entry, worker_id, task_spec, log_artifact)
}
@@ -746,6 +873,7 @@ fn default_local_workers(run_id: &FleetRunId, max_workers: usize) -> Vec<FleetWo
id: format!("{}-local-{}", run_id.0, index),
name: format!("Local worker {index}"),
host: FleetHostSpec::Local,
trust_level: Some(FleetTrustLevel::Local),
labels: BTreeMap::new(),
capabilities: vec!["local".to_string()],
max_concurrent_tasks: Some(1),
@@ -1267,4 +1395,159 @@ mod tests {
Some("escalated via pagerduty alert_id=alert-1")
);
}
#[test]
fn fleet_dogfood_smoke_run_two_local_workers_two_tasks() {
let tmp = TempDir::new().unwrap();
let workspace = tmp.path().join("repo");
std::fs::create_dir_all(&workspace).unwrap();
// Create a minimal Cargo.toml so the cargo-check task can succeed.
std::fs::write(
workspace.join("Cargo.toml"),
"[package]\nname = \"smoke\"\nversion = \"0.1.0\"\nedition = \"2021\"\n",
)
.unwrap();
std::fs::create_dir_all(workspace.join("src")).unwrap();
std::fs::write(
workspace.join("src").join("lib.rs"),
"pub fn answer() -> u8 { 42 }\n",
)
.unwrap();
let tasks = vec![
FleetTaskSpec {
id: "check".to_string(),
name: "check".to_string(),
description: None,
objective: Some("cargo check".to_string()),
instructions: "run cargo check and report result".to_string(),
worker: Some(FleetTaskWorkerProfile {
role: Some("release-checker".to_string()),
tool_profile: Some("read-only".to_string()),
tools: vec!["cargo".to_string()],
capabilities: vec!["rust".to_string()],
}),
workspace: Some(FleetWorkspaceRequirements {
root: None,
required_files: vec![PathBuf::from("Cargo.toml")],
writable_paths: vec![PathBuf::from(".codewhale/fleet")],
environment: Some(FleetEnvironmentRequirements {
required: vec!["PATH".to_string()],
allowlist: vec![],
}),
}),
input_files: vec![],
context: vec![],
budget: None,
tags: vec!["smoke".to_string()],
expected_artifacts: vec![FleetArtifactKind::Log, FleetArtifactKind::Receipt],
scorer: Some(FleetScorerSpec::ExitCode),
retry_policy: Some(FleetRetryPolicy {
max_attempts: 1,
..Default::default()
}),
alert_policy: None,
timeout_seconds: Some(60),
metadata: BTreeMap::new(),
},
FleetTaskSpec {
id: "review".to_string(),
name: "review".to_string(),
description: None,
objective: Some("review source".to_string()),
instructions: "read src/lib.rs and report findings".to_string(),
worker: Some(FleetTaskWorkerProfile {
role: Some("reviewer".to_string()),
tool_profile: Some("read-only".to_string()),
tools: vec!["cargo".to_string()],
capabilities: vec!["rust".to_string()],
}),
workspace: Some(FleetWorkspaceRequirements {
root: None,
required_files: vec![],
writable_paths: vec![],
environment: Some(FleetEnvironmentRequirements {
required: vec!["PATH".to_string()],
allowlist: vec![],
}),
}),
input_files: vec![],
context: vec![],
budget: None,
tags: vec!["smoke".to_string()],
expected_artifacts: vec![FleetArtifactKind::Log, FleetArtifactKind::Receipt],
scorer: None,
retry_policy: Some(FleetRetryPolicy {
max_attempts: 1,
..Default::default()
}),
alert_policy: None,
timeout_seconds: Some(60),
metadata: BTreeMap::new(),
},
];
let manager = FleetManager::open(&workspace).unwrap();
let report = manager
.create_run(
FleetTaskSpecDocument {
name: Some("dogfood smoke".to_string()),
labels: BTreeMap::new(),
security_policy: Some(FleetSecurityPolicy {
default_trust_level: FleetTrustLevel::Local,
..Default::default()
}),
workers: vec![],
tasks,
},
2,
)
.unwrap();
assert_eq!(report.task_count, 2);
assert!(!report.worker_ids.is_empty());
assert_eq!(report.worker_ids.len(), 2);
// After immediate scheduling, tasks may already be leased,
// so queued+running should total 2.
let status = manager.run_status(&report.run_id).unwrap();
assert_eq!(status.queued + status.running, 2);
}
/// A `security_policy` block in a task spec document must survive the trip
/// through `create_run_from_task_spec_path` into the ledger's run record.
#[test]
fn fleet_security_policy_propagates_from_task_spec_document_to_run() {
    let tmp = TempDir::new().unwrap();
    let manager = FleetManager::open(tmp.path()).unwrap();

    // Write a spec document that carries a security_policy block.
    let doc = serde_json::json!({
        "name": "secure smoke",
        "tasks": [{
            "id": "task-a",
            "name": "task-a",
            "instructions": "report ok",
            "expected_artifacts": ["log"]
        }],
        "security_policy": {
            "default_trust_level": "local",
            "allowed_secrets": [{"key": "GH_TOKEN", "source": "env"}],
            "max_trust_level": "remote_verified",
            "require_identity_verification": true
        }
    });
    let spec_path = tmp.path().join("secure-tasks.json");
    std::fs::write(&spec_path, serde_json::to_string_pretty(&doc).unwrap()).unwrap();

    let report = manager
        .create_run_from_task_spec_path(&spec_path, 1)
        .unwrap();

    // Read the run back from the ledger rather than from the report so the
    // assertion covers what was actually persisted.
    let state = manager.ledger.rebuild_state().unwrap();
    let run = state.runs.get(&report.run_id.0).unwrap();
    let policy = run.security_policy.as_ref().unwrap();
    assert_eq!(policy.default_trust_level, FleetTrustLevel::Local);
    assert_eq!(policy.allowed_secrets.len(), 1);
    assert_eq!(policy.allowed_secrets[0].key, "GH_TOKEN");
    assert_eq!(policy.max_trust_level, FleetTrustLevel::RemoteVerified);
    assert!(policy.require_identity_verification);
}
}
+1
View File
@@ -6,3 +6,4 @@ pub mod ledger;
pub mod manager;
pub mod scheduler;
pub mod task_spec;
pub mod worker_runtime;
+3 -1
View File
@@ -573,6 +573,7 @@ mod tests {
id: id.to_string(),
name: id.to_string(),
host: FleetHostSpec::Local,
trust_level: Some(FleetTrustLevel::Local),
labels: BTreeMap::new(),
capabilities: vec!["local".to_string()],
max_concurrent_tasks: Some(1),
@@ -622,6 +623,7 @@ mod tests {
.map(|idx| worker(&format!("worker-{idx}")))
.collect(),
labels: BTreeMap::new(),
security_policy: None,
created_at: scheduler.timestamp(),
updated_at: None,
completed_at: None,
@@ -704,7 +706,7 @@ mod tests {
failing.alert_policy = Some(FleetAlertPolicy {
events: vec![FleetAlertEventClass::RestartExhausted],
channels: vec![FleetAlertChannel::Slack {
webhook_url: "https://hooks.slack.invalid/secret".to_string(),
webhook: FleetAlertEndpoint::inline("https://hooks.slack.invalid/secret"),
}],
after_attempts: Some(1),
after_minutes_stale: Some(1),
+5
View File
@@ -23,6 +23,9 @@ pub struct FleetTaskSpecDocument {
pub name: Option<String>,
#[serde(default)]
pub labels: BTreeMap<String, String>,
#[serde(default)]
#[serde(skip_serializing_if = "Option::is_none")]
pub security_policy: Option<FleetSecurityPolicy>,
#[serde(default, alias = "worker_specs")]
pub workers: Vec<FleetWorkerSpec>,
#[serde(default)]
@@ -49,12 +52,14 @@ impl FleetTaskSpecFile {
Self::Tasks(tasks) => FleetTaskSpecDocument {
name: Some(fallback_name),
labels: BTreeMap::new(),
security_policy: None,
workers: Vec::new(),
tasks,
},
Self::Single(task) => FleetTaskSpecDocument {
name: Some(fallback_name),
labels: BTreeMap::new(),
security_policy: None,
workers: Vec::new(),
tasks: vec![task],
},
+626
View File
@@ -0,0 +1,626 @@
//! Fleet worker runtime — bridges fleet task specs to headless sub-agent execution.
//!
//! This module makes fleet workers real: instead of simulating task completion,
//! each fleet worker spawns a headless sub-agent that runs the task instructions
//! and streams progress back into the fleet ledger.
//!
//! Architecture:
//! - `FleetTaskSpec` + `FleetWorkerSpec` → `AgentWorkerSpec`
//! - `SubAgentManager::register_worker()` tracks the worker
//! - Sub-agent spawn happens through the existing `agent_open` machinery
//! - Mailbox events stream into fleet ledger as `FleetWorkerEventPayload`
//! - `FleetWorkerInspection` reads both ledger state and sub-agent worker records
#![allow(dead_code)]
use codewhale_protocol::fleet::{
FleetHostSpec, FleetTaskSpec, FleetTaskWorkerProfile, FleetWorkerEventPayload, FleetWorkerSpec,
};
use super::host::FleetHostKind;
use crate::tools::subagent::{
AgentWorkerSpec, AgentWorkerStatus, AgentWorkerToolProfile, SubAgentType,
};
/// Classify a fleet worker spec's host as a [`FleetHostKind`].
///
/// `Local` and `Docker` hosts both map to `FleetHostKind::LocalProcess`;
/// `Ssh` hosts map to `FleetHostKind::Ssh`. Use `fleet_host_kind_label`
/// to turn the result into a display string.
pub fn fleet_host_kind_for_spec(spec: &FleetWorkerSpec) -> FleetHostKind {
match &spec.host {
FleetHostSpec::Local => FleetHostKind::LocalProcess,
FleetHostSpec::Ssh { .. } => FleetHostKind::Ssh,
FleetHostSpec::Docker { .. } => FleetHostKind::LocalProcess, // Docker runs local-ish
}
}
/// Map a fleet host kind to a compact display label.
pub fn fleet_host_kind_label(kind: FleetHostKind) -> &'static str {
match kind {
FleetHostKind::LocalProcess => "local",
FleetHostKind::Ssh => "ssh",
}
}
/// Build the sub-agent [`AgentWorkerSpec`] for one fleet task.
///
/// - `objective` is the prompt rendered by `fleet_task_prompt`.
/// - The task's worker `role` is kept verbatim and also mapped to a
///   `SubAgentType` via `fleet_role_to_agent_type`.
/// - A non-empty task tool list becomes an explicit tool profile;
///   otherwise the profile is inherited.
/// - `max_steps` is the budget's `max_tool_calls`, or `u32::MAX` when the
///   task sets no such budget.
/// - The worker starts at spawn depth 0 with the default
///   `FleetExecConfig` spawn-depth limit.
///
/// `_worker_spec` is currently not consulted.
pub fn fleet_task_to_worker_spec(
    worker_id: &str,
    run_id: &str,
    task_spec: &FleetTaskSpec,
    _worker_spec: &FleetWorkerSpec,
    model: &str,
    workspace: &std::path::Path,
) -> AgentWorkerSpec {
    let profile = task_spec.worker.as_ref();
    let role = profile.and_then(|p| p.role.clone());
    let agent_type = fleet_role_to_agent_type(role.as_deref());
    let tool_profile = fleet_tool_profile(profile);
    let objective = fleet_task_prompt(task_spec);
    let step_budget = task_spec.budget.as_ref().and_then(|b| b.max_tool_calls);
    let default_depth_limit = codewhale_config::FleetExecConfig::default().max_spawn_depth;

    AgentWorkerSpec {
        worker_id: worker_id.to_owned(),
        run_id: run_id.to_owned(),
        parent_run_id: None,
        session_name: Some(format!("fleet-{worker_id}-{}", task_spec.id)),
        objective,
        role,
        agent_type,
        model: model.to_owned(),
        workspace: workspace.to_path_buf(),
        git_branch: None,
        context_mode: String::from("fresh"),
        fork_context: false,
        tool_profile,
        max_steps: step_budget.unwrap_or(u32::MAX),
        spawn_depth: 0,
        max_spawn_depth: default_depth_limit,
    }
}
/// Render the prompt handed to a headless worker for `task_spec`.
///
/// Sections, in order: a `Fleet task:` header with the task name, an
/// optional `Objective` (the task objective, or its description when no
/// objective is set), the `Instructions`, then optional `Context` and
/// `Input files` bullet lists. Every section is separated from the previous
/// one by exactly one blank line and the prompt has no trailing newline,
/// regardless of which optional sections are present.
fn fleet_task_prompt(task_spec: &FleetTaskSpec) -> String {
    let mut prompt = format!("Fleet task: {}", task_spec.name);

    let goal = task_spec
        .objective
        .as_deref()
        .or(task_spec.description.as_deref());
    if let Some(goal) = goal {
        prompt.push_str("\n\nObjective:\n");
        prompt.push_str(goal);
    }

    prompt.push_str("\n\nInstructions:\n");
    prompt.push_str(&task_spec.instructions);

    // Each bullet carries its own leading newline so a list never leaves a
    // dangling newline behind and the next header can always add "\n\n".
    if !task_spec.context.is_empty() {
        prompt.push_str("\n\nContext:");
        for item in &task_spec.context {
            prompt.push_str("\n- ");
            prompt.push_str(item);
        }
    }

    if !task_spec.input_files.is_empty() {
        prompt.push_str("\n\nInput files:");
        for path in &task_spec.input_files {
            prompt.push_str("\n- ");
            prompt.push_str(&path.display().to_string());
        }
    }

    prompt
}
/// Map a fleet role name to a `SubAgentType`.
///
/// Fleet aliases (`smoke-runner`, `reviewer`, `builder`, ...) map to fixed
/// types. Any other name is handed to `SubAgentType::from_str`; a missing
/// role, or a name that does not parse, yields `SubAgentType::General`.
fn fleet_role_to_agent_type(role: Option<&str>) -> SubAgentType {
    let Some(name) = role else {
        return SubAgentType::General;
    };

    match name {
        "smoke-runner" | "read-only" => SubAgentType::ToolAgent,
        "reviewer" => SubAgentType::Review,
        "builder" => SubAgentType::Implementer,
        "verifier" | "tester" => SubAgentType::Verifier,
        "planner" => SubAgentType::Plan,
        "explorer" => SubAgentType::Explore,
        "general" => SubAgentType::General,
        // Not a fleet alias: try parsing as a SubAgentType directly.
        other => SubAgentType::from_str(other).unwrap_or(SubAgentType::General),
    }
}
/// Derive the sub-agent tool profile from a task's worker profile.
///
/// A profile with a non-empty `tools` list yields an explicit profile with
/// a copy of that list; a missing profile or an empty list yields
/// `AgentWorkerToolProfile::Inherited`.
fn fleet_tool_profile(profile: Option<&FleetTaskWorkerProfile>) -> AgentWorkerToolProfile {
    let explicit_tools = profile
        .map(|p| p.tools.as_slice())
        .filter(|tools| !tools.is_empty());

    match explicit_tools {
        Some(tools) => AgentWorkerToolProfile::Explicit(tools.to_vec()),
        None => AgentWorkerToolProfile::Inherited,
    }
}
/// Create a fleet artifact ref from a worker output.
///
/// Wraps `kind` and `path` in a `FleetArtifactRef` with `checksum`,
/// `mime_type`, and `size_bytes` left unset. `path` is stored exactly as
/// given: this function does not place or validate it, so following the
/// fleet artifact conventions (logs under `.codewhale/fleet/`, reports
/// under `.codewhale/fleet/reports/`) is the caller's responsibility.
///
/// `_run_id` and `_worker_id` are currently unused.
pub fn fleet_artifact_ref(
_run_id: &str,
_worker_id: &str,
kind: codewhale_protocol::fleet::FleetArtifactKind,
path: std::path::PathBuf,
) -> codewhale_protocol::fleet::FleetArtifactRef {
codewhale_protocol::fleet::FleetArtifactRef {
kind,
path,
checksum: None,
mime_type: None,
size_bytes: None,
}
}
/// Map a sub-agent `AgentWorkerStatus` to a fleet `FleetWorkerEventPayload`.
///
/// This is the streaming bridge: as the sub-agent runs, each status transition
/// produces a corresponding fleet ledger event so the TUI surfaces stay in sync.
///
/// - `message` supplies the `Completed` summary, the `Failed` reason
///   (`"unknown error"` when absent), and the `Interrupted` signal text;
///   it is ignored for every other status.
/// - `tool_name` is used only for `RunningTool` (`"unknown"` when absent).
pub fn agent_status_to_fleet_event(
status: AgentWorkerStatus,
message: Option<&str>,
tool_name: Option<&str>,
) -> FleetWorkerEventPayload {
match status {
AgentWorkerStatus::Queued => FleetWorkerEventPayload::Queued,
AgentWorkerStatus::Starting => FleetWorkerEventPayload::Starting,
AgentWorkerStatus::Running => FleetWorkerEventPayload::Running,
// Waiting on the user is reported with the same payload as waiting on the model.
AgentWorkerStatus::WaitingForUser => FleetWorkerEventPayload::ModelWait { model: None },
AgentWorkerStatus::ModelWait => FleetWorkerEventPayload::ModelWait { model: None },
AgentWorkerStatus::RunningTool => FleetWorkerEventPayload::RunningTool {
tool: tool_name.unwrap_or("unknown").to_string(),
call_id: None,
},
// A completed sub-agent is always reported with exit code 0.
AgentWorkerStatus::Completed => FleetWorkerEventPayload::Completed {
exit_code: Some(0),
summary: message.map(|s| s.to_string()),
},
// Failures are always reported as non-recoverable from here.
AgentWorkerStatus::Failed => FleetWorkerEventPayload::Failed {
reason: message.unwrap_or("unknown error").to_string(),
recoverable: false,
},
AgentWorkerStatus::Cancelled => FleetWorkerEventPayload::Cancelled { cancelled_by: None },
AgentWorkerStatus::Interrupted => FleetWorkerEventPayload::Interrupted {
signal: message.map(|s| s.to_string()),
},
}
}
/// Apply fleet exec hardening (#3027) to a worker spec and return it.
///
/// In order:
/// 1. `max_steps` is capped to `exec.max_turns`, unless `max_turns` is `0`
///    or `u32::MAX`, in which case the spec's own budget is kept.
/// 2. `max_spawn_depth` is overwritten with `exec.max_spawn_depth`, clamped
///    to `MAX_SPAWN_DEPTH_CEILING`.
/// 3. If either tool list in `exec` is non-empty, the tool profile is run
///    through `filter_tool_profile`.
/// 4. A non-empty `append_system_prompt` is appended to the objective under
///    a `[Policy]` heading.
pub fn apply_exec_hardening(
    mut spec: AgentWorkerSpec,
    exec: &codewhale_config::FleetExecConfig,
) -> AgentWorkerSpec {
    let turn_cap = exec.max_turns;
    if turn_cap != 0 && turn_cap != u32::MAX {
        spec.max_steps = std::cmp::min(spec.max_steps, turn_cap);
    }

    spec.max_spawn_depth = std::cmp::min(
        exec.max_spawn_depth,
        codewhale_config::MAX_SPAWN_DEPTH_CEILING,
    );

    let has_tool_policy = !(exec.allowed_tools.is_empty() && exec.disallowed_tools.is_empty());
    if has_tool_policy {
        spec.tool_profile = filter_tool_profile(&spec.tool_profile, exec);
    }

    if !exec.append_system_prompt.is_empty() {
        spec.objective.push_str("\n\n[Policy]\n");
        spec.objective.push_str(&exec.append_system_prompt);
    }

    spec
}
/// Filter a tool profile against the exec config's allowed/disallowed lists.
///
/// For an explicit profile, a tool is kept when it passes the allowlist
/// (which only applies when `allowed_tools` is non-empty) and is not in
/// `disallowed_tools`; the deny list wins when a tool is on both. Original
/// order is preserved.
///
/// An inherited profile is returned unchanged.
fn filter_tool_profile(
    profile: &AgentWorkerToolProfile,
    exec: &codewhale_config::FleetExecConfig,
) -> AgentWorkerToolProfile {
    let tools = match profile {
        AgentWorkerToolProfile::Explicit(tools) => tools,
        // Inherited profiles can't be filtered at spec time; the sub-agent
        // spawn path is expected to apply tool filtering.
        // NOTE(review): neither list is enforced here for inherited
        // profiles — confirm the spawn path really applies them.
        AgentWorkerToolProfile::Inherited => return AgentWorkerToolProfile::Inherited,
    };

    let allowlist_active = !exec.allowed_tools.is_empty();
    let kept: Vec<String> = tools
        .iter()
        .filter(|tool| {
            let passes_allowlist = !allowlist_active || exec.allowed_tools.contains(tool);
            // Disallowed tools always win.
            passes_allowlist && !exec.disallowed_tools.contains(tool)
        })
        .cloned()
        .collect();

    AgentWorkerToolProfile::Explicit(kept)
}
/// Report whether `tool_name` is on the read-only list of tools considered
/// safe for conservative parallel batching (#2983).
///
/// Matching is exact and case-sensitive; any name not on the list,
/// including the empty string, returns `false`.
pub fn is_parallel_safe_read_only_tool(tool_name: &str) -> bool {
    const PARALLEL_SAFE_TOOLS: &[&str] = &[
        "read_file",
        "grep_files",
        "file_search",
        "list_dir",
        "git_status",
        "git_diff",
        "git_log",
        "git_show",
        "git_blame",
        "fetch_url",
        "web_search",
        "tool_search_tool_regex",
        "tool_search_tool_bm25",
    ];

    PARALLEL_SAFE_TOOLS
        .iter()
        .any(|candidate| *candidate == tool_name)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn fleet_role_smoke_runner_maps_to_tool_agent() {
assert_eq!(
fleet_role_to_agent_type(Some("smoke-runner")),
SubAgentType::ToolAgent
);
}
#[test]
fn fleet_role_reviewer_maps_to_review() {
assert_eq!(
fleet_role_to_agent_type(Some("reviewer")),
SubAgentType::Review
);
}
#[test]
fn fleet_role_builder_maps_to_implementer() {
assert_eq!(
fleet_role_to_agent_type(Some("builder")),
SubAgentType::Implementer
);
}
#[test]
fn fleet_role_none_maps_to_general() {
assert_eq!(fleet_role_to_agent_type(None), SubAgentType::General);
}
#[test]
fn unknown_role_maps_to_general() {
assert_eq!(
fleet_role_to_agent_type(Some("nonexistent-role")),
SubAgentType::General
);
}
#[test]
fn fleet_tool_profile_empty_uses_inherited() {
let profile = FleetTaskWorkerProfile {
role: None,
tool_profile: None,
tools: vec![],
capabilities: vec![],
};
assert_eq!(
fleet_tool_profile(Some(&profile)),
AgentWorkerToolProfile::Inherited
);
}
#[test]
fn fleet_tool_profile_explicit_passes_tools() {
let profile = FleetTaskWorkerProfile {
role: None,
tool_profile: None,
tools: vec!["cargo".to_string(), "git".to_string()],
capabilities: vec![],
};
assert_eq!(
fleet_tool_profile(Some(&profile)),
AgentWorkerToolProfile::Explicit(vec!["cargo".to_string(), "git".to_string()])
);
}
#[test]
fn fleet_task_prompt_includes_instructions_context_and_input_files() {
let task = FleetTaskSpec {
id: "review".to_string(),
name: "Review protocol".to_string(),
description: None,
objective: Some("Find protocol regressions".to_string()),
instructions: "Read the fleet protocol and report issues.".to_string(),
worker: None,
workspace: None,
input_files: vec![std::path::PathBuf::from("crates/protocol/src/fleet.rs")],
context: vec!["Keep the report concise.".to_string()],
budget: None,
tags: vec![],
expected_artifacts: vec![],
scorer: None,
retry_policy: None,
alert_policy: None,
timeout_seconds: None,
metadata: Default::default(),
};
let prompt = fleet_task_prompt(&task);
assert!(prompt.contains("Review protocol"));
assert!(prompt.contains("Find protocol regressions"));
assert!(prompt.contains("Read the fleet protocol and report issues."));
assert!(prompt.contains("Keep the report concise."));
assert!(prompt.contains("crates/protocol/src/fleet.rs"));
}
#[test]
fn fleet_worker_spec_defaults_to_shared_subagent_spawn_depth() {
let task = FleetTaskSpec {
id: "task-1".to_string(),
name: "Task".to_string(),
description: None,
objective: None,
instructions: "Do the task.".to_string(),
worker: None,
workspace: None,
input_files: vec![],
context: vec![],
budget: None,
tags: vec![],
expected_artifacts: vec![],
scorer: None,
retry_policy: None,
alert_policy: None,
timeout_seconds: None,
metadata: Default::default(),
};
let worker = FleetWorkerSpec {
id: "worker-1".to_string(),
name: "Worker".to_string(),
host: FleetHostSpec::Local,
trust_level: None,
labels: Default::default(),
capabilities: vec![],
max_concurrent_tasks: None,
};
let spec = fleet_task_to_worker_spec(
"worker-1",
"run-1",
&task,
&worker,
"auto",
std::path::Path::new("/tmp"),
);
// Root fleet worker runs at depth 0; its budget equals the shared
// sub-agent default (3) so fleet and sub-agents are one substrate and
// at least 3 nested delegation levels are afforded.
assert_eq!(spec.spawn_depth, 0);
assert_eq!(spec.max_spawn_depth, codewhale_config::DEFAULT_SPAWN_DEPTH);
assert_eq!(spec.max_spawn_depth, 3);
// End-to-end reachability: walk the SAME gate the SubAgentRuntime
// enforces (`would_exceed_depth` = `spawn_depth + 1 > max_spawn_depth`).
// A depth-0 root must reach 3 nested levels, then stop. This fails if
// anyone lowers the shared default below 3 (Hunter: afford >= 3).
let hardened = apply_exec_hardening(spec, &codewhale_config::FleetExecConfig::default());
let would_exceed = |spawn_depth: u32| spawn_depth + 1 > hardened.max_spawn_depth;
assert!(
!would_exceed(0),
"root (depth 0) must spawn a child at depth 1"
);
assert!(!would_exceed(1), "depth-1 child must spawn to depth 2");
assert!(!would_exceed(2), "depth-2 child must spawn to depth 3");
assert!(
would_exceed(3),
"depth 3 is the afforded ceiling; depth 4 is blocked"
);
}
#[test]
fn exec_hardening_caps_max_steps_to_max_turns() {
let spec = AgentWorkerSpec {
worker_id: "w1".to_string(),
run_id: "r1".to_string(),
parent_run_id: None,
session_name: None,
objective: "test".to_string(),
role: None,
agent_type: SubAgentType::General,
model: "auto".to_string(),
workspace: std::path::PathBuf::from("/tmp"),
git_branch: None,
context_mode: "fresh".to_string(),
fork_context: false,
tool_profile: AgentWorkerToolProfile::Inherited,
max_steps: 1000,
spawn_depth: 0,
max_spawn_depth: 0,
};
let exec = codewhale_config::FleetExecConfig {
max_turns: 50,
..Default::default()
};
let hardened = apply_exec_hardening(spec, &exec);
assert_eq!(hardened.max_steps, 50);
}
#[test]
fn exec_hardening_applies_and_clamps_spawn_depth() {
    let base = AgentWorkerSpec {
        worker_id: String::from("w1"),
        run_id: String::from("r1"),
        parent_run_id: None,
        session_name: None,
        objective: String::from("test"),
        role: None,
        agent_type: SubAgentType::General,
        model: String::from("auto"),
        workspace: std::path::PathBuf::from("/tmp"),
        git_branch: None,
        context_mode: String::from("fresh"),
        fork_context: false,
        tool_profile: AgentWorkerToolProfile::Inherited,
        max_steps: 1000,
        spawn_depth: 0,
        max_spawn_depth: 0,
    };

    // Each case is (depth set in the exec config, depth expected on the hardened spec):
    // an in-range value is applied as-is, an oversized value comes back as 3,
    // and zero is preserved.
    for (configured, expected) in [(2, 2), (99, 3), (0, 0)] {
        let policy = codewhale_config::FleetExecConfig {
            max_spawn_depth: configured,
            ..Default::default()
        };
        let hardened = apply_exec_hardening(base.clone(), &policy);
        assert_eq!(hardened.max_spawn_depth, expected);
    }
}
#[test]
fn exec_hardening_filters_disallowed_tools() {
    // Turns a list of string literals into an owned tool-name list.
    let tools = |names: &[&str]| -> Vec<String> {
        names.iter().map(|&name| String::from(name)).collect()
    };

    let policy = codewhale_config::FleetExecConfig {
        disallowed_tools: tools(&["exec_shell"]),
        ..Default::default()
    };
    let requested =
        AgentWorkerToolProfile::Explicit(tools(&["read_file", "exec_shell", "git_diff"]));

    // The denied tool is dropped; the surviving tools keep their original order.
    let expected = AgentWorkerToolProfile::Explicit(tools(&["read_file", "git_diff"]));
    assert_eq!(filter_tool_profile(&requested, &policy), expected);
}
#[test]
fn exec_hardening_allowed_tools_acts_as_allowlist() {
    // Turns a list of string literals into an owned tool-name list.
    let tools = |names: &[&str]| -> Vec<String> {
        names.iter().map(|&name| String::from(name)).collect()
    };

    let policy = codewhale_config::FleetExecConfig {
        allowed_tools: tools(&["read_file", "git_diff"]),
        ..Default::default()
    };
    let requested =
        AgentWorkerToolProfile::Explicit(tools(&["read_file", "exec_shell", "git_diff"]));

    // Only tools named in `allowed_tools` remain; `exec_shell` is not listed, so it is removed.
    let expected = AgentWorkerToolProfile::Explicit(tools(&["read_file", "git_diff"]));
    assert_eq!(filter_tool_profile(&requested, &policy), expected);
}
#[test]
fn exec_hardening_allowed_plus_disallowed_disallowed_wins() {
    // Turns a list of string literals into an owned tool-name list.
    let tools = |names: &[&str]| -> Vec<String> {
        names.iter().map(|&name| String::from(name)).collect()
    };

    // `exec_shell` appears in both lists; the deny entry must take precedence.
    let policy = codewhale_config::FleetExecConfig {
        allowed_tools: tools(&["read_file", "exec_shell"]),
        disallowed_tools: tools(&["exec_shell"]),
        ..Default::default()
    };
    let requested = AgentWorkerToolProfile::Explicit(tools(&["read_file", "exec_shell"]));

    let expected = AgentWorkerToolProfile::Explicit(tools(&["read_file"]));
    assert_eq!(filter_tool_profile(&requested, &policy), expected);
}
#[test]
fn parallel_safe_read_only_tools_includes_grep_and_read() {
    // Every tool listed here must be classified as parallel-safe and read-only.
    for tool in ["read_file", "grep_files", "git_status", "web_search"] {
        assert!(
            is_parallel_safe_read_only_tool(tool),
            "{tool} should be parallel-safe"
        );
    }
}
#[test]
fn destructive_tools_not_parallel_safe() {
    // None of these tools may be classified as parallel-safe and read-only.
    for tool in [
        "exec_shell",
        "write_file",
        "edit_file",
        "apply_patch",
        "agent_open",
    ] {
        assert!(
            !is_parallel_safe_read_only_tool(tool),
            "{tool} must not be parallel-safe"
        );
    }
}
#[test]
fn exec_hardening_appends_system_prompt() {
    let requested = AgentWorkerSpec {
        worker_id: String::from("w1"),
        run_id: String::from("r1"),
        parent_run_id: None,
        session_name: None,
        objective: String::from("do the thing"),
        role: None,
        agent_type: SubAgentType::General,
        model: String::from("auto"),
        workspace: std::path::PathBuf::from("/tmp"),
        git_branch: None,
        context_mode: String::from("fresh"),
        fork_context: false,
        tool_profile: AgentWorkerToolProfile::Inherited,
        max_steps: 100,
        spawn_depth: 0,
        max_spawn_depth: 0,
    };
    let policy = codewhale_config::FleetExecConfig {
        append_system_prompt: String::from("never push to main"),
        ..Default::default()
    };

    let hardened = apply_exec_hardening(requested, &policy);

    // The hardened objective must still carry the original task text, plus a
    // "[Policy]" marker and the configured extra prompt.
    for fragment in ["do the thing", "[Policy]", "never push to main"] {
        assert!(
            hardened.objective.contains(fragment),
            "objective is missing {fragment:?}"
        );
    }
}
}
+9
View File
@@ -255,6 +255,7 @@ pub enum MessageId {
ConfigSectionSidebar,
ConfigSectionHistory,
ConfigSectionMcp,
ConfigSectionFleet,
ConfigSectionExperimental,
ConfigScopeSession,
ConfigScopeSaved,
@@ -699,6 +700,7 @@ pub const ALL_MESSAGE_IDS: &[MessageId] = &[
MessageId::ConfigSectionSidebar,
MessageId::ConfigSectionHistory,
MessageId::ConfigSectionMcp,
MessageId::ConfigSectionFleet,
MessageId::ConfigSectionExperimental,
MessageId::ConfigScopeSession,
MessageId::ConfigScopeSaved,
@@ -1315,6 +1317,7 @@ fn english(id: MessageId) -> &'static str {
MessageId::ConfigSectionSidebar => "Sidebar",
MessageId::ConfigSectionHistory => "History",
MessageId::ConfigSectionMcp => "MCP",
MessageId::ConfigSectionFleet => "Fleet",
MessageId::ConfigSectionExperimental => "Experimental",
MessageId::ConfigScopeSession => "SESSION",
MessageId::ConfigScopeSaved => "SAVED",
@@ -1915,6 +1918,7 @@ fn vietnamese(id: MessageId) -> Option<&'static str> {
MessageId::ConfigSectionSidebar => "Thanh bên",
MessageId::ConfigSectionHistory => "Lịch sử",
MessageId::ConfigSectionMcp => "MCP",
MessageId::ConfigSectionFleet => "Fleet",
MessageId::ConfigSectionExperimental => "Thử nghiệm",
MessageId::ConfigScopeSession => "PHIÊN",
MessageId::ConfigScopeSaved => "ĐÃ LƯU",
@@ -2624,6 +2628,7 @@ fn traditional_chinese(id: MessageId) -> Option<&'static str> {
MessageId::ConfigSectionSidebar => "側邊欄",
MessageId::ConfigSectionHistory => "歷史",
MessageId::ConfigSectionMcp => "MCP",
MessageId::ConfigSectionFleet => "艦隊",
MessageId::ConfigSectionExperimental => "實驗",
MessageId::ConfigScopeSession => "會話",
MessageId::ConfigScopeSaved => "已儲存",
@@ -2724,6 +2729,7 @@ fn japanese(id: MessageId) -> Option<&'static str> {
MessageId::ConfigSectionSidebar => "サイドバー",
MessageId::ConfigSectionHistory => "履歴",
MessageId::ConfigSectionMcp => "MCP",
MessageId::ConfigSectionFleet => "Fleet",
MessageId::ConfigSectionExperimental => "実験",
MessageId::ConfigScopeSession => "セッション",
MessageId::ConfigScopeSaved => "保存済み",
@@ -3315,6 +3321,7 @@ fn chinese_simplified(id: MessageId) -> Option<&'static str> {
MessageId::ConfigSectionSidebar => "侧边栏",
MessageId::ConfigSectionHistory => "历史",
MessageId::ConfigSectionMcp => "MCP",
MessageId::ConfigSectionFleet => "舰队",
MessageId::ConfigSectionExperimental => "实验",
MessageId::ConfigScopeSession => "会话",
MessageId::ConfigScopeSaved => "已保存",
@@ -3842,6 +3849,7 @@ fn portuguese_brazil(id: MessageId) -> Option<&'static str> {
MessageId::ConfigSectionSidebar => "Barra lateral",
MessageId::ConfigSectionHistory => "Histórico",
MessageId::ConfigSectionMcp => "MCP",
MessageId::ConfigSectionFleet => "Fleet",
MessageId::ConfigSectionExperimental => "Experimental",
MessageId::ConfigScopeSession => "SESSÃO",
MessageId::ConfigScopeSaved => "SALVO",
@@ -4461,6 +4469,7 @@ fn spanish_latin_america(id: MessageId) -> Option<&'static str> {
MessageId::ConfigSectionSidebar => "Barra lateral",
MessageId::ConfigSectionHistory => "Historial",
MessageId::ConfigSectionMcp => "MCP",
MessageId::ConfigSectionFleet => "Fleet",
MessageId::ConfigSectionExperimental => "Experimental",
MessageId::ConfigScopeSession => "SESIÓN",
MessageId::ConfigScopeSaved => "GUARDADO",
+9 -3
View File
@@ -1193,8 +1193,9 @@ async fn main() -> Result<()> {
run_swebench_command(&config, &model, workspace, max_subagents, args).await
}
Commands::Fleet(args) => {
let config = load_config_from_cli(&cli)?;
let workspace = resolve_workspace(&cli);
run_fleet_command(&workspace, args).await
run_fleet_command(&workspace, &config, args).await
}
Commands::Review(args) => {
let config = load_config_from_cli(&cli)?;
@@ -1458,7 +1459,7 @@ async fn run_swebench_command(
}
}
async fn run_fleet_command(workspace: &Path, args: FleetArgs) -> Result<()> {
async fn run_fleet_command(workspace: &Path, config: &Config, args: FleetArgs) -> Result<()> {
use crate::fleet::alerts::{
FleetAlertAdapterConfig, FleetAlertConfig, FleetAlertDispatcher, FleetAlertEvent,
FleetEnvSecretResolver,
@@ -1712,7 +1713,12 @@ async fn run_fleet_command(workspace: &Path, args: FleetArgs) -> Result<()> {
}
}
let manager = FleetManager::open(workspace)?;
let exec_config = config
.fleet
.as_ref()
.map(|fleet| fleet.exec.clone())
.unwrap_or_default();
let manager = FleetManager::open(workspace)?.with_exec_config(exec_config);
match args.command {
FleetCommand::Init => {
println!("fleet ledger: {}", manager.ledger_path().display());
+1
View File
@@ -3980,6 +3980,7 @@ mod tests {
crate::fleet::task_spec::FleetTaskSpecDocument {
name: Some("api smoke".to_string()),
labels: std::collections::BTreeMap::new(),
security_policy: None,
workers: Vec::new(),
tasks: vec![task],
},
+16 -8
View File
@@ -1161,8 +1161,13 @@ impl Default for PersistedSubAgentState {
}
/// Default cap on sub-agent recursion depth. Override via
/// `[runtime] max_spawn_depth = N` in `~/.deepseek/config.toml`.
pub const DEFAULT_MAX_SPAWN_DEPTH: u32 = 3;
/// `[runtime] max_spawn_depth = N` in config.
///
/// Sourced from [`codewhale_config::DEFAULT_SPAWN_DEPTH`] so standalone
/// sub-agents and fleet workers share ONE recursion axis (no "two moving
/// targets"). Configured/requested depths clamp to
/// [`codewhale_config::MAX_SPAWN_DEPTH_CEILING`].
pub const DEFAULT_MAX_SPAWN_DEPTH: u32 = codewhale_config::DEFAULT_SPAWN_DEPTH;
/// Terminal-state notification emitted to the engine's parent turn loop
/// when one of its direct children finishes (issue #756). Carries the
@@ -1794,7 +1799,7 @@ impl SubAgentManager {
.retain(|worker_id, _| keep_ids.contains(worker_id));
}
fn register_worker(&mut self, spec: AgentWorkerSpec) {
pub fn register_worker(&mut self, spec: AgentWorkerSpec) {
let worker_id = spec.worker_id.clone();
let now_ms = epoch_millis_now();
let mut record = AgentWorkerRecord::new(normalize_worker_spec(spec), now_ms);
@@ -5927,15 +5932,18 @@ fn parse_spawn_request(input: &Value) -> Result<SpawnRequest, ToolError> {
.or_else(|| input.get("max_spawn_depth"))
.and_then(Value::as_u64)
.map(|depth| {
let ceiling = codewhale_config::MAX_SPAWN_DEPTH_CEILING;
u32::try_from(depth)
.map_err(|_| ToolError::invalid_input("max_depth must be between 0 and 3"))
.map_err(|_| {
ToolError::invalid_input(format!("max_depth must be between 0 and {ceiling}"))
})
.and_then(|depth| {
if depth <= 3 {
if depth <= ceiling {
Ok(depth)
} else {
Err(ToolError::invalid_input(
"max_depth must be between 0 and 3",
))
Err(ToolError::invalid_input(format!(
"max_depth must be between 0 and {ceiling}"
)))
}
})
})
+62 -3
View File
@@ -412,6 +412,7 @@ enum ConfigSection {
Sidebar,
History,
Mcp,
Fleet,
Experimental,
}
@@ -429,6 +430,7 @@ impl ConfigSection {
ConfigSection::Sidebar => MessageId::ConfigSectionSidebar,
ConfigSection::History => MessageId::ConfigSectionHistory,
ConfigSection::Mcp => MessageId::ConfigSectionMcp,
ConfigSection::Fleet => MessageId::ConfigSectionFleet,
ConfigSection::Experimental => MessageId::ConfigSectionExperimental,
},
)
@@ -759,6 +761,18 @@ impl ConfigView {
editable: true,
scope: ConfigScope::Saved,
},
ConfigRow {
section: ConfigSection::Fleet,
key: "fleet.exec.max_spawn_depth".to_string(),
value: config
.fleet
.as_ref()
.map(|fleet| fleet.exec.max_spawn_depth)
.unwrap_or_else(|| codewhale_config::FleetExecConfig::default().max_spawn_depth)
.to_string(),
editable: false,
scope: ConfigScope::Saved,
},
];
rows.extend(experimental_config_rows(&config));
@@ -1180,7 +1194,7 @@ fn experimental_config_rows(config: &Config) -> Vec<ConfigRow> {
rows.push(ConfigRow {
section: ConfigSection::Experimental,
key: "whaleflow".to_string(),
value: "preview placeholder (not stable; see #2981/#2974)".to_string(),
value: "preview overlay for workflow/fleet runs (not stable; see #3154/#3178)".to_string(),
editable: false,
scope: ConfigScope::Saved,
});
@@ -1235,6 +1249,9 @@ fn config_hint_for_key(key: &str) -> &'static str {
"DeepSeek: auto/off/high/max; Codex: low/medium/high/xhigh; default clears saved value"
}
"mcp_config_path" => "path to mcp.json",
"fleet.exec.max_spawn_depth" => {
"0 blocks child agents; 3 default (same axis as sub-agents); capped at 3"
}
_ => "",
}
}
@@ -2393,6 +2410,7 @@ mod tests {
"Sidebar",
"History",
"MCP",
"Fleet",
"Experimental",
]
);
@@ -2429,6 +2447,7 @@ mod tests {
assert!(keys.contains(&"cost_currency"));
assert!(keys.contains(&"prefer_external_pdftotext"));
assert!(keys.contains(&"mcp_config_path"));
assert!(keys.contains(&"fleet.exec.max_spawn_depth"));
assert!(keys.contains(&"features.subagents"));
assert!(keys.contains(&"features.web_search"));
assert!(keys.contains(&"features.apply_patch"));
@@ -2440,13 +2459,23 @@ mod tests {
assert!(
view.rows
.iter()
.filter(|row| row.section != super::ConfigSection::Experimental)
.filter(|row| {
!matches!(
row.section,
super::ConfigSection::Experimental | super::ConfigSection::Fleet
)
})
.all(|row| row.editable)
);
assert!(
view.rows
.iter()
.filter(|row| row.section == super::ConfigSection::Experimental)
.filter(|row| {
matches!(
row.section,
super::ConfigSection::Experimental | super::ConfigSection::Fleet
)
})
.all(|row| !row.editable)
);
}
@@ -2497,6 +2526,36 @@ vision_model = true
assert_eq!(subagents.value, "enabled (default enabled)");
}
#[test]
fn config_view_shows_fleet_max_spawn_depth_from_config() {
    // Scratch directory under the system temp dir, keyed by process id.
    let temp_root = std::env::temp_dir().join(format!(
        "codewhale-fleet-config-view-test-{}",
        std::process::id()
    ));
    fs::create_dir_all(&temp_root).unwrap();
    let config_path = temp_root.join("config.toml");
    fs::write(
        &config_path,
        r#"
[fleet.exec]
max_spawn_depth = 2
"#,
    )
    .unwrap();

    let mut app = create_test_app();
    app.config_path = Some(config_path);
    let view = ConfigView::new_for_app(&app);

    // Copy out the fields under test so the scratch directory can be removed
    // before asserting; a failing assertion then cannot leave it behind.
    let observed = view
        .rows
        .iter()
        .find(|row| row.key == "fleet.exec.max_spawn_depth")
        .map(|row| (row.value.clone(), row.editable));

    // Best-effort cleanup: a removal failure must not mask the real test result.
    let _ = fs::remove_dir_all(&temp_root);

    let (value, editable) = observed.expect("fleet spawn depth row");
    assert_eq!(value, "2");
    assert!(!editable);
}
#[test]
fn config_view_experimental_section_is_searchable() {
let mut view = create_config_view(Locale::En);
+244 -95
View File
@@ -19,6 +19,29 @@ Fleet state is stored under the workspace in `.codewhale/fleet.jsonl`. Worker
logs and adapter logs are stored under `.codewhale/fleet/` and
`.codewhale/fleet-host/`.
## Naming: Modes, WhaleFlow, Fleet, and Swarm
These names describe different layers, not competing systems. Agent, Plan, and
YOLO stay the permission/work modes. WhaleFlow is an orchestration overlay that
can run on top of those modes when the task needs a continuous workflow.
- **WhaleFlow** is the repeatable workflow plan and user-facing orchestration
overlay: a script/IR that decides which phases and agents run next, keeps
intermediate results out of the main conversation, and can be inspected or
rerun. A WhaleFlow run should have a visible progress view and a clear active
header state instead of feeling like a hidden background task.
- **Fleet** is the execution substrate: headless workers, local/SSH hosts,
trust policy, leases, heartbeats, logs, receipts, and status APIs.
- **Swarm** is the high-fanout behavior inside WhaleFlow. It should compile into
a WhaleFlow-backed fleet run instead of reviving the old `agent_swarm` tool
surface.
UI guidance: keep the main transcript calm. A WhaleFlow run should appear as a
compact progress card plus Work/Agents sidebar rows with phase names, worker
counts, receipts, and nested indentation for child workers. Use the whale mark
sparingly as an active header/status signal; avoid repeating emoji-heavy rows
for every worker.
## Task Spec
`codewhale fleet run` accepts JSON or TOML. A minimal JSON spec:
@@ -60,118 +83,79 @@ and `json_path`. Specs may also declare `command`,
`code_whale_verifier_prompt`, or `manual`; those record a partial receipt until
an explicit verifier pass completes.
### Release Triage Example
### Using Role Presets
Tasks can reference a role name, and the fleet manager fills in defaults
from the role registry. Built-in roles (`smoke-runner`, `reviewer`, `builder`,
`read-only`) are always available; define your own in `[fleet.roles]`.
```json
{
"name": "v0.8.60 release triage",
"labels": {
"milestone": "v0.8.60"
},
"name": "smoke check",
"tasks": [
{
"id": "release-issue-sweep",
"name": "Release issue sweep",
"objective": "Find open v0.8.60 blockers and credit-sensitive PRs.",
"instructions": "Review the v0.8.60 milestone, linked PRs, changelog entries, and contributor-credit requirements. Write a concise blocker report.",
"worker": {
"role": "release-triage",
"tool_profile": "read-only",
"tools": ["gh", "git"],
"capabilities": ["github", "release"]
},
"workspace": {
"required_files": ["Cargo.toml", "CHANGELOG.md", ".github/AUTHOR_MAP"],
"writable_paths": [".codewhale/fleet"],
"environment": {
"required": ["PATH"]
}
},
"input_files": ["CHANGELOG.md", ".github/AUTHOR_MAP"],
"context": ["Treat community PRs as maintainer evidence."],
"budget": {
"max_tokens": 12000,
"max_tool_calls": 24,
"max_seconds": 900
},
"timeout_seconds": 900,
"expected_artifacts": ["log", "report", "receipt"],
"scorer": {
"kind": "exit_code"
},
"retry_policy": {
"max_attempts": 2,
"initial_backoff_seconds": 10,
"max_backoff_seconds": 60,
"backoff_multiplier": 2
},
"tags": ["release", "triage"],
"metadata": {
"class": "release"
}
"id": "lint",
"name": "Lint check",
"instructions": "Run lint and report failures.",
"worker": { "role": "smoke-runner" },
"expected_artifacts": ["log"]
}
]
}
```
### Code Review Swarm Example
The task inherits the role's tool profile, budget, and timeout. You can
override any field in the task spec:
```json
{
"name": "code review swarm",
"id": "deep-review",
"name": "Deep review",
"instructions": "Review the entire crate for soundness issues.",
"worker": {
"role": "reviewer",
"tools": ["cargo", "rg", "git"],
"capabilities": ["rust"]
},
"input_files": ["crates/**/*.rs"],
"budget": { "max_tokens": 32000 },
"expected_artifacts": ["log", "report"],
"scorer": { "kind": "regex_match", "path": ".codewhale/fleet/report.md", "pattern": "finding|all clear" }
}
```
### Multi-Task Run Example
A single fleet run can dispatch several independent tasks in parallel:
```json
{
"name": "CI gate",
"tasks": [
{
"id": "protocol-review",
"name": "Protocol review",
"objective": "Review fleet protocol changes for compatibility and sparse JSON behavior.",
"instructions": "Inspect crates/protocol/src/fleet.rs and report behavior regressions, missing serde defaults, or unsafe wire changes.",
"worker": {
"role": "reviewer",
"tool_profile": "read-only",
"tools": ["git", "rg", "cargo"],
"capabilities": ["rust"]
},
"input_files": ["crates/protocol/src/fleet.rs"],
"budget": {
"max_tokens": 8000,
"max_tool_calls": 16,
"max_seconds": 600
},
"expected_artifacts": ["log", "report", "receipt"],
"scorer": {
"kind": "code_whale_verifier_prompt",
"prompt": "Verify the review includes at least one concrete file:line finding or explicitly says no issues were found."
},
"tags": ["review", "protocol"],
"metadata": {
"class": "code-review"
}
"id": "check",
"name": "Compile check",
"instructions": "Run cargo check --workspace and report errors.",
"worker": { "role": "builder" },
"expected_artifacts": ["log"],
"scorer": { "kind": "exit_code" }
},
{
"id": "tui-review",
"name": "TUI review",
"objective": "Review fleet CLI and manager behavior for operator-visible regressions.",
"instructions": "Inspect crates/tui/src/fleet and crates/tui/src/main.rs. Focus on status output, receipt recording, and failure classification.",
"worker": {
"role": "reviewer",
"tool_profile": "read-only",
"tools": ["git", "rg", "cargo"],
"capabilities": ["rust", "cli"]
},
"input_files": ["crates/tui/src/fleet", "crates/tui/src/main.rs"],
"budget": {
"max_tokens": 10000,
"max_tool_calls": 20,
"max_seconds": 600
},
"expected_artifacts": ["log", "report", "receipt"],
"scorer": {
"kind": "manual"
},
"tags": ["review", "tui"],
"metadata": {
"class": "code-review"
}
"id": "clippy",
"name": "Clippy lint",
"instructions": "Run cargo clippy --workspace and report warnings.",
"worker": { "role": "reviewer", "tools": ["cargo", "cargo-clippy"] },
"expected_artifacts": ["log"],
"scorer": { "kind": "exit_code" }
},
{
"id": "security",
"name": "Secret audit",
"instructions": "Search for plaintext secrets and report any matches.",
"worker": { "role": "read-only", "tools": ["rg"] },
"input_files": ["crates/**/*.rs"],
"expected_artifacts": ["log", "report"],
"retry_policy": { "max_attempts": 1 }
}
]
}
@@ -373,3 +357,168 @@ Defaults are intentionally conservative:
`API_KEY`, and `PRIVATE_KEY` are rejected from adapter allowlists;
- secrets should remain in CodeWhale config providers or remote host config,
not in task instructions, argv, or fleet logs.
## Security and Trust Boundaries
Agent Fleet enforces a trust-level model that separates workers into four tiers.
The trust level determines what a worker can access (secrets, network, workspace
writes) and how it must prove its identity before being granted those privileges.
### Trust Levels
| Level | Access | Requires |
|-------|--------|----------|
| `sandbox` | No network, no secrets, writes only to `.codewhale/fleet/` | Nothing — default for new workers |
| `local` | Workspace reads, gated writes, configured secrets | Local process (same uid) |
| `remote-verified` | Network access, bounded capability grants, configured secrets | SSH host-key verification or equivalent attestation |
| `operator` | Full access to all secrets, unrestricted writes, any action | Operator-owned machine |
The default trust level is `sandbox`. Operators must explicitly raise trust for
SSH or container workers through the security policy.
### Security Policy
A fleet run may carry an optional `security_policy` block that defines the
default trust level, which secrets workers may resolve, what capabilities are
granted, and a ceiling on the maximum trust level:
```json
{
"security_policy": {
"default_trust_level": "sandbox",
"allowed_secrets": [
{"key": "GH_TOKEN", "source": "env"},
{"key": "CODEWHALE_API_KEY", "source": "keyring"}
],
"capability_grants": [
{
"capability": "network",
"scope": "github.com",
"reason": "PR review needs GitHub API access"
}
],
"max_trust_level": "remote_verified",
"require_identity_verification": true
}
}
```
When a run has no explicit `security_policy`, workers inherit conservative
defaults: `sandbox` trust, no secrets, no capability grants, and no identity
verification requirement.
### Secret References
Secrets are never stored as plaintext in task specs, alert configs, or worker
definitions. Instead, every secret is a `FleetSecretRef` — a key name plus an
optional source hint that tells the fleet manager where to resolve the value:
```json
{"key": "GH_TOKEN", "source": "env"}
```
Supported sources:
- `"env"` — resolve from a process environment variable
- `"keyring"` — resolve from the OS keyring (macOS Keychain, Windows Credential Manager, Linux Secret Service)
- `"file"` — resolve from `~/.codewhale/secrets/`
- absent — try all sources in default order (store first, then env)
Secret refs are redacted in logs and ledger entries: `<secret:env.GH_TOKEN>`.
### Worker Authentication
Workers authenticate to the fleet manager using one of four methods:
- **None** — local workers sharing the same uid (default)
- **SSH key** — with optional host-key fingerprint pinning and known-hosts
verification. The `host_key_fingerprint` field (SHA256:...) pins the expected
server key, preventing MITM attacks on first connection.
- **Token** — a bearer token resolved from a `FleetSecretRef`, useful for remote
workers behind a fleet proxy.
- **mTLS** — mutual TLS with a client certificate and a secret-backed private key.
SSH workers should always set `host_key_fingerprint` in production:
```json
{
"id": "builder-1",
"name": "Builder 1",
"trust_level": "remote_verified",
"host": {
"kind": "ssh",
"host": "builder.example.com",
"user": "codewhale",
"port": 22,
"identity": "~/.ssh/codewhale_fleet",
"host_key_fingerprint": "SHA256:aLGqZo1M6c...",
"known_hosts": "~/.ssh/known_hosts",
"working_directory": "/srv/codewhale/work",
"env_allowlist": ["CODEWHALE_PROFILE"],
"codewhale_binary": "/usr/local/bin/codewhale"
},
"capabilities": ["local", "linux", "tests"],
"max_concurrent_tasks": 1
}
```
### Alert Channel Secrets
Alert channels (Slack, generic webhook, PagerDuty) use `FleetAlertEndpoint`
instead of raw URLs. The webhook URL can be provided inline for non-sensitive
endpoints, or as a secret reference:
```json
{
"kind": "slack",
"webhook": {
"url_ref": {"key": "CODEWHALE_FLEET_SLACK_WEBHOOK", "source": "env"},
"secret_ref": {"key": "CODEWHALE_FLEET_SLACK_SIGNING_SECRET", "source": "keyring"}
}
}
```
The `secret_ref` field provides an optional HMAC secret for webhook payload
signing, never stored in plaintext.
### Config File
The `[fleet]` table in `config.toml` sets global trust policy defaults:
```toml
[fleet]
default_trust_level = "sandbox"
require_identity_verification = true
max_trust_level = "operator"
[fleet.exec]
# Recursion depth shares ONE axis with standalone sub-agents — a fleet worker
# IS a headless sub-agent. 0 blocks child agents (the root worker still runs);
# 3 is both the default and the ceiling, allowing up to three nested levels.
max_spawn_depth = 3
```
These defaults apply to fleet runs that don't carry their own `security_policy`.
Per-run policies always override the config defaults.
### Capability Grants
Capability grants are additive, scoped permissions that authorize specific
actions. By default, workers get no grants (least privilege). Common grants:
- `"network"` with scope `"github.com"` — allow outbound HTTP to GitHub
- `"git-push"` — allow `git push` to remotes
- `"provider-secrets"` — allow accessing provider API keys
- `"release"` — allow release-related operations (tagging, publishing)
- `"workspace-write"` with scope `"crates/tui/**"` — allow writes within a path
### Environment Sanitization
The host adapter layer enforces environment sanitization at worker start:
- Only `HOME`, `PATH`, and platform-specific vars (`SYSTEMROOT`, `COMSPEC`) are
injected into worker processes by default
- Environment allowlists reject any key containing `SECRET`, `TOKEN`, `PASSWORD`,
`PASSWD`, `API_KEY`, `CREDENTIAL`, or `PRIVATE_KEY`
- SSH workers only send explicitly allowlisted variables via OpenSSH `SendEnv`
- Secret values are never embedded in worker argv, task instructions, or fleet
logs — only secret refs appear, and they are always redacted
+12
View File
@@ -4,11 +4,18 @@ codewhale has two related concepts:
- **TUI mode**: what kind of visible interaction you're in (Plan/Agent/YOLO).
- **Approval mode**: how aggressively the UI asks before executing tools.
- **WhaleFlow overlay**: optional long-running workflow orchestration that can
run on top of any TUI mode when a task needs many coordinated workers.
Model selection is separate. `--model auto` and `/model auto` route each turn to
a concrete model and thinking level; they are not TUI modes and are not part of
the `Tab` cycle.
WhaleFlow is also separate from the `Tab` mode cycle. It is the visible
continuous-work layer for repeatable workflows, fleet workers, and swarm-style
fanout. The active mode still controls permissions; WhaleFlow controls whether a
large task is planned into a resumable workflow with its own progress view.
Each user turn includes a small `<turn_meta>` block with the current local date
and the concrete model sent to the provider. When `--model auto` is active, the
same block also records that the model was auto-routed.
@@ -56,6 +63,11 @@ the turn, `/goal complete` marks it done, `/goal blocked` marks it blocked, and
approval mode, or model route. This remains distinct from `--model auto`, which
only controls model and thinking selection.
WhaleFlow builds on the same separation: a goal can ask the agent to keep
working, while WhaleFlow supplies the repeatable workflow/progress surface for
large fanout. In the UI, a WhaleFlow run should be shown as an overlay on the
main screen, not as a fourth mode next to Agent, Plan, and YOLO.
App-server clients can persist a thread-scoped goal with `thread/goal/set`, read
it with `thread/goal/get`, and clear it with `thread/goal/clear`. That persisted
record carries `active`, `paused`, `blocked`, `usage_limited`, `budget_limited`,
+52
View File
@@ -0,0 +1,52 @@
# Agent Fleet dogfood smoke spec (#3166)
#
# This spec exercises the fleet end-to-end: create a run with two local
# workers, run a compile-check task and a review task, verify the ledger
# records receipts, and confirm the status surfaces work.
#
# Run:
# codewhale fleet run docs/examples/fleet-dogfood.toml --max-workers 2 --once
#
# Then check:
# codewhale fleet status
# codewhale fleet inspect <worker-id-from-status>
# codewhale fleet logs <worker-id-from-status>
name = "dogfood smoke"
labels = { milestone = "v0.8.60", class = "smoke" }
security_policy = { default_trust_level = "local", allowed_secrets = [], require_identity_verification = false }
[[tasks]]
id = "cargo-check"
name = "Workspace check"
description = "Run `cargo check --workspace` and report any compilation errors."
objective = "Verify the workspace compiles cleanly with zero errors."
instructions = "Run `cargo check --workspace` in the repo root. If it compiles cleanly, report success. If there are errors, list each file:line and the error message. Do NOT attempt to fix anything — just report what you found."
worker = { role = "release-checker", tool_profile = "read-only", tools = ["cargo"], capabilities = ["rust"] }
workspace = { required_files = ["Cargo.toml"], writable_paths = [".codewhale/fleet"], environment = { required = ["PATH"] } }
input_files = ["Cargo.toml"]
context = ["You are running in a fleet smoke test. Be concise. Only report the pass/fail and any specific errors."]
budget = { max_tokens = 8000, max_tool_calls = 12, max_seconds = 300 }
expected_artifacts = ["log", "report", "receipt"]
scorer = { kind = "exit_code" }
retry_policy = { max_attempts = 2, initial_backoff_seconds = 5, max_backoff_seconds = 30 }
timeout_seconds = 300
tags = ["smoke", "check"]
[[tasks]]
id = "protocol-review"
name = "Protocol review"
description = "Review fleet protocol types for security and correctness."
objective = "Inspect crates/protocol/src/fleet.rs and crates/secrets/src/lib.rs. Report any missing serde defaults, unsafe wire changes, or security-sensitive fields lacking SecretRef."
instructions = "Read crates/protocol/src/fleet.rs and crates/secrets/src/lib.rs. Check for: (1) new fields without serde(default) or skip_serializing_if, (2) raw secrets in struct fields instead of FleetSecretRef, (3) missing Clone/Debug/PartialEq derives on new types. Write a concise report with file:line references for each finding. If there are no findings, report 'all clear'."
worker = { role = "reviewer", tool_profile = "read-only", tools = ["rg", "git", "cargo"], capabilities = ["rust"] }
workspace = { required_files = ["crates/protocol/src/fleet.rs", "crates/secrets/src/lib.rs"], writable_paths = [".codewhale/fleet"], environment = { required = ["PATH"] } }
input_files = ["crates/protocol/src/fleet.rs", "crates/secrets/src/lib.rs"]
context = ["You are a fleet protocol reviewer. Be thorough but concise. Reference specific lines."]
budget = { max_tokens = 10000, max_tool_calls = 16, max_seconds = 600 }
expected_artifacts = ["log", "report", "receipt"]
scorer = { kind = "code_whale_verifier_prompt", prompt = "Verify the review includes at least one concrete file:line finding or explicitly says 'all clear'." }
retry_policy = { max_attempts = 1, initial_backoff_seconds = 10 }
timeout_seconds = 600
tags = ["smoke", "review", "protocol"]