merge: fleet security/trust + headless-worker foundation + depth unification
This commit is contained in:
@@ -936,6 +936,57 @@ default_text_model = "deepseek-ai/deepseek-v4-pro"
|
|||||||
# printf '%s\n' '{"content":"audit wrapper placeholder: configure an executor","success":false}'
|
# printf '%s\n' '{"content":"audit wrapper placeholder: configure an executor","success":false}'
|
||||||
# ```
|
# ```
|
||||||
|
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────────────
|
||||||
|
# Agent Fleet trust, security, and role registry (#3165, #3167)
|
||||||
|
# ─────────────────────────────────────────────────────────────────────────────────
|
||||||
|
# [fleet]
|
||||||
|
# # Default trust level for fleet workers: "sandbox" | "local" | "remote-verified" | "operator"
|
||||||
|
# default_trust_level = "sandbox"
|
||||||
|
# # Require SSH host-key verification before granting remote-verified trust
|
||||||
|
# require_identity_verification = true
|
||||||
|
# # Maximum trust level any worker may have
|
||||||
|
# max_trust_level = "operator"
|
||||||
|
#
|
||||||
|
# # Headless worker execution hardening (#3027)
|
||||||
|
# [fleet.exec]
|
||||||
|
# # Tools always allowed regardless of role
|
||||||
|
# allowed_tools = []
|
||||||
|
# # Tools always disallowed (overrides role and task spec)
|
||||||
|
# disallowed_tools = ["exec_shell"]
|
||||||
|
# # Hard ceiling on worker steps (tool calls + model turns)
|
||||||
|
# max_turns = 500
|
||||||
|
# # Recursive child-agent depth for fleet workers. Shares ONE recursion axis
|
||||||
|
# # with standalone sub-agents (a fleet worker IS a headless sub-agent).
|
||||||
|
# # 0 blocks child agents (the root worker still runs); 3 is the default and the
|
||||||
|
# # cap, affording up to three nested delegation levels below the root worker.
|
||||||
|
# max_spawn_depth = 3
|
||||||
|
# # Extra system prompt injected into every headless worker
|
||||||
|
# append_system_prompt = "Never modify .git/config or change remotes."
|
||||||
|
# # Output format: "text" (default) or "stream-json" for ndjson events
|
||||||
|
# output_format = "text"
|
||||||
|
#
|
||||||
|
# # Built-in role presets are always available: smoke-runner, reviewer, builder, read-only.
|
||||||
|
# # User-defined roles here override or extend the built-in set. Any key under
|
||||||
|
# # [fleet.roles] becomes a valid role name that task specs can reference.
|
||||||
|
# [fleet.roles.ci-linter]
|
||||||
|
# description = "Runs linters and formatters"
|
||||||
|
# tool_profile = "read-only"
|
||||||
|
# tools = ["cargo", "cargo-clippy", "cargo-fmt"]
|
||||||
|
# capabilities = ["rust"]
|
||||||
|
# max_tokens = 12000  # NOTE: not a role-preset field; token budgets belong on the task spec
|
||||||
|
# max_tool_calls = 20  # NOTE: not a role-preset field; tool-call limits belong on the task spec
|
||||||
|
# timeout_seconds = 600
|
||||||
|
#
|
||||||
|
# [fleet.roles.pr-reviewer]
|
||||||
|
# description = "Reviews PRs with GitHub access"
|
||||||
|
# tool_profile = "read-only"
|
||||||
|
# tools = ["git", "gh", "rg"]
|
||||||
|
# capabilities = ["git", "github"]
|
||||||
|
# max_tokens = 16000  # NOTE: not a role-preset field; token budgets belong on the task spec
|
||||||
|
# max_tool_calls = 30  # NOTE: not a role-preset field; tool-call limits belong on the task spec
|
||||||
|
# timeout_seconds = 900
|
||||||
|
# trust_level = "local"
|
||||||
|
|
||||||
# ─────────────────────────────────────────────────────────────────────────────────
|
# ─────────────────────────────────────────────────────────────────────────────────
|
||||||
# Requirements (admin constraints) example file
|
# Requirements (admin constraints) example file
|
||||||
# ─────────────────────────────────────────────────────────────────────────────────
|
# ─────────────────────────────────────────────────────────────────────────────────
|
||||||
|
|||||||
+44
-2
@@ -22,7 +22,7 @@ use codewhale_mcp::{McpServerDefinition, run_stdio_server};
|
|||||||
use codewhale_secrets::Secrets;
|
use codewhale_secrets::Secrets;
|
||||||
use codewhale_state::{StateStore, ThreadListFilters};
|
use codewhale_state::{StateStore, ThreadListFilters};
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, ValueEnum)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
|
||||||
enum ProviderArg {
|
enum ProviderArg {
|
||||||
Deepseek,
|
Deepseek,
|
||||||
NvidiaNim,
|
NvidiaNim,
|
||||||
@@ -43,6 +43,10 @@ enum ProviderArg {
|
|||||||
Huggingface,
|
Huggingface,
|
||||||
Together,
|
Together,
|
||||||
OpenaiCodex,
|
OpenaiCodex,
|
||||||
|
Anthropic,
|
||||||
|
Zai,
|
||||||
|
Stepfun,
|
||||||
|
Minimax,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<ProviderArg> for ProviderKind {
|
impl From<ProviderArg> for ProviderKind {
|
||||||
@@ -67,6 +71,10 @@ impl From<ProviderArg> for ProviderKind {
|
|||||||
ProviderArg::Huggingface => ProviderKind::Huggingface,
|
ProviderArg::Huggingface => ProviderKind::Huggingface,
|
||||||
ProviderArg::Together => ProviderKind::Together,
|
ProviderArg::Together => ProviderKind::Together,
|
||||||
ProviderArg::OpenaiCodex => ProviderKind::OpenaiCodex,
|
ProviderArg::OpenaiCodex => ProviderKind::OpenaiCodex,
|
||||||
|
ProviderArg::Anthropic => ProviderKind::Anthropic,
|
||||||
|
ProviderArg::Zai => ProviderKind::Zai,
|
||||||
|
ProviderArg::Stepfun => ProviderKind::Stepfun,
|
||||||
|
ProviderArg::Minimax => ProviderKind::Minimax,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -787,7 +795,7 @@ fn provider_slot(provider: ProviderKind) -> &'static str {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Provider order used by the `auth list` and `auth status` outputs.
|
/// Provider order used by the `auth list` and `auth status` outputs.
|
||||||
const PROVIDER_LIST: [ProviderKind; 20] = [
|
const PROVIDER_LIST: [ProviderKind; 24] = [
|
||||||
ProviderKind::Deepseek,
|
ProviderKind::Deepseek,
|
||||||
ProviderKind::NvidiaNim,
|
ProviderKind::NvidiaNim,
|
||||||
ProviderKind::Openai,
|
ProviderKind::Openai,
|
||||||
@@ -808,6 +816,10 @@ const PROVIDER_LIST: [ProviderKind; 20] = [
|
|||||||
ProviderKind::Huggingface,
|
ProviderKind::Huggingface,
|
||||||
ProviderKind::Together,
|
ProviderKind::Together,
|
||||||
ProviderKind::OpenaiCodex,
|
ProviderKind::OpenaiCodex,
|
||||||
|
ProviderKind::Anthropic,
|
||||||
|
ProviderKind::Zai,
|
||||||
|
ProviderKind::Stepfun,
|
||||||
|
ProviderKind::Minimax,
|
||||||
];
|
];
|
||||||
|
|
||||||
fn provider_is_supported_by_tui(provider: ProviderKind) -> bool {
|
fn provider_is_supported_by_tui(provider: ProviderKind) -> bool {
|
||||||
@@ -833,6 +845,10 @@ fn provider_is_supported_by_tui(provider: ProviderKind) -> bool {
|
|||||||
| ProviderKind::Huggingface
|
| ProviderKind::Huggingface
|
||||||
| ProviderKind::Together
|
| ProviderKind::Together
|
||||||
| ProviderKind::OpenaiCodex
|
| ProviderKind::OpenaiCodex
|
||||||
|
| ProviderKind::Anthropic
|
||||||
|
| ProviderKind::Zai
|
||||||
|
| ProviderKind::Stepfun
|
||||||
|
| ProviderKind::Minimax
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2606,6 +2622,32 @@ mod tests {
|
|||||||
}))
|
}))
|
||||||
));
|
));
|
||||||
|
|
||||||
|
for (provider, expected) in [
|
||||||
|
("anthropic", ProviderArg::Anthropic),
|
||||||
|
("zai", ProviderArg::Zai),
|
||||||
|
("stepfun", ProviderArg::Stepfun),
|
||||||
|
("minimax", ProviderArg::Minimax),
|
||||||
|
] {
|
||||||
|
let cli = parse_ok(&[
|
||||||
|
"deepseek",
|
||||||
|
"auth",
|
||||||
|
"set",
|
||||||
|
"--provider",
|
||||||
|
provider,
|
||||||
|
"--api-key-stdin",
|
||||||
|
]);
|
||||||
|
assert!(matches!(
|
||||||
|
cli.command,
|
||||||
|
Some(Commands::Auth(AuthArgs {
|
||||||
|
command: AuthCommand::Set {
|
||||||
|
provider,
|
||||||
|
api_key: None,
|
||||||
|
api_key_stdin: true,
|
||||||
|
}
|
||||||
|
})) if provider == expected
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
let cli = parse_ok(&["deepseek", "auth", "list"]);
|
let cli = parse_ok(&["deepseek", "auth", "list"]);
|
||||||
assert!(matches!(
|
assert!(matches!(
|
||||||
cli.command,
|
cli.command,
|
||||||
|
|||||||
@@ -663,6 +663,10 @@ pub struct ConfigToml {
|
|||||||
/// lifecycle `[hooks]` table so config rewrites preserve existing hooks.
|
/// lifecycle `[hooks]` table so config rewrites preserve existing hooks.
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub hook_sinks: Option<HookSinksToml>,
|
pub hook_sinks: Option<HookSinksToml>,
|
||||||
|
/// Agent Fleet trust and security policy (#3165). When absent, fleet
|
||||||
|
/// workers inherit conservative Sandbox defaults.
|
||||||
|
#[serde(default)]
|
||||||
|
pub fleet: Option<FleetConfigToml>,
|
||||||
#[serde(flatten)]
|
#[serde(flatten)]
|
||||||
pub extras: BTreeMap<String, toml::Value>,
|
pub extras: BTreeMap<String, toml::Value>,
|
||||||
}
|
}
|
||||||
@@ -1059,6 +1063,236 @@ impl Default for SnapshotsToml {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// On-disk schema for the `[fleet]` table (#3165). See `config.example.toml`
|
||||||
|
/// and `docs/FLEET.md` for documentation.
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct FleetConfigToml {
|
||||||
|
/// Default trust level for fleet workers. One of `"sandbox"`, `"local"`,
|
||||||
|
/// `"remote-verified"`, or `"operator"`. Defaults to `"sandbox"`.
|
||||||
|
#[serde(default = "default_fleet_trust_level_str")]
|
||||||
|
pub default_trust_level: String,
|
||||||
|
/// Require identity verification for remote (SSH) workers before
|
||||||
|
/// granting them `remote-verified` trust. Defaults to true.
|
||||||
|
#[serde(default = "default_fleet_require_identity")]
|
||||||
|
pub require_identity_verification: bool,
|
||||||
|
/// Maximum trust level any worker may have (`"sandbox"`, `"local"`,
|
||||||
|
/// `"remote-verified"`, or `"operator"`). Defaults to `"operator"`.
|
||||||
|
#[serde(default = "default_fleet_max_trust_level_str")]
|
||||||
|
pub max_trust_level: String,
|
||||||
|
/// User-defined and built-in role presets.
|
||||||
|
///
|
||||||
|
/// Each role defines default tool profiles, capabilities, budgets, and
|
||||||
|
/// trust settings that task specs can reference by name. Built-in roles
|
||||||
|
/// (`smoke-runner`, `reviewer`, `builder`, `read-only`) are always
|
||||||
|
/// available; user-defined roles in config override or extend them.
|
||||||
|
#[serde(default)]
|
||||||
|
pub roles: BTreeMap<String, FleetRolePreset>,
|
||||||
|
/// Headless worker execution hardening (#3027).
|
||||||
|
#[serde(default)]
|
||||||
|
pub exec: FleetExecConfig,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Default recursion-depth budget for the headless worker runtime.
///
/// This constant and [`MAX_SPAWN_DEPTH_CEILING`] are meant to be the single
/// source of truth for recursion depth, shared by standalone sub-agents and
/// fleet workers so the two paths cannot drift apart:
/// - `DEFAULT_SPAWN_DEPTH` is the budget used when nothing is configured
///   (the sub-agent runtime's `DEFAULT_MAX_SPAWN_DEPTH` is defined as this
///   value).
/// - [`MAX_SPAWN_DEPTH_CEILING`] is the hard safety cap that every configured
///   value (fleet `max_spawn_depth`, `agent_open`'s `max_depth`) clamps to.
///
/// A worker starts at `spawn_depth = 0` and may spawn a child while
/// `spawn_depth + 1 <= max_spawn_depth`. A budget of N therefore allows N
/// nested delegation levels below the root worker, so the default allows
/// three. The root worker itself still runs at depth 0 when the budget is 0.
pub const DEFAULT_SPAWN_DEPTH: u32 = 3;
|
||||||
|
|
||||||
|
/// Absolute upper bound on recursion depth for any worker or sub-agent.
///
/// Companion to [`DEFAULT_SPAWN_DEPTH`]. The fleet clamp and `agent_open`
/// validation are both intended to read this one constant, so raising it
/// lifts the limit everywhere at once.
pub const MAX_SPAWN_DEPTH_CEILING: u32 = 3;
|
||||||
|
|
||||||
|
/// Headless worker execution constraints (#3027).
|
||||||
|
///
|
||||||
|
/// These limits apply to all fleet workers and sub-agents spawned through
|
||||||
|
/// the headless worker runtime. Task specs can tighten but not loosen them.
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct FleetExecConfig {
|
||||||
|
/// Tools that are always allowed regardless of role or task spec.
|
||||||
|
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||||
|
pub allowed_tools: Vec<String>,
|
||||||
|
/// Tools that are always disallowed, overriding role and task spec.
|
||||||
|
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||||
|
pub disallowed_tools: Vec<String>,
|
||||||
|
/// Hard ceiling on sub-agent steps (tool calls + model turns).
|
||||||
|
/// Workers that exceed this are terminated. Default: unbounded (u32::MAX).
|
||||||
|
#[serde(default = "default_fleet_max_turns")]
|
||||||
|
pub max_turns: u32,
|
||||||
|
/// Recursive child-agent budget for headless fleet workers.
|
||||||
|
/// Defaults to [`DEFAULT_SPAWN_DEPTH`] (3) so a fleet worker has the SAME
|
||||||
|
/// recursion budget as a standalone sub-agent — fleet and sub-agents are one
|
||||||
|
/// substrate, not two. Set 0 to block child `agent_open` (the root worker
|
||||||
|
/// still runs); the value is clamped to [`MAX_SPAWN_DEPTH_CEILING`].
|
||||||
|
#[serde(default = "default_fleet_max_spawn_depth")]
|
||||||
|
pub max_spawn_depth: u32,
|
||||||
|
/// Extra system prompt text appended to every headless worker.
|
||||||
|
/// Useful for injecting org-wide policy or behavior constraints.
|
||||||
|
#[serde(default, skip_serializing_if = "String::is_empty")]
|
||||||
|
pub append_system_prompt: String,
|
||||||
|
/// Output format for fleet worker results.
|
||||||
|
/// `"text"` (default) or `"stream-json"` for newline-delimited JSON events.
|
||||||
|
#[serde(default = "default_fleet_output_format")]
|
||||||
|
pub output_format: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Serde default for `FleetExecConfig::max_turns`: no practical step limit.
fn default_fleet_max_turns() -> u32 {
    u32::MAX
}
|
||||||
|
|
||||||
|
fn default_fleet_max_spawn_depth() -> u32 {
|
||||||
|
DEFAULT_SPAWN_DEPTH
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Serde default for `FleetExecConfig::output_format`: plain text output.
fn default_fleet_output_format() -> String {
    String::from("text")
}
|
||||||
|
|
||||||
|
impl Default for FleetExecConfig {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
allowed_tools: Vec::new(),
|
||||||
|
disallowed_tools: Vec::new(),
|
||||||
|
max_turns: default_fleet_max_turns(),
|
||||||
|
max_spawn_depth: default_fleet_max_spawn_depth(),
|
||||||
|
append_system_prompt: String::new(),
|
||||||
|
output_format: default_fleet_output_format(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A named role preset that bundles common worker settings.
|
||||||
|
///
|
||||||
|
/// Task specs reference a role name (e.g. `"role": "reviewer"`), and the
|
||||||
|
/// fleet manager fills in any missing fields from the preset. User-defined
|
||||||
|
/// roles in `[fleet.roles]` override built-in defaults with the same name.
|
||||||
|
///
|
||||||
|
/// Token budgets and tool-call limits are task-level decisions — they don't
|
||||||
|
/// belong on role presets. Use `timeout_seconds` as the safety bound.
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
pub struct FleetRolePreset {
|
||||||
|
/// Short description of what this role is for.
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub description: Option<String>,
|
||||||
|
/// Default tool profile (`"read-only"`, `"read-write"`, or `"custom"`).
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub tool_profile: Option<String>,
|
||||||
|
/// Default set of tool names available to this role.
|
||||||
|
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||||
|
pub tools: Vec<String>,
|
||||||
|
/// Default capability tags (e.g. `"rust"`, `"git"`, `"gh"`).
|
||||||
|
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||||
|
pub capabilities: Vec<String>,
|
||||||
|
/// Default timeout in seconds for tasks using this role.
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub timeout_seconds: Option<u64>,
|
||||||
|
/// Default trust level override for this role.
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub trust_level: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Serde default for `FleetConfigToml::default_trust_level`: the most
/// restrictive level, `"sandbox"`.
fn default_fleet_trust_level_str() -> String {
    String::from("sandbox")
}
|
||||||
|
|
||||||
|
/// Serde default for `FleetConfigToml::require_identity_verification`:
/// verification is required unless the config explicitly turns it off.
fn default_fleet_require_identity() -> bool {
    true
}
|
||||||
|
|
||||||
|
/// Serde default for `FleetConfigToml::max_trust_level`: `"operator"`, the
/// highest level, so no extra ceiling is imposed by default.
fn default_fleet_max_trust_level_str() -> String {
    String::from("operator")
}
|
||||||
|
|
||||||
|
impl Default for FleetConfigToml {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
default_trust_level: default_fleet_trust_level_str(),
|
||||||
|
require_identity_verification: default_fleet_require_identity(),
|
||||||
|
max_trust_level: default_fleet_max_trust_level_str(),
|
||||||
|
roles: BTreeMap::new(),
|
||||||
|
exec: FleetExecConfig::default(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FleetConfigToml {
|
||||||
|
/// Resolve a role preset by name. Checks user-defined roles first,
|
||||||
|
/// then falls back to built-in role defaults.
|
||||||
|
#[must_use]
|
||||||
|
pub fn resolve_role(&self, name: &str) -> Option<FleetRolePreset> {
|
||||||
|
self.roles
|
||||||
|
.get(name)
|
||||||
|
.cloned()
|
||||||
|
.or_else(|| built_in_role_presets().get(name).cloned())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Built-in role presets that are always available without config.
|
||||||
|
#[must_use]
|
||||||
|
pub fn built_in_role_presets() -> BTreeMap<String, FleetRolePreset> {
|
||||||
|
[
|
||||||
|
(
|
||||||
|
"smoke-runner".to_string(),
|
||||||
|
FleetRolePreset {
|
||||||
|
description: Some("Lightweight read-only smoke check worker".to_string()),
|
||||||
|
tool_profile: Some("read-only".to_string()),
|
||||||
|
tools: vec![],
|
||||||
|
capabilities: vec![],
|
||||||
|
timeout_seconds: Some(300),
|
||||||
|
trust_level: Some("local".to_string()),
|
||||||
|
},
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"reviewer".to_string(),
|
||||||
|
FleetRolePreset {
|
||||||
|
description: Some("Read-only code and documentation review".to_string()),
|
||||||
|
tool_profile: Some("read-only".to_string()),
|
||||||
|
tools: vec![],
|
||||||
|
capabilities: vec![],
|
||||||
|
timeout_seconds: Some(600),
|
||||||
|
trust_level: None,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"builder".to_string(),
|
||||||
|
FleetRolePreset {
|
||||||
|
description: Some(
|
||||||
|
"Read-write builder with compilation and test access".to_string(),
|
||||||
|
),
|
||||||
|
tool_profile: Some("read-write".to_string()),
|
||||||
|
tools: vec![],
|
||||||
|
capabilities: vec![],
|
||||||
|
timeout_seconds: Some(1800),
|
||||||
|
trust_level: Some("local".to_string()),
|
||||||
|
},
|
||||||
|
),
|
||||||
|
(
|
||||||
|
"read-only".to_string(),
|
||||||
|
FleetRolePreset {
|
||||||
|
description: Some(
|
||||||
|
"Minimal read-only observer with no writes or secrets".to_string(),
|
||||||
|
),
|
||||||
|
tool_profile: Some("read-only".to_string()),
|
||||||
|
tools: vec![],
|
||||||
|
capabilities: vec![],
|
||||||
|
timeout_seconds: Some(300),
|
||||||
|
trust_level: Some("sandbox".to_string()),
|
||||||
|
},
|
||||||
|
),
|
||||||
|
]
|
||||||
|
.into()
|
||||||
|
}
|
||||||
|
|
||||||
/// On-disk schema for the `[network]` table (#135). See `config.example.toml`
|
/// On-disk schema for the `[network]` table (#135). See `config.example.toml`
|
||||||
/// for documentation.
|
/// for documentation.
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
@@ -7179,6 +7413,32 @@ fallback_providers = ["deepseek", "openrouter"]
|
|||||||
assert!(!serialized.contains("fallback_providers"));
|
assert!(!serialized.contains("fallback_providers"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn fleet_exec_config_default_matches_subagent_spawn_depth() {
    // One recursion axis for fleet workers and standalone sub-agents: the
    // fleet default must be DEFAULT_SPAWN_DEPTH, which is pinned to 3 and must
    // never exceed the hard ceiling.
    let default_depth = FleetExecConfig::default().max_spawn_depth;

    assert_eq!(default_depth, DEFAULT_SPAWN_DEPTH);
    assert_eq!(default_depth, 3);
    assert!(DEFAULT_SPAWN_DEPTH <= MAX_SPAWN_DEPTH_CEILING);
}
|
||||||
|
|
||||||
|
#[test]
fn fleet_exec_config_parses_max_spawn_depth() {
    // An explicit `[fleet.exec].max_spawn_depth` must survive parsing as-is.
    let parsed: ConfigToml = toml::from_str(
        r#"
[fleet.exec]
max_spawn_depth = 2
"#,
    )
    .expect("fleet exec config should parse");

    let fleet = parsed.fleet.expect("fleet config");
    assert_eq!(fleet.exec.max_spawn_depth, 2);
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn fallback_providers_do_not_change_runtime_resolution() {
|
fn fallback_providers_do_not_change_runtime_resolution() {
|
||||||
let _lock = env_lock();
|
let _lock = env_lock();
|
||||||
|
|||||||
@@ -12,7 +12,7 @@
|
|||||||
use std::collections::BTreeMap;
|
use std::collections::BTreeMap;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
|
||||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
use serde::{Deserialize, Deserializer, Serialize, Serializer, de};
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
|
|
||||||
pub const FLEET_PROTOCOL_VERSION: &str = "0.1.0";
|
pub const FLEET_PROTOCOL_VERSION: &str = "0.1.0";
|
||||||
@@ -45,6 +45,8 @@ pub struct FleetRun {
|
|||||||
pub worker_specs: Vec<FleetWorkerSpec>,
|
pub worker_specs: Vec<FleetWorkerSpec>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub labels: BTreeMap<String, String>,
|
pub labels: BTreeMap<String, String>,
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub security_policy: Option<FleetSecurityPolicy>,
|
||||||
pub created_at: String,
|
pub created_at: String,
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
pub updated_at: Option<String>,
|
pub updated_at: Option<String>,
|
||||||
@@ -260,6 +262,9 @@ pub struct FleetWorkerSpec {
|
|||||||
pub name: String,
|
pub name: String,
|
||||||
pub host: FleetHostSpec,
|
pub host: FleetHostSpec,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub trust_level: Option<FleetTrustLevel>,
|
||||||
|
#[serde(default)]
|
||||||
pub labels: BTreeMap<String, String>,
|
pub labels: BTreeMap<String, String>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub capabilities: Vec<String>,
|
pub capabilities: Vec<String>,
|
||||||
@@ -280,6 +285,14 @@ pub enum FleetHostSpec {
|
|||||||
user: Option<String>,
|
user: Option<String>,
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
identity: Option<PathBuf>,
|
identity: Option<PathBuf>,
|
||||||
|
/// Known hosts file for host-key verification.
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
known_hosts: Option<PathBuf>,
|
||||||
|
/// Expected host key fingerprint (SHA256:...) for key pinning.
|
||||||
|
/// When set, the connection is only trusted if the server's
|
||||||
|
/// host key matches this fingerprint exactly.
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
host_key_fingerprint: Option<String>,
|
||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
working_directory: Option<PathBuf>,
|
working_directory: Option<PathBuf>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
@@ -288,6 +301,8 @@ pub enum FleetHostSpec {
|
|||||||
#[serde(skip_serializing_if = "Option::is_none")]
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
codewhale_binary: Option<String>,
|
codewhale_binary: Option<String>,
|
||||||
},
|
},
|
||||||
|
#[serde(alias = "container")]
|
||||||
|
#[serde(alias = "Container")]
|
||||||
Docker {
|
Docker {
|
||||||
image: String,
|
image: String,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
@@ -295,6 +310,264 @@ pub enum FleetHostSpec {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ── Security and trust types ────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/// Trust classification assigned to a worker host.
|
||||||
|
///
|
||||||
|
/// The trust level determines what a worker is allowed to do and what
|
||||||
|
/// secrets it may access. The default for new workers is [`FleetTrustLevel::Sandbox`];
|
||||||
|
/// operators must explicitly raise trust for SSH or container workers.
|
||||||
|
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
|
||||||
|
#[serde(rename_all = "snake_case")]
|
||||||
|
pub enum FleetTrustLevel {
|
||||||
|
/// Fully isolated: no network, no secrets, no writes outside `.codewhale/fleet/`.
|
||||||
|
/// Suitable for untrusted code review, community PR checks, or third-party tool runs.
|
||||||
|
Sandbox = 0,
|
||||||
|
/// Local-only worker with access to the workspace and configured secrets.
|
||||||
|
/// Default for local workers. May read repo files but writes are gated.
|
||||||
|
Local = 1,
|
||||||
|
/// Worker on a known remote host with verified identity and a bounded
|
||||||
|
/// set of explicitly granted capabilities. Requires SSH host-key
|
||||||
|
/// verification or equivalent attestation.
|
||||||
|
#[serde(alias = "remote-verified", alias = "remoteVerified")]
|
||||||
|
RemoteVerified = 2,
|
||||||
|
/// Fully trusted worker (e.g. operator's own machine, CI runner).
|
||||||
|
/// Has access to all configured secrets and may perform any action the
|
||||||
|
/// operator can. Reserved for dogfood smoke and operator-owned machines.
|
||||||
|
Operator = 3,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for FleetTrustLevel {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self::Sandbox
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FleetTrustLevel {
|
||||||
|
/// Whether this trust level is allowed to access provider secrets.
|
||||||
|
#[must_use]
|
||||||
|
pub fn may_access_secrets(&self) -> bool {
|
||||||
|
matches!(self, Self::Operator | Self::RemoteVerified | Self::Local)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Whether this trust level is allowed to write outside `.codewhale/fleet/`.
|
||||||
|
#[must_use]
|
||||||
|
pub fn may_write_workspace(&self) -> bool {
|
||||||
|
matches!(self, Self::Operator | Self::Local)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Whether this trust level is allowed network access.
|
||||||
|
#[must_use]
|
||||||
|
pub fn may_access_network(&self) -> bool {
|
||||||
|
matches!(self, Self::Operator | Self::RemoteVerified | Self::Local)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Security policy applied to a fleet run.
|
||||||
|
///
|
||||||
|
/// A policy defines the default trust level for workers, which secrets
|
||||||
|
/// may be resolved, and what capabilities are granted. When a run has no
|
||||||
|
/// explicit policy, workers inherit conservative defaults.
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||||
|
pub struct FleetSecurityPolicy {
|
||||||
|
/// Default trust level for workers that don't declare one explicitly.
|
||||||
|
#[serde(default)]
|
||||||
|
pub default_trust_level: FleetTrustLevel,
|
||||||
|
/// Secret refs that workers may resolve. An empty list means no secrets
|
||||||
|
/// are available. Each entry is a key name, not a value.
|
||||||
|
#[serde(default)]
|
||||||
|
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||||
|
pub allowed_secrets: Vec<FleetSecretRef>,
|
||||||
|
/// Capability grants for workers in this run.
|
||||||
|
#[serde(default)]
|
||||||
|
#[serde(skip_serializing_if = "Vec::is_empty")]
|
||||||
|
pub capability_grants: Vec<FleetCapabilityGrant>,
|
||||||
|
/// Maximum trust level any worker in this run may have, even if the
|
||||||
|
/// worker spec requests higher. Defaults to Operator (no ceiling).
|
||||||
|
#[serde(default = "default_max_trust_level")]
|
||||||
|
pub max_trust_level: FleetTrustLevel,
|
||||||
|
/// Require identity verification for remote workers. When true, SSH
|
||||||
|
/// workers must pass host-key verification before being trusted at
|
||||||
|
/// RemoteVerified level; unverified remotes stay at Sandbox.
|
||||||
|
#[serde(default)]
|
||||||
|
pub require_identity_verification: bool,
|
||||||
|
/// Allow conservative parallel execution of read-only tools (#2983).
|
||||||
|
/// When true, workers may batch independent read-only tool calls
|
||||||
|
/// (reads, searches, greps) into concurrent turns. Disabled by default
|
||||||
|
/// to avoid overwhelming providers or hitting rate limits.
|
||||||
|
#[serde(default)]
|
||||||
|
pub allow_parallel_reads: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn default_max_trust_level() -> FleetTrustLevel {
|
||||||
|
FleetTrustLevel::Operator
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for FleetSecurityPolicy {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
default_trust_level: FleetTrustLevel::Sandbox,
|
||||||
|
allowed_secrets: Vec::new(),
|
||||||
|
capability_grants: Vec::new(),
|
||||||
|
max_trust_level: FleetTrustLevel::Operator,
|
||||||
|
require_identity_verification: false,
|
||||||
|
allow_parallel_reads: false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A reference to a secret that should be resolved at runtime, never
|
||||||
|
/// serialized as a plaintext value.
|
||||||
|
///
|
||||||
|
/// Secret refs appear in task specs, alert configs, and worker definitions.
|
||||||
|
/// The actual secret value is resolved by the fleet manager from the
|
||||||
|
/// secrets backend (OS keyring, environment, or file store) just before
|
||||||
|
/// the worker starts.
|
||||||
|
#[derive(Debug, Clone, Serialize, PartialEq, Eq, Hash, PartialOrd, Ord)]
|
||||||
|
pub struct FleetSecretRef {
|
||||||
|
/// The secret key name (e.g. `"CODEWHALE_API_KEY"`, `"GH_TOKEN"`).
|
||||||
|
pub key: String,
|
||||||
|
/// Optional source hint for resolution order.
|
||||||
|
/// - `"env"` — resolve from environment variable
|
||||||
|
/// - `"keyring"` — resolve from OS keyring
|
||||||
|
/// - `"file"` — resolve from `~/.codewhale/secrets/`
|
||||||
|
/// - absent / null — try all sources in default order
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub source: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FleetSecretRef {
|
||||||
|
/// Create a secret ref from a key name with default resolution.
|
||||||
|
#[must_use]
|
||||||
|
pub fn new(key: impl Into<String>) -> Self {
|
||||||
|
Self {
|
||||||
|
key: key.into(),
|
||||||
|
source: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a secret ref with an explicit source.
|
||||||
|
#[must_use]
|
||||||
|
pub fn with_source(key: impl Into<String>, source: impl Into<String>) -> Self {
|
||||||
|
Self {
|
||||||
|
key: key.into(),
|
||||||
|
source: Some(source.into()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Redacted display form for logging. Shows the key name and source
|
||||||
|
/// but never the resolved value.
|
||||||
|
#[must_use]
|
||||||
|
pub fn redacted(&self) -> String {
|
||||||
|
match &self.source {
|
||||||
|
Some(src) => format!("<secret:{}.{}>", src, self.key),
|
||||||
|
None => format!("<secret:{}>", self.key),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Display for FleetSecretRef {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(f, "{}", self.redacted())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<&str> for FleetSecretRef {
|
||||||
|
fn from(key: &str) -> Self {
|
||||||
|
Self::new(key)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<String> for FleetSecretRef {
|
||||||
|
fn from(key: String) -> Self {
|
||||||
|
Self::new(key)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'de> Deserialize<'de> for FleetSecretRef {
|
||||||
|
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||||||
|
where
|
||||||
|
D: Deserializer<'de>,
|
||||||
|
{
|
||||||
|
#[derive(Deserialize)]
|
||||||
|
#[serde(untagged)]
|
||||||
|
enum SecretRefWire {
|
||||||
|
Key(String),
|
||||||
|
Structured {
|
||||||
|
key: String,
|
||||||
|
#[serde(default)]
|
||||||
|
source: Option<String>,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
match SecretRefWire::deserialize(deserializer)? {
|
||||||
|
SecretRefWire::Key(key) if !key.trim().is_empty() => Ok(FleetSecretRef::new(key)),
|
||||||
|
SecretRefWire::Key(_) => Err(de::Error::custom("secret ref key cannot be empty")),
|
||||||
|
SecretRefWire::Structured { key, source } if !key.trim().is_empty() => {
|
||||||
|
Ok(FleetSecretRef { key, source })
|
||||||
|
}
|
||||||
|
SecretRefWire::Structured { .. } => {
|
||||||
|
Err(de::Error::custom("secret ref key cannot be empty"))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// How a worker authenticates to the fleet manager.
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||||
|
#[serde(tag = "method", rename_all = "snake_case")]
|
||||||
|
pub enum FleetWorkerAuth {
|
||||||
|
/// No authentication (local workers share the same uid).
|
||||||
|
None,
|
||||||
|
/// SSH key-based authentication with host-key verification.
|
||||||
|
SshKey {
|
||||||
|
/// Path to the SSH identity file (may be a FleetSecretRef in JSON
|
||||||
|
/// as `{"key": "...", "source": "file"}`).
|
||||||
|
identity: PathBuf,
|
||||||
|
/// Known hosts file for host-key verification.
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
known_hosts: Option<PathBuf>,
|
||||||
|
/// Expected host key fingerprint for pinning.
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
host_key_fingerprint: Option<String>,
|
||||||
|
/// SSH user for the connection.
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
user: Option<String>,
|
||||||
|
},
|
||||||
|
/// Token-based authentication for remote workers behind a fleet proxy.
|
||||||
|
Token {
|
||||||
|
/// Reference to the token secret.
|
||||||
|
token_ref: FleetSecretRef,
|
||||||
|
},
|
||||||
|
/// mTLS certificate-based authentication.
|
||||||
|
Mtls {
|
||||||
|
/// Path to the client certificate.
|
||||||
|
cert_path: PathBuf,
|
||||||
|
/// Reference to the private key secret.
|
||||||
|
key_ref: FleetSecretRef,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A capability grant that explicitly authorizes a worker to perform
|
||||||
|
/// a specific class of action.
|
||||||
|
///
|
||||||
|
/// By default, new workers get no grants (least privilege). Grants are
|
||||||
|
/// additive: a worker's effective capabilities are the union of its
|
||||||
|
/// trust-level defaults plus any explicit grants.
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||||
|
pub struct FleetCapabilityGrant {
|
||||||
|
/// The capability being granted (e.g. `"network"`, `"git-push"`,
|
||||||
|
/// `"provider-secrets"`, `"release"`).
|
||||||
|
pub capability: String,
|
||||||
|
/// Optional scope limiting the grant (e.g. `"github.com"` for network,
|
||||||
|
/// `"crates/tui/**"` for file writes).
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub scope: Option<String>,
|
||||||
|
/// Optional justification for the grant (audit trail).
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub reason: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
/// Runtime status of a worker.
|
/// Runtime status of a worker.
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||||
#[serde(rename_all = "snake_case")]
|
#[serde(rename_all = "snake_case")]
|
||||||
@@ -469,18 +742,84 @@ pub enum FleetAlertEventClass {
|
|||||||
#[serde(tag = "kind", rename_all = "snake_case")]
|
#[serde(tag = "kind", rename_all = "snake_case")]
|
||||||
pub enum FleetAlertChannel {
|
pub enum FleetAlertChannel {
|
||||||
Slack {
|
Slack {
|
||||||
webhook_url: String,
|
/// Webhook URL, resolved from a secret ref or inline.
|
||||||
|
#[serde(flatten)]
|
||||||
|
webhook: FleetAlertEndpoint,
|
||||||
},
|
},
|
||||||
Webhook {
|
Webhook {
|
||||||
url: String,
|
#[serde(flatten)]
|
||||||
secret: Option<String>,
|
endpoint: FleetAlertEndpoint,
|
||||||
},
|
},
|
||||||
|
#[serde(alias = "pager_duty")]
|
||||||
|
#[serde(alias = "pagerduty")]
|
||||||
PagerDuty {
|
PagerDuty {
|
||||||
routing_key: String,
|
routing_key: String,
|
||||||
severity: String,
|
severity: String,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// An alert channel endpoint, supporting both inline URLs and secret refs.
|
||||||
|
///
|
||||||
|
/// For Slack and generic webhook channels, the URL may be provided directly
|
||||||
|
/// or as a secret reference resolved at send time. When both `url` and
|
||||||
|
/// `url_ref` are present, `url_ref` takes precedence after resolution.
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||||
|
pub struct FleetAlertEndpoint {
|
||||||
|
/// Inline URL (plaintext; only for non-sensitive endpoints).
|
||||||
|
#[serde(
|
||||||
|
alias = "webhook_url",
|
||||||
|
alias = "endpoint_url",
|
||||||
|
skip_serializing_if = "Option::is_none"
|
||||||
|
)]
|
||||||
|
pub url: Option<String>,
|
||||||
|
/// Reference to a secret containing the webhook URL.
|
||||||
|
#[serde(
|
||||||
|
alias = "webhook_url_ref",
|
||||||
|
alias = "webhook_ref",
|
||||||
|
alias = "url_secret_ref",
|
||||||
|
skip_serializing_if = "Option::is_none"
|
||||||
|
)]
|
||||||
|
pub url_ref: Option<FleetSecretRef>,
|
||||||
|
/// Optional HMAC secret for webhook payload signing, as a secret ref.
|
||||||
|
#[serde(
|
||||||
|
alias = "secret",
|
||||||
|
alias = "webhook_secret",
|
||||||
|
alias = "signing_secret",
|
||||||
|
skip_serializing_if = "Option::is_none"
|
||||||
|
)]
|
||||||
|
pub secret_ref: Option<FleetSecretRef>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FleetAlertEndpoint {
|
||||||
|
/// Create an inline URL endpoint (for non-sensitive use).
|
||||||
|
#[must_use]
|
||||||
|
pub fn inline(url: impl Into<String>) -> Self {
|
||||||
|
Self {
|
||||||
|
url: Some(url.into()),
|
||||||
|
url_ref: None,
|
||||||
|
secret_ref: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a secret-backed URL endpoint.
|
||||||
|
#[must_use]
|
||||||
|
pub fn from_secret(url_ref: FleetSecretRef) -> Self {
|
||||||
|
Self {
|
||||||
|
url: None,
|
||||||
|
url_ref: Some(url_ref),
|
||||||
|
secret_ref: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Redacted display form for logging.
|
||||||
|
#[must_use]
|
||||||
|
pub fn redacted(&self) -> String {
|
||||||
|
self.url_ref
|
||||||
|
.as_ref()
|
||||||
|
.map_or_else(|| "<inline-url>".to_string(), |r| r.redacted())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Receipt produced when a task completes verification.
|
/// Receipt produced when a task completes verification.
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
pub struct FleetReceipt {
|
pub struct FleetReceipt {
|
||||||
@@ -573,6 +912,7 @@ mod tests {
|
|||||||
}],
|
}],
|
||||||
worker_specs: vec![],
|
worker_specs: vec![],
|
||||||
labels: BTreeMap::new(),
|
labels: BTreeMap::new(),
|
||||||
|
security_policy: None,
|
||||||
created_at: "2026-06-12T17:00:00Z".to_string(),
|
created_at: "2026-06-12T17:00:00Z".to_string(),
|
||||||
updated_at: None,
|
updated_at: None,
|
||||||
completed_at: None,
|
completed_at: None,
|
||||||
@@ -648,7 +988,7 @@ mod tests {
|
|||||||
let policy = FleetAlertPolicy {
|
let policy = FleetAlertPolicy {
|
||||||
events: vec![FleetAlertEventClass::Stale],
|
events: vec![FleetAlertEventClass::Stale],
|
||||||
channels: vec![FleetAlertChannel::Slack {
|
channels: vec![FleetAlertChannel::Slack {
|
||||||
webhook_url: "https://hooks.slack.com/test".to_string(),
|
webhook: FleetAlertEndpoint::inline("https://hooks.slack.com/test"),
|
||||||
}],
|
}],
|
||||||
after_attempts: Some(2),
|
after_attempts: Some(2),
|
||||||
after_minutes_stale: Some(10),
|
after_minutes_stale: Some(10),
|
||||||
@@ -687,6 +1027,8 @@ mod tests {
|
|||||||
port,
|
port,
|
||||||
user,
|
user,
|
||||||
identity,
|
identity,
|
||||||
|
known_hosts,
|
||||||
|
host_key_fingerprint,
|
||||||
working_directory,
|
working_directory,
|
||||||
env_allowlist,
|
env_allowlist,
|
||||||
codewhale_binary,
|
codewhale_binary,
|
||||||
@@ -695,6 +1037,8 @@ mod tests {
|
|||||||
assert_eq!(port, None);
|
assert_eq!(port, None);
|
||||||
assert_eq!(user, None);
|
assert_eq!(user, None);
|
||||||
assert_eq!(identity, None);
|
assert_eq!(identity, None);
|
||||||
|
assert_eq!(known_hosts, None);
|
||||||
|
assert_eq!(host_key_fingerprint, None);
|
||||||
assert_eq!(working_directory, None);
|
assert_eq!(working_directory, None);
|
||||||
assert!(env_allowlist.is_empty());
|
assert!(env_allowlist.is_empty());
|
||||||
assert_eq!(codewhale_binary, None);
|
assert_eq!(codewhale_binary, None);
|
||||||
@@ -801,4 +1145,131 @@ mod tests {
|
|||||||
assert_eq!(back.result, FleetTaskResult::Partial);
|
assert_eq!(back.result, FleetTaskResult::Partial);
|
||||||
assert_eq!(back.failure_kind, Some(FleetTaskFailureKind::Verifier));
|
assert_eq!(back.failure_kind, Some(FleetTaskFailureKind::Verifier));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn ssh_host_spec_with_key_pinning_round_trip() {
    // Values used both to build the spec and to check it after the round trip.
    let hosts_file = PathBuf::from("~/.ssh/known_hosts");
    let fingerprint = "SHA256:aLGqZo1M6c...".to_string();

    let original = FleetHostSpec::Ssh {
        host: "builder.trusted.example.com".to_string(),
        port: Some(22),
        user: Some("codewhale".to_string()),
        identity: Some(PathBuf::from("~/.ssh/codewhale_fleet")),
        known_hosts: Some(hosts_file.clone()),
        host_key_fingerprint: Some(fingerprint.clone()),
        working_directory: Some(PathBuf::from("/srv/codewhale/work")),
        env_allowlist: vec!["CODEWHALE_PROFILE".to_string()],
        codewhale_binary: Some("/usr/local/bin/codewhale".to_string()),
    };

    // The pinning fields and the fingerprint value must be in the JSON.
    let encoded = serde_json::to_string_pretty(&original).unwrap();
    for needle in [
        "\"known_hosts\"",
        "\"host_key_fingerprint\"",
        "SHA256:aLGqZo1M6c...",
    ] {
        assert!(encoded.contains(needle));
    }

    // Decoding must give back the same host and pinning data.
    let decoded: FleetHostSpec = serde_json::from_str(&encoded).unwrap();
    match decoded {
        FleetHostSpec::Ssh {
            host,
            known_hosts,
            host_key_fingerprint,
            ..
        } => {
            assert_eq!(host, "builder.trusted.example.com");
            assert_eq!(known_hosts, Some(hosts_file));
            assert_eq!(host_key_fingerprint, Some(fingerprint));
        }
        other => panic!("expected ssh host spec, got {other:?}"),
    }
}
|
||||||
|
|
||||||
|
#[test]
fn secret_ref_redacted_never_exposes_value() {
    // No source: the marker and key name appear, no API-key-style prefix.
    let unpinned = FleetSecretRef::new("DEEPSEEK_API_KEY").redacted();
    assert!(unpinned.contains("<secret:"));
    assert!(unpinned.contains("DEEPSEEK_API_KEY"));
    assert!(!unpinned.contains("sk-"));

    // With a source: rendered as `source.key`, no token-style prefix.
    let pinned = FleetSecretRef::with_source("GH_TOKEN", "env").redacted();
    assert!(pinned.contains("env.GH_TOKEN"));
    assert!(!pinned.contains("ghp_"));
}
|
||||||
|
|
||||||
|
#[test]
fn alert_endpoint_from_secret_round_trip() {
    let original = FleetAlertEndpoint::from_secret(FleetSecretRef::new("SLACK_WEBHOOK"));

    // The JSON names the secret and contains no webhook host.
    let encoded = serde_json::to_string(&original).unwrap();
    assert!(encoded.contains("SLACK_WEBHOOK"));
    assert!(!encoded.contains("hooks.slack.com"));

    // Decoding restores the reference and leaves the inline URL unset.
    let decoded: FleetAlertEndpoint = serde_json::from_str(&encoded).unwrap();
    assert_eq!(decoded.url, None);
    assert_eq!(decoded.url_ref.as_ref().unwrap().key, "SLACK_WEBHOOK");
}
|
||||||
|
|
||||||
|
#[test]
fn secret_ref_accepts_legacy_string_wire_shape() {
    // Bare string: key only, no source.
    let from_string: FleetSecretRef = serde_json::from_str(r#""CODEWHALE_FLEET_TOKEN""#).unwrap();
    let expected_plain = FleetSecretRef::new("CODEWHALE_FLEET_TOKEN");
    assert_eq!(from_string, expected_plain);

    // Object form: key plus explicit source.
    let from_object: FleetSecretRef =
        serde_json::from_str(r#"{"key":"GH_TOKEN","source":"env"}"#).unwrap();
    let expected_pinned = FleetSecretRef::with_source("GH_TOKEN", "env");
    assert_eq!(from_object, expected_pinned);
}
|
||||||
|
|
||||||
|
#[test]
fn trust_level_accepts_hyphenated_remote_verified() {
    // The hyphenated spelling is accepted on input...
    let parsed: FleetTrustLevel = serde_json::from_str(r#""remote-verified""#).unwrap();
    assert_eq!(parsed, FleetTrustLevel::RemoteVerified);

    // ...and output uses the underscore spelling.
    let encoded = serde_json::to_string(&parsed).unwrap();
    assert_eq!(encoded, r#""remote_verified""#);
}
|
||||||
|
|
||||||
|
#[test]
fn alert_channel_accepts_legacy_webhook_fields() {
    // Legacy key names: `webhook_url` for the URL, `secret` for the signing
    // secret.
    let legacy_json = r#"{
"kind": "slack",
"webhook_url": "https://hooks.slack.com/test",
"secret": "SLACK_SIGNING_SECRET"
}"#;
    let parsed: FleetAlertChannel = serde_json::from_str(legacy_json).unwrap();

    match parsed {
        FleetAlertChannel::Slack { webhook } => {
            // `webhook_url` populates the inline URL.
            assert_eq!(webhook.url.as_deref(), Some("https://hooks.slack.com/test"));
            // `secret` is read as a secret reference with that key name.
            let expected_secret = Some(FleetSecretRef::new("SLACK_SIGNING_SECRET"));
            assert_eq!(webhook.secret_ref, expected_secret);
        }
        other => panic!("expected slack channel, got {other:?}"),
    }
}
|
||||||
|
|
||||||
|
#[test]
fn security_policy_defaults_are_conservative() {
    let defaults = FleetSecurityPolicy::default();

    // Trust starts at the lowest level and is capped at the highest.
    assert_eq!(defaults.default_trust_level, FleetTrustLevel::Sandbox);
    assert_eq!(defaults.max_trust_level, FleetTrustLevel::Operator);

    // Nothing is allowed or granted until configured.
    assert!(defaults.allowed_secrets.is_empty());
    assert!(defaults.capability_grants.is_empty());

    // NOTE(review): this pins identity verification as OFF by default even
    // though the test name says "conservative". Confirm that is intended.
    assert!(!defaults.require_identity_verification);
}
|
||||||
|
|
||||||
|
#[test]
fn trust_level_ordinal_reflects_privilege() {
    // Listed from least to most privileged; each level must compare
    // greater than the one before it.
    let ascending = [
        FleetTrustLevel::Sandbox,
        FleetTrustLevel::Local,
        FleetTrustLevel::RemoteVerified,
        FleetTrustLevel::Operator,
    ];
    for pair in ascending.windows(2) {
        assert!(pair[1] > pair[0]);
    }

    // Sandbox may neither read secrets nor write the workspace.
    assert!(!FleetTrustLevel::Sandbox.may_access_secrets());
    assert!(!FleetTrustLevel::Sandbox.may_write_workspace());

    // Operator may do both.
    assert!(FleetTrustLevel::Operator.may_access_secrets());
    assert!(FleetTrustLevel::Operator.may_write_workspace());
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -737,6 +737,44 @@ impl Secrets {
|
|||||||
pub fn get(&self, name: &str) -> Result<Option<String>, SecretsError> {
|
pub fn get(&self, name: &str) -> Result<Option<String>, SecretsError> {
|
||||||
self.store.get(name)
|
self.store.get(name)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Resolve a secret by key name with an optional source constraint.
|
||||||
|
///
|
||||||
|
/// This is the fleet-worker secret resolution path. Unlike
|
||||||
|
/// [`resolve`](Secrets::resolve), this does NOT map provider names
|
||||||
|
/// to their canonical env vars — the caller controls the exact key
|
||||||
|
/// and resolution order.
|
||||||
|
///
|
||||||
|
/// `source_hint` controls the resolution order:
|
||||||
|
/// - `Some("env")` — only check environment variables
|
||||||
|
/// - `Some("keyring")` — only check the keyring/file store
|
||||||
|
/// - `None` — try the store first, then fall back to environment
|
||||||
|
#[must_use]
|
||||||
|
pub fn resolve_direct(&self, key: &str, source_hint: Option<&str>) -> Option<String> {
|
||||||
|
match source_hint {
|
||||||
|
Some("env") => {
|
||||||
|
// Only check process environment — skip the store entirely.
|
||||||
|
std::env::var(key).ok().filter(|v| !v.trim().is_empty())
|
||||||
|
}
|
||||||
|
Some("keyring") | Some("file") => {
|
||||||
|
// Only check the store backend.
|
||||||
|
self.store
|
||||||
|
.get(key)
|
||||||
|
.ok()
|
||||||
|
.flatten()
|
||||||
|
.filter(|v| !v.trim().is_empty())
|
||||||
|
}
|
||||||
|
Some(_) | None => {
|
||||||
|
// Default: store first, then env fallback.
|
||||||
|
if let Ok(Some(v)) = self.store.get(key)
|
||||||
|
&& !v.trim().is_empty()
|
||||||
|
{
|
||||||
|
return Some(v);
|
||||||
|
}
|
||||||
|
std::env::var(key).ok().filter(|v| !v.trim().is_empty())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Map a canonical provider name to its environment variable(s), returning
|
/// Map a canonical provider name to its environment variable(s), returning
|
||||||
|
|||||||
@@ -1883,6 +1883,10 @@ pub struct Config {
|
|||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub context: ContextConfig,
|
pub context: ContextConfig,
|
||||||
|
|
||||||
|
/// Agent Fleet trust/security/role/exec config.
|
||||||
|
#[serde(default)]
|
||||||
|
pub fleet: Option<codewhale_config::FleetConfigToml>,
|
||||||
|
|
||||||
/// Sub-agent model overrides.
|
/// Sub-agent model overrides.
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub subagents: Option<SubagentsConfig>,
|
pub subagents: Option<SubagentsConfig>,
|
||||||
@@ -5012,6 +5016,7 @@ fn merge_config(base: Config, override_cfg: Config) -> Config {
|
|||||||
.or(base.context.l3_threshold),
|
.or(base.context.l3_threshold),
|
||||||
seam_model: override_cfg.context.seam_model.or(base.context.seam_model),
|
seam_model: override_cfg.context.seam_model.or(base.context.seam_model),
|
||||||
},
|
},
|
||||||
|
fleet: override_cfg.fleet.or(base.fleet),
|
||||||
subagents: override_cfg.subagents.or(base.subagents),
|
subagents: override_cfg.subagents.or(base.subagents),
|
||||||
strict_tool_mode: override_cfg.strict_tool_mode.or(base.strict_tool_mode),
|
strict_tool_mode: override_cfg.strict_tool_mode.or(base.strict_tool_mode),
|
||||||
runtime_api: override_cfg.runtime_api.or(base.runtime_api),
|
runtime_api: override_cfg.runtime_api.or(base.runtime_api),
|
||||||
|
|||||||
@@ -373,6 +373,8 @@ pub struct SshFleetHostConfig {
|
|||||||
pub user: Option<String>,
|
pub user: Option<String>,
|
||||||
pub port: Option<u16>,
|
pub port: Option<u16>,
|
||||||
pub identity: Option<PathBuf>,
|
pub identity: Option<PathBuf>,
|
||||||
|
pub known_hosts: Option<PathBuf>,
|
||||||
|
pub host_key_fingerprint: Option<String>,
|
||||||
pub working_directory: PathBuf,
|
pub working_directory: PathBuf,
|
||||||
pub env_allowlist: BTreeSet<String>,
|
pub env_allowlist: BTreeSet<String>,
|
||||||
pub codewhale_binary: String,
|
pub codewhale_binary: String,
|
||||||
@@ -387,6 +389,8 @@ impl SshFleetHostConfig {
|
|||||||
user: None,
|
user: None,
|
||||||
port: None,
|
port: None,
|
||||||
identity: None,
|
identity: None,
|
||||||
|
known_hosts: None,
|
||||||
|
host_key_fingerprint: None,
|
||||||
working_directory: working_directory.into(),
|
working_directory: working_directory.into(),
|
||||||
env_allowlist: BTreeSet::new(),
|
env_allowlist: BTreeSet::new(),
|
||||||
codewhale_binary: "codewhale".to_string(),
|
codewhale_binary: "codewhale".to_string(),
|
||||||
@@ -401,6 +405,8 @@ impl SshFleetHostConfig {
|
|||||||
port,
|
port,
|
||||||
user,
|
user,
|
||||||
identity,
|
identity,
|
||||||
|
known_hosts,
|
||||||
|
host_key_fingerprint,
|
||||||
working_directory,
|
working_directory,
|
||||||
env_allowlist,
|
env_allowlist,
|
||||||
codewhale_binary,
|
codewhale_binary,
|
||||||
@@ -420,6 +426,8 @@ impl SshFleetHostConfig {
|
|||||||
config.port = *port;
|
config.port = *port;
|
||||||
config.user = user.clone();
|
config.user = user.clone();
|
||||||
config.identity = identity.clone();
|
config.identity = identity.clone();
|
||||||
|
config.known_hosts = known_hosts.clone();
|
||||||
|
config.host_key_fingerprint = host_key_fingerprint.clone();
|
||||||
config.env_allowlist = env_allowlist.iter().cloned().collect();
|
config.env_allowlist = env_allowlist.iter().cloned().collect();
|
||||||
config.codewhale_binary = codewhale_binary;
|
config.codewhale_binary = codewhale_binary;
|
||||||
config.validate()?;
|
config.validate()?;
|
||||||
@@ -918,6 +926,8 @@ mod tests {
|
|||||||
port: Some(2222),
|
port: Some(2222),
|
||||||
user: Some("fleet".to_string()),
|
user: Some("fleet".to_string()),
|
||||||
identity: Some(PathBuf::from("/tmp/fleet_id")),
|
identity: Some(PathBuf::from("/tmp/fleet_id")),
|
||||||
|
known_hosts: None,
|
||||||
|
host_key_fingerprint: None,
|
||||||
working_directory: Some(PathBuf::from("/srv/codewhale")),
|
working_directory: Some(PathBuf::from("/srv/codewhale")),
|
||||||
env_allowlist: vec!["FLEET_PROFILE".to_string()],
|
env_allowlist: vec!["FLEET_PROFILE".to_string()],
|
||||||
codewhale_binary: Some("/usr/local/bin/codewhale".to_string()),
|
codewhale_binary: Some("/usr/local/bin/codewhale".to_string()),
|
||||||
|
|||||||
@@ -661,12 +661,21 @@ fn sanitize_run_for_ledger(run: &FleetRun) -> FleetRun {
|
|||||||
if let Some(policy) = &mut task.alert_policy {
|
if let Some(policy) = &mut task.alert_policy {
|
||||||
for channel in &mut policy.channels {
|
for channel in &mut policy.channels {
|
||||||
match channel {
|
match channel {
|
||||||
FleetAlertChannel::Slack { webhook_url } => {
|
FleetAlertChannel::Slack { webhook } => {
|
||||||
*webhook_url = "<redacted>".to_string();
|
webhook.url = webhook.url.as_ref().map(|_| "<redacted>".to_string());
|
||||||
}
|
}
|
||||||
FleetAlertChannel::Webhook { url, secret } => {
|
FleetAlertChannel::Webhook { endpoint } => {
|
||||||
*url = "<redacted>".to_string();
|
*endpoint = FleetAlertEndpoint {
|
||||||
*secret = secret.as_ref().map(|_| "<redacted>".to_string());
|
url: endpoint.url.as_ref().map(|_| "<redacted>".to_string()),
|
||||||
|
url_ref: endpoint
|
||||||
|
.url_ref
|
||||||
|
.as_ref()
|
||||||
|
.map(|_| FleetSecretRef::new("<redacted>")),
|
||||||
|
secret_ref: endpoint
|
||||||
|
.secret_ref
|
||||||
|
.as_ref()
|
||||||
|
.map(|_| FleetSecretRef::new("<redacted>")),
|
||||||
|
};
|
||||||
}
|
}
|
||||||
FleetAlertChannel::PagerDuty { routing_key, .. } => {
|
FleetAlertChannel::PagerDuty { routing_key, .. } => {
|
||||||
*routing_key = "<redacted>".to_string();
|
*routing_key = "<redacted>".to_string();
|
||||||
@@ -691,6 +700,7 @@ mod tests {
|
|||||||
task_specs: vec![],
|
task_specs: vec![],
|
||||||
worker_specs: vec![],
|
worker_specs: vec![],
|
||||||
labels: BTreeMap::new(),
|
labels: BTreeMap::new(),
|
||||||
|
security_policy: None,
|
||||||
created_at: "2026-06-12T17:00:00Z".to_string(),
|
created_at: "2026-06-12T17:00:00Z".to_string(),
|
||||||
updated_at: None,
|
updated_at: None,
|
||||||
completed_at: None,
|
completed_at: None,
|
||||||
|
|||||||
@@ -21,14 +21,38 @@ use super::task_spec::{
|
|||||||
FleetTaskSpecDocument, FleetTaskVerificationInput, load_task_spec_document,
|
FleetTaskSpecDocument, FleetTaskVerificationInput, load_task_spec_document,
|
||||||
record_verification_receipt, validate_task_spec_document, verify_task_result,
|
record_verification_receipt, validate_task_spec_document, verify_task_result,
|
||||||
};
|
};
|
||||||
|
use super::worker_runtime;
|
||||||
|
use crate::tools::subagent::SharedSubAgentManager;
|
||||||
|
|
||||||
const DEFAULT_STALE_AFTER_SECONDS: u64 = 300;
|
const DEFAULT_STALE_AFTER_SECONDS: u64 = 300;
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub struct FleetManager {
|
pub struct FleetManager {
|
||||||
workspace: PathBuf,
|
workspace: PathBuf,
|
||||||
ledger: FleetLedger,
|
ledger: FleetLedger,
|
||||||
stale_after: Duration,
|
stale_after: Duration,
|
||||||
|
exec_config: codewhale_config::FleetExecConfig,
|
||||||
|
/// Optional sub-agent manager for headless worker execution.
|
||||||
|
/// When set, fleet workers spawn real sub-agents; when None,
|
||||||
|
/// the manager falls back to local simulation.
|
||||||
|
sub_agent_manager: Option<SharedSubAgentManager>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Debug for FleetManager {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
f.debug_struct("FleetManager")
|
||||||
|
.field("workspace", &self.workspace)
|
||||||
|
.field("ledger", &self.ledger)
|
||||||
|
.field("stale_after", &self.stale_after)
|
||||||
|
.field("exec_config", &self.exec_config)
|
||||||
|
.field(
|
||||||
|
"sub_agent_manager",
|
||||||
|
&self
|
||||||
|
.sub_agent_manager
|
||||||
|
.as_ref()
|
||||||
|
.map(|_| "SharedSubAgentManager"),
|
||||||
|
)
|
||||||
|
.finish()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
@@ -78,6 +102,28 @@ pub struct FleetWorkerInspection {
|
|||||||
pub artifacts: Vec<FleetArtifactRef>,
|
pub artifacts: Vec<FleetArtifactRef>,
|
||||||
pub last_error: Option<String>,
|
pub last_error: Option<String>,
|
||||||
pub alert_state: Option<String>,
|
pub alert_state: Option<String>,
|
||||||
|
/// Lightweight projection from the sub-agent worker runtime.
|
||||||
|
/// Populated when a sub-agent manager is attached.
|
||||||
|
pub runtime_state: Option<FleetWorkerRuntimeProjection>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Lightweight TUI projection of a headless sub-agent worker's current state.
|
||||||
|
///
|
||||||
|
/// Derived from the sub-agent manager's `AgentWorkerRecord`.
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct FleetWorkerRuntimeProjection {
|
||||||
|
/// Sub-agent lifecycle status (Queued, Starting, Running, Completed, etc.)
|
||||||
|
pub agent_status: String,
|
||||||
|
/// Steps taken so far (tool calls + model turns)
|
||||||
|
pub steps_taken: u32,
|
||||||
|
/// Latest human-readable message from the worker
|
||||||
|
pub latest_message: Option<String>,
|
||||||
|
/// Error message if the worker failed
|
||||||
|
pub error: Option<String>,
|
||||||
|
/// Result summary if the worker completed
|
||||||
|
pub result_summary: Option<String>,
|
||||||
|
/// Whether the worker has a sub-agent session running
|
||||||
|
pub has_session: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl FleetManager {
|
impl FleetManager {
|
||||||
@@ -88,6 +134,8 @@ impl FleetManager {
|
|||||||
workspace,
|
workspace,
|
||||||
ledger,
|
ledger,
|
||||||
stale_after: Duration::from_secs(DEFAULT_STALE_AFTER_SECONDS),
|
stale_after: Duration::from_secs(DEFAULT_STALE_AFTER_SECONDS),
|
||||||
|
exec_config: codewhale_config::FleetExecConfig::default(),
|
||||||
|
sub_agent_manager: None,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -96,6 +144,23 @@ impl FleetManager {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Apply fleet headless-worker execution policy from config.
|
||||||
|
pub fn with_exec_config(mut self, exec_config: codewhale_config::FleetExecConfig) -> Self {
|
||||||
|
self.exec_config = exec_config;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Attach a sub-agent manager so fleet workers can spawn real headless agents.
|
||||||
|
pub fn with_sub_agent_manager(mut self, mgr: SharedSubAgentManager) -> Self {
|
||||||
|
self.sub_agent_manager = Some(mgr);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// True when the manager has a sub-agent runtime for headless worker execution.
|
||||||
|
pub fn has_worker_runtime(&self) -> bool {
|
||||||
|
self.sub_agent_manager.is_some()
|
||||||
|
}
|
||||||
|
|
||||||
pub fn ledger_path(&self) -> &Path {
|
pub fn ledger_path(&self) -> &Path {
|
||||||
self.ledger.path()
|
self.ledger.path()
|
||||||
}
|
}
|
||||||
@@ -139,6 +204,7 @@ impl FleetManager {
|
|||||||
task_specs: doc.tasks.clone(),
|
task_specs: doc.tasks.clone(),
|
||||||
worker_specs: doc.workers.clone(),
|
worker_specs: doc.workers.clone(),
|
||||||
labels: doc.labels,
|
labels: doc.labels,
|
||||||
|
security_policy: doc.security_policy.clone(),
|
||||||
created_at: now.clone(),
|
created_at: now.clone(),
|
||||||
updated_at: Some(now.clone()),
|
updated_at: Some(now.clone()),
|
||||||
completed_at: None,
|
completed_at: None,
|
||||||
@@ -280,6 +346,27 @@ impl FleetManager {
|
|||||||
.get(worker_id)
|
.get(worker_id)
|
||||||
.map(|heartbeat| heartbeat.timestamp.clone());
|
.map(|heartbeat| heartbeat.timestamp.clone());
|
||||||
let alert_state = latest_alert_for_worker(&state, worker_id);
|
let alert_state = latest_alert_for_worker(&state, worker_id);
|
||||||
|
|
||||||
|
// Enrich with sub-agent worker runtime state when available.
|
||||||
|
let runtime_state = self.sub_agent_manager.as_ref().and_then(|mgr| {
|
||||||
|
mgr.try_read()
|
||||||
|
.ok()
|
||||||
|
.and_then(|guard| guard.get_worker_record(worker_id))
|
||||||
|
.map(|record| FleetWorkerRuntimeProjection {
|
||||||
|
agent_status: format!("{:?}", record.status).to_lowercase(),
|
||||||
|
steps_taken: record.steps_taken,
|
||||||
|
latest_message: record.latest_message,
|
||||||
|
error: record.error,
|
||||||
|
result_summary: record.result_summary,
|
||||||
|
has_session: !matches!(
|
||||||
|
record.status,
|
||||||
|
crate::tools::subagent::AgentWorkerStatus::Completed
|
||||||
|
| crate::tools::subagent::AgentWorkerStatus::Failed
|
||||||
|
| crate::tools::subagent::AgentWorkerStatus::Cancelled
|
||||||
|
),
|
||||||
|
})
|
||||||
|
});
|
||||||
|
|
||||||
Ok(FleetWorkerInspection {
|
Ok(FleetWorkerInspection {
|
||||||
worker_id: worker_id.to_string(),
|
worker_id: worker_id.to_string(),
|
||||||
status,
|
status,
|
||||||
@@ -293,6 +380,7 @@ impl FleetManager {
|
|||||||
artifacts,
|
artifacts,
|
||||||
last_error,
|
last_error,
|
||||||
alert_state,
|
alert_state,
|
||||||
|
runtime_state,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -475,6 +563,45 @@ impl FleetManager {
|
|||||||
FleetWorkerEventPayload::Running,
|
FleetWorkerEventPayload::Running,
|
||||||
)?;
|
)?;
|
||||||
self.ledger.heartbeat(worker_id, ×tamp(), None, None)?;
|
self.ledger.heartbeat(worker_id, ×tamp(), None, None)?;
|
||||||
|
|
||||||
|
// Register with the sub-agent manager for headless worker tracking.
|
||||||
|
// The engine's agent_open path handles actual sub-agent spawning.
|
||||||
|
if let Some(ref mgr) = self.sub_agent_manager {
|
||||||
|
if let Ok(guard) = mgr.try_write() {
|
||||||
|
let run = self
|
||||||
|
.ledger
|
||||||
|
.rebuild_state()
|
||||||
|
.ok()
|
||||||
|
.and_then(|state| state.runs.get(&entry.run_id.0).cloned());
|
||||||
|
let worker_spec = run
|
||||||
|
.as_ref()
|
||||||
|
.and_then(|r| r.worker_specs.iter().find(|w| w.id == worker_id).cloned())
|
||||||
|
.unwrap_or_else(|| FleetWorkerSpec {
|
||||||
|
id: worker_id.to_string(),
|
||||||
|
name: worker_id.to_string(),
|
||||||
|
host: FleetHostSpec::Local,
|
||||||
|
trust_level: Some(FleetTrustLevel::Local),
|
||||||
|
labels: BTreeMap::new(),
|
||||||
|
capabilities: vec![],
|
||||||
|
max_concurrent_tasks: Some(1),
|
||||||
|
});
|
||||||
|
let worker = worker_runtime::fleet_task_to_worker_spec(
|
||||||
|
worker_id,
|
||||||
|
&entry.run_id.0,
|
||||||
|
task_spec,
|
||||||
|
&worker_spec,
|
||||||
|
"auto",
|
||||||
|
&self.workspace,
|
||||||
|
);
|
||||||
|
let worker = worker_runtime::apply_exec_hardening(worker, &self.exec_config);
|
||||||
|
// drop guard after registering so we don't hold the write lock
|
||||||
|
drop(guard);
|
||||||
|
if let Ok(mut guard) = mgr.try_write() {
|
||||||
|
guard.register_worker(worker);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
self.maybe_complete_local_simulation(entry, worker_id, task_spec, log_artifact)
|
self.maybe_complete_local_simulation(entry, worker_id, task_spec, log_artifact)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -746,6 +873,7 @@ fn default_local_workers(run_id: &FleetRunId, max_workers: usize) -> Vec<FleetWo
|
|||||||
id: format!("{}-local-{}", run_id.0, index),
|
id: format!("{}-local-{}", run_id.0, index),
|
||||||
name: format!("Local worker {index}"),
|
name: format!("Local worker {index}"),
|
||||||
host: FleetHostSpec::Local,
|
host: FleetHostSpec::Local,
|
||||||
|
trust_level: Some(FleetTrustLevel::Local),
|
||||||
labels: BTreeMap::new(),
|
labels: BTreeMap::new(),
|
||||||
capabilities: vec!["local".to_string()],
|
capabilities: vec!["local".to_string()],
|
||||||
max_concurrent_tasks: Some(1),
|
max_concurrent_tasks: Some(1),
|
||||||
@@ -1267,4 +1395,159 @@ mod tests {
|
|||||||
Some("escalated via pagerduty alert_id=alert-1")
|
Some("escalated via pagerduty alert_id=alert-1")
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn fleet_dogfood_smoke_run_two_local_workers_two_tasks() {
    let tmp = TempDir::new().unwrap();
    let workspace = tmp.path().join("repo");
    let src_dir = workspace.join("src");
    // Creating `src` recursively also creates the workspace root.
    std::fs::create_dir_all(&src_dir).unwrap();
    // Minimal manifest and source file so the cargo-check task has a crate to look at.
    std::fs::write(
        workspace.join("Cargo.toml"),
        "[package]\nname = \"smoke\"\nversion = \"0.1.0\"\nedition = \"2021\"\n",
    )
    .unwrap();
    std::fs::write(src_dir.join("lib.rs"), "pub fn answer() -> u8 { 42 }\n").unwrap();

    // The two smoke tasks differ only in identity, role, workspace file
    // requirements, and scorer; everything else is shared.
    let smoke_task = |id: &str,
                      objective: &str,
                      instructions: &str,
                      role: &str,
                      required_files: Vec<PathBuf>,
                      writable_paths: Vec<PathBuf>,
                      scorer: Option<FleetScorerSpec>| FleetTaskSpec {
        id: id.to_string(),
        name: id.to_string(),
        description: None,
        objective: Some(objective.to_string()),
        instructions: instructions.to_string(),
        worker: Some(FleetTaskWorkerProfile {
            role: Some(role.to_string()),
            tool_profile: Some("read-only".to_string()),
            tools: vec!["cargo".to_string()],
            capabilities: vec!["rust".to_string()],
        }),
        workspace: Some(FleetWorkspaceRequirements {
            root: None,
            required_files,
            writable_paths,
            environment: Some(FleetEnvironmentRequirements {
                required: vec!["PATH".to_string()],
                allowlist: vec![],
            }),
        }),
        input_files: vec![],
        context: vec![],
        budget: None,
        tags: vec!["smoke".to_string()],
        expected_artifacts: vec![FleetArtifactKind::Log, FleetArtifactKind::Receipt],
        scorer,
        retry_policy: Some(FleetRetryPolicy {
            max_attempts: 1,
            ..Default::default()
        }),
        alert_policy: None,
        timeout_seconds: Some(60),
        metadata: BTreeMap::new(),
    };

    let tasks = vec![
        smoke_task(
            "check",
            "cargo check",
            "run cargo check and report result",
            "release-checker",
            vec![PathBuf::from("Cargo.toml")],
            vec![PathBuf::from(".codewhale/fleet")],
            Some(FleetScorerSpec::ExitCode),
        ),
        smoke_task(
            "review",
            "review source",
            "read src/lib.rs and report findings",
            "reviewer",
            vec![],
            vec![],
            None,
        ),
    ];

    // No explicit workers: the run is created with a worker budget of 2.
    let document = FleetTaskSpecDocument {
        name: Some("dogfood smoke".to_string()),
        labels: BTreeMap::new(),
        security_policy: Some(FleetSecurityPolicy {
            default_trust_level: FleetTrustLevel::Local,
            ..Default::default()
        }),
        workers: vec![],
        tasks,
    };
    let manager = FleetManager::open(&workspace).unwrap();
    let report = manager.create_run(document, 2).unwrap();

    assert_eq!(report.task_count, 2);
    assert!(!report.worker_ids.is_empty());
    assert_eq!(report.worker_ids.len(), 2);

    // Scheduling happens immediately, so a task may already be leased;
    // queued plus running must still account for both tasks.
    let status = manager.run_status(&report.run_id).unwrap();
    assert_eq!(status.queued + status.running, 2);
}
|
||||||
|
|
||||||
|
#[test]
fn fleet_security_policy_propagates_from_task_spec_document_to_run() {
    let tmp = TempDir::new().unwrap();
    let manager = FleetManager::open(tmp.path()).unwrap();

    // Write a spec document that carries a `security_policy` block. The
    // document is authored as raw JSON so the test also covers deserialization
    // of the policy from a spec file on disk.
    let doc = serde_json::json!({
        "name": "secure smoke",
        "tasks": [{
            "id": "task-a",
            "name": "task-a",
            "instructions": "report ok",
            "expected_artifacts": ["log"]
        }],
        "security_policy": {
            "default_trust_level": "local",
            "allowed_secrets": [{"key": "GH_TOKEN", "source": "env"}],
            "max_trust_level": "remote_verified",
            "require_identity_verification": true
        }
    });
    let spec_path = tmp.path().join("secure-tasks.json");
    std::fs::write(&spec_path, serde_json::to_string_pretty(&doc).unwrap()).unwrap();

    let report = manager
        .create_run_from_task_spec_path(&spec_path, 1)
        .unwrap();

    // The policy must survive the round trip into the ledger-rebuilt run state.
    let state = manager.ledger.rebuild_state().unwrap();
    let run = state.runs.get(&report.run_id.0).unwrap();
    let policy = run.security_policy.as_ref().unwrap();
    assert_eq!(policy.default_trust_level, FleetTrustLevel::Local);
    assert_eq!(policy.allowed_secrets.len(), 1);
    assert_eq!(policy.allowed_secrets[0].key, "GH_TOKEN");
    assert_eq!(policy.max_trust_level, FleetTrustLevel::RemoteVerified);
    assert!(policy.require_identity_verification);
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,3 +6,4 @@ pub mod ledger;
|
|||||||
pub mod manager;
|
pub mod manager;
|
||||||
pub mod scheduler;
|
pub mod scheduler;
|
||||||
pub mod task_spec;
|
pub mod task_spec;
|
||||||
|
pub mod worker_runtime;
|
||||||
|
|||||||
@@ -573,6 +573,7 @@ mod tests {
|
|||||||
id: id.to_string(),
|
id: id.to_string(),
|
||||||
name: id.to_string(),
|
name: id.to_string(),
|
||||||
host: FleetHostSpec::Local,
|
host: FleetHostSpec::Local,
|
||||||
|
trust_level: Some(FleetTrustLevel::Local),
|
||||||
labels: BTreeMap::new(),
|
labels: BTreeMap::new(),
|
||||||
capabilities: vec!["local".to_string()],
|
capabilities: vec!["local".to_string()],
|
||||||
max_concurrent_tasks: Some(1),
|
max_concurrent_tasks: Some(1),
|
||||||
@@ -622,6 +623,7 @@ mod tests {
|
|||||||
.map(|idx| worker(&format!("worker-{idx}")))
|
.map(|idx| worker(&format!("worker-{idx}")))
|
||||||
.collect(),
|
.collect(),
|
||||||
labels: BTreeMap::new(),
|
labels: BTreeMap::new(),
|
||||||
|
security_policy: None,
|
||||||
created_at: scheduler.timestamp(),
|
created_at: scheduler.timestamp(),
|
||||||
updated_at: None,
|
updated_at: None,
|
||||||
completed_at: None,
|
completed_at: None,
|
||||||
@@ -704,7 +706,7 @@ mod tests {
|
|||||||
failing.alert_policy = Some(FleetAlertPolicy {
|
failing.alert_policy = Some(FleetAlertPolicy {
|
||||||
events: vec![FleetAlertEventClass::RestartExhausted],
|
events: vec![FleetAlertEventClass::RestartExhausted],
|
||||||
channels: vec![FleetAlertChannel::Slack {
|
channels: vec![FleetAlertChannel::Slack {
|
||||||
webhook_url: "https://hooks.slack.invalid/secret".to_string(),
|
webhook: FleetAlertEndpoint::inline("https://hooks.slack.invalid/secret"),
|
||||||
}],
|
}],
|
||||||
after_attempts: Some(1),
|
after_attempts: Some(1),
|
||||||
after_minutes_stale: Some(1),
|
after_minutes_stale: Some(1),
|
||||||
|
|||||||
@@ -23,6 +23,9 @@ pub struct FleetTaskSpecDocument {
|
|||||||
pub name: Option<String>,
|
pub name: Option<String>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub labels: BTreeMap<String, String>,
|
pub labels: BTreeMap<String, String>,
|
||||||
|
#[serde(default)]
|
||||||
|
#[serde(skip_serializing_if = "Option::is_none")]
|
||||||
|
pub security_policy: Option<FleetSecurityPolicy>,
|
||||||
#[serde(default, alias = "worker_specs")]
|
#[serde(default, alias = "worker_specs")]
|
||||||
pub workers: Vec<FleetWorkerSpec>,
|
pub workers: Vec<FleetWorkerSpec>,
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
@@ -49,12 +52,14 @@ impl FleetTaskSpecFile {
|
|||||||
Self::Tasks(tasks) => FleetTaskSpecDocument {
|
Self::Tasks(tasks) => FleetTaskSpecDocument {
|
||||||
name: Some(fallback_name),
|
name: Some(fallback_name),
|
||||||
labels: BTreeMap::new(),
|
labels: BTreeMap::new(),
|
||||||
|
security_policy: None,
|
||||||
workers: Vec::new(),
|
workers: Vec::new(),
|
||||||
tasks,
|
tasks,
|
||||||
},
|
},
|
||||||
Self::Single(task) => FleetTaskSpecDocument {
|
Self::Single(task) => FleetTaskSpecDocument {
|
||||||
name: Some(fallback_name),
|
name: Some(fallback_name),
|
||||||
labels: BTreeMap::new(),
|
labels: BTreeMap::new(),
|
||||||
|
security_policy: None,
|
||||||
workers: Vec::new(),
|
workers: Vec::new(),
|
||||||
tasks: vec![task],
|
tasks: vec![task],
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -0,0 +1,626 @@
|
|||||||
|
//! Fleet worker runtime — bridges fleet task specs to headless sub-agent execution.
|
||||||
|
//!
|
||||||
|
//! This module makes fleet workers real: instead of simulating task completion,
|
||||||
|
//! each fleet worker spawns a headless sub-agent that runs the task instructions
|
||||||
|
//! and streams progress back into the fleet ledger.
|
||||||
|
//!
|
||||||
|
//! Architecture:
|
||||||
|
//! - `FleetTaskSpec` + `FleetWorkerSpec` → `AgentWorkerSpec`
|
||||||
|
//! - `SubAgentManager::register_worker()` tracks the worker
|
||||||
|
//! - Sub-agent spawn happens through the existing `agent_open` machinery
|
||||||
|
//! - Mailbox events stream into fleet ledger as `FleetWorkerEventPayload`
|
||||||
|
//! - `FleetWorkerInspection` reads both ledger state and sub-agent worker records
|
||||||
|
|
||||||
|
#![allow(dead_code)]
|
||||||
|
|
||||||
|
use codewhale_protocol::fleet::{
|
||||||
|
FleetHostSpec, FleetTaskSpec, FleetTaskWorkerProfile, FleetWorkerEventPayload, FleetWorkerSpec,
|
||||||
|
};
|
||||||
|
|
||||||
|
use super::host::FleetHostKind;
|
||||||
|
use crate::tools::subagent::{
|
||||||
|
AgentWorkerSpec, AgentWorkerStatus, AgentWorkerToolProfile, SubAgentType,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Map a fleet worker spec's host kind to a display string.
|
||||||
|
pub fn fleet_host_kind_for_spec(spec: &FleetWorkerSpec) -> FleetHostKind {
|
||||||
|
match &spec.host {
|
||||||
|
FleetHostSpec::Local => FleetHostKind::LocalProcess,
|
||||||
|
FleetHostSpec::Ssh { .. } => FleetHostKind::Ssh,
|
||||||
|
FleetHostSpec::Docker { .. } => FleetHostKind::LocalProcess, // Docker runs local-ish
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Map a fleet host kind to a compact display label.
|
||||||
|
pub fn fleet_host_kind_label(kind: FleetHostKind) -> &'static str {
|
||||||
|
match kind {
|
||||||
|
FleetHostKind::LocalProcess => "local",
|
||||||
|
FleetHostKind::Ssh => "ssh",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Build a sub-agent `AgentWorkerSpec` from a fleet task spec and worker spec.
|
||||||
|
///
|
||||||
|
/// The fleet task's `instructions` become the sub-agent's `objective`, the
|
||||||
|
/// `worker.role` maps to a `SubAgentType`, and tool/capability restrictions
|
||||||
|
/// become an `AgentWorkerToolProfile`.
|
||||||
|
pub fn fleet_task_to_worker_spec(
|
||||||
|
worker_id: &str,
|
||||||
|
run_id: &str,
|
||||||
|
task_spec: &FleetTaskSpec,
|
||||||
|
_worker_spec: &FleetWorkerSpec,
|
||||||
|
model: &str,
|
||||||
|
workspace: &std::path::Path,
|
||||||
|
) -> AgentWorkerSpec {
|
||||||
|
let agent_type =
|
||||||
|
fleet_role_to_agent_type(task_spec.worker.as_ref().and_then(|w| w.role.as_deref()));
|
||||||
|
|
||||||
|
let tool_profile = fleet_tool_profile(task_spec.worker.as_ref());
|
||||||
|
|
||||||
|
let objective = fleet_task_prompt(task_spec);
|
||||||
|
|
||||||
|
AgentWorkerSpec {
|
||||||
|
worker_id: worker_id.to_string(),
|
||||||
|
run_id: run_id.to_string(),
|
||||||
|
parent_run_id: None,
|
||||||
|
session_name: Some(format!("fleet-{}-{}", worker_id, task_spec.id)),
|
||||||
|
objective,
|
||||||
|
role: task_spec.worker.as_ref().and_then(|w| w.role.clone()),
|
||||||
|
agent_type,
|
||||||
|
model: model.to_string(),
|
||||||
|
workspace: workspace.to_path_buf(),
|
||||||
|
git_branch: None,
|
||||||
|
context_mode: "fresh".to_string(),
|
||||||
|
fork_context: false,
|
||||||
|
tool_profile,
|
||||||
|
max_steps: task_spec
|
||||||
|
.budget
|
||||||
|
.as_ref()
|
||||||
|
.and_then(|b| b.max_tool_calls)
|
||||||
|
.unwrap_or(u32::MAX),
|
||||||
|
spawn_depth: 0,
|
||||||
|
max_spawn_depth: codewhale_config::FleetExecConfig::default().max_spawn_depth,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn fleet_task_prompt(task_spec: &FleetTaskSpec) -> String {
|
||||||
|
let mut prompt = String::new();
|
||||||
|
prompt.push_str("Fleet task: ");
|
||||||
|
prompt.push_str(&task_spec.name);
|
||||||
|
|
||||||
|
if let Some(objective) = task_spec.objective.as_deref() {
|
||||||
|
prompt.push_str("\n\nObjective:\n");
|
||||||
|
prompt.push_str(objective);
|
||||||
|
} else if let Some(description) = task_spec.description.as_deref() {
|
||||||
|
prompt.push_str("\n\nObjective:\n");
|
||||||
|
prompt.push_str(description);
|
||||||
|
}
|
||||||
|
|
||||||
|
prompt.push_str("\n\nInstructions:\n");
|
||||||
|
prompt.push_str(&task_spec.instructions);
|
||||||
|
|
||||||
|
if !task_spec.context.is_empty() {
|
||||||
|
prompt.push_str("\n\nContext:\n");
|
||||||
|
for item in &task_spec.context {
|
||||||
|
prompt.push_str("- ");
|
||||||
|
prompt.push_str(item);
|
||||||
|
prompt.push('\n');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !task_spec.input_files.is_empty() {
|
||||||
|
prompt.push_str("\nInput files:\n");
|
||||||
|
for path in &task_spec.input_files {
|
||||||
|
prompt.push_str("- ");
|
||||||
|
prompt.push_str(&path.display().to_string());
|
||||||
|
prompt.push('\n');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
prompt
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Map a fleet role name to a `SubAgentType`. Unknown roles default to `General`.
|
||||||
|
fn fleet_role_to_agent_type(role: Option<&str>) -> SubAgentType {
|
||||||
|
match role {
|
||||||
|
Some("smoke-runner") | Some("read-only") => SubAgentType::ToolAgent,
|
||||||
|
Some("reviewer") => SubAgentType::Review,
|
||||||
|
Some("builder") => SubAgentType::Implementer,
|
||||||
|
Some("verifier") | Some("tester") => SubAgentType::Verifier,
|
||||||
|
Some("planner") => SubAgentType::Plan,
|
||||||
|
Some("explorer") => SubAgentType::Explore,
|
||||||
|
Some("general") | None => SubAgentType::General,
|
||||||
|
Some(other) => {
|
||||||
|
// Try parsing as a SubAgentType directly
|
||||||
|
SubAgentType::from_str(other).unwrap_or(SubAgentType::General)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Convert a fleet worker profile's tool list into an `AgentWorkerToolProfile`.
|
||||||
|
fn fleet_tool_profile(profile: Option<&FleetTaskWorkerProfile>) -> AgentWorkerToolProfile {
|
||||||
|
match profile {
|
||||||
|
Some(p) if !p.tools.is_empty() => AgentWorkerToolProfile::Explicit(p.tools.clone()),
|
||||||
|
_ => AgentWorkerToolProfile::Inherited,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a fleet artifact ref from a worker output.
|
||||||
|
///
|
||||||
|
/// Uses the fleet artifact conventions: logs go under `.codewhale/fleet/`,
|
||||||
|
/// reports under `.codewhale/fleet/reports/`.
|
||||||
|
pub fn fleet_artifact_ref(
|
||||||
|
_run_id: &str,
|
||||||
|
_worker_id: &str,
|
||||||
|
kind: codewhale_protocol::fleet::FleetArtifactKind,
|
||||||
|
path: std::path::PathBuf,
|
||||||
|
) -> codewhale_protocol::fleet::FleetArtifactRef {
|
||||||
|
codewhale_protocol::fleet::FleetArtifactRef {
|
||||||
|
kind,
|
||||||
|
path,
|
||||||
|
checksum: None,
|
||||||
|
mime_type: None,
|
||||||
|
size_bytes: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Map a sub-agent `AgentWorkerStatus` to a fleet `FleetWorkerEventPayload`.
|
||||||
|
///
|
||||||
|
/// This is the streaming bridge: as the sub-agent runs, each status transition
|
||||||
|
/// produces a corresponding fleet ledger event so the TUI surfaces stay in sync.
|
||||||
|
pub fn agent_status_to_fleet_event(
|
||||||
|
status: AgentWorkerStatus,
|
||||||
|
message: Option<&str>,
|
||||||
|
tool_name: Option<&str>,
|
||||||
|
) -> FleetWorkerEventPayload {
|
||||||
|
match status {
|
||||||
|
AgentWorkerStatus::Queued => FleetWorkerEventPayload::Queued,
|
||||||
|
AgentWorkerStatus::Starting => FleetWorkerEventPayload::Starting,
|
||||||
|
AgentWorkerStatus::Running => FleetWorkerEventPayload::Running,
|
||||||
|
AgentWorkerStatus::WaitingForUser => FleetWorkerEventPayload::ModelWait { model: None },
|
||||||
|
AgentWorkerStatus::ModelWait => FleetWorkerEventPayload::ModelWait { model: None },
|
||||||
|
AgentWorkerStatus::RunningTool => FleetWorkerEventPayload::RunningTool {
|
||||||
|
tool: tool_name.unwrap_or("unknown").to_string(),
|
||||||
|
call_id: None,
|
||||||
|
},
|
||||||
|
AgentWorkerStatus::Completed => FleetWorkerEventPayload::Completed {
|
||||||
|
exit_code: Some(0),
|
||||||
|
summary: message.map(|s| s.to_string()),
|
||||||
|
},
|
||||||
|
AgentWorkerStatus::Failed => FleetWorkerEventPayload::Failed {
|
||||||
|
reason: message.unwrap_or("unknown error").to_string(),
|
||||||
|
recoverable: false,
|
||||||
|
},
|
||||||
|
AgentWorkerStatus::Cancelled => FleetWorkerEventPayload::Cancelled { cancelled_by: None },
|
||||||
|
AgentWorkerStatus::Interrupted => FleetWorkerEventPayload::Interrupted {
|
||||||
|
signal: message.map(|s| s.to_string()),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Apply exec hardening to a worker spec from fleet config (#3027).
|
||||||
|
///
|
||||||
|
/// Filters tools against allowed/disallowed lists, caps max_steps to
|
||||||
|
/// config's max_turns, and returns the objective with system prompt
|
||||||
|
/// appended when configured.
|
||||||
|
pub fn apply_exec_hardening(
|
||||||
|
mut spec: AgentWorkerSpec,
|
||||||
|
exec: &codewhale_config::FleetExecConfig,
|
||||||
|
) -> AgentWorkerSpec {
|
||||||
|
// Cap max_steps to config max_turns
|
||||||
|
if exec.max_turns > 0 && exec.max_turns != u32::MAX {
|
||||||
|
spec.max_steps = spec.max_steps.min(exec.max_turns);
|
||||||
|
}
|
||||||
|
spec.max_spawn_depth = exec
|
||||||
|
.max_spawn_depth
|
||||||
|
.min(codewhale_config::MAX_SPAWN_DEPTH_CEILING);
|
||||||
|
|
||||||
|
// Apply tool filtering
|
||||||
|
if !exec.allowed_tools.is_empty() || !exec.disallowed_tools.is_empty() {
|
||||||
|
spec.tool_profile = filter_tool_profile(&spec.tool_profile, exec);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Append system prompt
|
||||||
|
if !exec.append_system_prompt.is_empty() {
|
||||||
|
spec.objective = format!(
|
||||||
|
"{}\n\n[Policy]\n{}",
|
||||||
|
spec.objective, exec.append_system_prompt
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
spec
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Filter a tool profile against allowed/disallowed lists.
|
||||||
|
fn filter_tool_profile(
|
||||||
|
profile: &AgentWorkerToolProfile,
|
||||||
|
exec: &codewhale_config::FleetExecConfig,
|
||||||
|
) -> AgentWorkerToolProfile {
|
||||||
|
match profile {
|
||||||
|
AgentWorkerToolProfile::Explicit(tools) => {
|
||||||
|
let filtered: Vec<String> = tools
|
||||||
|
.iter()
|
||||||
|
.filter(|t| {
|
||||||
|
// If allowed_tools is non-empty, only keep tools in the list
|
||||||
|
if !exec.allowed_tools.is_empty() && !exec.allowed_tools.contains(t) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Disallowed tools always win
|
||||||
|
!exec.disallowed_tools.contains(t)
|
||||||
|
})
|
||||||
|
.cloned()
|
||||||
|
.collect();
|
||||||
|
AgentWorkerToolProfile::Explicit(filtered)
|
||||||
|
}
|
||||||
|
AgentWorkerToolProfile::Inherited => {
|
||||||
|
// Inherited profiles can't be filtered at spec time;
|
||||||
|
// the sub-agent spawn path applies tool filtering.
|
||||||
|
AgentWorkerToolProfile::Inherited
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Report whether `tool_name` is on the fixed list of read-only tools that are
/// candidates for conservative parallel batching (#2983).
///
/// The match is exact and case-sensitive; any name not on the list returns
/// `false`.
pub fn is_parallel_safe_read_only_tool(tool_name: &str) -> bool {
    const PARALLEL_SAFE_TOOLS: &[&str] = &[
        "read_file",
        "grep_files",
        "file_search",
        "list_dir",
        "git_status",
        "git_diff",
        "git_log",
        "git_show",
        "git_blame",
        "fetch_url",
        "web_search",
        "tool_search_tool_regex",
        "tool_search_tool_bm25",
    ];

    PARALLEL_SAFE_TOOLS.contains(&tool_name)
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
fn fleet_role_smoke_runner_maps_to_tool_agent() {
    // "smoke-runner" is a fleet alias with a fixed mapping to the tool agent.
    let mapped = fleet_role_to_agent_type(Some("smoke-runner"));
    assert_eq!(mapped, SubAgentType::ToolAgent);
}
|
||||||
|
|
||||||
|
#[test]
fn fleet_role_reviewer_maps_to_review() {
    // "reviewer" is a fleet alias with a fixed mapping to the review agent.
    let mapped = fleet_role_to_agent_type(Some("reviewer"));
    assert_eq!(mapped, SubAgentType::Review);
}
|
||||||
|
|
||||||
|
#[test]
fn fleet_role_builder_maps_to_implementer() {
    // "builder" is a fleet alias with a fixed mapping to the implementer agent.
    let mapped = fleet_role_to_agent_type(Some("builder"));
    assert_eq!(mapped, SubAgentType::Implementer);
}
|
||||||
|
|
||||||
|
#[test]
fn fleet_role_none_maps_to_general() {
    // A task without a role runs as the general agent.
    let mapped = fleet_role_to_agent_type(None);
    assert_eq!(mapped, SubAgentType::General);
}
|
||||||
|
|
||||||
|
#[test]
fn unknown_role_maps_to_general() {
    // A name that is neither a fleet alias nor a sub-agent type falls back to general.
    let mapped = fleet_role_to_agent_type(Some("nonexistent-role"));
    assert_eq!(mapped, SubAgentType::General);
}
|
||||||
|
|
||||||
|
#[test]
fn fleet_tool_profile_empty_uses_inherited() {
    // A worker profile that lists no tools must not produce an explicit profile.
    let no_tools = FleetTaskWorkerProfile {
        role: None,
        tool_profile: None,
        tools: Vec::new(),
        capabilities: Vec::new(),
    };

    let resolved = fleet_tool_profile(Some(&no_tools));

    assert_eq!(resolved, AgentWorkerToolProfile::Inherited);
}
|
||||||
|
|
||||||
|
#[test]
fn fleet_tool_profile_explicit_passes_tools() {
    // Listed tools are carried over unchanged and in order.
    let tools = vec!["cargo".to_string(), "git".to_string()];
    let with_tools = FleetTaskWorkerProfile {
        role: None,
        tool_profile: None,
        tools: tools.clone(),
        capabilities: Vec::new(),
    };

    let resolved = fleet_tool_profile(Some(&with_tools));

    assert_eq!(resolved, AgentWorkerToolProfile::Explicit(tools));
}
|
||||||
|
|
||||||
|
#[test]
fn fleet_task_prompt_includes_instructions_context_and_input_files() {
    let input_file = std::path::PathBuf::from("crates/protocol/src/fleet.rs");
    let task = FleetTaskSpec {
        id: "review".to_string(),
        name: "Review protocol".to_string(),
        description: None,
        objective: Some("Find protocol regressions".to_string()),
        instructions: "Read the fleet protocol and report issues.".to_string(),
        worker: None,
        workspace: None,
        input_files: vec![input_file],
        context: vec!["Keep the report concise.".to_string()],
        budget: None,
        tags: Vec::new(),
        expected_artifacts: Vec::new(),
        scorer: None,
        retry_policy: None,
        alert_policy: None,
        timeout_seconds: None,
        metadata: Default::default(),
    };

    let prompt = fleet_task_prompt(&task);

    // Name, objective, instructions, context item, and input file must all appear.
    let expected_fragments = [
        "Review protocol",
        "Find protocol regressions",
        "Read the fleet protocol and report issues.",
        "Keep the report concise.",
        "crates/protocol/src/fleet.rs",
    ];
    for fragment in expected_fragments.iter() {
        assert!(prompt.contains(fragment), "prompt is missing {:?}", fragment);
    }
}
|
||||||
|
|
||||||
|
#[test]
fn fleet_worker_spec_defaults_to_shared_subagent_spawn_depth() {
    let bare_task = FleetTaskSpec {
        id: "task-1".to_string(),
        name: "Task".to_string(),
        description: None,
        objective: None,
        instructions: "Do the task.".to_string(),
        worker: None,
        workspace: None,
        input_files: Vec::new(),
        context: Vec::new(),
        budget: None,
        tags: Vec::new(),
        expected_artifacts: Vec::new(),
        scorer: None,
        retry_policy: None,
        alert_policy: None,
        timeout_seconds: None,
        metadata: Default::default(),
    };
    let local_worker = FleetWorkerSpec {
        id: "worker-1".to_string(),
        name: "Worker".to_string(),
        host: FleetHostSpec::Local,
        trust_level: None,
        labels: Default::default(),
        capabilities: Vec::new(),
        max_concurrent_tasks: None,
    };
    let workspace = std::path::Path::new("/tmp");

    let spec = fleet_task_to_worker_spec(
        "worker-1",
        "run-1",
        &bare_task,
        &local_worker,
        "auto",
        workspace,
    );

    // The root fleet worker sits at depth 0 and gets the shared sub-agent
    // default budget (3), so fleet workers and sub-agents share one recursion
    // axis with at least three nested delegation levels.
    assert_eq!(spec.spawn_depth, 0);
    assert_eq!(spec.max_spawn_depth, codewhale_config::DEFAULT_SPAWN_DEPTH);
    assert_eq!(spec.max_spawn_depth, 3);

    // Reachability check using the same gate the SubAgentRuntime enforces
    // (`would_exceed_depth` = `spawn_depth + 1 > max_spawn_depth`): a depth-0
    // root must reach three nested levels and then stop. Lowering the shared
    // default below 3 makes this fail.
    let default_exec = codewhale_config::FleetExecConfig::default();
    let hardened = apply_exec_hardening(spec, &default_exec);
    let depth_budget = hardened.max_spawn_depth;
    let would_exceed = |spawn_depth: u32| spawn_depth + 1 > depth_budget;

    assert!(
        !would_exceed(0),
        "root (depth 0) must spawn a child at depth 1"
    );
    assert!(!would_exceed(1), "depth-1 child must spawn to depth 2");
    assert!(!would_exceed(2), "depth-2 child must spawn to depth 3");
    assert!(
        would_exceed(3),
        "depth 3 is the afforded ceiling; depth 4 is blocked"
    );
}
|
||||||
|
|
||||||
|
#[test]
fn exec_hardening_caps_max_steps_to_max_turns() {
    // A worker asking for 1000 steps under a 50-turn ceiling.
    let requested = AgentWorkerSpec {
        worker_id: "w1".to_string(),
        run_id: "r1".to_string(),
        parent_run_id: None,
        session_name: None,
        objective: "test".to_string(),
        role: None,
        agent_type: SubAgentType::General,
        model: "auto".to_string(),
        workspace: std::path::PathBuf::from("/tmp"),
        git_branch: None,
        context_mode: "fresh".to_string(),
        fork_context: false,
        tool_profile: AgentWorkerToolProfile::Inherited,
        max_steps: 1000,
        spawn_depth: 0,
        max_spawn_depth: 0,
    };
    let ceiling = codewhale_config::FleetExecConfig {
        max_turns: 50,
        ..Default::default()
    };

    let capped = apply_exec_hardening(requested, &ceiling);

    assert_eq!(capped.max_steps, 50);
}
|
||||||
|
|
||||||
|
#[test]
fn exec_hardening_applies_and_clamps_spawn_depth() {
    let base = AgentWorkerSpec {
        worker_id: "w1".to_string(),
        run_id: "r1".to_string(),
        parent_run_id: None,
        session_name: None,
        objective: "test".to_string(),
        role: None,
        agent_type: SubAgentType::General,
        model: "auto".to_string(),
        workspace: std::path::PathBuf::from("/tmp"),
        git_branch: None,
        context_mode: "fresh".to_string(),
        fork_context: false,
        tool_profile: AgentWorkerToolProfile::Inherited,
        max_steps: 1000,
        spawn_depth: 0,
        max_spawn_depth: 0,
    };

    // (configured depth, depth expected after hardening):
    // an in-range value is applied, an oversized value is clamped to 3,
    // and zero is kept as zero.
    let cases = [(2, 2), (99, 3), (0, 0)];
    for (configured, expected) in cases.iter().copied() {
        let exec = codewhale_config::FleetExecConfig {
            max_spawn_depth: configured,
            ..Default::default()
        };
        let hardened = apply_exec_hardening(base.clone(), &exec);
        assert_eq!(hardened.max_spawn_depth, expected);
    }
}
|
||||||
|
|
||||||
|
#[test]
fn exec_hardening_filters_disallowed_tools() {
    let owned = |names: &[&str]| -> Vec<String> { names.iter().map(|n| n.to_string()).collect() };

    // Deny list only: the denied tool is dropped, the rest keep their order.
    let requested =
        AgentWorkerToolProfile::Explicit(owned(&["read_file", "exec_shell", "git_diff"]));
    let rules = codewhale_config::FleetExecConfig {
        disallowed_tools: owned(&["exec_shell"]),
        ..Default::default()
    };

    let remaining = filter_tool_profile(&requested, &rules);

    assert_eq!(
        remaining,
        AgentWorkerToolProfile::Explicit(owned(&["read_file", "git_diff"]))
    );
}
|
||||||
|
|
||||||
|
#[test]
fn exec_hardening_allowed_tools_acts_as_allowlist() {
    let owned = |names: &[&str]| -> Vec<String> { names.iter().map(|n| n.to_string()).collect() };

    // Allowlist only: tools missing from it are dropped.
    let requested =
        AgentWorkerToolProfile::Explicit(owned(&["read_file", "exec_shell", "git_diff"]));
    let rules = codewhale_config::FleetExecConfig {
        allowed_tools: owned(&["read_file", "git_diff"]),
        ..Default::default()
    };

    let remaining = filter_tool_profile(&requested, &rules);

    assert_eq!(
        remaining,
        AgentWorkerToolProfile::Explicit(owned(&["read_file", "git_diff"]))
    );
}
|
||||||
|
|
||||||
|
/// A tool that is both allowed and disallowed is removed: the disallow list
/// takes precedence over the allowlist.
#[test]
fn exec_hardening_allowed_plus_disallowed_disallowed_wins() {
    let both = ["read_file", "exec_shell"].map(String::from);
    let hardening = codewhale_config::FleetExecConfig {
        allowed_tools: both.to_vec(),
        disallowed_tools: vec![String::from("exec_shell")],
        ..Default::default()
    };

    let expected = AgentWorkerToolProfile::Explicit(vec![String::from("read_file")]);
    let actual = filter_tool_profile(
        &AgentWorkerToolProfile::Explicit(both.to_vec()),
        &hardening,
    );

    assert_eq!(actual, expected);
}
|
||||||
|
|
||||||
|
/// File reads, grep, git status, and web search are all reported as
/// parallel-safe read-only tools.
#[test]
fn parallel_safe_read_only_tools_includes_grep_and_read() {
    for tool in ["read_file", "grep_files", "git_status", "web_search"] {
        assert!(
            is_parallel_safe_read_only_tool(tool),
            "{tool} should be parallel-safe"
        );
    }
}
|
||||||
|
|
||||||
|
/// Shell execution, file writes/edits, patch application, and agent spawning
/// must never be reported as parallel-safe read-only tools.
#[test]
fn destructive_tools_not_parallel_safe() {
    let destructive = [
        "exec_shell",
        "write_file",
        "edit_file",
        "apply_patch",
        "agent_open",
    ];
    for tool in destructive {
        assert!(
            !is_parallel_safe_read_only_tool(tool),
            "{tool} must not be parallel-safe"
        );
    }
}
|
||||||
|
|
||||||
|
/// With `append_system_prompt` configured, the hardened objective contains
/// the original objective text, a `[Policy]` marker, and the appended prompt.
#[test]
fn exec_hardening_appends_system_prompt() {
    let policy = codewhale_config::FleetExecConfig {
        append_system_prompt: String::from("never push to main"),
        ..Default::default()
    };
    let worker = AgentWorkerSpec {
        worker_id: String::from("w1"),
        run_id: String::from("r1"),
        parent_run_id: None,
        session_name: None,
        objective: String::from("do the thing"),
        role: None,
        agent_type: SubAgentType::General,
        model: String::from("auto"),
        workspace: std::path::PathBuf::from("/tmp"),
        git_branch: None,
        context_mode: String::from("fresh"),
        fork_context: false,
        tool_profile: AgentWorkerToolProfile::Inherited,
        max_steps: 100,
        spawn_depth: 0,
        max_spawn_depth: 0,
    };

    let hardened = apply_exec_hardening(worker, &policy);
    let objective = &hardened.objective;

    for fragment in ["do the thing", "[Policy]", "never push to main"] {
        assert!(
            objective.contains(fragment),
            "hardened objective is missing {fragment:?}"
        );
    }
}
|
||||||
|
}
|
||||||
@@ -255,6 +255,7 @@ pub enum MessageId {
|
|||||||
ConfigSectionSidebar,
|
ConfigSectionSidebar,
|
||||||
ConfigSectionHistory,
|
ConfigSectionHistory,
|
||||||
ConfigSectionMcp,
|
ConfigSectionMcp,
|
||||||
|
ConfigSectionFleet,
|
||||||
ConfigSectionExperimental,
|
ConfigSectionExperimental,
|
||||||
ConfigScopeSession,
|
ConfigScopeSession,
|
||||||
ConfigScopeSaved,
|
ConfigScopeSaved,
|
||||||
@@ -699,6 +700,7 @@ pub const ALL_MESSAGE_IDS: &[MessageId] = &[
|
|||||||
MessageId::ConfigSectionSidebar,
|
MessageId::ConfigSectionSidebar,
|
||||||
MessageId::ConfigSectionHistory,
|
MessageId::ConfigSectionHistory,
|
||||||
MessageId::ConfigSectionMcp,
|
MessageId::ConfigSectionMcp,
|
||||||
|
MessageId::ConfigSectionFleet,
|
||||||
MessageId::ConfigSectionExperimental,
|
MessageId::ConfigSectionExperimental,
|
||||||
MessageId::ConfigScopeSession,
|
MessageId::ConfigScopeSession,
|
||||||
MessageId::ConfigScopeSaved,
|
MessageId::ConfigScopeSaved,
|
||||||
@@ -1315,6 +1317,7 @@ fn english(id: MessageId) -> &'static str {
|
|||||||
MessageId::ConfigSectionSidebar => "Sidebar",
|
MessageId::ConfigSectionSidebar => "Sidebar",
|
||||||
MessageId::ConfigSectionHistory => "History",
|
MessageId::ConfigSectionHistory => "History",
|
||||||
MessageId::ConfigSectionMcp => "MCP",
|
MessageId::ConfigSectionMcp => "MCP",
|
||||||
|
MessageId::ConfigSectionFleet => "Fleet",
|
||||||
MessageId::ConfigSectionExperimental => "Experimental",
|
MessageId::ConfigSectionExperimental => "Experimental",
|
||||||
MessageId::ConfigScopeSession => "SESSION",
|
MessageId::ConfigScopeSession => "SESSION",
|
||||||
MessageId::ConfigScopeSaved => "SAVED",
|
MessageId::ConfigScopeSaved => "SAVED",
|
||||||
@@ -1915,6 +1918,7 @@ fn vietnamese(id: MessageId) -> Option<&'static str> {
|
|||||||
MessageId::ConfigSectionSidebar => "Thanh bên",
|
MessageId::ConfigSectionSidebar => "Thanh bên",
|
||||||
MessageId::ConfigSectionHistory => "Lịch sử",
|
MessageId::ConfigSectionHistory => "Lịch sử",
|
||||||
MessageId::ConfigSectionMcp => "MCP",
|
MessageId::ConfigSectionMcp => "MCP",
|
||||||
|
MessageId::ConfigSectionFleet => "Fleet",
|
||||||
MessageId::ConfigSectionExperimental => "Thử nghiệm",
|
MessageId::ConfigSectionExperimental => "Thử nghiệm",
|
||||||
MessageId::ConfigScopeSession => "PHIÊN",
|
MessageId::ConfigScopeSession => "PHIÊN",
|
||||||
MessageId::ConfigScopeSaved => "ĐÃ LƯU",
|
MessageId::ConfigScopeSaved => "ĐÃ LƯU",
|
||||||
@@ -2624,6 +2628,7 @@ fn traditional_chinese(id: MessageId) -> Option<&'static str> {
|
|||||||
MessageId::ConfigSectionSidebar => "側邊欄",
|
MessageId::ConfigSectionSidebar => "側邊欄",
|
||||||
MessageId::ConfigSectionHistory => "歷史",
|
MessageId::ConfigSectionHistory => "歷史",
|
||||||
MessageId::ConfigSectionMcp => "MCP",
|
MessageId::ConfigSectionMcp => "MCP",
|
||||||
|
MessageId::ConfigSectionFleet => "艦隊",
|
||||||
MessageId::ConfigSectionExperimental => "實驗",
|
MessageId::ConfigSectionExperimental => "實驗",
|
||||||
MessageId::ConfigScopeSession => "會話",
|
MessageId::ConfigScopeSession => "會話",
|
||||||
MessageId::ConfigScopeSaved => "已儲存",
|
MessageId::ConfigScopeSaved => "已儲存",
|
||||||
@@ -2724,6 +2729,7 @@ fn japanese(id: MessageId) -> Option<&'static str> {
|
|||||||
MessageId::ConfigSectionSidebar => "サイドバー",
|
MessageId::ConfigSectionSidebar => "サイドバー",
|
||||||
MessageId::ConfigSectionHistory => "履歴",
|
MessageId::ConfigSectionHistory => "履歴",
|
||||||
MessageId::ConfigSectionMcp => "MCP",
|
MessageId::ConfigSectionMcp => "MCP",
|
||||||
|
MessageId::ConfigSectionFleet => "Fleet",
|
||||||
MessageId::ConfigSectionExperimental => "実験",
|
MessageId::ConfigSectionExperimental => "実験",
|
||||||
MessageId::ConfigScopeSession => "セッション",
|
MessageId::ConfigScopeSession => "セッション",
|
||||||
MessageId::ConfigScopeSaved => "保存済み",
|
MessageId::ConfigScopeSaved => "保存済み",
|
||||||
@@ -3315,6 +3321,7 @@ fn chinese_simplified(id: MessageId) -> Option<&'static str> {
|
|||||||
MessageId::ConfigSectionSidebar => "侧边栏",
|
MessageId::ConfigSectionSidebar => "侧边栏",
|
||||||
MessageId::ConfigSectionHistory => "历史",
|
MessageId::ConfigSectionHistory => "历史",
|
||||||
MessageId::ConfigSectionMcp => "MCP",
|
MessageId::ConfigSectionMcp => "MCP",
|
||||||
|
MessageId::ConfigSectionFleet => "舰队",
|
||||||
MessageId::ConfigSectionExperimental => "实验",
|
MessageId::ConfigSectionExperimental => "实验",
|
||||||
MessageId::ConfigScopeSession => "会话",
|
MessageId::ConfigScopeSession => "会话",
|
||||||
MessageId::ConfigScopeSaved => "已保存",
|
MessageId::ConfigScopeSaved => "已保存",
|
||||||
@@ -3842,6 +3849,7 @@ fn portuguese_brazil(id: MessageId) -> Option<&'static str> {
|
|||||||
MessageId::ConfigSectionSidebar => "Barra lateral",
|
MessageId::ConfigSectionSidebar => "Barra lateral",
|
||||||
MessageId::ConfigSectionHistory => "Histórico",
|
MessageId::ConfigSectionHistory => "Histórico",
|
||||||
MessageId::ConfigSectionMcp => "MCP",
|
MessageId::ConfigSectionMcp => "MCP",
|
||||||
|
MessageId::ConfigSectionFleet => "Fleet",
|
||||||
MessageId::ConfigSectionExperimental => "Experimental",
|
MessageId::ConfigSectionExperimental => "Experimental",
|
||||||
MessageId::ConfigScopeSession => "SESSÃO",
|
MessageId::ConfigScopeSession => "SESSÃO",
|
||||||
MessageId::ConfigScopeSaved => "SALVO",
|
MessageId::ConfigScopeSaved => "SALVO",
|
||||||
@@ -4461,6 +4469,7 @@ fn spanish_latin_america(id: MessageId) -> Option<&'static str> {
|
|||||||
MessageId::ConfigSectionSidebar => "Barra lateral",
|
MessageId::ConfigSectionSidebar => "Barra lateral",
|
||||||
MessageId::ConfigSectionHistory => "Historial",
|
MessageId::ConfigSectionHistory => "Historial",
|
||||||
MessageId::ConfigSectionMcp => "MCP",
|
MessageId::ConfigSectionMcp => "MCP",
|
||||||
|
MessageId::ConfigSectionFleet => "Fleet",
|
||||||
MessageId::ConfigSectionExperimental => "Experimental",
|
MessageId::ConfigSectionExperimental => "Experimental",
|
||||||
MessageId::ConfigScopeSession => "SESIÓN",
|
MessageId::ConfigScopeSession => "SESIÓN",
|
||||||
MessageId::ConfigScopeSaved => "GUARDADO",
|
MessageId::ConfigScopeSaved => "GUARDADO",
|
||||||
|
|||||||
@@ -1193,8 +1193,9 @@ async fn main() -> Result<()> {
|
|||||||
run_swebench_command(&config, &model, workspace, max_subagents, args).await
|
run_swebench_command(&config, &model, workspace, max_subagents, args).await
|
||||||
}
|
}
|
||||||
Commands::Fleet(args) => {
|
Commands::Fleet(args) => {
|
||||||
|
let config = load_config_from_cli(&cli)?;
|
||||||
let workspace = resolve_workspace(&cli);
|
let workspace = resolve_workspace(&cli);
|
||||||
run_fleet_command(&workspace, args).await
|
run_fleet_command(&workspace, &config, args).await
|
||||||
}
|
}
|
||||||
Commands::Review(args) => {
|
Commands::Review(args) => {
|
||||||
let config = load_config_from_cli(&cli)?;
|
let config = load_config_from_cli(&cli)?;
|
||||||
@@ -1458,7 +1459,7 @@ async fn run_swebench_command(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn run_fleet_command(workspace: &Path, args: FleetArgs) -> Result<()> {
|
async fn run_fleet_command(workspace: &Path, config: &Config, args: FleetArgs) -> Result<()> {
|
||||||
use crate::fleet::alerts::{
|
use crate::fleet::alerts::{
|
||||||
FleetAlertAdapterConfig, FleetAlertConfig, FleetAlertDispatcher, FleetAlertEvent,
|
FleetAlertAdapterConfig, FleetAlertConfig, FleetAlertDispatcher, FleetAlertEvent,
|
||||||
FleetEnvSecretResolver,
|
FleetEnvSecretResolver,
|
||||||
@@ -1712,7 +1713,12 @@ async fn run_fleet_command(workspace: &Path, args: FleetArgs) -> Result<()> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let manager = FleetManager::open(workspace)?;
|
let exec_config = config
|
||||||
|
.fleet
|
||||||
|
.as_ref()
|
||||||
|
.map(|fleet| fleet.exec.clone())
|
||||||
|
.unwrap_or_default();
|
||||||
|
let manager = FleetManager::open(workspace)?.with_exec_config(exec_config);
|
||||||
match args.command {
|
match args.command {
|
||||||
FleetCommand::Init => {
|
FleetCommand::Init => {
|
||||||
println!("fleet ledger: {}", manager.ledger_path().display());
|
println!("fleet ledger: {}", manager.ledger_path().display());
|
||||||
|
|||||||
@@ -3980,6 +3980,7 @@ mod tests {
|
|||||||
crate::fleet::task_spec::FleetTaskSpecDocument {
|
crate::fleet::task_spec::FleetTaskSpecDocument {
|
||||||
name: Some("api smoke".to_string()),
|
name: Some("api smoke".to_string()),
|
||||||
labels: std::collections::BTreeMap::new(),
|
labels: std::collections::BTreeMap::new(),
|
||||||
|
security_policy: None,
|
||||||
workers: Vec::new(),
|
workers: Vec::new(),
|
||||||
tasks: vec![task],
|
tasks: vec![task],
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -1161,8 +1161,13 @@ impl Default for PersistedSubAgentState {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Default cap on sub-agent recursion depth. Override via
|
/// Default cap on sub-agent recursion depth. Override via
|
||||||
/// `[runtime] max_spawn_depth = N` in `~/.deepseek/config.toml`.
|
/// `[runtime] max_spawn_depth = N` in config.
|
||||||
pub const DEFAULT_MAX_SPAWN_DEPTH: u32 = 3;
|
///
|
||||||
|
/// Sourced from [`codewhale_config::DEFAULT_SPAWN_DEPTH`] so standalone
|
||||||
|
/// sub-agents and fleet workers share ONE recursion axis (no "two moving
|
||||||
|
/// targets"). Configured/requested depths clamp to
|
||||||
|
/// [`codewhale_config::MAX_SPAWN_DEPTH_CEILING`].
|
||||||
|
pub const DEFAULT_MAX_SPAWN_DEPTH: u32 = codewhale_config::DEFAULT_SPAWN_DEPTH;
|
||||||
|
|
||||||
/// Terminal-state notification emitted to the engine's parent turn loop
|
/// Terminal-state notification emitted to the engine's parent turn loop
|
||||||
/// when one of its direct children finishes (issue #756). Carries the
|
/// when one of its direct children finishes (issue #756). Carries the
|
||||||
@@ -1794,7 +1799,7 @@ impl SubAgentManager {
|
|||||||
.retain(|worker_id, _| keep_ids.contains(worker_id));
|
.retain(|worker_id, _| keep_ids.contains(worker_id));
|
||||||
}
|
}
|
||||||
|
|
||||||
fn register_worker(&mut self, spec: AgentWorkerSpec) {
|
pub fn register_worker(&mut self, spec: AgentWorkerSpec) {
|
||||||
let worker_id = spec.worker_id.clone();
|
let worker_id = spec.worker_id.clone();
|
||||||
let now_ms = epoch_millis_now();
|
let now_ms = epoch_millis_now();
|
||||||
let mut record = AgentWorkerRecord::new(normalize_worker_spec(spec), now_ms);
|
let mut record = AgentWorkerRecord::new(normalize_worker_spec(spec), now_ms);
|
||||||
@@ -5927,15 +5932,18 @@ fn parse_spawn_request(input: &Value) -> Result<SpawnRequest, ToolError> {
|
|||||||
.or_else(|| input.get("max_spawn_depth"))
|
.or_else(|| input.get("max_spawn_depth"))
|
||||||
.and_then(Value::as_u64)
|
.and_then(Value::as_u64)
|
||||||
.map(|depth| {
|
.map(|depth| {
|
||||||
|
let ceiling = codewhale_config::MAX_SPAWN_DEPTH_CEILING;
|
||||||
u32::try_from(depth)
|
u32::try_from(depth)
|
||||||
.map_err(|_| ToolError::invalid_input("max_depth must be between 0 and 3"))
|
.map_err(|_| {
|
||||||
|
ToolError::invalid_input(format!("max_depth must be between 0 and {ceiling}"))
|
||||||
|
})
|
||||||
.and_then(|depth| {
|
.and_then(|depth| {
|
||||||
if depth <= 3 {
|
if depth <= ceiling {
|
||||||
Ok(depth)
|
Ok(depth)
|
||||||
} else {
|
} else {
|
||||||
Err(ToolError::invalid_input(
|
Err(ToolError::invalid_input(format!(
|
||||||
"max_depth must be between 0 and 3",
|
"max_depth must be between 0 and {ceiling}"
|
||||||
))
|
)))
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -412,6 +412,7 @@ enum ConfigSection {
|
|||||||
Sidebar,
|
Sidebar,
|
||||||
History,
|
History,
|
||||||
Mcp,
|
Mcp,
|
||||||
|
Fleet,
|
||||||
Experimental,
|
Experimental,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -429,6 +430,7 @@ impl ConfigSection {
|
|||||||
ConfigSection::Sidebar => MessageId::ConfigSectionSidebar,
|
ConfigSection::Sidebar => MessageId::ConfigSectionSidebar,
|
||||||
ConfigSection::History => MessageId::ConfigSectionHistory,
|
ConfigSection::History => MessageId::ConfigSectionHistory,
|
||||||
ConfigSection::Mcp => MessageId::ConfigSectionMcp,
|
ConfigSection::Mcp => MessageId::ConfigSectionMcp,
|
||||||
|
ConfigSection::Fleet => MessageId::ConfigSectionFleet,
|
||||||
ConfigSection::Experimental => MessageId::ConfigSectionExperimental,
|
ConfigSection::Experimental => MessageId::ConfigSectionExperimental,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
@@ -759,6 +761,18 @@ impl ConfigView {
|
|||||||
editable: true,
|
editable: true,
|
||||||
scope: ConfigScope::Saved,
|
scope: ConfigScope::Saved,
|
||||||
},
|
},
|
||||||
|
ConfigRow {
|
||||||
|
section: ConfigSection::Fleet,
|
||||||
|
key: "fleet.exec.max_spawn_depth".to_string(),
|
||||||
|
value: config
|
||||||
|
.fleet
|
||||||
|
.as_ref()
|
||||||
|
.map(|fleet| fleet.exec.max_spawn_depth)
|
||||||
|
.unwrap_or_else(|| codewhale_config::FleetExecConfig::default().max_spawn_depth)
|
||||||
|
.to_string(),
|
||||||
|
editable: false,
|
||||||
|
scope: ConfigScope::Saved,
|
||||||
|
},
|
||||||
];
|
];
|
||||||
rows.extend(experimental_config_rows(&config));
|
rows.extend(experimental_config_rows(&config));
|
||||||
|
|
||||||
@@ -1180,7 +1194,7 @@ fn experimental_config_rows(config: &Config) -> Vec<ConfigRow> {
|
|||||||
rows.push(ConfigRow {
|
rows.push(ConfigRow {
|
||||||
section: ConfigSection::Experimental,
|
section: ConfigSection::Experimental,
|
||||||
key: "whaleflow".to_string(),
|
key: "whaleflow".to_string(),
|
||||||
value: "preview placeholder (not stable; see #2981/#2974)".to_string(),
|
value: "preview overlay for workflow/fleet runs (not stable; see #3154/#3178)".to_string(),
|
||||||
editable: false,
|
editable: false,
|
||||||
scope: ConfigScope::Saved,
|
scope: ConfigScope::Saved,
|
||||||
});
|
});
|
||||||
@@ -1235,6 +1249,9 @@ fn config_hint_for_key(key: &str) -> &'static str {
|
|||||||
"DeepSeek: auto/off/high/max; Codex: low/medium/high/xhigh; default clears saved value"
|
"DeepSeek: auto/off/high/max; Codex: low/medium/high/xhigh; default clears saved value"
|
||||||
}
|
}
|
||||||
"mcp_config_path" => "path to mcp.json",
|
"mcp_config_path" => "path to mcp.json",
|
||||||
|
"fleet.exec.max_spawn_depth" => {
|
||||||
|
"0 blocks child agents; 3 default (same axis as sub-agents); capped at 3"
|
||||||
|
}
|
||||||
_ => "",
|
_ => "",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2393,6 +2410,7 @@ mod tests {
|
|||||||
"Sidebar",
|
"Sidebar",
|
||||||
"History",
|
"History",
|
||||||
"MCP",
|
"MCP",
|
||||||
|
"Fleet",
|
||||||
"Experimental",
|
"Experimental",
|
||||||
]
|
]
|
||||||
);
|
);
|
||||||
@@ -2429,6 +2447,7 @@ mod tests {
|
|||||||
assert!(keys.contains(&"cost_currency"));
|
assert!(keys.contains(&"cost_currency"));
|
||||||
assert!(keys.contains(&"prefer_external_pdftotext"));
|
assert!(keys.contains(&"prefer_external_pdftotext"));
|
||||||
assert!(keys.contains(&"mcp_config_path"));
|
assert!(keys.contains(&"mcp_config_path"));
|
||||||
|
assert!(keys.contains(&"fleet.exec.max_spawn_depth"));
|
||||||
assert!(keys.contains(&"features.subagents"));
|
assert!(keys.contains(&"features.subagents"));
|
||||||
assert!(keys.contains(&"features.web_search"));
|
assert!(keys.contains(&"features.web_search"));
|
||||||
assert!(keys.contains(&"features.apply_patch"));
|
assert!(keys.contains(&"features.apply_patch"));
|
||||||
@@ -2440,13 +2459,23 @@ mod tests {
|
|||||||
assert!(
|
assert!(
|
||||||
view.rows
|
view.rows
|
||||||
.iter()
|
.iter()
|
||||||
.filter(|row| row.section != super::ConfigSection::Experimental)
|
.filter(|row| {
|
||||||
|
!matches!(
|
||||||
|
row.section,
|
||||||
|
super::ConfigSection::Experimental | super::ConfigSection::Fleet
|
||||||
|
)
|
||||||
|
})
|
||||||
.all(|row| row.editable)
|
.all(|row| row.editable)
|
||||||
);
|
);
|
||||||
assert!(
|
assert!(
|
||||||
view.rows
|
view.rows
|
||||||
.iter()
|
.iter()
|
||||||
.filter(|row| row.section == super::ConfigSection::Experimental)
|
.filter(|row| {
|
||||||
|
matches!(
|
||||||
|
row.section,
|
||||||
|
super::ConfigSection::Experimental | super::ConfigSection::Fleet
|
||||||
|
)
|
||||||
|
})
|
||||||
.all(|row| !row.editable)
|
.all(|row| !row.editable)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@@ -2497,6 +2526,36 @@ vision_model = true
|
|||||||
assert_eq!(subagents.value, "enabled (default enabled)");
|
assert_eq!(subagents.value, "enabled (default enabled)");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Writes a config file with `[fleet.exec] max_spawn_depth = 2` into a
/// per-process temp directory, points the test app at it, and checks that the
/// config view exposes the value as a non-editable
/// `fleet.exec.max_spawn_depth` row.
#[test]
fn config_view_shows_fleet_max_spawn_depth_from_config() {
    // The process id keeps concurrent test binaries from sharing a directory.
    let temp_root = std::env::temp_dir().join(format!(
        "codewhale-fleet-config-view-test-{}",
        std::process::id()
    ));
    fs::create_dir_all(&temp_root).unwrap();
    let config_path = temp_root.join("config.toml");
    fs::write(
        &config_path,
        r#"
[fleet.exec]
max_spawn_depth = 2
"#,
    )
    .unwrap();

    let mut app = create_test_app();
    app.config_path = Some(config_path);
    let view = ConfigView::new_for_app(&app);

    // Best-effort cleanup before asserting, so a failing assertion does not
    // leave the fixture directory behind. The rows are already materialized
    // in `view.rows`, so the file is no longer needed.
    let _ = fs::remove_dir_all(&temp_root);

    let row = view
        .rows
        .iter()
        .find(|row| row.key == "fleet.exec.max_spawn_depth")
        .expect("fleet spawn depth row");
    assert_eq!(row.value, "2");
    assert!(!row.editable);
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn config_view_experimental_section_is_searchable() {
|
fn config_view_experimental_section_is_searchable() {
|
||||||
let mut view = create_config_view(Locale::En);
|
let mut view = create_config_view(Locale::En);
|
||||||
|
|||||||
+244
-95
@@ -19,6 +19,29 @@ Fleet state is stored under the workspace in `.codewhale/fleet.jsonl`. Worker
|
|||||||
logs and adapter logs are stored under `.codewhale/fleet/` and
|
logs and adapter logs are stored under `.codewhale/fleet/` and
|
||||||
`.codewhale/fleet-host/`.
|
`.codewhale/fleet-host/`.
|
||||||
|
|
||||||
|
## Naming: Modes, WhaleFlow, Fleet, and Swarm
|
||||||
|
|
||||||
|
These names describe different layers, not competing systems. Agent, Plan, and
|
||||||
|
YOLO stay the permission/work modes. WhaleFlow is an orchestration overlay that
|
||||||
|
can run on top of those modes when the task needs a continuous workflow.
|
||||||
|
|
||||||
|
- **WhaleFlow** is the repeatable workflow plan and user-facing orchestration
|
||||||
|
overlay: a script/IR that decides which phases and agents run next, keeps
|
||||||
|
intermediate results out of the main conversation, and can be inspected or
|
||||||
|
rerun. A WhaleFlow run should have a visible progress view and a clear active
|
||||||
|
header state instead of feeling like a hidden background task.
|
||||||
|
- **Fleet** is the execution substrate: headless workers, local/SSH hosts,
|
||||||
|
trust policy, leases, heartbeats, logs, receipts, and status APIs.
|
||||||
|
- **Swarm** is the high-fanout behavior inside WhaleFlow. It should compile into
|
||||||
|
a WhaleFlow-backed fleet run instead of reviving the old `agent_swarm` tool
|
||||||
|
surface.
|
||||||
|
|
||||||
|
UI guidance: keep the main transcript calm. A WhaleFlow run should appear as a
|
||||||
|
compact progress card plus Work/Agents sidebar rows with phase names, worker
|
||||||
|
counts, receipts, and nested indentation for child workers. Use the whale mark
|
||||||
|
sparingly as an active header/status signal; avoid repeating emoji-heavy rows
|
||||||
|
for every worker.
|
||||||
|
|
||||||
## Task Spec
|
## Task Spec
|
||||||
|
|
||||||
`codewhale fleet run` accepts JSON or TOML. A minimal JSON spec:
|
`codewhale fleet run` accepts JSON or TOML. A minimal JSON spec:
|
||||||
@@ -60,118 +83,79 @@ and `json_path`. Specs may also declare `command`,
|
|||||||
`code_whale_verifier_prompt`, or `manual`; those record a partial receipt until
|
`code_whale_verifier_prompt`, or `manual`; those record a partial receipt until
|
||||||
an explicit verifier pass completes.
|
an explicit verifier pass completes.
|
||||||
|
|
||||||
### Release Triage Example
|
### Using Role Presets
|
||||||
|
|
||||||
|
Tasks can reference a role name, and the fleet manager fills in defaults
|
||||||
|
from the role registry. Built-in roles (`smoke-runner`, `reviewer`, `builder`,
|
||||||
|
`read-only`) are always available; define your own in `[fleet.roles]`.
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"name": "v0.8.60 release triage",
|
"name": "smoke check",
|
||||||
"labels": {
|
|
||||||
"milestone": "v0.8.60"
|
|
||||||
},
|
|
||||||
"tasks": [
|
"tasks": [
|
||||||
{
|
{
|
||||||
"id": "release-issue-sweep",
|
"id": "lint",
|
||||||
"name": "Release issue sweep",
|
"name": "Lint check",
|
||||||
"objective": "Find open v0.8.60 blockers and credit-sensitive PRs.",
|
"instructions": "Run lint and report failures.",
|
||||||
"instructions": "Review the v0.8.60 milestone, linked PRs, changelog entries, and contributor-credit requirements. Write a concise blocker report.",
|
"worker": { "role": "smoke-runner" },
|
||||||
"worker": {
|
"expected_artifacts": ["log"]
|
||||||
"role": "release-triage",
|
|
||||||
"tool_profile": "read-only",
|
|
||||||
"tools": ["gh", "git"],
|
|
||||||
"capabilities": ["github", "release"]
|
|
||||||
},
|
|
||||||
"workspace": {
|
|
||||||
"required_files": ["Cargo.toml", "CHANGELOG.md", ".github/AUTHOR_MAP"],
|
|
||||||
"writable_paths": [".codewhale/fleet"],
|
|
||||||
"environment": {
|
|
||||||
"required": ["PATH"]
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"input_files": ["CHANGELOG.md", ".github/AUTHOR_MAP"],
|
|
||||||
"context": ["Treat community PRs as maintainer evidence."],
|
|
||||||
"budget": {
|
|
||||||
"max_tokens": 12000,
|
|
||||||
"max_tool_calls": 24,
|
|
||||||
"max_seconds": 900
|
|
||||||
},
|
|
||||||
"timeout_seconds": 900,
|
|
||||||
"expected_artifacts": ["log", "report", "receipt"],
|
|
||||||
"scorer": {
|
|
||||||
"kind": "exit_code"
|
|
||||||
},
|
|
||||||
"retry_policy": {
|
|
||||||
"max_attempts": 2,
|
|
||||||
"initial_backoff_seconds": 10,
|
|
||||||
"max_backoff_seconds": 60,
|
|
||||||
"backoff_multiplier": 2
|
|
||||||
},
|
|
||||||
"tags": ["release", "triage"],
|
|
||||||
"metadata": {
|
|
||||||
"class": "release"
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
### Code Review Swarm Example
|
The task inherits the role's tool profile, budget, and timeout. You can
|
||||||
|
override any field in the task spec. The example below shows a single entry from the `tasks` array:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{
|
{
|
||||||
"name": "code review swarm",
|
"id": "deep-review",
|
||||||
|
"name": "Deep review",
|
||||||
|
"instructions": "Review the entire crate for soundness issues.",
|
||||||
|
"worker": {
|
||||||
|
"role": "reviewer",
|
||||||
|
"tools": ["cargo", "rg", "git"],
|
||||||
|
"capabilities": ["rust"]
|
||||||
|
},
|
||||||
|
"input_files": ["crates/**/*.rs"],
|
||||||
|
"budget": { "max_tokens": 32000 },
|
||||||
|
"expected_artifacts": ["log", "report"],
|
||||||
|
"scorer": { "kind": "regex_match", "path": ".codewhale/fleet/report.md", "pattern": "finding|all clear" }
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Multi-Task Run Example
|
||||||
|
|
||||||
|
A single fleet run can dispatch several independent tasks in parallel:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"name": "CI gate",
|
||||||
"tasks": [
|
"tasks": [
|
||||||
{
|
{
|
||||||
"id": "protocol-review",
|
"id": "check",
|
||||||
"name": "Protocol review",
|
"name": "Compile check",
|
||||||
"objective": "Review fleet protocol changes for compatibility and sparse JSON behavior.",
|
"instructions": "Run cargo check --workspace and report errors.",
|
||||||
"instructions": "Inspect crates/protocol/src/fleet.rs and report behavior regressions, missing serde defaults, or unsafe wire changes.",
|
"worker": { "role": "builder" },
|
||||||
"worker": {
|
"expected_artifacts": ["log"],
|
||||||
"role": "reviewer",
|
"scorer": { "kind": "exit_code" }
|
||||||
"tool_profile": "read-only",
|
|
||||||
"tools": ["git", "rg", "cargo"],
|
|
||||||
"capabilities": ["rust"]
|
|
||||||
},
|
|
||||||
"input_files": ["crates/protocol/src/fleet.rs"],
|
|
||||||
"budget": {
|
|
||||||
"max_tokens": 8000,
|
|
||||||
"max_tool_calls": 16,
|
|
||||||
"max_seconds": 600
|
|
||||||
},
|
|
||||||
"expected_artifacts": ["log", "report", "receipt"],
|
|
||||||
"scorer": {
|
|
||||||
"kind": "code_whale_verifier_prompt",
|
|
||||||
"prompt": "Verify the review includes at least one concrete file:line finding or explicitly says no issues were found."
|
|
||||||
},
|
|
||||||
"tags": ["review", "protocol"],
|
|
||||||
"metadata": {
|
|
||||||
"class": "code-review"
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": "tui-review",
|
"id": "clippy",
|
||||||
"name": "TUI review",
|
"name": "Clippy lint",
|
||||||
"objective": "Review fleet CLI and manager behavior for operator-visible regressions.",
|
"instructions": "Run cargo clippy --workspace and report warnings.",
|
||||||
"instructions": "Inspect crates/tui/src/fleet and crates/tui/src/main.rs. Focus on status output, receipt recording, and failure classification.",
|
"worker": { "role": "reviewer", "tools": ["cargo", "cargo-clippy"] },
|
||||||
"worker": {
|
"expected_artifacts": ["log"],
|
||||||
"role": "reviewer",
|
"scorer": { "kind": "exit_code" }
|
||||||
"tool_profile": "read-only",
|
},
|
||||||
"tools": ["git", "rg", "cargo"],
|
{
|
||||||
"capabilities": ["rust", "cli"]
|
"id": "security",
|
||||||
},
|
"name": "Secret audit",
|
||||||
"input_files": ["crates/tui/src/fleet", "crates/tui/src/main.rs"],
|
"instructions": "Search for plaintext secrets and report any matches.",
|
||||||
"budget": {
|
"worker": { "role": "read-only", "tools": ["rg"] },
|
||||||
"max_tokens": 10000,
|
"input_files": ["crates/**/*.rs"],
|
||||||
"max_tool_calls": 20,
|
"expected_artifacts": ["log", "report"],
|
||||||
"max_seconds": 600
|
"retry_policy": { "max_attempts": 1 }
|
||||||
},
|
|
||||||
"expected_artifacts": ["log", "report", "receipt"],
|
|
||||||
"scorer": {
|
|
||||||
"kind": "manual"
|
|
||||||
},
|
|
||||||
"tags": ["review", "tui"],
|
|
||||||
"metadata": {
|
|
||||||
"class": "code-review"
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@@ -373,3 +357,168 @@ Defaults are intentionally conservative:
|
|||||||
`API_KEY`, and `PRIVATE_KEY` are rejected from adapter allowlists;
|
`API_KEY`, and `PRIVATE_KEY` are rejected from adapter allowlists;
|
||||||
- secrets should remain in CodeWhale config providers or remote host config,
|
- secrets should remain in CodeWhale config providers or remote host config,
|
||||||
not in task instructions, argv, or fleet logs.
|
not in task instructions, argv, or fleet logs.
|
||||||
|
|
||||||
|
## Security and Trust Boundaries
|
||||||
|
|
||||||
|
Agent Fleet enforces a trust-level model that separates workers into four tiers.
|
||||||
|
The trust level determines what a worker can access (secrets, network, workspace
|
||||||
|
writes) and how it must prove its identity before being granted those privileges.
|
||||||
|
|
||||||
|
### Trust Levels
|
||||||
|
|
||||||
|
| Level | Access | Requires |
|
||||||
|
|-------|--------|----------|
|
||||||
|
| `sandbox` | No network, no secrets, writes only to `.codewhale/fleet/` | Nothing — default for new workers |
|
||||||
|
| `local` | Workspace reads, gated writes, configured secrets | Local process (same uid) |
|
||||||
|
| `remote-verified` | Network access, bounded capability grants, configured secrets | SSH host-key verification or equivalent attestation |
|
||||||
|
| `operator` | Full access to all secrets, unrestricted writes, any action | Operator-owned machine |
|
||||||
|
|
||||||
|
The default trust level is `sandbox`. Operators must explicitly raise trust for
|
||||||
|
SSH or container workers through the security policy.
|
||||||
|
|
||||||
|
### Security Policy
|
||||||
|
|
||||||
|
A fleet run may carry an optional `security_policy` block that defines the
|
||||||
|
default trust level, which secrets workers may resolve, what capabilities are
|
||||||
|
granted, and a ceiling on the maximum trust level:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"security_policy": {
|
||||||
|
"default_trust_level": "sandbox",
|
||||||
|
"allowed_secrets": [
|
||||||
|
{"key": "GH_TOKEN", "source": "env"},
|
||||||
|
{"key": "CODEWHALE_API_KEY", "source": "keyring"}
|
||||||
|
],
|
||||||
|
"capability_grants": [
|
||||||
|
{
|
||||||
|
"capability": "network",
|
||||||
|
"scope": "github.com",
|
||||||
|
"reason": "PR review needs GitHub API access"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"max_trust_level": "remote_verified",
|
||||||
|
"require_identity_verification": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
When a run has no explicit `security_policy`, workers inherit conservative
|
||||||
|
defaults: `sandbox` trust, no secrets, no capability grants, and no identity
|
||||||
|
verification requirement.
|
||||||
|
|
||||||
|
### Secret References
|
||||||
|
|
||||||
|
Secrets are never stored as plaintext in task specs, alert configs, or worker
|
||||||
|
definitions. Instead, every secret is a `FleetSecretRef` — a key name plus an
|
||||||
|
optional source hint that tells the fleet manager where to resolve the value:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{"key": "GH_TOKEN", "source": "env"}
|
||||||
|
```
|
||||||
|
|
||||||
|
Supported sources:
|
||||||
|
- `"env"` — resolve from a process environment variable
|
||||||
|
- `"keyring"` — resolve from the OS keyring (macOS Keychain, Windows Credential Manager, Linux Secret Service)
|
||||||
|
- `"file"` — resolve from `~/.codewhale/secrets/`
|
||||||
|
- absent — try all sources in default order (store first, then env)
|
||||||
|
|
||||||
|
Secret refs always appear in redacted form in logs and ledger entries, for example `<secret:env.GH_TOKEN>`.
|
||||||
|
|
||||||
|
### Worker Authentication
|
||||||
|
|
||||||
|
Workers authenticate to the fleet manager using one of four methods:
|
||||||
|
|
||||||
|
- **None** — local workers sharing the same uid (default)
|
||||||
|
- **SSH key** — with optional host-key fingerprint pinning and known-hosts
|
||||||
|
verification. The `host_key_fingerprint` field (SHA256:...) pins the expected
|
||||||
|
server key, preventing MITM attacks on first connection.
|
||||||
|
- **Token** — a bearer token resolved from a `FleetSecretRef`, useful for remote
|
||||||
|
workers behind a fleet proxy.
|
||||||
|
- **mTLS** — mutual TLS with a client certificate and a secret-backed private key.
|
||||||
|
|
||||||
|
SSH workers should always set `host_key_fingerprint` in production:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"id": "builder-1",
|
||||||
|
"name": "Builder 1",
|
||||||
|
"trust_level": "remote_verified",
|
||||||
|
"host": {
|
||||||
|
"kind": "ssh",
|
||||||
|
"host": "builder.example.com",
|
||||||
|
"user": "codewhale",
|
||||||
|
"port": 22,
|
||||||
|
"identity": "~/.ssh/codewhale_fleet",
|
||||||
|
"host_key_fingerprint": "SHA256:aLGqZo1M6c...",
|
||||||
|
"known_hosts": "~/.ssh/known_hosts",
|
||||||
|
"working_directory": "/srv/codewhale/work",
|
||||||
|
"env_allowlist": ["CODEWHALE_PROFILE"],
|
||||||
|
"codewhale_binary": "/usr/local/bin/codewhale"
|
||||||
|
},
|
||||||
|
"capabilities": ["local", "linux", "tests"],
|
||||||
|
"max_concurrent_tasks": 1
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Alert Channel Secrets
|
||||||
|
|
||||||
|
Alert channels (Slack, generic webhook, PagerDuty) use `FleetAlertEndpoint`
|
||||||
|
instead of raw URLs. The webhook URL can be provided inline for non-sensitive
|
||||||
|
endpoints, or as a secret reference:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"kind": "slack",
|
||||||
|
"webhook": {
|
||||||
|
"url_ref": {"key": "CODEWHALE_FLEET_SLACK_WEBHOOK", "source": "env"},
|
||||||
|
"secret_ref": {"key": "CODEWHALE_FLEET_SLACK_SIGNING_SECRET", "source": "keyring"}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
The `secret_ref` field provides an optional HMAC secret for webhook payload
|
||||||
|
signing, never stored in plaintext.
|
||||||
|
|
||||||
|
### Config File
|
||||||
|
|
||||||
|
The `[fleet]` table in `config.toml` sets global trust policy defaults:
|
||||||
|
|
||||||
|
```toml
|
||||||
|
[fleet]
|
||||||
|
default_trust_level = "sandbox"
|
||||||
|
require_identity_verification = true
|
||||||
|
max_trust_level = "operator"
|
||||||
|
|
||||||
|
[fleet.exec]
|
||||||
|
# Recursion depth shares ONE axis with standalone sub-agents — a fleet worker
|
||||||
|
# IS a headless sub-agent. 0 blocks child agents (the root worker still runs);
|
||||||
|
# 3 is the default and the ceiling, affording up to three nested levels.
|
||||||
|
max_spawn_depth = 3
|
||||||
|
```
|
||||||
|
|
||||||
|
These defaults apply to fleet runs that don't carry their own `security_policy`.
|
||||||
|
Per-run policies always override the config defaults.
|
||||||
|
|
||||||
|
### Capability Grants
|
||||||
|
|
||||||
|
Capability grants are additive, scoped permissions that authorize specific
|
||||||
|
actions. By default, workers get no grants (least privilege). Common grants:
|
||||||
|
|
||||||
|
- `"network"` with scope `"github.com"` — allow outbound HTTP to GitHub
|
||||||
|
- `"git-push"` — allow `git push` to remotes
|
||||||
|
- `"provider-secrets"` — allow accessing provider API keys
|
||||||
|
- `"release"` — allow release-related operations (tagging, publishing)
|
||||||
|
- `"workspace-write"` with scope `"crates/tui/**"` — allow writes within a path
|
||||||
|
|
||||||
|
### Environment Sanitization
|
||||||
|
|
||||||
|
The host adapter layer enforces environment sanitization at worker start:
|
||||||
|
|
||||||
|
- Only `HOME`, `PATH`, and platform-specific vars (`SYSTEMROOT`, `COMSPEC`) are
|
||||||
|
injected into worker processes by default
|
||||||
|
- Environment allowlists reject any key containing `SECRET`, `TOKEN`, `PASSWORD`,
|
||||||
|
`PASSWD`, `API_KEY`, `CREDENTIAL`, or `PRIVATE_KEY`
|
||||||
|
- SSH workers only send explicitly allowlisted variables via OpenSSH `SendEnv`
|
||||||
|
- Secret values are never embedded in worker argv, task instructions, or fleet
|
||||||
|
logs — only secret refs appear, and they are always redacted
|
||||||
|
|||||||
@@ -4,11 +4,18 @@ codewhale has two related concepts:
|
|||||||
|
|
||||||
- **TUI mode**: what kind of visible interaction you're in (Plan/Agent/YOLO).
|
- **TUI mode**: what kind of visible interaction you're in (Plan/Agent/YOLO).
|
||||||
- **Approval mode**: how aggressively the UI asks before executing tools.
|
- **Approval mode**: how aggressively the UI asks before executing tools.
|
||||||
|
- **WhaleFlow overlay**: optional long-running workflow orchestration that can
|
||||||
|
run on top of any TUI mode when a task needs many coordinated workers.
|
||||||
|
|
||||||
Model selection is separate. `--model auto` and `/model auto` route each turn to
|
Model selection is separate. `--model auto` and `/model auto` route each turn to
|
||||||
a concrete model and thinking level; they are not TUI modes and are not part of
|
a concrete model and thinking level; they are not TUI modes and are not part of
|
||||||
the `Tab` cycle.
|
the `Tab` cycle.
|
||||||
|
|
||||||
|
WhaleFlow is also separate from the `Tab` mode cycle. It is the visible
|
||||||
|
continuous-work layer for repeatable workflows, fleet workers, and swarm-style
|
||||||
|
fanout. The active mode still controls permissions; WhaleFlow controls whether a
|
||||||
|
large task is planned into a resumable workflow with its own progress view.
|
||||||
|
|
||||||
Each user turn includes a small `<turn_meta>` block with the current local date
|
Each user turn includes a small `<turn_meta>` block with the current local date
|
||||||
and the concrete model sent to the provider. When `--model auto` is active, the
|
and the concrete model sent to the provider. When `--model auto` is active, the
|
||||||
same block also records that the model was auto-routed.
|
same block also records that the model was auto-routed.
|
||||||
@@ -56,6 +63,11 @@ the turn, `/goal complete` marks it done, `/goal blocked` marks it blocked, and
|
|||||||
approval mode, or model route. This remains distinct from `--model auto`, which
|
approval mode, or model route. This remains distinct from `--model auto`, which
|
||||||
only controls model and thinking selection.
|
only controls model and thinking selection.
|
||||||
|
|
||||||
|
WhaleFlow builds on the same separation: a goal can ask the agent to keep
|
||||||
|
working, while WhaleFlow supplies the repeatable workflow/progress surface for
|
||||||
|
large fanout. In the UI, a WhaleFlow run should be shown as an overlay on the
|
||||||
|
main screen, not as a fourth mode next to Agent, Plan, and YOLO.
|
||||||
|
|
||||||
App-server clients can persist a thread-scoped goal with `thread/goal/set`, read
|
App-server clients can persist a thread-scoped goal with `thread/goal/set`, read
|
||||||
it with `thread/goal/get`, and clear it with `thread/goal/clear`. That persisted
|
it with `thread/goal/get`, and clear it with `thread/goal/clear`. That persisted
|
||||||
record carries `active`, `paused`, `blocked`, `usage_limited`, `budget_limited`,
|
record carries `active`, `paused`, `blocked`, `usage_limited`, `budget_limited`,
|
||||||
|
|||||||
@@ -0,0 +1,52 @@
|
|||||||
|
# Agent Fleet dogfood smoke spec (#3166)
|
||||||
|
#
|
||||||
|
# This spec exercises the fleet end-to-end: create a run with two local
|
||||||
|
# workers, run a workspace check task and a review task, verify the ledger records
|
||||||
|
# receipts, and confirm the status surfaces work.
|
||||||
|
#
|
||||||
|
# Run:
|
||||||
|
# codewhale fleet run docs/examples/fleet-dogfood.toml --max-workers 2 --once
|
||||||
|
#
|
||||||
|
# Then check:
|
||||||
|
# codewhale fleet status
|
||||||
|
# codewhale fleet inspect <worker-id-from-status>
|
||||||
|
# codewhale fleet logs <worker-id-from-status>
|
||||||
|
|
||||||
|
name = "dogfood smoke"
|
||||||
|
labels = { milestone = "v0.8.60", class = "smoke" }
|
||||||
|
|
||||||
|
security_policy = { default_trust_level = "local", allowed_secrets = [], require_identity_verification = false }
|
||||||
|
|
||||||
|
[[tasks]]
|
||||||
|
id = "cargo-check"
|
||||||
|
name = "Workspace check"
|
||||||
|
description = "Run `cargo check --workspace` and report any compilation errors."
|
||||||
|
objective = "Verify the workspace compiles cleanly with zero errors."
|
||||||
|
instructions = "Run `cargo check --workspace` in the repo root. If it compiles cleanly, report success. If there are errors, list each file:line and the error message. Do NOT attempt to fix anything — just report what you found."
|
||||||
|
worker = { role = "release-checker", tool_profile = "read-only", tools = ["cargo"], capabilities = ["rust"] }
|
||||||
|
workspace = { required_files = ["Cargo.toml"], writable_paths = [".codewhale/fleet"], environment = { required = ["PATH"] } }
|
||||||
|
input_files = ["Cargo.toml"]
|
||||||
|
context = ["You are running in a fleet smoke test. Be concise. Only report the pass/fail and any specific errors."]
|
||||||
|
budget = { max_tokens = 8000, max_tool_calls = 12, max_seconds = 300 }
|
||||||
|
expected_artifacts = ["log", "report", "receipt"]
|
||||||
|
scorer = { kind = "exit_code" }
|
||||||
|
retry_policy = { max_attempts = 2, initial_backoff_seconds = 5, max_backoff_seconds = 30 }
|
||||||
|
timeout_seconds = 300
|
||||||
|
tags = ["smoke", "check"]
|
||||||
|
|
||||||
|
[[tasks]]
|
||||||
|
id = "protocol-review"
|
||||||
|
name = "Protocol review"
|
||||||
|
description = "Review fleet protocol types for security and correctness."
|
||||||
|
objective = "Inspect crates/protocol/src/fleet.rs and crates/secrets/src/lib.rs. Report any missing serde defaults, unsafe wire changes, or security-sensitive fields lacking SecretRef."
|
||||||
|
instructions = "Read crates/protocol/src/fleet.rs and crates/secrets/src/lib.rs. Check for: (1) new fields without serde(default) or skip_serializing_if, (2) raw secrets in struct fields instead of FleetSecretRef, (3) missing Clone/Debug/PartialEq derives on new types. Write a concise report with file:line references for each finding. If there are no findings, report 'all clear'."
|
||||||
|
worker = { role = "reviewer", tool_profile = "read-only", tools = ["rg", "git", "cargo"], capabilities = ["rust"] }
|
||||||
|
workspace = { required_files = ["crates/protocol/src/fleet.rs", "crates/secrets/src/lib.rs"], writable_paths = [".codewhale/fleet"], environment = { required = ["PATH"] } }
|
||||||
|
input_files = ["crates/protocol/src/fleet.rs", "crates/secrets/src/lib.rs"]
|
||||||
|
context = ["You are a fleet protocol reviewer. Be thorough but concise. Reference specific lines."]
|
||||||
|
budget = { max_tokens = 10000, max_tool_calls = 16, max_seconds = 600 }
|
||||||
|
expected_artifacts = ["log", "report", "receipt"]
|
||||||
|
scorer = { kind = "code_whale_verifier_prompt", prompt = "Verify the review includes at least one concrete file:line finding or explicitly says 'all clear'." }
|
||||||
|
retry_policy = { max_attempts = 1, initial_backoff_seconds = 10 }
|
||||||
|
timeout_seconds = 600
|
||||||
|
tags = ["smoke", "review", "protocol"]
|
||||||
Reference in New Issue
Block a user