fix(subagent): clearer role vocab, lifecycle signals, and eval ergonomics
Make the sub-agent surface easier for less-capable models to drive: - Unify role/type vocabulary (#2649): normalize_role_alias now accepts the full set SubAgentType::from_str accepts (reviewer/implementer/verifier/...), and SubAgentType::from_str learns `planner`, so the dual-validation pass no longer rejects natural roles with a stale four-value hint. Error strings and schema descriptions now enumerate the real accepted aliases. - agent_eval/agent_close always active (#2605) so a first call executes instead of hydrating its schema and forcing a double-invoke; both accept an `agent_name` session alias (#2650). - Self-diagnosing name conflicts (#2656): the duplicate-name error names the conflicting agent_id and its status. - Self-describing completion sentinels (#2658): subagent.done now carries result_clipped / summary_complete / next_action so the parent knows whether to trust the previous-line summary or call agent_eval. - Actionable child-model-unavailable diagnostics (#2653): a provider 403/404 is annotated with the model id and recovery path instead of a bare error. Tests: role vocabulary acceptance + error wording, agent_name resolution, duplicate-name diagnostics, clipped-result sentinel, child-model annotation, agent_eval/agent_close default-active. Full tui suite green (3948), clippy clean. Targets codex/v0.8.53 (v0.8.53 stabilization).
This commit is contained in:
@@ -520,6 +520,19 @@ fn non_yolo_mode_retains_default_defer_policy() {
|
||||
AppMode::Agent,
|
||||
&always_load
|
||||
));
|
||||
// #2605: the fetch/close side of the sub-agent surface must also stay
|
||||
// active so a first `agent_eval`/`agent_close` executes instead of
|
||||
// hydrating its schema and forcing a double-invoke.
|
||||
assert!(!should_default_defer_tool(
|
||||
"agent_eval",
|
||||
AppMode::Agent,
|
||||
&always_load
|
||||
));
|
||||
assert!(!should_default_defer_tool(
|
||||
"agent_close",
|
||||
AppMode::Agent,
|
||||
&always_load
|
||||
));
|
||||
assert!(!should_default_defer_tool(
|
||||
"read_file",
|
||||
AppMode::Agent,
|
||||
|
||||
@@ -35,6 +35,8 @@ pub(super) fn is_tool_search_tool(name: &str) -> bool {
|
||||
}
|
||||
|
||||
pub(super) const DEFAULT_ACTIVE_NATIVE_TOOLS: &[&str] = &[
|
||||
"agent_close",
|
||||
"agent_eval",
|
||||
"agent_open",
|
||||
"apply_patch",
|
||||
"checklist_write",
|
||||
|
||||
@@ -284,12 +284,14 @@ When you open a sub-agent via `agent_open`, the child runs independently. The ru
|
||||
- `agent_id` — the child's identifier
|
||||
- `status` — `"completed"` or `"failed"`
|
||||
- `summary_location` / `error_location` — the human-readable summary or error is on the line immediately before the sentinel
|
||||
- `result_clipped` / `summary_complete` — whether the previous-line summary is the full result (`summary_complete: true`) or was truncated (`result_clipped: true`)
|
||||
- `next_action` — `"use_summary"` when the summary is complete, or `"call_agent_eval"` when you must fetch the full transcript
|
||||
- `details` — currently `agent_eval`, the tool to call when you need the full projection or transcript handle
|
||||
|
||||
**Integration protocol:**
|
||||
1. When you see `<codewhale:subagent.done>`, read the human summary line immediately before it first.
|
||||
2. Integrate the child's findings into your work — do not re-do what the child already did.
|
||||
3. If the summary is insufficient, call `agent_eval` with the agent name or id to pull the current structured projection or transcript handle.
|
||||
3. If `next_action` is `"call_agent_eval"` (or the summary is insufficient), call `agent_eval` with the agent name or id to pull the current structured projection or transcript handle; if `next_action` is `"use_summary"` the previous line is the complete result.
|
||||
4. If the child failed (`"failed"`), assess whether the failure blocks your plan or whether you can proceed with a fallback.
|
||||
5. Update your `checklist_write` items to reflect the child's contribution.
|
||||
6. Do not tell the user they pasted sentinels or explain this protocol unless they explicitly ask about sub-agent internals.
|
||||
|
||||
@@ -96,6 +96,11 @@ const SUBAGENT_RESTART_REASON: &str = "Interrupted by process restart";
|
||||
|
||||
const VALID_SUBAGENT_TYPES: &str = "general, explore, plan, review, implementer, verifier, tool_agent, custom, \
|
||||
worker, explorer, awaiter, default, implement, builder, verify, validator, tester, tool-agent, executor, fin";
|
||||
/// Role aliases accepted by `normalize_role_alias`. Kept in sync with the
|
||||
/// match arms below so every input that `SubAgentType::from_str` accepts also
|
||||
/// resolves to a canonical role (avoids the dual-validation rejection in #2649).
|
||||
const VALID_ROLE_ALIASES: &str = "default, worker, explorer, awaiter, reviewer, implementer, verifier, tool_agent \
|
||||
(aliases: general, explore, plan/planner, review, implement/builder, verify/validator/tester, executor/fin)";
|
||||
/// Whale species used as friendly names for sub-agents in the UI. The full
|
||||
/// Cetacea infraorder — baleen whales (Mysticeti), toothed whales
|
||||
/// (Odontoceti), plus select dolphin species (family Delphinidae) that
|
||||
@@ -379,7 +384,7 @@ impl SubAgentType {
|
||||
Some(Self::General)
|
||||
}
|
||||
"explore" | "exploration" | "explorer" => Some(Self::Explore),
|
||||
"plan" | "planning" | "awaiter" => Some(Self::Plan),
|
||||
"plan" | "planning" | "planner" | "awaiter" => Some(Self::Plan),
|
||||
"review" | "code-review" | "code_review" | "reviewer" => Some(Self::Review),
|
||||
"implementer" | "implement" | "implementation" | "builder" => Some(Self::Implementer),
|
||||
"verifier" | "verify" | "verification" | "validator" | "tester" => Some(Self::Verifier),
|
||||
@@ -1409,12 +1414,17 @@ impl SubAgentManager {
|
||||
.map(str::trim)
|
||||
.filter(|name| !name.is_empty())
|
||||
{
|
||||
if self
|
||||
if let Some(existing) = self
|
||||
.agents
|
||||
.values()
|
||||
.any(|existing| existing.session_name == name)
|
||||
.find(|existing| existing.session_name == name)
|
||||
{
|
||||
return Err(anyhow!("Sub-agent session name '{name}' is already in use"));
|
||||
return Err(anyhow!(
|
||||
"Sub-agent session name '{name}' is already in use by agent_id '{}' (status: {}). \
|
||||
Reuse that agent_id with agent_eval/agent_close, or open with a different name.",
|
||||
existing.id,
|
||||
subagent_status_name(&existing.status)
|
||||
));
|
||||
}
|
||||
agent.session_name = name.to_string();
|
||||
}
|
||||
@@ -1676,7 +1686,7 @@ impl SubAgentManager {
|
||||
let normalized = normalize_role_alias(&role)
|
||||
.ok_or_else(|| {
|
||||
anyhow!(
|
||||
"Invalid role alias '{role}'. Use: worker, explorer, awaiter, default"
|
||||
"Invalid role alias '{role}'. Use: {VALID_ROLE_ALIASES}"
|
||||
)
|
||||
})?
|
||||
.to_string();
|
||||
@@ -2083,7 +2093,7 @@ impl ToolSpec for AgentOpenTool {
|
||||
},
|
||||
"role": {
|
||||
"type": "string",
|
||||
"description": "Role alias: worker, explorer, awaiter, default"
|
||||
"description": "Role alias (canonical: default, worker, explorer, awaiter, reviewer, implementer, verifier, tool_agent). Must match `type` if both are given."
|
||||
},
|
||||
"agent_role": {
|
||||
"type": "string",
|
||||
@@ -2350,7 +2360,7 @@ impl ToolSpec for AgentSpawnTool {
|
||||
},
|
||||
"role": {
|
||||
"type": "string",
|
||||
"description": "Role alias: worker, explorer, awaiter, default"
|
||||
"description": "Role alias (canonical: default, worker, explorer, awaiter, reviewer, implementer, verifier, tool_agent). Must match `type` if both are given."
|
||||
},
|
||||
"agent_role": {
|
||||
"type": "string",
|
||||
@@ -2599,6 +2609,10 @@ impl ToolSpec for AgentEvalTool {
|
||||
"type": "string",
|
||||
"description": "Session name returned by agent_open"
|
||||
},
|
||||
"agent_name": {
|
||||
"type": "string",
|
||||
"description": "Alias for name (session name returned by agent_open)"
|
||||
},
|
||||
"agent_id": {
|
||||
"type": "string",
|
||||
"description": "Generated agent id returned by agent_open"
|
||||
@@ -2643,6 +2657,7 @@ impl ToolSpec for AgentEvalTool {
|
||||
async fn execute(&self, input: Value, context: &ToolContext) -> Result<ToolResult, ToolError> {
|
||||
let agent_ref = input
|
||||
.get("name")
|
||||
.or_else(|| input.get("agent_name"))
|
||||
.or_else(|| input.get("agent_id"))
|
||||
.or_else(|| input.get("id"))
|
||||
.and_then(Value::as_str)
|
||||
@@ -2912,6 +2927,10 @@ impl ToolSpec for AgentCloseTool {
|
||||
"type": "string",
|
||||
"description": "Session name returned by agent_open"
|
||||
},
|
||||
"agent_name": {
|
||||
"type": "string",
|
||||
"description": "Alias for name (session name returned by agent_open)"
|
||||
},
|
||||
"agent_id": {
|
||||
"type": "string",
|
||||
"description": "Alias for id"
|
||||
@@ -2934,6 +2953,7 @@ impl ToolSpec for AgentCloseTool {
|
||||
async fn execute(&self, input: Value, context: &ToolContext) -> Result<ToolResult, ToolError> {
|
||||
let agent_id = input
|
||||
.get("name")
|
||||
.or_else(|| input.get("agent_name"))
|
||||
.or_else(|| input.get("id"))
|
||||
.or_else(|| input.get("agent_id"))
|
||||
.and_then(|v| v.as_str())
|
||||
@@ -3215,7 +3235,7 @@ impl ToolSpec for AgentAssignTool {
|
||||
},
|
||||
"role": {
|
||||
"type": "string",
|
||||
"description": "Updated role alias: worker, explorer, awaiter, default"
|
||||
"description": "Updated role alias (canonical: default, worker, explorer, awaiter, reviewer, implementer, verifier, tool_agent)."
|
||||
},
|
||||
"agent_role": {
|
||||
"type": "string",
|
||||
@@ -3459,7 +3479,7 @@ impl ToolSpec for DelegateToAgentTool {
|
||||
},
|
||||
"role": {
|
||||
"type": "string",
|
||||
"description": "Role alias: worker, explorer, awaiter, default"
|
||||
"description": "Role alias (canonical: default, worker, explorer, awaiter, reviewer, implementer, verifier, tool_agent). Must match `type` if both are given."
|
||||
},
|
||||
"agent_role": {
|
||||
"type": "string",
|
||||
@@ -3643,15 +3663,19 @@ async fn run_subagent_task(task: SubAgentTask) {
|
||||
// sentinel on the second. The sentinel uses an opaque tag
|
||||
// (`codewhale:subagent.done`) to avoid collision with normal user
|
||||
// text.
|
||||
let model_id = task.runtime.model.clone();
|
||||
let (summary, sentinel) = match &result {
|
||||
Ok(res) => (
|
||||
summarize_subagent_result(res),
|
||||
subagent_done_sentinel(&task.agent_id, res),
|
||||
),
|
||||
Err(err) => (
|
||||
format!("Failed: {err}"),
|
||||
subagent_failed_sentinel(&task.agent_id, &err.to_string()),
|
||||
),
|
||||
Err(err) => {
|
||||
let annotated = annotate_child_model_error(&err.to_string(), &model_id);
|
||||
(
|
||||
format!("Failed: {annotated}"),
|
||||
subagent_failed_sentinel(&task.agent_id, &annotated),
|
||||
)
|
||||
}
|
||||
};
|
||||
|
||||
if let Some(mb) = task.runtime.mailbox.as_ref() {
|
||||
@@ -3662,7 +3686,7 @@ async fn run_subagent_task(task: SubAgentTask) {
|
||||
},
|
||||
Err(err) => MailboxMessage::Failed {
|
||||
agent_id: task.agent_id.clone(),
|
||||
error: err.to_string(),
|
||||
error: annotate_child_model_error(&err.to_string(), &model_id),
|
||||
},
|
||||
};
|
||||
let _ = mb.send(envelope);
|
||||
@@ -3681,7 +3705,9 @@ async fn run_subagent_task(task: SubAgentTask) {
|
||||
let mut manager = task.manager_handle.write().await;
|
||||
match &result {
|
||||
Ok(res) => manager.update_from_result(&agent_id, res.clone()),
|
||||
Err(err) => manager.update_failed(&agent_id, err.to_string()),
|
||||
Err(err) => {
|
||||
manager.update_failed(&agent_id, annotate_child_model_error(&err.to_string(), &model_id));
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(event_tx) = task.runtime.event_tx {
|
||||
@@ -3726,22 +3752,33 @@ pub(crate) fn emit_parent_completion(
|
||||
/// parent request's cache-miss tail. Wall-clock duration is useful UI
|
||||
/// telemetry, but it is volatile and not useful for model coordination.
|
||||
fn subagent_done_sentinel(agent_id: &str, res: &SubAgentResult) -> String {
|
||||
let clipped = subagent_result_clipped(res);
|
||||
let payload = json!({
|
||||
"agent_id": agent_id,
|
||||
"agent_type": res.agent_type.as_str(),
|
||||
"status": subagent_status_name(&res.status),
|
||||
"summary_location": "previous_line",
|
||||
"result_clipped": clipped,
|
||||
"summary_complete": !clipped,
|
||||
// What the parent should do next: trust the previous-line summary, or
|
||||
// fetch the full transcript with agent_eval when it was clipped.
|
||||
"next_action": if clipped { "call_agent_eval" } else { "use_summary" },
|
||||
"details": "agent_eval",
|
||||
});
|
||||
format!("<codewhale:subagent.done>{payload}</codewhale:subagent.done>")
|
||||
}
|
||||
|
||||
/// Build a `<codewhale:subagent.done>` sentinel for a failed child.
|
||||
///
|
||||
/// Kept lean: the (annotated) error is on the previous line (`error_location`)
|
||||
/// and the full projection is available via `agent_eval`, so the sentinel only
|
||||
/// signals the recovery path rather than re-embedding the error text.
|
||||
fn subagent_failed_sentinel(agent_id: &str, _err: &str) -> String {
|
||||
let payload = json!({
|
||||
"agent_id": agent_id,
|
||||
"status": "failed",
|
||||
"error_location": "previous_line",
|
||||
"next_action": "call_agent_eval",
|
||||
"details": "agent_eval",
|
||||
});
|
||||
format!("<codewhale:subagent.done>{payload}</codewhale:subagent.done>")
|
||||
@@ -4465,7 +4502,7 @@ fn parse_spawn_request(input: &Value) -> Result<SpawnRequest, ToolError> {
|
||||
.map(|role| {
|
||||
SubAgentType::from_str(role).ok_or_else(|| {
|
||||
ToolError::invalid_input(format!(
|
||||
"Invalid role alias '{role}'. Use: worker, explorer, awaiter, default"
|
||||
"Invalid role alias '{role}'. Use: {VALID_ROLE_ALIASES}"
|
||||
))
|
||||
})
|
||||
})
|
||||
@@ -4487,7 +4524,7 @@ fn parse_spawn_request(input: &Value) -> Result<SpawnRequest, ToolError> {
|
||||
&& normalize_role_alias(role).is_none()
|
||||
{
|
||||
return Err(ToolError::invalid_input(format!(
|
||||
"Invalid role alias '{role}'. Use: worker, explorer, awaiter, default"
|
||||
"Invalid role alias '{role}'. Use: {VALID_ROLE_ALIASES}"
|
||||
)));
|
||||
}
|
||||
|
||||
@@ -4843,7 +4880,7 @@ fn parse_assign_request(input: &Value) -> Result<AssignRequest, ToolError> {
|
||||
.map(|role| {
|
||||
normalize_role_alias(role).ok_or_else(|| {
|
||||
ToolError::invalid_input(format!(
|
||||
"Invalid role alias '{role}'. Use: worker, explorer, awaiter, default"
|
||||
"Invalid role alias '{role}'. Use: {VALID_ROLE_ALIASES}"
|
||||
))
|
||||
})
|
||||
})
|
||||
@@ -4868,15 +4905,25 @@ fn parse_assign_request(input: &Value) -> Result<AssignRequest, ToolError> {
|
||||
})
|
||||
}
|
||||
|
||||
/// Resolve a user-supplied role/agent_role value to a canonical role string.
|
||||
///
|
||||
/// This must accept the full set that [`SubAgentType::from_str`] accepts, plus
|
||||
/// role-only aliases (`worker`, `default`, `awaiter`). Before #2649 it covered
|
||||
/// only a subset, so `role: "reviewer"` (accepted by `from_str`) was rejected
|
||||
/// here by the second validation pass with a misleading four-value hint.
|
||||
fn normalize_role_alias(input: &str) -> Option<&'static str> {
|
||||
match input.to_ascii_lowercase().as_str() {
|
||||
"default" => Some("default"),
|
||||
"worker" | "general" => Some("worker"),
|
||||
"explorer" | "explore" => Some("explorer"),
|
||||
"awaiter" | "plan" | "planner" => Some("awaiter"),
|
||||
"worker" | "general" | "general-purpose" | "general_purpose" => Some("worker"),
|
||||
"explorer" | "explore" | "exploration" => Some("explorer"),
|
||||
"awaiter" | "plan" | "planner" | "planning" => Some("awaiter"),
|
||||
"reviewer" | "review" | "code-review" | "code_review" => Some("reviewer"),
|
||||
"implementer" | "implement" | "implementation" | "builder" => Some("implementer"),
|
||||
"verifier" | "verify" | "verification" | "validator" | "tester" => Some("verifier"),
|
||||
"tool-agent" | "tool_agent" | "toolagent" | "executor" | "execution" | "fin" => {
|
||||
Some("tool_agent")
|
||||
}
|
||||
"custom" => Some("custom"),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
@@ -5147,6 +5194,21 @@ fn build_allowed_tools(
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
/// When a child agent fails because its model is unavailable under the current
|
||||
/// access profile, a bare provider 403/404 (classified `Authorization` or
|
||||
/// `State`) is unactionable. Annotate it so the parent knows the likely cause
|
||||
/// and how to recover (#2653) without re-classifying the underlying error.
|
||||
fn annotate_child_model_error(err: &str, model: &str) -> String {
|
||||
match crate::error_taxonomy::classify_error_message(err) {
|
||||
crate::error_taxonomy::ErrorCategory::Authorization
|
||||
| crate::error_taxonomy::ErrorCategory::State => format!(
|
||||
"{err}\n(child model `{model}` may be unavailable under the current access profile — \
|
||||
retry agent_open with a different `model`, or remove `model` to inherit the parent's)"
|
||||
),
|
||||
_ => err.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
fn summarize_subagent_result(result: &SubAgentResult) -> String {
|
||||
match (&result.status, result.result.as_ref()) {
|
||||
(SubAgentStatus::Completed, Some(text)) => truncate_preview(text),
|
||||
@@ -5168,15 +5230,35 @@ fn subagent_status_name(status: &SubAgentStatus) -> &'static str {
|
||||
}
|
||||
}
|
||||
|
||||
/// Max length of the human-friendly one-line summary emitted alongside the
|
||||
/// completion sentinel. Longer results are clipped and the full transcript is
|
||||
/// available via `agent_eval` (see `result_clipped` in the sentinel payload).
|
||||
const SUBAGENT_SUMMARY_PREVIEW_MAX: usize = 240;
|
||||
|
||||
fn truncate_preview(text: &str) -> String {
|
||||
const MAX_LEN: usize = 240;
|
||||
if text.len() <= MAX_LEN {
|
||||
if text.len() <= SUBAGENT_SUMMARY_PREVIEW_MAX {
|
||||
text.to_string()
|
||||
} else {
|
||||
format!("{}...", text.chars().take(MAX_LEN).collect::<String>())
|
||||
format!(
|
||||
"{}...",
|
||||
text.chars()
|
||||
.take(SUBAGENT_SUMMARY_PREVIEW_MAX)
|
||||
.collect::<String>()
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/// Whether `summarize_subagent_result` clipped the preview, i.e. the parent's
|
||||
/// previous-line summary is incomplete and the model should call `agent_eval`
|
||||
/// for the full result rather than relying on the summary.
|
||||
fn subagent_result_clipped(res: &SubAgentResult) -> bool {
|
||||
matches!(res.status, SubAgentStatus::Completed)
|
||||
&& res
|
||||
.result
|
||||
.as_deref()
|
||||
.is_some_and(|text| text.len() > SUBAGENT_SUMMARY_PREVIEW_MAX)
|
||||
}
|
||||
|
||||
const SUBAGENT_OUTPUT_FORMAT: &str = include_str!("../../prompts/subagent_output_format.md");
|
||||
|
||||
const GENERAL_AGENT_INTRO: &str = concat!(
|
||||
|
||||
@@ -509,6 +509,43 @@ fn test_parse_spawn_request_rejects_invalid_role() {
|
||||
assert!(err.to_string().contains("Invalid role alias"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_spawn_request_accepts_full_role_vocabulary() {
|
||||
// Regression for #2649: roles that `SubAgentType::from_str` accepts must
|
||||
// also pass the second `normalize_role_alias` validation pass instead of
|
||||
// being rejected with a stale four-value hint.
|
||||
for (role, expected_type, expected_role) in [
|
||||
("reviewer", SubAgentType::Review, "reviewer"),
|
||||
("review", SubAgentType::Review, "reviewer"),
|
||||
("planner", SubAgentType::Plan, "awaiter"),
|
||||
("implementer", SubAgentType::Implementer, "implementer"),
|
||||
("builder", SubAgentType::Implementer, "implementer"),
|
||||
("verifier", SubAgentType::Verifier, "verifier"),
|
||||
("tester", SubAgentType::Verifier, "verifier"),
|
||||
] {
|
||||
let input = json!({ "prompt": "do work", "role": role });
|
||||
let parsed = parse_spawn_request(&input)
|
||||
.unwrap_or_else(|e| panic!("role {role:?} should parse, got {e}"));
|
||||
assert_eq!(parsed.agent_type, expected_type, "type for role {role:?}");
|
||||
assert_eq!(
|
||||
parsed.assignment.role.as_deref(),
|
||||
Some(expected_role),
|
||||
"canonical role for {role:?}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_invalid_role_error_lists_real_aliases() {
|
||||
// The hint must enumerate the actually-accepted vocabulary (#2649).
|
||||
let input = json!({ "prompt": "do work", "role": "nonsense" });
|
||||
let err = parse_spawn_request(&input)
|
||||
.expect_err("invalid role should fail")
|
||||
.to_string();
|
||||
assert!(err.contains("reviewer"), "hint should list reviewer: {err}");
|
||||
assert!(err.contains("verifier"), "hint should list verifier: {err}");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_spawn_request_rejects_conflicting_type_and_role() {
|
||||
let input = json!({
|
||||
@@ -883,6 +920,89 @@ async fn agent_eval_on_completed_session_returns_full_projection_not_running_err
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn agent_eval_resolves_session_via_agent_name_alias() {
|
||||
// #2650: `agent_name` is accepted as an alias for `name`/session name.
|
||||
let manager = Arc::new(RwLock::new(SubAgentManager::new(PathBuf::from("."), 1)));
|
||||
let (input_tx, _input_rx) = mpsc::unbounded_channel();
|
||||
let mut agent = SubAgent::new(
|
||||
"test_agent_named".to_string(),
|
||||
SubAgentType::Explore,
|
||||
"scan".to_string(),
|
||||
make_assignment(),
|
||||
"deepseek-v4-flash".to_string(),
|
||||
Some("Blue".to_string()),
|
||||
Some(vec!["read_file".to_string()]),
|
||||
input_tx,
|
||||
"boot_test".to_string(),
|
||||
);
|
||||
agent.session_name = "researcher".to_string();
|
||||
agent.status = SubAgentStatus::Completed;
|
||||
agent.result = Some("done".to_string());
|
||||
let agent_id = agent.id.clone();
|
||||
{
|
||||
let mut guard = manager.write().await;
|
||||
guard.agents.insert(agent_id.clone(), agent);
|
||||
}
|
||||
|
||||
let ctx = ToolContext::new(".");
|
||||
let tool = AgentEvalTool::new(manager.clone());
|
||||
let result = tool
|
||||
.execute(json!({ "agent_name": "researcher", "block": false }), &ctx)
|
||||
.await
|
||||
.expect("agent_name alias must resolve the session");
|
||||
let projection: SubAgentSessionProjection =
|
||||
serde_json::from_str(&result.content).expect("projection deserializes");
|
||||
assert_eq!(projection.status, "completed");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn spawn_duplicate_session_name_error_names_conflicting_agent() {
|
||||
// #2656: the duplicate-name error must identify the conflicting agent so a
|
||||
// model can recover deterministically (reuse the id, or pick a new name).
|
||||
let manager = Arc::new(RwLock::new(SubAgentManager::new(PathBuf::from("."), 5)));
|
||||
let (input_tx, _input_rx) = mpsc::unbounded_channel();
|
||||
let mut existing = SubAgent::new(
|
||||
"test_agent_existing".to_string(),
|
||||
SubAgentType::Explore,
|
||||
"scan".to_string(),
|
||||
make_assignment(),
|
||||
"deepseek-v4-flash".to_string(),
|
||||
Some("Blue".to_string()),
|
||||
Some(vec!["read_file".to_string()]),
|
||||
input_tx,
|
||||
"boot_test".to_string(),
|
||||
);
|
||||
existing.session_name = "researcher".to_string();
|
||||
existing.status = SubAgentStatus::Running;
|
||||
let existing_id = existing.id.clone();
|
||||
{
|
||||
let mut guard = manager.write().await;
|
||||
guard.agents.insert(existing_id.clone(), existing);
|
||||
}
|
||||
|
||||
let err = {
|
||||
let mut guard = manager.write().await;
|
||||
guard
|
||||
.spawn_background_with_assignment_options(
|
||||
manager.clone(),
|
||||
stub_runtime(),
|
||||
SubAgentType::Explore,
|
||||
"new work".to_string(),
|
||||
make_assignment(),
|
||||
Some(vec!["read_file".to_string()]),
|
||||
SubAgentSpawnOptions {
|
||||
name: Some("researcher".to_string()),
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
.expect_err("duplicate session name must error")
|
||||
};
|
||||
let msg = err.to_string();
|
||||
assert!(msg.contains(&existing_id), "names the conflicting agent_id: {msg}");
|
||||
assert!(msg.contains("running"), "includes the conflicting status: {msg}");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_running_count_counts_only_agents_with_live_task_handles() {
|
||||
let mut manager = SubAgentManager::new(PathBuf::from("."), 1);
|
||||
@@ -1343,11 +1463,30 @@ fn subagent_done_sentinel_format_is_well_formed() {
|
||||
assert_eq!(parsed["agent_type"], "general");
|
||||
assert_eq!(parsed["summary_location"], "previous_line");
|
||||
assert_eq!(parsed["details"], "agent_eval");
|
||||
// Self-describing completion fields (#2658): a short result is complete, so
|
||||
// the parent should trust the previous-line summary.
|
||||
assert_eq!(parsed["result_clipped"], false);
|
||||
assert_eq!(parsed["summary_complete"], true);
|
||||
assert_eq!(parsed["next_action"], "use_summary");
|
||||
assert!(parsed.get("summary").is_none());
|
||||
assert!(parsed.get("duration_ms").is_none());
|
||||
assert!(parsed.get("steps").is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn subagent_done_sentinel_flags_clipped_result() {
|
||||
let mut res = make_snapshot(SubAgentStatus::Completed);
|
||||
res.result = Some("x".repeat(SUBAGENT_SUMMARY_PREVIEW_MAX + 1));
|
||||
let sentinel = subagent_done_sentinel("agent_big", &res);
|
||||
let inner = sentinel
|
||||
.trim_start_matches("<codewhale:subagent.done>")
|
||||
.trim_end_matches("</codewhale:subagent.done>");
|
||||
let parsed: serde_json::Value = serde_json::from_str(inner).expect("inner JSON parses");
|
||||
assert_eq!(parsed["result_clipped"], true);
|
||||
assert_eq!(parsed["summary_complete"], false);
|
||||
assert_eq!(parsed["next_action"], "call_agent_eval");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn subagent_failed_sentinel_format_is_well_formed() {
|
||||
let sentinel = subagent_failed_sentinel("agent_zzz", "boom");
|
||||
@@ -1359,9 +1498,24 @@ fn subagent_failed_sentinel_format_is_well_formed() {
|
||||
assert_eq!(parsed["status"], "failed");
|
||||
assert_eq!(parsed["error_location"], "previous_line");
|
||||
assert_eq!(parsed["details"], "agent_eval");
|
||||
assert_eq!(parsed["next_action"], "call_agent_eval");
|
||||
// Stays lean — the error text lives on the previous line, not the sentinel.
|
||||
assert!(parsed.get("error").is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn annotate_child_model_error_adds_actionable_hint() {
|
||||
// #2653: a bare provider 403 becomes actionable by naming the model and the
|
||||
// recovery path, while unrelated errors pass through unchanged.
|
||||
let auth = annotate_child_model_error("403 Forbidden", "kimi-k2");
|
||||
assert!(auth.contains("kimi-k2"), "names the model: {auth}");
|
||||
assert!(auth.contains("agent_open"), "names the recovery path: {auth}");
|
||||
assert!(auth.contains("403 Forbidden"), "preserves the original: {auth}");
|
||||
|
||||
let unrelated = annotate_child_model_error("connection reset by peer", "kimi-k2");
|
||||
assert_eq!(unrelated, "connection reset by peer");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn subagent_runtime_default_max_depth_is_three() {
|
||||
// Sanity-check the constant — bumping it without a test means stale docs.
|
||||
|
||||
+3
-2
@@ -86,10 +86,11 @@ The model can spell each role multiple ways:
|
||||
|---------------|------------------------------------------------------------------|
|
||||
| `general` | `worker`, `default`, `general-purpose` |
|
||||
| `explore` | `explorer`, `exploration` |
|
||||
| `plan` | `planning`, `awaiter` |
|
||||
| `review` | `reviewer`, `code-review` |
|
||||
| `plan` | `planning`, `planner`, `awaiter` |
|
||||
| `review` | `reviewer`, `code-review`, `code_review` |
|
||||
| `implementer` | `implement`, `implementation`, `builder` |
|
||||
| `verifier` | `verify`, `verification`, `validator`, `tester` |
|
||||
| `tool_agent` | `tool-agent`, `toolagent`, `executor`, `execution`, `fin` |
|
||||
| `custom` | (none; explicit `allowed_tools` array required) |
|
||||
|
||||
All matching is case-insensitive. Unknown values produce a typed
|
||||
|
||||
Reference in New Issue
Block a user