From 725abeb60328cf21a24fd3334ae8a62f3bee6d82 Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Wed, 3 Jun 2026 11:22:56 -0700 Subject: [PATCH] fix(subagent): clearer role vocab, lifecycle signals, and eval ergonomics Make the sub-agent surface easier for less-capable models to drive: - Unify role/type vocabulary (#2649): normalize_role_alias now accepts the full set SubAgentType::from_str accepts (reviewer/implementer/verifier/...), and SubAgentType::from_str learns `planner`, so the dual-validation pass no longer rejects natural roles with a stale four-value hint. Error strings and schema descriptions now enumerate the real accepted aliases. - agent_eval/agent_close always active (#2605) so a first call executes instead of hydrating its schema and forcing a double-invoke; both accept an `agent_name` session alias (#2650). - Self-diagnosing name conflicts (#2656): the duplicate-name error names the conflicting agent_id and its status. - Self-describing completion sentinels (#2658): subagent.done now carries result_clipped / summary_complete / next_action so the parent knows whether to trust the previous-line summary or call agent_eval. - Actionable child-model-unavailable diagnostics (#2653): a provider 403/404 is annotated with the model id and recovery path instead of a bare error. Tests: role vocabulary acceptance + error wording, agent_name resolution, duplicate-name diagnostics, clipped-result sentinel, child-model annotation, agent_eval/agent_close default-active. Full tui suite green (3948), clippy clean. Targets codex/v0.8.53 (v0.8.53 stabilization). --- crates/tui/src/core/engine/tests.rs | 13 ++ crates/tui/src/core/engine/tool_catalog.rs | 2 + crates/tui/src/prompts/base.md | 4 +- crates/tui/src/tools/subagent/mod.rs | 130 +++++++++++++---- crates/tui/src/tools/subagent/tests.rs | 154 +++++++++++++++++++++ docs/SUBAGENTS.md | 5 +- 6 files changed, 281 insertions(+), 27 deletions(-) diff --git a/crates/tui/src/core/engine/tests.rs b/crates/tui/src/core/engine/tests.rs index d6e56cdb..0304f986 100644 --- a/crates/tui/src/core/engine/tests.rs +++ b/crates/tui/src/core/engine/tests.rs @@ -520,6 +520,19 @@ fn non_yolo_mode_retains_default_defer_policy() { AppMode::Agent, &always_load )); + // #2605: the fetch/close side of the sub-agent surface must also stay + // active so a first `agent_eval`/`agent_close` executes instead of + // hydrating its schema and forcing a double-invoke. + assert!(!should_default_defer_tool( + "agent_eval", + AppMode::Agent, + &always_load + )); + assert!(!should_default_defer_tool( + "agent_close", + AppMode::Agent, + &always_load + )); assert!(!should_default_defer_tool( "read_file", AppMode::Agent, diff --git a/crates/tui/src/core/engine/tool_catalog.rs b/crates/tui/src/core/engine/tool_catalog.rs index e88416a3..6f2829f5 100644 --- a/crates/tui/src/core/engine/tool_catalog.rs +++ b/crates/tui/src/core/engine/tool_catalog.rs @@ -35,6 +35,8 @@ pub(super) fn is_tool_search_tool(name: &str) -> bool { } pub(super) const DEFAULT_ACTIVE_NATIVE_TOOLS: &[&str] = &[ + "agent_close", + "agent_eval", "agent_open", "apply_patch", "checklist_write", diff --git a/crates/tui/src/prompts/base.md b/crates/tui/src/prompts/base.md index fa441e07..abbfc0c9 100644 --- a/crates/tui/src/prompts/base.md +++ b/crates/tui/src/prompts/base.md @@ -284,12 +284,14 @@ When you open a sub-agent via `agent_open`, the child runs independently. The ru - `agent_id` — the child's identifier - `status` — `"completed"` or `"failed"` - `summary_location` / `error_location` — the human-readable summary or error is on the line immediately before the sentinel +- `result_clipped` / `summary_complete` — whether the previous-line summary is the full result (`summary_complete: true`) or was truncated (`result_clipped: true`) +- `next_action` — `"use_summary"` when the summary is complete, or `"call_agent_eval"` when you must fetch the full transcript - `details` — currently `agent_eval`, the tool to call when you need the full projection or transcript handle **Integration protocol:** 1. When you see ``, read the human summary line immediately before it first. 2. Integrate the child's findings into your work — do not re-do what the child already did. -3. If the summary is insufficient, call `agent_eval` with the agent name or id to pull the current structured projection or transcript handle. +3. If `next_action` is `"call_agent_eval"` (or the summary is insufficient), call `agent_eval` with the agent name or id to pull the current structured projection or transcript handle; if `next_action` is `"use_summary"` the previous line is the complete result. 4. If the child failed (`"failed"`), assess whether the failure blocks your plan or whether you can proceed with a fallback. 5. Update your `checklist_write` items to reflect the child's contribution. 6. Do not tell the user they pasted sentinels or explain this protocol unless they explicitly ask about sub-agent internals. diff --git a/crates/tui/src/tools/subagent/mod.rs b/crates/tui/src/tools/subagent/mod.rs index 08884906..4275fa57 100644 --- a/crates/tui/src/tools/subagent/mod.rs +++ b/crates/tui/src/tools/subagent/mod.rs @@ -96,6 +96,11 @@ const SUBAGENT_RESTART_REASON: &str = "Interrupted by process restart"; const VALID_SUBAGENT_TYPES: &str = "general, explore, plan, review, implementer, verifier, tool_agent, custom, \ worker, explorer, awaiter, default, implement, builder, verify, validator, tester, tool-agent, executor, fin"; +/// Role aliases accepted by `normalize_role_alias`. Kept in sync with the +/// match arms below so every input that `SubAgentType::from_str` accepts also +/// resolves to a canonical role (avoids the dual-validation rejection in #2649). +const VALID_ROLE_ALIASES: &str = "default, worker, explorer, awaiter, reviewer, implementer, verifier, tool_agent \ + (aliases: general, explore, plan/planner, review, implement/builder, verify/validator/tester, executor/fin)"; /// Whale species used as friendly names for sub-agents in the UI. The full /// Cetacea infraorder — baleen whales (Mysticeti), toothed whales /// (Odontoceti), plus select dolphin species (family Delphinidae) that @@ -379,7 +384,7 @@ impl SubAgentType { Some(Self::General) } "explore" | "exploration" | "explorer" => Some(Self::Explore), - "plan" | "planning" | "awaiter" => Some(Self::Plan), + "plan" | "planning" | "planner" | "awaiter" => Some(Self::Plan), "review" | "code-review" | "code_review" | "reviewer" => Some(Self::Review), "implementer" | "implement" | "implementation" | "builder" => Some(Self::Implementer), "verifier" | "verify" | "verification" | "validator" | "tester" => Some(Self::Verifier), @@ -1409,12 +1414,17 @@ impl SubAgentManager { .map(str::trim) .filter(|name| !name.is_empty()) { - if self + if let Some(existing) = self .agents .values() - .any(|existing| existing.session_name == name) + .find(|existing| existing.session_name == name) { - return Err(anyhow!("Sub-agent session name '{name}' is already in use")); + return Err(anyhow!( + "Sub-agent session name '{name}' is already in use by agent_id '{}' (status: {}). \ + Reuse that agent_id with agent_eval/agent_close, or open with a different name.", + existing.id, + subagent_status_name(&existing.status) + )); } agent.session_name = name.to_string(); } @@ -1676,7 +1686,7 @@ impl SubAgentManager { let normalized = normalize_role_alias(&role) .ok_or_else(|| { anyhow!( - "Invalid role alias '{role}'. Use: worker, explorer, awaiter, default" + "Invalid role alias '{role}'. Use: {VALID_ROLE_ALIASES}" ) })? .to_string(); @@ -2083,7 +2093,7 @@ impl ToolSpec for AgentOpenTool { }, "role": { "type": "string", - "description": "Role alias: worker, explorer, awaiter, default" + "description": "Role alias (canonical: default, worker, explorer, awaiter, reviewer, implementer, verifier, tool_agent). Must match `type` if both are given." }, "agent_role": { "type": "string", @@ -2350,7 +2360,7 @@ impl ToolSpec for AgentSpawnTool { }, "role": { "type": "string", - "description": "Role alias: worker, explorer, awaiter, default" + "description": "Role alias (canonical: default, worker, explorer, awaiter, reviewer, implementer, verifier, tool_agent). Must match `type` if both are given." }, "agent_role": { "type": "string", @@ -2599,6 +2609,10 @@ impl ToolSpec for AgentEvalTool { "type": "string", "description": "Session name returned by agent_open" }, + "agent_name": { + "type": "string", + "description": "Alias for name (session name returned by agent_open)" + }, "agent_id": { "type": "string", "description": "Generated agent id returned by agent_open" @@ -2643,6 +2657,7 @@ impl ToolSpec for AgentEvalTool { async fn execute(&self, input: Value, context: &ToolContext) -> Result { let agent_ref = input .get("name") + .or_else(|| input.get("agent_name")) .or_else(|| input.get("agent_id")) .or_else(|| input.get("id")) .and_then(Value::as_str) @@ -2912,6 +2927,10 @@ impl ToolSpec for AgentCloseTool { "type": "string", "description": "Session name returned by agent_open" }, + "agent_name": { + "type": "string", + "description": "Alias for name (session name returned by agent_open)" + }, "agent_id": { "type": "string", "description": "Alias for id" @@ -2934,6 +2953,7 @@ impl ToolSpec for AgentCloseTool { async fn execute(&self, input: Value, context: &ToolContext) -> Result { let agent_id = input .get("name") + .or_else(|| input.get("agent_name")) .or_else(|| input.get("id")) .or_else(|| input.get("agent_id")) .and_then(|v| v.as_str()) @@ -3215,7 +3235,7 @@ impl ToolSpec for AgentAssignTool { }, "role": { "type": "string", - "description": "Updated role alias: worker, explorer, awaiter, default" + "description": "Updated role alias (canonical: default, worker, explorer, awaiter, reviewer, implementer, verifier, tool_agent)." }, "agent_role": { "type": "string", @@ -3459,7 +3479,7 @@ impl ToolSpec for DelegateToAgentTool { }, "role": { "type": "string", - "description": "Role alias: worker, explorer, awaiter, default" + "description": "Role alias (canonical: default, worker, explorer, awaiter, reviewer, implementer, verifier, tool_agent). Must match `type` if both are given." }, "agent_role": { "type": "string", @@ -3643,15 +3663,19 @@ async fn run_subagent_task(task: SubAgentTask) { // sentinel on the second. The sentinel uses an opaque tag // (`codewhale:subagent.done`) to avoid collision with normal user // text. + let model_id = task.runtime.model.clone(); let (summary, sentinel) = match &result { Ok(res) => ( summarize_subagent_result(res), subagent_done_sentinel(&task.agent_id, res), ), - Err(err) => ( - format!("Failed: {err}"), - subagent_failed_sentinel(&task.agent_id, &err.to_string()), - ), + Err(err) => { + let annotated = annotate_child_model_error(&err.to_string(), &model_id); + ( + format!("Failed: {annotated}"), + subagent_failed_sentinel(&task.agent_id, &annotated), + ) + } }; if let Some(mb) = task.runtime.mailbox.as_ref() { @@ -3662,7 +3686,7 @@ async fn run_subagent_task(task: SubAgentTask) { }, Err(err) => MailboxMessage::Failed { agent_id: task.agent_id.clone(), - error: err.to_string(), + error: annotate_child_model_error(&err.to_string(), &model_id), }, }; let _ = mb.send(envelope); @@ -3681,7 +3705,9 @@ async fn run_subagent_task(task: SubAgentTask) { let mut manager = task.manager_handle.write().await; match &result { Ok(res) => manager.update_from_result(&agent_id, res.clone()), - Err(err) => manager.update_failed(&agent_id, err.to_string()), + Err(err) => { + manager.update_failed(&agent_id, annotate_child_model_error(&err.to_string(), &model_id)); + } } if let Some(event_tx) = task.runtime.event_tx { @@ -3726,22 +3752,33 @@ pub(crate) fn emit_parent_completion( /// parent request's cache-miss tail. Wall-clock duration is useful UI /// telemetry, but it is volatile and not useful for model coordination. fn subagent_done_sentinel(agent_id: &str, res: &SubAgentResult) -> String { + let clipped = subagent_result_clipped(res); let payload = json!({ "agent_id": agent_id, "agent_type": res.agent_type.as_str(), "status": subagent_status_name(&res.status), "summary_location": "previous_line", + "result_clipped": clipped, + "summary_complete": !clipped, + // What the parent should do next: trust the previous-line summary, or + // fetch the full transcript with agent_eval when it was clipped. + "next_action": if clipped { "call_agent_eval" } else { "use_summary" }, "details": "agent_eval", }); format!("{payload}") } /// Build a `` sentinel for a failed child. +/// +/// Kept lean: the (annotated) error is on the previous line (`error_location`) +/// and the full projection is available via `agent_eval`, so the sentinel only +/// signals the recovery path rather than re-embedding the error text. fn subagent_failed_sentinel(agent_id: &str, _err: &str) -> String { let payload = json!({ "agent_id": agent_id, "status": "failed", "error_location": "previous_line", + "next_action": "call_agent_eval", "details": "agent_eval", }); format!("{payload}") @@ -4465,7 +4502,7 @@ fn parse_spawn_request(input: &Value) -> Result { .map(|role| { SubAgentType::from_str(role).ok_or_else(|| { ToolError::invalid_input(format!( - "Invalid role alias '{role}'. Use: worker, explorer, awaiter, default" + "Invalid role alias '{role}'. Use: {VALID_ROLE_ALIASES}" )) }) }) @@ -4487,7 +4524,7 @@ fn parse_spawn_request(input: &Value) -> Result { && normalize_role_alias(role).is_none() { return Err(ToolError::invalid_input(format!( - "Invalid role alias '{role}'. Use: worker, explorer, awaiter, default" + "Invalid role alias '{role}'. Use: {VALID_ROLE_ALIASES}" ))); } @@ -4843,7 +4880,7 @@ fn parse_assign_request(input: &Value) -> Result { .map(|role| { normalize_role_alias(role).ok_or_else(|| { ToolError::invalid_input(format!( - "Invalid role alias '{role}'. Use: worker, explorer, awaiter, default" + "Invalid role alias '{role}'. Use: {VALID_ROLE_ALIASES}" )) }) }) @@ -4868,15 +4905,25 @@ fn parse_assign_request(input: &Value) -> Result { }) } +/// Resolve a user-supplied role/agent_role value to a canonical role string. +/// +/// This must accept the full set that [`SubAgentType::from_str`] accepts, plus +/// role-only aliases (`worker`, `default`, `awaiter`). Before #2649 it covered +/// only a subset, so `role: "reviewer"` (accepted by `from_str`) was rejected +/// here by the second validation pass with a misleading four-value hint. fn normalize_role_alias(input: &str) -> Option<&'static str> { match input.to_ascii_lowercase().as_str() { "default" => Some("default"), - "worker" | "general" => Some("worker"), - "explorer" | "explore" => Some("explorer"), - "awaiter" | "plan" | "planner" => Some("awaiter"), + "worker" | "general" | "general-purpose" | "general_purpose" => Some("worker"), + "explorer" | "explore" | "exploration" => Some("explorer"), + "awaiter" | "plan" | "planner" | "planning" => Some("awaiter"), + "reviewer" | "review" | "code-review" | "code_review" => Some("reviewer"), + "implementer" | "implement" | "implementation" | "builder" => Some("implementer"), + "verifier" | "verify" | "verification" | "validator" | "tester" => Some("verifier"), "tool-agent" | "tool_agent" | "toolagent" | "executor" | "execution" | "fin" => { Some("tool_agent") } + "custom" => Some("custom"), _ => None, } } @@ -5147,6 +5194,21 @@ fn build_allowed_tools( Ok(None) } +/// When a child agent fails because its model is unavailable under the current +/// access profile, a bare provider 403/404 (classified `Authorization` or +/// `State`) is unactionable. Annotate it so the parent knows the likely cause +/// and how to recover (#2653) without re-classifying the underlying error. +fn annotate_child_model_error(err: &str, model: &str) -> String { + match crate::error_taxonomy::classify_error_message(err) { + crate::error_taxonomy::ErrorCategory::Authorization + | crate::error_taxonomy::ErrorCategory::State => format!( + "{err}\n(child model `{model}` may be unavailable under the current access profile — \ + retry agent_open with a different `model`, or remove `model` to inherit the parent's)" + ), + _ => err.to_string(), + } +} + fn summarize_subagent_result(result: &SubAgentResult) -> String { match (&result.status, result.result.as_ref()) { (SubAgentStatus::Completed, Some(text)) => truncate_preview(text), @@ -5168,15 +5230,35 @@ fn subagent_status_name(status: &SubAgentStatus) -> &'static str { } } +/// Max length of the human-friendly one-line summary emitted alongside the +/// completion sentinel. Longer results are clipped and the full transcript is +/// available via `agent_eval` (see `result_clipped` in the sentinel payload). +const SUBAGENT_SUMMARY_PREVIEW_MAX: usize = 240; + fn truncate_preview(text: &str) -> String { - const MAX_LEN: usize = 240; - if text.len() <= MAX_LEN { + if text.len() <= SUBAGENT_SUMMARY_PREVIEW_MAX { text.to_string() } else { - format!("{}...", text.chars().take(MAX_LEN).collect::()) + format!( + "{}...", + text.chars() + .take(SUBAGENT_SUMMARY_PREVIEW_MAX) + .collect::() + ) } } +/// Whether `summarize_subagent_result` clipped the preview, i.e. the parent's +/// previous-line summary is incomplete and the model should call `agent_eval` +/// for the full result rather than relying on the summary. +fn subagent_result_clipped(res: &SubAgentResult) -> bool { + matches!(res.status, SubAgentStatus::Completed) + && res + .result + .as_deref() + .is_some_and(|text| text.len() > SUBAGENT_SUMMARY_PREVIEW_MAX) +} + const SUBAGENT_OUTPUT_FORMAT: &str = include_str!("../../prompts/subagent_output_format.md"); const GENERAL_AGENT_INTRO: &str = concat!( diff --git a/crates/tui/src/tools/subagent/tests.rs b/crates/tui/src/tools/subagent/tests.rs index 3ec14a46..d49833aa 100644 --- a/crates/tui/src/tools/subagent/tests.rs +++ b/crates/tui/src/tools/subagent/tests.rs @@ -509,6 +509,43 @@ fn test_parse_spawn_request_rejects_invalid_role() { assert!(err.to_string().contains("Invalid role alias")); } +#[test] +fn test_parse_spawn_request_accepts_full_role_vocabulary() { + // Regression for #2649: roles that `SubAgentType::from_str` accepts must + // also pass the second `normalize_role_alias` validation pass instead of + // being rejected with a stale four-value hint. + for (role, expected_type, expected_role) in [ + ("reviewer", SubAgentType::Review, "reviewer"), + ("review", SubAgentType::Review, "reviewer"), + ("planner", SubAgentType::Plan, "awaiter"), + ("implementer", SubAgentType::Implementer, "implementer"), + ("builder", SubAgentType::Implementer, "implementer"), + ("verifier", SubAgentType::Verifier, "verifier"), + ("tester", SubAgentType::Verifier, "verifier"), + ] { + let input = json!({ "prompt": "do work", "role": role }); + let parsed = parse_spawn_request(&input) + .unwrap_or_else(|e| panic!("role {role:?} should parse, got {e}")); + assert_eq!(parsed.agent_type, expected_type, "type for role {role:?}"); + assert_eq!( + parsed.assignment.role.as_deref(), + Some(expected_role), + "canonical role for {role:?}" + ); + } +} + +#[test] +fn test_invalid_role_error_lists_real_aliases() { + // The hint must enumerate the actually-accepted vocabulary (#2649). + let input = json!({ "prompt": "do work", "role": "nonsense" }); + let err = parse_spawn_request(&input) + .expect_err("invalid role should fail") + .to_string(); + assert!(err.contains("reviewer"), "hint should list reviewer: {err}"); + assert!(err.contains("verifier"), "hint should list verifier: {err}"); +} + #[test] fn test_parse_spawn_request_rejects_conflicting_type_and_role() { let input = json!({ @@ -883,6 +920,89 @@ async fn agent_eval_on_completed_session_returns_full_projection_not_running_err ); } +#[tokio::test] +async fn agent_eval_resolves_session_via_agent_name_alias() { + // #2650: `agent_name` is accepted as an alias for `name`/session name. + let manager = Arc::new(RwLock::new(SubAgentManager::new(PathBuf::from("."), 1))); + let (input_tx, _input_rx) = mpsc::unbounded_channel(); + let mut agent = SubAgent::new( + "test_agent_named".to_string(), + SubAgentType::Explore, + "scan".to_string(), + make_assignment(), + "deepseek-v4-flash".to_string(), + Some("Blue".to_string()), + Some(vec!["read_file".to_string()]), + input_tx, + "boot_test".to_string(), + ); + agent.session_name = "researcher".to_string(); + agent.status = SubAgentStatus::Completed; + agent.result = Some("done".to_string()); + let agent_id = agent.id.clone(); + { + let mut guard = manager.write().await; + guard.agents.insert(agent_id.clone(), agent); + } + + let ctx = ToolContext::new("."); + let tool = AgentEvalTool::new(manager.clone()); + let result = tool + .execute(json!({ "agent_name": "researcher", "block": false }), &ctx) + .await + .expect("agent_name alias must resolve the session"); + let projection: SubAgentSessionProjection = + serde_json::from_str(&result.content).expect("projection deserializes"); + assert_eq!(projection.status, "completed"); +} + +#[tokio::test] +async fn spawn_duplicate_session_name_error_names_conflicting_agent() { + // #2656: the duplicate-name error must identify the conflicting agent so a + // model can recover deterministically (reuse the id, or pick a new name). + let manager = Arc::new(RwLock::new(SubAgentManager::new(PathBuf::from("."), 5))); + let (input_tx, _input_rx) = mpsc::unbounded_channel(); + let mut existing = SubAgent::new( + "test_agent_existing".to_string(), + SubAgentType::Explore, + "scan".to_string(), + make_assignment(), + "deepseek-v4-flash".to_string(), + Some("Blue".to_string()), + Some(vec!["read_file".to_string()]), + input_tx, + "boot_test".to_string(), + ); + existing.session_name = "researcher".to_string(); + existing.status = SubAgentStatus::Running; + let existing_id = existing.id.clone(); + { + let mut guard = manager.write().await; + guard.agents.insert(existing_id.clone(), existing); + } + + let err = { + let mut guard = manager.write().await; + guard + .spawn_background_with_assignment_options( + manager.clone(), + stub_runtime(), + SubAgentType::Explore, + "new work".to_string(), + make_assignment(), + Some(vec!["read_file".to_string()]), + SubAgentSpawnOptions { + name: Some("researcher".to_string()), + ..Default::default() + }, + ) + .expect_err("duplicate session name must error") + }; + let msg = err.to_string(); + assert!(msg.contains(&existing_id), "names the conflicting agent_id: {msg}"); + assert!(msg.contains("running"), "includes the conflicting status: {msg}"); +} + #[tokio::test] async fn test_running_count_counts_only_agents_with_live_task_handles() { let mut manager = SubAgentManager::new(PathBuf::from("."), 1); @@ -1343,11 +1463,30 @@ fn subagent_done_sentinel_format_is_well_formed() { assert_eq!(parsed["agent_type"], "general"); assert_eq!(parsed["summary_location"], "previous_line"); assert_eq!(parsed["details"], "agent_eval"); + // Self-describing completion fields (#2658): a short result is complete, so + // the parent should trust the previous-line summary. + assert_eq!(parsed["result_clipped"], false); + assert_eq!(parsed["summary_complete"], true); + assert_eq!(parsed["next_action"], "use_summary"); assert!(parsed.get("summary").is_none()); assert!(parsed.get("duration_ms").is_none()); assert!(parsed.get("steps").is_none()); } +#[test] +fn subagent_done_sentinel_flags_clipped_result() { + let mut res = make_snapshot(SubAgentStatus::Completed); + res.result = Some("x".repeat(SUBAGENT_SUMMARY_PREVIEW_MAX + 1)); + let sentinel = subagent_done_sentinel("agent_big", &res); + let inner = sentinel + .trim_start_matches("") + .trim_end_matches(""); + let parsed: serde_json::Value = serde_json::from_str(inner).expect("inner JSON parses"); + assert_eq!(parsed["result_clipped"], true); + assert_eq!(parsed["summary_complete"], false); + assert_eq!(parsed["next_action"], "call_agent_eval"); +} + #[test] fn subagent_failed_sentinel_format_is_well_formed() { let sentinel = subagent_failed_sentinel("agent_zzz", "boom"); @@ -1359,9 +1498,24 @@ fn subagent_failed_sentinel_format_is_well_formed() { assert_eq!(parsed["status"], "failed"); assert_eq!(parsed["error_location"], "previous_line"); assert_eq!(parsed["details"], "agent_eval"); + assert_eq!(parsed["next_action"], "call_agent_eval"); + // Stays lean — the error text lives on the previous line, not the sentinel. assert!(parsed.get("error").is_none()); } +#[test] +fn annotate_child_model_error_adds_actionable_hint() { + // #2653: a bare provider 403 becomes actionable by naming the model and the + // recovery path, while unrelated errors pass through unchanged. + let auth = annotate_child_model_error("403 Forbidden", "kimi-k2"); + assert!(auth.contains("kimi-k2"), "names the model: {auth}"); + assert!(auth.contains("agent_open"), "names the recovery path: {auth}"); + assert!(auth.contains("403 Forbidden"), "preserves the original: {auth}"); + + let unrelated = annotate_child_model_error("connection reset by peer", "kimi-k2"); + assert_eq!(unrelated, "connection reset by peer"); +} + #[test] fn subagent_runtime_default_max_depth_is_three() { // Sanity-check the constant — bumping it without a test means stale docs. diff --git a/docs/SUBAGENTS.md b/docs/SUBAGENTS.md index e3f06e54..5c61f4b2 100644 --- a/docs/SUBAGENTS.md +++ b/docs/SUBAGENTS.md @@ -86,10 +86,11 @@ The model can spell each role multiple ways: |---------------|------------------------------------------------------------------| | `general` | `worker`, `default`, `general-purpose` | | `explore` | `explorer`, `exploration` | -| `plan` | `planning`, `awaiter` | -| `review` | `reviewer`, `code-review` | +| `plan` | `planning`, `planner`, `awaiter` | +| `review` | `reviewer`, `code-review`, `code_review` | | `implementer` | `implement`, `implementation`, `builder` | | `verifier` | `verify`, `verification`, `validator`, `tester` | +| `tool_agent` | `tool-agent`, `toolagent`, `executor`, `execution`, `fin` | | `custom` | (none; explicit `allowed_tools` array required) | All matching is case-insensitive. Unknown values produce a typed