From aa231826741575f7a0dab83d0ecda128c7ea712e Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Sat, 2 May 2026 01:30:23 -0500 Subject: [PATCH] chore(tools): remove /swarm command + agent_swarm/spawn_agents_on_csv tool surface; park swarm.rs pending #357 cascade (#336) Surface removed: /swarm slash command, agent_swarm, spawn_agents_on_csv, swarm_status, swarm_result, swarm_cancel tools, report_agent_job_result. Prompts/docs/tests updated. swarm.rs parked with #![allow(dead_code)] pending the full cascade in #357. RLM prompt audit tracked in #358. --- AGENTS.md | 4 +- crates/tui/src/commands/core.rs | 103 --- crates/tui/src/commands/mod.rs | 7 - crates/tui/src/prompts/agent.txt | 2 +- crates/tui/src/prompts/base.md | 9 +- crates/tui/src/prompts/base.txt | 5 +- crates/tui/src/tools/registry.rs | 22 +- crates/tui/src/tools/subagent/mod.rs | 944 +------------------------ crates/tui/src/tools/subagent/tests.rs | 146 ---- crates/tui/src/tools/swarm.rs | 8 + crates/tui/src/tui/ui/tests.rs | 677 +----------------- docs/TOOL_SURFACE.md | 8 +- 12 files changed, 27 insertions(+), 1908 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 7fb64599..5e247547 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -44,4 +44,6 @@ Issues may be closed when the acceptance criteria have been verified or when the ## Important Notes - **Token/cost tracking inaccuracies**: Token counting and cost estimation may be inflated due to thinking token accounting bugs. Use `/compact` to manage context, and treat cost estimates as approximate. -- **Modes**: Three modes — Plan (read-only investigation), Agent (tool use with approval), YOLO (auto-approved). See `docs/MODES.md` for details. All three modes can call the `rlm_query` tool for parallel/batched LLM fan-out (`crates/tui/src/tools/rlm_query.rs`). +- **Modes**: Three modes — Plan (read-only investigation), Agent (tool use with approval), YOLO (auto-approved). See `docs/MODES.md` for details. +- **Sub-agents**: Single model-callable surface is `agent_spawn` (returns an `agent_id` immediately; parent keeps working) plus `agent_wait` / `agent_result` / `agent_cancel` / `agent_list` / `agent_send_input` / `agent_resume` / `agent_assign`. The old `agent_swarm` / `spawn_agents_on_csv` / `/swarm` surface was removed in v0.8.5 (#336). +- **`rlm` tool** (`crates/tui/src/tools/rlm.rs`): a sandboxed Python REPL where a sub-LLM can call in-REPL helpers (`llm_query()`, `llm_query_batched()`, `rlm_query()`, `rlm_query_batched()`) — those `*_query` names are **Python helpers inside the REPL**, not separately-registered model-visible tools. Always loaded across all modes. diff --git a/crates/tui/src/commands/core.rs b/crates/tui/src/commands/core.rs index 20448e7b..4c5ea35f 100644 --- a/crates/tui/src/commands/core.rs +++ b/crates/tui/src/commands/core.rs @@ -144,109 +144,6 @@ pub fn deepseek_links(app: &mut App) -> CommandResult { )) } -/// Collaboration pattern for `/swarm` multi-agent turns. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum SwarmMode { - /// Linear pipeline: Planner → Critic → Solver - Sequential, - /// Parallel ensemble: domain specialists + synthesizer - Mixture, - /// Expert–Learner pair for knowledge distillation - Distill, - /// Reflector + Tool-Caller iterative deliberation - Deliberate, -} - -impl SwarmMode { - pub fn from_str(s: &str) -> Option { - match s.to_ascii_lowercase().as_str() { - "sequential" | "seq" | "pipeline" => Some(Self::Sequential), - "mixture" | "mix" | "ensemble" | "parallel" => Some(Self::Mixture), - "distill" | "distillation" | "transfer" => Some(Self::Distill), - "deliberate" | "deliberation" | "dialectic" | "reflect" => Some(Self::Deliberate), - _ => None, - } - } - - pub fn label(self) -> &'static str { - match self { - Self::Sequential => "sequential", - Self::Mixture => "mixture", - Self::Distill => "distill", - Self::Deliberate => "deliberate", - } - } - - pub fn description(self) -> &'static str { - match self { - Self::Sequential => "Linear pipeline: Planner → Critic → Solver", - Self::Mixture => "Parallel ensemble: domain specialists + synthesizer", - Self::Distill => "Expert–Learner pair for knowledge distillation", - Self::Deliberate => "Reflector + Tool-Caller iterative deliberation", - } - } -} - -/// Initiate a multi-agent swarm turn with the requested collaboration pattern. -/// -/// Phase A foundation — currently sets up the prompt context so the model -/// uses `agent_swarm` with the appropriate topology. Direct orchestration -/// from the slash command (bypassing the model) is planned for Phase B. -pub fn swarm(app: &mut App, arg: Option<&str>) -> CommandResult { - let raw = arg.map(str::trim).unwrap_or(""); - let mut parts = raw.splitn(2, char::is_whitespace); - let mode_str = parts.next().unwrap_or(""); - let description = parts.next().map(str::trim).unwrap_or(""); - - if mode_str.is_empty() { - let mut help = String::from("Usage: /swarm [description]\n\nModes:\n"); - for mode in [ - SwarmMode::Sequential, - SwarmMode::Mixture, - SwarmMode::Distill, - SwarmMode::Deliberate, - ] { - help.push_str(&format!(" {} — {}\n", mode.label(), mode.description())); - } - return CommandResult::message(help); - } - - let Some(mode) = SwarmMode::from_str(mode_str) else { - return CommandResult::error(format!( - "Unknown swarm mode: {mode_str}. Try /swarm for a list of modes." - )); - }; - - let msg = if description.is_empty() { - format!( - "Swarm mode: {}. Describe the task and I will delegate it using the {} pattern.", - mode.label(), - mode.label() - ) - } else { - format!( - "Swarm mode: {}. Delegating using the {} pattern:\n{}", - mode.label(), - mode.label(), - description - ) - }; - - // Queue a system message that primes the model to use agent_swarm - // with the requested topology. In Phase B this will be replaced by - // direct engine-side orchestration. - let system_hint = format!( - "The user has requested a {} swarm. Use the agent_swarm tool with the appropriate topology. \ - For sequential: use depends_on chains. For mixture: spawn specialists in parallel then synthesize. \ - For distill: spawn an expert and a learner, then have the learner absorb the expert's output. \ - For deliberate: spawn a reflector and a tool-caller in a critique→refine loop.", - mode.label() - ); - - app.system_prompt = Some(crate::models::SystemPrompt::Text(system_hint)); - - CommandResult::message(msg) -} /// Show home dashboard with stats and quick actions pub fn home_dashboard(app: &mut App) -> CommandResult { let locale = app.ui_locale; diff --git a/crates/tui/src/commands/mod.rs b/crates/tui/src/commands/mod.rs index 4231a88d..0575575e 100644 --- a/crates/tui/src/commands/mod.rs +++ b/crates/tui/src/commands/mod.rs @@ -390,12 +390,6 @@ pub const COMMANDS: &[CommandInfo] = &[ usage: "/cache [count]", description_id: MessageId::CmdCacheDescription, }, - CommandInfo { - name: "swarm", - aliases: &[], - usage: "/swarm [description]", - description_id: MessageId::CmdSwarmDescription, - }, ]; /// Execute a slash command @@ -416,7 +410,6 @@ pub fn execute(cmd: &str, app: &mut App) -> CommandResult { "provider" => provider::provider(app, arg), "queue" | "queued" => queue::queue(app, arg), "subagents" | "agents" => core::subagents(app), - "swarm" => core::swarm(app, arg), "links" | "dashboard" | "api" => core::deepseek_links(app), "home" | "stats" | "overview" => core::home_dashboard(app), "note" => note::note(app, arg), diff --git a/crates/tui/src/prompts/agent.txt b/crates/tui/src/prompts/agent.txt index 6ab8a7bc..5ce82e18 100644 --- a/crates/tui/src/prompts/agent.txt +++ b/crates/tui/src/prompts/agent.txt @@ -9,7 +9,7 @@ Decomposition builds trust — a clear plan gets faster approvals. ## Sub-agent completion sentinel -When you spawn a sub-agent via `agent_spawn` (or `agent_swarm`), the child runs independently. +When you spawn a sub-agent via `agent_spawn`, the child runs independently. You will receive a `` element in the transcript when it finishes. Read its `summary` field and integrate the work — do not re-do what the child already did. You can also call `agent_result` to pull the full structured result. diff --git a/crates/tui/src/prompts/base.md b/crates/tui/src/prompts/base.md index b37d2e3d..aa257eaa 100644 --- a/crates/tui/src/prompts/base.md +++ b/crates/tui/src/prompts/base.md @@ -24,7 +24,7 @@ Use three decomposition patterns, selected by task scope: **PREVIEW** — Before diving into a large task, survey the terrain. Scan directory structure (`list_dir`), file headers, module trees. Identify problem boundaries and estimate complexity. A 30-second preview prevents hours of wrong-path exploration. -**CHUNK + map-reduce** — When a task exceeds single-pass capacity: split into independent sub-tasks, process each independently (parallel where possible via parallel tool calls or `agent_swarm`), then synthesize findings into a coherent whole. Track chunks with `checklist_write`. +**CHUNK + map-reduce** — When a task exceeds single-pass capacity: split into independent sub-tasks, process each independently (parallel where possible via parallel tool calls or `agent_spawn`), then synthesize findings into a coherent whole. Track chunks with `checklist_write`. **RECURSIVE** — When sub-tasks reveal sub-problems: decompose recursively until each leaf is tractable. Maintain the task tree via `update_plan` (strategy) layered above `checklist_write` (leaf tasks). Propagate findings upward when sub-problems resolve. @@ -32,7 +32,7 @@ Your default workflow for any non-trivial request: 1. **`checklist_write`** — break the work into concrete, verifiable steps. Mark the first one `in_progress`. This populates the sidebar so the user can see what you're doing. 2. **Execute** — work through each checklist item, updating status as you go. 3. **For complex initiatives**, layer `update_plan` (high-level strategy) above `checklist_write` (granular steps). -4. **For parallel work**, spawn sub-agents (`agent_spawn` / `agent_swarm`) — each does one thing well. Link them to plan/todo items in your thinking. Batch independent tool calls in a single turn. +4. **For parallel work**, spawn sub-agents (`agent_spawn`) — each does one thing well. Link them to plan/todo items in your thinking. Batch independent tool calls in a single turn. 5. **For long inputs, recursive sub-LLM work, or high-leverage parallel reasoning**, use `rlm` — it loads input into a Python REPL as `context` and runs sub-LLM calls there so long strings and batched deliberation stay out of your window. 6. **For persistent cross-session memory**, use `note` sparingly for important decisions, open blockers, and architectural context. @@ -85,8 +85,7 @@ When context is deep (past a soft seam): cache reasoning conclusions in concise - **Task evidence**: `task_gate_run` for verification gates; `pr_attempt_record` / `pr_attempt_list` / `pr_attempt_read` / `pr_attempt_preflight`; `github_issue_context` / `github_pr_context` (read-only); `github_comment` / `github_close_issue` (approval + evidence required); `automation_*` scheduling tools. - **Structured search**: `grep_files`, `file_search`, `web_search`, `fetch_url`, `web.run` (browse). - **Git / diag / tests**: `git_status`, `git_diff`, `git_show`, `git_log`, `git_blame`, `diagnostics`, `run_tests`, `review`. -- **Sub-agents**: `agent_spawn` (`spawn_agent`, `delegate_to_agent`), `agent_swarm` (background by default), `swarm_status`, `swarm_result`, `swarm_cancel`, `agent_result`, `agent_cancel` (`close_agent`), `agent_list`, `agent_wait` (`wait`), `agent_send_input` (`send_input`), `agent_assign` (`assign_agent`), `resume_agent`. -- **CSV batch**: `spawn_agents_on_csv`, `report_agent_job_result`. +- **Sub-agents**: `agent_spawn` (`spawn_agent`, `delegate_to_agent`), `agent_result`, `agent_cancel` (`close_agent`), `agent_list`, `agent_wait` (`wait`), `agent_send_input` (`send_input`), `agent_assign` (`assign_agent`), `resume_agent`. - **Recursive LM (long inputs / parallel reasoning)**: `rlm` — load a file/string as `context` in a Python REPL, sub-agent writes Python that calls `llm_query`/`llm_query_batched`/`rlm_query` to chunk, compare, critique, and synthesize; returns the synthesized answer. Read-only. - **Other**: `code_execution` (Python sandbox), `validate_data` (JSON/TOML), `request_user_input`, `finance` (market quotes), `tool_search_tool_regex`, `tool_search_tool_bm25` (deferred tool discovery). @@ -134,7 +133,7 @@ Inside the `rlm` REPL, the sub-LLM has access to `llm_query()`, `llm_query_batch ## Sub-agent completion sentinel -When you spawn a sub-agent via `agent_spawn` (or `agent_swarm`), the child runs independently in its own context. `agent_swarm` returns a `swarm_id` immediately unless you explicitly pass `block: true`; keep working and call `swarm_status` or `swarm_result` when you need the collected results. You will receive a `` element in the transcript when an individual child finishes. This sentinel carries: +When you spawn a sub-agent via `agent_spawn`, the child runs independently. You will receive a `` element in the transcript when it finishes. This sentinel carries: - `agent_id` — the child's identifier - `summary` — a human-readable summary of what the child found or did diff --git a/crates/tui/src/prompts/base.txt b/crates/tui/src/prompts/base.txt index 153ded68..962fbdf0 100644 --- a/crates/tui/src/prompts/base.txt +++ b/crates/tui/src/prompts/base.txt @@ -8,7 +8,7 @@ Your default workflow for any non-trivial request: 1. **`checklist_write`** — break the work into concrete, verifiable steps. Mark the first one `in_progress`. This populates the sidebar so the user can see what you're doing. 2. **Execute** — work through each checklist item, updating status as you go. 3. **For complex initiatives**, layer `update_plan` (high-level strategy) above `checklist_write` (granular steps). -4. **For parallel work**, spawn sub-agents (`agent_spawn` / `agent_swarm`) — each does one thing well. Link them to plan/todo items in your thinking. +4. **For parallel work**, spawn sub-agents (`agent_spawn`) — each does one thing well. Link them to plan/todo items in your thinking. 5. **For long inputs, recursive sub-LLM work, or high-leverage parallel reasoning**, use `rlm` — it loads input into a Python REPL as `context` and runs sub-LLM calls there so long strings and batched deliberation stay out of your window. 6. **For persistent cross-session memory**, use `note` sparingly for important decisions, open blockers, and architectural context. @@ -33,8 +33,7 @@ Model notes: DeepSeek V4 models emit *thinking tokens* (`ContentBlock::Thinking` - **Task evidence**: `task_gate_run` for verification gates; `pr_attempt_record` / `pr_attempt_list` / `pr_attempt_read` / `pr_attempt_preflight`; `github_issue_context` / `github_pr_context` (read-only); `github_comment` / `github_close_issue` (approval + evidence required); `automation_*` scheduling tools. - **Structured search**: `grep_files`, `file_search`, `web_search`, `fetch_url`, `web.run` (browse). - **Git / diag / tests**: `git_status`, `git_diff`, `git_show`, `git_log`, `git_blame`, `diagnostics`, `run_tests`, `review`. -- **Sub-agents**: `agent_spawn` (`spawn_agent`, `delegate_to_agent`), `agent_swarm`, `agent_result`, `agent_cancel` (`close_agent`), `agent_list`, `agent_wait` (`wait`), `agent_send_input` (`send_input`), `agent_assign` (`assign_agent`), `resume_agent`. -- **CSV batch**: `spawn_agents_on_csv`, `report_agent_job_result`. +- **Sub-agents**: `agent_spawn` (`spawn_agent`, `delegate_to_agent`), `agent_result`, `agent_cancel` (`close_agent`), `agent_list`, `agent_wait` (`wait`), `agent_send_input` (`send_input`), `agent_assign` (`assign_agent`), `resume_agent`. - **Recursive LM (long inputs / parallel reasoning)**: `rlm` — load a file/string as `context` in a Python REPL, sub-agent writes Python that calls `llm_query`/`llm_query_batched`/`rlm_query` to chunk, compare, critique, and synthesize; returns the synthesized answer. Read-only. - **Other**: `code_execution` (Python sandbox), `validate_data` (JSON/TOML), `request_user_input`, `finance` (market quotes), `tool_search_tool_regex`, `tool_search_tool_bm25` (deferred tool discovery). diff --git a/crates/tui/src/tools/registry.rs b/crates/tui/src/tools/registry.rs index 1d6c5014..a2b08700 100644 --- a/crates/tui/src/tools/registry.rs +++ b/crates/tui/src/tools/registry.rs @@ -621,9 +621,8 @@ impl ToolRegistryBuilder { use super::subagent::{ AgentAssignTool, AgentCancelTool, AgentCloseTool, AgentListTool, AgentResultTool, AgentResumeTool, AgentSendInputTool, AgentSpawnTool, AgentWaitTool, - DelegateToAgentTool, ReportAgentJobResultTool, SpawnAgentsOnCsvTool, + DelegateToAgentTool }; - use super::swarm::{AgentSwarmTool, SwarmCancelTool, SwarmResultTool, SwarmStatusTool}; self.with_tool(Arc::new(AgentSpawnTool::new( manager.clone(), @@ -638,25 +637,6 @@ impl ToolRegistryBuilder { manager.clone(), runtime.clone(), ))) - .with_tool(Arc::new(AgentSwarmTool::new( - manager.clone(), - runtime.clone(), - ))) - .with_tool(Arc::new(SpawnAgentsOnCsvTool::new( - manager.clone(), - runtime.clone(), - ))) - .with_tool(Arc::new(ReportAgentJobResultTool)) - .with_tool(Arc::new(SwarmStatusTool::new( - runtime.context.workspace.clone(), - ))) - .with_tool(Arc::new(SwarmResultTool::new( - runtime.context.workspace.clone(), - ))) - .with_tool(Arc::new(SwarmCancelTool::new( - manager.clone(), - runtime.context.workspace.clone(), - ))) .with_tool(Arc::new(AgentResultTool::new(manager.clone()))) .with_tool(Arc::new(AgentSendInputTool::new( manager.clone(), diff --git a/crates/tui/src/tools/subagent/mod.rs b/crates/tui/src/tools/subagent/mod.rs index 67793209..f4054fbc 100644 --- a/crates/tui/src/tools/subagent/mod.rs +++ b/crates/tui/src/tools/subagent/mod.rs @@ -4,17 +4,15 @@ //! and retrieve results. Sub-agents run with a filtered toolset and //! inherit the workspace configuration from the main session. -use std::collections::{HashMap, HashSet, VecDeque}; +use std::collections::{HashMap, VecDeque}; use std::fs; use std::path::{Path, PathBuf}; -use std::sync::atomic::{AtomicBool, Ordering}; -use std::sync::{Arc, Mutex as StdMutex, OnceLock}; +use std::sync::Arc; use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; -use tokio::sync::{Mutex, Semaphore}; +use tokio::sync::Mutex; use anyhow::{Result, anyhow}; use async_trait::async_trait; -use futures_util::stream::{FuturesUnordered, StreamExt}; use serde::{Deserialize, Serialize}; use serde_json::{Value, json}; use tokio::{sync::mpsc, task::JoinHandle}; @@ -54,9 +52,6 @@ const COMPLETED_AGENT_RETENTION: Duration = Duration::from_secs(60 * 60); const SUBAGENT_STATE_SCHEMA_VERSION: u32 = 1; const SUBAGENT_STATE_FILE: &str = "subagents.v1.json"; const SUBAGENT_RESTART_REASON: &str = "Interrupted by process restart"; -const DEFAULT_CSV_MAX_CONCURRENCY: u64 = 16; -const DEFAULT_CSV_MAX_RUNTIME_SECONDS: u64 = 1800; -const MAX_CSV_MAX_RUNTIME_SECONDS: u64 = 86_400; const VALID_SUBAGENT_TYPES: &str = "general, explore, plan, review, custom, worker, explorer, awaiter, default"; @@ -90,10 +85,6 @@ pub const WHALE_NICKNAMES: &[&str] = &[ /// Removal version for deprecated tool aliases. const DEPRECATION_REMOVAL_VERSION: &str = "0.8.0"; -static AGENT_JOB_REPORTS: OnceLock>>> = - OnceLock::new(); -static AGENT_JOB_ASSIGNMENTS: OnceLock>>> = - OnceLock::new(); #[must_use] pub fn whale_nickname_for_index(index: usize) -> String { @@ -266,7 +257,6 @@ impl SubAgentType { "todo_update", "todo_list", "update_plan", - "report_agent_job_result", ], Self::Explore => vec![ "list_dir", @@ -399,47 +389,6 @@ struct AssignRequest { interrupt: bool, } -#[derive(Debug, Clone)] -struct CsvRowTask { - row_index: usize, - item_id: String, - values: HashMap, -} - -#[derive(Debug, Clone, Serialize)] -struct CsvWorkerOutcome { - #[serde(skip_serializing)] - row_index: usize, - item_id: String, - status: String, - #[serde(skip_serializing_if = "Option::is_none")] - agent_id: Option, - duration_ms: u64, - #[serde(skip_serializing_if = "Option::is_none")] - error: Option, - #[serde(skip_serializing_if = "Option::is_none")] - result: Option, - #[serde(skip_serializing_if = "Option::is_none")] - result_json: Option, -} - -#[derive(Debug, Clone)] -struct AgentJobReport { - result: Value, - stop: bool, -} - -#[derive(Debug, Clone, Serialize)] -struct SpawnAgentsOnCsvSummary { - job_id: String, - total: usize, - completed: usize, - failed: usize, - timed_out: usize, - skipped: usize, - output_csv_path: String, - results: Vec, -} #[derive(Debug, Clone, Serialize, Deserialize)] struct PersistedSubAgent { @@ -2280,325 +2229,6 @@ impl ToolSpec for DelegateToAgentTool { } } -/// Tool to process CSV rows by spawning one worker sub-agent per row. -pub struct SpawnAgentsOnCsvTool { - manager: SharedSubAgentManager, - runtime: SubAgentRuntime, -} - -struct AgentJobReportCleanup { - job_id: String, -} - -impl AgentJobReportCleanup { - fn new(job_id: String) -> Self { - clear_agent_job_results(&job_id); - Self { job_id } - } -} - -impl Drop for AgentJobReportCleanup { - fn drop(&mut self) { - clear_agent_job_results(&self.job_id); - } -} - -impl SpawnAgentsOnCsvTool { - /// Create a new CSV batch orchestration tool. - #[must_use] - pub fn new(manager: SharedSubAgentManager, runtime: SubAgentRuntime) -> Self { - Self { manager, runtime } - } -} - -#[async_trait] -impl ToolSpec for SpawnAgentsOnCsvTool { - fn name(&self) -> &'static str { - "spawn_agents_on_csv" - } - - fn description(&self) -> &'static str { - "Process a CSV by spawning one worker sub-agent per row. The instruction string is a template where `{column}` placeholders are replaced with row values. Each worker must call `report_agent_job_result` with a JSON object (matching `output_schema` when provided); missing reports are treated as failures. This call blocks until all rows finish and automatically exports results to `output_csv_path` (or a default path)." - } - - fn input_schema(&self) -> Value { - json!({ - "type": "object", - "properties": { - "csv_path": { - "type": "string", - "description": "Path to the input CSV file" - }, - "instruction": { - "type": "string", - "description": "Instruction template. Use {column_name} placeholders for row values." - }, - "id_column": { - "type": "string", - "description": "Optional CSV column name used as stable item id" - }, - "max_concurrency": { - "type": "integer", - "description": "Maximum concurrent workers (default: 16)" - }, - "max_workers": { - "type": "integer", - "description": "Alias for max_concurrency" - }, - "max_runtime_seconds": { - "type": "integer", - "description": "Per-worker timeout in seconds (default: 1800)" - }, - "output_csv_path": { - "type": "string", - "description": "Optional output CSV path for worker results" - }, - "output_schema": { - "type": "object", - "description": "Optional JSON schema-like object used to validate worker JSON output" - } - }, - "required": ["csv_path", "instruction"] - }) - } - - fn capabilities(&self) -> Vec { - vec![ - ToolCapability::ExecutesCode, - ToolCapability::RequiresApproval, - ] - } - - fn approval_requirement(&self) -> ApprovalRequirement { - ApprovalRequirement::Required - } - - async fn execute(&self, input: Value, context: &ToolContext) -> Result { - let csv_path_raw = required_str(&input, "csv_path")?; - let csv_path = context.resolve_path(csv_path_raw)?; - let instruction_template = required_str(&input, "instruction")?; - if instruction_template.trim().is_empty() { - return Err(ToolError::invalid_input( - "instruction cannot be empty".to_string(), - )); - } - - let id_column = optional_input_str(&input, &["id_column"]).map(str::to_string); - let rows = load_csv_rows(&csv_path, id_column.as_deref())?; - if rows.is_empty() { - return Err(ToolError::invalid_input(format!( - "CSV '{}' has no data rows", - csv_path.display() - ))); - } - - let output_schema = input.get("output_schema").cloned(); - let output_csv_path = resolve_results_csv_path(context, &input, &csv_path)?; - let max_runtime_seconds = optional_u64( - &input, - "max_runtime_seconds", - DEFAULT_CSV_MAX_RUNTIME_SECONDS, - ) - .clamp(1, MAX_CSV_MAX_RUNTIME_SECONDS); - let requested_concurrency = parse_csv_concurrency(&input); - - let max_agents = { - let manager = self.manager.lock().await; - manager.max_agents().max(1) - }; - let max_concurrency = requested_concurrency.clamp(1, max_agents as u64) as usize; - - let semaphore = Arc::new(Semaphore::new(max_concurrency)); - let timeout = Duration::from_secs(max_runtime_seconds); - let job_id = format!("job_{}", &Uuid::new_v4().to_string()[..8]); - let _cleanup = AgentJobReportCleanup::new(job_id.clone()); - let stop_requested = Arc::new(AtomicBool::new(false)); - let mut workers = FuturesUnordered::new(); - - for row in rows { - let permit = semaphore - .clone() - .acquire_owned() - .await - .map_err(|_| ToolError::execution_failed("Worker semaphore closed"))?; - let manager = self.manager.clone(); - let runtime = self.runtime.clone(); - let template = instruction_template.to_string(); - let schema = output_schema.clone(); - let job_id = job_id.clone(); - let stop_requested = stop_requested.clone(); - - workers.push(tokio::spawn(async move { - let _permit = permit; - run_csv_row_agent( - manager, - runtime, - &job_id, - row, - &template, - timeout, - schema, - stop_requested, - ) - .await - })); - } - - let mut outcomes = Vec::new(); - while let Some(joined) = workers.next().await { - match joined { - Ok(outcome) => outcomes.push(outcome), - Err(err) => outcomes.push(CsvWorkerOutcome { - row_index: usize::MAX, - item_id: "worker_join".to_string(), - status: "failed".to_string(), - agent_id: None, - duration_ms: 0, - error: Some(format!("Worker task failed to join: {err}")), - result: None, - result_json: None, - }), - } - } - - outcomes.sort_by_key(|outcome| outcome.row_index); - - write_csv_worker_outcomes(&output_csv_path, &outcomes).map_err(|err| { - ToolError::execution_failed(format!("Failed to write output CSV: {err}")) - })?; - - let completed = outcomes - .iter() - .filter(|outcome| outcome.status == "completed") - .count(); - let skipped = outcomes - .iter() - .filter(|outcome| outcome.status == "skipped") - .count(); - let timed_out = outcomes - .iter() - .filter(|outcome| outcome.status == "timed_out") - .count(); - let failed = outcomes - .iter() - .filter(|outcome| outcome.status == "failed") - .count() - + timed_out; - - let summary = SpawnAgentsOnCsvSummary { - job_id, - total: outcomes.len(), - completed, - failed, - timed_out, - skipped, - output_csv_path: output_csv_path.display().to_string(), - results: outcomes, - }; - let status = if summary.failed > 0 { - if summary.completed == 0 && summary.skipped == 0 { - "Failed" - } else { - "Partial" - } - } else if stop_requested.load(Ordering::Relaxed) || summary.skipped > 0 { - "Cancelled" - } else { - "Completed" - }; - let mut result = - ToolResult::json(&summary).map_err(|e| ToolError::execution_failed(e.to_string()))?; - result.metadata = Some(json!({ - "status": status, - "job_id": summary.job_id, - "completed": summary.completed, - "failed": summary.failed, - "timed_out": summary.timed_out, - "skipped": summary.skipped, - "stop_requested": stop_requested.load(Ordering::Relaxed), - "output_csv_path": summary.output_csv_path, - })); - Ok(result) - } -} - -/// Worker-oriented tool to report structured row outcomes for CSV agent jobs. -pub struct ReportAgentJobResultTool; - -#[async_trait] -impl ToolSpec for ReportAgentJobResultTool { - fn name(&self) -> &'static str { - "report_agent_job_result" - } - - fn description(&self) -> &'static str { - "Worker-only tool to report a structured result for a spawn_agents_on_csv row." - } - - fn input_schema(&self) -> Value { - json!({ - "type": "object", - "properties": { - "job_id": { - "type": "string", - "description": "Identifier of the CSV job" - }, - "item_id": { - "type": "string", - "description": "Identifier of the CSV row item" - }, - "result": { - "type": "object", - "description": "Structured JSON result to record for the row" - }, - "stop": { - "type": "boolean", - "description": "Optional. When true, cancels remaining unstarted CSV rows for this job." - } - }, - "required": ["job_id", "item_id", "result"] - }) - } - - fn capabilities(&self) -> Vec { - vec![] - } - - async fn execute(&self, input: Value, _context: &ToolContext) -> Result { - let job_id = required_str(&input, "job_id")?.trim(); - let item_id = required_str(&input, "item_id")?.trim(); - if job_id.is_empty() { - return Err(ToolError::invalid_input("job_id cannot be empty")); - } - if item_id.is_empty() { - return Err(ToolError::invalid_input("item_id cannot be empty")); - } - let result = input - .get("result") - .cloned() - .ok_or_else(|| ToolError::missing_field("result"))?; - if !result.is_object() { - return Err(ToolError::invalid_input("result must be a JSON object")); - } - let reporting_agent_id = input - .get("__reporting_agent_id") - .and_then(Value::as_str) - .map(str::trim) - .filter(|value| !value.is_empty()); - let stop = optional_bool(&input, "stop", false); - let accepted = - record_agent_job_result(job_id, item_id, result.clone(), stop, reporting_agent_id); - - let payload = json!({ - "job_id": job_id, - "item_id": item_id, - "accepted": accepted, - "stop": stop, - "result": result - }); - ToolResult::json(&payload).map_err(|e| ToolError::execution_failed(e.to_string())) - } -} // === Sub-agent Execution === @@ -3419,562 +3049,6 @@ fn parse_assign_request(input: &Value) -> Result { }) } -fn parse_csv_concurrency(input: &Value) -> u64 { - if input.get("max_concurrency").is_some() { - return optional_u64(input, "max_concurrency", DEFAULT_CSV_MAX_CONCURRENCY).max(1); - } - if input.get("max_workers").is_some() { - return optional_u64(input, "max_workers", DEFAULT_CSV_MAX_CONCURRENCY).max(1); - } - DEFAULT_CSV_MAX_CONCURRENCY -} - -fn agent_job_reports_store() -> &'static StdMutex>> -{ - AGENT_JOB_REPORTS.get_or_init(|| StdMutex::new(HashMap::new())) -} - -fn agent_job_assignments_store() -> &'static StdMutex>> { - AGENT_JOB_ASSIGNMENTS.get_or_init(|| StdMutex::new(HashMap::new())) -} - -fn record_agent_job_assignment(job_id: &str, item_id: &str, agent_id: &str) { - let mut store = agent_job_assignments_store() - .lock() - .expect("agent job assignments lock poisoned"); - let job = store.entry(job_id.to_string()).or_default(); - job.insert(item_id.to_string(), agent_id.to_string()); -} - -fn remove_agent_job_assignment(job_id: &str, item_id: &str) { - let mut store = agent_job_assignments_store() - .lock() - .expect("agent job assignments lock poisoned"); - if let Some(job) = store.get_mut(job_id) { - job.remove(item_id); - if job.is_empty() { - store.remove(job_id); - } - } -} - -fn clear_agent_job_assignments(job_id: &str) { - let mut store = agent_job_assignments_store() - .lock() - .expect("agent job assignments lock poisoned"); - store.remove(job_id); -} - -fn report_matches_assignment( - job_id: &str, - item_id: &str, - reporting_agent_id: Option<&str>, -) -> bool { - let Some(reporting_agent_id) = reporting_agent_id else { - return false; - }; - let store = agent_job_assignments_store() - .lock() - .expect("agent job assignments lock poisoned"); - store - .get(job_id) - .and_then(|job| job.get(item_id)) - .is_some_and(|expected| expected == reporting_agent_id) -} - -fn record_agent_job_result( - job_id: &str, - item_id: &str, - result: Value, - stop: bool, - reporting_agent_id: Option<&str>, -) -> bool { - if !report_matches_assignment(job_id, item_id, reporting_agent_id) { - return false; - } - let mut store = agent_job_reports_store() - .lock() - .expect("agent job reports lock poisoned"); - let job = store.entry(job_id.to_string()).or_default(); - if job.contains_key(item_id) { - return false; - } - job.insert(item_id.to_string(), AgentJobReport { result, stop }); - true -} - -fn take_agent_job_result(job_id: &str, item_id: &str) -> Option { - let mut store = agent_job_reports_store() - .lock() - .expect("agent job reports lock poisoned"); - let result = store.get_mut(job_id).and_then(|job| job.remove(item_id)); - if store - .get(job_id) - .is_some_and(|job_results| job_results.is_empty()) - { - store.remove(job_id); - } - remove_agent_job_assignment(job_id, item_id); - result -} - -fn clear_agent_job_results(job_id: &str) { - let mut store = agent_job_reports_store() - .lock() - .expect("agent job reports lock poisoned"); - store.remove(job_id); - clear_agent_job_assignments(job_id); -} - -fn resolve_results_csv_path( - context: &ToolContext, - input: &Value, - csv_path: &Path, -) -> Result { - if let Some(path) = optional_input_str(input, &["output_csv_path"]) { - context.resolve_path(path) - } else { - Ok(default_results_csv_path(csv_path)) - } -} - -fn default_results_csv_path(csv_path: &Path) -> PathBuf { - let stem = csv_path - .file_stem() - .and_then(|stem| stem.to_str()) - .filter(|stem| !stem.is_empty()) - .unwrap_or("results"); - csv_path.with_file_name(format!("{stem}.results.csv")) -} - -fn load_csv_rows(csv_path: &Path, id_column: Option<&str>) -> Result, ToolError> { - let mut reader = csv::ReaderBuilder::new() - .from_path(csv_path) - .map_err(|err| { - ToolError::execution_failed(format!( - "Failed to read CSV '{}': {err}", - csv_path.display() - )) - })?; - - let headers = reader - .headers() - .map_err(|err| { - ToolError::execution_failed(format!( - "Failed to read CSV headers '{}': {err}", - csv_path.display() - )) - })? - .clone(); - if headers.is_empty() { - return Err(ToolError::invalid_input(format!( - "CSV '{}' has no headers", - csv_path.display() - ))); - } - let mut seen_headers = HashSet::new(); - for header in &headers { - if !seen_headers.insert(header.to_string()) { - return Err(ToolError::invalid_input(format!( - "CSV '{}' has duplicate header '{}'", - csv_path.display(), - header - ))); - } - } - - let id_index = if let Some(column_name) = id_column { - let trimmed = column_name.trim(); - if trimmed.is_empty() { - None - } else { - let index = headers - .iter() - .position(|header| header == trimmed) - .ok_or_else(|| { - ToolError::invalid_input(format!( - "CSV '{}' is missing id_column '{trimmed}'", - csv_path.display() - )) - })?; - Some(index) - } - } else { - None - }; - - let mut rows = Vec::new(); - let mut seen_item_ids = HashSet::new(); - for (row_index, row) in reader.records().enumerate() { - let record = row.map_err(|err| { - ToolError::execution_failed(format!( - "Failed to parse CSV row {} in '{}': {err}", - row_index + 1, - csv_path.display() - )) - })?; - let mut values = HashMap::new(); - for (idx, header) in headers.iter().enumerate() { - values.insert( - header.to_string(), - record.get(idx).unwrap_or_default().to_string(), - ); - } - let base_item_id = id_index - .and_then(|idx| record.get(idx)) - .map(str::trim) - .filter(|value| !value.is_empty()) - .map(str::to_string) - .unwrap_or_else(|| format!("row-{}", row_index + 1)); - let mut item_id = base_item_id.clone(); - let mut suffix = 2usize; - while !seen_item_ids.insert(item_id.clone()) { - item_id = format!("{base_item_id}-{suffix}"); - suffix = suffix.saturating_add(1); - } - - rows.push(CsvRowTask { - row_index, - item_id, - values, - }); - } - - Ok(rows) -} - -fn render_instruction_template(template: &str, values: &HashMap) -> String { - const OPEN_BRACE_SENTINEL: &str = "__DEEPSEEK_OPEN_BRACE__"; - const CLOSE_BRACE_SENTINEL: &str = "__DEEPSEEK_CLOSE_BRACE__"; - - let mut rendered = template - .replace("{{", OPEN_BRACE_SENTINEL) - .replace("}}", CLOSE_BRACE_SENTINEL); - for (key, value) in values { - rendered = rendered.replace(&format!("{{{key}}}"), value); - } - rendered - .replace(OPEN_BRACE_SENTINEL, "{") - .replace(CLOSE_BRACE_SENTINEL, "}") -} - -fn validate_output_schema(schema: &Value, payload: &Value) -> Result<(), String> { - let object = payload - .as_object() - .ok_or_else(|| "Expected JSON object output".to_string())?; - if let Some(expected_type) = schema.get("type").and_then(Value::as_str) - && expected_type != "object" - { - return Err("output_schema.type must be 'object' when provided".to_string()); - } - if let Some(required_fields) = schema.get("required").and_then(Value::as_array) { - for field in required_fields { - let Some(field_name) = field.as_str() else { - continue; - }; - if !object.contains_key(field_name) { - return Err(format!( - "Worker output missing required field '{field_name}'" - )); - } - } - } - Ok(()) -} - -fn write_csv_worker_outcomes(csv_path: &Path, outcomes: &[CsvWorkerOutcome]) -> Result<()> { - if let Some(parent) = csv_path.parent() { - fs::create_dir_all(parent)?; - } - let mut writer = csv::WriterBuilder::new().from_path(csv_path)?; - writer.write_record([ - "item_id", - "status", - "agent_id", - "duration_ms", - "error", - "result", - "result_json", - ])?; - for outcome in outcomes { - let result_json = outcome - .result_json - .as_ref() - .map(serde_json::to_string) - .transpose()? - .unwrap_or_default(); - writer.write_record([ - outcome.item_id.clone(), - outcome.status.clone(), - outcome.agent_id.clone().unwrap_or_default(), - outcome.duration_ms.to_string(), - outcome.error.clone().unwrap_or_default(), - outcome.result.clone().unwrap_or_default(), - result_json, - ])?; - } - writer.flush()?; - Ok(()) -} - -#[allow(clippy::too_many_arguments)] -async fn run_csv_row_agent( - manager: SharedSubAgentManager, - runtime: SubAgentRuntime, - job_id: &str, - row: CsvRowTask, - instruction_template: &str, - timeout: Duration, - output_schema: Option, - stop_requested: Arc, -) -> CsvWorkerOutcome { - let CsvRowTask { - row_index, - item_id, - values, - } = row; - - if stop_requested.load(Ordering::Relaxed) { - return CsvWorkerOutcome { - row_index, - item_id, - status: "skipped".to_string(), - agent_id: None, - duration_ms: 0, - error: Some("Skipped because stop=true was reported by another worker".to_string()), - result: None, - result_json: None, - }; - } - - let schema_text = output_schema - .as_ref() - .map(serde_json::to_string_pretty) - .transpose() - .unwrap_or(None) - .unwrap_or_else(|| "{}".to_string()); - let rendered_instruction = render_instruction_template(instruction_template, &values); - let row_json = serde_json::to_string_pretty(&values).unwrap_or_else(|_| "{}".to_string()); - let prompt = format!( - "You are processing one item for a spawn_agents_on_csv job.\n\ -Job ID: {job_id}\n\ -Item ID: {item_id}\n\n\ -Task instruction:\n\ -{rendered_instruction}\n\n\ -Input row (JSON):\n\ -{row_json}\n\n\ -Expected result schema (JSON Schema or {{}}):\n\ -{schema_text}\n\n\ -You MUST call the `report_agent_job_result` tool exactly once with:\n\ -1. `job_id` = \"{job_id}\"\n\ -2. `item_id` = \"{item_id}\"\n\ -3. `result` = a JSON object for this row.\n\n\ -If you need to stop the job early, include `stop` = true in the same tool call.\n\n\ -After the tool call succeeds, stop.", - item_id = item_id.as_str() - ); - - let assignment = SubAgentAssignment::new( - format!("Process CSV item '{item_id}' for job '{job_id}'"), - Some("worker".to_string()), - ); - let spawn_deadline = Instant::now() + timeout.min(Duration::from_secs(60)); - let spawned = loop { - if stop_requested.load(Ordering::Relaxed) { - return CsvWorkerOutcome { - row_index, - item_id, - status: "skipped".to_string(), - agent_id: None, - duration_ms: 0, - error: Some("Skipped because stop=true was reported by another worker".to_string()), - result: None, - result_json: None, - }; - } - let attempt = { - let mut manager_guard = manager.lock().await; - manager_guard.spawn_background_with_assignment( - manager.clone(), - runtime.clone(), - SubAgentType::General, - prompt.clone(), - assignment.clone(), - None, - ) - }; - - match attempt { - Ok(snapshot) => break Ok(snapshot), - Err(err) => { - let message = err.to_string(); - if message.contains("Sub-agent limit reached") && Instant::now() < spawn_deadline { - tokio::time::sleep(RESULT_POLL_INTERVAL).await; - continue; - } - break Err(message); - } - } - }; - - let spawn_snapshot = match spawned { - Ok(snapshot) => snapshot, - Err(error) => { - return CsvWorkerOutcome { - row_index, - item_id, - status: "failed".to_string(), - agent_id: None, - duration_ms: 0, - error: Some(error), - result: None, - result_json: None, - }; - } - }; - - let agent_id = spawn_snapshot.agent_id.clone(); - record_agent_job_assignment(job_id, item_id.as_str(), &agent_id); - let deadline = Instant::now() + timeout; - let final_snapshot = loop { - let snapshot = { - let manager = manager.lock().await; - manager.get_result(&agent_id) - }; - match snapshot { - Ok(snapshot) if snapshot.status != SubAgentStatus::Running => break Ok(snapshot), - Ok(snapshot) => { - if Instant::now() >= deadline { - let cancelled = { - let mut manager = manager.lock().await; - manager.cancel(&agent_id) - }; - let mut outcome = CsvWorkerOutcome { - row_index, - item_id, - status: "timed_out".to_string(), - agent_id: Some(agent_id.clone()), - duration_ms: snapshot.duration_ms, - error: Some("Worker timed out and was cancelled".to_string()), - result: snapshot.result, - result_json: None, - }; - if let Ok(cancelled_snapshot) = cancelled { - outcome.duration_ms = cancelled_snapshot.duration_ms; - } - return outcome; - } - tokio::time::sleep(RESULT_POLL_INTERVAL).await; - } - Err(err) => break Err(err.to_string()), - } - }; - - let snapshot = match final_snapshot { - Ok(snapshot) => snapshot, - Err(error) => { - return CsvWorkerOutcome { - row_index, - item_id, - status: "failed".to_string(), - agent_id: Some(agent_id), - duration_ms: 0, - error: Some(error), - result: None, - result_json: None, - }; - } - }; - - match snapshot.status { - SubAgentStatus::Completed => { - let Some(report) = take_agent_job_result(job_id, item_id.as_str()) else { - return CsvWorkerOutcome { - row_index, - item_id, - status: "failed".to_string(), - agent_id: Some(snapshot.agent_id), - duration_ms: snapshot.duration_ms, - error: Some( - "Worker finished without calling report_agent_job_result".to_string(), - ), - result: snapshot.result, - result_json: None, - }; - }; - - if let Some(schema) = output_schema.as_ref() - && let Err(error) = validate_output_schema(schema, &report.result) - { - return CsvWorkerOutcome { - row_index, - item_id, - status: "failed".to_string(), - agent_id: Some(snapshot.agent_id), - duration_ms: snapshot.duration_ms, - error: Some(error), - result: snapshot.result, - result_json: Some(report.result), - }; - } - - if report.stop { - stop_requested.store(true, Ordering::Relaxed); - } - - CsvWorkerOutcome { - row_index, - item_id, - status: "completed".to_string(), - agent_id: Some(snapshot.agent_id), - duration_ms: snapshot.duration_ms, - error: None, - result: snapshot.result, - result_json: Some(report.result), - } - } - SubAgentStatus::Interrupted(error) => CsvWorkerOutcome { - row_index, - item_id, - status: "interrupted".to_string(), - agent_id: Some(snapshot.agent_id), - duration_ms: snapshot.duration_ms, - error: Some(error), - result: snapshot.result, - result_json: None, - }, - SubAgentStatus::Failed(error) => CsvWorkerOutcome { - row_index, - item_id, - status: "failed".to_string(), - agent_id: Some(snapshot.agent_id), - duration_ms: snapshot.duration_ms, - error: Some(error), - result: snapshot.result, - result_json: None, - }, - SubAgentStatus::Cancelled => CsvWorkerOutcome { - row_index, - item_id, - status: "failed".to_string(), - agent_id: Some(snapshot.agent_id), - duration_ms: snapshot.duration_ms, - error: Some("Worker cancelled".to_string()), - result: snapshot.result, - result_json: None, - }, - SubAgentStatus::Running => CsvWorkerOutcome { - row_index, - item_id, - status: "failed".to_string(), - agent_id: Some(snapshot.agent_id), - duration_ms: snapshot.duration_ms, - error: Some("Worker did not reach terminal status".to_string()), - result: snapshot.result, - result_json: None, - }, - } -} fn normalize_role_alias(input: &str) -> Option<&'static str> { match input.to_ascii_lowercase().as_str() { @@ -4058,7 +3132,6 @@ impl SubAgentToolRegistry { todo_list, plan_state, ) - .with_tool(Arc::new(ReportAgentJobResultTool)) .build(context); Self { @@ -4098,19 +3171,10 @@ impl SubAgentToolRegistry { } } - async fn execute(&self, agent_id: &str, name: &str, mut input: Value) -> Result { + async fn execute(&self, _agent_id: &str, name: &str, input: Value) -> Result { if !self.is_tool_allowed(name) { return Err(anyhow!("Tool {name} not allowed for this sub-agent")); } - if name == "report_agent_job_result" - && let Some(object) = input.as_object_mut() - { - object.insert( - "__reporting_agent_id".to_string(), - Value::String(agent_id.to_string()), - ); - } - self.registry .execute(name, input) .await diff --git a/crates/tui/src/tools/subagent/tests.rs b/crates/tui/src/tools/subagent/tests.rs index f428fea3..11304a4c 100644 --- a/crates/tui/src/tools/subagent/tests.rs +++ b/crates/tui/src/tools/subagent/tests.rs @@ -163,154 +163,8 @@ fn test_parse_assign_request_requires_update_fields() { ); } -#[test] -fn test_render_instruction_template_replaces_columns() { - let mut values = HashMap::new(); - values.insert("name".to_string(), "alpha".to_string()); - values.insert("owner".to_string(), "hunter".to_string()); - let rendered = render_instruction_template("Inspect {name} for {owner}", &values); - assert_eq!(rendered, "Inspect alpha for hunter"); -} -#[test] -fn test_render_instruction_template_preserves_escaped_braces() { - let mut values = HashMap::new(); - values.insert("name".to_string(), "alpha".to_string()); - - let rendered = render_instruction_template("literal {{x}} and {name}", &values); - assert_eq!(rendered, "literal {x} and alpha"); -} - -#[test] -fn test_record_agent_job_result_accepts_first_report_only() { - let job_id = "job_test_reports"; - clear_agent_job_results(job_id); - record_agent_job_assignment(job_id, "item-1", "agent_1"); - - assert!(record_agent_job_result( - job_id, - "item-1", - json!({"status":"ok"}), - false, - Some("agent_1") - )); - assert!(!record_agent_job_result( - job_id, - "item-1", - json!({"status":"duplicate"}), - true, - Some("agent_1") - )); - - let report = take_agent_job_result(job_id, "item-1").expect("report should exist"); - assert_eq!(report.result["status"], "ok"); - assert!(!report.stop); - assert!(take_agent_job_result(job_id, "item-1").is_none()); - clear_agent_job_results(job_id); -} - -#[test] -fn test_record_agent_job_result_rejects_wrong_agent_assignment() { - let job_id = "job_test_reports_wrong_agent"; - clear_agent_job_results(job_id); - record_agent_job_assignment(job_id, "item-1", "agent_good"); - - assert!(!record_agent_job_result( - job_id, - "item-1", - json!({"status":"bad"}), - false, - Some("agent_bad") - )); - assert!(take_agent_job_result(job_id, "item-1").is_none()); - clear_agent_job_results(job_id); -} - -#[test] -fn test_record_agent_job_result_rejects_missing_agent_assignment_context() { - let job_id = "job_test_reports_missing_agent_context"; - clear_agent_job_results(job_id); - record_agent_job_assignment(job_id, "item-1", "agent_good"); - - assert!(!record_agent_job_result( - job_id, - "item-1", - json!({"status":"bad"}), - false, - None - )); - assert!(take_agent_job_result(job_id, "item-1").is_none()); - clear_agent_job_results(job_id); -} - -#[test] -fn test_validate_output_schema_enforces_required_fields() { - let schema = json!({ - "type": "object", - "required": ["status", "score"] - }); - let ok_payload = json!({"status":"ok","score":1}); - assert!(validate_output_schema(&schema, &ok_payload).is_ok()); - - let missing = json!({"status":"ok"}); - let err = validate_output_schema(&schema, &missing).expect_err("missing required field"); - assert!(err.contains("missing required field 'score'")); -} - -#[test] -fn test_default_results_csv_path_uses_input_stem() { - let path = PathBuf::from("/tmp/inventory.csv"); - let output = default_results_csv_path(&path); - assert_eq!(output, PathBuf::from("/tmp/inventory.results.csv")); -} - -#[test] -fn test_parse_csv_concurrency_prefers_max_concurrency() { - let input = json!({ - "max_workers": 3, - "max_concurrency": 9 - }); - assert_eq!(parse_csv_concurrency(&input), 9); -} - -#[test] -fn test_load_csv_rows_uses_id_column_and_row_fallback() { - let tmp = tempdir().expect("tempdir"); - let csv_path = tmp.path().join("rows.csv"); - std::fs::write(&csv_path, "id,name\nalpha,First\n,Second\n").expect("write csv"); - - let rows = load_csv_rows(&csv_path, Some("id")).expect("load rows"); - assert_eq!(rows.len(), 2); - assert_eq!(rows[0].item_id, "alpha"); - assert_eq!(rows[1].item_id, "row-2"); - assert_eq!( - rows[1].values.get("name").map(String::as_str), - Some("Second") - ); -} - -#[test] -fn test_load_csv_rows_dedupes_item_ids() { - let tmp = tempdir().expect("tempdir"); - let csv_path = tmp.path().join("rows.csv"); - std::fs::write(&csv_path, "id,name\nfoo,First\nfoo,Second\n").expect("write csv"); - - let rows = load_csv_rows(&csv_path, Some("id")).expect("load rows"); - assert_eq!(rows.len(), 2); - assert_eq!(rows[0].item_id, "foo"); - assert_eq!(rows[1].item_id, "foo-2"); -} - -#[test] -fn test_load_csv_rows_rejects_duplicate_headers() { - let tmp = tempdir().expect("tempdir"); - let csv_path = tmp.path().join("rows.csv"); - std::fs::write(&csv_path, "id,id\nfoo,bar\n").expect("write csv"); - - let err = load_csv_rows(&csv_path, Some("id")).expect_err("duplicate headers should fail"); - assert!(err.to_string().contains("duplicate header")); -} #[test] fn test_send_input_schema_does_not_require_message_field() { diff --git a/crates/tui/src/tools/swarm.rs b/crates/tui/src/tools/swarm.rs index 9fafb7e2..32348e0e 100644 --- a/crates/tui/src/tools/swarm.rs +++ b/crates/tui/src/tools/swarm.rs @@ -1,4 +1,12 @@ //! Swarm orchestration for spawning multiple sub-agents with dependencies. +//! +//! NOTE: As of #336 the model-callable swarm tool surface has been removed. +//! The types in this file (SwarmOutcome, SwarmTaskStatus, etc.) are still +//! consumed by `core/events.rs::Event::SwarmProgress` and the matching UI +//! handler in `tui/ui.rs`, so the file is parked rather than deleted. +//! Full cascade (delete this file + Event variant + UI handlers + app state +//! + routing helpers) tracked in #357. +#![allow(dead_code)] use std::collections::{HashMap, HashSet, VecDeque}; use std::fs; diff --git a/crates/tui/src/tui/ui/tests.rs b/crates/tui/src/tui/ui/tests.rs index 2a2631e3..65ad6cd1 100644 --- a/crates/tui/src/tui/ui/tests.rs +++ b/crates/tui/src/tui/ui/tests.rs @@ -5,7 +5,7 @@ use crate::tui::file_mention::{ try_autocomplete_file_mention, user_request_with_file_mentions, visible_mention_menu_entries, }; use crate::tui::history::{ - ExecCell, ExecSource, GenericToolCell, HistoryCell, SubAgentCell, ToolCell, ToolStatus, + ExecCell, ExecSource, GenericToolCell, HistoryCell, ToolCell, ToolStatus, }; use crate::tui::views::{ModalView, ViewAction}; use crate::working_set::Workspace; @@ -2379,338 +2379,6 @@ fn non_fanout_tool_does_not_populate_prompts() { "non-fan-out tool must not populate prompts" ); } - -#[test] -fn agent_swarm_populates_prompt_rows_from_tasks() { - let mut app = create_test_app(); - - handle_tool_call_started( - &mut app, - "swarm-1", - "agent_swarm", - &serde_json::json!({ - "tasks": [ - { - "id": "state", - "objective": "Read the current repo state", - "prompt": "Inspect git status and recent commits" - }, - { - "id": "docs", - "prompt": "Update docs for the release" - } - ] - }), - ); - - let active = app.active_cell.as_ref().expect("active cell present"); - let HistoryCell::Tool(ToolCell::Generic(generic)) = &active.entries()[0] else { - panic!("expected GenericToolCell for agent_swarm"); - }; - - assert_eq!( - generic.prompts.as_ref(), - Some(&vec![ - "Read the current repo state".to_string(), - "Update docs for the release".to_string(), - ]) - ); -} - -#[test] -fn agent_swarm_seeded_fanout_card_uses_declared_task_count() { - let mut app = create_test_app(); - - assert!(seed_fanout_card_from_tool_call( - &mut app, - "agent_swarm", - &serde_json::json!({ - "tasks": [ - { "id": "a", "prompt": "First task" }, - { "id": "b", "prompt": "Second task" }, - { "id": "c", "prompt": "Third task" } - ] - }), - )); - - // Card is deferred until first SwarmProgress (#236/#238). - // Before that, only the pending task count is stored. - assert_eq!(app.pending_swarm_task_count, Some(3)); - assert!( - app.history.is_empty(), - "no card pre-seeded before SwarmProgress" - ); - assert_eq!( - active_fanout_counts(&app), - Some((0, 3)), - "sidebar reads pending count" - ); -} - -#[test] -fn seeded_fanout_card_preserves_existing_active_tool_indices() { - let mut app = create_test_app(); - handle_tool_call_started( - &mut app, - "search-1", - "file_search", - &serde_json::json!({ "query": "swarm" }), - ); - assert_eq!(app.tool_cells.get("search-1").copied(), Some(0)); - - assert!(seed_fanout_card_from_tool_call( - &mut app, - "agent_swarm", - &serde_json::json!({ - "tasks": [ - { "id": "a", "prompt": "First task" }, - { "id": "b", "prompt": "Second task" } - ] - }), - )); - - // No card created → no history insertion → tool_cells indices unchanged. - assert_eq!( - app.tool_cells.get("search-1").copied(), - Some(0), - "active tool virtual index unchanged when card is deferred" - ); - - let result = crate::tools::spec::ToolResult::success("done"); - handle_tool_call_complete(&mut app, "search-1", "file_search", &Ok(result)); - let active = app.active_cell.as_ref().expect("active cell present"); - let HistoryCell::Tool(ToolCell::Generic(generic)) = &active.entries()[0] else { - panic!("expected GenericToolCell for file_search"); - }; - assert_eq!(generic.status, ToolStatus::Success); -} - -#[test] -fn agent_swarm_result_sync_replaces_seeded_slots_with_final_task_outcomes() { - let mut app = create_test_app(); - assert!(seed_fanout_card_from_tool_call( - &mut app, - "agent_swarm", - &serde_json::json!({ - "tasks": [ - { "id": "a", "prompt": "First task" }, - { "id": "b", "prompt": "Second task" } - ] - }), - )); - - let result = crate::tools::spec::ToolResult::success( - serde_json::json!({ - "swarm_id": "swarm_test", - "status": "partial", - "duration_ms": 100, - "counts": { - "total": 2, - "completed": 1, - "interrupted": 0, - "failed": 0, - "cancelled": 1, - "skipped": 0, - "running": 0, - "pending": 0 - }, - "tasks": [ - { - "task_id": "a", - "agent_id": "agent_done", - "status": "completed", - "result": "ok", - "steps_taken": 1, - "duration_ms": 50 - }, - { - "task_id": "b", - "agent_id": null, - "status": "cancelled", - "error": "Cancelled", - "steps_taken": 0, - "duration_ms": 0 - } - ] - }) - .to_string(), - ); - - assert!(sync_fanout_card_from_tool_result( - &mut app, - "agent_swarm", - &Ok(result), - )); - - let HistoryCell::SubAgent(SubAgentCell::Fanout(card)) = &app.history[0] else { - panic!("expected synced fanout card"); - }; - assert_eq!(card.worker_count(), 2); - assert_eq!(card.workers[0].agent_id, "agent_done"); - assert_eq!( - card.workers[0].status, - crate::tui::widgets::agent_card::AgentLifecycle::Completed - ); - assert_eq!(card.workers[1].agent_id, "task:b"); - assert_eq!( - card.workers[1].status, - crate::tui::widgets::agent_card::AgentLifecycle::Cancelled - ); -} - -#[test] -fn agent_swarm_progress_event_replaces_stale_pending_slots() { - let mut app = create_test_app(); - assert!(seed_fanout_card_from_tool_call( - &mut app, - "agent_swarm", - &serde_json::json!({ - "tasks": [ - { "id": "a", "prompt": "First task" }, - { "id": "b", "prompt": "Second task" }, - { "id": "c", "prompt": "Third task" } - ] - }), - )); - - let outcome = crate::tools::swarm::SwarmOutcome { - swarm_id: "swarm_done".to_string(), - status: crate::tools::swarm::SwarmStatus::Completed, - duration_ms: 250, - counts: crate::tools::swarm::SwarmCounts { - total: 3, - completed: 3, - interrupted: 0, - failed: 0, - cancelled: 0, - skipped: 0, - running: 0, - pending: 0, - }, - tasks: vec![ - crate::tools::swarm::SwarmTaskOutcome { - task_id: "a".to_string(), - worker_id: "swarm_done:a".to_string(), - agent_id: Some("agent_a".to_string()), - label: "First task".to_string(), - model: "deepseek-v4-flash".to_string(), - nickname: Some("Blue".to_string()), - status: crate::tools::swarm::SwarmTaskStatus::Completed, - result: Some("a ok".to_string()), - error: None, - steps_taken: 1, - duration_ms: 100, - started_at_ms: Some(0), - ended_at_ms: Some(100), - }, - crate::tools::swarm::SwarmTaskOutcome { - task_id: "b".to_string(), - worker_id: "swarm_done:b".to_string(), - agent_id: Some("agent_b".to_string()), - label: "Second task".to_string(), - model: "deepseek-v4-flash".to_string(), - nickname: Some("Humpback".to_string()), - status: crate::tools::swarm::SwarmTaskStatus::Completed, - result: Some("b ok".to_string()), - error: None, - steps_taken: 1, - duration_ms: 100, - started_at_ms: Some(0), - ended_at_ms: Some(100), - }, - crate::tools::swarm::SwarmTaskOutcome { - task_id: "c".to_string(), - worker_id: "swarm_done:c".to_string(), - agent_id: Some("agent_c".to_string()), - label: "Third task".to_string(), - model: "deepseek-v4-flash".to_string(), - nickname: Some("Sperm".to_string()), - status: crate::tools::swarm::SwarmTaskStatus::Completed, - result: Some("c ok".to_string()), - error: None, - steps_taken: 1, - duration_ms: 100, - started_at_ms: Some(0), - ended_at_ms: Some(100), - }, - ], - }; - - assert!(sync_fanout_card_from_swarm_outcome(&mut app, &outcome)); - - let HistoryCell::SubAgent(SubAgentCell::Fanout(card)) = &app.history[0] else { - panic!("expected synced fanout card"); - }; - assert_eq!(card.worker_count(), 3); - assert_eq!(active_fanout_counts(&app), Some((0, 3))); - assert!(card.workers.iter().all(|slot| matches!( - slot.status, - crate::tui::widgets::agent_card::AgentLifecycle::Completed - ))); - assert_eq!(app.subagent_card_index.get("agent_a").copied(), Some(0)); -} - -#[test] -fn fanout_counts_use_canonical_swarm_outcome_not_stale_card_slots() { - let mut app = create_test_app(); - assert!(seed_fanout_card_from_tool_call( - &mut app, - "agent_swarm", - &serde_json::json!({ - "tasks": [ - { "id": "a", "prompt": "A" }, - { "id": "b", "prompt": "B" }, - { "id": "c", "prompt": "C" }, - { "id": "d", "prompt": "D" }, - { "id": "e", "prompt": "E" } - ] - }), - )); - - let outcome = crate::tools::swarm::SwarmOutcome { - swarm_id: "swarm_live".to_string(), - status: crate::tools::swarm::SwarmStatus::Running, - duration_ms: 1000, - counts: crate::tools::swarm::SwarmCounts { - total: 5, - completed: 4, - interrupted: 0, - failed: 0, - cancelled: 0, - skipped: 0, - running: 1, - pending: 0, - }, - tasks: (0..5) - .map(|idx| { - let task_id = char::from(b'a' + idx as u8).to_string(); - crate::tools::swarm::SwarmTaskOutcome { - task_id: task_id.clone(), - worker_id: format!("swarm_live:{task_id}"), - agent_id: Some(format!("agent_{task_id}")), - label: task_id.clone(), - model: "deepseek-v4-flash".to_string(), - nickname: Some(["Blue", "Humpback", "Sperm", "Fin", "Sei"][idx].to_string()), - status: if idx == 4 { - crate::tools::swarm::SwarmTaskStatus::Running - } else { - crate::tools::swarm::SwarmTaskStatus::Completed - }, - result: None, - error: None, - steps_taken: 0, - duration_ms: 0, - started_at_ms: Some(0), - ended_at_ms: (idx != 4).then_some(0), - } - }) - .collect(), - }; - - assert!(sync_fanout_card_from_swarm_outcome(&mut app, &outcome)); - assert_eq!(active_fanout_counts(&app), Some((1, 5))); -} - #[test] fn noisy_subagent_progress_keeps_existing_objective_summary() { let mut app = create_test_app(); @@ -3109,349 +2777,6 @@ fn duplicate_mailbox_token_usage_does_not_regress_displayed_cost() { handle_subagent_mailbox(&mut app, 12, &usage); assert!(app.displayed_session_cost() > baseline); } - -/// Regression for issue #238: two overlapping `agent_swarm` invocations must -/// each project to their own FanoutCard. Without per-swarm card binding, -/// SwarmProgress for an older background swarm would clobber the freshly -/// seeded card of a newer fanout — the contradictory state the user saw. -#[test] -fn overlapping_swarms_project_to_distinct_fanout_cards() { - use crate::tools::swarm::{SwarmCounts, SwarmOutcome, SwarmStatus, SwarmTaskStatus}; - - let mut app = create_test_app(); - - // Seed swarm A. - assert!(seed_fanout_card_from_tool_call( - &mut app, - "agent_swarm", - &serde_json::json!({"tasks": [{"id": "a1", "prompt": "A1"}, {"id": "a2", "prompt": "A2"}]}), - )); - - let outcome_a_initial = SwarmOutcome { - swarm_id: "swarm_A".to_string(), - status: SwarmStatus::Running, - duration_ms: 0, - counts: SwarmCounts { - total: 2, - completed: 0, - interrupted: 0, - failed: 0, - cancelled: 0, - skipped: 0, - running: 2, - pending: 0, - }, - tasks: vec![ - mk_task("a1", SwarmTaskStatus::Running), - mk_task("a2", SwarmTaskStatus::Running), - ], - }; - sync_fanout_card_from_swarm_outcome(&mut app, &outcome_a_initial); - let card_a_idx = *app - .swarm_card_index - .get("swarm_A") - .expect("swarm A bound to a card"); - - // Now seed swarm B before A finishes. - app.last_fanout_card_index = None; - app.last_swarm_id = None; - assert!(seed_fanout_card_from_tool_call( - &mut app, - "agent_swarm", - &serde_json::json!({"tasks": [{"id": "b1", "prompt": "B1"}]}), - )); - let outcome_b_initial = SwarmOutcome { - swarm_id: "swarm_B".to_string(), - status: SwarmStatus::Running, - duration_ms: 0, - counts: SwarmCounts { - total: 1, - completed: 0, - interrupted: 0, - failed: 0, - cancelled: 0, - skipped: 0, - running: 1, - pending: 0, - }, - tasks: vec![mk_task("b1", SwarmTaskStatus::Running)], - }; - sync_fanout_card_from_swarm_outcome(&mut app, &outcome_b_initial); - let card_b_idx = *app - .swarm_card_index - .get("swarm_B") - .expect("swarm B bound to its own card"); - assert_ne!(card_a_idx, card_b_idx, "each swarm gets its own card"); - - // A's terminal SwarmProgress arrives later; it must update card A, - // *not* card B. - let outcome_a_done = SwarmOutcome { - swarm_id: "swarm_A".to_string(), - status: SwarmStatus::Completed, - duration_ms: 100, - counts: SwarmCounts { - total: 2, - completed: 2, - interrupted: 0, - failed: 0, - cancelled: 0, - skipped: 0, - running: 0, - pending: 0, - }, - tasks: vec![ - mk_task("a1", SwarmTaskStatus::Completed), - mk_task("a2", SwarmTaskStatus::Completed), - ], - }; - sync_fanout_card_from_swarm_outcome(&mut app, &outcome_a_done); - - // Card A reflects A's completion; card B still reflects B's pending state. - let HistoryCell::SubAgent(SubAgentCell::Fanout(card_a)) = &app.history[card_a_idx] else { - panic!("card A is not a fanout cell"); - }; - assert_eq!(card_a.worker_count(), 2); - assert!(card_a.workers.iter().all(|s| matches!( - s.status, - crate::tui::widgets::agent_card::AgentLifecycle::Completed - ))); - - let HistoryCell::SubAgent(SubAgentCell::Fanout(card_b)) = &app.history[card_b_idx] else { - panic!("card B is not a fanout cell"); - }; - assert_eq!(card_b.worker_count(), 1); - assert!(matches!( - card_b.workers[0].status, - crate::tui::widgets::agent_card::AgentLifecycle::Running - )); -} - -fn mk_task( - id: &str, - status: crate::tools::swarm::SwarmTaskStatus, -) -> crate::tools::swarm::SwarmTaskOutcome { - crate::tools::swarm::SwarmTaskOutcome { - task_id: id.to_string(), - worker_id: format!("task:{id}"), - agent_id: Some(format!("agent_{id}")), - label: id.to_string(), - model: "deepseek-v4-flash".to_string(), - nickname: None, - status, - result: None, - error: None, - steps_taken: 0, - duration_ms: 0, - started_at_ms: Some(0), - ended_at_ms: None, - } -} - -/// Regression for issue #236/#238: the footer must not double-count a -/// fanout-class tool. Sidebar and FanoutCard already represent the swarm, -/// so `active_tool_status_label` skipping these tools is what keeps the -/// "tool agent_swarm · 1 active" line from appearing simultaneously with -/// "Agents 3 done" + "0 done · 0 running · 0 failed · 3 pending". -#[test] -fn footer_active_tool_label_suppresses_fanout_tools() { - let mut app = create_test_app(); - app.active_cell = Some(crate::tui::active_cell::ActiveCell::new()); - let active = app.active_cell.as_mut().unwrap(); - active.push_tool( - "tool-1".to_string(), - HistoryCell::Tool(ToolCell::Generic(GenericToolCell { - name: "agent_swarm".to_string(), - status: ToolStatus::Running, - input_summary: None, - output: None, - prompts: None, - })), - ); - - let label = active_tool_status_label(&app); - assert!( - label.is_none(), - "active fanout-class tools must not appear in the footer 'tool ... · X active' line, got: {label:?}" - ); -} - -/// Regression for issue #243: pressing Esc during an active fanout must -/// leave the parent in a clean state — active_cell flushed, in-flight -/// tool entries marked Failed/Interrupted, but the canonical -/// `swarm_jobs` cache for background `block:false` swarms preserved so -/// `swarm_status` / `swarm_result` / the FanoutCard stay coherent. -#[test] -fn esc_during_fanout_clears_active_cell_but_preserves_background_swarm() { - use crate::tools::swarm::{SwarmCounts, SwarmOutcome, SwarmStatus}; - - let mut app = create_test_app(); - - // Seed an in-flight fanout: a Generic tool entry in active_cell PLUS - // a registered swarm in swarm_jobs (the background tokio task that - // would keep running after Esc). - app.active_cell = Some(crate::tui::active_cell::ActiveCell::new()); - let active = app.active_cell.as_mut().unwrap(); - active.push_tool( - "tool-1".to_string(), - HistoryCell::Tool(ToolCell::Generic(GenericToolCell { - name: "agent_swarm".to_string(), - status: ToolStatus::Running, - input_summary: None, - output: None, - prompts: None, - })), - ); - let outcome = SwarmOutcome { - swarm_id: "swarm_bg".to_string(), - status: SwarmStatus::Running, - duration_ms: 0, - counts: SwarmCounts { - total: 3, - completed: 0, - interrupted: 0, - failed: 0, - cancelled: 0, - skipped: 0, - running: 3, - pending: 0, - }, - tasks: vec![ - mk_task("a", crate::tools::swarm::SwarmTaskStatus::Running), - mk_task("b", crate::tools::swarm::SwarmTaskStatus::Running), - mk_task("c", crate::tools::swarm::SwarmTaskStatus::Running), - ], - }; - app.swarm_jobs - .insert("swarm_bg".to_string(), outcome.clone()); - app.last_swarm_id = Some("swarm_bg".to_string()); - - // Apply the Esc/CancelRequest mutations the UI loop performs. - app.is_loading = true; - app.finalize_active_cell_as_interrupted(); - app.is_loading = false; - - // Active cell flushed → footer no longer reports "tool ... · X active". - assert!( - app.active_cell.is_none(), - "active_cell must be flushed after Esc" - ); - - // Background swarm record preserved — swarm_status / swarm_result and - // any future SwarmProgress event can still update the canonical store. - assert!( - app.swarm_jobs.contains_key("swarm_bg"), - "background swarm record must survive Esc" - ); - assert_eq!(app.last_swarm_id.as_deref(), Some("swarm_bg")); - - // Composer can submit the next message immediately — is_loading is - // false, no modal is open, runtime_turn_status is cleared. - assert!(!app.is_loading); -} - -/// Regression for issue #243: after Esc during fanout, a subsequent -/// TurnComplete (Interrupted) event arriving from the engine must be -/// handled idempotently — `finalize_active_cell_as_interrupted` and -/// `finalize_streaming_assistant_as_interrupted` are both called by -/// both the Esc handler and the TurnComplete handler; the second call -/// must be a no-op (guarded by `Option::take()`). -#[test] -fn turn_complete_after_esc_is_idempotent() { - let mut app = create_test_app(); - - // Simulate a live fanout with an active cell and a streaming assistant. - let mut active = ActiveCell::new(); - active.push_tool( - "tool-1".to_string(), - HistoryCell::Tool(ToolCell::Generic(GenericToolCell { - name: "agent_swarm".to_string(), - status: ToolStatus::Running, - input_summary: None, - output: None, - prompts: None, - })), - ); - app.active_cell = Some(active); - app.add_message(HistoryCell::Assistant { - content: "partial text".to_string(), - streaming: true, - }); - let assistant_idx = app.history.len() - 1; - app.streaming_message_index = Some(assistant_idx); - app.is_loading = true; - app.runtime_turn_status = Some("in_progress".to_string()); - - // Step 1: Esc handler fires (simulated). - app.finalize_active_cell_as_interrupted(); - app.finalize_streaming_assistant_as_interrupted(); - app.runtime_turn_status = None; - app.is_loading = false; - - // Verify first call cleared the active cell and stopped loading. - assert!(app.active_cell.is_none(), "active_cell cleared by Esc"); - assert!(!app.is_loading, "is_loading false after Esc"); - assert!( - app.runtime_turn_status.is_none(), - "runtime_turn_status cleared by Esc" - ); - // Streaming assistant cell was marked interrupted. - if let Some(HistoryCell::Assistant { streaming, content }) = app.history.get(assistant_idx) { - assert!(!streaming, "streaming stopped"); - assert!( - content.starts_with("[interrupted]"), - "content should have interruption prefix" - ); - } - - // Step 2: Simulate TurnComplete (Interrupted) arriving from engine. - // This calls the same methods again — must be a no-op. - app.finalize_active_cell_as_interrupted(); - app.finalize_streaming_assistant_as_interrupted(); - app.is_loading = false; - app.runtime_turn_status = Some("interrupted".to_string()); - - // State remains consistent — active_cell still None, streaming still - // stopped, no double-interruption prefix. - assert!( - app.active_cell.is_none(), - "active_cell still cleared after 2nd call" - ); - assert!(!app.is_loading, "is_loading still false after 2nd call"); - assert_eq!( - app.runtime_turn_status.as_deref(), - Some("interrupted"), - "runtime_turn_status reflects final outcome" - ); - // The streaming assistant should still have only ONE interruption prefix. - if let Some(HistoryCell::Assistant { content, .. }) = app.history.get(assistant_idx) { - assert_eq!( - content.matches("[interrupted]").count(), - 1, - "content must not double-prefix [interrupted]: {content}" - ); - } - - // Background cell in history (the flushed tool entry) must exist and - // have the Failed status. - let tool_cells: Vec<_> = app - .history - .iter() - .filter_map(|c| match c { - HistoryCell::Tool(ToolCell::Generic(g)) => Some(g), - _ => None, - }) - .collect(); - assert_eq!(tool_cells.len(), 1); - assert_eq!( - tool_cells[0].status, - ToolStatus::Failed, - "interrupted tool marked Failed" - ); -} - -/// Regression for issue #241: `checklist_write` results render as a -/// dedicated checklist card with completed/total + percent header and -/// per-item status markers — not as a generic dumped JSON tool block. #[test] fn checklist_write_renders_dedicated_card() { let cell = GenericToolCell { diff --git a/docs/TOOL_SURFACE.md b/docs/TOOL_SURFACE.md index ebfe6d71..8545734d 100644 --- a/docs/TOOL_SURFACE.md +++ b/docs/TOOL_SURFACE.md @@ -136,11 +136,9 @@ Large logs and command outputs should be artifacts with compact summaries in the ### Sub-agents -`agent_spawn`, `agent_swarm`, `spawn_agents_on_csv`, plus the supporting -tools (`agent_result` / `swarm_result` / `wait` / `send_input` / -`agent_assign` / `agent_cancel` / `resume_agent` / `agent_list` / -`report_agent_job_result` / `swarm_status`). See `agent.txt` for the -delegation protocol. +`agent_spawn` plus the supporting tools (`agent_result` / `wait` / `send_input` / +`agent_assign` / `agent_cancel` / `resume_agent` / `agent_list`). +See `agent.txt` for the delegation protocol. ### Parallel fan-out: cost-class caps