chore(tools): remove /swarm command + agent_swarm/spawn_agents_on_csv tool surface; park swarm.rs pending #357 cascade (#336)

Surface removed: /swarm slash command, agent_swarm, spawn_agents_on_csv, swarm_status, swarm_result, swarm_cancel tools, report_agent_job_result. Prompts/docs/tests updated. swarm.rs parked with #![allow(dead_code)] pending the full cascade in #357. RLM prompt audit tracked in #358.
This commit is contained in:
Hunter Bown
2026-05-02 01:30:23 -05:00
parent 0ca0570a86
commit aa23182674
12 changed files with 27 additions and 1908 deletions
+3 -1
View File
@@ -44,4 +44,6 @@ Issues may be closed when the acceptance criteria have been verified or when the
## Important Notes
- **Token/cost tracking inaccuracies**: Token counting and cost estimation may be inflated due to thinking token accounting bugs. Use `/compact` to manage context, and treat cost estimates as approximate.
- **Modes**: Three modes — Plan (read-only investigation), Agent (tool use with approval), YOLO (auto-approved). See `docs/MODES.md` for details. All three modes can call the `rlm_query` tool for parallel/batched LLM fan-out (`crates/tui/src/tools/rlm_query.rs`).
- **Modes**: Three modes — Plan (read-only investigation), Agent (tool use with approval), YOLO (auto-approved). See `docs/MODES.md` for details.
- **Sub-agents**: The single model-callable surface is `agent_spawn` (returns an `agent_id` immediately; the parent keeps working) plus `agent_wait` / `agent_result` / `agent_cancel` / `agent_list` / `agent_send_input` / `agent_resume` / `agent_assign`. The old `agent_swarm` / `spawn_agents_on_csv` / `/swarm` surface was removed in v0.8.5 (#336).
- **`rlm` tool** (`crates/tui/src/tools/rlm.rs`): a sandboxed Python REPL where a sub-LLM can call in-REPL helpers (`llm_query()`, `llm_query_batched()`, `rlm_query()`, `rlm_query_batched()`) — those `*_query` names are **Python helpers inside the REPL**, not separately-registered model-visible tools. Always loaded across all modes.
-103
View File
@@ -144,109 +144,6 @@ pub fn deepseek_links(app: &mut App) -> CommandResult {
))
}
/// Collaboration topology selected by a `/swarm` multi-agent turn.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SwarmMode {
    /// Planner → Critic → Solver, run as a linear pipeline.
    Sequential,
    /// Domain specialists working in parallel, followed by a synthesizer.
    Mixture,
    /// An expert/learner pair used for knowledge distillation.
    Distill,
    /// A Reflector and a Tool-Caller deliberating iteratively.
    Deliberate,
}

impl SwarmMode {
    /// Resolves a user-typed mode name (or one of its aliases) to a mode.
    ///
    /// Matching ignores ASCII case. The input is compared as given — it is
    /// not trimmed — and `None` is returned for anything unrecognised.
    pub fn from_str(s: &str) -> Option<Self> {
        // Every accepted spelling, grouped by the mode it selects.
        const ALIASES: [(&[&str], SwarmMode); 4] = [
            (&["sequential", "seq", "pipeline"], SwarmMode::Sequential),
            (
                &["mixture", "mix", "ensemble", "parallel"],
                SwarmMode::Mixture,
            ),
            (&["distill", "distillation", "transfer"], SwarmMode::Distill),
            (
                &["deliberate", "deliberation", "dialectic", "reflect"],
                SwarmMode::Deliberate,
            ),
        ];

        ALIASES
            .iter()
            .find(|(names, _)| names.iter().any(|name| s.eq_ignore_ascii_case(name)))
            .map(|&(_, mode)| mode)
    }

    /// Canonical lowercase name of the mode, as shown to the user.
    pub fn label(self) -> &'static str {
        match self {
            Self::Deliberate => "deliberate",
            Self::Distill => "distill",
            Self::Mixture => "mixture",
            Self::Sequential => "sequential",
        }
    }

    /// One-line human-readable summary of the collaboration pattern.
    pub fn description(self) -> &'static str {
        match self {
            Self::Deliberate => "Reflector + Tool-Caller iterative deliberation",
            Self::Distill => "ExpertLearner pair for knowledge distillation",
            Self::Mixture => "Parallel ensemble: domain specialists + synthesizer",
            Self::Sequential => "Linear pipeline: Planner → Critic → Solver",
        }
    }
}
/// Handle the `/swarm <mode> [description]` slash command.
///
/// The first whitespace-delimited word of `arg` selects the collaboration
/// mode; everything after it is treated as the task description.
///
/// * No mode given: returns a usage message listing every mode.
/// * Unknown mode: returns an error result naming the rejected word.
/// * Known mode: replaces `app.system_prompt` with a hint telling the model
///   to use `agent_swarm` with the matching topology, and returns a
///   confirmation message (including the description when one was given).
///
/// Phase A foundation — the command only primes the prompt context. Direct
/// orchestration from the slash command (bypassing the model) is planned for
/// Phase B.
pub fn swarm(app: &mut App, arg: Option<&str>) -> CommandResult {
    let raw = arg.map(str::trim).unwrap_or("");
    let mut parts = raw.splitn(2, char::is_whitespace);
    let mode_str = parts.next().unwrap_or("");
    let description = parts.next().map(str::trim).unwrap_or("");

    if mode_str.is_empty() {
        let mut help = String::from("Usage: /swarm <mode> [description]\n\nModes:\n");
        for mode in [
            SwarmMode::Sequential,
            SwarmMode::Mixture,
            SwarmMode::Distill,
            SwarmMode::Deliberate,
        ] {
            // Separate the label from its description; without a separator
            // the two run together (e.g. "sequentialLinear pipeline: ...").
            help.push(' ');
            help.push_str(mode.label());
            help.push_str(" — ");
            help.push_str(mode.description());
            help.push('\n');
        }
        return CommandResult::message(help);
    }

    let Some(mode) = SwarmMode::from_str(mode_str) else {
        return CommandResult::error(format!(
            "Unknown swarm mode: {mode_str}. Try /swarm for a list of modes."
        ));
    };
    let label = mode.label();

    let msg = if description.is_empty() {
        format!(
            "Swarm mode: {label}. Describe the task and I will delegate it using the {label} pattern."
        )
    } else {
        format!("Swarm mode: {label}. Delegating using the {label} pattern:\n{description}")
    };

    // Queue a system message that primes the model to use agent_swarm
    // with the requested topology. In Phase B this will be replaced by
    // direct engine-side orchestration.
    let system_hint = format!(
        "The user has requested a {label} swarm. Use the agent_swarm tool with the appropriate topology. \
         For sequential: use depends_on chains. For mixture: spawn specialists in parallel then synthesize. \
         For distill: spawn an expert and a learner, then have the learner absorb the expert's output. \
         For deliberate: spawn a reflector and a tool-caller in a critique→refine loop."
    );
    // NOTE(review): this assignment discards whatever system prompt was set
    // before the command ran — confirm that replacing (rather than
    // extending) the prompt is intended.
    app.system_prompt = Some(crate::models::SystemPrompt::Text(system_hint));

    CommandResult::message(msg)
}
/// Show home dashboard with stats and quick actions
pub fn home_dashboard(app: &mut App) -> CommandResult {
let locale = app.ui_locale;
-7
View File
@@ -390,12 +390,6 @@ pub const COMMANDS: &[CommandInfo] = &[
usage: "/cache [count]",
description_id: MessageId::CmdCacheDescription,
},
CommandInfo {
name: "swarm",
aliases: &[],
usage: "/swarm <sequential|mixture|distill|deliberate> [description]",
description_id: MessageId::CmdSwarmDescription,
},
];
/// Execute a slash command
@@ -416,7 +410,6 @@ pub fn execute(cmd: &str, app: &mut App) -> CommandResult {
"provider" => provider::provider(app, arg),
"queue" | "queued" => queue::queue(app, arg),
"subagents" | "agents" => core::subagents(app),
"swarm" => core::swarm(app, arg),
"links" | "dashboard" | "api" => core::deepseek_links(app),
"home" | "stats" | "overview" => core::home_dashboard(app),
"note" => note::note(app, arg),
+1 -1
View File
@@ -9,7 +9,7 @@ Decomposition builds trust — a clear plan gets faster approvals.
## Sub-agent completion sentinel
When you spawn a sub-agent via `agent_spawn` (or `agent_swarm`), the child runs independently.
When you spawn a sub-agent via `agent_spawn`, the child runs independently.
You will receive a `<deepseek:subagent.done>` element in the transcript when it finishes.
Read its `summary` field and integrate the work — do not re-do what the child already did.
You can also call `agent_result` to pull the full structured result.
+4 -5
View File
@@ -24,7 +24,7 @@ Use three decomposition patterns, selected by task scope:
**PREVIEW** — Before diving into a large task, survey the terrain. Scan directory structure (`list_dir`), file headers, module trees. Identify problem boundaries and estimate complexity. A 30-second preview prevents hours of wrong-path exploration.
**CHUNK + map-reduce** — When a task exceeds single-pass capacity: split into independent sub-tasks, process each independently (parallel where possible via parallel tool calls or `agent_swarm`), then synthesize findings into a coherent whole. Track chunks with `checklist_write`.
**CHUNK + map-reduce** — When a task exceeds single-pass capacity: split into independent sub-tasks, process each independently (parallel where possible via parallel tool calls or `agent_spawn`), then synthesize findings into a coherent whole. Track chunks with `checklist_write`.
**RECURSIVE** — When sub-tasks reveal sub-problems: decompose recursively until each leaf is tractable. Maintain the task tree via `update_plan` (strategy) layered above `checklist_write` (leaf tasks). Propagate findings upward when sub-problems resolve.
@@ -32,7 +32,7 @@ Your default workflow for any non-trivial request:
1. **`checklist_write`** — break the work into concrete, verifiable steps. Mark the first one `in_progress`. This populates the sidebar so the user can see what you're doing.
2. **Execute** — work through each checklist item, updating status as you go.
3. **For complex initiatives**, layer `update_plan` (high-level strategy) above `checklist_write` (granular steps).
4. **For parallel work**, spawn sub-agents (`agent_spawn` / `agent_swarm`) — each does one thing well. Link them to plan/todo items in your thinking. Batch independent tool calls in a single turn.
4. **For parallel work**, spawn sub-agents (`agent_spawn`) — each does one thing well. Link them to plan/todo items in your thinking. Batch independent tool calls in a single turn.
5. **For long inputs, recursive sub-LLM work, or high-leverage parallel reasoning**, use `rlm` — it loads input into a Python REPL as `context` and runs sub-LLM calls there so long strings and batched deliberation stay out of your window.
6. **For persistent cross-session memory**, use `note` sparingly for important decisions, open blockers, and architectural context.
@@ -85,8 +85,7 @@ When context is deep (past a soft seam): cache reasoning conclusions in concise
- **Task evidence**: `task_gate_run` for verification gates; `pr_attempt_record` / `pr_attempt_list` / `pr_attempt_read` / `pr_attempt_preflight`; `github_issue_context` / `github_pr_context` (read-only); `github_comment` / `github_close_issue` (approval + evidence required); `automation_*` scheduling tools.
- **Structured search**: `grep_files`, `file_search`, `web_search`, `fetch_url`, `web.run` (browse).
- **Git / diag / tests**: `git_status`, `git_diff`, `git_show`, `git_log`, `git_blame`, `diagnostics`, `run_tests`, `review`.
- **Sub-agents**: `agent_spawn` (`spawn_agent`, `delegate_to_agent`), `agent_swarm` (background by default), `swarm_status`, `swarm_result`, `swarm_cancel`, `agent_result`, `agent_cancel` (`close_agent`), `agent_list`, `agent_wait` (`wait`), `agent_send_input` (`send_input`), `agent_assign` (`assign_agent`), `resume_agent`.
- **CSV batch**: `spawn_agents_on_csv`, `report_agent_job_result`.
- **Sub-agents**: `agent_spawn` (`spawn_agent`, `delegate_to_agent`), `agent_result`, `agent_cancel` (`close_agent`), `agent_list`, `agent_wait` (`wait`), `agent_send_input` (`send_input`), `agent_assign` (`assign_agent`), `resume_agent`.
- **Recursive LM (long inputs / parallel reasoning)**: `rlm` — load a file/string as `context` in a Python REPL, sub-agent writes Python that calls `llm_query`/`llm_query_batched`/`rlm_query` to chunk, compare, critique, and synthesize; returns the synthesized answer. Read-only.
- **Other**: `code_execution` (Python sandbox), `validate_data` (JSON/TOML), `request_user_input`, `finance` (market quotes), `tool_search_tool_regex`, `tool_search_tool_bm25` (deferred tool discovery).
@@ -134,7 +133,7 @@ Inside the `rlm` REPL, the sub-LLM has access to `llm_query()`, `llm_query_batch
## Sub-agent completion sentinel
When you spawn a sub-agent via `agent_spawn` (or `agent_swarm`), the child runs independently in its own context. `agent_swarm` returns a `swarm_id` immediately unless you explicitly pass `block: true`; keep working and call `swarm_status` or `swarm_result` when you need the collected results. You will receive a `<deepseek:subagent.done>` element in the transcript when an individual child finishes. This sentinel carries:
When you spawn a sub-agent via `agent_spawn`, the child runs independently. You will receive a `<deepseek:subagent.done>` element in the transcript when it finishes. This sentinel carries:
- `agent_id` — the child's identifier
- `summary` — a human-readable summary of what the child found or did
+2 -3
View File
@@ -8,7 +8,7 @@ Your default workflow for any non-trivial request:
1. **`checklist_write`** — break the work into concrete, verifiable steps. Mark the first one `in_progress`. This populates the sidebar so the user can see what you're doing.
2. **Execute** — work through each checklist item, updating status as you go.
3. **For complex initiatives**, layer `update_plan` (high-level strategy) above `checklist_write` (granular steps).
4. **For parallel work**, spawn sub-agents (`agent_spawn` / `agent_swarm`) — each does one thing well. Link them to plan/todo items in your thinking.
4. **For parallel work**, spawn sub-agents (`agent_spawn`) — each does one thing well. Link them to plan/todo items in your thinking.
5. **For long inputs, recursive sub-LLM work, or high-leverage parallel reasoning**, use `rlm` — it loads input into a Python REPL as `context` and runs sub-LLM calls there so long strings and batched deliberation stay out of your window.
6. **For persistent cross-session memory**, use `note` sparingly for important decisions, open blockers, and architectural context.
@@ -33,8 +33,7 @@ Model notes: DeepSeek V4 models emit *thinking tokens* (`ContentBlock::Thinking`
- **Task evidence**: `task_gate_run` for verification gates; `pr_attempt_record` / `pr_attempt_list` / `pr_attempt_read` / `pr_attempt_preflight`; `github_issue_context` / `github_pr_context` (read-only); `github_comment` / `github_close_issue` (approval + evidence required); `automation_*` scheduling tools.
- **Structured search**: `grep_files`, `file_search`, `web_search`, `fetch_url`, `web.run` (browse).
- **Git / diag / tests**: `git_status`, `git_diff`, `git_show`, `git_log`, `git_blame`, `diagnostics`, `run_tests`, `review`.
- **Sub-agents**: `agent_spawn` (`spawn_agent`, `delegate_to_agent`), `agent_swarm`, `agent_result`, `agent_cancel` (`close_agent`), `agent_list`, `agent_wait` (`wait`), `agent_send_input` (`send_input`), `agent_assign` (`assign_agent`), `resume_agent`.
- **CSV batch**: `spawn_agents_on_csv`, `report_agent_job_result`.
- **Sub-agents**: `agent_spawn` (`spawn_agent`, `delegate_to_agent`), `agent_result`, `agent_cancel` (`close_agent`), `agent_list`, `agent_wait` (`wait`), `agent_send_input` (`send_input`), `agent_assign` (`assign_agent`), `resume_agent`.
- **Recursive LM (long inputs / parallel reasoning)**: `rlm` — load a file/string as `context` in a Python REPL, sub-agent writes Python that calls `llm_query`/`llm_query_batched`/`rlm_query` to chunk, compare, critique, and synthesize; returns the synthesized answer. Read-only.
- **Other**: `code_execution` (Python sandbox), `validate_data` (JSON/TOML), `request_user_input`, `finance` (market quotes), `tool_search_tool_regex`, `tool_search_tool_bm25` (deferred tool discovery).
+1 -21
View File
@@ -621,9 +621,8 @@ impl ToolRegistryBuilder {
use super::subagent::{
AgentAssignTool, AgentCancelTool, AgentCloseTool, AgentListTool, AgentResultTool,
AgentResumeTool, AgentSendInputTool, AgentSpawnTool, AgentWaitTool,
DelegateToAgentTool, ReportAgentJobResultTool, SpawnAgentsOnCsvTool,
DelegateToAgentTool
};
use super::swarm::{AgentSwarmTool, SwarmCancelTool, SwarmResultTool, SwarmStatusTool};
self.with_tool(Arc::new(AgentSpawnTool::new(
manager.clone(),
@@ -638,25 +637,6 @@ impl ToolRegistryBuilder {
manager.clone(),
runtime.clone(),
)))
.with_tool(Arc::new(AgentSwarmTool::new(
manager.clone(),
runtime.clone(),
)))
.with_tool(Arc::new(SpawnAgentsOnCsvTool::new(
manager.clone(),
runtime.clone(),
)))
.with_tool(Arc::new(ReportAgentJobResultTool))
.with_tool(Arc::new(SwarmStatusTool::new(
runtime.context.workspace.clone(),
)))
.with_tool(Arc::new(SwarmResultTool::new(
runtime.context.workspace.clone(),
)))
.with_tool(Arc::new(SwarmCancelTool::new(
manager.clone(),
runtime.context.workspace.clone(),
)))
.with_tool(Arc::new(AgentResultTool::new(manager.clone())))
.with_tool(Arc::new(AgentSendInputTool::new(
manager.clone(),
File diff suppressed because it is too large Load Diff
-146
View File
@@ -163,154 +163,8 @@ fn test_parse_assign_request_requires_update_fields() {
);
}
/// Each `{key}` placeholder is replaced by the value stored under that key.
#[test]
fn test_render_instruction_template_replaces_columns() {
    let columns: HashMap<String, String> = [("name", "alpha"), ("owner", "hunter")]
        .into_iter()
        .map(|(key, value)| (key.to_string(), value.to_string()))
        .collect();

    assert_eq!(
        render_instruction_template("Inspect {name} for {owner}", &columns),
        "Inspect alpha for hunter"
    );
}
/// Doubled braces (`{{x}}`) come out as literal single braces (`{x}`), while
/// ordinary placeholders in the same template are still substituted.
#[test]
fn test_render_instruction_template_preserves_escaped_braces() {
    let columns: HashMap<String, String> =
        std::iter::once(("name".to_string(), "alpha".to_string())).collect();

    assert_eq!(
        render_instruction_template("literal {{x}} and {name}", &columns),
        "literal {x} and alpha"
    );
}
/// Only the first report for an item is recorded: a later report from the
/// same agent is refused, and the stored report can be taken exactly once.
#[test]
fn test_record_agent_job_result_accepts_first_report_only() {
    let job = "job_test_reports";
    let item = "item-1";
    clear_agent_job_results(job);
    record_agent_job_assignment(job, item, "agent_1");

    let first_accepted =
        record_agent_job_result(job, item, json!({"status":"ok"}), false, Some("agent_1"));
    assert!(first_accepted);

    let duplicate_accepted = record_agent_job_result(
        job,
        item,
        json!({"status":"duplicate"}),
        true,
        Some("agent_1"),
    );
    assert!(!duplicate_accepted);

    // The stored report is the first one, and taking it removes it.
    let report = take_agent_job_result(job, item).expect("report should exist");
    assert_eq!(report.result["status"], "ok");
    assert!(!report.stop);
    assert!(take_agent_job_result(job, item).is_none());

    clear_agent_job_results(job);
}
/// A report submitted by an agent other than the one assigned to the item
/// is refused and nothing is stored.
#[test]
fn test_record_agent_job_result_rejects_wrong_agent_assignment() {
    let job = "job_test_reports_wrong_agent";
    let item = "item-1";
    clear_agent_job_results(job);
    record_agent_job_assignment(job, item, "agent_good");

    let accepted =
        record_agent_job_result(job, item, json!({"status":"bad"}), false, Some("agent_bad"));
    assert!(!accepted);
    assert!(take_agent_job_result(job, item).is_none());

    clear_agent_job_results(job);
}
/// A report that carries no agent identity is refused when the item has an
/// assigned agent, and nothing is stored.
#[test]
fn test_record_agent_job_result_rejects_missing_agent_assignment_context() {
    let job = "job_test_reports_missing_agent_context";
    let item = "item-1";
    clear_agent_job_results(job);
    record_agent_job_assignment(job, item, "agent_good");

    let accepted = record_agent_job_result(job, item, json!({"status":"bad"}), false, None);
    assert!(!accepted);
    assert!(take_agent_job_result(job, item).is_none());

    clear_agent_job_results(job);
}
/// A payload with every `required` key validates; a payload missing one is
/// rejected with an error that names the missing field.
#[test]
fn test_validate_output_schema_enforces_required_fields() {
    let schema = json!({
        "type": "object",
        "required": ["status", "score"]
    });

    let complete = json!({"status":"ok","score":1});
    assert!(validate_output_schema(&schema, &complete).is_ok());

    let incomplete = json!({"status":"ok"});
    let err = validate_output_schema(&schema, &incomplete).expect_err("missing required field");
    assert!(err.contains("missing required field 'score'"));
}
/// The default results path sits next to the input file and inserts
/// `.results` before the `.csv` extension.
#[test]
fn test_default_results_csv_path_uses_input_stem() {
    let input = PathBuf::from("/tmp/inventory.csv");
    assert_eq!(
        default_results_csv_path(&input),
        PathBuf::from("/tmp/inventory.results.csv")
    );
}
/// When both `max_workers` and `max_concurrency` are supplied, the
/// `max_concurrency` value is the one that is used.
#[test]
fn test_parse_csv_concurrency_prefers_max_concurrency() {
    let options = json!({
        "max_workers": 3,
        "max_concurrency": 9
    });
    let concurrency = parse_csv_concurrency(&options);
    assert_eq!(concurrency, 9);
}
/// Rows take their item id from the named id column; a row whose id cell is
/// empty gets a generated id instead (`row-2` for the second data row here).
#[test]
fn test_load_csv_rows_uses_id_column_and_row_fallback() {
    let dir = tempdir().expect("tempdir");
    let csv_file = dir.path().join("rows.csv");
    std::fs::write(&csv_file, "id,name\nalpha,First\n,Second\n").expect("write csv");

    let rows = load_csv_rows(&csv_file, Some("id")).expect("load rows");
    assert_eq!(rows.len(), 2);

    let (first, second) = (&rows[0], &rows[1]);
    assert_eq!(first.item_id, "alpha");
    assert_eq!(second.item_id, "row-2");
    // The other columns of the fallback row are still captured.
    assert_eq!(
        second.values.get("name").map(String::as_str),
        Some("Second")
    );
}
/// Two rows sharing an id are both kept; the repeat is given a numeric
/// suffix (`foo-2`) so the item ids stay unique.
#[test]
fn test_load_csv_rows_dedupes_item_ids() {
    let dir = tempdir().expect("tempdir");
    let csv_file = dir.path().join("rows.csv");
    std::fs::write(&csv_file, "id,name\nfoo,First\nfoo,Second\n").expect("write csv");

    let rows = load_csv_rows(&csv_file, Some("id")).expect("load rows");
    assert_eq!(rows.len(), 2);

    let (first, second) = (&rows[0], &rows[1]);
    assert_eq!(first.item_id, "foo");
    assert_eq!(second.item_id, "foo-2");
}
/// A CSV whose header row repeats a column name fails to load, and the error
/// text mentions the duplicate header.
#[test]
fn test_load_csv_rows_rejects_duplicate_headers() {
    let dir = tempdir().expect("tempdir");
    let csv_file = dir.path().join("rows.csv");
    std::fs::write(&csv_file, "id,id\nfoo,bar\n").expect("write csv");

    let err = load_csv_rows(&csv_file, Some("id")).expect_err("duplicate headers should fail");
    let message = err.to_string();
    assert!(message.contains("duplicate header"));
}
#[test]
fn test_send_input_schema_does_not_require_message_field() {
+8
View File
@@ -1,4 +1,12 @@
//! Swarm orchestration for spawning multiple sub-agents with dependencies.
//!
//! NOTE: As of #336 the model-callable swarm tool surface has been removed.
//! The types in this file (SwarmOutcome, SwarmTaskStatus, etc.) are still
//! consumed by `core/events.rs::Event::SwarmProgress` and the matching UI
//! handler in `tui/ui.rs`, so the file is parked rather than deleted.
//! Full cascade (delete this file + Event variant + UI handlers + app state
//! + routing helpers) tracked in #357.
#![allow(dead_code)]
use std::collections::{HashMap, HashSet, VecDeque};
use std::fs;
+1 -676
View File
@@ -5,7 +5,7 @@ use crate::tui::file_mention::{
try_autocomplete_file_mention, user_request_with_file_mentions, visible_mention_menu_entries,
};
use crate::tui::history::{
ExecCell, ExecSource, GenericToolCell, HistoryCell, SubAgentCell, ToolCell, ToolStatus,
ExecCell, ExecSource, GenericToolCell, HistoryCell, ToolCell, ToolStatus,
};
use crate::tui::views::{ModalView, ViewAction};
use crate::working_set::Workspace;
@@ -2379,338 +2379,6 @@ fn non_fanout_tool_does_not_populate_prompts() {
"non-fan-out tool must not populate prompts"
);
}
/// Starting an `agent_swarm` tool call fills the tool cell's prompt rows with
/// one entry per task: the task's `objective` when it has one, otherwise its
/// `prompt`.
#[test]
fn agent_swarm_populates_prompt_rows_from_tasks() {
    let mut app = create_test_app();
    let call_input = serde_json::json!({
        "tasks": [
            {
                "id": "state",
                "objective": "Read the current repo state",
                "prompt": "Inspect git status and recent commits"
            },
            {
                "id": "docs",
                "prompt": "Update docs for the release"
            }
        ]
    });
    handle_tool_call_started(&mut app, "swarm-1", "agent_swarm", &call_input);

    let active = app.active_cell.as_ref().expect("active cell present");
    let generic = match &active.entries()[0] {
        HistoryCell::Tool(ToolCell::Generic(generic)) => generic,
        _ => panic!("expected GenericToolCell for agent_swarm"),
    };

    let expected_rows = vec![
        "Read the current repo state".to_string(),
        "Update docs for the release".to_string(),
    ];
    assert_eq!(generic.prompts.as_ref(), Some(&expected_rows));
}
/// Seeding from an `agent_swarm` call records the declared task count without
/// inserting a history card yet.
#[test]
fn agent_swarm_seeded_fanout_card_uses_declared_task_count() {
    let mut app = create_test_app();
    let call_input = serde_json::json!({
        "tasks": [
            { "id": "a", "prompt": "First task" },
            { "id": "b", "prompt": "Second task" },
            { "id": "c", "prompt": "Third task" }
        ]
    });

    let seeded = seed_fanout_card_from_tool_call(&mut app, "agent_swarm", &call_input);
    assert!(seeded);

    // Card is deferred until first SwarmProgress (#236/#238).
    // Before that, only the pending task count is stored.
    assert_eq!(app.pending_swarm_task_count, Some(3));
    assert!(
        app.history.is_empty(),
        "no card pre-seeded before SwarmProgress"
    );
    let counts = active_fanout_counts(&app);
    assert_eq!(counts, Some((0, 3)), "sidebar reads pending count");
}
/// Seeding a fanout while another tool call is still active must leave that
/// tool's cell index alone, so its completion still reaches the right cell.
#[test]
fn seeded_fanout_card_preserves_existing_active_tool_indices() {
    let mut app = create_test_app();

    let search_input = serde_json::json!({ "query": "swarm" });
    handle_tool_call_started(&mut app, "search-1", "file_search", &search_input);
    assert_eq!(app.tool_cells.get("search-1").copied(), Some(0));

    let swarm_input = serde_json::json!({
        "tasks": [
            { "id": "a", "prompt": "First task" },
            { "id": "b", "prompt": "Second task" }
        ]
    });
    let seeded = seed_fanout_card_from_tool_call(&mut app, "agent_swarm", &swarm_input);
    assert!(seeded);

    // No card created → no history insertion → tool_cells indices unchanged.
    let search_index = app.tool_cells.get("search-1").copied();
    assert_eq!(
        search_index,
        Some(0),
        "active tool virtual index unchanged when card is deferred"
    );

    let outcome = Ok(crate::tools::spec::ToolResult::success("done"));
    handle_tool_call_complete(&mut app, "search-1", "file_search", &outcome);

    let active = app.active_cell.as_ref().expect("active cell present");
    let generic = match &active.entries()[0] {
        HistoryCell::Tool(ToolCell::Generic(generic)) => generic,
        _ => panic!("expected GenericToolCell for file_search"),
    };
    assert_eq!(generic.status, ToolStatus::Success);
}
/// Syncing from a finished `agent_swarm` tool result produces a fanout card
/// whose workers mirror the result's `tasks` array: one completed worker
/// carrying its real agent id, and one cancelled worker that never received
/// an agent and is therefore shown under the id `task:b`.
#[test]
fn agent_swarm_result_sync_replaces_seeded_slots_with_final_task_outcomes() {
let mut app = create_test_app();
// Seed from the original tool call, which declares two tasks.
assert!(seed_fanout_card_from_tool_call(
&mut app,
"agent_swarm",
&serde_json::json!({
"tasks": [
{ "id": "a", "prompt": "First task" },
{ "id": "b", "prompt": "Second task" }
]
}),
));
// The tool result is passed as a JSON string: task "a" completed, task "b"
// was cancelled with a null agent id.
let result = crate::tools::spec::ToolResult::success(
serde_json::json!({
"swarm_id": "swarm_test",
"status": "partial",
"duration_ms": 100,
"counts": {
"total": 2,
"completed": 1,
"interrupted": 0,
"failed": 0,
"cancelled": 1,
"skipped": 0,
"running": 0,
"pending": 0
},
"tasks": [
{
"task_id": "a",
"agent_id": "agent_done",
"status": "completed",
"result": "ok",
"steps_taken": 1,
"duration_ms": 50
},
{
"task_id": "b",
"agent_id": null,
"status": "cancelled",
"error": "Cancelled",
"steps_taken": 0,
"duration_ms": 0
}
]
})
.to_string(),
);
assert!(sync_fanout_card_from_tool_result(
&mut app,
"agent_swarm",
&Ok(result),
));
// After the sync the card is expected at the front of the history.
let HistoryCell::SubAgent(SubAgentCell::Fanout(card)) = &app.history[0] else {
panic!("expected synced fanout card");
};
assert_eq!(card.worker_count(), 2);
assert_eq!(card.workers[0].agent_id, "agent_done");
assert_eq!(
card.workers[0].status,
crate::tui::widgets::agent_card::AgentLifecycle::Completed
);
// The task with no agent is identified by its task id instead.
assert_eq!(card.workers[1].agent_id, "task:b");
assert_eq!(
card.workers[1].status,
crate::tui::widgets::agent_card::AgentLifecycle::Cancelled
);
}
/// Syncing from a fully completed `SwarmOutcome` yields a fanout card in
/// which every one of the three workers is `Completed`, and registers the
/// card under the first task's agent id in `subagent_card_index`.
#[test]
fn agent_swarm_progress_event_replaces_stale_pending_slots() {
let mut app = create_test_app();
// Seed from a tool call that declares three tasks.
assert!(seed_fanout_card_from_tool_call(
&mut app,
"agent_swarm",
&serde_json::json!({
"tasks": [
{ "id": "a", "prompt": "First task" },
{ "id": "b", "prompt": "Second task" },
{ "id": "c", "prompt": "Third task" }
]
}),
));
// Outcome in which all three tasks have completed.
let outcome = crate::tools::swarm::SwarmOutcome {
swarm_id: "swarm_done".to_string(),
status: crate::tools::swarm::SwarmStatus::Completed,
duration_ms: 250,
counts: crate::tools::swarm::SwarmCounts {
total: 3,
completed: 3,
interrupted: 0,
failed: 0,
cancelled: 0,
skipped: 0,
running: 0,
pending: 0,
},
tasks: vec![
crate::tools::swarm::SwarmTaskOutcome {
task_id: "a".to_string(),
worker_id: "swarm_done:a".to_string(),
agent_id: Some("agent_a".to_string()),
label: "First task".to_string(),
model: "deepseek-v4-flash".to_string(),
nickname: Some("Blue".to_string()),
status: crate::tools::swarm::SwarmTaskStatus::Completed,
result: Some("a ok".to_string()),
error: None,
steps_taken: 1,
duration_ms: 100,
started_at_ms: Some(0),
ended_at_ms: Some(100),
},
crate::tools::swarm::SwarmTaskOutcome {
task_id: "b".to_string(),
worker_id: "swarm_done:b".to_string(),
agent_id: Some("agent_b".to_string()),
label: "Second task".to_string(),
model: "deepseek-v4-flash".to_string(),
nickname: Some("Humpback".to_string()),
status: crate::tools::swarm::SwarmTaskStatus::Completed,
result: Some("b ok".to_string()),
error: None,
steps_taken: 1,
duration_ms: 100,
started_at_ms: Some(0),
ended_at_ms: Some(100),
},
crate::tools::swarm::SwarmTaskOutcome {
task_id: "c".to_string(),
worker_id: "swarm_done:c".to_string(),
agent_id: Some("agent_c".to_string()),
label: "Third task".to_string(),
model: "deepseek-v4-flash".to_string(),
nickname: Some("Sperm".to_string()),
status: crate::tools::swarm::SwarmTaskStatus::Completed,
result: Some("c ok".to_string()),
error: None,
steps_taken: 1,
duration_ms: 100,
started_at_ms: Some(0),
ended_at_ms: Some(100),
},
],
};
assert!(sync_fanout_card_from_swarm_outcome(&mut app, &outcome));
let HistoryCell::SubAgent(SubAgentCell::Fanout(card)) = &app.history[0] else {
panic!("expected synced fanout card");
};
assert_eq!(card.worker_count(), 3);
// NOTE(review): the pair looks like (running, total) — confirm against
// `active_fanout_counts`.
assert_eq!(active_fanout_counts(&app), Some((0, 3)));
assert!(card.workers.iter().all(|slot| matches!(
slot.status,
crate::tui::widgets::agent_card::AgentLifecycle::Completed
)));
// The card at history index 0 is reachable through the agent id.
assert_eq!(app.subagent_card_index.get("agent_a").copied(), Some(0));
}
/// After syncing a live outcome (four tasks completed, one still running),
/// `active_fanout_counts` reports `(1, 5)`.
#[test]
fn fanout_counts_use_canonical_swarm_outcome_not_stale_card_slots() {
let mut app = create_test_app();
// Seed from a tool call that declares five tasks, "a" through "e".
assert!(seed_fanout_card_from_tool_call(
&mut app,
"agent_swarm",
&serde_json::json!({
"tasks": [
{ "id": "a", "prompt": "A" },
{ "id": "b", "prompt": "B" },
{ "id": "c", "prompt": "C" },
{ "id": "d", "prompt": "D" },
{ "id": "e", "prompt": "E" }
]
}),
));
let outcome = crate::tools::swarm::SwarmOutcome {
swarm_id: "swarm_live".to_string(),
status: crate::tools::swarm::SwarmStatus::Running,
duration_ms: 1000,
counts: crate::tools::swarm::SwarmCounts {
total: 5,
completed: 4,
interrupted: 0,
failed: 0,
cancelled: 0,
skipped: 0,
running: 1,
pending: 0,
},
// Build tasks "a".."e"; only the last one (idx 4) is still running and so
// has no end timestamp.
tasks: (0..5)
.map(|idx| {
// idx is at most 4, so the narrowing cast to u8 cannot truncate.
let task_id = char::from(b'a' + idx as u8).to_string();
crate::tools::swarm::SwarmTaskOutcome {
task_id: task_id.clone(),
worker_id: format!("swarm_live:{task_id}"),
agent_id: Some(format!("agent_{task_id}")),
label: task_id.clone(),
model: "deepseek-v4-flash".to_string(),
nickname: Some(["Blue", "Humpback", "Sperm", "Fin", "Sei"][idx].to_string()),
status: if idx == 4 {
crate::tools::swarm::SwarmTaskStatus::Running
} else {
crate::tools::swarm::SwarmTaskStatus::Completed
},
result: None,
error: None,
steps_taken: 0,
duration_ms: 0,
started_at_ms: Some(0),
ended_at_ms: (idx != 4).then_some(0),
}
})
.collect(),
};
assert!(sync_fanout_card_from_swarm_outcome(&mut app, &outcome));
// Matches the outcome's `running: 1` and `total: 5`.
assert_eq!(active_fanout_counts(&app), Some((1, 5)));
}
#[test]
fn noisy_subagent_progress_keeps_existing_objective_summary() {
let mut app = create_test_app();
@@ -3109,349 +2777,6 @@ fn duplicate_mailbox_token_usage_does_not_regress_displayed_cost() {
handle_subagent_mailbox(&mut app, 12, &usage);
assert!(app.displayed_session_cost() > baseline);
}
/// Regression for issue #238: two overlapping `agent_swarm` invocations must
/// each project to their own FanoutCard. Without per-swarm card binding,
/// SwarmProgress for an older background swarm would clobber the freshly
/// seeded card of a newer fanout — the contradictory state the user saw.
///
/// Sequence exercised: seed and sync swarm A (running), seed and sync swarm B
/// (running), then deliver A's terminal outcome and check that only A's card
/// changed.
#[test]
fn overlapping_swarms_project_to_distinct_fanout_cards() {
use crate::tools::swarm::{SwarmCounts, SwarmOutcome, SwarmStatus, SwarmTaskStatus};
let mut app = create_test_app();
// Seed swarm A.
assert!(seed_fanout_card_from_tool_call(
&mut app,
"agent_swarm",
&serde_json::json!({"tasks": [{"id": "a1", "prompt": "A1"}, {"id": "a2", "prompt": "A2"}]}),
));
let outcome_a_initial = SwarmOutcome {
swarm_id: "swarm_A".to_string(),
status: SwarmStatus::Running,
duration_ms: 0,
counts: SwarmCounts {
total: 2,
completed: 0,
interrupted: 0,
failed: 0,
cancelled: 0,
skipped: 0,
running: 2,
pending: 0,
},
tasks: vec![
mk_task("a1", SwarmTaskStatus::Running),
mk_task("a2", SwarmTaskStatus::Running),
],
};
// NOTE(review): the boolean returned by the sync calls in this test is
// discarded, whereas sibling tests wrap the call in `assert!` — confirm
// whether that is deliberate.
sync_fanout_card_from_swarm_outcome(&mut app, &outcome_a_initial);
let card_a_idx = *app
.swarm_card_index
.get("swarm_A")
.expect("swarm A bound to a card");
// Now seed swarm B before A finishes.
// Clear the "most recent fanout" bookkeeping before seeding the second swarm.
app.last_fanout_card_index = None;
app.last_swarm_id = None;
assert!(seed_fanout_card_from_tool_call(
&mut app,
"agent_swarm",
&serde_json::json!({"tasks": [{"id": "b1", "prompt": "B1"}]}),
));
let outcome_b_initial = SwarmOutcome {
swarm_id: "swarm_B".to_string(),
status: SwarmStatus::Running,
duration_ms: 0,
counts: SwarmCounts {
total: 1,
completed: 0,
interrupted: 0,
failed: 0,
cancelled: 0,
skipped: 0,
running: 1,
pending: 0,
},
tasks: vec![mk_task("b1", SwarmTaskStatus::Running)],
};
sync_fanout_card_from_swarm_outcome(&mut app, &outcome_b_initial);
let card_b_idx = *app
.swarm_card_index
.get("swarm_B")
.expect("swarm B bound to its own card");
assert_ne!(card_a_idx, card_b_idx, "each swarm gets its own card");
// A's terminal SwarmProgress arrives later; it must update card A,
// *not* card B.
let outcome_a_done = SwarmOutcome {
swarm_id: "swarm_A".to_string(),
status: SwarmStatus::Completed,
duration_ms: 100,
counts: SwarmCounts {
total: 2,
completed: 2,
interrupted: 0,
failed: 0,
cancelled: 0,
skipped: 0,
running: 0,
pending: 0,
},
tasks: vec![
mk_task("a1", SwarmTaskStatus::Completed),
mk_task("a2", SwarmTaskStatus::Completed),
],
};
sync_fanout_card_from_swarm_outcome(&mut app, &outcome_a_done);
// Card A reflects A's completion; card B still reflects B's running state.
let HistoryCell::SubAgent(SubAgentCell::Fanout(card_a)) = &app.history[card_a_idx] else {
panic!("card A is not a fanout cell");
};
assert_eq!(card_a.worker_count(), 2);
assert!(card_a.workers.iter().all(|s| matches!(
s.status,
crate::tui::widgets::agent_card::AgentLifecycle::Completed
)));
let HistoryCell::SubAgent(SubAgentCell::Fanout(card_b)) = &app.history[card_b_idx] else {
panic!("card B is not a fanout cell");
};
assert_eq!(card_b.worker_count(), 1);
assert!(matches!(
card_b.workers[0].status,
crate::tui::widgets::agent_card::AgentLifecycle::Running
));
}
/// Test helper: builds a `SwarmTaskOutcome` for the task `id` in the given
/// `status`.
///
/// The worker id is `task:{id}`, the agent id is `agent_{id}`, and the label
/// mirrors the task id. Step and duration counters are zero, the start
/// timestamp is `Some(0)`, and nickname, result, error, and end timestamp are
/// all `None`.
fn mk_task(
    id: &str,
    status: crate::tools::swarm::SwarmTaskStatus,
) -> crate::tools::swarm::SwarmTaskOutcome {
    // Derive every id-based field up front so the struct literal stays flat.
    let task_id = String::from(id);
    let label = task_id.clone();
    let worker_id = format!("task:{id}");
    let agent_id = Some(format!("agent_{id}"));

    crate::tools::swarm::SwarmTaskOutcome {
        task_id,
        worker_id,
        agent_id,
        label,
        model: String::from("deepseek-v4-flash"),
        nickname: None,
        status,
        result: None,
        error: None,
        steps_taken: 0,
        duration_ms: 0,
        started_at_ms: Some(0),
        ended_at_ms: None,
    }
}
/// Regression for issue #236/#238: a fanout-class tool must not be counted
/// twice in the UI. The sidebar and the FanoutCard already represent the
/// swarm, so `active_tool_status_label` has to skip such tools; otherwise a
/// "tool agent_swarm · 1 active" footer line would show up alongside
/// "Agents 3 done" + "0 done · 0 running · 0 failed · 3 pending".
#[test]
fn footer_active_tool_label_suppresses_fanout_tools() {
    let mut app = create_test_app();

    // A still-running `agent_swarm` call, as it would sit in the active cell.
    let running_swarm_tool = HistoryCell::Tool(ToolCell::Generic(GenericToolCell {
        name: String::from("agent_swarm"),
        status: ToolStatus::Running,
        input_summary: None,
        output: None,
        prompts: None,
    }));

    // Populate the active cell first, then hand it to the app.
    let mut in_flight = crate::tui::active_cell::ActiveCell::new();
    in_flight.push_tool(String::from("tool-1"), running_swarm_tool);
    app.active_cell = Some(in_flight);

    let label = active_tool_status_label(&app);
    assert!(
        label.is_none(),
        "active fanout-class tools must not appear in the footer 'tool ... · X active' line, got: {label:?}"
    );
}
/// Regression for issue #243: pressing Esc during an active fanout must
/// leave the parent in a clean state. This test checks that the active cell
/// is flushed while the `swarm_jobs` entry and `last_swarm_id` for the
/// background swarm are preserved, so `swarm_status` / `swarm_result` / the
/// FanoutCard stay coherent.
#[test]
fn esc_during_fanout_clears_active_cell_but_preserves_background_swarm() {
    use crate::tools::swarm::{SwarmCounts, SwarmOutcome, SwarmStatus};

    let mut app = create_test_app();

    // Seed an in-flight fanout: a Generic tool entry in active_cell PLUS
    // a registered swarm in swarm_jobs (standing in for the background
    // work that would keep running after Esc).
    app.active_cell = Some(crate::tui::active_cell::ActiveCell::new());
    let active = app.active_cell.as_mut().unwrap();
    active.push_tool(
        "tool-1".to_string(),
        HistoryCell::Tool(ToolCell::Generic(GenericToolCell {
            name: "agent_swarm".to_string(),
            status: ToolStatus::Running,
            input_summary: None,
            output: None,
            prompts: None,
        })),
    );

    let outcome = SwarmOutcome {
        swarm_id: "swarm_bg".to_string(),
        status: SwarmStatus::Running,
        duration_ms: 0,
        counts: SwarmCounts {
            total: 3,
            completed: 0,
            interrupted: 0,
            failed: 0,
            cancelled: 0,
            skipped: 0,
            running: 3,
            pending: 0,
        },
        tasks: vec![
            mk_task("a", crate::tools::swarm::SwarmTaskStatus::Running),
            mk_task("b", crate::tools::swarm::SwarmTaskStatus::Running),
            mk_task("c", crate::tools::swarm::SwarmTaskStatus::Running),
        ],
    };
    // `outcome` is not needed afterwards, so move it into the map instead
    // of cloning it.
    app.swarm_jobs.insert("swarm_bg".to_string(), outcome);
    app.last_swarm_id = Some("swarm_bg".to_string());

    // Apply the Esc/CancelRequest mutations the UI loop performs.
    app.is_loading = true;
    app.finalize_active_cell_as_interrupted();
    app.is_loading = false;

    // Active cell flushed → footer no longer reports "tool ... · X active".
    assert!(
        app.active_cell.is_none(),
        "active_cell must be flushed after Esc"
    );

    // Background swarm record preserved — swarm_status / swarm_result and
    // any future SwarmProgress event can still update the canonical store.
    assert!(
        app.swarm_jobs.contains_key("swarm_bg"),
        "background swarm record must survive Esc"
    );
    assert_eq!(app.last_swarm_id.as_deref(), Some("swarm_bg"));

    // Composer can submit the next message immediately: the loading flag
    // is down again after the Esc sequence above.
    assert!(!app.is_loading);
}
/// Regression for issue #243: after Esc during fanout, a subsequent
/// TurnComplete (Interrupted) event arriving from the engine must be
/// handled idempotently. `finalize_active_cell_as_interrupted` and
/// `finalize_streaming_assistant_as_interrupted` are invoked by both the
/// Esc handler and the TurnComplete handler, so the second invocation
/// must be a no-op (expected to be guarded by `Option::take()`).
///
/// The assistant-cell checks use `let … else { panic!(..) }` so the test
/// fails loudly if the cell at `assistant_idx` is missing or is no longer
/// an assistant cell, instead of silently skipping its assertions.
#[test]
fn turn_complete_after_esc_is_idempotent() {
    let mut app = create_test_app();

    // Simulate a live fanout with an active cell and a streaming assistant.
    let mut active = ActiveCell::new();
    active.push_tool(
        "tool-1".to_string(),
        HistoryCell::Tool(ToolCell::Generic(GenericToolCell {
            name: "agent_swarm".to_string(),
            status: ToolStatus::Running,
            input_summary: None,
            output: None,
            prompts: None,
        })),
    );
    app.active_cell = Some(active);
    app.add_message(HistoryCell::Assistant {
        content: "partial text".to_string(),
        streaming: true,
    });
    let assistant_idx = app.history.len() - 1;
    app.streaming_message_index = Some(assistant_idx);
    app.is_loading = true;
    app.runtime_turn_status = Some("in_progress".to_string());

    // Step 1: Esc handler fires (simulated).
    app.finalize_active_cell_as_interrupted();
    app.finalize_streaming_assistant_as_interrupted();
    app.runtime_turn_status = None;
    app.is_loading = false;

    // Verify first call cleared the active cell and stopped loading.
    assert!(app.active_cell.is_none(), "active_cell cleared by Esc");
    assert!(!app.is_loading, "is_loading false after Esc");
    assert!(
        app.runtime_turn_status.is_none(),
        "runtime_turn_status cleared by Esc"
    );

    // Streaming assistant cell was marked interrupted. A missing or
    // non-assistant cell is a test failure, not a skipped check.
    let Some(HistoryCell::Assistant { streaming, content }) = app.history.get(assistant_idx)
    else {
        panic!("history[{assistant_idx}] is not an assistant cell after Esc");
    };
    assert!(!streaming, "streaming stopped");
    assert!(
        content.starts_with("[interrupted]"),
        "content should have interruption prefix"
    );

    // Step 2: Simulate TurnComplete (Interrupted) arriving from engine.
    // This calls the same methods again — must be a no-op.
    app.finalize_active_cell_as_interrupted();
    app.finalize_streaming_assistant_as_interrupted();
    app.is_loading = false;
    app.runtime_turn_status = Some("interrupted".to_string());

    // State remains consistent — active_cell still None, streaming still
    // stopped, no double-interruption prefix.
    assert!(
        app.active_cell.is_none(),
        "active_cell still cleared after 2nd call"
    );
    assert!(!app.is_loading, "is_loading still false after 2nd call");
    assert_eq!(
        app.runtime_turn_status.as_deref(),
        Some("interrupted"),
        "runtime_turn_status reflects final outcome"
    );

    // The streaming assistant should still have only ONE interruption prefix.
    let Some(HistoryCell::Assistant { content, .. }) = app.history.get(assistant_idx) else {
        panic!("history[{assistant_idx}] is not an assistant cell after TurnComplete");
    };
    assert_eq!(
        content.matches("[interrupted]").count(),
        1,
        "content must not double-prefix [interrupted]: {content}"
    );

    // The flushed tool entry must be present in history exactly once and
    // carry the Failed status.
    let tool_cells: Vec<_> = app
        .history
        .iter()
        .filter_map(|c| match c {
            HistoryCell::Tool(ToolCell::Generic(g)) => Some(g),
            _ => None,
        })
        .collect();
    assert_eq!(tool_cells.len(), 1);
    assert_eq!(
        tool_cells[0].status,
        ToolStatus::Failed,
        "interrupted tool marked Failed"
    );
}
/// Regression for issue #241: `checklist_write` results render as a
/// dedicated checklist card with completed/total + percent header and
/// per-item status markers — not as a generic dumped JSON tool block.
#[test]
fn checklist_write_renders_dedicated_card() {
let cell = GenericToolCell {
+3 -5
View File
@@ -136,11 +136,9 @@ Large logs and command outputs should be artifacts with compact summaries in the
### Sub-agents
`agent_spawn`, `agent_swarm`, `spawn_agents_on_csv`, plus the supporting
tools (`agent_result` / `swarm_result` / `wait` / `send_input` /
`agent_assign` / `agent_cancel` / `resume_agent` / `agent_list` /
`report_agent_job_result` / `swarm_status`). See `agent.txt` for the
delegation protocol.
`agent_spawn` plus the supporting tools (`agent_result` / `wait` / `send_input` /
`agent_assign` / `agent_cancel` / `resume_agent` / `agent_list`).
See `agent.txt` for the delegation protocol.
### Parallel fan-out: cost-class caps