diff --git a/crates/tui/src/tools/rlm_query.rs b/crates/tui/src/tools/rlm_query.rs index 43acb9da..5540e4ff 100644 --- a/crates/tui/src/tools/rlm_query.rs +++ b/crates/tui/src/tools/rlm_query.rs @@ -56,7 +56,8 @@ impl ToolSpec for RlmQueryTool { and return the joined results. Pass `prompts: [...]` for a parallel batch or \ `prompt` for a single child. Children run in isolation with an optional shared \ `system` prompt; results come back as `[i] ` blocks separated by `---` (or \ - just the text for N=1). Read-only — no file or shell side-effects." + just the text for N=1). Max 16 children per call (each is a one-shot flash query; \ + use agent_spawn for full multi-turn sub-agents). Read-only — no file or shell side-effects." } fn input_schema(&self) -> Value { @@ -70,7 +71,7 @@ impl ToolSpec for RlmQueryTool { "prompts": { "type": "array", "items": { "type": "string" }, - "description": "Up to 16 prompts to run concurrently. Returns indexed `[0] ... [N-1]` blocks." + "description": "Up to 16 prompts to run concurrently (each is a one-shot flash query). Returns indexed `[0] ... [N-1]` blocks." }, "model": { "type": "string", diff --git a/crates/tui/src/tools/subagent/mod.rs b/crates/tui/src/tools/subagent/mod.rs index 135fc3ad..861482bf 100644 --- a/crates/tui/src/tools/subagent/mod.rs +++ b/crates/tui/src/tools/subagent/mod.rs @@ -649,7 +649,7 @@ impl SubAgentManager { if self.running_count() >= self.max_agents { return Err(anyhow!( - "Sub-agent limit reached (max {}, running {}). Cancel, close, or wait for an existing agent to finish.", + "Sub-agent limit reached (max {}, running {}). Cancel, close, or wait for an existing agent to finish. Consider rlm_query (max 16 children) for parallel one-shot queries instead.", self.max_agents, self.running_count() )); @@ -757,7 +757,7 @@ impl SubAgentManager { if self.running_count() >= self.max_agents { return Err(anyhow!( - "Sub-agent limit reached (max {}, running {}). 
Close or wait for an existing agent before resuming.", + "Sub-agent limit reached (max {}, running {}). Close or wait for an existing agent before resuming. Consider rlm_query (max 16 children) for parallel one-shot queries instead.", self.max_agents, self.running_count() )); @@ -1067,7 +1067,9 @@ impl ToolSpec for AgentSpawnTool { fn description(&self) -> &'static str { "Spawn a background sub-agent for a focused task. Returns an agent_id immediately; \ - follow with agent_result to retrieve the final result." + follow with agent_result to retrieve the final result. Max 5 in flight (each is a \ + full sub-agent loop; cancel or wait if you hit the cap). For parallel one-shot LLM \ + queries (cheaper, up to 16 children per call), use rlm_query instead." } fn input_schema(&self) -> Value { diff --git a/docs/TOOL_SURFACE.md b/docs/TOOL_SURFACE.md index 2714746c..b8edfbde 100644 --- a/docs/TOOL_SURFACE.md +++ b/docs/TOOL_SURFACE.md @@ -69,6 +69,22 @@ tools (`agent_result` / `swarm_result` / `wait` / `send_input` / `report_agent_job_result` / `swarm_status`). See `agent.txt` for the delegation protocol. +### Parallel fan-out: cost-class caps + +Two tools offer parallel fan-out with different concurrency limits that +reflect very different cost classes: + +| Tool | What each child does | Wall-clock | Token cost | Cap | +|---|---|---|---|---| +| `agent_spawn` | Full sub-agent loop (planning, tool calls, multi-turn streaming, can spawn children) | minutes | thousands of tokens | 5 in flight | +| `rlm_query` | One-shot non-streaming Chat Completions call to `deepseek-v4-flash` | seconds | ~hundreds of tokens | 16 per call | + +The caps appear in each tool's description and error messages so the model +(and the user) can choose the right tool for the job. If each task is a +one-shot lookup that can run in parallel, prefer `rlm_query`; if each task needs +its own tool-carrying agent loop, use `agent_spawn` (and cancel or close +ones you no longer need to free slots). 
+ ## Recently consolidated (v0.5.1) Removed from the prompt as duplicates of equivalent tools (the underlying