feat: explain parallel fan-out caps in tool descriptions and error messages (fixes #81)

2026-04-26 13:16:12 -05:00
parent 38069700cc
commit feb3cf1e0c
3 changed files with 24 additions and 5 deletions
@@ -56,7 +56,8 @@ impl ToolSpec for RlmQueryTool {
         and return the joined results. Pass `prompts: [...]` for a parallel batch or \
         `prompt` for a single child. Children run in isolation with an optional shared \
         `system` prompt; results come back as `[i] <text>` blocks separated by `---` (or \
-         just the text for N=1). Read-only — no file or shell side-effects."
+         just the text for N=1). Max 16 children per call (each is a one-shot flash query; \
+         use agent_spawn for full multi-turn sub-agents). Read-only — no file or shell side-effects."
    }

    fn input_schema(&self) -> Value {
@@ -70,7 +71,7 @@ impl ToolSpec for RlmQueryTool {
                "prompts": {
                    "type": "array",
                    "items": { "type": "string" },
-                    "description": "Up to 16 prompts to run concurrently. Returns indexed `[0] ... [N-1]` blocks."
+                    "description": "Up to 16 prompts to run concurrently (each is a one-shot flash query). Returns indexed `[0] ... [N-1]` blocks."
                },
                "model": {
                    "type": "string",
@@ -649,7 +649,7 @@ impl SubAgentManager {

        if self.running_count() >= self.max_agents {
            return Err(anyhow!(
-                "Sub-agent limit reached (max {}, running {}). Cancel, close, or wait for an existing agent to finish.",
+                "Sub-agent limit reached (max {}, running {}). Cancel, close, or wait for an existing agent to finish. Consider rlm_query (max 16 children) for parallel one-shot queries instead.",
                self.max_agents,
                self.running_count()
            ));
@@ -757,7 +757,7 @@ impl SubAgentManager {

        if self.running_count() >= self.max_agents {
            return Err(anyhow!(
-                "Sub-agent limit reached (max {}, running {}). Close or wait for an existing agent before resuming.",
+                "Sub-agent limit reached (max {}, running {}). Close or wait for an existing agent before resuming. Consider rlm_query (max 16 children) for parallel one-shot queries instead.",
                self.max_agents,
                self.running_count()
            ));
@@ -1067,7 +1067,9 @@ impl ToolSpec for AgentSpawnTool {

    fn description(&self) -> &'static str {
        "Spawn a background sub-agent for a focused task. Returns an agent_id immediately; \
-         follow with agent_result to retrieve the final result."
+         follow with agent_result to retrieve the final result. Max 5 in flight (each is a \
+         full sub-agent loop; cancel or wait if you hit the cap). For parallel one-shot LLM \
+         queries (cheaper, up to 16 children per call), use rlm_query instead."
    }

    fn input_schema(&self) -> Value {
@@ -69,6 +69,22 @@ tools (`agent_result` / `swarm_result` / `wait` / `send_input` /
 `report_agent_job_result` / `swarm_status`). See `agent.txt` for the
 delegation protocol.

+### Parallel fan-out: cost-class caps
+
+Two tools offer parallel fan-out with different concurrency limits that
+reflect very different cost classes:
+
+| Tool | What each child does | Wall-clock | Token cost | Cap |
+|---|---|---|---|---|
+| `agent_spawn` | Full sub-agent loop (planning, tool calls, multi-turn streaming, can spawn children) | minutes | thousands of tokens | 5 in flight |
+| `rlm_query` | One-shot non-streaming Chat Completions call to `deepseek-v4-flash` | seconds | ~hundreds of tokens | 16 per call |
+
+The caps appear in each tool's description and error messages so the model
+(and the user) can choose the right tool for the job. If you only need
+parallel one-shot lookups, prefer `rlm_query`; if each task needs its own
+tool-carrying agent loop, use `agent_spawn` (and cancel or close agents you
+no longer need to free slots).
+
 ## Recently consolidated (v0.5.1)

 Removed from the prompt as duplicates of equivalent tools (the underlying