diff --git a/crates/tui/src/tools/rlm_query.rs b/crates/tui/src/tools/rlm_query.rs
index 43acb9da..5540e4ff 100644
--- a/crates/tui/src/tools/rlm_query.rs
+++ b/crates/tui/src/tools/rlm_query.rs
@@ -56,7 +56,8 @@ impl ToolSpec for RlmQueryTool {
          and return the joined results. Pass `prompts: [...]` for a parallel batch or \
          `prompt` for a single child. Children run in isolation with an optional shared \
          `system` prompt; results come back as `[i] <text>` blocks separated by `---` (or \
-         just the text for N=1). Read-only — no file or shell side-effects."
+         just the text for N=1). Max 16 children per call (each is a one-shot flash query; \
+         use agent_spawn for full multi-turn sub-agents). Read-only — no file or shell side-effects."
     }
 
     fn input_schema(&self) -> Value {
@@ -70,7 +71,7 @@ impl ToolSpec for RlmQueryTool {
                 "prompts": {
                     "type": "array",
                     "items": { "type": "string" },
-                    "description": "Up to 16 prompts to run concurrently. Returns indexed `[0] ... [N-1]` blocks."
+                    "description": "Up to 16 prompts to run concurrently (each is a one-shot flash query). Returns indexed `[0] ... [N-1]` blocks."
                 },
                 "model": {
                     "type": "string",
diff --git a/crates/tui/src/tools/subagent/mod.rs b/crates/tui/src/tools/subagent/mod.rs
index 135fc3ad..861482bf 100644
--- a/crates/tui/src/tools/subagent/mod.rs
+++ b/crates/tui/src/tools/subagent/mod.rs
@@ -649,7 +649,7 @@ impl SubAgentManager {
 
         if self.running_count() >= self.max_agents {
             return Err(anyhow!(
-                "Sub-agent limit reached (max {}, running {}). Cancel, close, or wait for an existing agent to finish.",
+                "Sub-agent limit reached (max {}, running {}). Cancel, close, or wait for an existing agent to finish. Consider rlm_query (max 16 children) for parallel one-shot queries instead.",
                 self.max_agents,
                 self.running_count()
             ));
@@ -757,7 +757,7 @@ impl SubAgentManager {
 
         if self.running_count() >= self.max_agents {
             return Err(anyhow!(
-                "Sub-agent limit reached (max {}, running {}). Close or wait for an existing agent before resuming.",
+                "Sub-agent limit reached (max {}, running {}). Close or wait for an existing agent before resuming. Consider rlm_query (max 16 children) for parallel one-shot queries instead.",
                 self.max_agents,
                 self.running_count()
             ));
@@ -1067,7 +1067,9 @@ impl ToolSpec for AgentSpawnTool {
 
     fn description(&self) -> &'static str {
         "Spawn a background sub-agent for a focused task. Returns an agent_id immediately; \
-         follow with agent_result to retrieve the final result."
+         follow with agent_result to retrieve the final result. Max 5 in flight (each is a \
+         full sub-agent loop; cancel or wait if you hit the cap). For parallel one-shot LLM \
+         queries (cheaper, up to 16 children per call), use rlm_query instead."
     }
 
     fn input_schema(&self) -> Value {
diff --git a/docs/TOOL_SURFACE.md b/docs/TOOL_SURFACE.md
index 2714746c..b8edfbde 100644
--- a/docs/TOOL_SURFACE.md
+++ b/docs/TOOL_SURFACE.md
@@ -69,6 +69,22 @@ tools (`agent_result` / `swarm_result` / `wait` / `send_input` /
 `report_agent_job_result` / `swarm_status`). See `agent.txt` for the
 delegation protocol.
 
+### Parallel fan-out: cost-class caps
+
+Two tools offer parallel fan-out with different concurrency limits that
+reflect very different cost classes:
+
+| Tool | What each child does | Wall-clock | Token cost | Cap |
+|---|---|---|---|---|
+| `agent_spawn` | Full sub-agent loop (planning, tool calls, multi-turn streaming, can spawn children) | minutes | thousands of tokens | 5 in flight |
+| `rlm_query` | One-shot non-streaming Chat Completions call to `deepseek-v4-flash` | seconds | ~hundreds of tokens | 16 per call |
+
+The caps appear in each tool's description and error messages so the model
+(and the user) can choose the right tool for the job. If you only need
+parallel one-shot lookups, prefer `rlm_query`; if each task needs its own
+tool-carrying agent loop, use `agent_spawn` (and cancel or close agents you
+no longer need to free slots).
+
 ## Recently consolidated (v0.5.1)
 
 Removed from the prompt as duplicates of equivalent tools (the underlying