feat: explain parallel fan-out caps in tool descriptions and error messages (fixes #81)
This commit is contained in:
@@ -56,7 +56,8 @@ impl ToolSpec for RlmQueryTool {
|
||||
and return the joined results. Pass `prompts: [...]` for a parallel batch or \
|
||||
`prompt` for a single child. Children run in isolation with an optional shared \
|
||||
`system` prompt; results come back as `[i] <text>` blocks separated by `---` (or \
|
||||
just the text for N=1). Read-only — no file or shell side-effects."
|
||||
just the text for N=1). Max 16 children per call (each is a one-shot flash query; \
|
||||
use agent_spawn for full multi-turn sub-agents). Read-only — no file or shell side-effects."
|
||||
}
|
||||
|
||||
fn input_schema(&self) -> Value {
|
||||
@@ -70,7 +71,7 @@ impl ToolSpec for RlmQueryTool {
|
||||
"prompts": {
|
||||
"type": "array",
|
||||
"items": { "type": "string" },
|
||||
"description": "Up to 16 prompts to run concurrently. Returns indexed `[0] ... [N-1]` blocks."
|
||||
"description": "Up to 16 prompts to run concurrently (each is a one-shot flash query). Returns indexed `[0] ... [N-1]` blocks."
|
||||
},
|
||||
"model": {
|
||||
"type": "string",
|
||||
|
||||
@@ -649,7 +649,7 @@ impl SubAgentManager {
|
||||
|
||||
if self.running_count() >= self.max_agents {
|
||||
return Err(anyhow!(
|
||||
"Sub-agent limit reached (max {}, running {}). Cancel, close, or wait for an existing agent to finish.",
|
||||
"Sub-agent limit reached (max {}, running {}). Cancel, close, or wait for an existing agent to finish. Consider rlm_query (max 16 children) for parallel one-shot queries instead.",
|
||||
self.max_agents,
|
||||
self.running_count()
|
||||
));
|
||||
@@ -757,7 +757,7 @@ impl SubAgentManager {
|
||||
|
||||
if self.running_count() >= self.max_agents {
|
||||
return Err(anyhow!(
|
||||
"Sub-agent limit reached (max {}, running {}). Close or wait for an existing agent before resuming.",
|
||||
"Sub-agent limit reached (max {}, running {}). Close or wait for an existing agent before resuming. Consider rlm_query (max 16 children) for parallel one-shot queries instead.",
|
||||
self.max_agents,
|
||||
self.running_count()
|
||||
));
|
||||
@@ -1067,7 +1067,9 @@ impl ToolSpec for AgentSpawnTool {
|
||||
|
||||
fn description(&self) -> &'static str {
|
||||
"Spawn a background sub-agent for a focused task. Returns an agent_id immediately; \
|
||||
follow with agent_result to retrieve the final result."
|
||||
follow with agent_result to retrieve the final result. Max 5 in flight (each is a \
|
||||
full sub-agent loop; cancel or wait if you hit the cap). For parallel one-shot LLM \
|
||||
queries (cheaper, up to 16 children per call), use rlm_query instead."
|
||||
}
|
||||
|
||||
fn input_schema(&self) -> Value {
|
||||
|
||||
@@ -69,6 +69,22 @@ tools (`agent_result` / `swarm_result` / `wait` / `send_input` /
|
||||
`report_agent_job_result` / `swarm_status`). See `agent.txt` for the
|
||||
delegation protocol.
|
||||
|
||||
### Parallel fan-out: cost-class caps
|
||||
|
||||
Two tools offer parallel fan-out with different concurrency limits that
|
||||
reflect very different cost classes:
|
||||
|
||||
| Tool | What each child does | Wall-clock | Token cost | Cap |
|
||||
|---|---|---|---|---|
|
||||
| `agent_spawn` | Full sub-agent loop (planning, tool calls, multi-turn streaming, can spawn children) | minutes | thousands of tokens | 5 in flight |
|
||||
| `rlm_query` | One-shot non-streaming Chat Completions call to `deepseek-v4-flash` | seconds | ~hundreds of tokens | 16 per call |
|
||||
|
||||
The caps appear in each tool's description and error messages so the model
|
||||
(and the user) can choose the right tool for the job. If you only need
|
||||
parallel one-shot lookups rather than full sub-agents, prefer `rlm_query`; if each task needs
|
||||
its own tool-carrying agent loop, use `agent_spawn` (and close finished
|
||||
ones to free slots).
|
||||
|
||||
## Recently consolidated (v0.5.1)
|
||||
|
||||
Removed from the prompt as duplicates of equivalent tools (the underlying
|
||||
|
||||
Reference in New Issue
Block a user