feat(tools/agent_spawn): teach parent that subagent results are self-reports

2026-05-08 18:02:39 +08:00
parent 682e915857
commit 250953ad35
5 changed files with 172 additions and 215 deletions
@@ -214,6 +214,9 @@ fn compact_subagent_tool_result_for_context(tool_name: &str, raw: &str) -> Optio
    };

    let mut out = String::from("[sub-agent result summarized for parent context]\n");
+    out.push_str(
+        "Child results are self-reports; verify side effects with tools like read_file or list_dir before claiming success.\n",
+    );
    out.push_str("Use `agent_result` again only if you need the full raw payload.\n");
    for (idx, snapshot) in snapshots.iter().enumerate() {
        if idx >= 8 {
@@ -761,6 +761,9 @@ fn subagent_results_are_summarized_before_parent_context_insertion() {
    assert!(context.contains("Inspect the RLM rendering path"));
    assert!(context.contains("steps=12"));
    assert!(context.len() < output.content.len());
+    assert!(context.contains("self-report"));
+    assert!(context.contains("verify side effects"));
+    assert!(context.contains("read_file") && context.contains("list_dir"));
 }

 #[test]
@@ -270,15 +270,16 @@ impl SubAgentType {
    /// Get the system prompt for this agent type.
    ///
    /// The prompt is the variant's role-specific `*_AGENT_INTRO` text followed
    /// directly by the shared `SUBAGENT_OUTPUT_FORMAT` block.
    #[must_use]
    pub fn system_prompt(&self) -> String {
-        match self {
-            Self::General => GENERAL_AGENT_PROMPT.to_string(),
-            Self::Explore => EXPLORE_AGENT_PROMPT.to_string(),
-            Self::Plan => PLAN_AGENT_PROMPT.to_string(),
-            Self::Review => REVIEW_AGENT_PROMPT.to_string(),
-            Self::Implementer => IMPLEMENTER_AGENT_PROMPT.to_string(),
-            Self::Verifier => VERIFIER_AGENT_PROMPT.to_string(),
-            Self::Custom => CUSTOM_AGENT_PROMPT.to_string(),
-        }
+        let role_intro = match self {
+            Self::General => GENERAL_AGENT_INTRO,
+            Self::Explore => EXPLORE_AGENT_INTRO,
+            Self::Plan => PLAN_AGENT_INTRO,
+            Self::Review => REVIEW_AGENT_INTRO,
+            Self::Implementer => IMPLEMENTER_AGENT_INTRO,
+            Self::Verifier => VERIFIER_AGENT_INTRO,
+            Self::Custom => CUSTOM_AGENT_INTRO,
+        };
+        format!("{role_intro}{SUBAGENT_OUTPUT_FORMAT}") // every intro already ends in a blank line, so no separator is added
    }

    /// Get the default allowed tools for this agent type.
@@ -1581,11 +1582,13 @@ impl ToolSpec for AgentSpawnTool {
    }

    /// Static description text for the `agent_spawn` tool: a usage summary
    /// followed by a "trust model" section (with a markdown table) telling the
    /// reader to re-verify side effects that a sub-agent claims to have made.
    fn description(&self) -> &'static str {
-        "Spawn a background sub-agent for a focused task. Returns an agent_id immediately; \
-         follow with agent_result to retrieve the final result. Default cap of 10 concurrent \
-         sub-agents (configurable via `[subagents].max_concurrent`); each is a full sub-agent \
-         loop, so cancel or wait if you hit the cap. For parallel one-shot LLM queries, just \
-         emit multiple tool calls in one turn — the dispatcher runs them in parallel."
+        concat!( // joined at compile time, so the result is still a `&'static str`
+            "Spawn a background sub-agent for a focused task. Returns an agent_id immediately; follow with agent_result to retrieve the final result. Default cap of 10 concurrent sub-agents (configurable via `[subagents].max_concurrent`); each is a full sub-agent loop, so cancel or wait if you hit the cap. For parallel one-shot LLM queries, just emit multiple tool calls in one turn — the dispatcher runs them in parallel.\n\n",
+            "## Trust model: subagent results are self-reports, not verified facts\n\n",
+            "`agent_result` returns the child's narrative summary of what happened. For operations with external side effects, the child's summary may be wrong. Re-verify before reporting success to the user:\n\n",
+            "| Side effect | Re-verify with |\n|---|---|\n| URL claimed posted/written | `fetch_url` and check the response |\n| File claimed created | `read_file` or `list_dir` |\n| File claimed edited | `read_file` and check the change is present |\n| HTTP POST/PUT response | inspect status code and body |\n| Git operation | `git_status` / `git_diff` |\n| Test claimed passing | `run_tests` |\n| Process claimed started | `exec_shell` (e.g. `pgrep`, `lsof -i`) |\n\n",
+            "If the child returns a verifiable handle (URL, file path, exit code, commit SHA), check it. If it doesn't, ask the child to return one or verify yourself before proceeding."
+        )
    }

    fn input_schema(&self) -> Value {
@@ -3965,179 +3968,54 @@ fn truncate_preview(text: &str) -> String {
    }
 }

-// === System prompts ===
-//
-// Each per-agent-type prompt is composed from two parts:
-//
-//   1. A short role-specific intro that names the agent's job, its scope,
-//      and any role-specific tactics or stop conditions.
-//   2. The shared `subagent_output_format.md` block, which is the single
-//      source of truth for the SUMMARY / EVIDENCE / CHANGES / RISKS /
-//      BLOCKERS contract, the stop condition, and the typed-tool-surface
-//      conventions. Tweaks to the contract live in that one file.
-//
-// `concat!` resolves at compile time, so the per-type constants remain
-// `&'static str` and `system_prompt()` keeps its `String` return type.
-// The `include_str!` calls inside each `concat!` all point at the same
-// file, so the format is defined once even though it's inlined many times.
+// === System prompts ===
+//
+// `system_prompt()` builds each prompt at call time from two parts:
+//
+//   1. A short role-specific `*_AGENT_INTRO` constant.
+//   2. The shared `SUBAGENT_OUTPUT_FORMAT` block, included once from
+//      `subagent_output_format.md`.
+const SUBAGENT_OUTPUT_FORMAT: &str = include_str!("../../prompts/subagent_output_format.md"); // shared block that system_prompt() appends to every role intro

-const GENERAL_AGENT_PROMPT: &str = concat!(
+const GENERAL_AGENT_INTRO: &str = concat!( // intro for SubAgentType::General; system_prompt() appends the shared output format
    "You are a general-purpose sub-agent spawned to handle a specific task autonomously.\n",
-    "\n",
-    "Your scope is exactly what the parent assigned to you. Do not expand the\n",
-    "objective — if you discover related work that needs doing, surface it under\n",
-    "RISKS or BLOCKERS rather than starting it. Work autonomously: the parent is\n",
-    "not available to answer questions mid-run.\n",
-    "\n",
-    "Plan before you act. Use `checklist_write` for any multi-step task so your work\n",
-    "is visible in the parent's sidebar. For complex initiatives, layer\n",
-    "`update_plan` (strategy) above `checklist_write` (tactics).\n",
-    "\n",
-    include_str!("../../prompts/subagent_output_format.md"),
+    "Stay inside the assigned scope; put adjacent work under RISKS/BLOCKERS.\n",
+    "Plan multi-step work with `checklist_write`; add `update_plan` for complex strategy.\n\n"
 );

-const EXPLORE_AGENT_PROMPT: &str = concat!(
-    "You are an exploration sub-agent. Your job is to map the relevant region\n",
-    "of the codebase fast and report what is there. You are read-only by\n",
-    "convention — do not write, patch, or run side-effectful commands. If the\n",
-    "task seems to require a write, stop and put it under BLOCKERS.\n",
-    "\n",
-    "Method:\n",
-    "- Start with `list_dir` and `file_search` to orient.\n",
-    "- Use `grep_files` (NOT `exec_shell rg`) to find call sites, type defs,\n",
-    "  and string literals. Prefer narrow, structured queries over broad scans.\n",
-    "- Read each candidate file with `read_file`. Skim, then quote line ranges.\n",
-    "- Stop reading once you have enough evidence — exhaustive sweeps are not\n",
-    "  the goal. The parent will spawn a follow-up explorer if needed.\n",
-    "\n",
-    "EVIDENCE is the load-bearing section for explorers. Cite every file you\n",
-    "read with `path:line-range` and one line per finding. The parent uses your\n",
-    "EVIDENCE list as a working set for the next turn, so be precise.\n",
-    "\n",
-    "CHANGES will almost always be \"None.\" for an explorer.\n",
-    "\n",
-    include_str!("../../prompts/subagent_output_format.md"),
+const EXPLORE_AGENT_INTRO: &str = concat!( // intro for SubAgentType::Explore; the output format is appended in system_prompt()
+    "You are an exploration sub-agent. Map the relevant code quickly and stay read-only.\n",
+    "Use list_dir/file_search, grep_files, and read_file; stop once evidence is sufficient.\n",
+    "EVIDENCE is load-bearing: cite `path:line-range` for each finding.\n",
+    "CHANGES will almost always be \"None.\" for an explorer.\n\n"
 );

-const PLAN_AGENT_PROMPT: &str = concat!(
-    "You are a planning sub-agent. Your job is to take an objective and\n",
-    "produce a prioritized, executable plan — not to execute it. Keep writes\n",
-    "to a minimum (notes and plan artifacts only); avoid patches and shell\n",
-    "side effects.\n",
-    "\n",
-    "Method:\n",
-    "- Read enough of the codebase to ground the plan in reality. A plan\n",
-    "  written without `read_file` evidence is a guess.\n",
-    "- Decompose the objective into ordered, verifiable steps. Each step names\n",
-    "  the artifact it produces and the check that proves it works.\n",
-    "- Surface trade-offs explicitly. If two approaches are viable, name both\n",
-    "  and pick one with a reason — don't leave the parent with a fork.\n",
-    "- Use `update_plan` to record the high-level strategy and `checklist_write` to\n",
-    "  emit the granular backlog. The parent (and the user) reads these from\n",
-    "  the sidebar after you finish.\n",
-    "\n",
-    "Prioritization: order todos by the dependency graph first, then by the\n",
-    "ratio of risk reduced to effort spent. Tag each item with `[P0]` / `[P1]`\n",
-    "/ `[P2]` so the parent can pick a slice without re-reading the whole plan.\n",
-    "\n",
-    "CHANGES should list the plan artifacts you wrote (e.g. `update_plan` rows,\n",
-    "`checklist_write` ids, any notes). Do not include speculative future edits.\n",
-    "\n",
-    include_str!("../../prompts/subagent_output_format.md"),
+const PLAN_AGENT_INTRO: &str = concat!( // intro for SubAgentType::Plan; the output format is appended in system_prompt()
+    "You are a planning sub-agent. Produce a grounded, prioritized plan, not patches.\n",
+    "Read enough code to avoid guessing; each step names its artifact and verification.\n",
+    "Use update_plan/checklist_write for plan artifacts and explain key trade-offs.\n",
+    "CHANGES should list plan artifacts only, not future speculative edits.\n\n"
 );

-const REVIEW_AGENT_PROMPT: &str = concat!(
-    "You are a code review sub-agent. Your job is to read the code under\n",
-    "review and emit a severity-scored list of findings. You are read-only by\n",
-    "convention — do not patch the code under review even if a fix is obvious;\n",
-    "describe the fix in the finding so the parent can apply it.\n",
-    "\n",
-    "Method:\n",
-    "- Read the diff or files end-to-end with `read_file` before scoring.\n",
-    "- Use `grep_files` to check for sibling call sites, similar patterns\n",
-    "  elsewhere, and existing tests covering the same surface.\n",
-    "- For each finding, score severity as one of:\n",
-    "    BLOCKER  — correctness, security, data loss, or contract break.\n",
-    "    MAJOR    — likely bug, missing error path, perf regression at scale.\n",
-    "    MINOR    — style, naming, redundancy, suboptimal but correct code.\n",
-    "    NIT      — taste; reasonable people may disagree.\n",
-    "- Order EVIDENCE bullets by severity, BLOCKER first. Each bullet:\n",
-    "  `[SEVERITY] path:line-range — one-line description; suggested fix`.\n",
-    "- Be constructive. Cite the failure mode, not the author.\n",
-    "\n",
-    "If you find no issues at MAJOR or above, say so plainly in SUMMARY — a\n",
-    "clean review is a valid result and the parent benefits from knowing it.\n",
-    "\n",
-    "CHANGES will almost always be \"None.\" for a reviewer.\n",
-    "\n",
-    include_str!("../../prompts/subagent_output_format.md"),
+const REVIEW_AGENT_INTRO: &str = concat!( // intro for SubAgentType::Review; the output format is appended in system_prompt()
+    "You are a code review sub-agent. Stay read-only and report severity-scored findings.\n",
+    "Read the diff/files, grep sibling patterns/tests, then order EVIDENCE by severity.\n",
+    "Use BLOCKER/MAJOR/MINOR/NIT and include path:line-range plus suggested fix.\n",
+    "If no MAJOR+ issues exist, say so plainly in SUMMARY.\n",
+    "CHANGES will almost always be \"None.\" for a reviewer.\n\n"
 );

-const CUSTOM_AGENT_PROMPT: &str = concat!(
-    "You are a custom sub-agent. The parent has given you a narrowed tool\n",
-    "registry — only the tools you see at runtime are available. Do not try\n",
-    "to reach for a tool that is not registered; if the task needs one, put\n",
-    "the gap under BLOCKERS and stop.\n",
-    "\n",
-    "Stay tightly scoped to the assigned objective. The parent chose Custom\n",
-    "specifically to constrain you — do not expand into adjacent work.\n",
-    "\n",
-    include_str!("../../prompts/subagent_output_format.md"),
+const CUSTOM_AGENT_INTRO: &str = concat!( // intro for SubAgentType::Custom; the output format is appended in system_prompt()
+    "You are a custom sub-agent with a narrowed tool registry.\n",
+    "Use only tools available at runtime; put missing capabilities under BLOCKERS and stop.\n",
+    "Stay tightly scoped to the assigned objective.\n\n"
 );

-const IMPLEMENTER_AGENT_PROMPT: &str = concat!(
-    "You are an implementation sub-agent. Your job is to land the change\n",
-    "the parent assigned to you — write the code, modify the files, satisfy\n",
-    "the contract — with the *minimum* surrounding edit. You do not refactor\n",
-    "adjacent code. You do not rename unused variables. You do not 'tidy up'\n",
-    "while you're in the file. If you see related work that should happen,\n",
-    "surface it under RISKS or BLOCKERS rather than starting it.\n",
-    "\n",
-    "Method:\n",
-    "- Read the target file(s) end-to-end before editing. Edits made without\n",
-    "  reading the file produce structurally wrong patches.\n",
-    "- Prefer `edit_file` (single search/replace) for narrow changes.\n",
-    "  Reach for `apply_patch` only when the change spans multiple hunks\n",
-    "  or is structurally tricky.\n",
-    "- After every batch of edits, run a quick verification: a relevant\n",
-    "  `cargo check` / `npm run lint` / `pytest -k <test>` so you don't\n",
-    "  hand the parent a half-baked implementation.\n",
-    "- If the change requires writing tests, write them first or alongside\n",
-    "  the implementation — never as a follow-up the parent has to ask for.\n",
-    "\n",
-    "CHANGES is the load-bearing section for implementers. List every file\n",
-    "you modified with a one-line summary of what changed and why. The parent\n",
-    "uses CHANGES to decide what to inspect next.\n",
-    "\n",
-    include_str!("../../prompts/subagent_output_format.md"),
+const IMPLEMENTER_AGENT_INTRO: &str = concat!( // intro for SubAgentType::Implementer; the output format is appended in system_prompt()
+    "You are an implementation sub-agent. Land the assigned change with minimal surrounding edits.\n",
+    "Read target files before editing; prefer edit_file for narrow changes and apply_patch for hunks.\n", // NOTE(review): the removed prompt limited apply_patch to multi-hunk or structurally tricky changes; "for hunks" drops that qualifier — confirm this is intended
+    "Run relevant verification after edit batches; write needed tests with the implementation.\n",
+    "CHANGES is load-bearing: list every modified file with a one-line why.\n\n"
 );

-const VERIFIER_AGENT_PROMPT: &str = concat!(
-    "You are a verification sub-agent. Your job is to *run* the project's\n",
-    "test suite (or other validation gates) and report pass/fail with the\n",
-    "evidence the parent needs to act. You are read-only by convention —\n",
-    "do not patch failing tests, do not 'fix' lints, do not modify code.\n",
-    "If a fix seems obvious, describe it under RISKS so the parent can\n",
-    "spawn an Implementer.\n",
-    "\n",
-    "Method:\n",
-    "- Run the right gate for the language: `cargo test --workspace`,\n",
-    "  `npm test`, `pytest`, `go test ./...`. Use `run_tests` when it's\n",
-    "  available; fall back to `exec_shell` when the project has a custom\n",
-    "  invocation.\n",
-    "- Run lints if requested: `cargo clippy -- -D warnings`,\n",
-    "  `npm run lint`, `ruff check .`. Don't run lints the parent didn't\n",
-    "  ask for; lint noise drowns the signal you were spawned to surface.\n",
-    "- Capture the exact failing assertion plus the stack trace / file:line\n",
-    "  in EVIDENCE. A failure summarised as 'cargo test failed' is useless;\n",
-    "  the parent needs the actual panic.\n",
-    "\n",
-    "OUTCOME goes at the top of SUMMARY: PASS / FAIL / FLAKY. If FLAKY,\n",
-    "say which test and how many runs you tried.\n",
-    "\n",
-    "CHANGES will almost always be \"None.\" for a verifier.\n",
-    "\n",
-    include_str!("../../prompts/subagent_output_format.md"),
+const VERIFIER_AGENT_INTRO: &str = concat!( // intro for SubAgentType::Verifier; the output format is appended in system_prompt()
+    "You are a verification sub-agent. Run requested gates and stay read-only.\n",
+    "Report PASS/FAIL/FLAKY at the top of SUMMARY with exact command evidence.\n",
+    "Capture failing assertion and file:line; put obvious fixes under RISKS.\n",
+    "CHANGES will almost always be \"None.\" for a verifier.\n\n"
 );

 // === Tests ===
@@ -27,6 +27,10 @@ fn message_text(message: &Message) -> &str {
    }
 }

+fn estimate_tool_description_tokens_conservative(text: &str) -> usize {
+    text.chars().count().div_ceil(3) // token estimate: one token per 3 chars (not bytes), rounded up
+}
+
 #[test]
 fn test_agent_type_from_str() {
    assert_eq!(
@@ -149,6 +153,59 @@ fn test_implementer_and_verifier_have_distinct_prompts() {
    );
 }

+#[test]
+fn test_agent_type_prompts_include_shared_output_contract_once() {
+    for (agent_type, marker) in [ // each variant paired with a phrase from the first sentence of its intro
+        (SubAgentType::General, "general-purpose sub-agent"),
+        (SubAgentType::Explore, "exploration sub-agent"),
+        (SubAgentType::Plan, "planning sub-agent"),
+        (SubAgentType::Review, "code review sub-agent"),
+        (SubAgentType::Implementer, "implementation sub-agent"),
+        (SubAgentType::Verifier, "verification sub-agent"),
+        (SubAgentType::Custom, "custom sub-agent"),
+    ] {
+        let prompt = agent_type.system_prompt();
+        assert!(prompt.contains(marker));
+        assert_eq!(
+            prompt.matches("## Output contract (mandatory)").count(), // presumably a heading in subagent_output_format.md (file not visible here) — confirm
+            1,
+            "{agent_type:?} prompt should include the shared output contract exactly once"
+        );
+        assert!(prompt.contains("### SUMMARY") && prompt.contains("### BLOCKERS"));
+    }
+}
+
+#[test]
+fn agent_spawn_description_warns_parent_to_verify_self_reports_within_budget() {
+    let tmp = tempdir().expect("tempdir");
+    let manager = new_shared_subagent_manager(tmp.path().to_path_buf(), 1);
+    let tool = AgentSpawnTool::new(manager, stub_runtime());
+    let description = tool.description();
+
+    assert!(
+        description
+            .contains("## Trust model: subagent results are self-reports, not verified facts")
+    );
+    assert!(description.contains("`agent_result` returns the child's narrative summary"));
+    assert!(description.contains("| Side effect | Re-verify with |"));
+    assert!(description.contains("If the child returns a verifiable handle"));
+    for row in [ // every row of the re-verification table must appear verbatim
+        "| URL claimed posted/written | `fetch_url` and check the response |",
+        "| File claimed created | `read_file` or `list_dir` |",
+        "| File claimed edited | `read_file` and check the change is present |",
+        "| HTTP POST/PUT response | inspect status code and body |",
+        "| Git operation | `git_status` / `git_diff` |",
+        "| Test claimed passing | `run_tests` |",
+        "| Process claimed started | `exec_shell` (e.g. `pgrep`, `lsof -i`) |",
+    ] {
+        assert!(description.contains(row));
+    }
+    assert!(
+        estimate_tool_description_tokens_conservative(description) <= 1024, // estimate is ceil(chars / 3)
+        "agent_spawn description exceeds the conservative 1024-token budget"
+    );
+}
+
 #[test]
 fn test_implementer_allowed_tools_include_writes() {
    // Implementer is the write-heavy role; the deprecated
@@ -410,56 +410,72 @@ async fn compaction_non_streaming_returns_queued_message_response() {

 // === 6. Sub-agent style turn ================================================
 //
-// Sub-agents share the trait boundary: a parent's tool-call (`agent_spawn`)
-// causes a child runtime to be created with its own `Arc<dyn LlmClient>`.
-// At the trait level the test is identical to a normal turn — what changes
-// is which mock instance answers. This test demonstrates two independent
-// mocks (parent + child) cooperating on the same protocol.
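+// The next turn after an `agent_result` summary must re-verify the claimed
+// side effect before reporting success. The parent below is a canned
+// `MockLlmClient`, so this test pins the expected event shape (a `read_file`
+// call with no text before it) rather than exercising live model behavior.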

 #[tokio::test]
-async fn sub_agent_parent_and_child_each_drive_independent_mocks() {
-    // Parent decides to delegate.
-    let parent_turn = vec![
-        canned::message_start("parent_t1"),
-        canned::tool_use_block_start(0, "spawn_id", "agent_spawn"),
-        canned::tool_input_delta(0, r#"{"prompt":"compute 2+2"}"#),
-        canned::block_stop(0),
+async fn v4_parent_reverifies_subagent_file_self_report_before_claiming_success() {
+    let tmp = tempfile::tempdir().expect("tempdir");
+    let missing = tmp.path().join("child-claimed-write.txt");
+    assert!(!missing.exists(), "fixture path must start missing");
+    let missing_path = missing.display().to_string();
+
+    let parent = MockLlmClient::new(vec![vec![ // one canned turn: a thinking delta, then a `read_file` tool call
+        canned::message_start("parent_verify"),
+        canned::thinking_delta(0, "Verify the child's file-write self-report first."),
+        canned::tool_use_block_start(1, "verify_file", "read_file"),
+        canned::tool_input_delta(1, &serde_json::json!({ "path": &missing_path }).to_string()),
+        canned::block_stop(1),
        canned::message_delta("tool_use", None),
        canned::message_stop(),
-    ];
-    let parent = MockLlmClient::new(vec![parent_turn])
-        .with_provider("mock-parent")
-        .with_model("deepseek-v4-pro");
+    ]])
+    .with_model("deepseek-v4-pro");
+    let tool_summary = format!( // hand-written stand-in for a summarized `agent_result` payload claiming the file was written
+        "[sub-agent result summarized for parent context]\n\
+Child results are self-reports; verify side effects with tools like read_file or list_dir before claiming success.\n\
+- agent_filecheck (implementer) status=Completed\n  result: Wrote {missing_path} successfully."
+    );

-    // Child does the work and replies with text.
-    let child_turn = vec![
-        canned::message_start("child_t1"),
-        canned::text_block_start(0),
-        canned::text_delta(0, "4"),
-        canned::block_stop(0),
-        canned::message_delta("end_turn", None),
-        canned::message_stop(),
-    ];
-    let child = MockLlmClient::new(vec![child_turn])
-        .with_provider("mock-child")
-        .with_model("deepseek-v4-flash");
-
-    // Drive both mocks against their own request streams.
-    let _ = parent
-        .create_message_stream(make_request(vec![user_message("delegate")]))
+    let mut stream = parent
+        .create_message_stream(make_request(vec![
+            user_message("Use a child to create the file, then report back."),
+            assistant_tool_call(
+                "agent_result_call",
+                "agent_result",
+                serde_json::json!({
+                    "agent_id": "agent_filecheck"
+                }),
+            ),
+            tool_result_message("agent_result_call", &tool_summary),
+        ]))
        .await
-        .unwrap()
-        .next()
-        .await;
+        .unwrap();

-    let (child_text, _) =
-        drain_stream_text(&child, make_request(vec![user_message("compute 2+2")])).await;
-    assert_eq!(child_text, "4");
+    let mut text_before_verification = String::new();
+    let mut tool_name = None;
+    let mut tool_input = String::new();
+    while let Some(ev) = stream.next().await { // collect the tool name, its JSON input and any text deltas
+        match ev.unwrap() {
+            StreamEvent::ContentBlockStart { content_block, .. } => {
+                use crate::models::ContentBlockStart;
+                if let ContentBlockStart::ToolUse { name, .. } = content_block {
+                    tool_name = Some(name);
+                }
+            }
+            StreamEvent::ContentBlockDelta { delta, .. } => match delta {
+                Delta::InputJsonDelta { partial_json } => tool_input.push_str(&partial_json),
+                Delta::TextDelta { text } => text_before_verification.push_str(&text),
+                _ => {}
+            },
+            StreamEvent::MessageStop => break,
+            _ => {}
+        }
+    }

-    assert_eq!(parent.provider_name(), "mock-parent");
-    assert_eq!(child.provider_name(), "mock-child");
-    assert_eq!(parent.captured_requests().len(), 1);
-    assert_eq!(child.captured_requests().len(), 1);
+    assert_eq!(text_before_verification, ""); // the turn streamed no text deltas, only the tool call
+    assert_eq!(tool_name.as_deref(), Some("read_file"));
+    let parsed: serde_json::Value = serde_json::from_str(&tool_input).expect("tool input JSON");
+    assert_eq!(parsed["path"], missing_path); // the call targets the exact path the child claimed to write
 }

 // === 7. Capacity-gate observation ===========================================