feat(tools/agent_spawn): teach parent that subagent results are self-reports
This commit is contained in:
committed by
Hunter Bown
parent
682e915857
commit
250953ad35
@@ -214,6 +214,9 @@ fn compact_subagent_tool_result_for_context(tool_name: &str, raw: &str) -> Optio
|
||||
};
|
||||
|
||||
let mut out = String::from("[sub-agent result summarized for parent context]\n");
|
||||
out.push_str(
|
||||
"Child results are self-reports; verify side effects with tools like read_file or list_dir before claiming success.\n",
|
||||
);
|
||||
out.push_str("Use `agent_result` again only if you need the full raw payload.\n");
|
||||
for (idx, snapshot) in snapshots.iter().enumerate() {
|
||||
if idx >= 8 {
|
||||
|
||||
@@ -761,6 +761,9 @@ fn subagent_results_are_summarized_before_parent_context_insertion() {
|
||||
assert!(context.contains("Inspect the RLM rendering path"));
|
||||
assert!(context.contains("steps=12"));
|
||||
assert!(context.len() < output.content.len());
|
||||
assert!(context.contains("self-report"));
|
||||
assert!(context.contains("verify side effects"));
|
||||
assert!(context.contains("read_file") && context.contains("list_dir"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -270,15 +270,16 @@ impl SubAgentType {
|
||||
/// Get the system prompt for this agent type.
|
||||
#[must_use]
|
||||
pub fn system_prompt(&self) -> String {
|
||||
match self {
|
||||
Self::General => GENERAL_AGENT_PROMPT.to_string(),
|
||||
Self::Explore => EXPLORE_AGENT_PROMPT.to_string(),
|
||||
Self::Plan => PLAN_AGENT_PROMPT.to_string(),
|
||||
Self::Review => REVIEW_AGENT_PROMPT.to_string(),
|
||||
Self::Implementer => IMPLEMENTER_AGENT_PROMPT.to_string(),
|
||||
Self::Verifier => VERIFIER_AGENT_PROMPT.to_string(),
|
||||
Self::Custom => CUSTOM_AGENT_PROMPT.to_string(),
|
||||
}
|
||||
let role_intro = match self {
|
||||
Self::General => GENERAL_AGENT_INTRO,
|
||||
Self::Explore => EXPLORE_AGENT_INTRO,
|
||||
Self::Plan => PLAN_AGENT_INTRO,
|
||||
Self::Review => REVIEW_AGENT_INTRO,
|
||||
Self::Implementer => IMPLEMENTER_AGENT_INTRO,
|
||||
Self::Verifier => VERIFIER_AGENT_INTRO,
|
||||
Self::Custom => CUSTOM_AGENT_INTRO,
|
||||
};
|
||||
format!("{role_intro}{SUBAGENT_OUTPUT_FORMAT}")
|
||||
}
|
||||
|
||||
/// Get the default allowed tools for this agent type.
|
||||
@@ -1581,11 +1582,13 @@ impl ToolSpec for AgentSpawnTool {
|
||||
}
|
||||
|
||||
fn description(&self) -> &'static str {
|
||||
"Spawn a background sub-agent for a focused task. Returns an agent_id immediately; \
|
||||
follow with agent_result to retrieve the final result. Default cap of 10 concurrent \
|
||||
sub-agents (configurable via `[subagents].max_concurrent`); each is a full sub-agent \
|
||||
loop, so cancel or wait if you hit the cap. For parallel one-shot LLM queries, just \
|
||||
emit multiple tool calls in one turn — the dispatcher runs them in parallel."
|
||||
concat!(
|
||||
"Spawn a background sub-agent for a focused task. Returns an agent_id immediately; follow with agent_result to retrieve the final result. Default cap of 10 concurrent sub-agents (configurable via `[subagents].max_concurrent`); each is a full sub-agent loop, so cancel or wait if you hit the cap. For parallel one-shot LLM queries, just emit multiple tool calls in one turn — the dispatcher runs them in parallel.\n\n",
|
||||
"## Trust model: subagent results are self-reports, not verified facts\n\n",
|
||||
"`agent_result` returns the child's narrative summary of what happened. For operations with external side effects, the child's summary may be wrong. Re-verify before reporting success to the user:\n\n",
|
||||
"| Side effect | Re-verify with |\n|---|---|\n| URL claimed posted/written | `fetch_url` and check the response |\n| File claimed created | `read_file` or `list_dir` |\n| File claimed edited | `read_file` and check the change is present |\n| HTTP POST/PUT response | inspect status code and body |\n| Git operation | `git_status` / `git_diff` |\n| Test claimed passing | `run_tests` |\n| Process claimed started | `exec_shell` (e.g. `pgrep`, `lsof -i`) |\n\n",
|
||||
"If the child returns a verifiable handle (URL, file path, exit code, commit SHA), check it. If it doesn't, ask the child to return one or verify yourself before proceeding."
|
||||
)
|
||||
}
|
||||
|
||||
fn input_schema(&self) -> Value {
|
||||
@@ -3965,179 +3968,54 @@ fn truncate_preview(text: &str) -> String {
|
||||
}
|
||||
}
|
||||
|
||||
// === System prompts ===
|
||||
//
|
||||
// Each per-agent-type prompt is composed from two parts:
|
||||
//
|
||||
// 1. A short role-specific intro that names the agent's job, its scope,
|
||||
// and any role-specific tactics or stop conditions.
|
||||
// 2. The shared `subagent_output_format.md` block, which is the single
|
||||
// source of truth for the SUMMARY / EVIDENCE / CHANGES / RISKS /
|
||||
// BLOCKERS contract, the stop condition, and the typed-tool-surface
|
||||
// conventions. Tweaks to the contract live in that one file.
|
||||
//
|
||||
// `concat!` resolves at compile time, so the per-type constants remain
|
||||
// `&'static str` and `system_prompt()` keeps its `String` return type.
|
||||
// The `include_str!` calls inside each `concat!` all point at the same
|
||||
// file, so the format is defined once even though it's inlined many times.
|
||||
const SUBAGENT_OUTPUT_FORMAT: &str = include_str!("../../prompts/subagent_output_format.md");
|
||||
|
||||
const GENERAL_AGENT_PROMPT: &str = concat!(
|
||||
const GENERAL_AGENT_INTRO: &str = concat!(
|
||||
"You are a general-purpose sub-agent spawned to handle a specific task autonomously.\n",
|
||||
"\n",
|
||||
"Your scope is exactly what the parent assigned to you. Do not expand the\n",
|
||||
"objective — if you discover related work that needs doing, surface it under\n",
|
||||
"RISKS or BLOCKERS rather than starting it. Work autonomously: the parent is\n",
|
||||
"not available to answer questions mid-run.\n",
|
||||
"\n",
|
||||
"Plan before you act. Use `checklist_write` for any multi-step task so your work\n",
|
||||
"is visible in the parent's sidebar. For complex initiatives, layer\n",
|
||||
"`update_plan` (strategy) above `checklist_write` (tactics).\n",
|
||||
"\n",
|
||||
include_str!("../../prompts/subagent_output_format.md"),
|
||||
"Stay inside the assigned scope; put adjacent work under RISKS/BLOCKERS.\n",
|
||||
"Plan multi-step work with `checklist_write`; add `update_plan` for complex strategy.\n\n"
|
||||
);
|
||||
|
||||
const EXPLORE_AGENT_PROMPT: &str = concat!(
|
||||
"You are an exploration sub-agent. Your job is to map the relevant region\n",
|
||||
"of the codebase fast and report what is there. You are read-only by\n",
|
||||
"convention — do not write, patch, or run side-effectful commands. If the\n",
|
||||
"task seems to require a write, stop and put it under BLOCKERS.\n",
|
||||
"\n",
|
||||
"Method:\n",
|
||||
"- Start with `list_dir` and `file_search` to orient.\n",
|
||||
"- Use `grep_files` (NOT `exec_shell rg`) to find call sites, type defs,\n",
|
||||
" and string literals. Prefer narrow, structured queries over broad scans.\n",
|
||||
"- Read each candidate file with `read_file`. Skim, then quote line ranges.\n",
|
||||
"- Stop reading once you have enough evidence — exhaustive sweeps are not\n",
|
||||
" the goal. The parent will spawn a follow-up explorer if needed.\n",
|
||||
"\n",
|
||||
"EVIDENCE is the load-bearing section for explorers. Cite every file you\n",
|
||||
"read with `path:line-range` and one line per finding. The parent uses your\n",
|
||||
"EVIDENCE list as a working set for the next turn, so be precise.\n",
|
||||
"\n",
|
||||
"CHANGES will almost always be \"None.\" for an explorer.\n",
|
||||
"\n",
|
||||
include_str!("../../prompts/subagent_output_format.md"),
|
||||
const EXPLORE_AGENT_INTRO: &str = concat!(
|
||||
"You are an exploration sub-agent. Map the relevant code quickly and stay read-only.\n",
|
||||
"Use list_dir/file_search, grep_files, and read_file; stop once evidence is sufficient.\n",
|
||||
"EVIDENCE is load-bearing: cite `path:line-range` for each finding.\n",
|
||||
"CHANGES will almost always be \"None.\" for an explorer.\n\n"
|
||||
);
|
||||
|
||||
const PLAN_AGENT_PROMPT: &str = concat!(
|
||||
"You are a planning sub-agent. Your job is to take an objective and\n",
|
||||
"produce a prioritized, executable plan — not to execute it. Keep writes\n",
|
||||
"to a minimum (notes and plan artifacts only); avoid patches and shell\n",
|
||||
"side effects.\n",
|
||||
"\n",
|
||||
"Method:\n",
|
||||
"- Read enough of the codebase to ground the plan in reality. A plan\n",
|
||||
" written without `read_file` evidence is a guess.\n",
|
||||
"- Decompose the objective into ordered, verifiable steps. Each step names\n",
|
||||
" the artifact it produces and the check that proves it works.\n",
|
||||
"- Surface trade-offs explicitly. If two approaches are viable, name both\n",
|
||||
" and pick one with a reason — don't leave the parent with a fork.\n",
|
||||
"- Use `update_plan` to record the high-level strategy and `checklist_write` to\n",
|
||||
" emit the granular backlog. The parent (and the user) reads these from\n",
|
||||
" the sidebar after you finish.\n",
|
||||
"\n",
|
||||
"Prioritization: order todos by the dependency graph first, then by the\n",
|
||||
"ratio of risk reduced to effort spent. Tag each item with `[P0]` / `[P1]`\n",
|
||||
"/ `[P2]` so the parent can pick a slice without re-reading the whole plan.\n",
|
||||
"\n",
|
||||
"CHANGES should list the plan artifacts you wrote (e.g. `update_plan` rows,\n",
|
||||
"`checklist_write` ids, any notes). Do not include speculative future edits.\n",
|
||||
"\n",
|
||||
include_str!("../../prompts/subagent_output_format.md"),
|
||||
const PLAN_AGENT_INTRO: &str = concat!(
|
||||
"You are a planning sub-agent. Produce a grounded, prioritized plan, not patches.\n",
|
||||
"Read enough code to avoid guessing; each step names its artifact and verification.\n",
|
||||
"Use update_plan/checklist_write for plan artifacts and explain key trade-offs.\n",
|
||||
"CHANGES should list plan artifacts only, not future speculative edits.\n\n"
|
||||
);
|
||||
|
||||
const REVIEW_AGENT_PROMPT: &str = concat!(
|
||||
"You are a code review sub-agent. Your job is to read the code under\n",
|
||||
"review and emit a severity-scored list of findings. You are read-only by\n",
|
||||
"convention — do not patch the code under review even if a fix is obvious;\n",
|
||||
"describe the fix in the finding so the parent can apply it.\n",
|
||||
"\n",
|
||||
"Method:\n",
|
||||
"- Read the diff or files end-to-end with `read_file` before scoring.\n",
|
||||
"- Use `grep_files` to check for sibling call sites, similar patterns\n",
|
||||
" elsewhere, and existing tests covering the same surface.\n",
|
||||
"- For each finding, score severity as one of:\n",
|
||||
" BLOCKER — correctness, security, data loss, or contract break.\n",
|
||||
" MAJOR — likely bug, missing error path, perf regression at scale.\n",
|
||||
" MINOR — style, naming, redundancy, suboptimal but correct code.\n",
|
||||
" NIT — taste; reasonable people may disagree.\n",
|
||||
"- Order EVIDENCE bullets by severity, BLOCKER first. Each bullet:\n",
|
||||
" `[SEVERITY] path:line-range — one-line description; suggested fix`.\n",
|
||||
"- Be constructive. Cite the failure mode, not the author.\n",
|
||||
"\n",
|
||||
"If you find no issues at MAJOR or above, say so plainly in SUMMARY — a\n",
|
||||
"clean review is a valid result and the parent benefits from knowing it.\n",
|
||||
"\n",
|
||||
"CHANGES will almost always be \"None.\" for a reviewer.\n",
|
||||
"\n",
|
||||
include_str!("../../prompts/subagent_output_format.md"),
|
||||
const REVIEW_AGENT_INTRO: &str = concat!(
|
||||
"You are a code review sub-agent. Stay read-only and report severity-scored findings.\n",
|
||||
"Read the diff/files, grep sibling patterns/tests, then order EVIDENCE by severity.\n",
|
||||
"Use BLOCKER/MAJOR/MINOR/NIT and include path:line-range plus suggested fix.\n",
|
||||
"If no MAJOR+ issues exist, say so plainly in SUMMARY.\n",
|
||||
"CHANGES will almost always be \"None.\" for a reviewer.\n\n"
|
||||
);
|
||||
|
||||
const CUSTOM_AGENT_PROMPT: &str = concat!(
|
||||
"You are a custom sub-agent. The parent has given you a narrowed tool\n",
|
||||
"registry — only the tools you see at runtime are available. Do not try\n",
|
||||
"to reach for a tool that is not registered; if the task needs one, put\n",
|
||||
"the gap under BLOCKERS and stop.\n",
|
||||
"\n",
|
||||
"Stay tightly scoped to the assigned objective. The parent chose Custom\n",
|
||||
"specifically to constrain you — do not expand into adjacent work.\n",
|
||||
"\n",
|
||||
include_str!("../../prompts/subagent_output_format.md"),
|
||||
const CUSTOM_AGENT_INTRO: &str = concat!(
|
||||
"You are a custom sub-agent with a narrowed tool registry.\n",
|
||||
"Use only tools available at runtime; put missing capabilities under BLOCKERS and stop.\n",
|
||||
"Stay tightly scoped to the assigned objective.\n\n"
|
||||
);
|
||||
|
||||
const IMPLEMENTER_AGENT_PROMPT: &str = concat!(
|
||||
"You are an implementation sub-agent. Your job is to land the change\n",
|
||||
"the parent assigned to you — write the code, modify the files, satisfy\n",
|
||||
"the contract — with the *minimum* surrounding edit. You do not refactor\n",
|
||||
"adjacent code. You do not rename unused variables. You do not 'tidy up'\n",
|
||||
"while you're in the file. If you see related work that should happen,\n",
|
||||
"surface it under RISKS or BLOCKERS rather than starting it.\n",
|
||||
"\n",
|
||||
"Method:\n",
|
||||
"- Read the target file(s) end-to-end before editing. Edits made without\n",
|
||||
" reading the file produce structurally wrong patches.\n",
|
||||
"- Prefer `edit_file` (single search/replace) for narrow changes.\n",
|
||||
" Reach for `apply_patch` only when the change spans multiple hunks\n",
|
||||
" or is structurally tricky.\n",
|
||||
"- After every batch of edits, run a quick verification: a relevant\n",
|
||||
" `cargo check` / `npm run lint` / `pytest -k <test>` so you don't\n",
|
||||
" hand the parent a half-baked implementation.\n",
|
||||
"- If the change requires writing tests, write them first or alongside\n",
|
||||
" the implementation — never as a follow-up the parent has to ask for.\n",
|
||||
"\n",
|
||||
"CHANGES is the load-bearing section for implementers. List every file\n",
|
||||
"you modified with a one-line summary of what changed and why. The parent\n",
|
||||
"uses CHANGES to decide what to inspect next.\n",
|
||||
"\n",
|
||||
include_str!("../../prompts/subagent_output_format.md"),
|
||||
const IMPLEMENTER_AGENT_INTRO: &str = concat!(
|
||||
"You are an implementation sub-agent. Land the assigned change with minimal surrounding edits.\n",
|
||||
"Read target files before editing; prefer edit_file for narrow changes and apply_patch for hunks.\n",
|
||||
"Run relevant verification after edit batches; write needed tests with the implementation.\n",
|
||||
"CHANGES is load-bearing: list every modified file with a one-line why.\n\n"
|
||||
);
|
||||
|
||||
const VERIFIER_AGENT_PROMPT: &str = concat!(
|
||||
"You are a verification sub-agent. Your job is to *run* the project's\n",
|
||||
"test suite (or other validation gates) and report pass/fail with the\n",
|
||||
"evidence the parent needs to act. You are read-only by convention —\n",
|
||||
"do not patch failing tests, do not 'fix' lints, do not modify code.\n",
|
||||
"If a fix seems obvious, describe it under RISKS so the parent can\n",
|
||||
"spawn an Implementer.\n",
|
||||
"\n",
|
||||
"Method:\n",
|
||||
"- Run the right gate for the language: `cargo test --workspace`,\n",
|
||||
" `npm test`, `pytest`, `go test ./...`. Use `run_tests` when it's\n",
|
||||
" available; fall back to `exec_shell` when the project has a custom\n",
|
||||
" invocation.\n",
|
||||
"- Run lints if requested: `cargo clippy -- -D warnings`,\n",
|
||||
" `npm run lint`, `ruff check .`. Don't run lints the parent didn't\n",
|
||||
" ask for; lint noise drowns the signal you were spawned to surface.\n",
|
||||
"- Capture the exact failing assertion plus the stack trace / file:line\n",
|
||||
" in EVIDENCE. A failure summarised as 'cargo test failed' is useless;\n",
|
||||
" the parent needs the actual panic.\n",
|
||||
"\n",
|
||||
"OUTCOME goes at the top of SUMMARY: PASS / FAIL / FLAKY. If FLAKY,\n",
|
||||
"say which test and how many runs you tried.\n",
|
||||
"\n",
|
||||
"CHANGES will almost always be \"None.\" for a verifier.\n",
|
||||
"\n",
|
||||
include_str!("../../prompts/subagent_output_format.md"),
|
||||
const VERIFIER_AGENT_INTRO: &str = concat!(
|
||||
"You are a verification sub-agent. Run requested gates and stay read-only.\n",
|
||||
"Report PASS/FAIL/FLAKY at the top of SUMMARY with exact command evidence.\n",
|
||||
"Capture failing assertion and file:line; put obvious fixes under RISKS.\n",
|
||||
"CHANGES will almost always be \"None.\" for a verifier.\n\n"
|
||||
);
|
||||
|
||||
// === Tests ===
|
||||
|
||||
@@ -27,6 +27,10 @@ fn message_text(message: &Message) -> &str {
|
||||
}
|
||||
}
|
||||
|
||||
fn estimate_tool_description_tokens_conservative(text: &str) -> usize {
|
||||
text.chars().count().div_ceil(3)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_agent_type_from_str() {
|
||||
assert_eq!(
|
||||
@@ -149,6 +153,59 @@ fn test_implementer_and_verifier_have_distinct_prompts() {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_agent_type_prompts_include_shared_output_contract_once() {
|
||||
for (agent_type, marker) in [
|
||||
(SubAgentType::General, "general-purpose sub-agent"),
|
||||
(SubAgentType::Explore, "exploration sub-agent"),
|
||||
(SubAgentType::Plan, "planning sub-agent"),
|
||||
(SubAgentType::Review, "code review sub-agent"),
|
||||
(SubAgentType::Implementer, "implementation sub-agent"),
|
||||
(SubAgentType::Verifier, "verification sub-agent"),
|
||||
(SubAgentType::Custom, "custom sub-agent"),
|
||||
] {
|
||||
let prompt = agent_type.system_prompt();
|
||||
assert!(prompt.contains(marker));
|
||||
assert_eq!(
|
||||
prompt.matches("## Output contract (mandatory)").count(),
|
||||
1,
|
||||
"{agent_type:?} prompt should include the shared output contract exactly once"
|
||||
);
|
||||
assert!(prompt.contains("### SUMMARY") && prompt.contains("### BLOCKERS"));
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn agent_spawn_description_warns_parent_to_verify_self_reports_within_budget() {
|
||||
let tmp = tempdir().expect("tempdir");
|
||||
let manager = new_shared_subagent_manager(tmp.path().to_path_buf(), 1);
|
||||
let tool = AgentSpawnTool::new(manager, stub_runtime());
|
||||
let description = tool.description();
|
||||
|
||||
assert!(
|
||||
description
|
||||
.contains("## Trust model: subagent results are self-reports, not verified facts")
|
||||
);
|
||||
assert!(description.contains("`agent_result` returns the child's narrative summary"));
|
||||
assert!(description.contains("| Side effect | Re-verify with |"));
|
||||
assert!(description.contains("If the child returns a verifiable handle"));
|
||||
for row in [
|
||||
"| URL claimed posted/written | `fetch_url` and check the response |",
|
||||
"| File claimed created | `read_file` or `list_dir` |",
|
||||
"| File claimed edited | `read_file` and check the change is present |",
|
||||
"| HTTP POST/PUT response | inspect status code and body |",
|
||||
"| Git operation | `git_status` / `git_diff` |",
|
||||
"| Test claimed passing | `run_tests` |",
|
||||
"| Process claimed started | `exec_shell` (e.g. `pgrep`, `lsof -i`) |",
|
||||
] {
|
||||
assert!(description.contains(row));
|
||||
}
|
||||
assert!(
|
||||
estimate_tool_description_tokens_conservative(description) <= 1024,
|
||||
"agent_spawn description exceeds the conservative 1024-token budget"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_implementer_allowed_tools_include_writes() {
|
||||
// Implementer is the write-heavy role; the deprecated
|
||||
|
||||
@@ -410,56 +410,72 @@ async fn compaction_non_streaming_returns_queued_message_response() {
|
||||
|
||||
// === 6. Sub-agent style turn ================================================
|
||||
//
|
||||
// Sub-agents share the trait boundary: a parent's tool-call (`agent_spawn`)
|
||||
// causes a child runtime to be created with its own `Arc<dyn LlmClient>`.
|
||||
// At the trait level the test is identical to a normal turn — what changes
|
||||
// is which mock instance answers. This test demonstrates two independent
|
||||
// mocks (parent + child) cooperating on the same protocol.
|
||||
// The next turn after an `agent_result` summary must re-verify the claimed
|
||||
// side effect before reporting success.
|
||||
|
||||
#[tokio::test]
|
||||
async fn sub_agent_parent_and_child_each_drive_independent_mocks() {
|
||||
// Parent decides to delegate.
|
||||
let parent_turn = vec![
|
||||
canned::message_start("parent_t1"),
|
||||
canned::tool_use_block_start(0, "spawn_id", "agent_spawn"),
|
||||
canned::tool_input_delta(0, r#"{"prompt":"compute 2+2"}"#),
|
||||
canned::block_stop(0),
|
||||
async fn v4_parent_reverifies_subagent_file_self_report_before_claiming_success() {
|
||||
let tmp = tempfile::tempdir().expect("tempdir");
|
||||
let missing = tmp.path().join("child-claimed-write.txt");
|
||||
assert!(!missing.exists(), "fixture path must start missing");
|
||||
let missing_path = missing.display().to_string();
|
||||
|
||||
let parent = MockLlmClient::new(vec![vec![
|
||||
canned::message_start("parent_verify"),
|
||||
canned::thinking_delta(0, "Verify the child's file-write self-report first."),
|
||||
canned::tool_use_block_start(1, "verify_file", "read_file"),
|
||||
canned::tool_input_delta(1, &serde_json::json!({ "path": &missing_path }).to_string()),
|
||||
canned::block_stop(1),
|
||||
canned::message_delta("tool_use", None),
|
||||
canned::message_stop(),
|
||||
];
|
||||
let parent = MockLlmClient::new(vec![parent_turn])
|
||||
.with_provider("mock-parent")
|
||||
.with_model("deepseek-v4-pro");
|
||||
]])
|
||||
.with_model("deepseek-v4-pro");
|
||||
let tool_summary = format!(
|
||||
"[sub-agent result summarized for parent context]\n\
|
||||
Child results are self-reports; verify side effects with tools like read_file or list_dir before claiming success.\n\
|
||||
- agent_filecheck (implementer) status=Completed\n result: Wrote {missing_path} successfully."
|
||||
);
|
||||
|
||||
// Child does the work and replies with text.
|
||||
let child_turn = vec![
|
||||
canned::message_start("child_t1"),
|
||||
canned::text_block_start(0),
|
||||
canned::text_delta(0, "4"),
|
||||
canned::block_stop(0),
|
||||
canned::message_delta("end_turn", None),
|
||||
canned::message_stop(),
|
||||
];
|
||||
let child = MockLlmClient::new(vec![child_turn])
|
||||
.with_provider("mock-child")
|
||||
.with_model("deepseek-v4-flash");
|
||||
|
||||
// Drive both mocks against their own request streams.
|
||||
let _ = parent
|
||||
.create_message_stream(make_request(vec![user_message("delegate")]))
|
||||
let mut stream = parent
|
||||
.create_message_stream(make_request(vec![
|
||||
user_message("Use a child to create the file, then report back."),
|
||||
assistant_tool_call(
|
||||
"agent_result_call",
|
||||
"agent_result",
|
||||
serde_json::json!({
|
||||
"agent_id": "agent_filecheck"
|
||||
}),
|
||||
),
|
||||
tool_result_message("agent_result_call", &tool_summary),
|
||||
]))
|
||||
.await
|
||||
.unwrap()
|
||||
.next()
|
||||
.await;
|
||||
.unwrap();
|
||||
|
||||
let (child_text, _) =
|
||||
drain_stream_text(&child, make_request(vec![user_message("compute 2+2")])).await;
|
||||
assert_eq!(child_text, "4");
|
||||
let mut text_before_verification = String::new();
|
||||
let mut tool_name = None;
|
||||
let mut tool_input = String::new();
|
||||
while let Some(ev) = stream.next().await {
|
||||
match ev.unwrap() {
|
||||
StreamEvent::ContentBlockStart { content_block, .. } => {
|
||||
use crate::models::ContentBlockStart;
|
||||
if let ContentBlockStart::ToolUse { name, .. } = content_block {
|
||||
tool_name = Some(name);
|
||||
}
|
||||
}
|
||||
StreamEvent::ContentBlockDelta { delta, .. } => match delta {
|
||||
Delta::InputJsonDelta { partial_json } => tool_input.push_str(&partial_json),
|
||||
Delta::TextDelta { text } => text_before_verification.push_str(&text),
|
||||
_ => {}
|
||||
},
|
||||
StreamEvent::MessageStop => break,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
assert_eq!(parent.provider_name(), "mock-parent");
|
||||
assert_eq!(child.provider_name(), "mock-child");
|
||||
assert_eq!(parent.captured_requests().len(), 1);
|
||||
assert_eq!(child.captured_requests().len(), 1);
|
||||
assert_eq!(text_before_verification, "");
|
||||
assert_eq!(tool_name.as_deref(), Some("read_file"));
|
||||
let parsed: serde_json::Value = serde_json::from_str(&tool_input).expect("tool input JSON");
|
||||
assert_eq!(parsed["path"], missing_path);
|
||||
}
|
||||
|
||||
// === 7. Capacity-gate observation ===========================================
|
||||
|
||||
Reference in New Issue
Block a user