From 8c36c1c6befdc1ce9ae979143fcc84213a5a7725 Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Thu, 7 May 2026 06:33:04 -0500 Subject: [PATCH] feat(subagents): fork parent context on demand (#1048) --- crates/tui/src/core/engine.rs | 27 ++++- crates/tui/src/prompts/base.md | 2 +- crates/tui/src/tools/subagent/mod.rs | 153 ++++++++++++++++++++++--- crates/tui/src/tools/subagent/tests.rs | 90 +++++++++++++++ docs/SUBAGENTS.md | 15 +++ docs/TOOL_SURFACE.md | 6 + 6 files changed, 277 insertions(+), 16 deletions(-) diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index b571ca03..3eabbbce 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -47,8 +47,8 @@ use crate::tools::shell::{SharedShellManager, new_shared_shell_manager}; use crate::tools::spec::RuntimeToolServices; use crate::tools::spec::{ApprovalRequirement, ToolError, ToolResult}; use crate::tools::subagent::{ - Mailbox, SharedSubAgentManager, SubAgentCompletion, SubAgentRuntime, SubAgentType, - new_shared_subagent_manager, resolve_subagent_assignment_route, + Mailbox, SharedSubAgentManager, SubAgentCompletion, SubAgentForkContext, SubAgentRuntime, + SubAgentType, new_shared_subagent_manager, resolve_subagent_assignment_route, }; use crate::tools::todo::{SharedTodoList, new_shared_todo_list}; use crate::tools::user_input::{UserInputRequest, UserInputResponse}; @@ -975,6 +975,26 @@ impl Engine { let tool_context = self.build_tool_context(mode, auto_approve); let builder = self.build_turn_tool_registry_builder(mode, todo_list, plan_state); + let fork_context_for_runtime = if self.config.features.enabled(Feature::Subagents) { + let state = StructuredState::capture( + mode.label(), + self.config.workspace.clone(), + std::env::current_dir().ok(), + &self.session.working_set, + &self.config.todos, + &self.config.plan_state, + Some(&self.subagent_manager), + ) + .await; + Some(SubAgentForkContext { + system: self.session.system_prompt.clone(), + messages: self.messages_with_turn_metadata(), + structured_state_block: state.to_system_block(), + }) + } else { + None + }; + // Mailbox for structured sub-agent envelopes (#128/#130). One per // turn: the receiver is drained by a short-lived task that converts // envelopes into `Event::SubAgentMailbox` so the UI can route them @@ -1027,6 +1047,9 @@ impl Engine { ) .with_max_spawn_depth(self.config.max_spawn_depth) .with_parent_completion_tx(self.tx_subagent_completion.clone()); + if let Some(context) = fork_context_for_runtime.clone() { + rt = rt.with_fork_context(context); + } if let Some((mailbox, cancel_token)) = mailbox_for_runtime.as_ref() { rt = rt .with_mailbox(mailbox.clone()) diff --git a/crates/tui/src/prompts/base.md b/crates/tui/src/prompts/base.md index c77bc897..e001d26c 100644 --- a/crates/tui/src/prompts/base.md +++ b/crates/tui/src/prompts/base.md @@ -163,7 +163,7 @@ Use `edit_file` for one clear replacement in one file. Use `apply_patch` when th Use `exec_shell` for shell-native diagnostics, pipelines, and bounded commands. Use structured tools for structured operations when they map directly (`grep_files`, `git_diff`, `read_file`). For long commands, servers, full test suites, or release computations, start background work with `task_shell_start` or `exec_shell` using `background: true`, then poll with `task_shell_wait` or `exec_shell_wait`. ### `agent_spawn` -Use `agent_spawn` for independent investigations or implementation slices that can run while you continue coordinating. Use `agent_wait` when you need one or more completions. Use `agent_result` when the sentinel summary is too thin or you need the full structured output. Keep tiny single-read/search tasks local so the transcript stays compact. +Use `agent_spawn` for independent investigations or implementation slices that can run while you continue coordinating. Use `fork_context: true` when the child must inherit the current transcript, plan/todo state, and byte-identical parent system/message prefix for DeepSeek prefix-cache reuse. Use `agent_wait` when you need one or more completions. Use `agent_result` when the sentinel summary is too thin or you need the full structured output. Keep tiny single-read/search tasks local so the transcript stays compact. ### `rlm` Use `rlm` for long-context semantic work, bulk classification/extraction, and decomposition where a Python REPL plus child LLM helpers is useful. Use deterministic Python inside RLM for exact counts and structured aggregation; use `grep_files` or `exec_shell` directly when that is the clearest deterministic check. diff --git a/crates/tui/src/tools/subagent/mod.rs b/crates/tui/src/tools/subagent/mod.rs index bcc977cb..3922309b 100644 --- a/crates/tui/src/tools/subagent/mod.rs +++ b/crates/tui/src/tools/subagent/mod.rs @@ -433,6 +433,7 @@ fn is_false(b: &bool) -> bool { pub(crate) struct SubAgentSpawnOptions { pub model: Option, pub nickname: Option, + pub fork_context: bool, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -492,6 +493,9 @@ struct SpawnRequest { /// locality. A global ownership table prevents two agents from holding /// a resident lease on the same file simultaneously. resident_file: Option, + /// When true, seed the child with the parent's system prompt and message + /// prefix before appending the child task. + fork_context: bool, } #[derive(Debug, Clone)] @@ -564,6 +568,16 @@ pub struct SubAgentCompletion { pub payload: String, } +/// Parent transcript snapshot available to sub-agents that opt into context +/// forking. The system prompt and leading messages are kept byte-identical to +/// the parent request so DeepSeek's prefix cache can reuse the warmed prefix. +#[derive(Clone, Debug)] +pub struct SubAgentForkContext { + pub system: Option, + pub messages: Vec, + pub structured_state_block: Option, +} + /// Runtime configuration for spawning sub-agents. /// /// Carries everything a child needs to (a) build its own tool registry — @@ -603,6 +617,8 @@ pub struct SubAgentRuntime { /// parent isn't flooded with grandchild completions it didn't directly /// orchestrate. `None` when no consumer is wired (tests / legacy paths). pub parent_completion_tx: Option>, + /// Snapshot of the request prefix visible to an opt-in forked child. + pub fork_context: Option, } impl SubAgentRuntime { @@ -635,6 +651,7 @@ impl SubAgentRuntime { cancel_token: CancellationToken::new(), mailbox: None, parent_completion_tx: None, + fork_context: None, } } @@ -651,6 +668,13 @@ impl SubAgentRuntime { self } + /// Attach the current parent request prefix for `fork_context` spawns. + #[must_use] + pub fn with_fork_context(mut self, context: SubAgentForkContext) -> Self { + self.fork_context = Some(context); + self + } + /// Attach a `Mailbox` so this runtime (and every descendant — children /// clone it) publishes structured `MailboxMessage` envelopes alongside /// the legacy `Event` stream. Pair with [`Self::with_cancel_token`] when @@ -751,6 +775,7 @@ impl SubAgentRuntime { cancel_token: self.cancel_token.child_token(), mailbox: self.mailbox.clone(), parent_completion_tx: self.parent_completion_tx.clone(), + fork_context: self.fork_context.clone(), } } @@ -1115,6 +1140,7 @@ impl SubAgentManager { prompt, assignment, allowed_tools: tools, + fork_context: options.fork_context, started_at, max_steps, input_rx, @@ -1220,6 +1246,7 @@ impl SubAgentManager { prompt: agent.prompt.clone(), assignment: agent.assignment.clone(), allowed_tools: agent.allowed_tools.clone(), + fork_context: false, started_at: restarted_at, max_steps: self.max_steps, input_rx, @@ -1620,6 +1647,10 @@ impl ToolSpec for AgentSpawnTool { "resident_file": { "type": "string", "description": "Optional file path for cache-aware resident mode. When set, the child's system prefix is augmented with the full contents of this file so DeepSeek's prefix cache stays warm across follow-up send_input calls. Only one agent may hold a resident lease on a given file at a time — a second spawn with the same path receives a conflict warning in the result." + }, + "fork_context": { + "type": "boolean", + "description": "When true, inherit the parent's system prompt and conversation prefix before appending this task. This preserves DeepSeek prefix-cache reuse and gives the child full parent context. Defaults to false for independent exploration." } } }) @@ -1752,6 +1783,7 @@ impl ToolSpec for AgentSpawnTool { SubAgentSpawnOptions { model: Some(effective_model), nickname: None, + fork_context: spawn_request.fork_context, }, ) .map_err(|e| ToolError::execution_failed(format!("Failed to spawn sub-agent: {e}")))?; @@ -2474,7 +2506,8 @@ impl ToolSpec for AgentWaitTool { } } -/// Tool to delegate a task to a specialized agent (alias for agent_spawn). +/// Compatibility delegate tool. It routes through `agent_spawn`, but defaults +/// to `fork_context=true` because delegation is usually continuation work. pub struct DelegateToAgentTool { manager: SharedSubAgentManager, runtime: SubAgentRuntime, @@ -2495,8 +2528,9 @@ impl ToolSpec for DelegateToAgentTool { } fn description(&self) -> &'static str { - "Delegate a task to a specialized sub-agent. This is an alias for agent_spawn — same schema, \ - same behavior. Use `type` (or `agent_name`, `agent_type`) to pick the agent flavor." + "Delegate a task to a specialized sub-agent. Compatibility wrapper around agent_spawn; \ + defaults fork_context=true so the child inherits the parent transcript. Use `type` \ + (or `agent_name`, `agent_type`) to pick the agent flavor." } fn input_schema(&self) -> Value { @@ -2546,6 +2580,10 @@ impl ToolSpec for DelegateToAgentTool { "type": "array", "items": { "type": "string" }, "description": "Explicit tool allowlist (required for custom type)" + }, + "fork_context": { + "type": "boolean", + "description": "When true, inherit the parent's system prompt and conversation prefix before appending this task. delegate_to_agent defaults this to true." } } }) @@ -2564,6 +2602,7 @@ impl ToolSpec for DelegateToAgentTool { async fn execute(&self, input: Value, context: &ToolContext) -> Result { let spawn_tool = AgentSpawnTool::new(self.manager.clone(), self.runtime.clone()); + let input = with_default_fork_context(input, true); let result = spawn_tool.execute(input, context).await?; Ok(wrap_with_deprecation_notice( result, @@ -2600,6 +2639,64 @@ fn build_subagent_system_prompt( } } +fn subagent_request_system_prompt( + subagent_system_prompt: &str, + fork_context: Option<&SubAgentForkContext>, +) -> SystemPrompt { + fork_context + .and_then(|context| context.system.clone()) + .unwrap_or_else(|| SystemPrompt::Text(subagent_system_prompt.to_string())) +} + +fn build_initial_subagent_messages( + prompt: &str, + assignment: &SubAgentAssignment, + agent_type: &SubAgentType, + fork_context: Option<&SubAgentForkContext>, +) -> Vec { + let mut messages = fork_context + .map(|context| context.messages.clone()) + .unwrap_or_default(); + + if let Some(context) = fork_context { + if let Some(state) = context + .structured_state_block + .as_deref() + .map(str::trim) + .filter(|state| !state.is_empty()) + { + messages.push(system_text_message(format!( + "\n{state}\n" + ))); + } + + messages.push(system_text_message(format!( + "\n{}\n", + build_subagent_system_prompt(agent_type, assignment) + ))); + } + + messages.push(Message { + role: "user".to_string(), + content: vec![ContentBlock::Text { + text: build_assignment_prompt(prompt, assignment, agent_type), + cache_control: None, + }], + }); + + messages +} + +fn system_text_message(text: String) -> Message { + Message { + role: "system".to_string(), + content: vec![ContentBlock::Text { + text, + cache_control: None, + }], + } +} + struct SubAgentTask { manager_handle: SharedSubAgentManager, runtime: SubAgentRuntime, @@ -2609,6 +2706,7 @@ struct SubAgentTask { assignment: SubAgentAssignment, /// `None` = full registry inheritance. `Some(list)` = explicit narrow. allowed_tools: Option>, + fork_context: bool, started_at: Instant, max_steps: u32, input_rx: mpsc::UnboundedReceiver, @@ -2623,6 +2721,7 @@ async fn run_subagent_task(task: SubAgentTask) { task.prompt, task.assignment, task.allowed_tools, + task.fork_context, task.started_at, task.max_steps, task.input_rx, @@ -2740,13 +2839,25 @@ async fn run_subagent( prompt: String, assignment: SubAgentAssignment, allowed_tools: Option>, + fork_context: bool, started_at: Instant, max_steps: u32, mut input_rx: mpsc::UnboundedReceiver, ) -> Result { let system_prompt = build_subagent_system_prompt(&agent_type, &assignment); + let fork_context = fork_context + .then_some(runtime.fork_context.as_ref()) + .flatten(); + let request_system = subagent_request_system_prompt(&system_prompt, fork_context); + let mut messages = + build_initial_subagent_messages(&prompt, &assignment, &agent_type, fork_context); + let runtime_for_tools = runtime.clone().with_fork_context(SubAgentForkContext { + system: Some(request_system.clone()), + messages: messages.clone(), + structured_state_block: None, + }); let tool_registry = SubAgentToolRegistry::new( - runtime.clone(), + runtime_for_tools, allowed_tools.clone(), Arc::new(Mutex::new(TodoList::new())), Arc::new(Mutex::new(PlanState::default())), @@ -2769,14 +2880,6 @@ async fn run_subagent( format!("started ({})", agent_type.as_str()), ); - let mut messages = vec![Message { - role: "user".to_string(), - content: vec![ContentBlock::Text { - text: build_assignment_prompt(&prompt, &assignment, &agent_type), - cache_control: None, - }], - }]; - let mut steps = 0; let mut final_result: Option = None; let mut pending_inputs: VecDeque = VecDeque::new(); @@ -2842,7 +2945,7 @@ async fn run_subagent( model: runtime.model.clone(), messages: messages.clone(), max_tokens: 4096, - system: Some(SystemPrompt::Text(system_prompt.clone())), + system: Some(request_system.clone()), tools: Some(tools.clone()), tool_choice: Some(json!({ "type": "auto" })), metadata: None, @@ -3328,6 +3431,9 @@ fn parse_spawn_request(input: &Value) -> Result { .and_then(|v| v.as_str()) .map(str::to_string) .filter(|s| !s.trim().is_empty()); + let fork_context = + parse_optional_bool(input, &["fork_context", "forkContext", "inherit_context"]) + .unwrap_or(false); Ok(SpawnRequest { prompt: prompt.clone(), @@ -3337,9 +3443,30 @@ fn parse_spawn_request(input: &Value) -> Result { model, cwd, resident_file, + fork_context, }) } +fn parse_optional_bool(input: &Value, names: &[&str]) -> Option { + names + .iter() + .find_map(|name| input.get(*name)) + .and_then(Value::as_bool) +} + +fn with_default_fork_context(mut input: Value, default: bool) -> Value { + let Some(object) = input.as_object_mut() else { + return input; + }; + if !object.contains_key("fork_context") + && !object.contains_key("forkContext") + && !object.contains_key("inherit_context") + { + object.insert("fork_context".to_string(), Value::Bool(default)); + } + input +} + pub(crate) fn normalize_requested_subagent_model( value: &str, field: &str, diff --git a/crates/tui/src/tools/subagent/tests.rs b/crates/tui/src/tools/subagent/tests.rs index bd8f29bb..6bf6cfb2 100644 --- a/crates/tui/src/tools/subagent/tests.rs +++ b/crates/tui/src/tools/subagent/tests.rs @@ -20,6 +20,13 @@ fn make_snapshot(status: SubAgentStatus) -> SubAgentResult { } } +fn message_text(message: &Message) -> &str { + match message.content.first() { + Some(ContentBlock::Text { text, .. }) => text.as_str(), + other => panic!("expected text content block, got {other:?}"), + } +} + #[test] fn test_agent_type_from_str() { assert_eq!( @@ -207,6 +214,88 @@ fn test_parse_spawn_request_accepts_items_payload() { assert_eq!(parsed.agent_type, SubAgentType::Explore); } +#[test] +fn test_parse_spawn_request_accepts_fork_context() { + let input = json!({ + "prompt": "continue from here", + "fork_context": true + }); + let parsed = parse_spawn_request(&input).expect("spawn request should parse"); + assert!(parsed.fork_context); + + let input = json!({ + "prompt": "continue from here", + "inherit_context": true + }); + let parsed = parse_spawn_request(&input).expect("spawn request should parse"); + assert!(parsed.fork_context); +} + +#[test] +fn test_delegate_defaults_to_fork_context() { + let input = with_default_fork_context(json!({ "prompt": "review current work" }), true); + let parsed = parse_spawn_request(&input).expect("delegate request should parse"); + assert!(parsed.fork_context); + + let input = with_default_fork_context( + json!({ "prompt": "fresh exploration", "fork_context": false }), + true, + ); + let parsed = parse_spawn_request(&input).expect("delegate override should parse"); + assert!(!parsed.fork_context); +} + +#[test] +fn forked_subagent_messages_preserve_parent_prefix_then_append_task() { + let parent_system = SystemPrompt::Text("parent system".to_string()); + let parent_message = Message { + role: "user".to_string(), + content: vec![ContentBlock::Text { + text: "parent turn".to_string(), + cache_control: None, + }], + }; + let fork_context = SubAgentForkContext { + system: Some(parent_system.clone()), + messages: vec![parent_message.clone()], + structured_state_block: Some( + "## Cycle State (Auto-Preserved)\n- Mode: `AGENT`".to_string(), + ), + }; + + let assignment = SubAgentAssignment::new("inspect parser".to_string(), Some("worker".into())); + let messages = build_initial_subagent_messages( + "inspect parser", + &assignment, + &SubAgentType::General, + Some(&fork_context), + ); + + assert_eq!( + subagent_request_system_prompt("child system", Some(&fork_context)), + parent_system + ); + assert_eq!(messages.first(), Some(&parent_message)); + assert_eq!(messages.len(), 4); + assert_eq!(messages[1].role, "system"); + assert!(message_text(&messages[1]).contains("")); + assert_eq!(messages[2].role, "system"); + assert!(message_text(&messages[2]).contains("")); + assert_eq!(messages[3].role, "user"); + assert!(message_text(&messages[3]).contains("inspect parser")); +} + +#[test] +fn fresh_subagent_messages_keep_existing_single_turn_shape() { + let assignment = SubAgentAssignment::new("list files".to_string(), None); + let messages = + build_initial_subagent_messages("list files", &assignment, &SubAgentType::Explore, None); + + assert_eq!(messages.len(), 1); + assert_eq!(messages[0].role, "user"); + assert!(message_text(&messages[0]).contains("list files")); +} + #[test] fn test_parse_spawn_request_rejects_text_and_items_together() { let input = json!({ @@ -1224,6 +1313,7 @@ fn stub_runtime() -> SubAgentRuntime { cancel_token: CancellationToken::new(), mailbox: None, parent_completion_tx: None, + fork_context: None, } } diff --git a/docs/SUBAGENTS.md b/docs/SUBAGENTS.md index 3fe83329..25482dfb 100644 --- a/docs/SUBAGENTS.md +++ b/docs/SUBAGENTS.md @@ -35,6 +35,21 @@ Each role's full system prompt lives in child agent boots; the parent's spawn prompt becomes the first turn's user message. +## Context forking + +`agent_spawn` starts fresh by default: the child gets its role prompt +plus the task you pass. Use `fork_context: true` when the child should +continue from the parent's current request prefix instead. In fork +mode the child request keeps the parent's system prompt and message +history byte-identical, appends a structured state snapshot, then +adds the sub-agent role instructions and task at the tail. That keeps +DeepSeek prefix-cache reuse high while giving the child the context +needed for continuation, review, summarization, or compaction work. + +Use fresh spawns for independent exploration. Use forked spawns when +the task depends on decisions, files, todos, or plan state already in +the parent transcript. + ### When to pick which role - **`general`** — when the task is "do this whole thing", not "go diff --git a/docs/TOOL_SURFACE.md b/docs/TOOL_SURFACE.md index 68cece8d..5ae8b1a9 100644 --- a/docs/TOOL_SURFACE.md +++ b/docs/TOOL_SURFACE.md @@ -143,6 +143,12 @@ See `agent.txt` for the delegation protocol and (`general` / `explore` / `plan` / `review` / `implementer` / `verifier` / `custom`). +`agent_spawn` defaults to a fresh child conversation. Pass +`fork_context: true` for continuation-style work that should inherit the +parent's system prompt and message prefix for DeepSeek prefix-cache reuse. +The deprecated `delegate_to_agent` compatibility wrapper routes through +`agent_spawn` and defaults `fork_context` to true. + ### Parallel fan-out: cost-class caps Two tools offer parallel fan-out with different concurrency limits that