From 5ca618d70a7a03ca2b2f8695dad223e5810f6089 Mon Sep 17 00:00:00 2001
From: Hunter B <hmbown@gmail.com>
Date: Sat, 13 Jun 2026 13:31:21 -0700
Subject: [PATCH] fix(subagents): make eval and queued steering nonblocking

Make agent_eval return a running projection by default so follow-up steering does not wait for child model calls. Keep checkpoint resume blocking by default unless block=false is explicit.

Teach /agent, /swarm, prompts, and docs to poll workers nonblocking and reserve block:true for deliberate terminal waits. Add Ctrl+S as a reliable queued-message send path before falling back to draft stash.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                               |  4 ++
 crates/tui/CHANGELOG.md                    |  4 ++
 crates/tui/src/commands/groups/core/mod.rs |  4 +-
 crates/tui/src/prompts/base.md             |  4 +-
 crates/tui/src/tools/subagent/mod.rs       |  9 ++-
 crates/tui/src/tools/subagent/tests.rs     | 56 +++++++++++++++
 crates/tui/src/tui/ui.rs                   | 80 +++++++++++++++++++---
 crates/tui/src/tui/ui/tests.rs             | 51 ++++++++++++++
 docs/SUBAGENTS.md                          | 12 ++--
 docs/TOOL_SURFACE.md                       |  2 +-
 10 files changed, 205 insertions(+), 21 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 124bc5ef..363f0862 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -42,6 +42,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Fixed
 
+- **Sub-agent eval no longer blocks by default.** `agent_eval` now returns the
+  current projection immediately and delivers follow-up input without waiting
+  for a running child to finish its provider call. Pass `block:true` for an
+  intentional terminal wait.
 - **Z.ai GLM thinking traces.** Direct Z.ai requests now use the documented
   `thinking` shape, preserve and replay `reasoning_content`, classify GLM
   reasoning streams as thinking output, and accept `ultracode` as a max-effort
diff --git a/crates/tui/CHANGELOG.md b/crates/tui/CHANGELOG.md
index b252c149..2d852aa5 100644
--- a/crates/tui/CHANGELOG.md
+++ b/crates/tui/CHANGELOG.md
@@ -42,6 +42,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ### Fixed
 
+- **Sub-agent eval no longer blocks by default.** `agent_eval` now returns the
+  current projection immediately and delivers follow-up input without waiting
+  for a running child to finish its provider call. Pass `block:true` for an
+  intentional terminal wait.
 - **Z.ai GLM thinking traces.** Direct Z.ai requests now use the documented
   `thinking` shape, preserve and replay `reasoning_content`, classify GLM
   reasoning streams as thinking output, and accept `ultracode` as a max-effort
diff --git a/crates/tui/src/commands/groups/core/mod.rs b/crates/tui/src/commands/groups/core/mod.rs
index d07953d8..d92267da 100644
--- a/crates/tui/src/commands/groups/core/mod.rs
+++ b/crates/tui/src/commands/groups/core/mod.rs
@@ -364,7 +364,7 @@ pub fn agent(_app: &mut App, arg: Option<&str>) -> CommandResult {
         }
     };
     let message = format!(
-        "Open a persistent sub-agent session for this task. Call `agent_open` with name `slash_agent`, `prompt: {task:?}`, and `max_depth: {max_depth}`. Use `agent_eval` to wait for the next terminal/current projection and `handle_read` on the returned transcript_handle if you need more detail. Verify any claimed side effects before reporting success."
+        "Open a persistent sub-agent session for this task. Call `agent_open` with name `slash_agent`, `prompt: {task:?}`, and `max_depth: {max_depth}`. Use nonblocking `agent_eval` to poll the current projection or send follow-up input while you keep working; pass `block:true` only when you deliberately want to wait for a terminal result. Use `handle_read` on the returned transcript_handle if you need more detail. Verify any claimed side effects before reporting success."
     );
     CommandResult::with_message_and_action(
         format!("Opening persistent sub-agent at depth {max_depth}..."),
@@ -393,7 +393,7 @@ pub fn swarm(_app: &mut App, arg: Option<&str>) -> CommandResult {
         }
     };
     let message = format!(
-        "Run a multi-agent swarm for this task: {task:?}. Decompose it into independent, parallelizable subtasks and open one headless sub-agent per subtask with `agent_open` (pass `max_depth: {max_depth}` for nested delegation, and an `agent_type`/role that fits each subtask — explore for research, review for verification, implementer for edits). Run them concurrently; collect each worker's result summary with `agent_eval` (summaries, not full transcripts) and synthesize a single answer. Keep the fanout proportional to the task, and verify any claimed side effects before reporting success."
+        "Run a multi-agent swarm for this task: {task:?}. Decompose it into independent, parallelizable subtasks and open one headless sub-agent per subtask with `agent_open` (pass `max_depth: {max_depth}` for nested delegation, and an `agent_type`/role that fits each subtask — explore for research, review for verification, implementer for edits). Run them concurrently; poll each worker with nonblocking `agent_eval`, synthesize results as they arrive, and pass `block:true` only for a deliberate final wait. Keep the fanout proportional to the task, and verify any claimed side effects before reporting success."
     );
     CommandResult::with_message_and_action(
         format!("Dispatching a swarm at depth {max_depth}..."),
diff --git a/crates/tui/src/prompts/base.md b/crates/tui/src/prompts/base.md
index 76099c40..39d90358 100644
--- a/crates/tui/src/prompts/base.md
+++ b/crates/tui/src/prompts/base.md
@@ -169,7 +169,7 @@ For any task estimated to take 5+ concrete steps:
 - **Parallel implementation**: After a plan is laid out, open one sub-agent session per independent leaf task. Each does one thing well; you integrate results.
 - **Solo tasks**: A single read, a single search, a focused question — do these yourself. Opening a sub-agent has overhead; one-turn reads are faster direct.
 - **Sequential work**: If step B depends on step A's output, run A yourself, then decide whether to open a sub-agent based on what A found. Don't pre-open dependent work.
-- **Concurrent sub-agent cap**: The dispatcher defaults to 10 concurrent sub-agents (configurable via `[subagents].max_concurrent` in `config.toml`, hard ceiling 20). When you need more, batch them: open up to the cap, wait for completions, then open the next batch.
+- **Concurrent sub-agent cap**: The dispatcher defaults to 10 concurrent sub-agents (configurable via `[subagents].max_concurrent` in `config.toml`, hard ceiling 20). When you need more, batch them: open up to the cap, poll with nonblocking `agent_eval`, then open the next batch as slots free.
 
 ## Parallel-First Heuristic
 
@@ -264,7 +264,7 @@ Use `agent_open` for independent investigations or implementation slices that ca
 
 Use `tool_agent` for the experimental Fin fast lane: simple OCR, search, fetch, or command-probe tasks where a fast low-cost model with thinking off should execute tools while the parent keeps planning and synthesis context clean. Do not use it for nuanced implementation, architecture, release decisions, or anything that needs careful reasoning.
 
-Use `agent_eval` to send follow-up input, block for completion, or retrieve the current session projection. Use `agent_close` to cancel or release a session that is no longer useful. Keep tiny single-read/search tasks local so the transcript stays compact.
+Use `agent_eval` to send follow-up input or retrieve the current session projection without stopping parent work. It is nonblocking by default; pass `block:true` only when you intentionally want the parent turn to wait for a terminal child result. Use `agent_close` to cancel or release a session that is no longer useful. Keep tiny single-read/search tasks local so the transcript stays compact.
 
 ### `rlm_open` / `rlm_eval` / `rlm_configure` / `rlm_close`
 Use persistent RLM sessions for long-context semantic work, bulk classification/extraction, and decomposition where a Python REPL plus child LLM helpers is useful. Use deterministic Python inside RLM for exact counts and structured aggregation; use `grep_files` or `exec_shell` directly when that is the clearest deterministic check. Batch RLM child calls only after asserting independence with `dependency_mode="independent"`; use `sub_query_sequence` for dependent chains. Close sessions when their context is no longer needed.
diff --git a/crates/tui/src/tools/subagent/mod.rs b/crates/tui/src/tools/subagent/mod.rs
index c103fc88..a167358e 100644
--- a/crates/tui/src/tools/subagent/mod.rs
+++ b/crates/tui/src/tools/subagent/mod.rs
@@ -3520,7 +3520,7 @@ impl ToolSpec for AgentEvalTool {
     }
 
     fn description(&self) -> &'static str {
-        "Fetch or wait on a child sub-agent session. Optionally deliver a message/items to a running session, then return the latest session projection. With continue=true, resume only a checkpointed interrupted session. With block=true (default), waits for the session to reach a terminal boundary; block=false is a non-blocking status fetch. Terminal projections expose a handle_read-compatible transcript_handle for the full child transcript."
+        "Fetch the current projection for a child sub-agent session. Optionally deliver a message/items to a running session, then return immediately with the latest status. Set block=true only when you intentionally want to wait for a terminal boundary. With continue=true, resume only a checkpointed interrupted session; that resume waits by default unless block=false is explicit. Terminal projections expose a handle_read-compatible transcript_handle for the full child transcript."
     }
 
     fn input_schema(&self) -> Value {
@@ -3570,7 +3570,7 @@ impl ToolSpec for AgentEvalTool {
                 },
                 "block": {
                     "type": "boolean",
-                    "description": "Wait for a terminal boundary before returning (default true)"
+                    "description": "Wait for a terminal boundary before returning (default: false; default true only with continue=true/resume=true)"
                 },
                 "timeout_ms": {
                     "type": "integer",
@@ -3598,7 +3598,10 @@ impl ToolSpec for AgentEvalTool {
         let interrupt = optional_bool(&input, "interrupt", false);
         let continue_from_checkpoint =
             optional_bool(&input, "continue", false) || optional_bool(&input, "resume", false);
-        let block = optional_bool(&input, "block", true);
+        let block = input
+            .get("block")
+            .and_then(Value::as_bool)
+            .unwrap_or(continue_from_checkpoint);
         let timeout_ms = optional_u64(&input, "timeout_ms", DEFAULT_RESULT_TIMEOUT_MS)
             .clamp(1000, MAX_RESULT_TIMEOUT_MS);
 
diff --git a/crates/tui/src/tools/subagent/tests.rs b/crates/tui/src/tools/subagent/tests.rs
index 63301bb2..4e302102 100644
--- a/crates/tui/src/tools/subagent/tests.rs
+++ b/crates/tui/src/tools/subagent/tests.rs
@@ -1465,6 +1465,62 @@ async fn agent_eval_resolves_session_via_agent_name_alias() {
     assert_eq!(projection.status, "completed");
 }
 
+#[tokio::test]
+async fn agent_eval_follow_up_defaults_to_nonblocking_projection() {
+    let manager = Arc::new(RwLock::new(SubAgentManager::new(PathBuf::from("."), 1)));
+    let (input_tx, mut input_rx) = mpsc::unbounded_channel();
+    let agent = SubAgent::new(
+        "test_agent_running_eval".to_string(),
+        SubAgentType::Explore,
+        "map docs".to_string(),
+        make_assignment(),
+        "deepseek-v4-flash".to_string(),
+        Some("Blue".to_string()),
+        Some(vec!["read_file".to_string()]),
+        input_tx,
+        PathBuf::from("."),
+        "boot_test".to_string(),
+    );
+    let agent_id = agent.id.clone();
+    {
+        let mut guard = manager.write().await;
+        guard.agents.insert(agent_id.clone(), agent);
+    }
+
+    let ctx = ToolContext::new(".");
+    let tool = AgentEvalTool::new(manager.clone());
+    let result = tokio::time::timeout(
+        Duration::from_millis(100),
+        tool.execute(
+            json!({
+                "agent_id": agent_id,
+                "message": "please prioritize the newest user input"
+            }),
+            &ctx,
+        ),
+    )
+    .await
+    .expect("agent_eval should not wait for a running child by default")
+    .expect("agent_eval should return a projection");
+
+    let meta = result.metadata.expect("metadata present");
+    assert_eq!(meta["terminal"], json!(false));
+    assert_eq!(meta["timed_out"], json!(false));
+    assert_eq!(meta["message_delivery"]["delivered"], json!(true));
+
+    let delivered = tokio::time::timeout(Duration::from_millis(100), input_rx.recv())
+        .await
+        .expect("follow-up should be delivered without waiting")
+        .expect("follow-up message should exist");
+    assert_eq!(delivered.text, "please prioritize the newest user input");
+    assert!(!delivered.interrupt);
+
+    let projection: SubAgentSessionProjection =
+        serde_json::from_str(&result.content).expect("projection deserializes");
+    assert_eq!(projection.status, "running");
+    assert!(!projection.terminal);
+}
+
 #[tokio::test]
 async fn api_timeout_preserves_checkpoint_and_agent_eval_continues_from_it() {
     let tmp = tempdir().expect("tempdir");
diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs
index 6116134e..38d2c8ed 100644
--- a/crates/tui/src/tui/ui.rs
+++ b/crates/tui/src/tui/ui.rs
@@ -4433,20 +4433,25 @@ async fn run_event_loop(
                     app.delete_word_backward();
                 }
                 KeyCode::Char('s') | KeyCode::Char('S')
-                    if key.modifiers == KeyModifiers::CONTROL && !app.input.is_empty() =>
+                    if key.modifiers == KeyModifiers::CONTROL =>
                 {
+                    if send_ctrl_s_queued_message_now(app, config, &engine_handle).await? {
+                        continue;
+                    }
                     // #440: park the current draft to the persistent
                     // stash and clear the composer. Empty composers
                     // are a no-op so a stray Ctrl+S can't pollute the
                     // file. Surface a toast so the user sees the
                     // confirmation (no-op feels broken otherwise).
-                    crate::composer_stash::push_stash(&app.input);
-                    app.clear_input_recoverable();
-                    app.push_status_toast(
-                        "Draft stashed — `/stash pop` to restore",
-                        StatusToastLevel::Info,
-                        Some(3_000),
-                    );
+                    if !app.input.is_empty() {
+                        crate::composer_stash::push_stash(&app.input);
+                        app.clear_input_recoverable();
+                        app.push_status_toast(
+                            "Draft stashed — `/stash pop` to restore",
+                            StatusToastLevel::Info,
+                            Some(3_000),
+                        );
+                    }
                 }
                 KeyCode::Char('y') if key.modifiers.contains(KeyModifiers::CONTROL) => {
                     // #379: context-sensitive Ctrl+Y.
@@ -5359,6 +5364,65 @@ fn queue_current_draft_for_next_turn(app: &mut App) -> bool {
     true
 }
 
+fn take_ctrl_s_queued_message(app: &mut App) -> Option<QueuedMessage> {
+    if let Some(mut draft) = app.queued_draft.take() {
+        if let Some(input) = app.submit_input() {
+            draft.display = input;
+        }
+        return Some(draft);
+    }
+    if app.input.is_empty() {
+        return app.pop_queued_message();
+    }
+    None
+}
+
+async fn send_ctrl_s_queued_message_now(
+    app: &mut App,
+    config: &Config,
+    engine_handle: &EngineHandle,
+) -> Result<bool> {
+    let Some(message) = take_ctrl_s_queued_message(app) else {
+        return Ok(false);
+    };
+    if app.offline_mode {
+        app.queue_message(message);
+        app.status_message = Some(format!(
+            "Offline: {} queued — ↑ to edit, /queue list",
+            app.queued_message_count()
+        ));
+        return Ok(true);
+    }
+
+    let display = message.display.clone();
+    if app.is_loading {
+        if let Err(err) = steer_user_message(app, engine_handle, message.clone()).await {
+            app.queue_message(message);
+            app.status_message = Some(format!(
+                "Steer failed ({err}); {} queued — ↑ to edit, /queue list",
+                app.queued_message_count()
+            ));
+        } else {
+            app.push_status_toast(
+                "Sent queued message into current turn",
+                StatusToastLevel::Info,
+                Some(1_500),
+            );
+        }
+    } else if let Err(err) =
+        dispatch_user_message(app, config, engine_handle, message.clone()).await
+    {
+        app.queue_message(message);
+        app.status_message = Some(format!(
+            "Dispatch failed ({err}); kept {} queued message(s)",
+            app.queued_message_count()
+        ));
+    } else {
+        app.status_message = Some(format!("Sent queued message: {display}"));
+    }
+    Ok(true)
+}
+
 fn queued_message_content_for_app(
     app: &App,
     message: &QueuedMessage,
diff --git a/crates/tui/src/tui/ui/tests.rs b/crates/tui/src/tui/ui/tests.rs
index 09d5ccec..d2ad45ec 100644
--- a/crates/tui/src/tui/ui/tests.rs
+++ b/crates/tui/src/tui/ui/tests.rs
@@ -5715,6 +5715,57 @@ async fn steer_user_message_records_prompt_for_cancel_restore() {
     );
 }
 
+#[tokio::test]
+async fn ctrl_s_sends_next_queued_message_into_running_turn() {
+    let mut app = create_test_app();
+    app.is_loading = true;
+    app.queue_message(crate::tui::app::QueuedMessage::new(
+        "please attend to your sub agents".to_string(),
+        None,
+    ));
+    let config = Config::default();
+    let mut engine = crate::core::engine::mock_engine_handle();
+
+    assert!(
+        send_ctrl_s_queued_message_now(&mut app, &config, &engine.handle)
+            .await
+            .expect("ctrl+s send succeeds")
+    );
+
+    assert_eq!(app.queued_message_count(), 0);
+    assert_eq!(
+        engine.rx_steer.recv().await.as_deref(),
+        Some("please attend to your sub agents")
+    );
+}
+
+#[tokio::test]
+async fn ctrl_s_sends_edited_queued_draft_into_running_turn() {
+    let mut app = create_test_app();
+    app.is_loading = true;
+    app.queued_draft = Some(crate::tui::app::QueuedMessage::new(
+        "original queued follow-up".to_string(),
+        Some("skill body".to_string()),
+    ));
+    app.input = "edited queued follow-up".to_string();
+    app.cursor_position = app.input.chars().count();
+    let config = Config::default();
+    let mut engine = crate::core::engine::mock_engine_handle();
+
+    assert!(
+        send_ctrl_s_queued_message_now(&mut app, &config, &engine.handle)
+            .await
+            .expect("ctrl+s draft send succeeds")
+    );
+
+    assert!(app.queued_draft.is_none());
+    assert!(app.input.is_empty());
+    assert_eq!(app.queued_message_count(), 0);
+    let content = engine.rx_steer.recv().await.expect("steer content");
+    assert!(content.contains("edited queued follow-up"));
+    assert!(content.contains("skill body"));
+}
+
 #[tokio::test]
 async fn numeric_plan_choice_still_queues_follow_up_when_busy() {
     let mut app = create_test_app();
diff --git a/docs/SUBAGENTS.md b/docs/SUBAGENTS.md
index 72175fb8..fb9dddbc 100644
--- a/docs/SUBAGENTS.md
+++ b/docs/SUBAGENTS.md
@@ -254,11 +254,13 @@ workspace/branch, lifecycle events, artifact refs, follow-up target, takeover
 target, usage provenance, and verification provenance.
 
 `agent_eval` returns these fields at the top level of the session projection and
-inside `worker_record`. A running or continuable interrupted child should be
-continued through the returned `follow_up` target (`agent_eval` with the same
-agent id or session name). A local takeover should use the returned `takeover`
-instructions; unsupported future cases must say why instead of leaving the
-operator to guess.
+inside `worker_record`. It is nonblocking by default: use it to poll status or
+deliver follow-up input while the parent keeps coordinating. Pass `block:true`
+only when deliberately waiting for a terminal child result. A running or
+continuable interrupted child should be continued through the returned
+`follow_up` target (`agent_eval` with the same agent id or session name). A
+local takeover should use the returned `takeover` instructions; unsupported
+future cases must say why instead of leaving the operator to guess.
 
 Follow-up delivery is explicit. If a message was delivered, the worker record
 stores a bounded preview and timestamp. If the child had already terminated,
diff --git a/docs/TOOL_SURFACE.md b/docs/TOOL_SURFACE.md
index ace2bec8..67aa03e9 100644
--- a/docs/TOOL_SURFACE.md
+++ b/docs/TOOL_SURFACE.md
@@ -190,7 +190,7 @@ The active model-facing sub-agent surface is persistent and intentionally small:
 | Tool | Niche |
 |---|---|
 | `agent_open` | Open a named sub-agent session for independent work. Returns a session projection immediately so the parent can keep coordinating. |
-| `agent_eval` | Send follow-up input, block for completion, or fetch the current projection/transcript handle for an existing session. |
+| `agent_eval` | Send follow-up input or fetch the current projection/transcript handle for an existing session. Nonblocking by default; pass `block:true` only for a deliberate wait. |
 | `agent_close` | Cancel or release a sub-agent session by name or id. |
 
 See `agent.txt` for the delegation protocol and