diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1b613204..0c80ef8f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,45 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [0.8.16] - 2026-05-07
+
+A focused hotfix for v0.8.15 regressions in RLM, sub-agent visibility, and
+terminal ownership. This release keeps the v0.8.15 feature set intact while
+making long-running delegated work easier to inspect and safer to run.
+
+### Changed
+- **RLM has no fixed 180s wall-clock timeout** (#955) — RLM turns can continue
+ past the old hard limit when the long-input REPL is still making progress.
+- **RLM output is easier to audit** (#955) — final reports now include compact
+ execution metadata: input size, iteration count, elapsed time, sub-LLM RPC
+ count, and termination state.
+- **RLM chunking guidance is stricter for exact work** (#955) — prompts now
+ tell the sub-agent to use deterministic Python over the full `context` for
+ counts/aggregation and to report chunk coverage when splitting a whole input.
+- **Tool guidance is less defensive** (#955) — the system prompt now explains
+ when to use tools instead of discouraging the model from using capabilities
+ that are actually available.
+
+### Fixed
+- **Active RLM work stays visible** (#955) — foreground RLM calls surface in the
+ active task/right-rail state instead of leaving the Tasks panel saying
+ `No active tasks`.
+- **`/subagents` no longer reports false emptiness** (#955) — the sub-agent
+ overlay now includes live progress-only agents and transcript fanout workers
+ when the manager cache has not refreshed yet.
+- **Sub-agent cards are quieter and more useful** (#955) — low-signal scheduler
+ lines such as `step 1/100: requesting model response` are hidden, while
+ compact tool activity remains visible.
+- **Sub-agent completion protocol stays internal** (#955) — completion
+ sentinels are routed as internal runtime events instead of user messages, so
+ the parent agent does not explain raw protocol XML back to the user.
+- **Sub-agents cannot take over the parent terminal** (#955) — background
+ agents reject `exec_shell` with `interactive=true`; they can still use
+ non-interactive shell, background shell, `tty=true`, and task-shell tools.
+- **Terminal scrollback ownership is restored** (#955) — the TUI re-enters
+ alternate-screen mode after foreground/sub-agent work drains, preventing the
+ host terminal scrollbar from taking over the live interface.
+
## [0.8.15] - 2026-05-06
An auth, Windows, editor-integration, and setup stabilization release. This
diff --git a/Cargo.lock b/Cargo.lock
index 6a512918..8abc208d 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1080,7 +1080,7 @@ dependencies = [
[[package]]
name = "deepseek-agent"
-version = "0.8.15"
+version = "0.8.16"
dependencies = [
"deepseek-config",
"serde",
@@ -1088,7 +1088,7 @@ dependencies = [
[[package]]
name = "deepseek-app-server"
-version = "0.8.15"
+version = "0.8.16"
dependencies = [
"anyhow",
"axum",
@@ -1110,7 +1110,7 @@ dependencies = [
[[package]]
name = "deepseek-config"
-version = "0.8.15"
+version = "0.8.16"
dependencies = [
"anyhow",
"deepseek-secrets",
@@ -1122,7 +1122,7 @@ dependencies = [
[[package]]
name = "deepseek-core"
-version = "0.8.15"
+version = "0.8.16"
dependencies = [
"anyhow",
"chrono",
@@ -1140,7 +1140,7 @@ dependencies = [
[[package]]
name = "deepseek-execpolicy"
-version = "0.8.15"
+version = "0.8.16"
dependencies = [
"anyhow",
"deepseek-protocol",
@@ -1149,7 +1149,7 @@ dependencies = [
[[package]]
name = "deepseek-hooks"
-version = "0.8.15"
+version = "0.8.16"
dependencies = [
"anyhow",
"async-trait",
@@ -1163,7 +1163,7 @@ dependencies = [
[[package]]
name = "deepseek-mcp"
-version = "0.8.15"
+version = "0.8.16"
dependencies = [
"anyhow",
"serde",
@@ -1172,7 +1172,7 @@ dependencies = [
[[package]]
name = "deepseek-protocol"
-version = "0.8.15"
+version = "0.8.16"
dependencies = [
"serde",
"serde_json",
@@ -1180,7 +1180,7 @@ dependencies = [
[[package]]
name = "deepseek-secrets"
-version = "0.8.15"
+version = "0.8.16"
dependencies = [
"dirs",
"keyring",
@@ -1193,7 +1193,7 @@ dependencies = [
[[package]]
name = "deepseek-state"
-version = "0.8.15"
+version = "0.8.16"
dependencies = [
"anyhow",
"chrono",
@@ -1205,7 +1205,7 @@ dependencies = [
[[package]]
name = "deepseek-tools"
-version = "0.8.15"
+version = "0.8.16"
dependencies = [
"anyhow",
"async-trait",
@@ -1218,7 +1218,7 @@ dependencies = [
[[package]]
name = "deepseek-tui"
-version = "0.8.15"
+version = "0.8.16"
dependencies = [
"anyhow",
"arboard",
@@ -1278,7 +1278,7 @@ dependencies = [
[[package]]
name = "deepseek-tui-cli"
-version = "0.8.15"
+version = "0.8.16"
dependencies = [
"anyhow",
"chrono",
@@ -1302,7 +1302,7 @@ dependencies = [
[[package]]
name = "deepseek-tui-core"
-version = "0.8.15"
+version = "0.8.16"
[[package]]
name = "deranged"
diff --git a/Cargo.toml b/Cargo.toml
index 5dbd2fe5..8dd07166 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -19,7 +19,7 @@ default-members = ["crates/cli", "crates/app-server", "crates/tui"]
resolver = "2"
[workspace.package]
-version = "0.8.15"
+version = "0.8.16"
edition = "2024"
# Rust 1.88 stabilized `let_chains` in `if`/`while` conditions, which the
# codebase relies on extensively. Cargo enforces this so users on older
diff --git a/README.md b/README.md
index dec3b51d..2ae37f89 100644
--- a/README.md
+++ b/README.md
@@ -202,18 +202,28 @@ deepseek --provider ollama --model deepseek-coder:1.3b
---
-## What's New In v0.8.15
+## What's New In v0.8.16
-A community-driven stabilization release focused on auth recovery, Windows
-terminals, Zed/ACP compatibility, setup friction, and clearer cost display.
+A focused hotfix for RLM, sub-agent visibility, and terminal ownership on top
+of v0.8.15.
[Full changelog](CHANGELOG.md).
-- **Friendlier auth recovery** — runtime API-key failures now explain when the active key came only from `DEEPSEEK_API_KEY` and no saved config key is present
-- **Zed / ACP adapter** — `deepseek serve --acp` exposes a local stdio Agent Client Protocol server for Zed and other compatible editors
-- **Windows terminal fixes** — UTF-8 console setup, dispatcher resume handling, clipboard fallback, Ctrl+E composer behavior, and safer Windows mouse defaults
-- **Yuan cost display** — set `cost_currency = "cny"` (or `yuan` / `rmb`) to show footer, `/cost`, `/tokens`, and notification summaries in CNY
-- **Setup and skill polish** — workspace trust persists globally, plain Markdown `SKILL.md` files load correctly, global Agents/Cursor skill paths are discovered, and the TUI shows skills in slash autocomplete
-- **Reliability fixes** — workspace-scoped `resume --last`, capped API `max_tokens`, endpoint diagnostics in `deepseek doctor`, npm `--version` fallback, and current-date turn metadata
+- **RLM no longer has the old 180s wall-clock timeout** — long-input REPL work
+ can keep running while it is still making progress.
+- **RLM reports what happened** — output now includes input size, iteration
+ count, elapsed time, sub-LLM RPC count, and termination state.
+- **RLM chunking is safer for exact answers** — prompts require deterministic
+ Python for counts/aggregation and coverage reporting for whole-input chunks.
+- **Sub-agent visibility is more truthful** — `/subagents`, the transcript, and
+ the right rail include live progress and fanout workers instead of showing
+ false `No agents` or `No active tasks` states.
+- **Sub-agent cards are quieter** — internal scheduler lines are hidden while
+ useful tool activity remains visible.
+- **Sub-agent completion events stay internal** — the parent agent integrates
+ child results without explaining raw sentinel XML back to the user.
+- **Terminal ownership is hardened** — background sub-agents cannot take over
+ the parent terminal, and the TUI restores alternate-screen mode after
+ delegated work drains.
---
diff --git a/crates/agent/Cargo.toml b/crates/agent/Cargo.toml
index e9fb24a5..e66af62c 100644
--- a/crates/agent/Cargo.toml
+++ b/crates/agent/Cargo.toml
@@ -7,5 +7,5 @@ repository.workspace = true
description = "Model/provider registry and fallback strategy for DeepSeek workspace architecture"
[dependencies]
-deepseek-config = { path = "../config", version = "0.8.15" }
+deepseek-config = { path = "../config", version = "0.8.16" }
serde.workspace = true
diff --git a/crates/app-server/Cargo.toml b/crates/app-server/Cargo.toml
index 3e1225ec..54b69329 100644
--- a/crates/app-server/Cargo.toml
+++ b/crates/app-server/Cargo.toml
@@ -10,15 +10,15 @@ description = "Codex-style app-server transport for DeepSeek workspace architect
anyhow.workspace = true
axum.workspace = true
clap.workspace = true
-deepseek-agent = { path = "../agent", version = "0.8.15" }
-deepseek-config = { path = "../config", version = "0.8.15" }
-deepseek-core = { path = "../core", version = "0.8.15" }
-deepseek-execpolicy = { path = "../execpolicy", version = "0.8.15" }
-deepseek-hooks = { path = "../hooks", version = "0.8.15" }
-deepseek-mcp = { path = "../mcp", version = "0.8.15" }
-deepseek-protocol = { path = "../protocol", version = "0.8.15" }
-deepseek-state = { path = "../state", version = "0.8.15" }
-deepseek-tools = { path = "../tools", version = "0.8.15" }
+deepseek-agent = { path = "../agent", version = "0.8.16" }
+deepseek-config = { path = "../config", version = "0.8.16" }
+deepseek-core = { path = "../core", version = "0.8.16" }
+deepseek-execpolicy = { path = "../execpolicy", version = "0.8.16" }
+deepseek-hooks = { path = "../hooks", version = "0.8.16" }
+deepseek-mcp = { path = "../mcp", version = "0.8.16" }
+deepseek-protocol = { path = "../protocol", version = "0.8.16" }
+deepseek-state = { path = "../state", version = "0.8.16" }
+deepseek-tools = { path = "../tools", version = "0.8.16" }
serde.workspace = true
serde_json.workspace = true
tokio.workspace = true
diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml
index f7a6ab89..1f049c0c 100644
--- a/crates/cli/Cargo.toml
+++ b/crates/cli/Cargo.toml
@@ -14,13 +14,13 @@ path = "src/main.rs"
anyhow.workspace = true
clap.workspace = true
clap_complete.workspace = true
-deepseek-agent = { path = "../agent", version = "0.8.15" }
-deepseek-app-server = { path = "../app-server", version = "0.8.15" }
-deepseek-config = { path = "../config", version = "0.8.15" }
-deepseek-execpolicy = { path = "../execpolicy", version = "0.8.15" }
-deepseek-mcp = { path = "../mcp", version = "0.8.15" }
-deepseek-secrets = { path = "../secrets", version = "0.8.15" }
-deepseek-state = { path = "../state", version = "0.8.15" }
+deepseek-agent = { path = "../agent", version = "0.8.16" }
+deepseek-app-server = { path = "../app-server", version = "0.8.16" }
+deepseek-config = { path = "../config", version = "0.8.16" }
+deepseek-execpolicy = { path = "../execpolicy", version = "0.8.16" }
+deepseek-mcp = { path = "../mcp", version = "0.8.16" }
+deepseek-secrets = { path = "../secrets", version = "0.8.16" }
+deepseek-state = { path = "../state", version = "0.8.16" }
chrono.workspace = true
dirs.workspace = true
serde.workspace = true
diff --git a/crates/config/Cargo.toml b/crates/config/Cargo.toml
index 3b4ca5b8..8d59c75e 100644
--- a/crates/config/Cargo.toml
+++ b/crates/config/Cargo.toml
@@ -8,7 +8,7 @@ description = "Config schema and precedence model for DeepSeek workspace archite
[dependencies]
anyhow.workspace = true
-deepseek-secrets = { path = "../secrets", version = "0.8.15" }
+deepseek-secrets = { path = "../secrets", version = "0.8.16" }
dirs.workspace = true
serde.workspace = true
toml.workspace = true
diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml
index c7bc7820..6f6c437d 100644
--- a/crates/core/Cargo.toml
+++ b/crates/core/Cargo.toml
@@ -9,13 +9,13 @@ description = "Core runtime boundaries for DeepSeek workspace architecture"
[dependencies]
anyhow.workspace = true
chrono.workspace = true
-deepseek-agent = { path = "../agent", version = "0.8.15" }
-deepseek-config = { path = "../config", version = "0.8.15" }
-deepseek-execpolicy = { path = "../execpolicy", version = "0.8.15" }
-deepseek-hooks = { path = "../hooks", version = "0.8.15" }
-deepseek-mcp = { path = "../mcp", version = "0.8.15" }
-deepseek-protocol = { path = "../protocol", version = "0.8.15" }
-deepseek-state = { path = "../state", version = "0.8.15" }
-deepseek-tools = { path = "../tools", version = "0.8.15" }
+deepseek-agent = { path = "../agent", version = "0.8.16" }
+deepseek-config = { path = "../config", version = "0.8.16" }
+deepseek-execpolicy = { path = "../execpolicy", version = "0.8.16" }
+deepseek-hooks = { path = "../hooks", version = "0.8.16" }
+deepseek-mcp = { path = "../mcp", version = "0.8.16" }
+deepseek-protocol = { path = "../protocol", version = "0.8.16" }
+deepseek-state = { path = "../state", version = "0.8.16" }
+deepseek-tools = { path = "../tools", version = "0.8.16" }
serde_json.workspace = true
uuid.workspace = true
diff --git a/crates/execpolicy/Cargo.toml b/crates/execpolicy/Cargo.toml
index f571d14e..d7517dab 100644
--- a/crates/execpolicy/Cargo.toml
+++ b/crates/execpolicy/Cargo.toml
@@ -8,5 +8,5 @@ description = "Execution policy and approval model parity for DeepSeek workspace
[dependencies]
anyhow.workspace = true
-deepseek-protocol = { path = "../protocol", version = "0.8.15" }
+deepseek-protocol = { path = "../protocol", version = "0.8.16" }
serde.workspace = true
diff --git a/crates/hooks/Cargo.toml b/crates/hooks/Cargo.toml
index f63b33ef..a59432cb 100644
--- a/crates/hooks/Cargo.toml
+++ b/crates/hooks/Cargo.toml
@@ -10,7 +10,7 @@ description = "Hook dispatch and notifications parity for DeepSeek workspace arc
anyhow.workspace = true
async-trait.workspace = true
chrono.workspace = true
-deepseek-protocol = { path = "../protocol", version = "0.8.15" }
+deepseek-protocol = { path = "../protocol", version = "0.8.16" }
reqwest.workspace = true
serde.workspace = true
serde_json.workspace = true
diff --git a/crates/tools/Cargo.toml b/crates/tools/Cargo.toml
index a327a580..347f8450 100644
--- a/crates/tools/Cargo.toml
+++ b/crates/tools/Cargo.toml
@@ -9,7 +9,7 @@ description = "Tool invocation lifecycle, schema validation, and scheduler paral
[dependencies]
anyhow.workspace = true
async-trait.workspace = true
-deepseek-protocol = { path = "../protocol", version = "0.8.15" }
+deepseek-protocol = { path = "../protocol", version = "0.8.16" }
serde.workspace = true
serde_json.workspace = true
tokio.workspace = true
diff --git a/crates/tui/Cargo.toml b/crates/tui/Cargo.toml
index c0caa9b8..3b9cd503 100644
--- a/crates/tui/Cargo.toml
+++ b/crates/tui/Cargo.toml
@@ -21,8 +21,8 @@ path = "src/main.rs"
[dependencies]
anyhow = "1.0.100"
arboard = "3.4"
-deepseek-secrets = { path = "../secrets", version = "0.8.15" }
-deepseek-tools = { path = "../tools", version = "0.8.15" }
+deepseek-secrets = { path = "../secrets", version = "0.8.16" }
+deepseek-tools = { path = "../tools", version = "0.8.16" }
schemaui = { version = "0.12.0", default-features = false, optional = true }
async-stream = "0.3.6"
async-trait = "0.1"
diff --git a/crates/tui/src/client/chat.rs b/crates/tui/src/client/chat.rs
index b0f76ab9..af1c8d53 100644
--- a/crates/tui/src/client/chat.rs
+++ b/crates/tui/src/client/chat.rs
@@ -538,6 +538,14 @@ fn build_chat_messages_with_reasoning(
pending_tool_calls.clear();
}
out.push(msg);
+ } else if role == "system" {
+ let content = text_parts.join("\n");
+ if !content.trim().is_empty() {
+ out.push(json!({
+ "role": "system",
+ "content": content,
+ }));
+ }
} else if role == "user" {
let content = text_parts.join("\n");
if !content.trim().is_empty() {
@@ -1601,4 +1609,21 @@ mod stream_decoder_tests {
.expect("tool-use block present");
assert_eq!(id, "call_xyz");
}
+
+ /// A system-role message in the session history must come out of the request
+ /// builder as a single `system` entry with its text intact.
+ #[test]
+ fn request_builder_preserves_internal_system_messages() {
+     let system_event = Message {
+         role: "system".to_string(),
+         content: vec![ContentBlock::Text {
+             text: "internal runtime event".to_string(),
+             cache_control: None,
+         }],
+     };
+     let history = vec![system_event];
+
+     let built = build_chat_messages(None, &history, "deepseek-v4-flash");
+
+     assert_eq!(built.len(), 1);
+     let only = &built[0];
+     assert_eq!(only["role"], "system");
+     assert_eq!(only["content"], "internal runtime event");
+ }
}
diff --git a/crates/tui/src/commands/core.rs b/crates/tui/src/commands/core.rs
index c5e047c8..259ecd3d 100644
--- a/crates/tui/src/commands/core.rs
+++ b/crates/tui/src/commands/core.rs
@@ -5,7 +5,7 @@ use std::fmt::Write;
use crate::config::{COMMON_DEEPSEEK_MODELS, normalize_model_name};
use crate::localization::{MessageId, tr};
use crate::tui::app::{App, AppAction, AppMode, ReasoningEffort};
-use crate::tui::views::{HelpView, ModalKind, SubAgentsView};
+use crate::tui::views::{HelpView, ModalKind, SubAgentsView, subagent_view_agents};
use super::CommandResult;
@@ -140,8 +140,8 @@ pub fn models(_app: &mut App) -> CommandResult {
/// List sub-agent status from the engine
pub fn subagents(app: &mut App) -> CommandResult {
if app.view_stack.top_kind() != Some(ModalKind::SubAgents) {
- app.view_stack
- .push(SubAgentsView::new(app.subagent_cache.clone()));
+ let agents = subagent_view_agents(app, &app.subagent_cache);
+ app.view_stack.push(SubAgentsView::new(agents));
}
app.status_message = Some(tr(app.ui_locale, MessageId::SubagentsFetching).to_string());
CommandResult::action(AppAction::ListSubAgents)
diff --git a/crates/tui/src/core/engine/turn_loop.rs b/crates/tui/src/core/engine/turn_loop.rs
index f0461fad..55b57896 100644
--- a/crates/tui/src/core/engine/turn_loop.rs
+++ b/crates/tui/src/core/engine/turn_loop.rs
@@ -906,17 +906,8 @@ impl Engine {
if !completions.is_empty() {
let count = completions.len();
for c in completions {
- self.session
- .working_set
- .observe_user_message(&c.payload, &self.session.workspace);
- self.add_session_message(Message {
- role: "user".to_string(),
- content: vec![ContentBlock::Text {
- text: c.payload,
- cache_control: None,
- }],
- })
- .await;
+ self.add_session_message(subagent_completion_runtime_message(&c.payload))
+ .await;
}
let _ = self
.tx_event
@@ -1860,6 +1851,24 @@ impl Engine {
}
}
+/// Builds the session message that hands a sub-agent completion back to the
+/// parent agent.
+///
+/// The returned message has role `system` and a single text block: a fixed
+/// preamble telling the model that this is an internal runtime event rather
+/// than user input, followed by `payload` inserted verbatim. No cache-control
+/// marker is attached.
+fn subagent_completion_runtime_message(payload: &str) -> Message {
+ Message {
+ // Recorded under the system role so the model does not read it as a user turn.
+ role: "system".to_string(),
+ content: vec![ContentBlock::Text {
+ text: format!(
+ "\n\
+This is an internal runtime event, not user input. Use the sub-agent completion \
+data below to continue coordinating the current task. Do not tell the user they \
+pasted sentinels, do not explain the sentinel protocol, and do not quote the raw \
+XML unless the user explicitly asks to debug sub-agent internals.\n\n\
+{payload}\n\
+"
+ ),
+ cache_control: None,
+ }],
+ }
+}
+
/// Resolve an `"auto"` reasoning-effort tier to a concrete value.
///
/// When the configured effort is `"auto"`, inspects the last user message
@@ -1905,3 +1914,25 @@ fn resolve_auto_effort(reasoning_effort: Option<&str>, messages: &[Message]) ->
None => None,
}
}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// The completion hand-off must be a system-role message that carries the
+    /// "internal runtime event" instructions and the child's payload verbatim.
+    #[test]
+    fn subagent_completion_handoff_is_internal_system_message() {
+        let payload = "Build passed\n{\"agent_id\":\"agent_a\"}";
+        let message = subagent_completion_runtime_message(payload);
+
+        assert_eq!(message.role, "system");
+        assert_eq!(
+            message.content.len(),
+            1,
+            "expected exactly one content block"
+        );
+        let text = match &message.content[0] {
+            ContentBlock::Text { text, .. } => text,
+            other => panic!("expected text block, got {other:?}"),
+        };
+        assert!(text.contains("internal runtime event, not user input"));
+        assert!(text.contains("Do not tell the user they pasted sentinels"));
+        // An empty pattern matches every string, so assert on the payload itself
+        // to prove the completion data actually reaches the model.
+        assert!(
+            text.contains(payload),
+            "completion payload must be embedded verbatim"
+        );
+    }
+}
diff --git a/crates/tui/src/prompts.rs b/crates/tui/src/prompts.rs
index 20e0a46d..d77ca63a 100644
--- a/crates/tui/src/prompts.rs
+++ b/crates/tui/src/prompts.rs
@@ -608,6 +608,15 @@ mod tests {
assert!(prompt.contains("Approval Policy: Suggest"));
}
+ /// Release guard: the crate version baked in at compile time must match the
+ /// hotfix version this branch is meant to publish.
+ #[test]
+ fn package_version_is_current_hotfix_release() {
+     let reported = env!("CARGO_PKG_VERSION");
+     let expected = "0.8.16";
+     assert_eq!(
+         reported, expected,
+         "0.8.16 hotfix branch must report the release version before publishing"
+     );
+ }
+
#[test]
fn compose_prompt_deterministic_order() {
let prompt = compose_prompt(AppMode::Yolo, Personality::Calm);
@@ -721,14 +730,18 @@ mod tests {
}
#[test]
- fn when_not_to_use_sections_present() {
+ fn tool_selection_guide_avoids_defensive_tool_suppression() {
let prompt = compose_prompt(AppMode::Agent, Personality::Calm);
- assert!(prompt.contains("When NOT to use certain tools"));
- assert!(prompt.contains("### `apply_patch`"));
- assert!(prompt.contains("### `edit_file`"));
- assert!(prompt.contains("### `exec_shell`"));
- assert!(prompt.contains("### `agent_spawn`"));
- assert!(prompt.contains("### `rlm`"));
+ assert!(prompt.contains("Tool Selection Guide"));
+ assert!(prompt.contains("Use `agent_result`"));
+ assert!(
+ !prompt.contains("When NOT to use certain tools"),
+ "the system prompt should steer tool choice without training the model to avoid available tools"
+ );
+ assert!(
+ !prompt.contains("Don't reach for"),
+ "avoid defensive anti-tool wording in the base prompt"
+ );
}
/// #588: language-mirroring directive must ship in every mode so
@@ -766,7 +779,7 @@ mod tests {
fn rlm_specialty_tool_guidance_present() {
let prompt = compose_prompt(AppMode::Agent, Personality::Calm);
// Structural: the RLM heading must exist as a section anchor.
- assert!(prompt.contains("RLM — When to Use It"));
+ assert!(prompt.contains("RLM — How to Use It"));
// Structural: the word "rlm" must appear multiple times (tool
// name, section heading, toolbox reference). Just verify the
// lowercase form — exact wording is NOT a test concern.
@@ -775,14 +788,20 @@ mod tests {
rlm_count >= 5,
"RLM guidance present: expected >= 5 mentions of 'rlm', got {rlm_count}"
);
+ assert!(
+ !prompt.contains("When NOT to use RLM"),
+ "RLM guidance should explain fit and verification without telling the model to avoid the tool"
+ );
}
#[test]
fn subagent_done_sentinel_section_present() {
let prompt = compose_prompt(AppMode::Agent, Personality::Calm);
- assert!(prompt.contains("Sub-agent completion sentinel"));
+ assert!(prompt.contains("Internal Sub-agent Completion Events"));
assert!(prompt.contains(""));
+ assert!(prompt.contains("not user input"));
assert!(prompt.contains("Integration protocol"));
+ assert!(prompt.contains("Do not tell the user they pasted sentinels"));
}
#[test]
diff --git a/crates/tui/src/prompts/base.md b/crates/tui/src/prompts/base.md
index 2840562d..97cec022 100644
--- a/crates/tui/src/prompts/base.md
+++ b/crates/tui/src/prompts/base.md
@@ -85,7 +85,7 @@ Before you fire any tool, scan your checklist: is there another tool you could r
The dispatcher runs parallel tool calls simultaneously. Serializing independent operations wastes the user's time and grows your context faster than necessary.
-## RLM — When to Use It
+## RLM — How to Use It
RLM loads input into a Python REPL where you write code that calls sub-LLM helpers (`llm_query`, `llm_query_batched`, `rlm_query`). Three patterns, not one — choose based on the shape of the work:
@@ -95,7 +95,7 @@ RLM loads input into a Python REPL where you write code that calls sub-LLM helpe
**RECURSE** — A problem that benefits from decomposition + critique. Use `rlm_query` to have a sub-LLM review your reasoning, identify gaps, or explore alternative approaches. The sub-LLM returns a synthesized answer you verify against live tool output.
-**When NOT to use RLM**: a single short file you can read directly; a simple classification on 3 items; interactive iterative exploration (RLM is one-shot batch). For those, `read_file`, `grep_files`, or `agent_spawn` are faster and cheaper.
+For exact counts or structured aggregates, compute them directly in Python inside the REPL (`len`, regexes, parsers, counters) and use child LLM calls only for semantic interpretation. When you chunk a whole input, use `chunk_context()` plus `chunk_coverage()` and report coverage explicitly: chunks processed, total chunks, line/char ranges, and any skipped sections. Cross-check surprising aggregate results with deterministic code before presenting them.
The Python helpers visible inside the REPL (`llm_query`, `llm_query_batched`, `rlm_query`, `rlm_query_batched`) are NOT separately-callable tools — they are functions the sub-agent uses inside its Python code. You only call `rlm` itself from the model side.
@@ -147,48 +147,28 @@ When context is deep (past a soft seam): cache reasoning conclusions in concise
Multiple `tool_calls` in one turn run in parallel. `web_search` returns `ref_id`s — cite as `(ref_id)`.
-## When NOT to use certain tools
+## Tool Selection Guide
### `apply_patch`
-Don't reach for `apply_patch` when:
-- You're creating a brand-new file — use `write_file`.
-- The change is a single search/replace in one location — `edit_file` is simpler and less error-prone.
-- You haven't read the target file yet. Patches written blind almost always fail to apply.
-- The file is short enough to rewrite whole — `write_file` with full content avoids fuzz matching entirely.
+Use `apply_patch` for structural edits, coordinated changes, or cases where line context matters. Use `write_file` for brand-new files or full-file rewrites. Use `edit_file` for a single unambiguous replacement.
### `edit_file`
-Don't reach for `edit_file` when:
-- You're making coordinated changes across many files — `apply_patch` with a multi-file diff is atomic.
-- You need to insert or delete whole blocks of lines — `apply_patch` handles structural edits more cleanly.
-- The search string is ambiguous or could match multiple locations — `apply_patch` with line-number context is more precise.
-- You're creating a new file — `write_file` is the correct tool.
+Use `edit_file` for one clear replacement in one file. Use `apply_patch` when the edit changes whole blocks, touches multiple files, or needs surrounding line context.
### `exec_shell`
-Don't reach for `exec_shell` when:
-- A structured tool already covers the same operation: `grep_files` for code search, `git_status`/`git_diff` for git inspection, `read_file` for file contents.
-- You just need to read or write a file — `read_file` / `write_file` are faster and show up in the tool log.
-- The command is a single `cat`, `ls`, or `echo` — use `read_file`, `list_dir`, or just state the result.
-- You're tempted to pipe `curl` for a web lookup — `web_search` or `fetch_url` give structured results.
-- The command may run for minutes, start a server, run a full test suite, or perform a scientific/release computation — use `task_shell_start` or `exec_shell` with `background: true`, then poll with `task_shell_wait` or `exec_shell_wait`.
+Use `exec_shell` for shell-native diagnostics, pipelines, and bounded commands. Use structured tools for structured operations when they map directly (`grep_files`, `git_diff`, `read_file`). For long commands, servers, full test suites, or release computations, start background work with `task_shell_start` or `exec_shell` using `background: true`, then poll with `task_shell_wait` or `exec_shell_wait`.
### `agent_spawn`
-Don't reach for `agent_spawn` when:
-- The task is a single read or search you can do in one turn — spawning has overhead.
-- You need sequential steps where each depends on the prior result — run them yourself, in order.
-- The work can be done with a fast `exec_shell` pipeline or a `grep_files` call.
+Use `agent_spawn` for independent investigations or implementation slices that can run while you continue coordinating. Use `agent_wait` when you need one or more completions. Use `agent_result` when the sentinel summary is too thin or you need the full structured output. Keep tiny single-read/search tasks local so the transcript stays compact.
### `rlm`
-Don't reach for `rlm` (the recursive language model tool) when:
-- The input fits comfortably in your context window and the task is straightforward — just read it directly with `read_file`.
-- A simple `grep_files` or `exec_shell` pipeline can answer the question.
-- You need interactive, iterative exploration of the data — `rlm` is batch-oriented (the sub-LLM writes Python in one shot, then returns).
-- The task is a simple classification or extraction on short text — your own reasoning is faster and cheaper.
+Use `rlm` for long-context semantic work, bulk classification/extraction, and decomposition where a Python REPL plus child LLM helpers is useful. Use deterministic Python inside RLM for exact counts and structured aggregation; use `grep_files` or `exec_shell` directly when that is the clearest deterministic check.
Inside the `rlm` REPL, the sub-LLM has access to `llm_query()`, `llm_query_batched()`, `rlm_query()`, and `rlm_query_batched()` as Python helpers for further sub-LLM work — those are not standalone tools you call directly.
-## Sub-agent completion sentinel
+## Internal Sub-agent Completion Events
-When you spawn a sub-agent via `agent_spawn`, the child runs independently. You will receive a `` element in the transcript when it finishes. This sentinel carries:
+When you spawn a sub-agent via `agent_spawn`, the child runs independently. The runtime may send you an internal sub-agent completion event (an XML sentinel element) when it finishes. This event is not user input. It carries:
- `agent_id` — the child's identifier
- `summary` — a human-readable summary of what the child found or did
@@ -201,6 +181,7 @@ When you spawn a sub-agent via `agent_spawn`, the child runs independently. You
3. If the summary is insufficient, call `agent_result` to pull the full structured result.
4. If the child failed (`"failed"`), assess whether the failure blocks your plan or whether you can proceed with a fallback.
5. Update your `checklist_write` items to reflect the child's contribution.
+6. Do not tell the user they pasted sentinels or explain this protocol unless they explicitly ask about sub-agent internals.
You may see multiple completion sentinels in a single turn when children were spawned in parallel. Process each one, then synthesize.
diff --git a/crates/tui/src/repl/runtime.rs b/crates/tui/src/repl/runtime.rs
index 3b5cdfc9..449cdf98 100644
--- a/crates/tui/src/repl/runtime.rs
+++ b/crates/tui/src/repl/runtime.rs
@@ -144,6 +144,7 @@ pub struct PythonRuntime {
stdout_limit: usize,
round_count: u64,
started: Instant,
+ round_timeout: Option,
}
impl PythonRuntime {
@@ -151,7 +152,7 @@ impl PythonRuntime {
/// Used by the agent loop for inline `repl` blocks the model emits in
/// regular conversation.
pub async fn new() -> Result {
- Self::spawn_inner(None).await
+ Self::spawn_inner(None, Some(ROUND_TIMEOUT)).await
}
/// Compatibility shim — older RLM code path used to pass a state file.
@@ -172,10 +173,13 @@ impl PythonRuntime {
/// Spawn a REPL with `context` (and `ctx`) preloaded from a file. Used
/// by the RLM turn loop.
pub async fn spawn_with_context(context_path: &Path) -> Result {
- Self::spawn_inner(Some(context_path)).await
+ Self::spawn_inner(Some(context_path), None).await
}
- async fn spawn_inner(context_path: Option<&Path>) -> Result {
+ async fn spawn_inner(
+ context_path: Option<&Path>,
+ round_timeout: Option,
+ ) -> Result {
let session_id = Uuid::new_v4().simple().to_string();
let bootstrap = render_bootstrap(&session_id);
@@ -215,6 +219,7 @@ impl PythonRuntime {
stdout_limit: DEFAULT_STDOUT_LIMIT,
round_count: 0,
started: Instant::now(),
+ round_timeout,
};
// Wait for `__RLM_READY___` before handing control back. If
@@ -298,6 +303,7 @@ impl PythonRuntime {
let mut final_value: Option = None;
let mut had_error = false;
let mut rpc_count: u32 = 0;
+ let round_timeout = self.round_timeout;
let read_loop = async {
loop {
@@ -360,15 +366,19 @@ impl PythonRuntime {
Ok::<_, String>(())
};
- match tokio::time::timeout(ROUND_TIMEOUT, read_loop).await {
- Ok(Ok(())) => {}
- Ok(Err(e)) => return Err(e),
- Err(_) => {
- return Err(format!(
- "REPL round timed out after {}s",
- ROUND_TIMEOUT.as_secs()
- ));
+ if let Some(round_timeout) = round_timeout {
+ match tokio::time::timeout(round_timeout, read_loop).await {
+ Ok(Ok(())) => {}
+ Ok(Err(e)) => return Err(e),
+ Err(_) => {
+ return Err(format!(
+ "REPL round timed out after {}s",
+ round_timeout.as_secs()
+ ));
+ }
}
+ } else {
+ read_loop.await?;
}
let stderr = self.drain_stderr().await;
@@ -429,6 +439,12 @@ impl PythonRuntime {
self.round_count
}
+    /// Per-round timeout policy for this runtime: `Some(ROUND_TIMEOUT)` for inline
+    /// REPLs created by `new()`, `None` for RLM context runs started through
+    /// `spawn_with_context()` so long map-reduce jobs are not killed by the old
+    /// 180s cap.
+    pub fn round_timeout(&self) -> Option<Duration> {
+        self.round_timeout
+    }
+
/// Wall-clock uptime since spawn.
pub fn uptime(&self) -> Duration {
self.started.elapsed()
@@ -578,6 +594,55 @@ def repl_get(name, default=None):
def repl_set(name, value):
globals()[str(name)] = value
+def chunk_context(max_chars=20000, overlap=0):
+ """Return full-coverage context chunks with index/start/end/text fields."""
+ max_chars = int(max_chars)
+ overlap = max(0, int(overlap))
+ if max_chars <= 0:
+ raise ValueError("max_chars must be > 0")
+ if overlap >= max_chars:
+ raise ValueError("overlap must be smaller than max_chars")
+ chunks = []
+ start = 0
+ idx = 0
+ total = len(context)
+ while start < total:
+ end = min(total, start + max_chars)
+ chunks.append({"index": idx, "start": start, "end": end, "text": context[start:end]})
+ idx += 1
+ if end >= total:
+ break
+ start = end - overlap
+ return chunks
+
+def chunk_coverage(chunks):
+ """Summarize coverage for chunks produced by chunk_context()."""
+ spans = []
+ for c in chunks:
+ try:
+ spans.append((int(c["start"]), int(c["end"])))
+ except Exception:
+ continue
+ spans.sort()
+ covered = 0
+ cursor = 0
+ gaps = []
+ for start, end in spans:
+ if start > cursor:
+ gaps.append((cursor, start))
+ if end > cursor:
+ covered += end - max(start, cursor)
+ cursor = end
+ if cursor < len(context):
+ gaps.append((cursor, len(context)))
+ return {
+ "chunks": len(chunks),
+ "context_chars": len(context),
+ "covered_chars": covered,
+ "gaps": gaps,
+ "complete": covered >= len(context) and not gaps,
+ }
+
# Load the long input as `context` (and `ctx`) from a file. This keeps the
# big string out of the process command-line and out of the LLM's window.
_ctx_file = _os.environ.get("RLM_CONTEXT_FILE","")
@@ -595,6 +660,7 @@ _BOOTSTRAP_NAMES = {
"_rpc","_ctx_file","_BOOTSTRAP_NAMES","_main_loop",
"llm_query","llm_query_batched","rlm_query","rlm_query_batched",
"FINAL","FINAL_VAR","SHOW_VARS","repl_get","repl_set",
+ "chunk_context","chunk_coverage",
"context","ctx",
"_json","_os","_sys","_traceback",
}
@@ -773,6 +839,44 @@ mod tests {
rt.shutdown().await;
}
+ #[tokio::test]
+ async fn context_chunk_helpers_report_full_coverage() { // Runs chunk_context/chunk_coverage inside a context-backed REPL.
+ let path = write_temp_context("abcdefghijklmnopqrstuvwxyz"); // 26-character context
+ let mut rt = PythonRuntime::spawn_with_context(&path)
+ .await
+ .expect("spawn");
+ let round = rt
+ .execute(
+ "chunks = chunk_context(max_chars=10)\n\
+ coverage = chunk_coverage(chunks)\n\
+ print(len(chunks), coverage['covered_chars'], coverage['complete'])",
+ )
+ .await
+ .expect("execute");
+ assert!(round.stdout.contains("3 26 True"), "{}", round.stdout); // 3 chunks, 26 covered chars, complete
+ rt.shutdown().await;
+ }
+
+ #[tokio::test]
+ async fn rlm_context_runtime_has_no_fixed_round_timeout() { // Context-backed runtimes must report no per-round timeout.
+ let path = write_temp_context("long input");
+ let rt = PythonRuntime::spawn_with_context(&path)
+ .await
+ .expect("spawn");
+ assert!(
+ rt.round_timeout().is_none(),
+ "RLM context runs must not inherit the old 180s REPL round timeout"
+ );
+ rt.shutdown().await;
+ }
+
+ #[tokio::test]
+ async fn inline_runtime_keeps_bounded_round_timeout() { // Runtimes from `new()` keep the ROUND_TIMEOUT bound.
+ let rt = PythonRuntime::new().await.expect("spawn");
+ assert_eq!(rt.round_timeout(), Some(ROUND_TIMEOUT));
+ rt.shutdown().await;
+ }
+
#[tokio::test]
async fn final_is_captured() {
let mut rt = PythonRuntime::new().await.expect("spawn");
diff --git a/crates/tui/src/rlm/prompt.rs b/crates/tui/src/rlm/prompt.rs
index 5553d2cf..39f5d6cd 100644
--- a/crates/tui/src/rlm/prompt.rs
+++ b/crates/tui/src/rlm/prompt.rs
@@ -19,6 +19,8 @@ The REPL exposes:
- `llm_query_batched(prompts, model=None)` — concurrent fan-out. Returns `list[str]` in input order. The `model` argument is accepted for compatibility but ignored.
- `rlm_query(prompt, model=None)` — recursive sub-RLM. Use when a sub-task itself needs decomposition. The `model` argument is accepted for compatibility but ignored.
- `rlm_query_batched(prompts, model=None)` — concurrent recursive sub-RLMs. The `model` argument is accepted for compatibility but ignored.
+- `chunk_context(max_chars=20000, overlap=0)` — full-coverage chunks with index/start/end/text fields.
+- `chunk_coverage(chunks)` — coverage summary for chunks produced by `chunk_context`.
- `SHOW_VARS()` — list user variables and their types.
- `repl_set(name, value)` / `repl_get(name)` — explicit cross-round storage.
- `print(...)` — diagnostic output. The driver feeds you a truncated preview next round.
@@ -40,11 +42,12 @@ print(context[:500])
2. CHUNK + map-reduce with batched concurrent calls.
```repl
chunk_size = 8000
-chunks = [context[i:i+chunk_size] for i in range(0, len(context), chunk_size)]
-prompts = [f"Extract any mentions of X from this section:\n\n{c}" for c in chunks]
+chunks = chunk_context(max_chars=chunk_size)
+coverage = chunk_coverage(chunks)
+prompts = [f"Extract any mentions of X from section {c['index']} ({c['start']}:{c['end']}):\n\n{c['text']}" for c in chunks]
partials = llm_query_batched(prompts)
combined = "\n\n".join(partials)
-answer = llm_query(f"Synthesize across these section-level extractions:\n\n{combined}")
+answer = llm_query(f"Coverage: {coverage}\n\nSynthesize across these section-level extractions:\n\n{combined}")
print(answer[:500])
```
Then on the next turn:
@@ -73,6 +76,8 @@ Rules
- Never `print(context)` or otherwise dump it whole — slice, sample, or chunk.
- You MUST call `llm_query` / `llm_query_batched` / `rlm_query` at least once before `FINAL(...)`. Calling FINAL from a top-level prose answer (without ever running a `repl` block that touched `context` via a sub-LLM) is REJECTED — the driver will discard the FINAL and ask you to actually use the REPL.
- Sub-LLMs are powerful — feed them generous chunks (tens of thousands of chars), not tiny windows.
+- For exact counts, package totals, line totals, or other structured aggregates, compute them with Python over `context` directly. Do not ask a child LLM to count.
+- For whole-input map-reduce, report coverage in the final answer: chunks processed, total chunks, and whether every line/char range was included. If you only processed a subset, say that explicitly.
- Do NOT pad your output with prose like "Here is what I'll do:" — just emit the next ```repl block.
"#;
@@ -115,6 +120,8 @@ mod tests {
"llm_query_batched",
"rlm_query",
"rlm_query_batched",
+ "chunk_context",
+ "chunk_coverage",
"SHOW_VARS",
"FINAL",
"FINAL_VAR",
@@ -133,4 +140,12 @@ mod tests {
"system prompt should reject the prose-shortcut path explicitly"
);
}
+
+ #[test]
+ fn rlm_prompt_requires_deterministic_counts_and_coverage() { // Pins the prompt phrases about Python-side counting and coverage reporting.
+ let s = body(); // presumably the rendered RLM system prompt; confirm against `body()`
+ assert!(s.contains("compute them with Python"));
+ assert!(s.contains("report coverage"));
+ assert!(s.contains("chunks processed"));
+ }
}
diff --git a/crates/tui/src/rlm/turn.rs b/crates/tui/src/rlm/turn.rs
index 16504fb8..676d9e1e 100644
--- a/crates/tui/src/rlm/turn.rs
+++ b/crates/tui/src/rlm/turn.rs
@@ -34,8 +34,6 @@ const STDOUT_METADATA_PREVIEW_LEN: usize = 800;
const PROMPT_PREVIEW_LEN: usize = 500;
/// Temperature for root LLM calls.
const ROOT_TEMPERATURE: f32 = 0.3;
-/// Hard wall-clock cap on a whole RLM turn.
-const TURN_TIMEOUT: Duration = Duration::from_secs(180);
/// Bound on conversation history we keep across iterations.
const MAX_HISTORY_MESSAGES: usize = 20;
@@ -156,6 +154,13 @@ pub(crate) fn run_rlm_turn_inner(
))
}
+/// Optional wall-clock cap for a whole RLM turn; currently always `None`.
+///
+/// RLM turns are long-running background-style work. Do not kill the whole
+/// turn with the old fixed 180s wall-clock cap; per-request cancellation still
+/// comes from the parent turn token and the user can cancel from the TUI.
+fn turn_timeout() -> Option<Duration> {
+    None
+}
+
// ---------------------------------------------------------------------------
// Implementation
// ---------------------------------------------------------------------------
@@ -237,15 +242,14 @@ async fn run_rlm_turn_impl(
let result = 'turn: {
for iteration in 0..MAX_RLM_ITERATIONS {
- if start.elapsed() > TURN_TIMEOUT {
+ if let Some(timeout) = turn_timeout()
+ && start.elapsed() > timeout
+ {
break 'turn RlmTurnResult {
answer: String::new(),
iterations: iteration,
duration: start.elapsed(),
- error: Some(format!(
- "RLM turn timed out after {}s",
- TURN_TIMEOUT.as_secs()
- )),
+ error: Some(format!("RLM turn timed out after {}s", timeout.as_secs())),
usage: total_usage,
termination: RlmTermination::Error,
trace: trace.clone(),
@@ -588,6 +592,14 @@ fn build_metadata_message(
parts.push("**REPL helpers** (use inside ```repl blocks)".to_string());
parts.push("- `context` / `ctx` — the full input string".to_string());
parts.push("- `len(context)` / `context[a:b]` / `context.splitlines()` — slice it".to_string());
+ parts.push(
+ "- `chunk_context(max_chars=20000, overlap=0)` — full-coverage chunks with index/start/end/text"
+ .to_string(),
+ );
+ parts.push(
+ "- `chunk_coverage(chunks)` — coverage report for chunk_context output"
+ .to_string(),
+ );
parts.push(
"- `llm_query(prompt, model=None)` — one-shot child LLM; `model` is ignored and child calls stay pinned to Flash"
.to_string(),
@@ -971,4 +983,12 @@ mod tests {
assert!(s.contains("line19"));
assert!(s.contains("…"));
}
+
+ #[test]
+ fn rlm_turn_has_no_fixed_wall_clock_timeout() { // Guards against reintroducing a fixed turn-level timeout.
+ assert!(
+ turn_timeout().is_none(),
+ "RLM turns should not be killed by the old fixed 180s wall-clock cap"
+ );
+ }
}
diff --git a/crates/tui/src/tools/rlm.rs b/crates/tui/src/tools/rlm.rs
index 8881c8ce..2c7058b4 100644
--- a/crates/tui/src/tools/rlm.rs
+++ b/crates/tui/src/tools/rlm.rs
@@ -60,20 +60,22 @@ impl ToolSpec for RlmTool {
calls in-REPL helpers (`llm_query`, `llm_query_batched`, \
`rlm_query`, `rlm_query_batched`) to process it, then returns a \
synthesized answer. \n\n\
- DO NOT use this tool when: the input fits in your context (just \
- use `read_file` and reason directly); a `grep_files` / \
- `exec_shell` pipeline would answer the question; the task is a \
- short classification or extraction; you need interactive \
- iterative exploration (rlm is one-shot batch). \n\n\
- Use this tool only when the input is genuinely too large to load \
- (a whole file > 50K tokens, a long transcript, a multi-document \
- corpus). It is slower and more expensive than direct reasoning. \n\n\
+ Use this tool when the input is genuinely large or when a Python \
+ map-reduce pass plus child LLM calls is the right shape: whole \
+ files, long transcripts, multi-document corpora, bulk semantic \
+ classification, or decomposition/critique work. For exact counts \
+ or structured aggregates, compute them directly in Python inside \
+ the REPL and report the deterministic result instead of asking a \
+ child LLM to guess. For whole-input map-reduce, use the REPL \
+ helpers `chunk_context()` and `chunk_coverage()` so the result \
+ states what was covered. \n\n\
Provide `task` (what to do) plus exactly one of `file_path` \
(workspace-relative, preferred — keeps the long input out of \
your context entirely) or `content` (inline, capped at 200k \
chars). The Python helpers (`llm_query`, `rlm_query`, etc.) live \
INSIDE the REPL — they are not separately-callable tools. \n\n\
- Returns the final synthesized answer as a string."
+ Returns the final synthesized answer plus an RLM report showing \
+ input size, iterations, duration, sub-LLM calls, and trace summary."
}
fn input_schema(&self) -> Value {
@@ -177,6 +179,8 @@ impl ToolSpec for RlmTool {
"rlm: input is empty after loading",
));
}
+ let input_chars = body.chars().count();
+ let input_lines = body.lines().count();
// Pin child calls to Flash so model-generated tool args cannot quietly
// turn fanout work into Pro-billed requests. The RLM root still uses
@@ -250,6 +254,14 @@ impl ToolSpec for RlmTool {
RlmTermination::Error => String::new(),
};
+ let report = format!(
+ "RLM report:\n- input: {input_lines} line(s), {input_chars} char(s)\n- iterations: {}\n- duration: {}ms\n- sub-LLM RPCs: {}\n- termination: {:?}\n\nAnswer:\n",
+ result.iterations,
+ result.duration.as_millis(),
+ result.total_rpcs,
+ result.termination,
+ );
+
let trace_summary = if result.trace.is_empty() {
String::from("\n\n[trace: no REPL rounds executed]")
} else {
@@ -309,14 +321,17 @@ impl ToolSpec for RlmTool {
"child_model": child_model,
"termination": format!("{:?}", result.termination).to_lowercase(),
"max_depth": max_depth,
+ "context_chars": input_chars,
+ "context_lines": input_lines,
"total_rpcs": result.total_rpcs,
"trace": trace_json,
});
- Ok(
- ToolResult::success(format!("{}{}{}", result.answer, footer, trace_summary))
- .with_metadata(metadata),
- )
+ Ok(ToolResult::success(format!(
+ "{report}{}{}{}",
+ result.answer, footer, trace_summary
+ ))
+ .with_metadata(metadata))
}
}
@@ -369,6 +384,24 @@ mod tests {
assert!(tool().supports_parallel());
}
+ #[test]
+ fn description_steers_without_suppressing_rlm_use() { // The tool description must explain fit positively, not discourage use.
+ let t = tool();
+ let description = t.description();
+ assert!(
+ description.contains("Use this tool when"),
+ "description should positively explain the RLM fit"
+ );
+ assert!(
+ !description.contains("DO NOT use"),
+ "avoid training the model to avoid an available tool"
+ );
+ assert!(
+ !description.contains("slower and more expensive"),
+ "cost caveats belong in verification guidance, not tool suppression"
+ );
+ }
+
#[tokio::test]
async fn returns_not_available_without_client() {
let t = tool();
diff --git a/crates/tui/src/tools/subagent/mod.rs b/crates/tui/src/tools/subagent/mod.rs
index 993584a7..af204a07 100644
--- a/crates/tui/src/tools/subagent/mod.rs
+++ b/crates/tui/src/tools/subagent/mod.rs
@@ -3720,6 +3720,7 @@ impl SubAgentToolRegistry {
if !self.is_tool_allowed(name) {
return Err(anyhow!("Tool {name} not allowed for this sub-agent"));
}
+ reject_subagent_terminal_takeover(name, &input)?;
self.registry
.execute(name, input)
.await
@@ -3727,6 +3728,22 @@ impl SubAgentToolRegistry {
}
}
+/// Refuse `exec_shell` calls that ask for `interactive=true`.
+///
+/// Any other tool, and `exec_shell` without a boolean-true `interactive`
+/// field, is accepted.
+fn reject_subagent_terminal_takeover(name: &str, input: &Value) -> Result<()> {
+    if name != "exec_shell" {
+        return Ok(());
+    }
+    let interactive_requested = matches!(
+        input.get("interactive").and_then(Value::as_bool),
+        Some(true)
+    );
+    if !interactive_requested {
+        return Ok(());
+    }
+    Err(anyhow!(
+        "Sub-agents run in the background and cannot use exec_shell with interactive=true \
+         because that would take over the parent TUI terminal. Use non-interactive \
+         exec_shell, background=true, tty=true, or task_shell_start instead."
+    ))
+}
+
/// Resolve the effective allowed-tools list for a child.
///
/// **v0.6.6 default: full inheritance.** Returning `Ok(None)` means the
diff --git a/crates/tui/src/tools/subagent/tests.rs b/crates/tui/src/tools/subagent/tests.rs
index 71d84f8b..0295b567 100644
--- a/crates/tui/src/tools/subagent/tests.rs
+++ b/crates/tui/src/tools/subagent/tests.rs
@@ -1003,6 +1003,39 @@ fn mailbox_propagates_through_child_runtime_chain() {
);
}
+#[test]
+fn subagent_rejects_interactive_shell_terminal_takeover() { // interactive=true is rejected; other exec_shell shapes pass.
+ let err = reject_subagent_terminal_takeover(
+ "exec_shell",
+ &serde_json::json!({
+ "command": "python3 -i",
+ "interactive": true
+ }),
+ )
+ .expect_err("sub-agents must not inherit the parent terminal");
+
+ let msg = err.to_string(); // the error text must name both the flag and the reason
+ assert!(msg.contains("cannot use exec_shell with interactive=true"));
+ assert!(msg.contains("parent TUI terminal"));
+
+ reject_subagent_terminal_takeover(
+ "exec_shell",
+ &serde_json::json!({
+ "command": "cargo check",
+ "interactive": false
+ }),
+ )
+ .expect("non-interactive shell remains allowed");
+ reject_subagent_terminal_takeover(
+ "exec_shell",
+ &serde_json::json!({
+ "command": "cargo test",
+ "background": true
+ }),
+ )
+ .expect("background shell remains allowed"); // a missing `interactive` key counts as false
+}
+
#[tokio::test]
async fn mailbox_close_as_cancel_propagates_to_grandchild_runtime() {
use crate::tools::subagent::mailbox::Mailbox;
diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs
index f5d936fe..0fb83fed 100644
--- a/crates/tui/src/tui/ui.rs
+++ b/crates/tui/src/tui/ui.rs
@@ -82,6 +82,7 @@ use crate::tui::tool_routing::{
};
use crate::tui::ui_text::{history_cell_to_text, line_to_plain, slice_text, text_display_width};
use crate::tui::user_input::UserInputView;
+use crate::tui::views::subagent_view_agents;
use super::active_cell::ActiveCell;
use super::app::{
@@ -582,6 +583,8 @@ async fn refresh_active_task_panel(app: &mut App, task_manager: &SharedTaskManag
.map(task_summary_to_panel_entry)
.collect();
+ entries.extend(active_rlm_task_entries(app));
+
if let Some(shell_mgr) = app.runtime_services.shell_manager.as_ref()
&& let Ok(mut mgr) = shell_mgr.lock()
{
@@ -601,6 +604,39 @@ async fn refresh_active_task_panel(app: &mut App, task_manager: &SharedTaskManag
app.task_panel = entries;
}
+/// Build Tasks-panel rows for `rlm` tool calls still running in the active cell.
+///
+/// Returns one entry per `ToolCell::Generic` named `rlm` whose status is
+/// `Running`. The id is derived from the cell's 1-based position among the
+/// active cell's entries, and the duration is the time elapsed since
+/// `app.turn_started_at` (or `None` when that is unset). Returns an empty
+/// vector when there is no active cell.
+fn active_rlm_task_entries(app: &App) -> Vec<TaskPanelEntry> {
+    let Some(active) = app.active_cell.as_ref() else {
+        return Vec::new();
+    };
+    // Every RLM row shares the same elapsed time, saturating at u64::MAX.
+    let duration_ms = app
+        .turn_started_at
+        .map(|started| u64::try_from(started.elapsed().as_millis()).unwrap_or(u64::MAX));
+    active
+        .entries()
+        .iter()
+        .enumerate()
+        .filter_map(|(idx, entry)| {
+            let HistoryCell::Tool(ToolCell::Generic(generic)) = entry else {
+                return None;
+            };
+            if generic.name != "rlm" || generic.status != ToolStatus::Running {
+                return None;
+            }
+            // Fall back to a generic label when the call has no usable summary.
+            let summary = generic
+                .input_summary
+                .as_deref()
+                .filter(|summary| !summary.trim().is_empty())
+                .unwrap_or("running chunked analysis");
+            Some(TaskPanelEntry {
+                id: format!("rlm-{}", idx + 1),
+                status: "running".to_string(),
+                prompt_summary: format!("RLM: {summary}"),
+                duration_ms,
+            })
+        })
+        .collect()
+}
+
#[allow(clippy::too_many_lines)]
async fn run_event_loop(
terminal: &mut AppTerminal,
@@ -628,6 +664,7 @@ async fn run_event_loop(
// #376: native-copy escape — hold Shift to bypass alt-screen mouse capture
// for terminal-native text selection.
let mut shift_bypass_active = false;
+ let mut terminal_paused_at: Option = None;
loop {
if !drain_web_config_events(&mut web_config_session, app, config, &engine_handle).await {
@@ -1123,6 +1160,7 @@ async fn run_event_loop(
app.use_bracketed_paste,
)?;
event_broker.pause_events();
+ terminal_paused_at = Some(Instant::now());
}
}
EngineEvent::ResumeEvents => {
@@ -1134,6 +1172,7 @@ async fn run_event_loop(
app.use_bracketed_paste,
)?;
event_broker.resume_events();
+ terminal_paused_at = None;
}
}
EngineEvent::AgentSpawned { id, prompt } => {
@@ -1162,11 +1201,54 @@ async fn run_event_loop(
app.status_message = Some(format!("Sub-agent {id}: {display}"));
}
EngineEvent::AgentComplete { id, result } => {
+ let subagent_elapsed = app
+ .agent_activity_started_at
+ .or(app.turn_started_at)
+ .map(|started| started.elapsed())
+ .unwrap_or_default();
+ let has_other_running_subagents =
+ app.agent_progress.keys().any(|agent_id| agent_id != &id)
+ || app.subagent_cache.iter().any(|agent| {
+ agent.agent_id != id
+ && matches!(agent.status, SubAgentStatus::Running)
+ });
app.agent_progress.remove(&id);
app.status_message = Some(format!(
"Sub-agent {id} completed: {}",
summarize_tool_output(&result)
));
+ let should_recapture_terminal =
+ !has_other_running_subagents && app.use_alt_screen;
+ if !has_other_running_subagents
+ && let Some((method, threshold, include_summary)) =
+ notification_settings(config)
+ {
+ let in_tmux = std::env::var("TMUX").is_ok_and(|v| !v.is_empty());
+ let msg = subagent_completion_notification_message(
+ &id,
+ &result,
+ include_summary,
+ subagent_elapsed,
+ );
+ crate::tui::notifications::notify_done(
+ method,
+ in_tmux,
+ &msg,
+ threshold,
+ subagent_elapsed,
+ );
+ }
+ if should_recapture_terminal {
+ resume_terminal(
+ terminal,
+ app.use_alt_screen,
+ app.use_mouse_capture,
+ app.use_bracketed_paste,
+ )?;
+ event_broker.resume_events();
+ terminal_paused_at = None;
+ app.needs_redraw = true;
+ }
let _ = engine_handle.send(Op::ListSubAgents).await;
}
EngineEvent::AgentList { agents } => {
@@ -1175,9 +1257,10 @@ async fn run_event_loop(
sorted.retain(|a| !a.from_prior_session);
app.subagent_cache = sorted.clone();
reconcile_subagent_activity_state(app);
- if app.view_stack.update_subagents(&sorted) {
+ let view_agents = subagent_view_agents(app, &sorted);
+ if app.view_stack.update_subagents(&view_agents) {
app.status_message =
- Some(format!("Sub-agents: {} total", sorted.len()));
+ Some(format!("Sub-agents: {} total", view_agents.len()));
}
// Individual spawn/complete events already log to history;
// full list available via /agents command.
@@ -1406,8 +1489,23 @@ async fn run_event_loop(
}
if event_broker.is_paused() {
- tokio::time::sleep(std::time::Duration::from_millis(50)).await;
- continue;
+ let grace_active = terminal_paused_at
+ .map(|paused_at| paused_at.elapsed() < Duration::from_millis(500))
+ .unwrap_or(false);
+ if terminal_pause_has_live_owner(app) || grace_active {
+ tokio::time::sleep(std::time::Duration::from_millis(50)).await;
+ continue;
+ }
+ resume_terminal(
+ terminal,
+ app.use_alt_screen,
+ app.use_mouse_capture,
+ app.use_bracketed_paste,
+ )?;
+ event_broker.resume_events();
+ terminal_paused_at = None;
+ app.status_message = Some("Terminal controls restored".to_string());
+ app.needs_redraw = true;
}
let now = Instant::now();
@@ -3095,6 +3193,30 @@ fn completed_turn_notification_message(
msg
}
+/// Build the notification text for a finished sub-agent.
+///
+/// Uses the first non-empty line of `result` that is not a
+/// `<deepseek:subagent.done>` completion sentinel, passed through
+/// `notification_text_summary`; falls back to a generic "complete" line when
+/// no such summary is available. With `include_summary`, a second line with
+/// the humanized `elapsed` duration is appended.
+fn subagent_completion_notification_message(
+    id: &str,
+    result: &str,
+    include_summary: bool,
+    elapsed: Duration,
+) -> String {
+    // The completion sentinel is internal protocol and must never reach the user.
+    let result_line = result
+        .lines()
+        .map(str::trim)
+        .find(|line| !line.is_empty() && !line.starts_with("<deepseek:subagent.done>"));
+    let mut msg = result_line
+        .and_then(notification_text_summary)
+        .map(|summary| format!("sub-agent {id}: {summary}"))
+        .unwrap_or_else(|| format!("deepseek: sub-agent {id} complete"));
+
+    if include_summary {
+        let human = crate::tui::notifications::humanize_duration(elapsed);
+        msg.push('\n');
+        msg.push_str(&format!("deepseek: sub-agent complete ({human})"));
+    }
+
+    msg
+}
+
fn latest_assistant_notification_text(messages: &[Message]) -> Option {
messages
.iter()
@@ -6325,6 +6447,17 @@ fn active_foreground_shell_running(app: &App) -> bool {
})
}
+/// Report whether the active cell contains an `Exec` tool cell that is still running.
+///
+/// Returns `false` when there is no active cell.
+fn terminal_pause_has_live_owner(app: &App) -> bool {
+    let Some(active) = app.active_cell.as_ref() else {
+        return false;
+    };
+    active.entries().iter().any(|cell| match cell {
+        HistoryCell::Tool(ToolCell::Exec(exec)) => exec.status == ToolStatus::Running,
+        _ => false,
+    })
+}
+
fn collect_active_tool_status(cell: &HistoryCell, snapshot: &mut ActiveToolStatusSnapshot) {
let HistoryCell::Tool(tool) = cell else {
return;
diff --git a/crates/tui/src/tui/ui/tests.rs b/crates/tui/src/tui/ui/tests.rs
index 9c139bd7..bfe61bf2 100644
--- a/crates/tui/src/tui/ui/tests.rs
+++ b/crates/tui/src/tui/ui/tests.rs
@@ -1937,6 +1937,72 @@ fn spillover_pager_section_returns_notice_when_file_missing() {
assert!(section.contains("could not read spillover file"));
}
+#[test]
+fn terminal_pause_has_live_owner_only_for_running_exec_cells() { // Only a running Exec cell counts as a terminal owner.
+ let mut app = create_test_app();
+ assert!(!terminal_pause_has_live_owner(&app)); // no active cell yet
+
+ let mut active = ActiveCell::new();
+ active.push_tool(
+ "tool-1",
+ HistoryCell::Tool(ToolCell::Exec(ExecCell {
+ command: "python3 -i".to_string(),
+ status: ToolStatus::Running,
+ output: None,
+ started_at: Some(Instant::now()),
+ duration_ms: None,
+ source: ExecSource::Assistant,
+ interaction: Some("interactive".to_string()),
+ })),
+ );
+ app.active_cell = Some(active);
+ assert!(terminal_pause_has_live_owner(&app)); // running Exec cell => owner present
+
+ let mut active = ActiveCell::new(); // replace the active cell with one holding only a running rlm call
+ active.push_tool(
+ "tool-2",
+ HistoryCell::Tool(ToolCell::Generic(GenericToolCell {
+ name: "rlm".to_string(),
+ status: ToolStatus::Running,
+ input_summary: Some("file_path: Cargo.lock".to_string()),
+ output: None,
+ prompts: None,
+ spillover_path: None,
+ })),
+ );
+ app.active_cell = Some(active);
+ assert!(
+ !terminal_pause_has_live_owner(&app),
+ "non-interactive RLM work must not keep the terminal in host-scrollback mode"
+ );
+}
+
+#[test]
+fn active_rlm_task_entries_surface_foreground_rlm_work() { // A running rlm tool cell becomes one Tasks-panel row.
+ let mut app = create_test_app();
+ app.turn_started_at = Some(Instant::now() - Duration::from_secs(3)); // pretend the turn began 3s ago
+ let mut active = ActiveCell::new();
+ active.push_tool(
+ "tool-rlm",
+ HistoryCell::Tool(ToolCell::Generic(GenericToolCell {
+ name: "rlm".to_string(),
+ status: ToolStatus::Running,
+ input_summary: Some("file_path: Cargo.lock".to_string()),
+ output: None,
+ prompts: None,
+ spillover_path: None,
+ })),
+ );
+ app.active_cell = Some(active);
+
+ let entries = active_rlm_task_entries(&app);
+ assert_eq!(entries.len(), 1);
+ assert_eq!(entries[0].id, "rlm-1"); // id uses the 1-based entry position
+ assert_eq!(entries[0].status, "running");
+ assert_eq!(entries[0].prompt_summary, "RLM: file_path: Cargo.lock");
+ assert!(entries[0].duration_ms.unwrap_or_default() >= 3000); // at least the 3s offset set above
+}
+
#[test]
fn details_shortcut_modifiers_accept_plain_shift_and_alt_only() {
assert!(details_shortcut_modifiers(KeyModifiers::NONE));
@@ -3511,3 +3577,29 @@ fn completed_turn_notification_truncates_long_text() {
// 360-char body + 3-char ellipsis
assert_eq!(msg.chars().count(), 363);
}
+
+#[test]
+fn subagent_completion_notification_uses_summary_line_not_sentinel() {
+    // The second line is a completion sentinel; only the prose line may surface.
+    let msg = super::subagent_completion_notification_message(
+        "agent_live",
+        "Finished the docs audit.\n<deepseek:subagent.done>{}</deepseek:subagent.done>",
+        false,
+        Duration::from_secs(42),
+    );
+
+    assert_eq!(msg, "sub-agent agent_live: Finished the docs audit.");
+    assert!(!msg.contains("deepseek:subagent.done"));
+}
+
+#[test]
+fn subagent_completion_notification_can_include_elapsed_summary() { // An empty result uses the fallback line plus an elapsed-time line.
+ let msg = super::subagent_completion_notification_message(
+ "agent_live",
+ "",
+ true,
+ Duration::from_secs(65),
+ );
+
+ assert!(msg.contains("deepseek: sub-agent agent_live complete")); // fallback when no summary line exists
+ assert!(msg.contains("deepseek: sub-agent complete (1m 5s)")); // humanized 65s
+}
diff --git a/crates/tui/src/tui/views/mod.rs b/crates/tui/src/tui/views/mod.rs
index f7792bec..58baf297 100644
--- a/crates/tui/src/tui/views/mod.rs
+++ b/crates/tui/src/tui/views/mod.rs
@@ -7,9 +7,11 @@ use crate::localization::{Locale, MessageId, tr};
use crate::palette;
use crate::settings::Settings;
use crate::tools::UserInputResponse;
-use crate::tools::subagent::{SubAgentResult, SubAgentStatus, SubAgentType};
+use crate::tools::subagent::{SubAgentAssignment, SubAgentResult, SubAgentStatus, SubAgentType};
use crate::tui::app::App;
use crate::tui::approval::{ElevationOption, ReviewDecision};
+use crate::tui::history::{HistoryCell, SubAgentCell, summarize_tool_output};
+use crate::tui::widgets::agent_card::AgentLifecycle;
pub mod status_picker;
@@ -1384,6 +1386,105 @@ pub struct SubAgentsView {
scroll: usize,
}
+/// Build the agent rows shown by `/subagents`.
+///
+/// The engine manager is the durable source of truth, but live UI cards can
+/// briefly be ahead of the manager-list refresh. Include those live rows so
+/// the command does not say "no agents" while the footer/sidebar already show
+/// active delegated work.
+///
+/// Manager rows come first and win on duplicate ids; progress-only agents from
+/// `app.agent_progress` follow (role `live`), then delegate cards (role
+/// `transcript`) and fanout workers (role = fanout kind) found in `app.history`.
+pub(crate) fn subagent_view_agents(
+    app: &App,
+    manager_agents: &[SubAgentResult],
+) -> Vec<SubAgentResult> {
+    let mut agents = manager_agents.to_vec();
+    // Ids already represented; `insert` returns false for duplicates.
+    let mut seen: std::collections::HashSet<String> =
+        agents.iter().map(|agent| agent.agent_id.clone()).collect();
+
+    for (agent_id, progress) in &app.agent_progress {
+        if seen.insert(agent_id.clone()) {
+            agents.push(live_subagent_result(
+                agent_id,
+                SubAgentType::General,
+                SubAgentStatus::Running,
+                progress,
+                Some("live"),
+            ));
+        }
+    }
+
+    for cell in &app.history {
+        match cell {
+            HistoryCell::SubAgent(SubAgentCell::Delegate(card))
+                if seen.insert(card.agent_id.clone()) =>
+            {
+                let agent_type =
+                    SubAgentType::from_str(&card.agent_type).unwrap_or(SubAgentType::General);
+                agents.push(live_subagent_result(
+                    &card.agent_id,
+                    agent_type,
+                    lifecycle_to_subagent_status(card.status),
+                    card.summary.as_deref().unwrap_or(card.agent_type.as_str()),
+                    Some("transcript"),
+                ));
+            }
+            HistoryCell::SubAgent(SubAgentCell::Fanout(card)) => {
+                for worker in &card.workers {
+                    if seen.insert(worker.agent_id.clone()) {
+                        let objective = format!(
+                            "{} worker {}",
+                            summarize_tool_output(&card.kind),
+                            summarize_tool_output(&worker.worker_id)
+                        );
+                        agents.push(live_subagent_result(
+                            &worker.agent_id,
+                            SubAgentType::General,
+                            lifecycle_to_subagent_status(worker.status),
+                            &objective,
+                            Some(card.kind.as_str()),
+                        ));
+                    }
+                }
+            }
+            _ => {}
+        }
+    }
+
+    agents
+}
+
+/// Translate a card lifecycle state into the status type used by sub-agent rows.
+///
+/// `Pending` is folded into `Running`; `Failed` carries a fixed reason string.
+fn lifecycle_to_subagent_status(status: AgentLifecycle) -> SubAgentStatus {
+    match status {
+        AgentLifecycle::Pending => SubAgentStatus::Running,
+        AgentLifecycle::Running => SubAgentStatus::Running,
+        AgentLifecycle::Cancelled => SubAgentStatus::Cancelled,
+        AgentLifecycle::Completed => SubAgentStatus::Completed,
+        AgentLifecycle::Failed => {
+            let reason = String::from("failed in transcript");
+            SubAgentStatus::Failed(reason)
+        }
+    }
+}
+
+/// Synthesize a placeholder `SubAgentResult` from the given id, type, status,
+/// objective and role.
+///
+/// The objective is passed through `summarize_tool_output`; model, nickname,
+/// result, step count and duration are left empty or zero.
+fn live_subagent_result(
+    agent_id: &str,
+    agent_type: SubAgentType,
+    status: SubAgentStatus,
+    objective: &str,
+    role: Option<&str>,
+) -> SubAgentResult {
+    let assignment = SubAgentAssignment {
+        objective: summarize_tool_output(objective),
+        role: role.map(str::to_owned),
+    };
+    SubAgentResult {
+        agent_id: agent_id.to_owned(),
+        agent_type,
+        assignment,
+        model: String::new(),
+        nickname: None,
+        status,
+        result: None,
+        steps_taken: 0,
+        duration_ms: 0,
+        from_prior_session: false,
+    }
+}
+
impl SubAgentsView {
pub fn new(agents: Vec) -> Self {
Self { agents, scroll: 0 }
@@ -1742,11 +1843,16 @@ fn truncate_view_text(text: &str, max_chars: usize) -> String {
mod tests {
use super::{
ConfigListItem, ConfigSection, ConfigView, ModalKind, ModalView, ShellControlView,
- ViewAction, ViewEvent, ViewStack, truncate_view_text,
+ ViewAction, ViewEvent, ViewStack, subagent_view_agents, truncate_view_text,
};
use crate::config::Config;
use crate::localization::Locale;
+ use crate::tools::subagent::{
+ SubAgentAssignment, SubAgentResult, SubAgentStatus, SubAgentType,
+ };
use crate::tui::app::{App, TuiOptions};
+ use crate::tui::history::{HistoryCell, SubAgentCell};
+ use crate::tui::widgets::agent_card::{AgentLifecycle, FanoutCard};
use crossterm::event::{
KeyCode, KeyEvent, KeyModifiers, MouseButton, MouseEvent, MouseEventKind,
};
@@ -1785,6 +1891,72 @@ mod tests {
}
}
+ fn manager_agent(id: &str, status: SubAgentStatus) -> SubAgentResult { // Test fixture: a manager-reported agent row with fixed metadata.
+ SubAgentResult {
+ agent_id: id.to_string(),
+ agent_type: SubAgentType::Explore, // differs from the General type used for live rows
+ assignment: SubAgentAssignment {
+ objective: "read the docs".to_string(),
+ role: None,
+ },
+ model: "deepseek-v4-flash".to_string(),
+ nickname: None,
+ status,
+ result: None,
+ steps_taken: 1,
+ duration_ms: 10,
+ from_prior_session: false,
+ }
+ }
+
+ #[test]
+ fn subagent_view_agents_includes_progress_only_running_agent() { // An agent known only via agent_progress still gets a row.
+ let mut app = create_test_app();
+ app.agent_progress
+ .insert("agent_live".to_string(), "reading code".to_string());
+
+ let agents = subagent_view_agents(&app, &[]); // empty manager list
+
+ assert_eq!(agents.len(), 1);
+ assert_eq!(agents[0].agent_id, "agent_live");
+ assert!(matches!(agents[0].status, SubAgentStatus::Running));
+ assert_eq!(agents[0].assignment.role.as_deref(), Some("live"));
+ assert!(agents[0].assignment.objective.contains("reading code"));
+ }
+
+ #[test]
+ fn subagent_view_agents_includes_live_fanout_workers_when_cache_is_empty() { // Fanout workers in history become rows.
+ let mut app = create_test_app();
+ let mut card = FanoutCard::new("rlm").with_workers(["chunk_1", "chunk_2"]);
+ card.upsert_worker("chunk_1", AgentLifecycle::Completed);
+ card.upsert_worker("chunk_2", AgentLifecycle::Running);
+ app.add_message(HistoryCell::SubAgent(SubAgentCell::Fanout(card)));
+ app.last_fanout_card_index = Some(app.history.len().saturating_sub(1));
+
+ let agents = subagent_view_agents(&app, &[]); // empty manager list
+
+ assert_eq!(agents.len(), 2);
+ assert_eq!(agents[0].agent_id, "chunk_1");
+ assert!(matches!(agents[0].status, SubAgentStatus::Completed));
+ assert_eq!(agents[1].agent_id, "chunk_2");
+ assert!(matches!(agents[1].status, SubAgentStatus::Running));
+ assert_eq!(agents[1].assignment.role.as_deref(), Some("rlm")); // role carries the fanout kind
+ }
+
+ #[test]
+ fn subagent_view_agents_deduplicates_manager_rows_over_live_rows() { // The manager row wins when the same id is also live.
+ let mut app = create_test_app();
+ app.agent_progress
+ .insert("agent_cached".to_string(), "live duplicate".to_string());
+ let manager = vec![manager_agent("agent_cached", SubAgentStatus::Running)];
+
+ let agents = subagent_view_agents(&app, &manager);
+
+ assert_eq!(agents.len(), 1);
+ assert_eq!(agents[0].agent_type, SubAgentType::Explore); // fields come from the manager fixture
+ assert_eq!(agents[0].assignment.objective, "read the docs");
+ }
+
fn visible_section_labels(view: &ConfigView) -> Vec<&'static str> {
view.visible_items()
.into_iter()
diff --git a/crates/tui/src/tui/widgets/agent_card.rs b/crates/tui/src/tui/widgets/agent_card.rs
index f923bdc4..e9504bef 100644
--- a/crates/tui/src/tui/widgets/agent_card.rs
+++ b/crates/tui/src/tui/widgets/agent_card.rs
@@ -389,23 +389,15 @@ pub fn apply_to_delegate(card: &mut DelegateCard, msg: &MailboxMessage) -> bool
}
MailboxMessage::Progress { status, .. } => {
card.status = AgentLifecycle::Running;
- card.push_action(status);
+ if !is_low_signal_progress(status) {
+ card.push_action(status);
+ }
}
- MailboxMessage::ToolCallStarted {
- tool_name, step, ..
- } => {
- card.push_action(format!("[{step}] {tool_name} started"));
+ MailboxMessage::ToolCallStarted { tool_name, .. } => {
+ card.push_action(format!("{tool_name} running"));
}
- MailboxMessage::ToolCallCompleted {
- tool_name,
- step,
- ok,
- ..
- } => {
- card.push_action(format!(
- "[{step}] {tool_name} {}",
- if *ok { "ok" } else { "failed" }
- ));
+ MailboxMessage::ToolCallCompleted { tool_name, ok, .. } => {
+ card.push_action(format!("{tool_name} {}", if *ok { "ok" } else { "failed" }));
}
MailboxMessage::Completed { summary, .. } => {
card.status = AgentLifecycle::Completed;
@@ -433,6 +425,13 @@ pub fn apply_to_delegate(card: &mut DelegateCard, msg: &MailboxMessage) -> bool
true
}
+/// True for scheduler/bookkeeping progress lines that should not become a card row.
+fn is_low_signal_progress(status: &str) -> bool {
+    let text = status.trim().to_ascii_lowercase();
+    let finished_step = text.starts_with("step ") && text.contains(": complete");
+    finished_step || text.starts_with("started (") || text.contains("requesting model response")
+}
+
/// Apply a mailbox envelope to a `FanoutCard`. Updates per-worker state
/// based on which child the envelope is about. Returns `true` on change.
pub fn apply_to_fanout(card: &mut FanoutCard, msg: &MailboxMessage) -> bool {
@@ -550,6 +549,57 @@ mod tests {
);
}
+    // Scheduler chatter still marks the card as running, but it must not add an
+    // action row or appear anywhere in the rendered card.
+    #[test]
+    fn delegate_card_ignores_low_signal_scheduler_progress() {
+        let mut card = DelegateCard::new("agent_003", "general");
+        let tick = MailboxMessage::progress("agent_003", "step 1/100: requesting model response");
+
+        let changed = apply_to_delegate(&mut card, &tick);
+        assert!(changed);
+        assert_eq!(card.status, AgentLifecycle::Running);
+        let rows = card.action_count();
+        let why = "scheduler progress should not become a stale transcript row";
+        assert_eq!(rows, 0, "{why}");
+
+        let lines = card.render_lines(80);
+        let rendered = render_to_strings(&lines).join("\n");
+        for hidden in ["step 1/100", "requesting model response"] {
+            assert!(!rendered.contains(hidden), "{rendered}");
+        }
+    }
+
+    // Tool start/finish rows must name the tool while leaving out the internal
+    // loop step index carried by the mailbox message.
+    #[test]
+    fn delegate_tool_rows_omit_internal_step_numbers() {
+        let mut card = DelegateCard::new("agent_004", "general");
+        let started = MailboxMessage::ToolCallStarted {
+            agent_id: "agent_004".into(),
+            tool_name: "read_file".into(),
+            step: 7,
+        };
+        let completed = MailboxMessage::ToolCallCompleted {
+            agent_id: "agent_004".into(),
+            tool_name: "read_file".into(),
+            step: 7,
+            ok: true,
+        };
+
+        for msg in [&started, &completed] {
+            assert!(apply_to_delegate(&mut card, msg));
+        }
+
+        let lines = card.render_lines(80);
+        let rendered = render_to_strings(&lines).join("\n");
+        assert!(rendered.contains("read_file"), "{rendered}");
+        assert!(
+            !rendered.contains("[7]"),
+            "internal loop step numbers are not useful in the live card: {rendered}"
+        );
+    }
+
#[test]
fn delegate_card_ignores_envelopes_for_other_agents() {
let mut card = DelegateCard::new("agent_a", "general");
diff --git a/npm/deepseek-tui/package.json b/npm/deepseek-tui/package.json
index 10252551..ba3eebc6 100644
--- a/npm/deepseek-tui/package.json
+++ b/npm/deepseek-tui/package.json
@@ -1,7 +1,7 @@
{
"name": "deepseek-tui",
- "version": "0.8.15",
- "deepseekBinaryVersion": "0.8.15",
+ "version": "0.8.16",
+ "deepseekBinaryVersion": "0.8.16",
"description": "Install and run deepseek and deepseek-tui binaries from GitHub release artifacts.",
"author": "Hmbown",
"license": "MIT",