diff --git a/CHANGELOG.md b/CHANGELOG.md index 1b613204..0c80ef8f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,45 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.8.16] - 2026-05-07 + +A focused hotfix for v0.8.15 regressions in RLM, sub-agent visibility, and +terminal ownership. This release keeps the v0.8.15 feature set intact while +making long-running delegated work easier to inspect and safer to run. + +### Changed +- **RLM has no fixed 180s wall-clock timeout** (#955) — RLM turns can continue + past the old hard limit when the long-input REPL is still making progress. +- **RLM output is easier to audit** (#955) — final reports now include compact + execution metadata: input size, iteration count, elapsed time, sub-LLM RPC + count, and termination state. +- **RLM chunking guidance is stricter for exact work** (#955) — prompts now + tell the sub-agent to use deterministic Python over the full `context` for + counts/aggregation and to report chunk coverage when splitting a whole input. +- **Tool guidance is less defensive** (#955) — the system prompt now explains + when to use tools instead of discouraging the model from using capabilities + that are actually available. + +### Fixed +- **Active RLM work stays visible** (#955) — foreground RLM calls surface in the + active task/right-rail state instead of leaving the Tasks panel saying + `No active tasks`. +- **`/subagents` no longer reports false emptiness** (#955) — the sub-agent + overlay now includes live progress-only agents and transcript fanout workers + when the manager cache has not refreshed yet. +- **Sub-agent cards are quieter and more useful** (#955) — low-signal scheduler + lines such as `step 1/100: requesting model response` are hidden, while + compact tool activity remains visible. +- **Sub-agent completion protocol stays internal** (#955) — completion + sentinels are routed as internal runtime events instead of user messages, so + the parent agent does not explain raw protocol XML back to the user. +- **Sub-agents cannot take over the parent terminal** (#955) — background + agents reject `exec_shell` with `interactive=true`; they can still use + non-interactive shell, background shell, `tty=true`, and task-shell tools. +- **Terminal scrollback ownership is restored** (#955) — the TUI re-enters + alternate-screen mode after foreground/sub-agent work drains, preventing the + host terminal scrollbar from taking over the live interface. + ## [0.8.15] - 2026-05-06 An auth, Windows, editor-integration, and setup stabilization release. This diff --git a/Cargo.lock b/Cargo.lock index 6a512918..8abc208d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1080,7 +1080,7 @@ dependencies = [ [[package]] name = "deepseek-agent" -version = "0.8.15" +version = "0.8.16" dependencies = [ "deepseek-config", "serde", @@ -1088,7 +1088,7 @@ dependencies = [ [[package]] name = "deepseek-app-server" -version = "0.8.15" +version = "0.8.16" dependencies = [ "anyhow", "axum", @@ -1110,7 +1110,7 @@ dependencies = [ [[package]] name = "deepseek-config" -version = "0.8.15" +version = "0.8.16" dependencies = [ "anyhow", "deepseek-secrets", @@ -1122,7 +1122,7 @@ dependencies = [ [[package]] name = "deepseek-core" -version = "0.8.15" +version = "0.8.16" dependencies = [ "anyhow", "chrono", @@ -1140,7 +1140,7 @@ dependencies = [ [[package]] name = "deepseek-execpolicy" -version = "0.8.15" +version = "0.8.16" dependencies = [ "anyhow", "deepseek-protocol", @@ -1149,7 +1149,7 @@ dependencies = [ [[package]] name = "deepseek-hooks" -version = "0.8.15" +version = "0.8.16" dependencies = [ "anyhow", "async-trait", @@ -1163,7 +1163,7 @@ dependencies = [ [[package]] name = "deepseek-mcp" -version = "0.8.15" +version = "0.8.16" dependencies = [ "anyhow", "serde", @@ -1172,7 +1172,7 @@ dependencies = [ [[package]] name = "deepseek-protocol" -version = "0.8.15" +version = "0.8.16" dependencies = [ "serde", "serde_json", @@ -1180,7 +1180,7 @@ dependencies = [ [[package]] name = "deepseek-secrets" -version = "0.8.15" +version = "0.8.16" dependencies = [ "dirs", "keyring", @@ -1193,7 +1193,7 @@ dependencies = [ [[package]] name = "deepseek-state" -version = "0.8.15" +version = "0.8.16" dependencies = [ "anyhow", "chrono", @@ -1205,7 +1205,7 @@ dependencies = [ [[package]] name = "deepseek-tools" -version = "0.8.15" +version = "0.8.16" dependencies = [ "anyhow", "async-trait", @@ -1218,7 +1218,7 @@ dependencies = [ [[package]] name = "deepseek-tui" -version = "0.8.15" +version = "0.8.16" dependencies = [ "anyhow", "arboard", @@ -1278,7 +1278,7 @@ dependencies = [ [[package]] name = "deepseek-tui-cli" -version = "0.8.15" +version = "0.8.16" dependencies = [ "anyhow", "chrono", @@ -1302,7 +1302,7 @@ dependencies = [ [[package]] name = "deepseek-tui-core" -version = "0.8.15" +version = "0.8.16" [[package]] name = "deranged" diff --git a/Cargo.toml b/Cargo.toml index 5dbd2fe5..8dd07166 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ default-members = ["crates/cli", "crates/app-server", "crates/tui"] resolver = "2" [workspace.package] -version = "0.8.15" +version = "0.8.16" edition = "2024" # Rust 1.88 stabilized `let_chains` in `if`/`while` conditions, which the # codebase relies on extensively. Cargo enforces this so users on older diff --git a/README.md b/README.md index dec3b51d..2ae37f89 100644 --- a/README.md +++ b/README.md @@ -202,18 +202,28 @@ deepseek --provider ollama --model deepseek-coder:1.3b --- -## What's New In v0.8.15 +## What's New In v0.8.16 -A community-driven stabilization release focused on auth recovery, Windows -terminals, Zed/ACP compatibility, setup friction, and clearer cost display. +A focused hotfix for RLM, sub-agent visibility, and terminal ownership on top +of v0.8.15. [Full changelog](CHANGELOG.md). -- **Friendlier auth recovery** — runtime API-key failures now explain when the active key came only from `DEEPSEEK_API_KEY` and no saved config key is present -- **Zed / ACP adapter** — `deepseek serve --acp` exposes a local stdio Agent Client Protocol server for Zed and other compatible editors -- **Windows terminal fixes** — UTF-8 console setup, dispatcher resume handling, clipboard fallback, Ctrl+E composer behavior, and safer Windows mouse defaults -- **Yuan cost display** — set `cost_currency = "cny"` (or `yuan` / `rmb`) to show footer, `/cost`, `/tokens`, and notification summaries in CNY -- **Setup and skill polish** — workspace trust persists globally, plain Markdown `SKILL.md` files load correctly, global Agents/Cursor skill paths are discovered, and the TUI shows skills in slash autocomplete -- **Reliability fixes** — workspace-scoped `resume --last`, capped API `max_tokens`, endpoint diagnostics in `deepseek doctor`, npm `--version` fallback, and current-date turn metadata +- **RLM no longer has the old 180s wall-clock timeout** — long-input REPL work + can keep running while it is still making progress. +- **RLM reports what happened** — output now includes input size, iteration + count, elapsed time, sub-LLM RPC count, and termination state. +- **RLM chunking is safer for exact answers** — prompts require deterministic + Python for counts/aggregation and coverage reporting for whole-input chunks. +- **Sub-agent visibility is more truthful** — `/subagents`, the transcript, and + the right rail include live progress and fanout workers instead of showing + false `No agents` or `No active tasks` states. +- **Sub-agent cards are quieter** — internal scheduler lines are hidden while + useful tool activity remains visible. +- **Sub-agent completion events stay internal** — the parent agent integrates + child results without explaining raw sentinel XML back to the user. +- **Terminal ownership is hardened** — background sub-agents cannot take over + the parent terminal, and the TUI restores alternate-screen mode after + delegated work drains. --- diff --git a/crates/agent/Cargo.toml b/crates/agent/Cargo.toml index e9fb24a5..e66af62c 100644 --- a/crates/agent/Cargo.toml +++ b/crates/agent/Cargo.toml @@ -7,5 +7,5 @@ repository.workspace = true description = "Model/provider registry and fallback strategy for DeepSeek workspace architecture" [dependencies] -deepseek-config = { path = "../config", version = "0.8.15" } +deepseek-config = { path = "../config", version = "0.8.16" } serde.workspace = true diff --git a/crates/app-server/Cargo.toml b/crates/app-server/Cargo.toml index 3e1225ec..54b69329 100644 --- a/crates/app-server/Cargo.toml +++ b/crates/app-server/Cargo.toml @@ -10,15 +10,15 @@ description = "Codex-style app-server transport for DeepSeek workspace architect anyhow.workspace = true axum.workspace = true clap.workspace = true -deepseek-agent = { path = "../agent", version = "0.8.15" } -deepseek-config = { path = "../config", version = "0.8.15" } -deepseek-core = { path = "../core", version = "0.8.15" } -deepseek-execpolicy = { path = "../execpolicy", version = "0.8.15" } -deepseek-hooks = { path = "../hooks", version = "0.8.15" } -deepseek-mcp = { path = "../mcp", version = "0.8.15" } -deepseek-protocol = { path = "../protocol", version = "0.8.15" } -deepseek-state = { path = "../state", version = "0.8.15" } -deepseek-tools = { path = "../tools", version = "0.8.15" } +deepseek-agent = { path = "../agent", version = "0.8.16" } +deepseek-config = { path = "../config", version = "0.8.16" } +deepseek-core = { path = "../core", version = "0.8.16" } +deepseek-execpolicy = { path = "../execpolicy", version = "0.8.16" } +deepseek-hooks = { path = "../hooks", version = "0.8.16" } +deepseek-mcp = { path = "../mcp", version = "0.8.16" } +deepseek-protocol = { path = "../protocol", version = "0.8.16" } +deepseek-state = { path = "../state", version = "0.8.16" } +deepseek-tools = { path = "../tools", version = "0.8.16" } serde.workspace = true serde_json.workspace = true tokio.workspace = true diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml index f7a6ab89..1f049c0c 100644 --- a/crates/cli/Cargo.toml +++ b/crates/cli/Cargo.toml @@ -14,13 +14,13 @@ path = "src/main.rs" anyhow.workspace = true clap.workspace = true clap_complete.workspace = true -deepseek-agent = { path = "../agent", version = "0.8.15" } -deepseek-app-server = { path = "../app-server", version = "0.8.15" } -deepseek-config = { path = "../config", version = "0.8.15" } -deepseek-execpolicy = { path = "../execpolicy", version = "0.8.15" } -deepseek-mcp = { path = "../mcp", version = "0.8.15" } -deepseek-secrets = { path = "../secrets", version = "0.8.15" } -deepseek-state = { path = "../state", version = "0.8.15" } +deepseek-agent = { path = "../agent", version = "0.8.16" } +deepseek-app-server = { path = "../app-server", version = "0.8.16" } +deepseek-config = { path = "../config", version = "0.8.16" } +deepseek-execpolicy = { path = "../execpolicy", version = "0.8.16" } +deepseek-mcp = { path = "../mcp", version = "0.8.16" } +deepseek-secrets = { path = "../secrets", version = "0.8.16" } +deepseek-state = { path = "../state", version = "0.8.16" } chrono.workspace = true dirs.workspace = true serde.workspace = true diff --git a/crates/config/Cargo.toml b/crates/config/Cargo.toml index 3b4ca5b8..8d59c75e 100644 --- a/crates/config/Cargo.toml +++ b/crates/config/Cargo.toml @@ -8,7 +8,7 @@ description = "Config schema and precedence model for DeepSeek workspace archite [dependencies] anyhow.workspace = true -deepseek-secrets = { path = "../secrets", version = "0.8.15" } +deepseek-secrets = { path = "../secrets", version = "0.8.16" } dirs.workspace = true serde.workspace = true toml.workspace = true diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index c7bc7820..6f6c437d 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -9,13 +9,13 @@ description = "Core runtime boundaries for DeepSeek workspace architecture" [dependencies] anyhow.workspace = true chrono.workspace = true -deepseek-agent = { path = "../agent", version = "0.8.15" } -deepseek-config = { path = "../config", version = "0.8.15" } -deepseek-execpolicy = { path = "../execpolicy", version = "0.8.15" } -deepseek-hooks = { path = "../hooks", version = "0.8.15" } -deepseek-mcp = { path = "../mcp", version = "0.8.15" } -deepseek-protocol = { path = "../protocol", version = "0.8.15" } -deepseek-state = { path = "../state", version = "0.8.15" } -deepseek-tools = { path = "../tools", version = "0.8.15" } +deepseek-agent = { path = "../agent", version = "0.8.16" } +deepseek-config = { path = "../config", version = "0.8.16" } +deepseek-execpolicy = { path = "../execpolicy", version = "0.8.16" } +deepseek-hooks = { path = "../hooks", version = "0.8.16" } +deepseek-mcp = { path = "../mcp", version = "0.8.16" } +deepseek-protocol = { path = "../protocol", version = "0.8.16" } +deepseek-state = { path = "../state", version = "0.8.16" } +deepseek-tools = { path = "../tools", version = "0.8.16" } serde_json.workspace = true uuid.workspace = true diff --git a/crates/execpolicy/Cargo.toml b/crates/execpolicy/Cargo.toml index f571d14e..d7517dab 100644 --- a/crates/execpolicy/Cargo.toml +++ b/crates/execpolicy/Cargo.toml @@ -8,5 +8,5 @@ description = "Execution policy and approval model parity for DeepSeek workspace [dependencies] anyhow.workspace = true -deepseek-protocol = { path = "../protocol", version = "0.8.15" } +deepseek-protocol = { path = "../protocol", version = "0.8.16" } serde.workspace = true diff --git a/crates/hooks/Cargo.toml b/crates/hooks/Cargo.toml index f63b33ef..a59432cb 100644 --- a/crates/hooks/Cargo.toml +++ b/crates/hooks/Cargo.toml @@ -10,7 +10,7 @@ description = "Hook dispatch and notifications parity for DeepSeek workspace arc anyhow.workspace = true async-trait.workspace = true chrono.workspace = true -deepseek-protocol = { path = "../protocol", version = "0.8.15" } +deepseek-protocol = { path = "../protocol", version = "0.8.16" } reqwest.workspace = true serde.workspace = true serde_json.workspace = true diff --git a/crates/tools/Cargo.toml b/crates/tools/Cargo.toml index a327a580..347f8450 100644 --- a/crates/tools/Cargo.toml +++ b/crates/tools/Cargo.toml @@ -9,7 +9,7 @@ description = "Tool invocation lifecycle, schema validation, and scheduler paral [dependencies] anyhow.workspace = true async-trait.workspace = true -deepseek-protocol = { path = "../protocol", version = "0.8.15" } +deepseek-protocol = { path = "../protocol", version = "0.8.16" } serde.workspace = true serde_json.workspace = true tokio.workspace = true diff --git a/crates/tui/Cargo.toml b/crates/tui/Cargo.toml index c0caa9b8..3b9cd503 100644 --- a/crates/tui/Cargo.toml +++ b/crates/tui/Cargo.toml @@ -21,8 +21,8 @@ path = "src/main.rs" [dependencies] anyhow = "1.0.100" arboard = "3.4" -deepseek-secrets = { path = "../secrets", version = "0.8.15" } -deepseek-tools = { path = "../tools", version = "0.8.15" } +deepseek-secrets = { path = "../secrets", version = "0.8.16" } +deepseek-tools = { path = "../tools", version = "0.8.16" } schemaui = { version = "0.12.0", default-features = false, optional = true } async-stream = "0.3.6" async-trait = "0.1" diff --git a/crates/tui/src/client/chat.rs b/crates/tui/src/client/chat.rs index b0f76ab9..af1c8d53 100644 --- a/crates/tui/src/client/chat.rs +++ b/crates/tui/src/client/chat.rs @@ -538,6 +538,14 @@ fn build_chat_messages_with_reasoning( pending_tool_calls.clear(); } out.push(msg); + } else if role == "system" { + let content = text_parts.join("\n"); + if !content.trim().is_empty() { + out.push(json!({ + "role": "system", + "content": content, + })); + } } else if role == "user" { let content = text_parts.join("\n"); if !content.trim().is_empty() { @@ -1601,4 +1609,21 @@ mod stream_decoder_tests { .expect("tool-use block present"); assert_eq!(id, "call_xyz"); } + + #[test] + fn request_builder_preserves_internal_system_messages() { + let messages = vec![Message { + role: "system".to_string(), + content: vec![ContentBlock::Text { + text: "internal runtime event".to_string(), + cache_control: None, + }], + }]; + + let built = build_chat_messages(None, &messages, "deepseek-v4-flash"); + + assert_eq!(built.len(), 1); + assert_eq!(built[0]["role"], "system"); + assert_eq!(built[0]["content"], "internal runtime event"); + } } diff --git a/crates/tui/src/commands/core.rs b/crates/tui/src/commands/core.rs index c5e047c8..259ecd3d 100644 --- a/crates/tui/src/commands/core.rs +++ b/crates/tui/src/commands/core.rs @@ -5,7 +5,7 @@ use std::fmt::Write; use crate::config::{COMMON_DEEPSEEK_MODELS, normalize_model_name}; use crate::localization::{MessageId, tr}; use crate::tui::app::{App, AppAction, AppMode, ReasoningEffort}; -use crate::tui::views::{HelpView, ModalKind, SubAgentsView}; +use crate::tui::views::{HelpView, ModalKind, SubAgentsView, subagent_view_agents}; use super::CommandResult; @@ -140,8 +140,8 @@ pub fn models(_app: &mut App) -> CommandResult { /// List sub-agent status from the engine pub fn subagents(app: &mut App) -> CommandResult { if app.view_stack.top_kind() != Some(ModalKind::SubAgents) { - app.view_stack - .push(SubAgentsView::new(app.subagent_cache.clone())); + let agents = subagent_view_agents(app, &app.subagent_cache); + app.view_stack.push(SubAgentsView::new(agents)); } app.status_message = Some(tr(app.ui_locale, MessageId::SubagentsFetching).to_string()); CommandResult::action(AppAction::ListSubAgents) diff --git a/crates/tui/src/core/engine/turn_loop.rs b/crates/tui/src/core/engine/turn_loop.rs index f0461fad..55b57896 100644 --- a/crates/tui/src/core/engine/turn_loop.rs +++ b/crates/tui/src/core/engine/turn_loop.rs @@ -906,17 +906,8 @@ impl Engine { if !completions.is_empty() { let count = completions.len(); for c in completions { - self.session - .working_set - .observe_user_message(&c.payload, &self.session.workspace); - self.add_session_message(Message { - role: "user".to_string(), - content: vec![ContentBlock::Text { - text: c.payload, - cache_control: None, - }], - }) - .await; + self.add_session_message(subagent_completion_runtime_message(&c.payload)) + .await; } let _ = self .tx_event @@ -1860,6 +1851,24 @@ impl Engine { } } +fn subagent_completion_runtime_message(payload: &str) -> Message { + Message { + role: "system".to_string(), + content: vec![ContentBlock::Text { + text: format!( + "\n\ +This is an internal runtime event, not user input. Use the sub-agent completion \ +data below to continue coordinating the current task. Do not tell the user they \ +pasted sentinels, do not explain the sentinel protocol, and do not quote the raw \ +XML unless the user explicitly asks to debug sub-agent internals.\n\n\ +{payload}\n\ +" + ), + cache_control: None, + }], + } +} + /// Resolve an `"auto"` reasoning-effort tier to a concrete value. /// /// When the configured effort is `"auto"`, inspects the last user message @@ -1905,3 +1914,25 @@ fn resolve_auto_effort(reasoning_effort: Option<&str>, messages: &[Message]) -> None => None, } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn subagent_completion_handoff_is_internal_system_message() { + let message = subagent_completion_runtime_message( + "Build passed\n{\"agent_id\":\"agent_a\"}", + ); + + assert_eq!(message.role, "system"); + let text = match &message.content[0] { + ContentBlock::Text { text, .. } => text, + other => panic!("expected text block, got {other:?}"), + }; + assert!(text.contains("internal runtime event, not user input")); + assert!(text.contains("Do not tell the user they pasted sentinels")); + assert!(text.contains("")); + assert!(text.contains("Build passed")); + } +} diff --git a/crates/tui/src/prompts.rs b/crates/tui/src/prompts.rs index 20e0a46d..d77ca63a 100644 --- a/crates/tui/src/prompts.rs +++ b/crates/tui/src/prompts.rs @@ -608,6 +608,15 @@ mod tests { assert!(prompt.contains("Approval Policy: Suggest")); } + #[test] + fn package_version_is_current_hotfix_release() { + assert_eq!( + env!("CARGO_PKG_VERSION"), + "0.8.16", + "0.8.16 hotfix branch must report the release version before publishing" + ); + } + #[test] fn compose_prompt_deterministic_order() { let prompt = compose_prompt(AppMode::Yolo, Personality::Calm); @@ -721,14 +730,18 @@ mod tests { } #[test] - fn when_not_to_use_sections_present() { + fn tool_selection_guide_avoids_defensive_tool_suppression() { let prompt = compose_prompt(AppMode::Agent, Personality::Calm); - assert!(prompt.contains("When NOT to use certain tools")); - assert!(prompt.contains("### `apply_patch`")); - assert!(prompt.contains("### `edit_file`")); - assert!(prompt.contains("### `exec_shell`")); - assert!(prompt.contains("### `agent_spawn`")); - assert!(prompt.contains("### `rlm`")); + assert!(prompt.contains("Tool Selection Guide")); + assert!(prompt.contains("Use `agent_result`")); + assert!( + !prompt.contains("When NOT to use certain tools"), + "the system prompt should steer tool choice without training the model to avoid available tools" + ); + assert!( + !prompt.contains("Don't reach for"), + "avoid defensive anti-tool wording in the base prompt" + ); } /// #588: language-mirroring directive must ship in every mode so @@ -766,7 +779,7 @@ mod tests { fn rlm_specialty_tool_guidance_present() { let prompt = compose_prompt(AppMode::Agent, Personality::Calm); // Structural: the RLM heading must exist as a section anchor. - assert!(prompt.contains("RLM — When to Use It")); + assert!(prompt.contains("RLM — How to Use It")); // Structural: the word "rlm" must appear multiple times (tool // name, section heading, toolbox reference). Just verify the // lowercase form — exact wording is NOT a test concern. @@ -775,14 +788,20 @@ mod tests { rlm_count >= 5, "RLM guidance present: expected >= 5 mentions of 'rlm', got {rlm_count}" ); + assert!( + !prompt.contains("When NOT to use RLM"), + "RLM guidance should explain fit and verification without telling the model to avoid the tool" + ); } #[test] fn subagent_done_sentinel_section_present() { let prompt = compose_prompt(AppMode::Agent, Personality::Calm); - assert!(prompt.contains("Sub-agent completion sentinel")); + assert!(prompt.contains("Internal Sub-agent Completion Events")); assert!(prompt.contains("")); + assert!(prompt.contains("not user input")); assert!(prompt.contains("Integration protocol")); + assert!(prompt.contains("Do not tell the user they pasted sentinels")); } #[test] diff --git a/crates/tui/src/prompts/base.md b/crates/tui/src/prompts/base.md index 2840562d..97cec022 100644 --- a/crates/tui/src/prompts/base.md +++ b/crates/tui/src/prompts/base.md @@ -85,7 +85,7 @@ Before you fire any tool, scan your checklist: is there another tool you could r The dispatcher runs parallel tool calls simultaneously. Serializing independent operations wastes the user's time and grows your context faster than necessary. -## RLM — When to Use It +## RLM — How to Use It RLM loads input into a Python REPL where you write code that calls sub-LLM helpers (`llm_query`, `llm_query_batched`, `rlm_query`). Three patterns, not one — choose based on the shape of the work: @@ -95,7 +95,7 @@ RLM loads input into a Python REPL where you write code that calls sub-LLM helpe **RECURSE** — A problem that benefits from decomposition + critique. Use `rlm_query` to have a sub-LLM review your reasoning, identify gaps, or explore alternative approaches. The sub-LLM returns a synthesized answer you verify against live tool output. -**When NOT to use RLM**: a single short file you can read directly; a simple classification on 3 items; interactive iterative exploration (RLM is one-shot batch). For those, `read_file`, `grep_files`, or `agent_spawn` are faster and cheaper. +For exact counts or structured aggregates, compute them directly in Python inside the REPL (`len`, regexes, parsers, counters) and use child LLM calls only for semantic interpretation. When you chunk a whole input, use `chunk_context()` plus `chunk_coverage()` and report coverage explicitly: chunks processed, total chunks, line/char ranges, and any skipped sections. Cross-check surprising aggregate results with deterministic code before presenting them. The Python helpers visible inside the REPL (`llm_query`, `llm_query_batched`, `rlm_query`, `rlm_query_batched`) are NOT separately-callable tools — they are functions the sub-agent uses inside its Python code. You only call `rlm` itself from the model side. @@ -147,48 +147,28 @@ When context is deep (past a soft seam): cache reasoning conclusions in concise Multiple `tool_calls` in one turn run in parallel. `web_search` returns `ref_id`s — cite as `(ref_id)`. -## When NOT to use certain tools +## Tool Selection Guide ### `apply_patch` -Don't reach for `apply_patch` when: -- You're creating a brand-new file — use `write_file`. -- The change is a single search/replace in one location — `edit_file` is simpler and less error-prone. -- You haven't read the target file yet. Patches written blind almost always fail to apply. -- The file is short enough to rewrite whole — `write_file` with full content avoids fuzz matching entirely. +Use `apply_patch` for structural edits, coordinated changes, or cases where line context matters. Use `write_file` for brand-new files or full-file rewrites. Use `edit_file` for a single unambiguous replacement. ### `edit_file` -Don't reach for `edit_file` when: -- You're making coordinated changes across many files — `apply_patch` with a multi-file diff is atomic. -- You need to insert or delete whole blocks of lines — `apply_patch` handles structural edits more cleanly. -- The search string is ambiguous or could match multiple locations — `apply_patch` with line-number context is more precise. -- You're creating a new file — `write_file` is the correct tool. +Use `edit_file` for one clear replacement in one file. Use `apply_patch` when the edit changes whole blocks, touches multiple files, or needs surrounding line context. ### `exec_shell` -Don't reach for `exec_shell` when: -- A structured tool already covers the same operation: `grep_files` for code search, `git_status`/`git_diff` for git inspection, `read_file` for file contents. -- You just need to read or write a file — `read_file` / `write_file` are faster and show up in the tool log. -- The command is a single `cat`, `ls`, or `echo` — use `read_file`, `list_dir`, or just state the result. -- You're tempted to pipe `curl` for a web lookup — `web_search` or `fetch_url` give structured results. -- The command may run for minutes, start a server, run a full test suite, or perform a scientific/release computation — use `task_shell_start` or `exec_shell` with `background: true`, then poll with `task_shell_wait` or `exec_shell_wait`. +Use `exec_shell` for shell-native diagnostics, pipelines, and bounded commands. Use structured tools for structured operations when they map directly (`grep_files`, `git_diff`, `read_file`). For long commands, servers, full test suites, or release computations, start background work with `task_shell_start` or `exec_shell` using `background: true`, then poll with `task_shell_wait` or `exec_shell_wait`. ### `agent_spawn` -Don't reach for `agent_spawn` when: -- The task is a single read or search you can do in one turn — spawning has overhead. -- You need sequential steps where each depends on the prior result — run them yourself, in order. -- The work can be done with a fast `exec_shell` pipeline or a `grep_files` call. +Use `agent_spawn` for independent investigations or implementation slices that can run while you continue coordinating. Use `agent_wait` when you need one or more completions. Use `agent_result` when the sentinel summary is too thin or you need the full structured output. Keep tiny single-read/search tasks local so the transcript stays compact. ### `rlm` -Don't reach for `rlm` (the recursive language model tool) when: -- The input fits comfortably in your context window and the task is straightforward — just read it directly with `read_file`. -- A simple `grep_files` or `exec_shell` pipeline can answer the question. -- You need interactive, iterative exploration of the data — `rlm` is batch-oriented (the sub-LLM writes Python in one shot, then returns). -- The task is a simple classification or extraction on short text — your own reasoning is faster and cheaper. +Use `rlm` for long-context semantic work, bulk classification/extraction, and decomposition where a Python REPL plus child LLM helpers is useful. Use deterministic Python inside RLM for exact counts and structured aggregation; use `grep_files` or `exec_shell` directly when that is the clearest deterministic check. Inside the `rlm` REPL, the sub-LLM has access to `llm_query()`, `llm_query_batched()`, `rlm_query()`, and `rlm_query_batched()` as Python helpers for further sub-LLM work — those are not standalone tools you call directly. -## Sub-agent completion sentinel +## Internal Sub-agent Completion Events -When you spawn a sub-agent via `agent_spawn`, the child runs independently. You will receive a `` element in the transcript when it finishes. This sentinel carries: +When you spawn a sub-agent via `agent_spawn`, the child runs independently. The runtime may send you an internal `` completion event when it finishes. This event is not user input. It carries: - `agent_id` — the child's identifier - `summary` — a human-readable summary of what the child found or did @@ -201,6 +181,7 @@ When you spawn a sub-agent via `agent_spawn`, the child runs independently. You 3. If the summary is insufficient, call `agent_result` to pull the full structured result. 4. If the child failed (`"failed"`), assess whether the failure blocks your plan or whether you can proceed with a fallback. 5. Update your `checklist_write` items to reflect the child's contribution. +6. Do not tell the user they pasted sentinels or explain this protocol unless they explicitly ask about sub-agent internals. You may see multiple `` sentinels in a single turn when children were spawned in parallel. Process each one, then synthesize. diff --git a/crates/tui/src/repl/runtime.rs b/crates/tui/src/repl/runtime.rs index 3b5cdfc9..449cdf98 100644 --- a/crates/tui/src/repl/runtime.rs +++ b/crates/tui/src/repl/runtime.rs @@ -144,6 +144,7 @@ pub struct PythonRuntime { stdout_limit: usize, round_count: u64, started: Instant, + round_timeout: Option, } impl PythonRuntime { @@ -151,7 +152,7 @@ impl PythonRuntime { /// Used by the agent loop for inline `repl` blocks the model emits in /// regular conversation. pub async fn new() -> Result { - Self::spawn_inner(None).await + Self::spawn_inner(None, Some(ROUND_TIMEOUT)).await } /// Compatibility shim — older RLM code path used to pass a state file. @@ -172,10 +173,13 @@ impl PythonRuntime { /// Spawn a REPL with `context` (and `ctx`) preloaded from a file. Used /// by the RLM turn loop. pub async fn spawn_with_context(context_path: &Path) -> Result { - Self::spawn_inner(Some(context_path)).await + Self::spawn_inner(Some(context_path), None).await } - async fn spawn_inner(context_path: Option<&Path>) -> Result { + async fn spawn_inner( + context_path: Option<&Path>, + round_timeout: Option, + ) -> Result { let session_id = Uuid::new_v4().simple().to_string(); let bootstrap = render_bootstrap(&session_id); @@ -215,6 +219,7 @@ impl PythonRuntime { stdout_limit: DEFAULT_STDOUT_LIMIT, round_count: 0, started: Instant::now(), + round_timeout, }; // Wait for `__RLM_READY___` before handing control back. If @@ -298,6 +303,7 @@ impl PythonRuntime { let mut final_value: Option = None; let mut had_error = false; let mut rpc_count: u32 = 0; + let round_timeout = self.round_timeout; let read_loop = async { loop { @@ -360,15 +366,19 @@ impl PythonRuntime { Ok::<_, String>(()) }; - match tokio::time::timeout(ROUND_TIMEOUT, read_loop).await { - Ok(Ok(())) => {} - Ok(Err(e)) => return Err(e), - Err(_) => { - return Err(format!( - "REPL round timed out after {}s", - ROUND_TIMEOUT.as_secs() - )); + if let Some(round_timeout) = round_timeout { + match tokio::time::timeout(round_timeout, read_loop).await { + Ok(Ok(())) => {} + Ok(Err(e)) => return Err(e), + Err(_) => { + return Err(format!( + "REPL round timed out after {}s", + round_timeout.as_secs() + )); + } } + } else { + read_loop.await?; } let stderr = self.drain_stderr().await; @@ -429,6 +439,12 @@ impl PythonRuntime { self.round_count } + /// Current per-round timeout policy. RLM context runs intentionally return + /// `None` so long map-reduce jobs are not killed by the old 180s cap. + pub fn round_timeout(&self) -> Option { + self.round_timeout + } + /// Wall-clock uptime since spawn. pub fn uptime(&self) -> Duration { self.started.elapsed() @@ -578,6 +594,55 @@ def repl_get(name, default=None): def repl_set(name, value): globals()[str(name)] = value +def chunk_context(max_chars=20000, overlap=0): + """Return full-coverage context chunks with index/start/end/text fields.""" + max_chars = int(max_chars) + overlap = max(0, int(overlap)) + if max_chars <= 0: + raise ValueError("max_chars must be > 0") + if overlap >= max_chars: + raise ValueError("overlap must be smaller than max_chars") + chunks = [] + start = 0 + idx = 0 + total = len(context) + while start < total: + end = min(total, start + max_chars) + chunks.append({"index": idx, "start": start, "end": end, "text": context[start:end]}) + idx += 1 + if end >= total: + break + start = end - overlap + return chunks + +def chunk_coverage(chunks): + """Summarize coverage for chunks produced by chunk_context().""" + spans = [] + for c in chunks: + try: + spans.append((int(c["start"]), int(c["end"]))) + except Exception: + continue + spans.sort() + covered = 0 + cursor = 0 + gaps = [] + for start, end in spans: + if start > cursor: + gaps.append((cursor, start)) + if end > cursor: + covered += end - max(start, cursor) + cursor = end + if cursor < len(context): + gaps.append((cursor, len(context))) + return { + "chunks": len(chunks), + "context_chars": len(context), + "covered_chars": covered, + "gaps": gaps, + "complete": covered >= len(context) and not gaps, + } + # Load the long input as `context` (and `ctx`) from a file. This keeps the # big string out of the process command-line and out of the LLM's window. _ctx_file = _os.environ.get("RLM_CONTEXT_FILE","") @@ -595,6 +660,7 @@ _BOOTSTRAP_NAMES = { "_rpc","_ctx_file","_BOOTSTRAP_NAMES","_main_loop", "llm_query","llm_query_batched","rlm_query","rlm_query_batched", "FINAL","FINAL_VAR","SHOW_VARS","repl_get","repl_set", + "chunk_context","chunk_coverage", "context","ctx", "_json","_os","_sys","_traceback", } @@ -773,6 +839,44 @@ mod tests { rt.shutdown().await; } + #[tokio::test] + async fn context_chunk_helpers_report_full_coverage() { + let path = write_temp_context("abcdefghijklmnopqrstuvwxyz"); + let mut rt = PythonRuntime::spawn_with_context(&path) + .await + .expect("spawn"); + let round = rt + .execute( + "chunks = chunk_context(max_chars=10)\n\ + coverage = chunk_coverage(chunks)\n\ + print(len(chunks), coverage['covered_chars'], coverage['complete'])", + ) + .await + .expect("execute"); + assert!(round.stdout.contains("3 26 True"), "{}", round.stdout); + rt.shutdown().await; + } + + #[tokio::test] + async fn rlm_context_runtime_has_no_fixed_round_timeout() { + let path = write_temp_context("long input"); + let rt = PythonRuntime::spawn_with_context(&path) + .await + .expect("spawn"); + assert!( + rt.round_timeout().is_none(), + "RLM context runs must not inherit the old 180s REPL round timeout" + ); + rt.shutdown().await; + } + + #[tokio::test] + async fn inline_runtime_keeps_bounded_round_timeout() { + let rt = PythonRuntime::new().await.expect("spawn"); + assert_eq!(rt.round_timeout(), Some(ROUND_TIMEOUT)); + rt.shutdown().await; + } + #[tokio::test] async fn final_is_captured() { let mut rt = PythonRuntime::new().await.expect("spawn"); diff --git a/crates/tui/src/rlm/prompt.rs b/crates/tui/src/rlm/prompt.rs index 5553d2cf..39f5d6cd 100644 --- a/crates/tui/src/rlm/prompt.rs +++ b/crates/tui/src/rlm/prompt.rs @@ -19,6 +19,8 @@ The REPL exposes: - `llm_query_batched(prompts, model=None)` — concurrent fan-out. Returns `list[str]` in input order. The `model` argument is accepted for compatibility but ignored. - `rlm_query(prompt, model=None)` — recursive sub-RLM. Use when a sub-task itself needs decomposition. The `model` argument is accepted for compatibility but ignored. - `rlm_query_batched(prompts, model=None)` — concurrent recursive sub-RLMs. The `model` argument is accepted for compatibility but ignored. +- `chunk_context(max_chars=20000, overlap=0)` — full-coverage chunks with index/start/end/text fields. +- `chunk_coverage(chunks)` — coverage summary for chunks produced by `chunk_context`. - `SHOW_VARS()` — list user variables and their types. - `repl_set(name, value)` / `repl_get(name)` — explicit cross-round storage. - `print(...)` — diagnostic output. The driver feeds you a truncated preview next round. @@ -40,11 +42,12 @@ print(context[:500]) 2. CHUNK + map-reduce with batched concurrent calls. ```repl chunk_size = 8000 -chunks = [context[i:i+chunk_size] for i in range(0, len(context), chunk_size)] -prompts = [f"Extract any mentions of X from this section:\n\n{c}" for c in chunks] +chunks = chunk_context(max_chars=chunk_size) +coverage = chunk_coverage(chunks) +prompts = [f"Extract any mentions of X from section {c['index']} ({c['start']}:{c['end']}):\n\n{c['text']}" for c in chunks] partials = llm_query_batched(prompts) combined = "\n\n".join(partials) -answer = llm_query(f"Synthesize across these section-level extractions:\n\n{combined}") +answer = llm_query(f"Coverage: {coverage}\n\nSynthesize across these section-level extractions:\n\n{combined}") print(answer[:500]) ``` Then on the next turn: @@ -73,6 +76,8 @@ Rules - Never `print(context)` or otherwise dump it whole — slice, sample, or chunk. - You MUST call `llm_query` / `llm_query_batched` / `rlm_query` at least once before `FINAL(...)`. Calling FINAL from a top-level prose answer (without ever running a `repl` block that touched `context` via a sub-LLM) is REJECTED — the driver will discard the FINAL and ask you to actually use the REPL. - Sub-LLMs are powerful — feed them generous chunks (tens of thousands of chars), not tiny windows. +- For exact counts, package totals, line totals, or other structured aggregates, compute them with Python over `context` directly. Do not ask a child LLM to count. +- For whole-input map-reduce, report coverage in the final answer: chunks processed, total chunks, and whether every line/char range was included. If you only processed a subset, say that explicitly. - Do NOT pad your output with prose like "Here is what I'll do:" — just emit the next ```repl block. "#; @@ -115,6 +120,8 @@ mod tests { "llm_query_batched", "rlm_query", "rlm_query_batched", + "chunk_context", + "chunk_coverage", "SHOW_VARS", "FINAL", "FINAL_VAR", @@ -133,4 +140,12 @@ mod tests { "system prompt should reject the prose-shortcut path explicitly" ); } + + #[test] + fn rlm_prompt_requires_deterministic_counts_and_coverage() { + let s = body(); + assert!(s.contains("compute them with Python")); + assert!(s.contains("report coverage")); + assert!(s.contains("chunks processed")); + } } diff --git a/crates/tui/src/rlm/turn.rs b/crates/tui/src/rlm/turn.rs index 16504fb8..676d9e1e 100644 --- a/crates/tui/src/rlm/turn.rs +++ b/crates/tui/src/rlm/turn.rs @@ -34,8 +34,6 @@ const STDOUT_METADATA_PREVIEW_LEN: usize = 800; const PROMPT_PREVIEW_LEN: usize = 500; /// Temperature for root LLM calls. const ROOT_TEMPERATURE: f32 = 0.3; -/// Hard wall-clock cap on a whole RLM turn. -const TURN_TIMEOUT: Duration = Duration::from_secs(180); /// Bound on conversation history we keep across iterations. const MAX_HISTORY_MESSAGES: usize = 20; @@ -156,6 +154,13 @@ pub(crate) fn run_rlm_turn_inner( )) } +/// RLM turns are long-running background-style work. Do not kill the whole +/// turn with the old fixed 180s wall-clock cap; per-request cancellation still +/// comes from the parent turn token and the user can cancel from the TUI. +fn turn_timeout() -> Option { + None +} + // --------------------------------------------------------------------------- // Implementation // --------------------------------------------------------------------------- @@ -237,15 +242,14 @@ async fn run_rlm_turn_impl( let result = 'turn: { for iteration in 0..MAX_RLM_ITERATIONS { - if start.elapsed() > TURN_TIMEOUT { + if let Some(timeout) = turn_timeout() + && start.elapsed() > timeout + { break 'turn RlmTurnResult { answer: String::new(), iterations: iteration, duration: start.elapsed(), - error: Some(format!( - "RLM turn timed out after {}s", - TURN_TIMEOUT.as_secs() - )), + error: Some(format!("RLM turn timed out after {}s", timeout.as_secs())), usage: total_usage, termination: RlmTermination::Error, trace: trace.clone(), @@ -588,6 +592,14 @@ fn build_metadata_message( parts.push("**REPL helpers** (use inside ```repl blocks)".to_string()); parts.push("- `context` / `ctx` — the full input string".to_string()); parts.push("- `len(context)` / `context[a:b]` / `context.splitlines()` — slice it".to_string()); + parts.push( + "- `chunk_context(max_chars=20000, overlap=0)` — full-coverage chunks with index/start/end/text" + .to_string(), + ); + parts.push( + "- `chunk_coverage(chunks)` — coverage report for chunk_context output" + .to_string(), + ); parts.push( "- `llm_query(prompt, model=None)` — one-shot child LLM; `model` is ignored and child calls stay pinned to Flash" .to_string(), @@ -971,4 +983,12 @@ mod tests { assert!(s.contains("line19")); assert!(s.contains("…")); } + + #[test] + fn rlm_turn_has_no_fixed_wall_clock_timeout() { + assert!( + turn_timeout().is_none(), + "RLM turns should not be killed by the old fixed 180s wall-clock cap" + ); + } } diff --git a/crates/tui/src/tools/rlm.rs b/crates/tui/src/tools/rlm.rs index 8881c8ce..2c7058b4 100644 --- a/crates/tui/src/tools/rlm.rs +++ b/crates/tui/src/tools/rlm.rs @@ -60,20 +60,22 @@ impl ToolSpec for RlmTool { calls in-REPL helpers (`llm_query`, `llm_query_batched`, \ `rlm_query`, `rlm_query_batched`) to process it, then returns a \ synthesized answer. \n\n\ - DO NOT use this tool when: the input fits in your context (just \ - use `read_file` and reason directly); a `grep_files` / \ - `exec_shell` pipeline would answer the question; the task is a \ - short classification or extraction; you need interactive \ - iterative exploration (rlm is one-shot batch). \n\n\ - Use this tool only when the input is genuinely too large to load \ - (a whole file > 50K tokens, a long transcript, a multi-document \ - corpus). It is slower and more expensive than direct reasoning. \n\n\ + Use this tool when the input is genuinely large or when a Python \ + map-reduce pass plus child LLM calls is the right shape: whole \ + files, long transcripts, multi-document corpora, bulk semantic \ + classification, or decomposition/critique work. For exact counts \ + or structured aggregates, compute them directly in Python inside \ + the REPL and report the deterministic result instead of asking a \ + child LLM to guess. For whole-input map-reduce, use the REPL \ + helpers `chunk_context()` and `chunk_coverage()` so the result \ + states what was covered. \n\n\ Provide `task` (what to do) plus exactly one of `file_path` \ (workspace-relative, preferred — keeps the long input out of \ your context entirely) or `content` (inline, capped at 200k \ chars). The Python helpers (`llm_query`, `rlm_query`, etc.) live \ INSIDE the REPL — they are not separately-callable tools. \n\n\ - Returns the final synthesized answer as a string." + Returns the final synthesized answer plus an RLM report showing \ + input size, iterations, duration, sub-LLM calls, and trace summary." } fn input_schema(&self) -> Value { @@ -177,6 +179,8 @@ impl ToolSpec for RlmTool { "rlm: input is empty after loading", )); } + let input_chars = body.chars().count(); + let input_lines = body.lines().count(); // Pin child calls to Flash so model-generated tool args cannot quietly // turn fanout work into Pro-billed requests. The RLM root still uses @@ -250,6 +254,14 @@ impl ToolSpec for RlmTool { RlmTermination::Error => String::new(), }; + let report = format!( + "RLM report:\n- input: {input_lines} line(s), {input_chars} char(s)\n- iterations: {}\n- duration: {}ms\n- sub-LLM RPCs: {}\n- termination: {:?}\n\nAnswer:\n", + result.iterations, + result.duration.as_millis(), + result.total_rpcs, + result.termination, + ); + let trace_summary = if result.trace.is_empty() { String::from("\n\n[trace: no REPL rounds executed]") } else { @@ -309,14 +321,17 @@ impl ToolSpec for RlmTool { "child_model": child_model, "termination": format!("{:?}", result.termination).to_lowercase(), "max_depth": max_depth, + "context_chars": input_chars, + "context_lines": input_lines, "total_rpcs": result.total_rpcs, "trace": trace_json, }); - Ok( - ToolResult::success(format!("{}{}{}", result.answer, footer, trace_summary)) - .with_metadata(metadata), - ) + Ok(ToolResult::success(format!( + "{report}{}{}{}", + result.answer, footer, trace_summary + )) + .with_metadata(metadata)) } } @@ -369,6 +384,24 @@ mod tests { assert!(tool().supports_parallel()); } + #[test] + fn description_steers_without_suppressing_rlm_use() { + let t = tool(); + let description = t.description(); + assert!( + description.contains("Use this tool when"), + "description should positively explain the RLM fit" + ); + assert!( + !description.contains("DO NOT use"), + "avoid training the model to avoid an available tool" + ); + assert!( + !description.contains("slower and more expensive"), + "cost caveats belong in verification guidance, not tool suppression" + ); + } + #[tokio::test] async fn returns_not_available_without_client() { let t = tool(); diff --git a/crates/tui/src/tools/subagent/mod.rs b/crates/tui/src/tools/subagent/mod.rs index 993584a7..af204a07 100644 --- a/crates/tui/src/tools/subagent/mod.rs +++ b/crates/tui/src/tools/subagent/mod.rs @@ -3720,6 +3720,7 @@ impl SubAgentToolRegistry { if !self.is_tool_allowed(name) { return Err(anyhow!("Tool {name} not allowed for this sub-agent")); } + reject_subagent_terminal_takeover(name, &input)?; self.registry .execute(name, input) .await @@ -3727,6 +3728,22 @@ impl SubAgentToolRegistry { } } +fn reject_subagent_terminal_takeover(name: &str, input: &Value) -> Result<()> { + let wants_interactive_shell = name == "exec_shell" + && input + .get("interactive") + .and_then(Value::as_bool) + .unwrap_or(false); + if wants_interactive_shell { + return Err(anyhow!( + "Sub-agents run in the background and cannot use exec_shell with interactive=true \ + because that would take over the parent TUI terminal. Use non-interactive \ + exec_shell, background=true, tty=true, or task_shell_start instead." + )); + } + Ok(()) +} + /// Resolve the effective allowed-tools list for a child. /// /// **v0.6.6 default: full inheritance.** Returning `Ok(None)` means the diff --git a/crates/tui/src/tools/subagent/tests.rs b/crates/tui/src/tools/subagent/tests.rs index 71d84f8b..0295b567 100644 --- a/crates/tui/src/tools/subagent/tests.rs +++ b/crates/tui/src/tools/subagent/tests.rs @@ -1003,6 +1003,39 @@ fn mailbox_propagates_through_child_runtime_chain() { ); } +#[test] +fn subagent_rejects_interactive_shell_terminal_takeover() { + let err = reject_subagent_terminal_takeover( + "exec_shell", + &serde_json::json!({ + "command": "python3 -i", + "interactive": true + }), + ) + .expect_err("sub-agents must not inherit the parent terminal"); + + let msg = err.to_string(); + assert!(msg.contains("cannot use exec_shell with interactive=true")); + assert!(msg.contains("parent TUI terminal")); + + reject_subagent_terminal_takeover( + "exec_shell", + &serde_json::json!({ + "command": "cargo check", + "interactive": false + }), + ) + .expect("non-interactive shell remains allowed"); + reject_subagent_terminal_takeover( + "exec_shell", + &serde_json::json!({ + "command": "cargo test", + "background": true + }), + ) + .expect("background shell remains allowed"); +} + #[tokio::test] async fn mailbox_close_as_cancel_propagates_to_grandchild_runtime() { use crate::tools::subagent::mailbox::Mailbox; diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs index f5d936fe..0fb83fed 100644 --- a/crates/tui/src/tui/ui.rs +++ b/crates/tui/src/tui/ui.rs @@ -82,6 +82,7 @@ use crate::tui::tool_routing::{ }; use crate::tui::ui_text::{history_cell_to_text, line_to_plain, slice_text, text_display_width}; use crate::tui::user_input::UserInputView; +use crate::tui::views::subagent_view_agents; use super::active_cell::ActiveCell; use super::app::{ @@ -582,6 +583,8 @@ async fn refresh_active_task_panel(app: &mut App, task_manager: &SharedTaskManag .map(task_summary_to_panel_entry) .collect(); + entries.extend(active_rlm_task_entries(app)); + if let Some(shell_mgr) = app.runtime_services.shell_manager.as_ref() && let Ok(mut mgr) = shell_mgr.lock() { @@ -601,6 +604,39 @@ async fn refresh_active_task_panel(app: &mut App, task_manager: &SharedTaskManag app.task_panel = entries; } +fn active_rlm_task_entries(app: &App) -> Vec { + let Some(active) = app.active_cell.as_ref() else { + return Vec::new(); + }; + let duration_ms = app + .turn_started_at + .map(|started| u64::try_from(started.elapsed().as_millis()).unwrap_or(u64::MAX)); + active + .entries() + .iter() + .enumerate() + .filter_map(|(idx, entry)| { + let HistoryCell::Tool(ToolCell::Generic(generic)) = entry else { + return None; + }; + if generic.name != "rlm" || generic.status != ToolStatus::Running { + return None; + } + let summary = generic + .input_summary + .as_deref() + .filter(|summary| !summary.trim().is_empty()) + .unwrap_or("running chunked analysis"); + Some(TaskPanelEntry { + id: format!("rlm-{}", idx + 1), + status: "running".to_string(), + prompt_summary: format!("RLM: {summary}"), + duration_ms, + }) + }) + .collect() +} + #[allow(clippy::too_many_lines)] async fn run_event_loop( terminal: &mut AppTerminal, @@ -628,6 +664,7 @@ async fn run_event_loop( // #376: native-copy escape — hold Shift to bypass alt-screen mouse capture // for terminal-native text selection. let mut shift_bypass_active = false; + let mut terminal_paused_at: Option = None; loop { if !drain_web_config_events(&mut web_config_session, app, config, &engine_handle).await { @@ -1123,6 +1160,7 @@ async fn run_event_loop( app.use_bracketed_paste, )?; event_broker.pause_events(); + terminal_paused_at = Some(Instant::now()); } } EngineEvent::ResumeEvents => { @@ -1134,6 +1172,7 @@ async fn run_event_loop( app.use_bracketed_paste, )?; event_broker.resume_events(); + terminal_paused_at = None; } } EngineEvent::AgentSpawned { id, prompt } => { @@ -1162,11 +1201,54 @@ async fn run_event_loop( app.status_message = Some(format!("Sub-agent {id}: {display}")); } EngineEvent::AgentComplete { id, result } => { + let subagent_elapsed = app + .agent_activity_started_at + .or(app.turn_started_at) + .map(|started| started.elapsed()) + .unwrap_or_default(); + let has_other_running_subagents = + app.agent_progress.keys().any(|agent_id| agent_id != &id) + || app.subagent_cache.iter().any(|agent| { + agent.agent_id != id + && matches!(agent.status, SubAgentStatus::Running) + }); app.agent_progress.remove(&id); app.status_message = Some(format!( "Sub-agent {id} completed: {}", summarize_tool_output(&result) )); + let should_recapture_terminal = + !has_other_running_subagents && app.use_alt_screen; + if !has_other_running_subagents + && let Some((method, threshold, include_summary)) = + notification_settings(config) + { + let in_tmux = std::env::var("TMUX").is_ok_and(|v| !v.is_empty()); + let msg = subagent_completion_notification_message( + &id, + &result, + include_summary, + subagent_elapsed, + ); + crate::tui::notifications::notify_done( + method, + in_tmux, + &msg, + threshold, + subagent_elapsed, + ); + } + if should_recapture_terminal { + resume_terminal( + terminal, + app.use_alt_screen, + app.use_mouse_capture, + app.use_bracketed_paste, + )?; + event_broker.resume_events(); + terminal_paused_at = None; + app.needs_redraw = true; + } let _ = engine_handle.send(Op::ListSubAgents).await; } EngineEvent::AgentList { agents } => { @@ -1175,9 +1257,10 @@ async fn run_event_loop( sorted.retain(|a| !a.from_prior_session); app.subagent_cache = sorted.clone(); reconcile_subagent_activity_state(app); - if app.view_stack.update_subagents(&sorted) { + let view_agents = subagent_view_agents(app, &sorted); + if app.view_stack.update_subagents(&view_agents) { app.status_message = - Some(format!("Sub-agents: {} total", sorted.len())); + Some(format!("Sub-agents: {} total", view_agents.len())); } // Individual spawn/complete events already log to history; // full list available via /agents command. @@ -1406,8 +1489,23 @@ async fn run_event_loop( } if event_broker.is_paused() { - tokio::time::sleep(std::time::Duration::from_millis(50)).await; - continue; + let grace_active = terminal_paused_at + .map(|paused_at| paused_at.elapsed() < Duration::from_millis(500)) + .unwrap_or(false); + if terminal_pause_has_live_owner(app) || grace_active { + tokio::time::sleep(std::time::Duration::from_millis(50)).await; + continue; + } + resume_terminal( + terminal, + app.use_alt_screen, + app.use_mouse_capture, + app.use_bracketed_paste, + )?; + event_broker.resume_events(); + terminal_paused_at = None; + app.status_message = Some("Terminal controls restored".to_string()); + app.needs_redraw = true; } let now = Instant::now(); @@ -3095,6 +3193,30 @@ fn completed_turn_notification_message( msg } +fn subagent_completion_notification_message( + id: &str, + result: &str, + include_summary: bool, + elapsed: Duration, +) -> String { + let result_line = result + .lines() + .map(str::trim) + .find(|line| !line.is_empty() && !line.starts_with("")); + let mut msg = result_line + .and_then(notification_text_summary) + .map(|summary| format!("sub-agent {id}: {summary}")) + .unwrap_or_else(|| format!("deepseek: sub-agent {id} complete")); + + if include_summary { + let human = crate::tui::notifications::humanize_duration(elapsed); + msg.push('\n'); + msg.push_str(&format!("deepseek: sub-agent complete ({human})")); + } + + msg +} + fn latest_assistant_notification_text(messages: &[Message]) -> Option { messages .iter() @@ -6325,6 +6447,17 @@ fn active_foreground_shell_running(app: &App) -> bool { }) } +fn terminal_pause_has_live_owner(app: &App) -> bool { + app.active_cell.as_ref().is_some_and(|active| { + active.entries().iter().any(|cell| { + matches!( + cell, + HistoryCell::Tool(ToolCell::Exec(exec)) if exec.status == ToolStatus::Running + ) + }) + }) +} + fn collect_active_tool_status(cell: &HistoryCell, snapshot: &mut ActiveToolStatusSnapshot) { let HistoryCell::Tool(tool) = cell else { return; diff --git a/crates/tui/src/tui/ui/tests.rs b/crates/tui/src/tui/ui/tests.rs index 9c139bd7..bfe61bf2 100644 --- a/crates/tui/src/tui/ui/tests.rs +++ b/crates/tui/src/tui/ui/tests.rs @@ -1937,6 +1937,72 @@ fn spillover_pager_section_returns_notice_when_file_missing() { assert!(section.contains("could not read spillover file")); } +#[test] +fn terminal_pause_has_live_owner_only_for_running_exec_cells() { + let mut app = create_test_app(); + assert!(!terminal_pause_has_live_owner(&app)); + + let mut active = ActiveCell::new(); + active.push_tool( + "tool-1", + HistoryCell::Tool(ToolCell::Exec(ExecCell { + command: "python3 -i".to_string(), + status: ToolStatus::Running, + output: None, + started_at: Some(Instant::now()), + duration_ms: None, + source: ExecSource::Assistant, + interaction: Some("interactive".to_string()), + })), + ); + app.active_cell = Some(active); + assert!(terminal_pause_has_live_owner(&app)); + + let mut active = ActiveCell::new(); + active.push_tool( + "tool-2", + HistoryCell::Tool(ToolCell::Generic(GenericToolCell { + name: "rlm".to_string(), + status: ToolStatus::Running, + input_summary: Some("file_path: Cargo.lock".to_string()), + output: None, + prompts: None, + spillover_path: None, + })), + ); + app.active_cell = Some(active); + assert!( + !terminal_pause_has_live_owner(&app), + "non-interactive RLM work must not keep the terminal in host-scrollback mode" + ); +} + +#[test] +fn active_rlm_task_entries_surface_foreground_rlm_work() { + let mut app = create_test_app(); + app.turn_started_at = Some(Instant::now() - Duration::from_secs(3)); + let mut active = ActiveCell::new(); + active.push_tool( + "tool-rlm", + HistoryCell::Tool(ToolCell::Generic(GenericToolCell { + name: "rlm".to_string(), + status: ToolStatus::Running, + input_summary: Some("file_path: Cargo.lock".to_string()), + output: None, + prompts: None, + spillover_path: None, + })), + ); + app.active_cell = Some(active); + + let entries = active_rlm_task_entries(&app); + assert_eq!(entries.len(), 1); + assert_eq!(entries[0].id, "rlm-1"); + assert_eq!(entries[0].status, "running"); + assert_eq!(entries[0].prompt_summary, "RLM: file_path: Cargo.lock"); + assert!(entries[0].duration_ms.unwrap_or_default() >= 3000); +} + #[test] fn details_shortcut_modifiers_accept_plain_shift_and_alt_only() { assert!(details_shortcut_modifiers(KeyModifiers::NONE)); @@ -3511,3 +3577,29 @@ fn completed_turn_notification_truncates_long_text() { // 360-char body + 3-char ellipsis assert_eq!(msg.chars().count(), 363); } + +#[test] +fn subagent_completion_notification_uses_summary_line_not_sentinel() { + let msg = super::subagent_completion_notification_message( + "agent_live", + "Finished the docs audit.\n{}", + false, + Duration::from_secs(42), + ); + + assert_eq!(msg, "sub-agent agent_live: Finished the docs audit."); + assert!(!msg.contains("deepseek:subagent.done")); +} + +#[test] +fn subagent_completion_notification_can_include_elapsed_summary() { + let msg = super::subagent_completion_notification_message( + "agent_live", + "", + true, + Duration::from_secs(65), + ); + + assert!(msg.contains("deepseek: sub-agent agent_live complete")); + assert!(msg.contains("deepseek: sub-agent complete (1m 5s)")); +} diff --git a/crates/tui/src/tui/views/mod.rs b/crates/tui/src/tui/views/mod.rs index f7792bec..58baf297 100644 --- a/crates/tui/src/tui/views/mod.rs +++ b/crates/tui/src/tui/views/mod.rs @@ -7,9 +7,11 @@ use crate::localization::{Locale, MessageId, tr}; use crate::palette; use crate::settings::Settings; use crate::tools::UserInputResponse; -use crate::tools::subagent::{SubAgentResult, SubAgentStatus, SubAgentType}; +use crate::tools::subagent::{SubAgentAssignment, SubAgentResult, SubAgentStatus, SubAgentType}; use crate::tui::app::App; use crate::tui::approval::{ElevationOption, ReviewDecision}; +use crate::tui::history::{HistoryCell, SubAgentCell, summarize_tool_output}; +use crate::tui::widgets::agent_card::AgentLifecycle; pub mod status_picker; @@ -1384,6 +1386,105 @@ pub struct SubAgentsView { scroll: usize, } +/// Build the agent rows shown by `/subagents`. +/// +/// The engine manager is the durable source of truth, but live UI cards can +/// briefly be ahead of the manager-list refresh. Include those live rows so +/// the command does not say "no agents" while the footer/sidebar already show +/// active delegated work. +pub(crate) fn subagent_view_agents( + app: &App, + manager_agents: &[SubAgentResult], +) -> Vec { + let mut agents = manager_agents.to_vec(); + let mut seen: std::collections::HashSet = + agents.iter().map(|agent| agent.agent_id.clone()).collect(); + + for (agent_id, progress) in &app.agent_progress { + if seen.insert(agent_id.clone()) { + agents.push(live_subagent_result( + agent_id, + SubAgentType::General, + SubAgentStatus::Running, + progress, + Some("live"), + )); + } + } + + for cell in &app.history { + match cell { + HistoryCell::SubAgent(SubAgentCell::Delegate(card)) + if seen.insert(card.agent_id.clone()) => + { + let agent_type = + SubAgentType::from_str(&card.agent_type).unwrap_or(SubAgentType::General); + agents.push(live_subagent_result( + &card.agent_id, + agent_type, + lifecycle_to_subagent_status(card.status), + card.summary.as_deref().unwrap_or(card.agent_type.as_str()), + Some("transcript"), + )); + } + HistoryCell::SubAgent(SubAgentCell::Fanout(card)) => { + for worker in &card.workers { + if seen.insert(worker.agent_id.clone()) { + let objective = format!( + "{} worker {}", + summarize_tool_output(&card.kind), + summarize_tool_output(&worker.worker_id) + ); + agents.push(live_subagent_result( + &worker.agent_id, + SubAgentType::General, + lifecycle_to_subagent_status(worker.status), + &objective, + Some(card.kind.as_str()), + )); + } + } + } + _ => {} + } + } + + agents +} + +fn lifecycle_to_subagent_status(status: AgentLifecycle) -> SubAgentStatus { + match status { + AgentLifecycle::Pending | AgentLifecycle::Running => SubAgentStatus::Running, + AgentLifecycle::Completed => SubAgentStatus::Completed, + AgentLifecycle::Failed => SubAgentStatus::Failed("failed in transcript".to_string()), + AgentLifecycle::Cancelled => SubAgentStatus::Cancelled, + } +} + +fn live_subagent_result( + agent_id: &str, + agent_type: SubAgentType, + status: SubAgentStatus, + objective: &str, + role: Option<&str>, +) -> SubAgentResult { + SubAgentResult { + agent_id: agent_id.to_string(), + agent_type, + assignment: SubAgentAssignment { + objective: summarize_tool_output(objective), + role: role.map(str::to_string), + }, + model: String::new(), + nickname: None, + status, + result: None, + steps_taken: 0, + duration_ms: 0, + from_prior_session: false, + } +} + impl SubAgentsView { pub fn new(agents: Vec) -> Self { Self { agents, scroll: 0 } @@ -1742,11 +1843,16 @@ fn truncate_view_text(text: &str, max_chars: usize) -> String { mod tests { use super::{ ConfigListItem, ConfigSection, ConfigView, ModalKind, ModalView, ShellControlView, - ViewAction, ViewEvent, ViewStack, truncate_view_text, + ViewAction, ViewEvent, ViewStack, subagent_view_agents, truncate_view_text, }; use crate::config::Config; use crate::localization::Locale; + use crate::tools::subagent::{ + SubAgentAssignment, SubAgentResult, SubAgentStatus, SubAgentType, + }; use crate::tui::app::{App, TuiOptions}; + use crate::tui::history::{HistoryCell, SubAgentCell}; + use crate::tui::widgets::agent_card::{AgentLifecycle, FanoutCard}; use crossterm::event::{ KeyCode, KeyEvent, KeyModifiers, MouseButton, MouseEvent, MouseEventKind, }; @@ -1785,6 +1891,72 @@ mod tests { } } + fn manager_agent(id: &str, status: SubAgentStatus) -> SubAgentResult { + SubAgentResult { + agent_id: id.to_string(), + agent_type: SubAgentType::Explore, + assignment: SubAgentAssignment { + objective: "read the docs".to_string(), + role: None, + }, + model: "deepseek-v4-flash".to_string(), + nickname: None, + status, + result: None, + steps_taken: 1, + duration_ms: 10, + from_prior_session: false, + } + } + + #[test] + fn subagent_view_agents_includes_progress_only_running_agent() { + let mut app = create_test_app(); + app.agent_progress + .insert("agent_live".to_string(), "reading code".to_string()); + + let agents = subagent_view_agents(&app, &[]); + + assert_eq!(agents.len(), 1); + assert_eq!(agents[0].agent_id, "agent_live"); + assert!(matches!(agents[0].status, SubAgentStatus::Running)); + assert_eq!(agents[0].assignment.role.as_deref(), Some("live")); + assert!(agents[0].assignment.objective.contains("reading code")); + } + + #[test] + fn subagent_view_agents_includes_live_fanout_workers_when_cache_is_empty() { + let mut app = create_test_app(); + let mut card = FanoutCard::new("rlm").with_workers(["chunk_1", "chunk_2"]); + card.upsert_worker("chunk_1", AgentLifecycle::Completed); + card.upsert_worker("chunk_2", AgentLifecycle::Running); + app.add_message(HistoryCell::SubAgent(SubAgentCell::Fanout(card))); + app.last_fanout_card_index = Some(app.history.len().saturating_sub(1)); + + let agents = subagent_view_agents(&app, &[]); + + assert_eq!(agents.len(), 2); + assert_eq!(agents[0].agent_id, "chunk_1"); + assert!(matches!(agents[0].status, SubAgentStatus::Completed)); + assert_eq!(agents[1].agent_id, "chunk_2"); + assert!(matches!(agents[1].status, SubAgentStatus::Running)); + assert_eq!(agents[1].assignment.role.as_deref(), Some("rlm")); + } + + #[test] + fn subagent_view_agents_deduplicates_manager_rows_over_live_rows() { + let mut app = create_test_app(); + app.agent_progress + .insert("agent_cached".to_string(), "live duplicate".to_string()); + let manager = vec![manager_agent("agent_cached", SubAgentStatus::Running)]; + + let agents = subagent_view_agents(&app, &manager); + + assert_eq!(agents.len(), 1); + assert_eq!(agents[0].agent_type, SubAgentType::Explore); + assert_eq!(agents[0].assignment.objective, "read the docs"); + } + fn visible_section_labels(view: &ConfigView) -> Vec<&'static str> { view.visible_items() .into_iter() diff --git a/crates/tui/src/tui/widgets/agent_card.rs b/crates/tui/src/tui/widgets/agent_card.rs index f923bdc4..e9504bef 100644 --- a/crates/tui/src/tui/widgets/agent_card.rs +++ b/crates/tui/src/tui/widgets/agent_card.rs @@ -389,23 +389,15 @@ pub fn apply_to_delegate(card: &mut DelegateCard, msg: &MailboxMessage) -> bool } MailboxMessage::Progress { status, .. } => { card.status = AgentLifecycle::Running; - card.push_action(status); + if !is_low_signal_progress(status) { + card.push_action(status); + } } - MailboxMessage::ToolCallStarted { - tool_name, step, .. - } => { - card.push_action(format!("[{step}] {tool_name} started")); + MailboxMessage::ToolCallStarted { tool_name, .. } => { + card.push_action(format!("{tool_name} running")); } - MailboxMessage::ToolCallCompleted { - tool_name, - step, - ok, - .. - } => { - card.push_action(format!( - "[{step}] {tool_name} {}", - if *ok { "ok" } else { "failed" } - )); + MailboxMessage::ToolCallCompleted { tool_name, ok, .. } => { + card.push_action(format!("{tool_name} {}", if *ok { "ok" } else { "failed" })); } MailboxMessage::Completed { summary, .. } => { card.status = AgentLifecycle::Completed; @@ -433,6 +425,13 @@ pub fn apply_to_delegate(card: &mut DelegateCard, msg: &MailboxMessage) -> bool true } +fn is_low_signal_progress(status: &str) -> bool { + let status = status.trim().to_ascii_lowercase(); + status.contains("requesting model response") + || status.starts_with("started (") + || (status.starts_with("step ") && status.contains(": complete")) +} + /// Apply a mailbox envelope to a `FanoutCard`. Updates per-worker state /// based on which child the envelope is about. Returns `true` on change. pub fn apply_to_fanout(card: &mut FanoutCard, msg: &MailboxMessage) -> bool { @@ -550,6 +549,57 @@ mod tests { ); } + #[test] + fn delegate_card_ignores_low_signal_scheduler_progress() { + let mut card = DelegateCard::new("agent_003", "general"); + let msg = MailboxMessage::progress("agent_003", "step 1/100: requesting model response"); + + assert!(apply_to_delegate(&mut card, &msg)); + assert_eq!(card.status, AgentLifecycle::Running); + assert_eq!( + card.action_count(), + 0, + "scheduler progress should not become a stale transcript row" + ); + + let rendered = render_to_strings(&card.render_lines(80)).join("\n"); + assert!(!rendered.contains("step 1/100"), "{rendered}"); + assert!( + !rendered.contains("requesting model response"), + "{rendered}" + ); + } + + #[test] + fn delegate_tool_rows_omit_internal_step_numbers() { + let mut card = DelegateCard::new("agent_004", "general"); + + assert!(apply_to_delegate( + &mut card, + &MailboxMessage::ToolCallStarted { + agent_id: "agent_004".into(), + tool_name: "read_file".into(), + step: 7, + } + )); + assert!(apply_to_delegate( + &mut card, + &MailboxMessage::ToolCallCompleted { + agent_id: "agent_004".into(), + tool_name: "read_file".into(), + step: 7, + ok: true, + } + )); + + let rendered = render_to_strings(&card.render_lines(80)).join("\n"); + assert!(rendered.contains("read_file"), "{rendered}"); + assert!( + !rendered.contains("[7]"), + "internal loop step numbers are not useful in the live card: {rendered}" + ); + } + #[test] fn delegate_card_ignores_envelopes_for_other_agents() { let mut card = DelegateCard::new("agent_a", "general"); diff --git a/npm/deepseek-tui/package.json b/npm/deepseek-tui/package.json index 10252551..ba3eebc6 100644 --- a/npm/deepseek-tui/package.json +++ b/npm/deepseek-tui/package.json @@ -1,7 +1,7 @@ { "name": "deepseek-tui", - "version": "0.8.15", - "deepseekBinaryVersion": "0.8.15", + "version": "0.8.16", + "deepseekBinaryVersion": "0.8.16", "description": "Install and run deepseek and deepseek-tui binaries from GitHub release artifacts.", "author": "Hmbown", "license": "MIT",