From eac8d82ab85b104449e4e40e391a0cd4798364a3 Mon Sep 17 00:00:00 2001
From: Hunter Bown <hmbown@gmail.com>
Date: Fri, 1 May 2026 02:36:56 -0500
Subject: [PATCH 01/17] test(palette): cover disabled MCP server case (#197)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Acceptance list called for tests of no-config / healthy / disabled /
failed servers. Healthy and failed already had a single dense
test (`command_palette_includes_mcp_discovery_and_failed_servers`);
no-config is implicit in the existing call sites that pass `None`
for the snapshot. Disabled was the actual gap — adds one focused
case asserting the `[disabled]` state tag appears in the rendered
description so users can see disabled servers in the palette
without opening the MCP manager.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 crates/tui/src/tui/command_palette.rs | 44 +++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)
diff --git a/crates/tui/src/tui/command_palette.rs b/crates/tui/src/tui/command_palette.rs
index 1c8a57cd..3dedae7f 100644
--- a/crates/tui/src/tui/command_palette.rs
+++ b/crates/tui/src/tui/command_palette.rs
@@ -1017,6 +1017,50 @@ mod tests {
         assert_eq!(use_entry.command, "mcp_fs_read");
     }
 
+    #[test]
+    fn command_palette_marks_disabled_servers_visibly() {
+        // The healthy/failed cases are covered above; disabled was the
+        // remaining gap from #197's acceptance list. Disabled servers must
+        // appear in the palette with a `[disabled]` state tag so users can
+        // see them without opening the MCP manager.
+        let snapshot = crate::mcp::McpManagerSnapshot {
+            config_path: Path::new("mcp.json").to_path_buf(),
+            config_exists: true,
+            restart_required: false,
+            servers: vec![crate::mcp::McpServerSnapshot {
+                name: "muted".to_string(),
+                enabled: false,
+                required: false,
+                transport: "stdio".to_string(),
+                command_or_url: "node disabled.js".to_string(),
+                connect_timeout: 10,
+                execute_timeout: 60,
+                read_timeout: 120,
+                connected: false,
+                error: None,
+                tools: Vec::new(),
+                resources: Vec::new(),
+                prompts: Vec::new(),
+            }],
+        };
+        let entries = build_entries(
+            Path::new("."),
+            Path::new("."),
+            Path::new("mcp.json"),
+            Some(&snapshot),
+        );
+
+        let muted = entries
+            .iter()
+            .find(|entry| entry.label == "mcp:muted")
+            .expect("disabled server should still appear in the palette");
+        assert!(
+            muted.description.contains("[disabled]"),
+            "expected `[disabled]` state tag in description, got: {}",
+            muted.description
+        );
+    }
+
     #[test]
     fn command_palette_emits_actions_not_raw_insertions() {
         let entries = vec![CommandPaletteEntry {

From d8acd6e3cb8ae29a9d0d6e2753ab4ce3d7f140a8 Mon Sep 17 00:00:00 2001
From: Hunter Bown <hmbown@gmail.com>
Date: Fri, 1 May 2026 02:36:56 -0500
Subject: [PATCH 02/17] fix(skills): use real on-disk path in model-visible
 block, not frontmatter name
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`render_available_skills_context` rendered each skill's file path as
`<skills_dir>/<frontmatter-name>/SKILL.md`. The directory name and the
frontmatter `name` can differ — community installs and manually-placed
skills routinely have this drift — and when they do, the model is told
the file lives at a path that does not exist, so it can't open the
SKILL.md it needs to actually use the skill.

`Skill` now carries a `path: PathBuf` populated by `discover()` from the
real directory entry. Renderer uses it directly. Adds a regression test
that creates a skill at `weird-dir-name/SKILL.md` with `name: friendly-name`
and asserts the rendered prompt contains the real path and not the
fabricated one.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 crates/tui/src/skills/mod.rs | 66 +++++++++++++++++++++++++++++++++---
 1 file changed, 62 insertions(+), 4 deletions(-)

diff --git a/crates/tui/src/skills/mod.rs b/crates/tui/src/skills/mod.rs
index 7e01766a..d795e5d6 100644
--- a/crates/tui/src/skills/mod.rs
+++ b/crates/tui/src/skills/mod.rs
@@ -44,6 +44,11 @@ pub struct Skill {
     pub name: String,
     pub description: String,
     pub body: String,
+    /// On-disk path to the `SKILL.md` this was loaded from. The directory
+    /// name can differ from the frontmatter `name` for community installs
+    /// or manually-placed skills, so callers must use this rather than
+    /// reconstructing `<dir>/<name>/SKILL.md`.
+    pub path: PathBuf,
 }
 
 /// Collection of discovered skills.
@@ -70,7 +75,10 @@ impl SkillRegistry {
                     let skill_path = entry.path().join("SKILL.md");
                     match fs::read_to_string(&skill_path) {
                         Ok(content) => match Self::parse_skill(&skill_path, &content) {
-                            Ok(skill) => registry.skills.push(skill),
+                            Ok(mut skill) => {
+                                skill.path = skill_path.clone();
+                                registry.skills.push(skill);
+                            }
                             Err(reason) => registry.push_warning(format!(
                                 "Failed to parse {}: {reason}",
                                 skill_path.display()
@@ -137,6 +145,9 @@ impl SkillRegistry {
             name,
             description,
             body,
+            // Filled in by `discover` after parse succeeds; default to an
+            // empty path so direct constructors (e.g. tests) compile.
+            path: PathBuf::new(),
         })
     }
 
@@ -196,16 +207,19 @@ instructions when using a specific skill.\n\n",
 
     let mut omitted = 0usize;
     for skill in skills {
-        let path = skills_dir.join(&skill.name).join("SKILL.md");
+        // Use the real on-disk path captured at discovery — the directory
+        // name can differ from the frontmatter `name` for community
+        // installs, in which case `<dir>/<name>/SKILL.md` would not exist
+        // and the model would fail to open it.
         let description = truncate_for_prompt(&skill.description, MAX_SKILL_DESCRIPTION_CHARS);
         let line = if description.is_empty() {
-            format!("- {}: (file: {})\n", skill.name, path.display())
+            format!("- {}: (file: {})\n", skill.name, skill.path.display())
         } else {
             format!(
                 "- {}: {} (file: {})\n",
                 skill.name,
                 description,
-                path.display()
+                skill.path.display()
             )
         };
 
@@ -336,6 +350,50 @@ mod tests {
         assert!(rendered.contains("### How to use skills"));
     }
 
+    #[test]
+    fn render_available_skills_context_uses_real_dir_name_not_frontmatter_name() {
+        // Regression: when a community-installed or manually-placed skill
+        // lives in a directory whose name differs from its frontmatter
+        // `name`, the rendered prompt must point to the real on-disk file
+        // path, not <skills_dir>/<frontmatter-name>/SKILL.md (which does
+        // not exist).
+        let tmpdir = TempDir::new().unwrap();
+        create_skill_dir(
+            &tmpdir,
+            "weird-dir-name",
+            "---\nname: friendly-name\ndescription: drift case\n---\nbody",
+        );
+
+        let rendered = crate::skills::render_available_skills_context(
+            &tmpdir.path().join("skills"),
+        )
+        .expect("skill context");
+
+        let real_path = tmpdir
+            .path()
+            .join("skills")
+            .join("weird-dir-name")
+            .join("SKILL.md")
+            .display()
+            .to_string();
+        let stale_path = tmpdir
+            .path()
+            .join("skills")
+            .join("friendly-name")
+            .join("SKILL.md")
+            .display()
+            .to_string();
+
+        assert!(
+            rendered.contains(&real_path),
+            "expected real on-disk path {real_path:?} in rendered output, got:\n{rendered}"
+        );
+        assert!(
+            !rendered.contains(&stale_path),
+            "rendered output must not invent a path under the frontmatter name:\n{rendered}"
+        );
+    }
+
     #[test]
     fn render_available_skills_context_returns_none_when_empty() {
         let tmpdir = TempDir::new().unwrap();

From 1512afae69569889231b1f5bb746596aefba090e Mon Sep 17 00:00:00 2001
From: Hunter Bown <hmbown@gmail.com>
Date: Fri, 1 May 2026 02:46:20 -0500
Subject: [PATCH 03/17] feat(privacy): contract \$HOME to ~ in user-visible
 display paths
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Anywhere the TUI, doctor stdout, setup stdout, or onboarding shows a
file path, it used to print the absolute form (e.g. /Users/<name>/...).
On macOS/Linux the home-directory segment reveals the OS account name,
which is often the same as a public handle — undesirable for users who
share screenshots, screencasts, or paste doctor output into a public
help request.

Adds `crate::utils::display_path` that contracts a leading $HOME to `~`
and falls through unchanged otherwise. Used at every viewer-visible site:

  doctor:    workspace, config.toml, MCP config, all skills dirs,
             selected skills dir, tools dir, plugins dir
  setup:     workspace, skills/tools/plugins paths and status output
  TUI:       context inspector header, trust-directory onboarding,
             shell-job cwd (sidebar + detail pager), subagent task header

Persisted state, audit log, session checkpoints, and LLM-bound system
prompts intentionally keep the absolute path — those need full fidelity
to resolve correctly across processes and the LLM provider sees
absolute paths anyway by virtue of the workspace summary.

`display_path` has 4 tests covering: home contraction, bare-`~` for
home itself, untouched-when-unrelated, and a username-prefix regression
guard (so `/Users/alice2/...` doesn't get rewritten when $HOME is
`/Users/alice`).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 crates/tui/src/main.rs                        | 54 +++++-----
 crates/tui/src/tui/context_inspector.rs       |  6 +-
 .../tui/src/tui/onboarding/trust_directory.rs |  2 +-
 crates/tui/src/tui/shell_job_routing.rs       |  4 +-
 crates/tui/src/tui/subagent_routing.rs        |  5 +-
 crates/tui/src/utils.rs                       | 98 +++++++++++++++++++
 6 files changed, 140 insertions(+), 29 deletions(-)

diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs
index de6544fa..9238975a 100644
--- a/crates/tui/src/main.rs
+++ b/crates/tui/src/main.rs
@@ -979,7 +979,7 @@ fn run_setup(config: &Config, workspace: &Path, args: SetupArgs) -> Result<()> {
         "DeepSeek Setup".truecolor(aqua_r, aqua_g, aqua_b).bold()
     );
     println!("{}", "==============".truecolor(sky_r, sky_g, sky_b));
-    println!("Workspace: {}", workspace.display());
+    println!("Workspace: {}", crate::utils::display_path(workspace));
 
     if run_mcp {
         let mcp_path = config.mcp_config_path();
@@ -1022,10 +1022,13 @@ fn run_setup(config: &Config, workspace: &Path, args: SetupArgs) -> Result<()> {
         if args.local {
             println!(
                 "    Local skills dir enabled for this workspace: {}",
-                skills_dir.display()
+                crate::utils::display_path(&skills_dir)
             );
         } else {
-            println!("    Skills dir: {}", skills_dir.display());
+            println!(
+                "    Skills dir: {}",
+                crate::utils::display_path(&skills_dir)
+            );
         }
         println!("    Next: run the TUI and use `/skills` then `/skill getting-started`.");
     }
@@ -1035,7 +1038,7 @@ fn run_setup(config: &Config, workspace: &Path, args: SetupArgs) -> Result<()> {
         let (dir, readme_status, example_status) = init_tools_dir(&tools_dir, args.force)?;
         report_write_status("Tools README", &dir.join("README.md"), readme_status);
         report_write_status("Example tool", &dir.join("example.sh"), example_status);
-        println!("    Tools dir: {}", dir.display());
+        println!("    Tools dir: {}", crate::utils::display_path(&dir));
         println!("    Next: drop scripts here; surface them via skills/MCP when ready.");
     }
 
@@ -1045,7 +1048,10 @@ fn run_setup(config: &Config, workspace: &Path, args: SetupArgs) -> Result<()> {
             init_plugins_dir(&plugins_dir, args.force)?;
         report_write_status("Plugins README", &readme_path, readme_status);
         report_write_status("Example plugin", &example_path, example_status);
-        println!("    Plugins dir: {}", plugins_dir.display());
+        println!(
+            "    Plugins dir: {}",
+            crate::utils::display_path(&plugins_dir)
+        );
         println!("    Next: copy the example dir, edit PLUGIN.md, wire via skill/MCP.");
     }
 
@@ -1200,7 +1206,7 @@ fn run_setup_status(config: &Config, workspace: &Path) -> Result<()> {
     println!(
         "  · skills: {} at {}",
         skills_count_for(&skills_dir),
-        skills_dir.display()
+        crate::utils::display_path(&skills_dir)
     );
 
     let tools_dir = default_tools_dir();
@@ -1216,7 +1222,7 @@ fn run_setup_status(config: &Config, workspace: &Path) -> Result<()> {
         } else {
             0
         },
-        tools_dir.display()
+        crate::utils::display_path(&tools_dir)
     );
 
     let plugins_dir = default_plugins_dir();
@@ -1232,7 +1238,7 @@ fn run_setup_status(config: &Config, workspace: &Path) -> Result<()> {
         } else {
             0
         },
-        plugins_dir.display()
+        crate::utils::display_path(&plugins_dir)
     );
 
     let sandbox = crate::sandbox::get_platform_sandbox();
@@ -1348,16 +1354,16 @@ async fn run_doctor(config: &Config, workspace: &Path, config_path_override: Opt
         println!(
             "  {} config.toml found at {}",
             "✓".truecolor(aqua_r, aqua_g, aqua_b),
-            config_path.display()
+            crate::utils::display_path(&config_path)
         );
     } else {
         println!(
             "  {} config.toml not found at {} (using defaults/env)",
             "!".truecolor(sky_r, sky_g, sky_b),
-            config_path.display()
+            crate::utils::display_path(&config_path)
         );
     }
-    println!("  workspace: {}", workspace.display());
+    println!("  workspace: {}", crate::utils::display_path(workspace));
 
     // Check API keys
     println!();
@@ -1477,7 +1483,7 @@ async fn run_doctor(config: &Config, workspace: &Path, config_path_override: Opt
         println!(
             "  {} MCP config found at {}",
             "✓".truecolor(aqua_r, aqua_g, aqua_b),
-            mcp_config_path.display()
+            crate::utils::display_path(&mcp_config_path)
         );
         match load_mcp_config(&mcp_config_path) {
             Ok(cfg) if cfg.servers.is_empty() => {
@@ -1532,7 +1538,7 @@ async fn run_doctor(config: &Config, workspace: &Path, config_path_override: Opt
         println!(
             "  {} MCP config not found at {}",
             "·".dimmed(),
-            mcp_config_path.display()
+            crate::utils::display_path(&mcp_config_path)
         );
         println!("    Run `deepseek mcp init` or `deepseek setup --mcp`.");
     }
@@ -1561,14 +1567,14 @@ async fn run_doctor(config: &Config, workspace: &Path, config_path_override: Opt
         println!(
             "  {} local skills dir found at {} ({} items)",
             "✓".truecolor(aqua_r, aqua_g, aqua_b),
-            local_skills_dir.display(),
+            crate::utils::display_path(&local_skills_dir),
             describe_dir(&local_skills_dir)
         );
     } else {
         println!(
             "  {} local skills dir not found at {}",
             "·".dimmed(),
-            local_skills_dir.display()
+            crate::utils::display_path(&local_skills_dir)
         );
     }
 
@@ -1576,14 +1582,14 @@ async fn run_doctor(config: &Config, workspace: &Path, config_path_override: Opt
         println!(
             "  {} .agents skills dir found at {} ({} items)",
             "✓".truecolor(aqua_r, aqua_g, aqua_b),
-            agents_skills_dir.display(),
+            crate::utils::display_path(&agents_skills_dir),
             describe_dir(&agents_skills_dir)
         );
     } else {
         println!(
             "  {} .agents skills dir not found at {}",
             "·".dimmed(),
-            agents_skills_dir.display()
+            crate::utils::display_path(&agents_skills_dir)
         );
     }
 
@@ -1591,21 +1597,21 @@ async fn run_doctor(config: &Config, workspace: &Path, config_path_override: Opt
         println!(
             "  {} global skills dir found at {} ({} items)",
             "✓".truecolor(aqua_r, aqua_g, aqua_b),
-            global_skills_dir.display(),
+            crate::utils::display_path(&global_skills_dir),
             describe_dir(&global_skills_dir)
         );
     } else {
         println!(
             "  {} global skills dir not found at {}",
             "·".dimmed(),
-            global_skills_dir.display()
+            crate::utils::display_path(&global_skills_dir)
         );
     }
 
     println!(
         "  {} selected skills dir: {}",
         "·".dimmed(),
-        selected_skills_dir.display()
+        crate::utils::display_path(&selected_skills_dir)
     );
     if !agents_skills_dir.exists() && !local_skills_dir.exists() && !global_skills_dir.exists() {
         println!("    Run `deepseek setup --skills` (or add --local for ./skills).");
@@ -1620,14 +1626,14 @@ async fn run_doctor(config: &Config, workspace: &Path, config_path_override: Opt
         println!(
             "  {} tools dir found at {} ({} items)",
             "✓".truecolor(aqua_r, aqua_g, aqua_b),
-            tools_dir.display(),
+            crate::utils::display_path(&tools_dir),
             count
         );
     } else {
         println!(
             "  {} tools dir not found at {}",
             "·".dimmed(),
-            tools_dir.display()
+            crate::utils::display_path(&tools_dir)
         );
         println!("    Run `deepseek-tui setup --tools` to scaffold a starter dir.");
     }
@@ -1641,14 +1647,14 @@ async fn run_doctor(config: &Config, workspace: &Path, config_path_override: Opt
         println!(
             "  {} plugins dir found at {} ({} items)",
             "✓".truecolor(aqua_r, aqua_g, aqua_b),
-            plugins_dir.display(),
+            crate::utils::display_path(&plugins_dir),
             count
         );
     } else {
         println!(
             "  {} plugins dir not found at {}",
             "·".dimmed(),
-            plugins_dir.display()
+            crate::utils::display_path(&plugins_dir)
         );
         println!("    Run `deepseek-tui setup --plugins` to scaffold a starter dir.");
     }
diff --git a/crates/tui/src/tui/context_inspector.rs b/crates/tui/src/tui/context_inspector.rs
index d0e7a586..c9e5c091 100644
--- a/crates/tui/src/tui/context_inspector.rs
+++ b/crates/tui/src/tui/context_inspector.rs
@@ -30,7 +30,11 @@ pub fn build_context_inspector_text(app: &App) -> String {
     let _ = writeln!(out, "Session Context");
     let _ = writeln!(out, "---------------");
     let _ = writeln!(out, "Model: {}", app.model);
-    let _ = writeln!(out, "Workspace: {}", app.workspace.display());
+    let _ = writeln!(
+        out,
+        "Workspace: {}",
+        crate::utils::display_path(&app.workspace)
+    );
     if let Some(session_id) = app.current_session_id.as_deref() {
         let _ = writeln!(out, "Session: {}", session_id);
     }
diff --git a/crates/tui/src/tui/onboarding/trust_directory.rs b/crates/tui/src/tui/onboarding/trust_directory.rs
index 3d51107a..44c18b7d 100644
--- a/crates/tui/src/tui/onboarding/trust_directory.rs
+++ b/crates/tui/src/tui/onboarding/trust_directory.rs
@@ -20,7 +20,7 @@ pub fn lines(app: &App) -> Vec<Line<'static>> {
         Style::default().fg(palette::TEXT_PRIMARY),
     )));
     lines.push(Line::from(Span::styled(
-        format!("Workspace: {}", app.workspace.display()),
+        format!("Workspace: {}", crate::utils::display_path(&app.workspace)),
         Style::default().fg(palette::TEXT_MUTED),
     )));
     lines.push(Line::from(""));
diff --git a/crates/tui/src/tui/shell_job_routing.rs b/crates/tui/src/tui/shell_job_routing.rs
index 428c4351..6c08566f 100644
--- a/crates/tui/src/tui/shell_job_routing.rs
+++ b/crates/tui/src/tui/shell_job_routing.rs
@@ -52,7 +52,7 @@ pub(super) fn format_shell_job_list(jobs: &[ShellJobSnapshot]) -> String {
             job.exit_code,
             task
         ));
-        lines.push(format!("  cwd: {}", job.cwd.display()));
+        lines.push(format!("  cwd: {}", crate::utils::display_path(&job.cwd)));
         lines.push(format!("  cmd: {}", job.command));
         let tail = if !job.stderr_tail.trim().is_empty() {
             job.stderr_tail.trim()
@@ -115,7 +115,7 @@ fn format_shell_job_detail(detail: &ShellJobDetail) -> String {
         format!("Job: {}", job.id),
         format!("Status: {}", status_label(&job.status, job.stale)),
         format!("Command: {}", job.command),
-        format!("Cwd: {}", job.cwd.display()),
+        format!("Cwd: {}", crate::utils::display_path(&job.cwd)),
         format!("Elapsed: {}", format_elapsed(job.elapsed_ms)),
         format!("Exit Code: {:?}", job.exit_code),
         format!("Stdin Available: {}", job.stdin_available),
diff --git a/crates/tui/src/tui/subagent_routing.rs b/crates/tui/src/tui/subagent_routing.rs
index fabc0c7b..c5fb44ec 100644
--- a/crates/tui/src/tui/subagent_routing.rs
+++ b/crates/tui/src/tui/subagent_routing.rs
@@ -489,7 +489,10 @@ fn format_task_detail(task: &TaskRecord) -> String {
     lines.push(format!("Status: {}", task_status_label(task.status)));
     lines.push(format!("Mode: {}", task.mode));
     lines.push(format!("Model: {}", task.model));
-    lines.push(format!("Workspace: {}", task.workspace.display()));
+    lines.push(format!(
+        "Workspace: {}",
+        crate::utils::display_path(&task.workspace)
+    ));
     if let Some(thread_id) = task.thread_id.as_ref() {
         lines.push(format!("Runtime Thread: {thread_id}"));
     }
diff --git a/crates/tui/src/utils.rs b/crates/tui/src/utils.rs
index 1801ca6b..80a8e403 100644
--- a/crates/tui/src/utils.rs
+++ b/crates/tui/src/utils.rs
@@ -186,6 +186,33 @@ pub fn url_encode(input: &str) -> String {
     encoded
 }
 
+/// Render a path for **user-facing display** with the home directory
+/// contracted to `~`. Use this in the TUI, doctor/setup stdout, and any
+/// other place a viewer might see the output (screenshot, video,
+/// pasted-into-issue help). On macOS/Linux the absolute path
+/// `/Users/<name>/...` or `/home/<name>/...` reveals the OS account name,
+/// which is often the same as a public handle — undesirable for users
+/// who share their terminal.
+///
+/// **Do not use** this for paths that get persisted (sessions, audit log)
+/// or sent to the LLM provider — those want full fidelity so they
+/// resolve correctly across processes.
+#[must_use]
+pub fn display_path(path: &Path) -> String {
+    let Some(home) = dirs::home_dir() else {
+        return path.display().to_string();
+    };
+    if let Ok(rest) = path.strip_prefix(&home) {
+        if rest.as_os_str().is_empty() {
+            return "~".to_string();
+        }
+        // Render with the platform-correct separator after the tilde.
+        let sep = std::path::MAIN_SEPARATOR;
+        return format!("~{sep}{}", rest.display());
+    }
+    path.display().to_string()
+}
+
 /// Estimate the total character count across message content blocks.
 #[must_use]
 pub fn estimate_message_chars(messages: &[Message]) -> usize {
@@ -205,3 +232,74 @@ pub fn estimate_message_chars(messages: &[Message]) -> usize {
     }
     total
 }
+
+#[cfg(test)]
+mod tests {
+    use super::display_path;
+    use std::path::PathBuf;
+
+    /// Save and restore $HOME inside one test so a panic anywhere can't
+    /// poison sibling tests that read the env var.
+    fn with_home<R>(home: &str, f: impl FnOnce() -> R) -> R {
+        let prev = std::env::var_os("HOME");
+        // SAFETY: tests in this crate are run single-threaded with respect
+        // to env-var mutation by the integration harness, and we restore
+        // immediately after the closure.
+        unsafe { std::env::set_var("HOME", home) };
+        let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(f));
+        match prev {
+            Some(v) => unsafe { std::env::set_var("HOME", v) },
+            None => unsafe { std::env::remove_var("HOME") },
+        }
+        match result {
+            Ok(v) => v,
+            Err(p) => std::panic::resume_unwind(p),
+        }
+    }
+
+    #[test]
+    fn display_path_contracts_home_prefix() {
+        with_home("/Users/alice", || {
+            assert_eq!(
+                display_path(&PathBuf::from("/Users/alice/projects/foo")),
+                format!(
+                    "~{}projects{}foo",
+                    std::path::MAIN_SEPARATOR,
+                    std::path::MAIN_SEPARATOR
+                ),
+            );
+        });
+    }
+
+    #[test]
+    fn display_path_returns_bare_tilde_for_home_itself() {
+        with_home("/Users/alice", || {
+            assert_eq!(display_path(&PathBuf::from("/Users/alice")), "~");
+        });
+    }
+
+    #[test]
+    fn display_path_leaves_unrelated_paths_alone() {
+        with_home("/Users/alice", || {
+            // Different user — must not get rewritten or share the tilde.
+            assert_eq!(
+                display_path(&PathBuf::from("/Users/bob/Code")),
+                "/Users/bob/Code".to_string()
+            );
+            // System path must stay absolute.
+            assert_eq!(display_path(&PathBuf::from("/etc/hosts")), "/etc/hosts");
+        });
+    }
+
+    #[test]
+    fn display_path_does_not_match_username_prefix() {
+        // Regression guard: a directory named like the user's home
+        // *prefix* but not under it must not get rewritten.
+        with_home("/Users/alice", || {
+            assert_eq!(
+                display_path(&PathBuf::from("/Users/alice2/work")),
+                "/Users/alice2/work"
+            );
+        });
+    }
+}

From 100efced804e86dee119b1bcb86bf9f10e2002ee Mon Sep 17 00:00:00 2001
From: Hunter Bown <hmbown@gmail.com>
Date: Fri, 1 May 2026 02:53:29 -0500
Subject: [PATCH 04/17] chore: cargo fmt + drop needless borrow flagged by
 clippy
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CI on PR #256 flagged two minor lint hits in the privacy lane:
- skills/mod.rs: rustfmt wanted the new regression test's
  `let rendered = …` line collapsed to one chain.
- main.rs:1614: `selected_skills_dir` is already `&PathBuf`, so
  passing `&selected_skills_dir` is a `&&PathBuf` and clippy's
  `needless_borrow` triggers under `-D warnings`.

No behavior change; same coverage and outputs. Re-runs locally:
  cargo fmt --all -- --check  → clean
  cargo clippy --workspace --all-targets --all-features --locked -- -D warnings → clean

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 crates/tui/src/main.rs       | 2 +-
 crates/tui/src/skills/mod.rs | 7 +++----
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs
index 9238975a..f87d08b9 100644
--- a/crates/tui/src/main.rs
+++ b/crates/tui/src/main.rs
@@ -1611,7 +1611,7 @@ async fn run_doctor(config: &Config, workspace: &Path, config_path_override: Opt
     println!(
         "  {} selected skills dir: {}",
         "·".dimmed(),
-        crate::utils::display_path(&selected_skills_dir)
+        crate::utils::display_path(selected_skills_dir)
     );
     if !agents_skills_dir.exists() && !local_skills_dir.exists() && !global_skills_dir.exists() {
         println!("    Run `deepseek setup --skills` (or add --local for ./skills).");
diff --git a/crates/tui/src/skills/mod.rs b/crates/tui/src/skills/mod.rs
index d795e5d6..d07b57cb 100644
--- a/crates/tui/src/skills/mod.rs
+++ b/crates/tui/src/skills/mod.rs
@@ -364,10 +364,9 @@ mod tests {
             "---\nname: friendly-name\ndescription: drift case\n---\nbody",
         );
 
-        let rendered = crate::skills::render_available_skills_context(
-            &tmpdir.path().join("skills"),
-        )
-        .expect("skill context");
+        let rendered =
+            crate::skills::render_available_skills_context(&tmpdir.path().join("skills"))
+                .expect("skill context");
 
         let real_path = tmpdir
             .path()

From df53a221135e63b0d2a60893b21d4f1173c8507c Mon Sep 17 00:00:00 2001
From: Hunter Bown <hmbown@gmail.com>
Date: Fri, 1 May 2026 02:59:57 -0500
Subject: [PATCH 05/17] test(utils): gate display_path tests to cfg(unix)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The tests set \$HOME to drive `dirs::home_dir()`. On Unix that's the
contract dirs uses; on Windows dirs reads %USERPROFILE% first, so
setting HOME has no effect and the tests fail.

The `display_path` function itself is platform-identical — it
delegates to `dirs::home_dir()` for the home prefix and uses
`std::path::MAIN_SEPARATOR` for the separator after the tilde. The
contraction logic is exercised on macOS/Linux which is sufficient
coverage for an abstraction whose platform detail is delegated.

If we want Windows-specific assertion coverage in the future, it should
either set USERPROFILE alongside HOME or accept an injected home dir.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 crates/tui/src/utils.rs | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/crates/tui/src/utils.rs b/crates/tui/src/utils.rs
index 80a8e403..a1b8bded 100644
--- a/crates/tui/src/utils.rs
+++ b/crates/tui/src/utils.rs
@@ -233,7 +233,14 @@ pub fn estimate_message_chars(messages: &[Message]) -> usize {
     total
 }
 
-#[cfg(test)]
+// Tests below set `HOME` to drive `dirs::home_dir()`, which is honored on
+// Unix but not on Windows (which reads `USERPROFILE` first). The
+// `display_path` contraction logic itself is platform-identical — it
+// delegates to `dirs::home_dir()`. Gate to `cfg(unix)` so we cover the
+// behavior on the platform whose env-var contract matches the test
+// driver, instead of writing platform-specific test scaffolding for a
+// pure abstraction.
+#[cfg(all(test, unix))]
 mod tests {
     use super::display_path;
     use std::path::PathBuf;

From 84da3b7fc6ff1e3b4dcbe0108ae24656ceaf9d33 Mon Sep 17 00:00:00 2001
From: Hunter Bown <hmbown@gmail.com>
Date: Fri, 1 May 2026 03:09:05 -0500
Subject: [PATCH 06/17] test(rlm): cover bridge batch and depth guard

---
 crates/tui/src/rlm/bridge.rs | 151 +++++++++++++++++++++++++++++------
 crates/tui/src/rlm/turn.rs   |  16 ++++
 2 files changed, 141 insertions(+), 26 deletions(-)

diff --git a/crates/tui/src/rlm/bridge.rs b/crates/tui/src/rlm/bridge.rs
index 0b8e7754..36ec6680 100644
--- a/crates/tui/src/rlm/bridge.rs
+++ b/crates/tui/src/rlm/bridge.rs
@@ -121,19 +121,8 @@ impl RlmBridge {
     }
 
     async fn dispatch_llm_batch(&self, prompts: Vec<String>, model: Option<String>) -> BatchResp {
-        if prompts.is_empty() {
-            return BatchResp { results: vec![] };
-        }
-        if prompts.len() > MAX_BATCH {
-            return BatchResp {
-                results: prompts
-                    .iter()
-                    .map(|_| SingleResp {
-                        text: String::new(),
-                        error: Some(format!("batch too large: {} > {MAX_BATCH}", prompts.len())),
-                    })
-                    .collect(),
-            };
+        if let Some(resp) = batch_guard(prompts.len()) {
+            return resp;
         }
 
         let model = Arc::new(
@@ -201,19 +190,8 @@ impl RlmBridge {
     }
 
     async fn dispatch_rlm_batch(&self, prompts: Vec<String>, model: Option<String>) -> BatchResp {
-        if prompts.is_empty() {
-            return BatchResp { results: vec![] };
-        }
-        if prompts.len() > MAX_BATCH {
-            return BatchResp {
-                results: prompts
-                    .iter()
-                    .map(|_| SingleResp {
-                        text: String::new(),
-                        error: Some(format!("batch too large: {} > {MAX_BATCH}", prompts.len())),
-                    })
-                    .collect(),
-            };
+        if let Some(resp) = batch_guard(prompts.len()) {
+            return resp;
         }
 
         let model = Arc::new(model);
@@ -227,6 +205,23 @@ impl RlmBridge {
     }
 }
 
+fn batch_guard(prompt_count: usize) -> Option<BatchResp> {
+    if prompt_count == 0 {
+        return Some(BatchResp { results: vec![] });
+    }
+    if prompt_count > MAX_BATCH {
+        return Some(BatchResp {
+            results: (0..prompt_count)
+                .map(|_| SingleResp {
+                    text: String::new(),
+                    error: Some(format!("batch too large: {prompt_count} > {MAX_BATCH}")),
+                })
+                .collect(),
+        });
+    }
+    None
+}
+
 impl RpcDispatcher for RlmBridge {
     fn dispatch<'a>(
         &'a self,
@@ -255,3 +250,107 @@ impl RpcDispatcher for RlmBridge {
         })
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::config::{Config, ProviderConfig, ProvidersConfig};
+    use serde_json::json;
+    use wiremock::matchers::{method, path};
+    use wiremock::{Mock, MockServer, ResponseTemplate};
+
+    fn client_for(server: &MockServer) -> DeepSeekClient {
+        let config = Config {
+            provider: Some("sglang".to_string()),
+            providers: Some(ProvidersConfig {
+                sglang: ProviderConfig {
+                    base_url: Some(server.uri()),
+                    ..ProviderConfig::default()
+                },
+                ..ProvidersConfig::default()
+            }),
+            ..Config::default()
+        };
+        DeepSeekClient::new(&config).expect("test client")
+    }
+
+    fn chat_response(text: &str) -> serde_json::Value {
+        json!({
+            "id": "chatcmpl-test",
+            "model": "test-model",
+            "choices": [{
+                "index": 0,
+                "message": {
+                    "role": "assistant",
+                    "content": text,
+                },
+                "finish_reason": "stop"
+            }],
+            "usage": {
+                "prompt_tokens": 3,
+                "completion_tokens": 5
+            }
+        })
+    }
+
+    #[test]
+    fn batch_guard_allows_non_empty_batches_at_the_cap() {
+        assert!(batch_guard(MAX_BATCH).is_none());
+    }
+
+    #[test]
+    fn batch_guard_returns_empty_response_for_empty_batches() {
+        let response = batch_guard(0).expect("empty batch should be handled");
+        assert!(response.results.is_empty());
+    }
+
+    #[test]
+    fn batch_guard_returns_one_error_per_oversized_prompt() {
+        let response = batch_guard(MAX_BATCH + 2).expect("oversized batch should be handled");
+        assert_eq!(response.results.len(), MAX_BATCH + 2);
+        assert!(response.results.iter().all(|result| {
+            result.text.is_empty()
+                && result
+                    .error
+                    .as_deref()
+                    .is_some_and(|err| err.contains("batch too large"))
+        }));
+    }
+
+    #[tokio::test]
+    async fn rlm_dispatch_at_depth_zero_falls_back_to_plain_llm_query() {
+        let server = MockServer::start().await;
+        Mock::given(method("POST"))
+            .and(path("/v1/responses"))
+            .respond_with(ResponseTemplate::new(404).set_body_string("responses unavailable"))
+            .mount(&server)
+            .await;
+        Mock::given(method("POST"))
+            .and(path("/v1/chat/completions"))
+            .respond_with(
+                ResponseTemplate::new(200).set_body_json(chat_response("fallback answer")),
+            )
+            .mount(&server)
+            .await;
+
+        let bridge = RlmBridge::new(client_for(&server), "child-model".to_string(), 0);
+        let response = bridge
+            .dispatch(RpcRequest::Rlm {
+                prompt: "nested prompt".to_string(),
+                model: Some("override-model".to_string()),
+            })
+            .await;
+
+        match response {
+            RpcResponse::Single(single) => {
+                assert_eq!(single.text, "fallback answer");
+                assert!(single.error.is_none());
+            }
+            other => panic!("expected single response, got {other:?}"),
+        }
+
+        let usage = bridge.usage.lock().await;
+        assert_eq!(usage.input_tokens, 3);
+        assert_eq!(usage.output_tokens, 5);
+    }
+}
diff --git a/crates/tui/src/rlm/turn.rs b/crates/tui/src/rlm/turn.rs
index 08d59818..bc73dbf5 100644
--- a/crates/tui/src/rlm/turn.rs
+++ b/crates/tui/src/rlm/turn.rs
@@ -835,6 +835,22 @@ mod tests {
         assert!(text.contains("FINAL"));
     }
 
+    #[test]
+    fn build_metadata_truncates_long_context_without_leaking_tail() {
+        let secret_tail = "DO_NOT_LEAK_CONTEXT_TAIL";
+        let prompt = format!("{}{}", "a".repeat(PROMPT_PREVIEW_LEN + 100), secret_tail);
+        let msg = build_metadata_message(&prompt, None, 0, None, None);
+        let text = extract_text_blocks(&msg.content);
+
+        assert!(text.contains(&format!("- Length: {} chars", prompt.chars().count())));
+        assert!(text.contains("- Preview: \""));
+        assert!(text.contains("..."));
+        assert!(
+            !text.contains(secret_tail),
+            "metadata leaked the non-preview tail of context"
+        );
+    }
+
     #[test]
     fn build_metadata_with_iteration_shows_previous_code() {
         let msg = build_metadata_message("Test prompt", None, 3, Some("print('hi')"), Some("hi"));

From d2c007833f316c5e9ef95cbfefa6e479e6f5cb22 Mon Sep 17 00:00:00 2001
From: Hunter Bown <hmbown@gmail.com>
Date: Fri, 1 May 2026 04:10:59 -0500
Subject: [PATCH 07/17] test(rlm): make bridge client seam mockable

---
 crates/tui/src/llm_client/mod.rs |   5 +-
 crates/tui/src/rlm/bridge.rs     | 205 ++++++++++++++++++++++---------
 crates/tui/src/rlm/turn.rs       |  76 ++++++++----
 3 files changed, 202 insertions(+), 84 deletions(-)

diff --git a/crates/tui/src/llm_client/mod.rs b/crates/tui/src/llm_client/mod.rs
index 48b28b18..009f701b 100644
--- a/crates/tui/src/llm_client/mod.rs
+++ b/crates/tui/src/llm_client/mod.rs
@@ -58,7 +58,10 @@ pub trait LlmClient: Send + Sync {
     fn model(&self) -> &str;
 
     /// Creates a non-streaming message completion
-    async fn create_message(&self, request: MessageRequest) -> Result<MessageResponse>;
+    fn create_message(
+        &self,
+        request: MessageRequest,
+    ) -> impl Future<Output = Result<MessageResponse>> + Send;
 
     /// Creates a streaming message completion
     ///
diff --git a/crates/tui/src/rlm/bridge.rs b/crates/tui/src/rlm/bridge.rs
index 36ec6680..6338762b 100644
--- a/crates/tui/src/rlm/bridge.rs
+++ b/crates/tui/src/rlm/bridge.rs
@@ -13,13 +13,14 @@
 
 use std::sync::Arc;
 use std::time::Duration;
+use std::{future::Future, pin::Pin};
 
+use anyhow::Result;
 use futures_util::future::join_all;
 use tokio::sync::Mutex;
 
-use crate::client::DeepSeekClient;
-use crate::llm_client::LlmClient as _;
-use crate::models::{ContentBlock, Message, MessageRequest, SystemPrompt, Usage};
+use crate::llm_client::LlmClient;
+use crate::models::{ContentBlock, Message, MessageRequest, MessageResponse, SystemPrompt, Usage};
 use crate::repl::runtime::{BatchResp, RpcDispatcher, RpcRequest, RpcResponse, SingleResp};
 
 /// Per-child completion timeout — same as the previous sidecar default.
@@ -29,18 +30,46 @@ const DEFAULT_CHILD_MAX_TOKENS: u32 = 4096;
 /// Hard cap on prompts per batch RPC.
 pub const MAX_BATCH: usize = 16;
 
+/// Object-safe slice of the LLM client interface that the RLM bridge needs.
+///
+/// `LlmClient` itself uses native async trait methods, which are not dyn-safe.
+/// The bridge only needs non-streaming completions, so this boxed-future shim
+/// gives tests a clean mock seam without changing the wider provider trait.
+pub(crate) trait RlmLlmClient: Send + Sync {
+    fn create_message_boxed(
+        &self,
+        request: MessageRequest,
+    ) -> Pin<Box<dyn Future<Output = Result<MessageResponse>> + Send + '_>>;
+}
+
+impl<T> RlmLlmClient for T
+where
+    T: LlmClient + Send + Sync,
+{
+    fn create_message_boxed(
+        &self,
+        request: MessageRequest,
+    ) -> Pin<Box<dyn Future<Output = Result<MessageResponse>> + Send + '_>> {
+        Box::pin(self.create_message(request))
+    }
+}
+
 /// State shared with the bridge across all RPC calls in one turn.
 pub struct RlmBridge {
-    pub client: DeepSeekClient,
-    pub child_model: String,
+    client: Arc<dyn RlmLlmClient>,
+    child_model: String,
     /// Recursion budget remaining for `Rlm` / `RlmBatch` requests. When
     /// zero, those requests fall back to plain `Llm` completions.
-    pub depth_remaining: u32,
-    pub usage: Arc<Mutex<Usage>>,
+    depth_remaining: u32,
+    usage: Arc<Mutex<Usage>>,
 }
 
 impl RlmBridge {
-    pub fn new(client: DeepSeekClient, child_model: String, depth_remaining: u32) -> Self {
+    pub(crate) fn new(
+        client: Arc<dyn RlmLlmClient>,
+        child_model: String,
+        depth_remaining: u32,
+    ) -> Self {
         Self {
             client,
             child_model,
@@ -83,7 +112,7 @@ impl RlmBridge {
             top_p: Some(0.9_f32),
         };
 
-        let fut = self.client.create_message(request);
+        let fut = self.client.create_message_boxed(request);
         let response =
             match tokio::time::timeout(Duration::from_secs(CHILD_TIMEOUT_SECS), fut).await {
                 Ok(Ok(r)) => r,
@@ -165,7 +194,7 @@ impl RlmBridge {
         // Recursive call. The dyn-erasure on `run_rlm_turn_inner` breaks
         // the `bridge → turn → bridge` opaque-future cycle.
         let result = super::turn::run_rlm_turn_inner(
-            &self.client,
+            Arc::clone(&self.client),
             child_model.clone(),
             prompt,
             None,
@@ -254,43 +283,32 @@ impl RpcDispatcher for RlmBridge {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::config::{Config, ProviderConfig, ProvidersConfig};
-    use serde_json::json;
-    use wiremock::matchers::{method, path};
-    use wiremock::{Mock, MockServer, ResponseTemplate};
+    use crate::llm_client::mock::MockLlmClient;
 
-    fn client_for(server: &MockServer) -> DeepSeekClient {
-        let config = Config {
-            provider: Some("sglang".to_string()),
-            providers: Some(ProvidersConfig {
-                sglang: ProviderConfig {
-                    base_url: Some(server.uri()),
-                    ..ProviderConfig::default()
-                },
-                ..ProvidersConfig::default()
-            }),
-            ..Config::default()
-        };
-        DeepSeekClient::new(&config).expect("test client")
+    fn mock_response(text: &str, input_tokens: u32, output_tokens: u32) -> MessageResponse {
+        MessageResponse {
+            id: "mock_msg".to_string(),
+            r#type: "message".to_string(),
+            role: "assistant".to_string(),
+            content: vec![ContentBlock::Text {
+                text: text.to_string(),
+                cache_control: None,
+            }],
+            model: "mock-model".to_string(),
+            stop_reason: Some("end_turn".to_string()),
+            stop_sequence: None,
+            container: None,
+            usage: Usage {
+                input_tokens,
+                output_tokens,
+                ..Usage::default()
+            },
+        }
     }
 
-    fn chat_response(text: &str) -> serde_json::Value {
-        json!({
-            "id": "chatcmpl-test",
-            "model": "test-model",
-            "choices": [{
-                "index": 0,
-                "message": {
-                    "role": "assistant",
-                    "content": text,
-                },
-                "finish_reason": "stop"
-            }],
-            "usage": {
-                "prompt_tokens": 3,
-                "completion_tokens": 5
-            }
-        })
+    fn bridge_for(mock: Arc<MockLlmClient>, depth_remaining: u32) -> RlmBridge {
+        let client: Arc<dyn RlmLlmClient> = mock;
+        RlmBridge::new(client, "child-model".to_string(), depth_remaining)
     }
 
     #[test]
@@ -318,22 +336,89 @@ mod tests {
     }
 
     #[tokio::test]
-    async fn rlm_dispatch_at_depth_zero_falls_back_to_plain_llm_query() {
-        let server = MockServer::start().await;
-        Mock::given(method("POST"))
-            .and(path("/v1/responses"))
-            .respond_with(ResponseTemplate::new(404).set_body_string("responses unavailable"))
-            .mount(&server)
-            .await;
-        Mock::given(method("POST"))
-            .and(path("/v1/chat/completions"))
-            .respond_with(
-                ResponseTemplate::new(200).set_body_json(chat_response("fallback answer")),
-            )
-            .mount(&server)
+    async fn llm_dispatch_uses_trait_backed_mock_client() {
+        let mock = Arc::new(MockLlmClient::new(Vec::new()));
+        mock.push_message_response(mock_response("child answer", 7, 11));
+        let bridge = bridge_for(Arc::clone(&mock), 1);
+
+        let response = bridge
+            .dispatch(RpcRequest::Llm {
+                prompt: "child prompt".to_string(),
+                model: Some("override-model".to_string()),
+                max_tokens: Some(123),
+                system: Some("child system".to_string()),
+            })
             .await;
 
-        let bridge = RlmBridge::new(client_for(&server), "child-model".to_string(), 0);
+        match response {
+            RpcResponse::Single(single) => {
+                assert_eq!(single.text, "child answer");
+                assert!(single.error.is_none());
+            }
+            other => panic!("expected single response, got {other:?}"),
+        }
+
+        let captured = mock.captured_requests();
+        assert_eq!(captured.len(), 1);
+        assert_eq!(captured[0].model, "override-model");
+        assert_eq!(captured[0].max_tokens, 123);
+        assert_eq!(
+            captured[0].system,
+            Some(SystemPrompt::Text("child system".to_string()))
+        );
+
+        let usage = bridge.usage.lock().await;
+        assert_eq!(usage.input_tokens, 7);
+        assert_eq!(usage.output_tokens, 11);
+    }
+
+    #[tokio::test]
+    async fn llm_batch_dispatch_preserves_result_count_and_usage() {
+        let mock = Arc::new(MockLlmClient::new(Vec::new()));
+        mock.push_message_response(mock_response("one", 1, 2));
+        mock.push_message_response(mock_response("two", 3, 4));
+        mock.push_message_response(mock_response("three", 5, 6));
+        let bridge = bridge_for(Arc::clone(&mock), 1);
+
+        let response = bridge
+            .dispatch(RpcRequest::LlmBatch {
+                prompts: vec!["a".to_string(), "b".to_string(), "c".to_string()],
+                model: Some("batch-model".to_string()),
+            })
+            .await;
+
+        match response {
+            RpcResponse::Batch(batch) => {
+                let texts: Vec<_> = batch
+                    .results
+                    .iter()
+                    .map(|result| result.text.as_str())
+                    .collect();
+                assert_eq!(texts, ["one", "two", "three"]);
+                assert!(batch.results.iter().all(|result| result.error.is_none()));
+            }
+            other => panic!("expected batch response, got {other:?}"),
+        }
+
+        let captured = mock.captured_requests();
+        assert_eq!(captured.len(), 3);
+        assert!(
+            captured
+                .iter()
+                .all(|request| request.model == "batch-model")
+        );
+
+        let usage = bridge.usage.lock().await;
+        assert_eq!(usage.input_tokens, 9);
+        assert_eq!(usage.output_tokens, 12);
+    }
+
+    #[tokio::test]
+    async fn rlm_dispatch_at_depth_zero_falls_back_to_plain_llm_query() {
+        let mock = Arc::new(MockLlmClient::new(Vec::new()));
+        mock.push_message_response(mock_response("fallback answer", 3, 5));
+        let bridge = bridge_for(Arc::clone(&mock), 0);
+
         let response = bridge
             .dispatch(RpcRequest::Rlm {
                 prompt: "nested prompt".to_string(),
@@ -352,5 +437,9 @@ mod tests {
         let usage = bridge.usage.lock().await;
         assert_eq!(usage.input_tokens, 3);
         assert_eq!(usage.output_tokens, 5);
+
+        let captured = mock.captured_requests();
+        assert_eq!(captured.len(), 1);
+        assert_eq!(captured[0].model, "override-model");
     }
 }
diff --git a/crates/tui/src/rlm/turn.rs b/crates/tui/src/rlm/turn.rs
index bc73dbf5..c97f5ad1 100644
--- a/crates/tui/src/rlm/turn.rs
+++ b/crates/tui/src/rlm/turn.rs
@@ -2,6 +2,7 @@
 //! subprocess + stdin/stdout RPC bridge (no HTTP sidecar).
 
 use std::path::PathBuf;
+use std::sync::Arc;
 use std::time::{Duration, Instant};
 
 use tokio::sync::mpsc;
@@ -9,11 +10,10 @@ use uuid::Uuid;
 
 use crate::client::DeepSeekClient;
 use crate::core::events::Event;
-use crate::llm_client::LlmClient;
-use crate::models::{ContentBlock, Message, MessageRequest, Usage};
+use crate::models::{ContentBlock, Message, MessageRequest, SystemPrompt, Usage};
 use crate::repl::PythonRuntime;
 
-use super::bridge::RlmBridge;
+use super::bridge::{RlmBridge, RlmLlmClient};
 use super::prompt::rlm_system_prompt;
 
 // ---------------------------------------------------------------------------
@@ -99,7 +99,7 @@ pub async fn run_rlm_turn(
     max_depth: u32,
 ) -> RlmTurnResult {
     run_rlm_turn_inner(
-        client,
+        Arc::new(client.clone()),
         model,
         prompt,
         None,
@@ -122,7 +122,7 @@ pub async fn run_rlm_turn_with_root(
     max_depth: u32,
 ) -> RlmTurnResult {
     run_rlm_turn_inner(
-        client,
+        Arc::new(client.clone()),
         model,
         prompt,
         root_prompt,
@@ -136,15 +136,15 @@ pub async fn run_rlm_turn_with_root(
 /// Inner entry point — also used by the bridge when it recurses. Returns
 /// a boxed future to break the recursive opaque-future-type cycle:
 /// `run_rlm_turn_inner` → `RlmBridge::dispatch` → `run_rlm_turn_inner`.
-pub(crate) fn run_rlm_turn_inner<'a>(
-    client: &'a DeepSeekClient,
+pub(crate) fn run_rlm_turn_inner(
+    client: Arc<dyn RlmLlmClient>,
     model: String,
     prompt: String,
     root_prompt: Option<String>,
     child_model: String,
     tx_event: mpsc::Sender<Event>,
     max_depth: u32,
-) -> std::pin::Pin<Box<dyn std::future::Future<Output = RlmTurnResult> + Send + 'a>> {
+) -> std::pin::Pin<Box<dyn std::future::Future<Output = RlmTurnResult> + Send>> {
     Box::pin(run_rlm_turn_impl(
         client,
         model,
@@ -161,7 +161,7 @@ pub(crate) fn run_rlm_turn_inner<'a>(
 // ---------------------------------------------------------------------------
 
 async fn run_rlm_turn_impl(
-    client: &DeepSeekClient,
+    client: Arc<dyn RlmLlmClient>,
     model: String,
     prompt: String,
     root_prompt: Option<String>,
@@ -212,7 +212,7 @@ async fn run_rlm_turn_impl(
     };
 
     // 3. Build the bridge that services llm_query / rlm_query RPCs.
-    let bridge = RlmBridge::new(client.clone(), child_model.clone(), max_depth);
+    let bridge = RlmBridge::new(Arc::clone(&client), child_model.clone(), max_depth);
     let usage_handle = bridge.usage_handle();
 
     let _ = tx_event
@@ -262,22 +262,9 @@ async fn run_rlm_turn_impl(
                 .await;
 
             // 4a. Root LLM generates code from metadata-only context.
-            let request = MessageRequest {
-                model: model.clone(),
-                messages: messages.clone(),
-                max_tokens: ROOT_MAX_TOKENS,
-                system: Some(system.clone()),
-                tools: None,
-                tool_choice: None,
-                metadata: None,
-                thinking: None,
-                reasoning_effort: None,
-                stream: Some(false),
-                temperature: Some(ROOT_TEMPERATURE),
-                top_p: Some(0.9_f32),
-            };
+            let request = build_root_request(&model, &messages, &system);
 
-            let response = match client.create_message(request).await {
+            let response = match client.create_message_boxed(request).await {
                 Ok(r) => r,
                 Err(e) => {
                     break 'turn RlmTurnResult {
@@ -551,6 +538,23 @@ fn write_context_file(prompt: &str) -> std::io::Result<PathBuf> {
     Ok(path)
 }
 
+fn build_root_request(model: &str, messages: &[Message], system: &SystemPrompt) -> MessageRequest {
+    MessageRequest {
+        model: model.to_string(),
+        messages: messages.to_vec(),
+        max_tokens: ROOT_MAX_TOKENS,
+        system: Some(system.clone()),
+        tools: None,
+        tool_choice: None,
+        metadata: None,
+        thinking: None,
+        reasoning_effort: None,
+        stream: Some(false),
+        temperature: Some(ROOT_TEMPERATURE),
+        top_p: Some(0.9_f32),
+    }
+}
+
 /// Build `Metadata(state)` from the paper. Surfaces:
 /// - the small `root_prompt` (if any) — repeated each iteration
 /// - `context` length + preview
@@ -851,6 +855,28 @@ mod tests {
         );
     }
 
+    #[test]
+    fn build_root_request_keeps_context_tail_out_of_root_payload() {
+        let secret_tail = "DO_NOT_LEAK_ROOT_REQUEST";
+        let prompt = format!("{}{}", "a".repeat(PROMPT_PREVIEW_LEN + 100), secret_tail);
+        let messages = vec![build_metadata_message(
+            &prompt,
+            Some("answer from the long context"),
+            0,
+            None,
+            None,
+        )];
+
+        let request = build_root_request("root-model", &messages, &rlm_system_prompt());
+        let payload = serde_json::to_string(&request).expect("request should serialize");
+
+        assert!(payload.contains(&format!("- Length: {} chars", prompt.chars().count())));
+        assert!(
+            !payload.contains(secret_tail),
+            "root LLM request leaked the non-preview tail of context"
+        );
+    }
+
     #[test]
     fn build_metadata_with_iteration_shows_previous_code() {
         let msg = build_metadata_message("Test prompt", None, 3, Some("print('hi')"), Some("hi"));

From 0887a88465cd7610aed747419db48dacd47a08c0 Mon Sep 17 00:00:00 2001
From: Hunter Bown <hmbown@gmail.com>
Date: Fri, 1 May 2026 05:09:39 -0500
Subject: [PATCH 08/17] refactor(engine): extract tool catalog helpers

---
 crates/tui/src/core/engine.rs              | 426 +--------------------
 crates/tui/src/core/engine/tool_catalog.rs | 418 ++++++++++++++++++++
 2 files changed, 428 insertions(+), 416 deletions(-)
 create mode 100644 crates/tui/src/core/engine/tool_catalog.rs

diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs
index 99a12e30..04bb656a 100644
--- a/crates/tui/src/core/engine.rs
+++ b/crates/tui/src/core/engine.rs
@@ -42,7 +42,7 @@ use crate::seam_manager::{SeamConfig, SeamManager};
 use crate::tools::plan::{SharedPlanState, new_shared_plan_state};
 use crate::tools::shell::{SharedShellManager, new_shared_shell_manager};
 use crate::tools::spec::RuntimeToolServices;
-use crate::tools::spec::{ApprovalRequirement, ToolError, ToolResult, required_str};
+use crate::tools::spec::{ApprovalRequirement, ToolError, ToolResult};
 use crate::tools::subagent::{
     Mailbox, SharedSubAgentManager, SubAgentRuntime, SubAgentType, new_shared_subagent_manager,
 };
@@ -398,14 +398,6 @@ pub(crate) const TOOL_CALL_START_MARKERS: [&str; 5] = [
     "<function_calls>",
 ];
 
-const MULTI_TOOL_PARALLEL_NAME: &str = "multi_tool_use.parallel";
-const REQUEST_USER_INPUT_NAME: &str = "request_user_input";
-const CODE_EXECUTION_TOOL_NAME: &str = "code_execution";
-const CODE_EXECUTION_TOOL_TYPE: &str = "code_execution_20250825";
-const TOOL_SEARCH_REGEX_NAME: &str = "tool_search_tool_regex";
-const TOOL_SEARCH_REGEX_TYPE: &str = "tool_search_tool_regex_20251119";
-const TOOL_SEARCH_BM25_NAME: &str = "tool_search_tool_bm25";
-const TOOL_SEARCH_BM25_TYPE: &str = "tool_search_tool_bm25_20251119";
 pub(crate) const TOOL_CALL_END_MARKERS: [&str; 5] = [
     "[/TOOL_CALL]",
     "</deepseek:tool_call>",
@@ -463,413 +455,6 @@ pub(crate) fn filter_tool_call_delta(delta: &str, in_tool_call: &mut bool) -> St
     output
 }
 
-/// Compute the tool input that should be reported when a tool's stream block
-/// closes (`ContentBlockStop`). Prefers the parsed `input_buffer` over the
-/// initial `input` placeholder so a `ToolCallStarted` event never carries a
-/// stale `{}` when args were actually streamed in via `InputJsonDelta`.
-///
-/// Order of preference:
-///   1. `input_buffer` parses cleanly → use that.
-///   2. `input_buffer` is empty → fall back to `input` (model embedded args
-///      directly in the `ContentBlockStart` frame and sent no deltas).
-///   3. `input_buffer` non-empty but unparseable → fall back to `input`
-///      (the per-delta parser has already mirrored the most recent valid
-///      partial parse into `tool_state.input`).
-fn is_tool_search_tool(name: &str) -> bool {
-    matches!(name, TOOL_SEARCH_REGEX_NAME | TOOL_SEARCH_BM25_NAME)
-}
-
-fn should_default_defer_tool(name: &str, mode: AppMode) -> bool {
-    if mode == AppMode::Yolo {
-        return false;
-    }
-
-    // Shell tools are kept active in Agent so the model can run verification
-    // commands (build/test/git/cargo) without first having to discover the
-    // tool through ToolSearch. Plan mode never registers shell tools.
-    let always_loaded_in_action_modes = matches!(mode, AppMode::Agent)
-        && matches!(
-            name,
-            "exec_shell"
-                | "exec_shell_wait"
-                | "exec_shell_interact"
-                | "exec_wait"
-                | "exec_interact"
-        );
-    if always_loaded_in_action_modes {
-        return false;
-    }
-
-    !matches!(
-        name,
-        "read_file"
-            | "list_dir"
-            | "grep_files"
-            | "file_search"
-            | "diagnostics"
-            | "rlm"
-            | "recall_archive"
-            | MULTI_TOOL_PARALLEL_NAME
-            | "update_plan"
-            | "checklist_write"
-            | "todo_write"
-            | "task_create"
-            | "task_list"
-            | "task_read"
-            | "task_gate_run"
-            | "task_shell_start"
-            | "task_shell_wait"
-            | "github_issue_context"
-            | "github_pr_context"
-            | REQUEST_USER_INPUT_NAME
-    )
-}
-
-fn ensure_advanced_tooling(catalog: &mut Vec<Tool>) {
-    if !catalog.iter().any(|t| t.name == CODE_EXECUTION_TOOL_NAME) {
-        catalog.push(Tool {
-            tool_type: Some(CODE_EXECUTION_TOOL_TYPE.to_string()),
-            name: CODE_EXECUTION_TOOL_NAME.to_string(),
-            description: "Execute Python code in a local sandboxed runtime and return stdout/stderr/return_code as JSON.".to_string(),
-            input_schema: json!({
-                "type": "object",
-                "properties": {
-                    "code": { "type": "string", "description": "Python source code to execute." }
-                },
-                "required": ["code"]
-            }),
-            allowed_callers: Some(vec!["direct".to_string()]),
-            defer_loading: Some(false),
-            input_examples: None,
-            strict: None,
-            cache_control: None,
-        });
-    }
-
-    if !catalog.iter().any(|t| t.name == TOOL_SEARCH_REGEX_NAME) {
-        catalog.push(Tool {
-            tool_type: Some(TOOL_SEARCH_REGEX_TYPE.to_string()),
-            name: TOOL_SEARCH_REGEX_NAME.to_string(),
-            description: "Search deferred tool definitions using a regex query and return matching tool references.".to_string(),
-            input_schema: json!({
-                "type": "object",
-                "properties": {
-                    "query": { "type": "string", "description": "Regex pattern to search tool names/descriptions/schema." }
-                },
-                "required": ["query"]
-            }),
-            allowed_callers: Some(vec!["direct".to_string()]),
-            defer_loading: Some(false),
-            input_examples: None,
-            strict: None,
-            cache_control: None,
-        });
-    }
-
-    if !catalog.iter().any(|t| t.name == TOOL_SEARCH_BM25_NAME) {
-        catalog.push(Tool {
-            tool_type: Some(TOOL_SEARCH_BM25_TYPE.to_string()),
-            name: TOOL_SEARCH_BM25_NAME.to_string(),
-            description: "Search deferred tool definitions using natural-language matching and return matching tool references.".to_string(),
-            input_schema: json!({
-                "type": "object",
-                "properties": {
-                    "query": { "type": "string", "description": "Natural language query for tool discovery." }
-                },
-                "required": ["query"]
-            }),
-            allowed_callers: Some(vec!["direct".to_string()]),
-            defer_loading: Some(false),
-            input_examples: None,
-            strict: None,
-            cache_control: None,
-        });
-    }
-}
-
-fn initial_active_tools(catalog: &[Tool]) -> std::collections::HashSet<String> {
-    let mut active = std::collections::HashSet::new();
-    for tool in catalog {
-        if !tool.defer_loading.unwrap_or(false) || is_tool_search_tool(&tool.name) {
-            active.insert(tool.name.clone());
-        }
-    }
-    if active.is_empty()
-        && !catalog.is_empty()
-        && let Some(first) = catalog.first()
-    {
-        active.insert(first.name.clone());
-    }
-    active
-}
-
-fn active_tool_list_from_catalog(
-    catalog: &[Tool],
-    active: &std::collections::HashSet<String>,
-) -> Vec<Tool> {
-    catalog
-        .iter()
-        .filter(|tool| active.contains(&tool.name))
-        .cloned()
-        .collect()
-}
-
-fn active_tools_for_step(
-    catalog: &[Tool],
-    active: &std::collections::HashSet<String>,
-    force_update_plan: bool,
-) -> Vec<Tool> {
-    // DeepSeek reasoning models reject explicit named tool_choice forcing here, so for
-    // obvious quick-plan asks we narrow the first-step tool surface to update_plan instead.
-    if force_update_plan {
-        let forced: Vec<_> = catalog
-            .iter()
-            .filter(|tool| tool.name == "update_plan")
-            .cloned()
-            .collect();
-        if !forced.is_empty() {
-            return forced;
-        }
-    }
-
-    active_tool_list_from_catalog(catalog, active)
-}
-
-fn tool_search_haystack(tool: &Tool) -> String {
-    format!(
-        "{}\n{}\n{}",
-        tool.name.to_lowercase(),
-        tool.description.to_lowercase(),
-        tool.input_schema.to_string().to_lowercase()
-    )
-}
-
-fn discover_tools_with_regex(catalog: &[Tool], query: &str) -> Result<Vec<String>, ToolError> {
-    let regex = regex::Regex::new(query)
-        .map_err(|err| ToolError::invalid_input(format!("Invalid regex query: {err}")))?;
-
-    let mut matches = Vec::new();
-    for tool in catalog {
-        if is_tool_search_tool(&tool.name) {
-            continue;
-        }
-        let hay = tool_search_haystack(tool);
-        if regex.is_match(&hay) {
-            matches.push(tool.name.clone());
-        }
-        if matches.len() >= 5 {
-            break;
-        }
-    }
-    Ok(matches)
-}
-
-fn discover_tools_with_bm25_like(catalog: &[Tool], query: &str) -> Vec<String> {
-    let terms: Vec<String> = query
-        .split_whitespace()
-        .map(|term| term.trim().to_lowercase())
-        .filter(|term| !term.is_empty())
-        .collect();
-    if terms.is_empty() {
-        return Vec::new();
-    }
-
-    let mut scored: Vec<(i64, String)> = Vec::new();
-    for tool in catalog {
-        if is_tool_search_tool(&tool.name) {
-            continue;
-        }
-        let hay = tool_search_haystack(tool);
-        let mut score = 0i64;
-        for term in &terms {
-            if hay.contains(term) {
-                score += 1;
-            }
-            if tool.name.to_lowercase().contains(term) {
-                score += 2;
-            }
-        }
-        if score > 0 {
-            scored.push((score, tool.name.clone()));
-        }
-    }
-    scored.sort_by(|a, b| b.0.cmp(&a.0).then_with(|| a.1.cmp(&b.1)));
-    scored.into_iter().take(5).map(|(_, name)| name).collect()
-}
-
-fn edit_distance(a: &str, b: &str) -> usize {
-    if a == b {
-        return 0;
-    }
-    if a.is_empty() {
-        return b.chars().count();
-    }
-    if b.is_empty() {
-        return a.chars().count();
-    }
-
-    let b_chars: Vec<char> = b.chars().collect();
-    let mut prev: Vec<usize> = (0..=b_chars.len()).collect();
-    let mut curr = vec![0usize; b_chars.len() + 1];
-
-    for (i, a_ch) in a.chars().enumerate() {
-        curr[0] = i + 1;
-        for (j, b_ch) in b_chars.iter().enumerate() {
-            let cost = if a_ch == *b_ch { 0 } else { 1 };
-            let delete = prev[j + 1] + 1;
-            let insert = curr[j] + 1;
-            let substitute = prev[j] + cost;
-            curr[j + 1] = delete.min(insert).min(substitute);
-        }
-        std::mem::swap(&mut prev, &mut curr);
-    }
-
-    prev[b_chars.len()]
-}
-
-fn suggest_tool_names(catalog: &[Tool], requested: &str, limit: usize) -> Vec<String> {
-    let requested = requested.trim().to_ascii_lowercase();
-    if requested.is_empty() || limit == 0 {
-        return Vec::new();
-    }
-
-    let mut candidates: Vec<(u8, usize, String)> = Vec::new();
-    for tool in catalog {
-        let candidate = tool.name.to_ascii_lowercase();
-        let prefix_match = candidate.starts_with(&requested) || requested.starts_with(&candidate);
-        let contains_match = candidate.contains(&requested) || requested.contains(&candidate);
-        let distance = edit_distance(&candidate, &requested);
-        let close_typo = distance <= 3;
-
-        if !(prefix_match || contains_match || close_typo) {
-            continue;
-        }
-
-        let rank = if prefix_match {
-            0
-        } else if contains_match {
-            1
-        } else {
-            2
-        };
-        candidates.push((rank, distance, tool.name.clone()));
-    }
-
-    candidates.sort_by(|a, b| {
-        a.0.cmp(&b.0)
-            .then_with(|| a.1.cmp(&b.1))
-            .then_with(|| a.2.cmp(&b.2))
-    });
-    candidates.dedup_by(|a, b| a.2 == b.2);
-    candidates
-        .into_iter()
-        .take(limit)
-        .map(|(_, _, name)| name)
-        .collect()
-}
-
-fn missing_tool_error_message(tool_name: &str, catalog: &[Tool]) -> String {
-    let suggestions = suggest_tool_names(catalog, tool_name, 3);
-    if suggestions.is_empty() {
-        return format!(
-            "Tool '{tool_name}' is not available in the current tool catalog. \
-             Verify mode/feature flags, or use {TOOL_SEARCH_BM25_NAME} with a short query."
-        );
-    }
-
-    format!(
-        "Tool '{tool_name}' is not available in the current tool catalog. \
-         Did you mean: {}? You can also use {TOOL_SEARCH_BM25_NAME} to discover tools.",
-        suggestions.join(", ")
-    )
-}
-
-fn maybe_activate_requested_deferred_tool(
-    tool_name: &str,
-    catalog: &[Tool],
-    active_tools: &mut std::collections::HashSet<String>,
-) -> bool {
-    let Some(def) = catalog.iter().find(|def| def.name == tool_name) else {
-        return false;
-    };
-
-    if !def.defer_loading.unwrap_or(false) || active_tools.contains(tool_name) {
-        return false;
-    }
-
-    active_tools.insert(tool_name.to_string())
-}
-
-fn execute_tool_search(
-    tool_name: &str,
-    input: &serde_json::Value,
-    catalog: &[Tool],
-    active_tools: &mut std::collections::HashSet<String>,
-) -> Result<ToolResult, ToolError> {
-    let query = required_str(input, "query")?;
-    let discovered = if tool_name == TOOL_SEARCH_REGEX_NAME {
-        discover_tools_with_regex(catalog, query)?
-    } else {
-        discover_tools_with_bm25_like(catalog, query)
-    };
-
-    for name in &discovered {
-        active_tools.insert(name.clone());
-    }
-
-    let references = discovered
-        .iter()
-        .map(|name| json!({"type": "tool_reference", "tool_name": name}))
-        .collect::<Vec<_>>();
-
-    let payload = json!({
-        "type": "tool_search_tool_search_result",
-        "tool_references": references,
-    });
-
-    Ok(ToolResult {
-        content: serde_json::to_string(&payload).unwrap_or_else(|_| payload.to_string()),
-        success: true,
-        metadata: Some(json!({
-            "tool_references": discovered,
-        })),
-    })
-}
-
-async fn execute_code_execution_tool(
-    input: &serde_json::Value,
-    workspace: &std::path::Path,
-) -> Result<ToolResult, ToolError> {
-    let code = required_str(input, "code")?;
-    let mut cmd = tokio::process::Command::new("python3");
-    cmd.arg("-c");
-    cmd.arg(code);
-    cmd.current_dir(workspace);
-
-    let output = tokio::time::timeout(Duration::from_secs(120), cmd.output())
-        .await
-        .map_err(|_| ToolError::Timeout { seconds: 120 })
-        .and_then(|res| res.map_err(|e| ToolError::execution_failed(e.to_string())))?;
-
-    let stdout = String::from_utf8_lossy(&output.stdout).to_string();
-    let stderr = String::from_utf8_lossy(&output.stderr).to_string();
-    let return_code = output.status.code().unwrap_or(-1);
-    let success = output.status.success();
-    let payload = json!({
-        "type": "code_execution_result",
-        "stdout": stdout,
-        "stderr": stderr,
-        "return_code": return_code,
-        "content": [],
-    });
-
-    Ok(ToolResult {
-        content: serde_json::to_string(&payload).unwrap_or_else(|_| payload.to_string()),
-        success,
-        metadata: Some(payload),
-    })
-}
-
 fn caller_type_for_tool_use(caller: Option<&ToolCaller>) -> &str {
     caller.map_or("direct", |c| c.caller_type.as_str())
 }
@@ -2878,6 +2463,7 @@ pub(crate) fn mock_engine_handle() -> MockEngineHandle {
 mod approval;
 mod capacity_flow;
 mod dispatch;
+mod tool_catalog;
 mod turn_loop;
 
 use self::approval::{ApprovalDecision, ApprovalResult, UserInputDecision};
@@ -2887,6 +2473,14 @@ use self::dispatch::{
     mcp_tool_is_read_only, parse_parallel_tool_calls, parse_tool_input,
     should_force_update_plan_first, should_parallelize_tool_batch, should_stop_after_plan_tool,
 };
+#[cfg(test)]
+use self::tool_catalog::TOOL_SEARCH_BM25_NAME;
+use self::tool_catalog::{
+    CODE_EXECUTION_TOOL_NAME, MULTI_TOOL_PARALLEL_NAME, REQUEST_USER_INPUT_NAME,
+    active_tools_for_step, ensure_advanced_tooling, execute_code_execution_tool,
+    execute_tool_search, initial_active_tools, is_tool_search_tool,
+    maybe_activate_requested_deferred_tool, missing_tool_error_message, should_default_defer_tool,
+};
 
 #[cfg(test)]
 mod tests;
diff --git a/crates/tui/src/core/engine/tool_catalog.rs b/crates/tui/src/core/engine/tool_catalog.rs
new file mode 100644
index 00000000..011d632e
--- /dev/null
+++ b/crates/tui/src/core/engine/tool_catalog.rs
@@ -0,0 +1,418 @@
+//! Deferred tool catalog and built-in advanced tool helpers.
+//!
+//! The streaming turn loop owns when tools are offered or executed. This module
+//! owns the catalog-level policy around deferred loading, tool search, missing
+//! tool suggestions, and the small set of built-in advanced tools that are not
+//! registered by the normal runtime tool registry.
+
+use std::collections::HashSet;
+use std::path::Path;
+use std::time::Duration;
+
+use serde_json::json;
+
+use crate::models::Tool;
+use crate::tools::spec::{ToolError, ToolResult, required_str};
+use crate::tui::app::AppMode;
+
+pub(super) const MULTI_TOOL_PARALLEL_NAME: &str = "multi_tool_use.parallel";
+pub(super) const REQUEST_USER_INPUT_NAME: &str = "request_user_input";
+pub(super) const CODE_EXECUTION_TOOL_NAME: &str = "code_execution";
+const CODE_EXECUTION_TOOL_TYPE: &str = "code_execution_20250825";
+const TOOL_SEARCH_REGEX_NAME: &str = "tool_search_tool_regex";
+const TOOL_SEARCH_REGEX_TYPE: &str = "tool_search_tool_regex_20251119";
+pub(super) const TOOL_SEARCH_BM25_NAME: &str = "tool_search_tool_bm25";
+const TOOL_SEARCH_BM25_TYPE: &str = "tool_search_tool_bm25_20251119";
+
+pub(super) fn is_tool_search_tool(name: &str) -> bool {
+    matches!(name, TOOL_SEARCH_REGEX_NAME | TOOL_SEARCH_BM25_NAME)
+}
+
+pub(super) fn should_default_defer_tool(name: &str, mode: AppMode) -> bool {
+    if mode == AppMode::Yolo {
+        return false;
+    }
+
+    // Shell tools are kept active in Agent so the model can run verification
+    // commands (build/test/git/cargo) without first having to discover the
+    // tool through ToolSearch. Plan mode never registers shell tools.
+    let always_loaded_in_action_modes = matches!(mode, AppMode::Agent)
+        && matches!(
+            name,
+            "exec_shell"
+                | "exec_shell_wait"
+                | "exec_shell_interact"
+                | "exec_wait"
+                | "exec_interact"
+        );
+    if always_loaded_in_action_modes {
+        return false;
+    }
+
+    !matches!(
+        name,
+        "read_file"
+            | "list_dir"
+            | "grep_files"
+            | "file_search"
+            | "diagnostics"
+            | "rlm"
+            | "recall_archive"
+            | MULTI_TOOL_PARALLEL_NAME
+            | "update_plan"
+            | "checklist_write"
+            | "todo_write"
+            | "task_create"
+            | "task_list"
+            | "task_read"
+            | "task_gate_run"
+            | "task_shell_start"
+            | "task_shell_wait"
+            | "github_issue_context"
+            | "github_pr_context"
+            | REQUEST_USER_INPUT_NAME
+    )
+}
+
+pub(super) fn ensure_advanced_tooling(catalog: &mut Vec<Tool>) {
+    if !catalog.iter().any(|t| t.name == CODE_EXECUTION_TOOL_NAME) {
+        catalog.push(Tool {
+            tool_type: Some(CODE_EXECUTION_TOOL_TYPE.to_string()),
+            name: CODE_EXECUTION_TOOL_NAME.to_string(),
+            description: "Execute Python code in a local sandboxed runtime and return stdout/stderr/return_code as JSON.".to_string(),
+            input_schema: json!({
+                "type": "object",
+                "properties": {
+                    "code": { "type": "string", "description": "Python source code to execute." }
+                },
+                "required": ["code"]
+            }),
+            allowed_callers: Some(vec!["direct".to_string()]),
+            defer_loading: Some(false),
+            input_examples: None,
+            strict: None,
+            cache_control: None,
+        });
+    }
+
+    if !catalog.iter().any(|t| t.name == TOOL_SEARCH_REGEX_NAME) {
+        catalog.push(Tool {
+            tool_type: Some(TOOL_SEARCH_REGEX_TYPE.to_string()),
+            name: TOOL_SEARCH_REGEX_NAME.to_string(),
+            description: "Search deferred tool definitions using a regex query and return matching tool references.".to_string(),
+            input_schema: json!({
+                "type": "object",
+                "properties": {
+                    "query": { "type": "string", "description": "Regex pattern to search tool names/descriptions/schema." }
+                },
+                "required": ["query"]
+            }),
+            allowed_callers: Some(vec!["direct".to_string()]),
+            defer_loading: Some(false),
+            input_examples: None,
+            strict: None,
+            cache_control: None,
+        });
+    }
+
+    if !catalog.iter().any(|t| t.name == TOOL_SEARCH_BM25_NAME) {
+        catalog.push(Tool {
+            tool_type: Some(TOOL_SEARCH_BM25_TYPE.to_string()),
+            name: TOOL_SEARCH_BM25_NAME.to_string(),
+            description: "Search deferred tool definitions using natural-language matching and return matching tool references.".to_string(),
+            input_schema: json!({
+                "type": "object",
+                "properties": {
+                    "query": { "type": "string", "description": "Natural language query for tool discovery." }
+                },
+                "required": ["query"]
+            }),
+            allowed_callers: Some(vec!["direct".to_string()]),
+            defer_loading: Some(false),
+            input_examples: None,
+            strict: None,
+            cache_control: None,
+        });
+    }
+}
+
+pub(super) fn initial_active_tools(catalog: &[Tool]) -> HashSet<String> {
+    let mut active = HashSet::new();
+    for tool in catalog {
+        if !tool.defer_loading.unwrap_or(false) || is_tool_search_tool(&tool.name) {
+            active.insert(tool.name.clone());
+        }
+    }
+    if active.is_empty()
+        && !catalog.is_empty()
+        && let Some(first) = catalog.first()
+    {
+        active.insert(first.name.clone());
+    }
+    active
+}
+
+fn active_tool_list_from_catalog(catalog: &[Tool], active: &HashSet<String>) -> Vec<Tool> {
+    catalog
+        .iter()
+        .filter(|tool| active.contains(&tool.name))
+        .cloned()
+        .collect()
+}
+
+pub(super) fn active_tools_for_step(
+    catalog: &[Tool],
+    active: &HashSet<String>,
+    force_update_plan: bool,
+) -> Vec<Tool> {
+    // DeepSeek reasoning models reject explicit named tool_choice forcing here,
+    // so for obvious quick-plan asks we narrow the first-step tool surface to
+    // update_plan instead.
+    if force_update_plan {
+        let forced: Vec<_> = catalog
+            .iter()
+            .filter(|tool| tool.name == "update_plan")
+            .cloned()
+            .collect();
+        if !forced.is_empty() {
+            return forced;
+        }
+    }
+
+    active_tool_list_from_catalog(catalog, active)
+}
+
+fn tool_search_haystack(tool: &Tool) -> String {
+    format!(
+        "{}\n{}\n{}",
+        tool.name.to_lowercase(),
+        tool.description.to_lowercase(),
+        tool.input_schema.to_string().to_lowercase()
+    )
+}
+
+fn discover_tools_with_regex(catalog: &[Tool], query: &str) -> Result<Vec<String>, ToolError> {
+    let regex = regex::Regex::new(query)
+        .map_err(|err| ToolError::invalid_input(format!("Invalid regex query: {err}")))?;
+
+    let mut matches = Vec::new();
+    for tool in catalog {
+        if is_tool_search_tool(&tool.name) {
+            continue;
+        }
+        let hay = tool_search_haystack(tool);
+        if regex.is_match(&hay) {
+            matches.push(tool.name.clone());
+        }
+        if matches.len() >= 5 {
+            break;
+        }
+    }
+    Ok(matches)
+}
+
+fn discover_tools_with_bm25_like(catalog: &[Tool], query: &str) -> Vec<String> {
+    let terms: Vec<String> = query
+        .split_whitespace()
+        .map(|term| term.trim().to_lowercase())
+        .filter(|term| !term.is_empty())
+        .collect();
+    if terms.is_empty() {
+        return Vec::new();
+    }
+
+    let mut scored: Vec<(i64, String)> = Vec::new();
+    for tool in catalog {
+        if is_tool_search_tool(&tool.name) {
+            continue;
+        }
+        let hay = tool_search_haystack(tool);
+        let mut score = 0i64;
+        for term in &terms {
+            if hay.contains(term) {
+                score += 1;
+            }
+            if tool.name.to_lowercase().contains(term) {
+                score += 2;
+            }
+        }
+        if score > 0 {
+            scored.push((score, tool.name.clone()));
+        }
+    }
+    scored.sort_by(|a, b| b.0.cmp(&a.0).then_with(|| a.1.cmp(&b.1)));
+    scored.into_iter().take(5).map(|(_, name)| name).collect()
+}
+
+fn edit_distance(a: &str, b: &str) -> usize {
+    if a == b {
+        return 0;
+    }
+    if a.is_empty() {
+        return b.chars().count();
+    }
+    if b.is_empty() {
+        return a.chars().count();
+    }
+
+    let b_chars: Vec<char> = b.chars().collect();
+    let mut prev: Vec<usize> = (0..=b_chars.len()).collect();
+    let mut curr = vec![0usize; b_chars.len() + 1];
+
+    for (i, a_ch) in a.chars().enumerate() {
+        curr[0] = i + 1;
+        for (j, b_ch) in b_chars.iter().enumerate() {
+            let cost = if a_ch == *b_ch { 0 } else { 1 };
+            let delete = prev[j + 1] + 1;
+            let insert = curr[j] + 1;
+            let substitute = prev[j] + cost;
+            curr[j + 1] = delete.min(insert).min(substitute);
+        }
+        std::mem::swap(&mut prev, &mut curr);
+    }
+
+    prev[b_chars.len()]
+}
+
+fn suggest_tool_names(catalog: &[Tool], requested: &str, limit: usize) -> Vec<String> {
+    let requested = requested.trim().to_ascii_lowercase();
+    if requested.is_empty() || limit == 0 {
+        return Vec::new();
+    }
+
+    let mut candidates: Vec<(u8, usize, String)> = Vec::new();
+    for tool in catalog {
+        let candidate = tool.name.to_ascii_lowercase();
+        let prefix_match = candidate.starts_with(&requested) || requested.starts_with(&candidate);
+        let contains_match = candidate.contains(&requested) || requested.contains(&candidate);
+        let distance = edit_distance(&candidate, &requested);
+        let close_typo = distance <= 3;
+
+        if !(prefix_match || contains_match || close_typo) {
+            continue;
+        }
+
+        let rank = if prefix_match {
+            0
+        } else if contains_match {
+            1
+        } else {
+            2
+        };
+        candidates.push((rank, distance, tool.name.clone()));
+    }
+
+    candidates.sort_by(|a, b| {
+        a.0.cmp(&b.0)
+            .then_with(|| a.1.cmp(&b.1))
+            .then_with(|| a.2.cmp(&b.2))
+    });
+    candidates.dedup_by(|a, b| a.2 == b.2);
+    candidates
+        .into_iter()
+        .take(limit)
+        .map(|(_, _, name)| name)
+        .collect()
+}
+
+pub(super) fn missing_tool_error_message(tool_name: &str, catalog: &[Tool]) -> String {
+    let suggestions = suggest_tool_names(catalog, tool_name, 3);
+    if suggestions.is_empty() {
+        return format!(
+            "Tool '{tool_name}' is not available in the current tool catalog. \
+             Verify mode/feature flags, or use {TOOL_SEARCH_BM25_NAME} with a short query."
+        );
+    }
+
+    format!(
+        "Tool '{tool_name}' is not available in the current tool catalog. \
+         Did you mean: {}? You can also use {TOOL_SEARCH_BM25_NAME} to discover tools.",
+        suggestions.join(", ")
+    )
+}
+
+pub(super) fn maybe_activate_requested_deferred_tool(
+    tool_name: &str,
+    catalog: &[Tool],
+    active_tools: &mut HashSet<String>,
+) -> bool {
+    let Some(def) = catalog.iter().find(|def| def.name == tool_name) else {
+        return false;
+    };
+
+    if !def.defer_loading.unwrap_or(false) || active_tools.contains(tool_name) {
+        return false;
+    }
+
+    active_tools.insert(tool_name.to_string())
+}
+
+pub(super) fn execute_tool_search(
+    tool_name: &str,
+    input: &serde_json::Value,
+    catalog: &[Tool],
+    active_tools: &mut HashSet<String>,
+) -> Result<ToolResult, ToolError> {
+    let query = required_str(input, "query")?;
+    let discovered = if tool_name == TOOL_SEARCH_REGEX_NAME {
+        discover_tools_with_regex(catalog, query)?
+    } else {
+        discover_tools_with_bm25_like(catalog, query)
+    };
+
+    for name in &discovered {
+        active_tools.insert(name.clone());
+    }
+
+    let references = discovered
+        .iter()
+        .map(|name| json!({"type": "tool_reference", "tool_name": name}))
+        .collect::<Vec<_>>();
+
+    let payload = json!({
+        "type": "tool_search_tool_search_result",
+        "tool_references": references,
+    });
+
+    Ok(ToolResult {
+        content: serde_json::to_string(&payload).unwrap_or_else(|_| payload.to_string()),
+        success: true,
+        metadata: Some(json!({
+            "tool_references": discovered,
+        })),
+    })
+}
+
+pub(super) async fn execute_code_execution_tool(
+    input: &serde_json::Value,
+    workspace: &Path,
+) -> Result<ToolResult, ToolError> {
+    let code = required_str(input, "code")?;
+    let mut cmd = tokio::process::Command::new("python3");
+    cmd.arg("-c");
+    cmd.arg(code);
+    cmd.current_dir(workspace);
+
+    let output = tokio::time::timeout(Duration::from_secs(120), cmd.output())
+        .await
+        .map_err(|_| ToolError::Timeout { seconds: 120 })
+        .and_then(|res| res.map_err(|e| ToolError::execution_failed(e.to_string())))?;
+
+    let stdout = String::from_utf8_lossy(&output.stdout).to_string();
+    let stderr = String::from_utf8_lossy(&output.stderr).to_string();
+    let return_code = output.status.code().unwrap_or(-1);
+    let success = output.status.success();
+    let payload = json!({
+        "type": "code_execution_result",
+        "stdout": stdout,
+        "stderr": stderr,
+        "return_code": return_code,
+        "content": [],
+    });
+
+    Ok(ToolResult {
+        content: serde_json::to_string(&payload).unwrap_or_else(|_| payload.to_string()),
+        success,
+        metadata: Some(payload),
+    })
+}

From f0fad7aa2ebdb976757ad0d24047c51c1031cf7a Mon Sep 17 00:00:00 2001
From: Hunter Bown <hmbown@gmail.com>
Date: Fri, 1 May 2026 06:07:59 -0500
Subject: [PATCH 09/17] refactor(engine): modularize turn tool setup

---
 crates/tui/src/core/engine.rs              | 77 +++-------------------
 crates/tui/src/core/engine/tests.rs        | 70 ++++++++++++++++++++
 crates/tui/src/core/engine/tool_catalog.rs | 35 ++++++++++
 crates/tui/src/core/engine/tool_setup.rs   | 52 +++++++++++++++
 4 files changed, 167 insertions(+), 67 deletions(-)
 create mode 100644 crates/tui/src/core/engine/tool_setup.rs

diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs
index 04bb656a..af3998c6 100644
--- a/crates/tui/src/core/engine.rs
+++ b/crates/tui/src/core/engine.rs
@@ -1302,43 +1302,7 @@ impl Engine {
         let plan_state = self.config.plan_state.clone();
 
         let tool_context = self.build_tool_context(mode, auto_approve);
-        let mut builder = if mode == AppMode::Plan {
-            ToolRegistryBuilder::new()
-                .with_read_only_file_tools()
-                .with_search_tools()
-                .with_git_tools()
-                .with_git_history_tools()
-                .with_diagnostics_tool()
-                .with_validation_tools()
-                .with_runtime_task_tools()
-                .with_todo_tool(todo_list.clone())
-                .with_plan_tool(plan_state.clone())
-        } else {
-            ToolRegistryBuilder::new()
-                .with_agent_tools(self.session.allow_shell)
-                .with_todo_tool(todo_list.clone())
-                .with_plan_tool(plan_state.clone())
-        };
-
-        builder = builder
-            .with_review_tool(self.deepseek_client.clone(), self.session.model.clone())
-            .with_rlm_tool(self.deepseek_client.clone(), self.session.model.clone())
-            .with_user_input_tool()
-            .with_parallel_tool();
-
-        if self.config.features.enabled(Feature::ApplyPatch) && mode != AppMode::Plan {
-            builder = builder.with_patch_tools();
-        }
-        if self.config.features.enabled(Feature::WebSearch) {
-            builder = builder.with_web_tools();
-        }
-        // Plan mode now keeps shell available — the existing approval flow
-        // and command-safety classifier gate destructive commands. Writes
-        // and patches stay blocked above; that's the only "destructive"
-        // boundary plan mode enforces by tool registration.
-        if self.config.features.enabled(Feature::ShellTool) && self.session.allow_shell {
-            builder = builder.with_shell_tools();
-        }
+        let builder = self.build_turn_tool_registry_builder(mode, todo_list, plan_state);
 
         // Mailbox for structured sub-agent envelopes (#128/#130). One per
         // turn: the receiver is drained by a short-lived task that converts
@@ -1411,31 +1375,9 @@ impl Engine {
         } else {
             Vec::new()
         };
-        let tools = tool_registry.as_ref().map(|registry| {
-            let mut tools = registry.to_api_tools();
-            for tool in &mut tools {
-                tool.defer_loading = Some(should_default_defer_tool(&tool.name, mode));
-            }
-            let mut mcp_tools = mcp_tools;
-            for tool in &mut mcp_tools {
-                if mode == AppMode::Yolo {
-                    tool.defer_loading = Some(false);
-                    continue;
-                }
-
-                let keep_loaded = matches!(
-                    tool.name.as_str(),
-                    "list_mcp_resources"
-                        | "list_mcp_resource_templates"
-                        | "mcp_read_resource"
-                        | "read_mcp_resource"
-                        | "mcp_get_prompt"
-                );
-                tool.defer_loading = Some(!keep_loaded);
-            }
-            tools.extend(mcp_tools);
-            tools
-        });
+        let tools = tool_registry
+            .as_ref()
+            .map(|registry| build_model_tool_catalog(registry.to_api_tools(), mcp_tools, mode));
 
         // Main turn loop
         let (status, error) = self
@@ -2464,6 +2406,7 @@ mod approval;
 mod capacity_flow;
 mod dispatch;
 mod tool_catalog;
+mod tool_setup;
 mod turn_loop;
 
 use self::approval::{ApprovalDecision, ApprovalResult, UserInputDecision};
@@ -2473,14 +2416,14 @@ use self::dispatch::{
     mcp_tool_is_read_only, parse_parallel_tool_calls, parse_tool_input,
     should_force_update_plan_first, should_parallelize_tool_batch, should_stop_after_plan_tool,
 };
-#[cfg(test)]
-use self::tool_catalog::TOOL_SEARCH_BM25_NAME;
 use self::tool_catalog::{
     CODE_EXECUTION_TOOL_NAME, MULTI_TOOL_PARALLEL_NAME, REQUEST_USER_INPUT_NAME,
-    active_tools_for_step, ensure_advanced_tooling, execute_code_execution_tool,
-    execute_tool_search, initial_active_tools, is_tool_search_tool,
-    maybe_activate_requested_deferred_tool, missing_tool_error_message, should_default_defer_tool,
+    active_tools_for_step, build_model_tool_catalog, ensure_advanced_tooling,
+    execute_code_execution_tool, execute_tool_search, initial_active_tools, is_tool_search_tool,
+    maybe_activate_requested_deferred_tool, missing_tool_error_message,
 };
+#[cfg(test)]
+use self::tool_catalog::{TOOL_SEARCH_BM25_NAME, should_default_defer_tool};
 
 #[cfg(test)]
 mod tests;
diff --git a/crates/tui/src/core/engine/tests.rs b/crates/tui/src/core/engine/tests.rs
index d4194473..8dfc291c 100644
--- a/crates/tui/src/core/engine/tests.rs
+++ b/crates/tui/src/core/engine/tests.rs
@@ -36,6 +36,20 @@ fn make_plan(
     }
 }
 
+fn api_tool(name: &str) -> Tool {
+    Tool {
+        tool_type: Some("function".to_string()),
+        name: name.to_string(),
+        description: format!("Test tool {name}"),
+        input_schema: json!({"type": "object"}),
+        allowed_callers: Some(vec!["direct".to_string()]),
+        defer_loading: None,
+        input_examples: None,
+        strict: None,
+        cache_control: None,
+    }
+}
+
 #[test]
 fn engine_handle_cancel_tracks_latest_turn_token() {
     let (mut engine, handle) = Engine::new(EngineConfig::default(), &Config::default());
@@ -205,6 +219,62 @@ fn non_yolo_mode_retains_default_defer_policy() {
     ));
 }
 
+#[test]
+fn model_tool_catalog_applies_native_and_mcp_deferral() {
+    let catalog = build_model_tool_catalog(
+        vec![
+            api_tool("read_file"),
+            api_tool("exec_shell"),
+            api_tool("project_map"),
+        ],
+        vec![api_tool("list_mcp_resources"), api_tool("mcp_server_write")],
+        AppMode::Agent,
+    );
+
+    let defer_loading = |name: &str| {
+        catalog
+            .iter()
+            .find(|tool| tool.name == name)
+            .and_then(|tool| tool.defer_loading)
+    };
+
+    assert_eq!(defer_loading("read_file"), Some(false));
+    assert_eq!(defer_loading("exec_shell"), Some(false));
+    assert_eq!(defer_loading("project_map"), Some(true));
+    assert_eq!(defer_loading("list_mcp_resources"), Some(false));
+    assert_eq!(defer_loading("mcp_server_write"), Some(true));
+}
+
+#[test]
+fn model_tool_catalog_keeps_everything_loaded_in_yolo_mode() {
+    let catalog = build_model_tool_catalog(
+        vec![api_tool("project_map")],
+        vec![api_tool("mcp_server_write")],
+        AppMode::Yolo,
+    );
+
+    assert!(catalog.iter().all(|tool| tool.defer_loading == Some(false)));
+}
+
+#[test]
+fn turn_tool_registry_builder_keeps_plan_mode_read_only_for_files() {
+    let (engine, _handle) = Engine::new(EngineConfig::default(), &Config::default());
+    let registry = engine
+        .build_turn_tool_registry_builder(
+            AppMode::Plan,
+            engine.config.todos.clone(),
+            engine.config.plan_state.clone(),
+        )
+        .build(engine.build_tool_context(AppMode::Plan, false));
+
+    assert!(registry.contains("read_file"));
+    assert!(registry.contains("list_dir"));
+    assert!(!registry.contains("write_file"));
+    assert!(!registry.contains("edit_file"));
+    assert!(registry.contains("update_plan"));
+    assert!(registry.contains("task_create"));
+}
+
 #[test]
 fn agent_mode_can_build_auto_approved_tool_context() {
     let (engine, _handle) = Engine::new(EngineConfig::default(), &Config::default());
diff --git a/crates/tui/src/core/engine/tool_catalog.rs b/crates/tui/src/core/engine/tool_catalog.rs
index 011d632e..2581fd4f 100644
--- a/crates/tui/src/core/engine/tool_catalog.rs
+++ b/crates/tui/src/core/engine/tool_catalog.rs
@@ -74,6 +74,41 @@ pub(super) fn should_default_defer_tool(name: &str, mode: AppMode) -> bool {
     )
 }
 
+pub(super) fn apply_native_tool_deferral(catalog: &mut [Tool], mode: AppMode) {
+    for tool in catalog {
+        tool.defer_loading = Some(should_default_defer_tool(&tool.name, mode));
+    }
+}
+
+fn should_keep_mcp_tool_loaded(name: &str) -> bool {
+    matches!(
+        name,
+        "list_mcp_resources"
+            | "list_mcp_resource_templates"
+            | "mcp_read_resource"
+            | "read_mcp_resource"
+            | "mcp_get_prompt"
+    )
+}
+
+pub(super) fn apply_mcp_tool_deferral(catalog: &mut [Tool], mode: AppMode) {
+    for tool in catalog {
+        tool.defer_loading =
+            Some(mode != AppMode::Yolo && !should_keep_mcp_tool_loaded(&tool.name));
+    }
+}
+
+pub(super) fn build_model_tool_catalog(
+    mut native_tools: Vec<Tool>,
+    mut mcp_tools: Vec<Tool>,
+    mode: AppMode,
+) -> Vec<Tool> {
+    apply_native_tool_deferral(&mut native_tools, mode);
+    apply_mcp_tool_deferral(&mut mcp_tools, mode);
+    native_tools.extend(mcp_tools);
+    native_tools
+}
+
 pub(super) fn ensure_advanced_tooling(catalog: &mut Vec<Tool>) {
     if !catalog.iter().any(|t| t.name == CODE_EXECUTION_TOOL_NAME) {
         catalog.push(Tool {
diff --git a/crates/tui/src/core/engine/tool_setup.rs b/crates/tui/src/core/engine/tool_setup.rs
new file mode 100644
index 00000000..220d4706
--- /dev/null
+++ b/crates/tui/src/core/engine/tool_setup.rs
@@ -0,0 +1,52 @@
+//! Per-turn tool registry setup.
+//!
+//! This keeps mode/feature-specific registry construction out of the send path.
+
+use super::*;
+
+impl Engine {
+    pub(super) fn build_turn_tool_registry_builder(
+        &self,
+        mode: AppMode,
+        todo_list: SharedTodoList,
+        plan_state: SharedPlanState,
+    ) -> ToolRegistryBuilder {
+        let mut builder = if mode == AppMode::Plan {
+            ToolRegistryBuilder::new()
+                .with_read_only_file_tools()
+                .with_search_tools()
+                .with_git_tools()
+                .with_git_history_tools()
+                .with_diagnostics_tool()
+                .with_validation_tools()
+                .with_runtime_task_tools()
+                .with_todo_tool(todo_list)
+                .with_plan_tool(plan_state)
+        } else {
+            ToolRegistryBuilder::new()
+                .with_agent_tools(self.session.allow_shell)
+                .with_todo_tool(todo_list)
+                .with_plan_tool(plan_state)
+        };
+
+        builder = builder
+            .with_review_tool(self.deepseek_client.clone(), self.session.model.clone())
+            .with_rlm_tool(self.deepseek_client.clone(), self.session.model.clone())
+            .with_user_input_tool()
+            .with_parallel_tool();
+
+        if self.config.features.enabled(Feature::ApplyPatch) && mode != AppMode::Plan {
+            builder = builder.with_patch_tools();
+        }
+        if self.config.features.enabled(Feature::WebSearch) {
+            builder = builder.with_web_tools();
+        }
+        // Plan mode keeps shell available when the session allows it; command
+        // safety and approval checks still gate risky commands.
+        if self.config.features.enabled(Feature::ShellTool) && self.session.allow_shell {
+            builder = builder.with_shell_tools();
+        }
+
+        builder
+    }
+}

From 8dd5ed38d7fcfedac2cf600f5bbf97c90f836e33 Mon Sep 17 00:00:00 2001
From: Hunter Bown <hmbown@gmail.com>
Date: Fri, 1 May 2026 07:09:30 -0500
Subject: [PATCH 10/17] refactor(engine): extract context helpers

---
 crates/tui/src/core/engine.rs               | 279 +-------------------
 crates/tui/src/core/engine/capacity_flow.rs |   2 +
 crates/tui/src/core/engine/context.rs       | 279 ++++++++++++++++++++
 crates/tui/src/core/engine/tests.rs         |   2 +
 4 files changed, 294 insertions(+), 268 deletions(-)
 create mode 100644 crates/tui/src/core/engine/context.rs

diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs
index af3998c6..d98e0700 100644
--- a/crates/tui/src/core/engine.rs
+++ b/crates/tui/src/core/engine.rs
@@ -22,7 +22,7 @@ use tokio_util::sync::CancellationToken;
 
 use crate::client::DeepSeekClient;
 use crate::compaction::{
-    CompactionConfig, compact_messages_safe, estimate_tokens, merge_system_prompts, should_compact,
+    CompactionConfig, compact_messages_safe, merge_system_prompts, should_compact,
 };
 use crate::config::{Config, DEFAULT_MAX_SUBAGENTS, DEFAULT_TEXT_MODEL};
 use crate::cycle_manager::{
@@ -35,7 +35,7 @@ use crate::llm_client::LlmClient;
 use crate::mcp::McpPool;
 use crate::models::{
     ContentBlock, ContentBlockStart, DEFAULT_CONTEXT_WINDOW_TOKENS, Delta, Message, MessageRequest,
-    StreamEvent, SystemBlock, SystemPrompt, Tool, ToolCaller, Usage, context_window_for_model,
+    StreamEvent, SystemPrompt, Tool, ToolCaller, Usage,
 };
 use crate::prompts;
 use crate::seam_manager::{SeamConfig, SeamManager};
@@ -358,38 +358,6 @@ fn should_transparently_retry_stream(
 ) -> bool {
     !any_content_received && transparent_attempts < MAX_TRANSPARENT_STREAM_RETRIES && !cancelled
 }
-/// Max output tokens requested for normal agent turns. Generous on purpose:
-/// V4 thinking models can produce tens of thousands of reasoning tokens on
-/// hard prompts before the visible reply, and DeepSeek V4 ships with a 1M
-/// context window. v0.7.5 keeps this cap fixed instead of silently lowering
-/// `max_tokens` near pressure; hard-cycle/preflight checks reserve this budget
-/// plus safety headroom before sending the next request.
-const TURN_MAX_OUTPUT_TOKENS: u32 = 262_144;
-/// Keep this many most recent messages when emergency trimming is required.
-const MIN_RECENT_MESSAGES_TO_KEEP: usize = 4;
-/// Allow a few emergency recovery attempts before failing the turn.
-const MAX_CONTEXT_RECOVERY_ATTEMPTS: u8 = 2;
-/// Reserve additional headroom to avoid hitting provider hard limits.
-const CONTEXT_HEADROOM_TOKENS: usize = 1024;
-/// Hard cap for any tool output inserted into model context.
-const TOOL_RESULT_CONTEXT_HARD_LIMIT_CHARS: usize = 12_000;
-/// Soft cap for known noisy tools inserted into model context.
-const TOOL_RESULT_CONTEXT_SOFT_LIMIT_CHARS: usize = 2_000;
-/// Snippet length kept when compacting tool output for model context.
-const TOOL_RESULT_CONTEXT_SNIPPET_CHARS: usize = 900;
-/// Hard cap for tool output inserted into a large-context model.
-const LARGE_CONTEXT_TOOL_RESULT_HARD_LIMIT_CHARS: usize = 180_000;
-/// Soft cap for known noisy tools inserted into a large-context model.
-const LARGE_CONTEXT_TOOL_RESULT_SOFT_LIMIT_CHARS: usize = 60_000;
-/// Snippet length kept when compacting large-context tool output.
-const LARGE_CONTEXT_TOOL_RESULT_SNIPPET_CHARS: usize = 40_000;
-/// Context window size at which tool output limits can be relaxed.
-const LARGE_CONTEXT_WINDOW_TOKENS: u32 = 500_000;
-/// Max chars to keep from metadata-provided output summaries.
-const TOOL_RESULT_METADATA_SUMMARY_CHARS: usize = 320;
-const COMPACTION_SUMMARY_MARKER: &str = "Conversation Summary (Auto-Generated)";
-const WORKING_SET_SUMMARY_MARKER: &str = "## Repo Working Set";
-
 pub(crate) const TOOL_CALL_START_MARKERS: [&str; 5] = [
     "[TOOL_CALL]",
     "<deepseek:tool_call",
@@ -565,240 +533,6 @@ fn format_tool_error(err: &ToolError, tool_name: &str) -> String {
     }
 }
 
-fn summarize_text(text: &str, limit: usize) -> String {
-    if text.chars().count() <= limit {
-        return text.to_string();
-    }
-    let take = limit.saturating_sub(3);
-    let mut out: String = text.chars().take(take).collect();
-    out.push_str("...");
-    out
-}
-
-fn summarize_text_head_tail(text: &str, limit: usize) -> String {
-    let total = text.chars().count();
-    if total <= limit {
-        return text.to_string();
-    }
-    if limit <= 20 {
-        return summarize_text(text, limit);
-    }
-
-    let marker = "\n\n[... output truncated for context ...]\n\n";
-    let marker_len = marker.chars().count();
-    if limit <= marker_len + 20 {
-        return summarize_text(text, limit);
-    }
-
-    let remaining = limit - marker_len;
-    let head_len = remaining.saturating_mul(2) / 3;
-    let tail_len = remaining.saturating_sub(head_len);
-    let head: String = text.chars().take(head_len).collect();
-    let tail_vec: Vec<char> = text.chars().rev().take(tail_len).collect();
-    let tail: String = tail_vec.into_iter().rev().collect();
-    format!("{head}{marker}{tail}")
-}
-
-fn tool_result_is_noisy(tool_name: &str) -> bool {
-    matches!(
-        tool_name,
-        "exec_shell"
-            | "exec_shell_wait"
-            | "exec_shell_interact"
-            | "multi_tool_use.parallel"
-            | "web_search"
-    )
-}
-
-fn tool_result_metadata_summary(metadata: Option<&serde_json::Value>) -> Option<String> {
-    let obj = metadata?.as_object()?;
-    for key in ["summary", "stdout_summary", "stderr_summary", "message"] {
-        if let Some(text) = obj.get(key).and_then(serde_json::Value::as_str) {
-            let trimmed = text.trim();
-            if !trimmed.is_empty() {
-                return Some(summarize_text(trimmed, TOOL_RESULT_METADATA_SUMMARY_CHARS));
-            }
-        }
-    }
-    None
-}
-
-#[derive(Debug, Clone, Copy)]
-struct ToolResultContextLimits {
-    hard_limit_chars: usize,
-    noisy_soft_limit_chars: usize,
-    snippet_chars: usize,
-}
-
-fn tool_result_context_limits_for_model(model: &str) -> ToolResultContextLimits {
-    let is_large_context =
-        context_window_for_model(model).is_some_and(|window| window >= LARGE_CONTEXT_WINDOW_TOKENS);
-
-    if is_large_context {
-        ToolResultContextLimits {
-            hard_limit_chars: LARGE_CONTEXT_TOOL_RESULT_HARD_LIMIT_CHARS,
-            noisy_soft_limit_chars: LARGE_CONTEXT_TOOL_RESULT_SOFT_LIMIT_CHARS,
-            snippet_chars: LARGE_CONTEXT_TOOL_RESULT_SNIPPET_CHARS,
-        }
-    } else {
-        ToolResultContextLimits {
-            hard_limit_chars: TOOL_RESULT_CONTEXT_HARD_LIMIT_CHARS,
-            noisy_soft_limit_chars: TOOL_RESULT_CONTEXT_SOFT_LIMIT_CHARS,
-            snippet_chars: TOOL_RESULT_CONTEXT_SNIPPET_CHARS,
-        }
-    }
-}
-
-pub(crate) fn compact_tool_result_for_context(
-    model: &str,
-    tool_name: &str,
-    output: &ToolResult,
-) -> String {
-    let raw = output.content.trim();
-    if raw.is_empty() {
-        return String::new();
-    }
-
-    let limits = tool_result_context_limits_for_model(model);
-    let raw_chars = raw.chars().count();
-    let should_compact = raw_chars > limits.hard_limit_chars
-        || (tool_result_is_noisy(tool_name) && raw_chars > limits.noisy_soft_limit_chars);
-    if !should_compact {
-        return raw.to_string();
-    }
-
-    let snippet = summarize_text_head_tail(raw, limits.snippet_chars);
-    let omitted = raw_chars.saturating_sub(snippet.chars().count());
-    let summary = tool_result_metadata_summary(output.metadata.as_ref());
-
-    if let Some(summary) = summary {
-        format!(
-            "[{tool_name} output compacted to protect context]\nSummary: {summary}\nSnippet: {snippet}\n(Original: {raw_chars} chars, omitted: {omitted} chars.)"
-        )
-    } else {
-        format!(
-            "[{tool_name} output compacted to protect context]\nSnippet: {snippet}\n(Original: {raw_chars} chars, omitted: {omitted} chars.)"
-        )
-    }
-}
-
-fn extract_compaction_summary_prompt(prompt: Option<SystemPrompt>) -> Option<SystemPrompt> {
-    match prompt {
-        Some(SystemPrompt::Blocks(blocks)) => {
-            let summary_blocks: Vec<_> = blocks
-                .into_iter()
-                .filter(|block| block.text.contains(COMPACTION_SUMMARY_MARKER))
-                .collect();
-            if summary_blocks.is_empty() {
-                None
-            } else {
-                Some(SystemPrompt::Blocks(summary_blocks))
-            }
-        }
-        Some(SystemPrompt::Text(text)) => {
-            if text.contains(COMPACTION_SUMMARY_MARKER) {
-                Some(SystemPrompt::Text(text))
-            } else {
-                None
-            }
-        }
-        None => None,
-    }
-}
-
-fn remove_working_set_summary(prompt: Option<&SystemPrompt>) -> Option<SystemPrompt> {
-    match prompt {
-        Some(SystemPrompt::Blocks(blocks)) => {
-            let filtered: Vec<SystemBlock> = blocks
-                .iter()
-                .filter(|block| !block.text.contains(WORKING_SET_SUMMARY_MARKER))
-                .cloned()
-                .collect();
-            if filtered.is_empty() {
-                None
-            } else {
-                Some(SystemPrompt::Blocks(filtered))
-            }
-        }
-        Some(SystemPrompt::Text(text)) => Some(SystemPrompt::Text(text.clone())),
-        None => None,
-    }
-}
-
-fn append_working_set_summary(
-    prompt: Option<SystemPrompt>,
-    working_set_summary: Option<&str>,
-) -> Option<SystemPrompt> {
-    let Some(summary) = working_set_summary.map(str::trim).filter(|s| !s.is_empty()) else {
-        return prompt;
-    };
-    let working_set_block = SystemBlock {
-        block_type: "text".to_string(),
-        text: summary.to_string(),
-        cache_control: None,
-    };
-
-    match prompt {
-        Some(SystemPrompt::Text(text)) => Some(SystemPrompt::Blocks(vec![
-            SystemBlock {
-                block_type: "text".to_string(),
-                text,
-                cache_control: None,
-            },
-            working_set_block,
-        ])),
-        Some(SystemPrompt::Blocks(mut blocks)) => {
-            blocks.retain(|block| !block.text.contains(WORKING_SET_SUMMARY_MARKER));
-            blocks.push(working_set_block);
-            Some(SystemPrompt::Blocks(blocks))
-        }
-        None => Some(SystemPrompt::Blocks(vec![working_set_block])),
-    }
-}
-
-fn estimate_text_tokens_conservative(text: &str) -> usize {
-    text.chars().count().div_ceil(3)
-}
-
-fn estimate_system_tokens_conservative(system: Option<&SystemPrompt>) -> usize {
-    match system {
-        Some(SystemPrompt::Text(text)) => estimate_text_tokens_conservative(text),
-        Some(SystemPrompt::Blocks(blocks)) => blocks
-            .iter()
-            .map(|block| estimate_text_tokens_conservative(&block.text))
-            .sum(),
-        None => 0,
-    }
-}
-
-fn estimate_input_tokens_conservative(
-    messages: &[Message],
-    system: Option<&SystemPrompt>,
-) -> usize {
-    let message_tokens = estimate_tokens(messages).saturating_mul(3).div_ceil(2);
-    let system_tokens = estimate_system_tokens_conservative(system);
-    let framing_overhead = messages.len().saturating_mul(12).saturating_add(48);
-    message_tokens
-        .saturating_add(system_tokens)
-        .saturating_add(framing_overhead)
-}
-
-fn context_input_budget(model: &str, requested_output_tokens: u32) -> Option<usize> {
-    let window = usize::try_from(context_window_for_model(model)?).ok()?;
-    let output = usize::try_from(requested_output_tokens).ok()?;
-    window
-        .checked_sub(output)
-        .and_then(|v| v.checked_sub(CONTEXT_HEADROOM_TOKENS))
-}
-
-fn turn_response_headroom_tokens() -> u64 {
-    u64::from(TURN_MAX_OUTPUT_TOKENS).saturating_add(CONTEXT_HEADROOM_TOKENS as u64)
-}
-
-fn is_context_length_error_message(message: &str) -> bool {
-    crate::error_taxonomy::classify_error_message(message) == ErrorCategory::InvalidInput
-}
-
 fn emit_tool_audit(event: serde_json::Value) {
     let Some(path) = std::env::var_os("DEEPSEEK_TOOL_AUDIT_LOG") else {
         return;
@@ -2404,6 +2138,15 @@ pub(crate) fn mock_engine_handle() -> MockEngineHandle {
 
 mod approval;
 mod capacity_flow;
+mod context;
+pub(crate) use context::compact_tool_result_for_context;
+use context::{
+    COMPACTION_SUMMARY_MARKER, MAX_CONTEXT_RECOVERY_ATTEMPTS, MIN_RECENT_MESSAGES_TO_KEEP,
+    TURN_MAX_OUTPUT_TOKENS, append_working_set_summary, context_input_budget,
+    estimate_input_tokens_conservative, extract_compaction_summary_prompt,
+    is_context_length_error_message, remove_working_set_summary, summarize_text,
+    turn_response_headroom_tokens,
+};
 mod dispatch;
 mod tool_catalog;
 mod tool_setup;
diff --git a/crates/tui/src/core/engine/capacity_flow.rs b/crates/tui/src/core/engine/capacity_flow.rs
index f5fbfe4a..f280e644 100644
--- a/crates/tui/src/core/engine/capacity_flow.rs
+++ b/crates/tui/src/core/engine/capacity_flow.rs
@@ -7,6 +7,8 @@
 
 use super::*;
 
+use crate::models::context_window_for_model;
+
 impl Engine {
     pub(super) async fn run_capacity_pre_request_checkpoint(
         &mut self,
diff --git a/crates/tui/src/core/engine/context.rs b/crates/tui/src/core/engine/context.rs
new file mode 100644
index 00000000..c1e49686
--- /dev/null
+++ b/crates/tui/src/core/engine/context.rs
@@ -0,0 +1,279 @@
+//! Context budgeting and prompt-shaping helpers for the engine.
+//!
+//! These functions are shared by the streaming turn loop, capacity flow, and
+//! engine session maintenance code. Keeping them here prevents the top-level
+//! engine module from accumulating unrelated context-policy details.
+
+use crate::compaction::estimate_tokens;
+use crate::error_taxonomy::ErrorCategory;
+use crate::models::{Message, SystemBlock, SystemPrompt, context_window_for_model};
+use crate::tools::spec::ToolResult;
+
+/// Max output tokens requested for normal agent turns. Generous on purpose:
+/// V4 thinking models can produce tens of thousands of reasoning tokens on
+/// hard prompts before the visible reply, and DeepSeek V4 ships with a 1M
+/// context window. v0.7.5 keeps this cap fixed instead of silently lowering
+/// `max_tokens` near pressure; hard-cycle/preflight checks reserve this budget
+/// plus safety headroom before sending the next request.
+pub(super) const TURN_MAX_OUTPUT_TOKENS: u32 = 262_144;
+/// Keep this many most recent messages when emergency trimming is required.
+pub(super) const MIN_RECENT_MESSAGES_TO_KEEP: usize = 4;
+/// Allow a few emergency recovery attempts before failing the turn.
+pub(super) const MAX_CONTEXT_RECOVERY_ATTEMPTS: u8 = 2;
+/// Reserve additional headroom to avoid hitting provider hard limits.
+const CONTEXT_HEADROOM_TOKENS: usize = 1024;
+/// Hard cap for any tool output inserted into model context.
+const TOOL_RESULT_CONTEXT_HARD_LIMIT_CHARS: usize = 12_000;
+/// Soft cap for known noisy tools inserted into model context.
+const TOOL_RESULT_CONTEXT_SOFT_LIMIT_CHARS: usize = 2_000;
+/// Snippet length kept when compacting tool output for model context.
+const TOOL_RESULT_CONTEXT_SNIPPET_CHARS: usize = 900;
+/// Hard cap for tool output inserted into a large-context model.
+const LARGE_CONTEXT_TOOL_RESULT_HARD_LIMIT_CHARS: usize = 180_000;
+/// Soft cap for known noisy tools inserted into a large-context model.
+const LARGE_CONTEXT_TOOL_RESULT_SOFT_LIMIT_CHARS: usize = 60_000;
+/// Snippet length kept when compacting large-context tool output.
+const LARGE_CONTEXT_TOOL_RESULT_SNIPPET_CHARS: usize = 40_000;
+/// Context window size at which tool output limits can be relaxed.
+const LARGE_CONTEXT_WINDOW_TOKENS: u32 = 500_000;
+/// Max chars to keep from metadata-provided output summaries.
+const TOOL_RESULT_METADATA_SUMMARY_CHARS: usize = 320;
+
+pub(super) const COMPACTION_SUMMARY_MARKER: &str = "Conversation Summary (Auto-Generated)";
+pub(super) const WORKING_SET_SUMMARY_MARKER: &str = "## Repo Working Set";
+
+#[derive(Debug, Clone, Copy)]
+struct ToolResultContextLimits {
+    hard_limit_chars: usize,
+    noisy_soft_limit_chars: usize,
+    snippet_chars: usize,
+}
+
+pub(super) fn summarize_text(text: &str, limit: usize) -> String {
+    if text.chars().count() <= limit {
+        return text.to_string();
+    }
+    let take = limit.saturating_sub(3);
+    let mut out: String = text.chars().take(take).collect();
+    out.push_str("...");
+    out
+}
+
+fn summarize_text_head_tail(text: &str, limit: usize) -> String {
+    let total = text.chars().count();
+    if total <= limit {
+        return text.to_string();
+    }
+    if limit <= 20 {
+        return summarize_text(text, limit);
+    }
+
+    let marker = "\n\n[... output truncated for context ...]\n\n";
+    let marker_len = marker.chars().count();
+    if limit <= marker_len + 20 {
+        return summarize_text(text, limit);
+    }
+
+    let remaining = limit - marker_len;
+    let head_len = remaining.saturating_mul(2) / 3;
+    let tail_len = remaining.saturating_sub(head_len);
+    let head: String = text.chars().take(head_len).collect();
+    let tail_vec: Vec<char> = text.chars().rev().take(tail_len).collect();
+    let tail: String = tail_vec.into_iter().rev().collect();
+    format!("{head}{marker}{tail}")
+}
+
+fn tool_result_is_noisy(tool_name: &str) -> bool {
+    matches!(
+        tool_name,
+        "exec_shell"
+            | "exec_shell_wait"
+            | "exec_shell_interact"
+            | "multi_tool_use.parallel"
+            | "web_search"
+    )
+}
+
+fn tool_result_metadata_summary(metadata: Option<&serde_json::Value>) -> Option<String> {
+    let obj = metadata?.as_object()?;
+    for key in ["summary", "stdout_summary", "stderr_summary", "message"] {
+        if let Some(text) = obj.get(key).and_then(serde_json::Value::as_str) {
+            let trimmed = text.trim();
+            if !trimmed.is_empty() {
+                return Some(summarize_text(trimmed, TOOL_RESULT_METADATA_SUMMARY_CHARS));
+            }
+        }
+    }
+    None
+}
+
+fn tool_result_context_limits_for_model(model: &str) -> ToolResultContextLimits {
+    let is_large_context =
+        context_window_for_model(model).is_some_and(|window| window >= LARGE_CONTEXT_WINDOW_TOKENS);
+
+    if is_large_context {
+        ToolResultContextLimits {
+            hard_limit_chars: LARGE_CONTEXT_TOOL_RESULT_HARD_LIMIT_CHARS,
+            noisy_soft_limit_chars: LARGE_CONTEXT_TOOL_RESULT_SOFT_LIMIT_CHARS,
+            snippet_chars: LARGE_CONTEXT_TOOL_RESULT_SNIPPET_CHARS,
+        }
+    } else {
+        ToolResultContextLimits {
+            hard_limit_chars: TOOL_RESULT_CONTEXT_HARD_LIMIT_CHARS,
+            noisy_soft_limit_chars: TOOL_RESULT_CONTEXT_SOFT_LIMIT_CHARS,
+            snippet_chars: TOOL_RESULT_CONTEXT_SNIPPET_CHARS,
+        }
+    }
+}
+
+pub(crate) fn compact_tool_result_for_context(
+    model: &str,
+    tool_name: &str,
+    output: &ToolResult,
+) -> String {
+    let raw = output.content.trim();
+    if raw.is_empty() {
+        return String::new();
+    }
+
+    let limits = tool_result_context_limits_for_model(model);
+    let raw_chars = raw.chars().count();
+    let should_compact = raw_chars > limits.hard_limit_chars
+        || (tool_result_is_noisy(tool_name) && raw_chars > limits.noisy_soft_limit_chars);
+    if !should_compact {
+        return raw.to_string();
+    }
+
+    let snippet = summarize_text_head_tail(raw, limits.snippet_chars);
+    let omitted = raw_chars.saturating_sub(snippet.chars().count());
+    let summary = tool_result_metadata_summary(output.metadata.as_ref());
+
+    if let Some(summary) = summary {
+        format!(
+            "[{tool_name} output compacted to protect context]\nSummary: {summary}\nSnippet: {snippet}\n(Original: {raw_chars} chars, omitted: {omitted} chars.)"
+        )
+    } else {
+        format!(
+            "[{tool_name} output compacted to protect context]\nSnippet: {snippet}\n(Original: {raw_chars} chars, omitted: {omitted} chars.)"
+        )
+    }
+}
+
+pub(super) fn extract_compaction_summary_prompt(
+    prompt: Option<SystemPrompt>,
+) -> Option<SystemPrompt> {
+    match prompt {
+        Some(SystemPrompt::Blocks(blocks)) => {
+            let summary_blocks: Vec<_> = blocks
+                .into_iter()
+                .filter(|block| block.text.contains(COMPACTION_SUMMARY_MARKER))
+                .collect();
+            if summary_blocks.is_empty() {
+                None
+            } else {
+                Some(SystemPrompt::Blocks(summary_blocks))
+            }
+        }
+        Some(SystemPrompt::Text(text)) => {
+            if text.contains(COMPACTION_SUMMARY_MARKER) {
+                Some(SystemPrompt::Text(text))
+            } else {
+                None
+            }
+        }
+        None => None,
+    }
+}
+
+pub(super) fn remove_working_set_summary(prompt: Option<&SystemPrompt>) -> Option<SystemPrompt> {
+    match prompt {
+        Some(SystemPrompt::Blocks(blocks)) => {
+            let filtered: Vec<SystemBlock> = blocks
+                .iter()
+                .filter(|block| !block.text.contains(WORKING_SET_SUMMARY_MARKER))
+                .cloned()
+                .collect();
+            if filtered.is_empty() {
+                None
+            } else {
+                Some(SystemPrompt::Blocks(filtered))
+            }
+        }
+        Some(SystemPrompt::Text(text)) => Some(SystemPrompt::Text(text.clone())),
+        None => None,
+    }
+}
+
+pub(super) fn append_working_set_summary(
+    prompt: Option<SystemPrompt>,
+    working_set_summary: Option<&str>,
+) -> Option<SystemPrompt> {
+    let Some(summary) = working_set_summary.map(str::trim).filter(|s| !s.is_empty()) else {
+        return prompt;
+    };
+    let working_set_block = SystemBlock {
+        block_type: "text".to_string(),
+        text: summary.to_string(),
+        cache_control: None,
+    };
+
+    match prompt {
+        Some(SystemPrompt::Text(text)) => Some(SystemPrompt::Blocks(vec![
+            SystemBlock {
+                block_type: "text".to_string(),
+                text,
+                cache_control: None,
+            },
+            working_set_block,
+        ])),
+        Some(SystemPrompt::Blocks(mut blocks)) => {
+            blocks.retain(|block| !block.text.contains(WORKING_SET_SUMMARY_MARKER));
+            blocks.push(working_set_block);
+            Some(SystemPrompt::Blocks(blocks))
+        }
+        None => Some(SystemPrompt::Blocks(vec![working_set_block])),
+    }
+}
+
+fn estimate_text_tokens_conservative(text: &str) -> usize {
+    text.chars().count().div_ceil(3)
+}
+
+fn estimate_system_tokens_conservative(system: Option<&SystemPrompt>) -> usize {
+    match system {
+        Some(SystemPrompt::Text(text)) => estimate_text_tokens_conservative(text),
+        Some(SystemPrompt::Blocks(blocks)) => blocks
+            .iter()
+            .map(|block| estimate_text_tokens_conservative(&block.text))
+            .sum(),
+        None => 0,
+    }
+}
+
+pub(super) fn estimate_input_tokens_conservative(
+    messages: &[Message],
+    system: Option<&SystemPrompt>,
+) -> usize {
+    let message_tokens = estimate_tokens(messages).saturating_mul(3).div_ceil(2);
+    let system_tokens = estimate_system_tokens_conservative(system);
+    let framing_overhead = messages.len().saturating_mul(12).saturating_add(48);
+    message_tokens
+        .saturating_add(system_tokens)
+        .saturating_add(framing_overhead)
+}
+
+pub(super) fn context_input_budget(model: &str, requested_output_tokens: u32) -> Option<usize> {
+    let window = usize::try_from(context_window_for_model(model)?).ok()?;
+    let output = usize::try_from(requested_output_tokens).ok()?;
+    window
+        .checked_sub(output)
+        .and_then(|v| v.checked_sub(CONTEXT_HEADROOM_TOKENS))
+}
+
+pub(super) fn turn_response_headroom_tokens() -> u64 {
+    u64::from(TURN_MAX_OUTPUT_TOKENS).saturating_add(CONTEXT_HEADROOM_TOKENS as u64)
+}
+
+pub(super) fn is_context_length_error_message(message: &str) -> bool {
+    crate::error_taxonomy::classify_error_message(message) == ErrorCategory::InvalidInput
+}
diff --git a/crates/tui/src/core/engine/tests.rs b/crates/tui/src/core/engine/tests.rs
index 8dfc291c..f4a7f7ed 100644
--- a/crates/tui/src/core/engine/tests.rs
+++ b/crates/tui/src/core/engine/tests.rs
@@ -1,5 +1,7 @@
 use super::*;
 
+use super::context::WORKING_SET_SUMMARY_MARKER;
+use crate::models::SystemBlock;
 use serde_json::json;
 use std::fs;
 use std::path::PathBuf;

From 8379230ef1fbdc78d72a946fc5da551c32093a2f Mon Sep 17 00:00:00 2001
From: Hunter Bown <hmbown@gmail.com>
Date: Fri, 1 May 2026 08:07:22 -0500
Subject: [PATCH 11/17] refactor(engine): split tool execution helpers

---
 crates/tui/src/core/engine.rs                | 304 +------------------
 crates/tui/src/core/engine/lsp_hooks.rs      | 128 ++++++++
 crates/tui/src/core/engine/tool_execution.rs | 197 ++++++++++++
 3 files changed, 330 insertions(+), 299 deletions(-)
 create mode 100644 crates/tui/src/core/engine/lsp_hooks.rs
 create mode 100644 crates/tui/src/core/engine/tool_execution.rs

diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs
index d98e0700..6e2e5911 100644
--- a/crates/tui/src/core/engine.rs
+++ b/crates/tui/src/core/engine.rs
@@ -11,7 +11,6 @@ use std::collections::HashMap;
 use std::path::PathBuf;
 use std::sync::{Arc, Mutex as StdMutex};
 use std::time::{Duration, Instant};
-use std::{fs::OpenOptions, io::Write};
 
 use anyhow::Result;
 use futures_util::StreamExt;
@@ -427,67 +426,6 @@ fn caller_type_for_tool_use(caller: Option<&ToolCaller>) -> &str {
     caller.map_or("direct", |c| c.caller_type.as_str())
 }
 
-/// #136: derive the file path(s) edited by a tool call. Returns the empty
-/// vec for tools that don't modify files. We intentionally only handle the
-/// three known edit tools — adding more (e.g. specialized refactor tools)
-/// is a one-line change here.
-fn edited_paths_for_tool(tool_name: &str, input: &serde_json::Value) -> Vec<PathBuf> {
-    match tool_name {
-        "edit_file" | "write_file" => {
-            if let Some(path) = input.get("path").and_then(|v| v.as_str()) {
-                vec![PathBuf::from(path)]
-            } else {
-                Vec::new()
-            }
-        }
-        "apply_patch" => {
-            // `apply_patch` accepts either a `path` override or a list of
-            // `files` (each `{path, content}`). We try both shapes.
-            let mut out = Vec::new();
-            if let Some(path) = input.get("path").and_then(|v| v.as_str()) {
-                out.push(PathBuf::from(path));
-            }
-            if let Some(files) = input.get("files").and_then(|v| v.as_array()) {
-                for entry in files {
-                    if let Some(path) = entry.get("path").and_then(|v| v.as_str()) {
-                        out.push(PathBuf::from(path));
-                    }
-                }
-            }
-            // Fallback: parse `---`/`+++` headers from a unified diff payload.
-            if out.is_empty()
-                && let Some(patch) = input.get("patch").and_then(|v| v.as_str())
-            {
-                out.extend(parse_patch_paths(patch));
-            }
-            out
-        }
-        _ => Vec::new(),
-    }
-}
-
-/// Lightweight parser for `+++ b/<path>` lines in a unified diff. Used as a
-/// fallback when `apply_patch` is invoked with raw `patch` text and no
-/// `path`/`files` override. We deliberately keep this dumb — the real
-/// `apply_patch` tool already validates the patch shape; we only need a
-/// best-effort hint for the LSP hook.
-fn parse_patch_paths(patch: &str) -> Vec<PathBuf> {
-    let mut out = Vec::new();
-    for line in patch.lines() {
-        if let Some(rest) = line.strip_prefix("+++ ") {
-            let trimmed = rest.trim();
-            // Strip leading `b/` per git diff conventions.
-            let path = trimmed.strip_prefix("b/").unwrap_or(trimmed);
-            // Skip `/dev/null` (deletion).
-            if path == "/dev/null" {
-                continue;
-            }
-            out.push(PathBuf::from(path));
-        }
-    }
-    out
-}
-
 fn caller_allowed_for_tool(caller: Option<&ToolCaller>, tool_def: Option<&Tool>) -> bool {
     let requested = caller_type_for_tool_use(caller);
     if let Some(def) = tool_def
@@ -533,23 +471,6 @@ fn format_tool_error(err: &ToolError, tool_name: &str) -> String {
     }
 }
 
-fn emit_tool_audit(event: serde_json::Value) {
-    let Some(path) = std::env::var_os("DEEPSEEK_TOOL_AUDIT_LOG") else {
-        return;
-    };
-    let line = match serde_json::to_string(&event) {
-        Ok(line) => line,
-        Err(_) => return,
-    };
-    let path = PathBuf::from(path);
-    if let Some(parent) = path.parent() {
-        let _ = std::fs::create_dir_all(parent);
-    }
-    if let Ok(mut file) = OpenOptions::new().create(true).append(true).open(path) {
-        let _ = writeln!(file, "{line}");
-    }
-}
-
 impl Engine {
     fn reset_cancel_token(&mut self) {
         let token = CancellationToken::new();
@@ -889,57 +810,6 @@ impl Engine {
         self.emit_session_updated().await;
     }
 
-    /// #136: post-edit hook. Inspects the tool name + input, derives the
-    /// edited file path, and asks the LSP manager for diagnostics. The
-    /// rendered block is queued in `pending_lsp_blocks` and flushed to the
-    /// session message stream just before the next API request. Failure is
-    /// silent by design — a missing/crashing LSP server must never block
-    /// the agent.
-    async fn run_post_edit_lsp_hook(&mut self, tool_name: &str, tool_input: &serde_json::Value) {
-        if !self.lsp_manager.config().enabled {
-            return;
-        }
-        let paths = edited_paths_for_tool(tool_name, tool_input);
-        for path in paths {
-            let absolute = if path.is_absolute() {
-                path.clone()
-            } else {
-                self.session.workspace.join(&path)
-            };
-            // Use a short edit-sequence based on the existing turn counter so
-            // log output stays correlated even though we do not currently
-            // batch by sequence.
-            let seq = self.turn_counter;
-            if let Some(block) = self.lsp_manager.diagnostics_for(&absolute, seq).await {
-                self.pending_lsp_blocks.push(block);
-            }
-        }
-    }
-
-    /// Drain `pending_lsp_blocks` into a single synthetic user message so the
-    /// model sees the diagnostics on its next request. Skips when nothing is
-    /// pending. The message uses the standard `text` content block shape
-    /// (the same shape as the post-tool steer messages) so we don't need to
-    /// invent a new envelope.
-    async fn flush_pending_lsp_diagnostics(&mut self) {
-        if self.pending_lsp_blocks.is_empty() {
-            return;
-        }
-        let blocks = std::mem::take(&mut self.pending_lsp_blocks);
-        let rendered = crate::lsp::render_blocks(&blocks);
-        if rendered.is_empty() {
-            return;
-        }
-        self.add_session_message(Message {
-            role: "user".to_string(),
-            content: vec![ContentBlock::Text {
-                text: rendered,
-                cache_control: None,
-            }],
-        })
-        .await;
-    }
-
     /// Handle a send message operation
     #[allow(clippy::too_many_arguments)]
     async fn handle_send_message(
@@ -1550,175 +1420,6 @@ impl Engine {
         pool.to_api_tools()
     }
 
-    async fn execute_mcp_tool_with_pool(
-        pool: Arc<AsyncMutex<McpPool>>,
-        name: &str,
-        input: serde_json::Value,
-    ) -> Result<ToolResult, ToolError> {
-        let mut pool = pool.lock().await;
-        let result = pool
-            .call_tool(name, input)
-            .await
-            .map_err(|e| ToolError::execution_failed(format!("MCP tool failed: {e}")))?;
-        let content = serde_json::to_string_pretty(&result).unwrap_or_else(|_| result.to_string());
-        Ok(ToolResult::success(content))
-    }
-
-    async fn execute_parallel_tool(
-        &mut self,
-        input: serde_json::Value,
-        tool_registry: Option<&crate::tools::ToolRegistry>,
-        tool_exec_lock: Arc<RwLock<()>>,
-    ) -> Result<ToolResult, ToolError> {
-        let calls = parse_parallel_tool_calls(&input)?;
-        let mcp_pool = if calls.iter().any(|(tool, _)| McpPool::is_mcp_tool(tool)) {
-            Some(self.ensure_mcp_pool().await?)
-        } else {
-            None
-        };
-        let Some(registry) = tool_registry else {
-            return Err(ToolError::not_available(
-                "tool registry unavailable for multi_tool_use.parallel",
-            ));
-        };
-
-        let mut tasks = FuturesUnordered::new();
-        for (tool_name, tool_input) in calls {
-            if tool_name == MULTI_TOOL_PARALLEL_NAME {
-                return Err(ToolError::invalid_input(
-                    "multi_tool_use.parallel cannot call itself",
-                ));
-            }
-            if McpPool::is_mcp_tool(&tool_name) {
-                if !mcp_tool_is_parallel_safe(&tool_name) {
-                    return Err(ToolError::invalid_input(format!(
-                        "Tool '{tool_name}' is an MCP tool and cannot run in parallel. \
-                         Allowed MCP tools: list_mcp_resources, list_mcp_resource_templates, \
-                         mcp_read_resource, read_mcp_resource, mcp_get_prompt."
-                    )));
-                }
-            } else {
-                let Some(spec) = registry.get(&tool_name) else {
-                    return Err(ToolError::not_available(format!(
-                        "tool '{tool_name}' is not registered"
-                    )));
-                };
-                if !spec.is_read_only() {
-                    return Err(ToolError::invalid_input(format!(
-                        "Tool '{tool_name}' is not read-only and cannot run in parallel"
-                    )));
-                }
-                if spec.approval_requirement() != ApprovalRequirement::Auto {
-                    return Err(ToolError::invalid_input(format!(
-                        "Tool '{tool_name}' requires approval and cannot run in parallel"
-                    )));
-                }
-                if !spec.supports_parallel() {
-                    return Err(ToolError::invalid_input(format!(
-                        "Tool '{tool_name}' does not support parallel execution"
-                    )));
-                }
-            }
-
-            let registry_ref = registry;
-            let lock = tool_exec_lock.clone();
-            let tx_event = self.tx_event.clone();
-            let mcp_pool = mcp_pool.clone();
-            tasks.push(async move {
-                let result = Engine::execute_tool_with_lock(
-                    lock,
-                    true,
-                    false,
-                    tx_event,
-                    tool_name.clone(),
-                    tool_input.clone(),
-                    Some(registry_ref),
-                    mcp_pool,
-                    None,
-                )
-                .await;
-                (tool_name, result)
-            });
-        }
-
-        let mut results = Vec::new();
-        while let Some((tool_name, result)) = tasks.next().await {
-            match result {
-                Ok(output) => {
-                    let mut error = None;
-                    if !output.success {
-                        error = Some(output.content.clone());
-                    }
-                    results.push(ParallelToolResultEntry {
-                        tool_name,
-                        success: output.success,
-                        content: output.content,
-                        error,
-                    });
-                }
-                Err(err) => {
-                    let message = format!("{err}");
-                    results.push(ParallelToolResultEntry {
-                        tool_name,
-                        success: false,
-                        content: format!("Error: {message}"),
-                        error: Some(message),
-                    });
-                }
-            }
-        }
-
-        ToolResult::json(&ParallelToolResult { results })
-            .map_err(|e| ToolError::execution_failed(e.to_string()))
-    }
-
-    #[allow(clippy::too_many_arguments)]
-    async fn execute_tool_with_lock(
-        lock: Arc<RwLock<()>>,
-        supports_parallel: bool,
-        interactive: bool,
-        tx_event: mpsc::Sender<Event>,
-        tool_name: String,
-        tool_input: serde_json::Value,
-        registry: Option<&crate::tools::ToolRegistry>,
-        mcp_pool: Option<Arc<AsyncMutex<McpPool>>>,
-        context_override: Option<crate::tools::ToolContext>,
-    ) -> Result<ToolResult, ToolError> {
-        let _guard = if supports_parallel {
-            ToolExecGuard::Read(lock.read().await)
-        } else {
-            ToolExecGuard::Write(lock.write().await)
-        };
-
-        if interactive {
-            let _ = tx_event.send(Event::PauseEvents).await;
-        }
-
-        let result = if McpPool::is_mcp_tool(&tool_name) {
-            if let Some(pool) = mcp_pool {
-                Engine::execute_mcp_tool_with_pool(pool, &tool_name, tool_input).await
-            } else {
-                Err(ToolError::not_available(format!(
-                    "tool '{tool_name}' is not registered"
-                )))
-            }
-        } else if let Some(registry) = registry {
-            registry
-                .execute_full_with_context(&tool_name, tool_input, context_override.as_ref())
-                .await
-        } else {
-            Err(ToolError::not_available(format!(
-                "tool '{tool_name}' is not registered"
-            )))
-        };
-
-        if interactive {
-            let _ = tx_event.send(Event::ResumeEvents).await;
-        }
-
-        result
-    }
-
     /// Handle a turn using the DeepSeek API.
     #[allow(clippy::too_many_lines)]
     /// Run the pre-request layered-context checkpoint (#159). Checks whether
@@ -2148,7 +1849,9 @@ use context::{
     turn_response_headroom_tokens,
 };
 mod dispatch;
+mod lsp_hooks;
 mod tool_catalog;
+mod tool_execution;
 mod tool_setup;
 mod turn_loop;
 
@@ -2159,6 +1862,8 @@ use self::dispatch::{
     mcp_tool_is_read_only, parse_parallel_tool_calls, parse_tool_input,
     should_force_update_plan_first, should_parallelize_tool_batch, should_stop_after_plan_tool,
 };
+#[cfg(test)]
+use self::lsp_hooks::{edited_paths_for_tool, parse_patch_paths};
 use self::tool_catalog::{
     CODE_EXECUTION_TOOL_NAME, MULTI_TOOL_PARALLEL_NAME, REQUEST_USER_INPUT_NAME,
     active_tools_for_step, build_model_tool_catalog, ensure_advanced_tooling,
@@ -2167,6 +1872,7 @@ use self::tool_catalog::{
 };
 #[cfg(test)]
 use self::tool_catalog::{TOOL_SEARCH_BM25_NAME, should_default_defer_tool};
+use self::tool_execution::emit_tool_audit;
 
 #[cfg(test)]
 mod tests;
diff --git a/crates/tui/src/core/engine/lsp_hooks.rs b/crates/tui/src/core/engine/lsp_hooks.rs
new file mode 100644
index 00000000..535a869f
--- /dev/null
+++ b/crates/tui/src/core/engine/lsp_hooks.rs
@@ -0,0 +1,128 @@
+//! Post-edit LSP diagnostics hooks for engine tool execution.
+//!
+//! The turn loop only needs to ask "did a successful edit produce diagnostics?"
+//! This module owns the tool-input path extraction and the synthetic diagnostic
+//! message injection so the top-level engine module stays focused on session
+//! orchestration.
+
+use std::path::PathBuf;
+
+use super::*;
+
+/// #136: derive the file path(s) edited by a tool call. Returns the empty
+/// vec for tools that don't modify files. We intentionally only handle the
+/// three known edit tools — adding more (e.g. specialized refactor tools)
+/// is a one-line change here.
+pub(super) fn edited_paths_for_tool(tool_name: &str, input: &serde_json::Value) -> Vec<PathBuf> {
+    match tool_name {
+        "edit_file" | "write_file" => {
+            if let Some(path) = input.get("path").and_then(|v| v.as_str()) {
+                vec![PathBuf::from(path)]
+            } else {
+                Vec::new()
+            }
+        }
+        "apply_patch" => {
+            // `apply_patch` accepts either a `path` override or a list of
+            // `files` (each `{path, content}`). We try both shapes.
+            let mut out = Vec::new();
+            if let Some(path) = input.get("path").and_then(|v| v.as_str()) {
+                out.push(PathBuf::from(path));
+            }
+            if let Some(files) = input.get("files").and_then(|v| v.as_array()) {
+                for entry in files {
+                    if let Some(path) = entry.get("path").and_then(|v| v.as_str()) {
+                        out.push(PathBuf::from(path));
+                    }
+                }
+            }
+            // Fallback: parse `---`/`+++` headers from a unified diff payload.
+            if out.is_empty()
+                && let Some(patch) = input.get("patch").and_then(|v| v.as_str())
+            {
+                out.extend(parse_patch_paths(patch));
+            }
+            out
+        }
+        _ => Vec::new(),
+    }
+}
+
+/// Lightweight parser for `+++ b/<path>` lines in a unified diff. Used as a
+/// fallback when `apply_patch` is invoked with raw `patch` text and no
+/// `path`/`files` override. We deliberately keep this dumb — the real
+/// `apply_patch` tool already validates the patch shape; we only need a
+/// best-effort hint for the LSP hook.
+pub(super) fn parse_patch_paths(patch: &str) -> Vec<PathBuf> {
+    let mut out = Vec::new();
+    for line in patch.lines() {
+        if let Some(rest) = line.strip_prefix("+++ ") {
+            let trimmed = rest.trim();
+            // Strip leading `b/` per git diff conventions.
+            let path = trimmed.strip_prefix("b/").unwrap_or(trimmed);
+            // Skip `/dev/null` (deletion).
+            if path == "/dev/null" {
+                continue;
+            }
+            out.push(PathBuf::from(path));
+        }
+    }
+    out
+}
+
+impl Engine {
+    /// #136: post-edit hook. Inspects the tool name + input, derives the
+    /// edited file path, and asks the LSP manager for diagnostics. The
+    /// rendered block is queued in `pending_lsp_blocks` and flushed to the
+    /// session message stream just before the next API request. Failure is
+    /// silent by design — a missing/crashing LSP server must never block
+    /// the agent.
+    pub(super) async fn run_post_edit_lsp_hook(
+        &mut self,
+        tool_name: &str,
+        tool_input: &serde_json::Value,
+    ) {
+        if !self.lsp_manager.config().enabled {
+            return;
+        }
+        let paths = edited_paths_for_tool(tool_name, tool_input);
+        for path in paths {
+            let absolute = if path.is_absolute() {
+                path.clone()
+            } else {
+                self.session.workspace.join(&path)
+            };
+            // Use a short edit-sequence based on the existing turn counter so
+            // log output stays correlated even though we do not currently
+            // batch by sequence.
+            let seq = self.turn_counter;
+            if let Some(block) = self.lsp_manager.diagnostics_for(&absolute, seq).await {
+                self.pending_lsp_blocks.push(block);
+            }
+        }
+    }
+
+    /// Drain `pending_lsp_blocks` into a single synthetic user message so the
+    /// model sees the diagnostics on its next request. Skips when nothing is
+    /// pending. The message uses the standard `text` content block shape
+    /// (the same shape as the post-tool steer messages) so we don't need to
+    /// invent a new envelope.
+    pub(super) async fn flush_pending_lsp_diagnostics(&mut self) {
+        if self.pending_lsp_blocks.is_empty() {
+            return;
+        }
+        let blocks = std::mem::take(&mut self.pending_lsp_blocks);
+        let rendered = crate::lsp::render_blocks(&blocks);
+        if rendered.is_empty() {
+            return;
+        }
+        self.add_session_message(Message {
+            role: "user".to_string(),
+            content: vec![ContentBlock::Text {
+                text: rendered,
+                cache_control: None,
+            }],
+        })
+        .await;
+    }
+}
diff --git a/crates/tui/src/core/engine/tool_execution.rs b/crates/tui/src/core/engine/tool_execution.rs
new file mode 100644
index 00000000..e18e235b
--- /dev/null
+++ b/crates/tui/src/core/engine/tool_execution.rs
@@ -0,0 +1,197 @@
+//! Low-level tool execution helpers for the engine turn loop.
+//!
+//! This module keeps the mechanics of MCP dispatch, execution locking, and
+//! parallel-tool fanout out of `engine.rs`; the turn loop still owns planning,
+//! approval, and how tool results are written back into session state.
+
+use std::{fs::OpenOptions, io::Write};
+
+use super::*;
+
+pub(super) fn emit_tool_audit(event: serde_json::Value) {
+    let Some(path) = std::env::var_os("DEEPSEEK_TOOL_AUDIT_LOG") else {
+        return;
+    };
+    let line = match serde_json::to_string(&event) {
+        Ok(line) => line,
+        Err(_) => return,
+    };
+    let path = PathBuf::from(path);
+    if let Some(parent) = path.parent() {
+        let _ = std::fs::create_dir_all(parent);
+    }
+    if let Ok(mut file) = OpenOptions::new().create(true).append(true).open(path) {
+        let _ = writeln!(file, "{line}");
+    }
+}
+
+impl Engine {
+    pub(super) async fn execute_mcp_tool_with_pool(
+        pool: Arc<AsyncMutex<McpPool>>,
+        name: &str,
+        input: serde_json::Value,
+    ) -> Result<ToolResult, ToolError> {
+        let mut pool = pool.lock().await;
+        let result = pool
+            .call_tool(name, input)
+            .await
+            .map_err(|e| ToolError::execution_failed(format!("MCP tool failed: {e}")))?;
+        let content = serde_json::to_string_pretty(&result).unwrap_or_else(|_| result.to_string());
+        Ok(ToolResult::success(content))
+    }
+
+    pub(super) async fn execute_parallel_tool(
+        &mut self,
+        input: serde_json::Value,
+        tool_registry: Option<&crate::tools::ToolRegistry>,
+        tool_exec_lock: Arc<RwLock<()>>,
+    ) -> Result<ToolResult, ToolError> {
+        let calls = parse_parallel_tool_calls(&input)?;
+        let mcp_pool = if calls.iter().any(|(tool, _)| McpPool::is_mcp_tool(tool)) {
+            Some(self.ensure_mcp_pool().await?)
+        } else {
+            None
+        };
+        let Some(registry) = tool_registry else {
+            return Err(ToolError::not_available(
+                "tool registry unavailable for multi_tool_use.parallel",
+            ));
+        };
+
+        let mut tasks = FuturesUnordered::new();
+        for (tool_name, tool_input) in calls {
+            if tool_name == MULTI_TOOL_PARALLEL_NAME {
+                return Err(ToolError::invalid_input(
+                    "multi_tool_use.parallel cannot call itself",
+                ));
+            }
+            if McpPool::is_mcp_tool(&tool_name) {
+                if !mcp_tool_is_parallel_safe(&tool_name) {
+                    return Err(ToolError::invalid_input(format!(
+                        "Tool '{tool_name}' is an MCP tool and cannot run in parallel. \
+                         Allowed MCP tools: list_mcp_resources, list_mcp_resource_templates, \
+                         mcp_read_resource, read_mcp_resource, mcp_get_prompt."
+                    )));
+                }
+            } else {
+                let Some(spec) = registry.get(&tool_name) else {
+                    return Err(ToolError::not_available(format!(
+                        "tool '{tool_name}' is not registered"
+                    )));
+                };
+                if !spec.is_read_only() {
+                    return Err(ToolError::invalid_input(format!(
+                        "Tool '{tool_name}' is not read-only and cannot run in parallel"
+                    )));
+                }
+                if spec.approval_requirement() != ApprovalRequirement::Auto {
+                    return Err(ToolError::invalid_input(format!(
+                        "Tool '{tool_name}' requires approval and cannot run in parallel"
+                    )));
+                }
+                if !spec.supports_parallel() {
+                    return Err(ToolError::invalid_input(format!(
+                        "Tool '{tool_name}' does not support parallel execution"
+                    )));
+                }
+            }
+
+            let registry_ref = registry;
+            let lock = tool_exec_lock.clone();
+            let tx_event = self.tx_event.clone();
+            let mcp_pool = mcp_pool.clone();
+            tasks.push(async move {
+                let result = Engine::execute_tool_with_lock(
+                    lock,
+                    true,
+                    false,
+                    tx_event,
+                    tool_name.clone(),
+                    tool_input.clone(),
+                    Some(registry_ref),
+                    mcp_pool,
+                    None,
+                )
+                .await;
+                (tool_name, result)
+            });
+        }
+
+        let mut results = Vec::new();
+        while let Some((tool_name, result)) = tasks.next().await {
+            match result {
+                Ok(output) => {
+                    let mut error = None;
+                    if !output.success {
+                        error = Some(output.content.clone());
+                    }
+                    results.push(ParallelToolResultEntry {
+                        tool_name,
+                        success: output.success,
+                        content: output.content,
+                        error,
+                    });
+                }
+                Err(err) => {
+                    let message = format!("{err}");
+                    results.push(ParallelToolResultEntry {
+                        tool_name,
+                        success: false,
+                        content: format!("Error: {message}"),
+                        error: Some(message),
+                    });
+                }
+            }
+        }
+
+        ToolResult::json(&ParallelToolResult { results })
+            .map_err(|e| ToolError::execution_failed(e.to_string()))
+    }
+
+    #[allow(clippy::too_many_arguments)]
+    pub(super) async fn execute_tool_with_lock(
+        lock: Arc<RwLock<()>>,
+        supports_parallel: bool,
+        interactive: bool,
+        tx_event: mpsc::Sender<Event>,
+        tool_name: String,
+        tool_input: serde_json::Value,
+        registry: Option<&crate::tools::ToolRegistry>,
+        mcp_pool: Option<Arc<AsyncMutex<McpPool>>>,
+        context_override: Option<crate::tools::ToolContext>,
+    ) -> Result<ToolResult, ToolError> {
+        let _guard = if supports_parallel {
+            ToolExecGuard::Read(lock.read().await)
+        } else {
+            ToolExecGuard::Write(lock.write().await)
+        };
+
+        if interactive {
+            let _ = tx_event.send(Event::PauseEvents).await;
+        }
+
+        let result = if McpPool::is_mcp_tool(&tool_name) {
+            if let Some(pool) = mcp_pool {
+                Engine::execute_mcp_tool_with_pool(pool, &tool_name, tool_input).await
+            } else {
+                Err(ToolError::not_available(format!(
+                    "tool '{tool_name}' is not registered"
+                )))
+            }
+        } else if let Some(registry) = registry {
+            registry
+                .execute_full_with_context(&tool_name, tool_input, context_override.as_ref())
+                .await
+        } else {
+            Err(ToolError::not_available(format!(
+                "tool '{tool_name}' is not registered"
+            )))
+        };
+
+        if interactive {
+            let _ = tx_event.send(Event::ResumeEvents).await;
+        }
+
+        result
+    }
+}

From a64bc9bbe553cbac176909da572eb4f6846ef4a2 Mon Sep 17 00:00:00 2001
From: Hunter Bown <hmbown@gmail.com>
Date: Fri, 1 May 2026 09:09:09 -0500
Subject: [PATCH 12/17] refactor(engine): isolate streaming state helpers

---
 crates/tui/src/core/engine.rs           | 200 ++----------------------
 crates/tui/src/core/engine/dispatch.rs  |  56 ++++++-
 crates/tui/src/core/engine/streaming.rs | 137 ++++++++++++++++
 3 files changed, 209 insertions(+), 184 deletions(-)
 create mode 100644 crates/tui/src/core/engine/streaming.rs

diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs
index 6e2e5911..d69a2b2a 100644
--- a/crates/tui/src/core/engine.rs
+++ b/crates/tui/src/core/engine.rs
@@ -32,9 +32,11 @@ use crate::error_taxonomy::{ErrorCategory, ErrorEnvelope, StreamError};
 use crate::features::{Feature, Features};
 use crate::llm_client::LlmClient;
 use crate::mcp::McpPool;
+#[cfg(test)]
+use crate::models::ToolCaller;
 use crate::models::{
     ContentBlock, ContentBlockStart, DEFAULT_CONTEXT_WINDOW_TOKENS, Delta, Message, MessageRequest,
-    StreamEvent, SystemPrompt, Tool, ToolCaller, Usage,
+    StreamEvent, SystemPrompt, Tool, Usage,
 };
 use crate::prompts;
 use crate::seam_manager::{SeamConfig, SeamManager};
@@ -291,185 +293,7 @@ pub struct Engine {
     pending_lsp_blocks: Vec<crate::lsp::DiagnosticBlock>,
 }
 
-// === Internal stream helpers ===
-
-#[derive(Clone, Copy, Debug, PartialEq, Eq)]
-enum ContentBlockKind {
-    Text,
-    Thinking,
-    ToolUse,
-}
-
-#[derive(Debug, Clone)]
-struct ToolUseState {
-    id: String,
-    name: String,
-    input: serde_json::Value,
-    caller: Option<ToolCaller>,
-    input_buffer: String,
-}
-
-/// Maximum time to wait for a single stream chunk before assuming a stall.
-/// **This is the idle timeout** — it resets on every SSE chunk, so long
-/// thinking turns that ARE producing reasoning_content stay alive. Only a
-/// genuine `chunk_timeout` window of silence kills the stream.
-const STREAM_CHUNK_TIMEOUT_SECS: u64 = 90;
-/// Maximum total bytes of text/thinking content before aborting the stream.
-const STREAM_MAX_CONTENT_BYTES: usize = 10 * 1024 * 1024; // 10 MB
-/// Sanity backstop for total stream wall-clock duration. **Not** a routine
-/// kill switch — `STREAM_CHUNK_TIMEOUT_SECS` (idle) is the primary stall
-/// detector. The wall-clock cap is here only to bound pathological cases
-/// (e.g. a server that keeps sending heartbeats forever without progress).
-///
-/// History: this used to be 300s (5 min) which was too aggressive — V4
-/// thinking turns on hard prompts legitimately exceed 5 minutes wall-clock
-/// while still emitting reasoning_content chunks the whole way. Bumped to
-/// 30 min in v0.6.6 to address `TODO_FIXES.md` #1. Codex defaults to a
-/// per-chunk idle of 300s with no wall-clock cap; we keep both layers but
-/// give the wall-clock a generous window so it never fires in practice.
-const STREAM_MAX_DURATION_SECS: u64 = 1800; // 30 minutes (was 300s; #103/#1)
-/// Hard cap on consecutive recoverable stream errors before we surface a turn
-/// failure. Bumped 3 → 5 in v0.6.7 along with the HTTP/2 keepalive defaults
-/// (#103) — keepalive should make spurious decode errors rarer, so we can
-/// tolerate a longer streak before giving up on the turn.
-const MAX_STREAM_ERRORS_BEFORE_FAIL: u32 = 5;
-/// Cap on transparent stream-level retries — these only happen when the wire
-/// dies before any content was streamed, so DeepSeek hasn't billed us and
-/// the user hasn't seen anything. Two attempts is enough to ride out a
-/// flaky edge node without amplifying real outages (#103).
-const MAX_TRANSPARENT_STREAM_RETRIES: u32 = 2;
-
-/// Decide whether a stream error is eligible for a transparent retry.
-///
-/// True only when ALL three conditions hold:
-/// 1. No content has been received on the current attempt — otherwise DeepSeek
-///    has already billed us for output tokens and the user has seen partial
-///    deltas; resending would double-bill and desync the UI.
-/// 2. We still have transparent-retry budget remaining.
-/// 3. The turn has not been cancelled.
-///
-/// Extracted as a pure function so the four #103 retry cases can be exercised
-/// in unit tests without booting the full engine state machine.
-fn should_transparently_retry_stream(
-    any_content_received: bool,
-    transparent_attempts: u32,
-    cancelled: bool,
-) -> bool {
-    !any_content_received && transparent_attempts < MAX_TRANSPARENT_STREAM_RETRIES && !cancelled
-}
-pub(crate) const TOOL_CALL_START_MARKERS: [&str; 5] = [
-    "[TOOL_CALL]",
-    "<deepseek:tool_call",
-    "<tool_call",
-    "<invoke ",
-    "<function_calls>",
-];
-
-pub(crate) const TOOL_CALL_END_MARKERS: [&str; 5] = [
-    "[/TOOL_CALL]",
-    "</deepseek:tool_call>",
-    "</tool_call>",
-    "</invoke>",
-    "</function_calls>",
-];
-
-/// Compact one-shot notice emitted when a model attempts to forge a tool-call
-/// wrapper in plain text instead of using the API tool channel. The visible
-/// content is still scrubbed; this exists so the user can see why their text
-/// shrank.
-pub(crate) const FAKE_WRAPPER_NOTICE: &str =
-    "Stripped non-API tool-call wrapper from model output (use the API tool channel)";
-
-/// True if `text` contains any of the known fake-wrapper start markers. Used by
-/// the streaming loop to decide whether to emit `FAKE_WRAPPER_NOTICE`.
-pub(crate) fn contains_fake_tool_wrapper(text: &str) -> bool {
-    TOOL_CALL_START_MARKERS.iter().any(|m| text.contains(m))
-}
-
-fn find_first_marker(text: &str, markers: &[&str]) -> Option<(usize, usize)> {
-    markers
-        .iter()
-        .filter_map(|marker| text.find(marker).map(|idx| (idx, marker.len())))
-        .min_by_key(|(idx, _)| *idx)
-}
-
-pub(crate) fn filter_tool_call_delta(delta: &str, in_tool_call: &mut bool) -> String {
-    if delta.is_empty() {
-        return String::new();
-    }
-
-    let mut output = String::new();
-    let mut rest = delta;
-
-    loop {
-        if *in_tool_call {
-            let Some((idx, len)) = find_first_marker(rest, &TOOL_CALL_END_MARKERS) else {
-                break;
-            };
-            rest = &rest[idx + len..];
-            *in_tool_call = false;
-        } else {
-            let Some((idx, len)) = find_first_marker(rest, &TOOL_CALL_START_MARKERS) else {
-                output.push_str(rest);
-                break;
-            };
-            output.push_str(&rest[..idx]);
-            rest = &rest[idx + len..];
-            *in_tool_call = true;
-        }
-    }
-
-    output
-}
-
-fn caller_type_for_tool_use(caller: Option<&ToolCaller>) -> &str {
-    caller.map_or("direct", |c| c.caller_type.as_str())
-}
-
-fn caller_allowed_for_tool(caller: Option<&ToolCaller>, tool_def: Option<&Tool>) -> bool {
-    let requested = caller_type_for_tool_use(caller);
-    if let Some(def) = tool_def
-        && let Some(allowed) = &def.allowed_callers
-    {
-        if allowed.is_empty() {
-            return requested == "direct";
-        }
-        return allowed.iter().any(|item| item == requested);
-    }
-    requested == "direct"
-}
-
-fn format_tool_error(err: &ToolError, tool_name: &str) -> String {
-    match err {
-        ToolError::InvalidInput { message } => {
-            format!("Invalid input for tool '{tool_name}': {message}")
-        }
-        ToolError::MissingField { field } => {
-            format!("Tool '{tool_name}' is missing required field '{field}'")
-        }
-        ToolError::PathEscape { path } => format!(
-            "Path escapes workspace: {}. Use a workspace-relative path or enable trust mode.",
-            path.display()
-        ),
-        ToolError::ExecutionFailed { message } => message.clone(),
-        ToolError::Timeout { seconds } => format!(
-            "Tool '{tool_name}' timed out after {seconds}s. Try a narrower scope or a longer timeout."
-        ),
-        ToolError::NotAvailable { message } => {
-            let lower = message.to_ascii_lowercase();
-            if lower.contains("current tool catalog") || lower.contains("did you mean:") {
-                message.clone()
-            } else {
-                format!(
-                    "Tool '{tool_name}' is not available: {message}. Check mode, feature flags, or tool name."
-                )
-            }
-        }
-        ToolError::PermissionDenied { message } => format!(
-            "Tool '{tool_name}' was denied: {message}. Adjust approval mode or request permission."
-        ),
-    }
-}
+// === Internal tool helpers ===
 
 impl Engine {
     fn reset_cancel_token(&mut self) {
@@ -1850,6 +1674,7 @@ use context::{
 };
 mod dispatch;
 mod lsp_hooks;
+mod streaming;
 mod tool_catalog;
 mod tool_execution;
 mod tool_setup;
@@ -1858,12 +1683,21 @@ mod turn_loop;
 use self::approval::{ApprovalDecision, ApprovalResult, UserInputDecision};
 use self::dispatch::{
     ParallelToolResult, ParallelToolResultEntry, ToolExecGuard, ToolExecOutcome, ToolExecutionPlan,
-    final_tool_input, mcp_tool_approval_description, mcp_tool_is_parallel_safe,
-    mcp_tool_is_read_only, parse_parallel_tool_calls, parse_tool_input,
-    should_force_update_plan_first, should_parallelize_tool_batch, should_stop_after_plan_tool,
+    caller_allowed_for_tool, caller_type_for_tool_use, final_tool_input, format_tool_error,
+    mcp_tool_approval_description, mcp_tool_is_parallel_safe, mcp_tool_is_read_only,
+    parse_parallel_tool_calls, parse_tool_input, should_force_update_plan_first,
+    should_parallelize_tool_batch, should_stop_after_plan_tool,
 };
 #[cfg(test)]
 use self::lsp_hooks::{edited_paths_for_tool, parse_patch_paths};
+#[cfg(test)]
+use self::streaming::TOOL_CALL_START_MARKERS;
+use self::streaming::{
+    ContentBlockKind, FAKE_WRAPPER_NOTICE, MAX_STREAM_ERRORS_BEFORE_FAIL,
+    MAX_TRANSPARENT_STREAM_RETRIES, STREAM_CHUNK_TIMEOUT_SECS, STREAM_MAX_CONTENT_BYTES,
+    STREAM_MAX_DURATION_SECS, ToolUseState, contains_fake_tool_wrapper, filter_tool_call_delta,
+    should_transparently_retry_stream,
+};
 use self::tool_catalog::{
     CODE_EXECUTION_TOOL_NAME, MULTI_TOOL_PARALLEL_NAME, REQUEST_USER_INPUT_NAME,
     active_tools_for_step, build_model_tool_catalog, ensure_advanced_tooling,
diff --git a/crates/tui/src/core/engine/dispatch.rs b/crates/tui/src/core/engine/dispatch.rs
index 94b8c864..eb39f41f 100644
--- a/crates/tui/src/core/engine/dispatch.rs
+++ b/crates/tui/src/core/engine/dispatch.rs
@@ -17,7 +17,7 @@
 
 use serde_json::json;
 
-use crate::models::ToolCaller;
+use crate::models::{Tool, ToolCaller};
 use crate::tools::spec::{ToolError, ToolResult};
 use crate::tui::app::AppMode;
 
@@ -71,6 +71,60 @@ pub(super) enum ToolExecGuard<'a> {
     Write(#[allow(dead_code)] tokio::sync::RwLockWriteGuard<'a, ()>),
 }
 
+// === Caller policy and errors ========================================
+
+pub(super) fn caller_type_for_tool_use(caller: Option<&ToolCaller>) -> &str {
+    caller.map_or("direct", |c| c.caller_type.as_str())
+}
+
+pub(super) fn caller_allowed_for_tool(
+    caller: Option<&ToolCaller>,
+    tool_def: Option<&Tool>,
+) -> bool {
+    let requested = caller_type_for_tool_use(caller);
+    if let Some(def) = tool_def
+        && let Some(allowed) = &def.allowed_callers
+    {
+        if allowed.is_empty() {
+            return requested == "direct";
+        }
+        return allowed.iter().any(|item| item == requested);
+    }
+    requested == "direct"
+}
+
+pub(super) fn format_tool_error(err: &ToolError, tool_name: &str) -> String {
+    match err {
+        ToolError::InvalidInput { message } => {
+            format!("Invalid input for tool '{tool_name}': {message}")
+        }
+        ToolError::MissingField { field } => {
+            format!("Tool '{tool_name}' is missing required field '{field}'")
+        }
+        ToolError::PathEscape { path } => format!(
+            "Path escapes workspace: {}. Use a workspace-relative path or enable trust mode.",
+            path.display()
+        ),
+        ToolError::ExecutionFailed { message } => message.clone(),
+        ToolError::Timeout { seconds } => format!(
+            "Tool '{tool_name}' timed out after {seconds}s. Try a narrower scope or a longer timeout."
+        ),
+        ToolError::NotAvailable { message } => {
+            let lower = message.to_ascii_lowercase();
+            if lower.contains("current tool catalog") || lower.contains("did you mean:") {
+                message.clone()
+            } else {
+                format!(
+                    "Tool '{tool_name}' is not available: {message}. Check mode, feature flags, or tool name."
+                )
+            }
+        }
+        ToolError::PermissionDenied { message } => format!(
+            "Tool '{tool_name}' was denied: {message}. Adjust approval mode or request permission."
+        ),
+    }
+}
+
 // === Streaming-buffer parsing =========================================
 
 /// Promote a streaming `ToolUseState` to a finalized JSON input.
diff --git a/crates/tui/src/core/engine/streaming.rs b/crates/tui/src/core/engine/streaming.rs
new file mode 100644
index 00000000..1855dc18
--- /dev/null
+++ b/crates/tui/src/core/engine/streaming.rs
@@ -0,0 +1,137 @@
+//! Streaming response state and guardrails.
+//!
+//! This module owns the local state used while decoding one model stream:
+//! content block kind tracking, streamed tool-use buffers, transparent retry
+//! policy, and scrubbers for text that looks like a forged tool-call wrapper.
+
+use crate::models::ToolCaller;
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+pub(super) enum ContentBlockKind {
+    Text,
+    Thinking,
+    ToolUse,
+}
+
+#[derive(Debug, Clone)]
+pub(super) struct ToolUseState {
+    pub(super) id: String,
+    pub(super) name: String,
+    pub(super) input: serde_json::Value,
+    pub(super) caller: Option<ToolCaller>,
+    pub(super) input_buffer: String,
+}
+
+/// Maximum time to wait for a single stream chunk before assuming a stall.
+/// **This is the idle timeout** — it resets on every SSE chunk, so long
+/// thinking turns that ARE producing reasoning_content stay alive. Only a
+/// genuine `chunk_timeout` window of silence kills the stream.
+pub(super) const STREAM_CHUNK_TIMEOUT_SECS: u64 = 90;
+/// Maximum total bytes of text/thinking content before aborting the stream.
+pub(super) const STREAM_MAX_CONTENT_BYTES: usize = 10 * 1024 * 1024; // 10 MB
+/// Sanity backstop for total stream wall-clock duration. **Not** a routine
+/// kill switch — `STREAM_CHUNK_TIMEOUT_SECS` (idle) is the primary stall
+/// detector. The wall-clock cap is here only to bound pathological cases
+/// (e.g. a server that keeps sending heartbeats forever without progress).
+///
+/// History: this used to be 300s (5 min) which was too aggressive — V4
+/// thinking turns on hard prompts legitimately exceed 5 minutes wall-clock
+/// while still emitting reasoning_content chunks the whole way. Bumped to
+/// 30 min in v0.6.6 to address `TODO_FIXES.md` #1. Codex defaults to a
+/// per-chunk idle of 300s with no wall-clock cap; we keep both layers but
+/// give the wall-clock a generous window so it never fires in practice.
+pub(super) const STREAM_MAX_DURATION_SECS: u64 = 1800; // 30 minutes (was 300s; #103/#1)
+/// Hard cap on consecutive recoverable stream errors before we surface a turn
+/// failure. Bumped 3 → 5 in v0.6.7 along with the HTTP/2 keepalive defaults
+/// (#103) — keepalive should make spurious decode errors rarer, so we can
+/// tolerate a longer streak before giving up on the turn.
+pub(super) const MAX_STREAM_ERRORS_BEFORE_FAIL: u32 = 5;
+/// Cap on transparent stream-level retries — these only happen when the wire
+/// dies before any content was streamed, so DeepSeek hasn't billed us and
+/// the user hasn't seen anything. Two attempts is enough to ride out a
+/// flaky edge node without amplifying real outages (#103).
+pub(super) const MAX_TRANSPARENT_STREAM_RETRIES: u32 = 2;
+
+/// Decide whether a stream error is eligible for a transparent retry.
+///
+/// True only when ALL three conditions hold:
+/// 1. No content has been received on the current attempt — otherwise DeepSeek
+///    has already billed us for output tokens and the user has seen partial
+///    deltas; resending would double-bill and desync the UI.
+/// 2. We still have transparent-retry budget remaining.
+/// 3. The turn has not been cancelled.
+///
+/// Extracted as a pure function so the four #103 retry cases can be exercised
+/// in unit tests without booting the full engine state machine.
+pub(super) fn should_transparently_retry_stream(
+    any_content_received: bool,
+    transparent_attempts: u32,
+    cancelled: bool,
+) -> bool {
+    !any_content_received && transparent_attempts < MAX_TRANSPARENT_STREAM_RETRIES && !cancelled
+}
+
+pub(crate) const TOOL_CALL_START_MARKERS: [&str; 5] = [
+    "[TOOL_CALL]",
+    "<deepseek:tool_call",
+    "<tool_call",
+    "<invoke ",
+    "<function_calls>",
+];
+
+pub(crate) const TOOL_CALL_END_MARKERS: [&str; 5] = [
+    "[/TOOL_CALL]",
+    "</deepseek:tool_call>",
+    "</tool_call>",
+    "</invoke>",
+    "</function_calls>",
+];
+
+/// Compact one-shot notice emitted when a model attempts to forge a tool-call
+/// wrapper in plain text instead of using the API tool channel. The visible
+/// content is still scrubbed; this exists so the user can see why their text
+/// shrank.
+pub(crate) const FAKE_WRAPPER_NOTICE: &str =
+    "Stripped non-API tool-call wrapper from model output (use the API tool channel)";
+
+/// True if `text` contains any of the known fake-wrapper start markers. Used by
+/// the streaming loop to decide whether to emit `FAKE_WRAPPER_NOTICE`.
+pub(crate) fn contains_fake_tool_wrapper(text: &str) -> bool {
+    TOOL_CALL_START_MARKERS.iter().any(|m| text.contains(m))
+}
+
+fn find_first_marker(text: &str, markers: &[&str]) -> Option<(usize, usize)> {
+    markers
+        .iter()
+        .filter_map(|marker| text.find(marker).map(|idx| (idx, marker.len())))
+        .min_by_key(|(idx, _)| *idx)
+}
+
+pub(crate) fn filter_tool_call_delta(delta: &str, in_tool_call: &mut bool) -> String {
+    if delta.is_empty() {
+        return String::new();
+    }
+
+    let mut output = String::new();
+    let mut rest = delta;
+
+    loop {
+        if *in_tool_call {
+            let Some((idx, len)) = find_first_marker(rest, &TOOL_CALL_END_MARKERS) else {
+                break;
+            };
+            rest = &rest[idx + len..];
+            *in_tool_call = false;
+        } else {
+            let Some((idx, len)) = find_first_marker(rest, &TOOL_CALL_START_MARKERS) else {
+                output.push_str(rest);
+                break;
+            };
+            output.push_str(&rest[..idx]);
+            rest = &rest[idx + len..];
+            *in_tool_call = true;
+        }
+    }
+
+    output
+}

From bb88ab912979fc958836b0730ac43c5ae34c4b94 Mon Sep 17 00:00:00 2001
From: Hunter Bown <hmbown@gmail.com>
Date: Fri, 1 May 2026 09:28:28 -0500
Subject: [PATCH 13/17] fix(cli): make missing-companion-binary error actually
 helpful (#258)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

@whereiszebra (issue #258) downloaded just \`deepseek-macos-arm64\` from
the GitHub Release, ran it, hit:

  error: deepseek-tui binary not found at /path/to/deepseek-tui.
  Build workspace default members to install it, or set DEEPSEEK_TUI_BIN
  to its absolute path.

…spent 11 minutes figuring out they also needed \`deepseek-tui-macos-arm64\`
sitting next to it, and self-closed with: "Release page does not document
that both deepseek-macos-arm64 and deepseek-tui-macos-arm64 must be
downloaded together."

The dispatcher's error was the wrong message for the population that hits
it most often — direct GitHub Release downloaders. "Build workspace default
members" is meaningless if you didn't clone the repo. \`DEEPSEEK_TUI_BIN\`
is also not what they need.

New message lists the three concrete install paths that actually work for a
fresh user — npm, cargo, or grab BOTH binaries from the same release
page — and keeps the env var override as a final fallback for power
users. No logic change; just better text. Existing
\`locate_sibling_tui_binary_honours_env_override\` test still passes.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 crates/cli/src/lib.rs | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/crates/cli/src/lib.rs b/crates/cli/src/lib.rs
index 43cd70df..8b4da41d 100644
--- a/crates/cli/src/lib.rs
+++ b/crates/cli/src/lib.rs
@@ -1111,7 +1111,17 @@ fn locate_sibling_tui_binary() -> Result<PathBuf> {
     // expected name, not "deepseek-tui" on Windows.
     let expected = current.with_file_name(format!("deepseek-tui{}", std::env::consts::EXE_SUFFIX));
     bail!(
-        "deepseek-tui binary not found at {}. Build workspace default members to install it, or set DEEPSEEK_TUI_BIN to its absolute path.",
+        "Companion `deepseek-tui` binary not found at {}.\n\
+\n\
+The `deepseek` dispatcher delegates interactive sessions to a sibling \
+`deepseek-tui` binary. To fix this, install one of:\n\
+  • npm:    npm install -g deepseek-tui            (downloads both binaries)\n\
+  • cargo:  cargo install deepseek-tui-cli deepseek-tui --locked\n\
+  • GitHub Releases: download BOTH `deepseek-<platform>` AND \
+`deepseek-tui-<platform>` from https://github.com/Hmbown/DeepSeek-TUI/releases/latest \
+and place them in the same directory.\n\
+\n\
+Or set DEEPSEEK_TUI_BIN to the absolute path of an existing `deepseek-tui` binary.",
         expected.display()
     );
 }

From a7e629ae4dc38f3f62655e6980e4610ede688206 Mon Sep 17 00:00:00 2001
From: Hunter Bown <hmbown@gmail.com>
Date: Fri, 1 May 2026 09:34:11 -0500
Subject: [PATCH 14/17] test(parity): scan engine submodules after
 decomposition refactor
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The protocol-recovery contract tests `include_str!`-ed `engine.rs` and
asserted the fake-wrapper markers (`[TOOL_CALL]`, `<function_calls>`,
…) appeared as string literals in that file. The recent engine
decomposition refactor (commits f0fad7aa..a64bc9bb) split engine.rs
into `engine/streaming.rs`, `engine/turn_loop.rs`, `engine/dispatch.rs`,
`engine/tool_setup.rs`, `engine/tool_execution.rs`,
`engine/tool_catalog.rs`, `engine/context.rs`, `engine/approval.rs`,
`engine/capacity_flow.rs`, and `engine/lsp_hooks.rs`. The marker
literals followed the code into those files, so the original
single-file `include_str!` no longer saw them and 4 protocol-recovery
tests went red.

Switch to an `ENGINE_SOURCES: &[&str]` array of `include_str!`s across
engine.rs + every submodule, with a small `any_engine_source_contains`
helper. Test bodies are otherwise unchanged. The file-size sanity
check on `engine.rs` (>10_000 bytes) still passes — engine.rs is still
~65k bytes after the refactor.

Same regression coverage as before; just survives the new file layout.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 crates/tui/tests/protocol_recovery.rs | 47 ++++++++++++++++++++-------
 1 file changed, 35 insertions(+), 12 deletions(-)

diff --git a/crates/tui/tests/protocol_recovery.rs b/crates/tui/tests/protocol_recovery.rs
index 3de0080e..b42f174e 100644
--- a/crates/tui/tests/protocol_recovery.rs
+++ b/crates/tui/tests/protocol_recovery.rs
@@ -23,7 +23,28 @@ use std::fs;
 #[allow(dead_code)]
 mod tool_parser;
 
-const ENGINE_SRC: &str = include_str!("../src/core/engine.rs");
+// `engine.rs` was decomposed into submodules under `core/engine/`. The
+// protocol-scrubbing strings the tests below assert on are now spread
+// across `engine.rs` and several `engine/*.rs` files. We compile-time
+// include each so a contributor moving a marker into a sibling submodule
+// does not silently break these regression checks.
+const ENGINE_SOURCES: &[&str] = &[
+    include_str!("../src/core/engine.rs"),
+    include_str!("../src/core/engine/streaming.rs"),
+    include_str!("../src/core/engine/turn_loop.rs"),
+    include_str!("../src/core/engine/dispatch.rs"),
+    include_str!("../src/core/engine/tool_setup.rs"),
+    include_str!("../src/core/engine/tool_execution.rs"),
+    include_str!("../src/core/engine/tool_catalog.rs"),
+    include_str!("../src/core/engine/context.rs"),
+    include_str!("../src/core/engine/approval.rs"),
+    include_str!("../src/core/engine/capacity_flow.rs"),
+    include_str!("../src/core/engine/lsp_hooks.rs"),
+];
+
+fn any_engine_source_contains(needle: &str) -> bool {
+    ENGINE_SOURCES.iter().any(|src| src.contains(needle))
+}
 
 const EXPECTED_START_MARKERS: &[&str] = &[
     "[TOOL_CALL]",
@@ -46,9 +67,10 @@ fn engine_keeps_known_fake_wrapper_start_markers() {
     for marker in EXPECTED_START_MARKERS {
         let needle = format!("\"{marker}\"");
         assert!(
-            ENGINE_SRC.contains(&needle),
-            "engine.rs no longer mentions start marker `{marker}` — protocol \
-             scrubbing may have regressed. Searched for {needle:?}."
+            any_engine_source_contains(&needle),
+            "no engine source file still mentions start marker `{marker}` — \
+             protocol scrubbing may have regressed. Searched for {needle:?} \
+             across engine.rs and engine/* submodules."
         );
     }
 }
@@ -58,9 +80,10 @@ fn engine_keeps_known_fake_wrapper_end_markers() {
     for marker in EXPECTED_END_MARKERS {
         let needle = format!("\"{marker}\"");
         assert!(
-            ENGINE_SRC.contains(&needle),
-            "engine.rs no longer mentions end marker `{marker}` — protocol \
-             scrubbing may have regressed. Searched for {needle:?}."
+            any_engine_source_contains(&needle),
+            "no engine source file still mentions end marker `{marker}` — \
+             protocol scrubbing may have regressed. Searched for {needle:?} \
+             across engine.rs and engine/* submodules."
         );
     }
 }
@@ -71,18 +94,18 @@ fn engine_marker_counts_stay_paired() {
     // filter able to enter tool-call mode without ever leaving it. Lock the
     // count to whatever the constants currently declare.
     assert_eq!(EXPECTED_START_MARKERS.len(), EXPECTED_END_MARKERS.len());
-    assert!(ENGINE_SRC.contains("TOOL_CALL_START_MARKERS"));
-    assert!(ENGINE_SRC.contains("TOOL_CALL_END_MARKERS"));
+    assert!(any_engine_source_contains("TOOL_CALL_START_MARKERS"));
+    assert!(any_engine_source_contains("TOOL_CALL_END_MARKERS"));
 }
 
 #[test]
 fn engine_emits_compact_fake_wrapper_notice() {
     assert!(
-        ENGINE_SRC.contains("FAKE_WRAPPER_NOTICE"),
-        "engine.rs no longer references the protocol-recovery notice constant"
+        any_engine_source_contains("FAKE_WRAPPER_NOTICE"),
+        "no engine source file references the protocol-recovery notice constant"
     );
     assert!(
-        ENGINE_SRC.contains("API tool channel"),
+        any_engine_source_contains("API tool channel"),
         "the protocol-recovery notice should mention the API tool channel"
     );
 }

From e620e75f99d643a2ab379194dc27e69392f4f689 Mon Sep 17 00:00:00 2001
From: Hunter Bown <hmbown@gmail.com>
Date: Fri, 1 May 2026 09:46:21 -0500
Subject: [PATCH 15/17] chore: release v0.8.3
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bumps workspace, all internal path-deps, and npm wrapper (version +
deepseekBinaryVersion) from 0.8.2 → 0.8.3. Lockfile re-locked offline.
CHANGELOG entry summarizing the 0.8.3 lane: skills path bug fix,
privacy contraction, helpful missing-companion error (#258), engine
decomposition (#227), bridge/persistence/palette test gap closures,
crates.io badge, and 10 issue closures.

Local v0.8.3 verified at /tmp/deepseek-0.8.3-test/ before publish.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 CHANGELOG.md                  | 44 +++++++++++++++++++++++++++++++++++
 Cargo.lock                    | 28 +++++++++++-----------
 Cargo.toml                    |  2 +-
 crates/agent/Cargo.toml       |  2 +-
 crates/app-server/Cargo.toml  | 18 +++++++-------
 crates/cli/Cargo.toml         | 14 +++++------
 crates/config/Cargo.toml      |  2 +-
 crates/core/Cargo.toml        | 16 ++++++-------
 crates/execpolicy/Cargo.toml  |  2 +-
 crates/hooks/Cargo.toml       |  2 +-
 crates/mcp/Cargo.toml         |  2 +-
 crates/tools/Cargo.toml       |  2 +-
 crates/tui/Cargo.toml         |  6 ++---
 npm/deepseek-tui/package.json |  4 ++--
 14 files changed, 94 insertions(+), 50 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 12b10002..b3576469 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,50 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.8.3] - 2026-05-01
+
+### Fixed
+- **Skills prompt referenced fabricated paths** — `render_available_skills_context`
+  rendered each skill's file as `<skills_dir>/<frontmatter-name>/SKILL.md`,
+  which did not exist when the directory name differed from the frontmatter
+  `name` (community installs, manually-placed skills). `Skill` now carries the
+  real path captured at discovery and renders that.
+- **Missing-companion error was hostile to direct GitHub Release downloaders**
+  (#258) — replaced "Build workspace default members to install it" wall of
+  text with a concrete three-path checklist: `npm install -g deepseek-tui`,
+  `cargo install deepseek-tui-cli deepseek-tui --locked`, or downloading both
+  `deepseek-<platform>` AND `deepseek-tui-<platform>` from the same Release
+  page. `DEEPSEEK_TUI_BIN` stays as a power-user fallback.
+
+### Added
+- **Privacy: `$HOME` contracts to `~` in viewer-visible paths** — the TUI,
+  `deepseek doctor`, `deepseek setup`, and onboarding now contract the home
+  directory to `~` in every path shown on screen, so screenshots, screencasts,
+  and pasted help output do not leak the OS account name. Persisted state,
+  audit log, session checkpoints, and LLM-bound system prompts intentionally
+  keep absolute paths for full fidelity.
+- **`crates.io` badge** alongside the CI and npm badges in both English and
+  Simplified Chinese READMEs.
+- **Engine decomposition** (#227) — `core/engine.rs` is split into focused
+  submodules (`engine/{streaming,turn_loop,dispatch,tool_setup,tool_execution,tool_catalog,context,approval,capacity_flow,lsp_hooks,tests}.rs`).
+  No behavior change; preparation for the future agent-loop work.
+
+### Tests
+- RLM bridge: `batch_guard` extracted and tested for the empty-batch and
+  oversize-batch invariants; depth-guard fallback covered (partial #231).
+- Persistence: schema-version rejection covered for `load_session`,
+  `load_offline_queue_state`, `runtime_threads::load_turn`,
+  `runtime_threads::load_item` (partial #233).
+- Command palette: `[disabled]` server description tag (closes the
+  remaining #197 acceptance gap).
+- Protocol-recovery contract tests now scan the engine submodules in
+  addition to `engine.rs` so the decomposition refactor doesn't silently
+  hide the fake-wrapper marker assertions.
+
+### Issue triage
+- 10 issues closed with verification commits cited (#247, #235, #197,
+  #250, #234, #243, #238, #236, #239, #195).
+
 ## [0.8.2] - 2026-05-01
 
 ### Fixed
diff --git a/Cargo.lock b/Cargo.lock
index fc3c19c7..25b8cc66 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1011,7 +1011,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-agent"
-version = "0.8.2"
+version = "0.8.3"
 dependencies = [
  "deepseek-config",
  "serde",
@@ -1019,7 +1019,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-app-server"
-version = "0.8.2"
+version = "0.8.3"
 dependencies = [
  "anyhow",
  "axum",
@@ -1042,7 +1042,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-config"
-version = "0.8.2"
+version = "0.8.3"
 dependencies = [
  "anyhow",
  "deepseek-secrets",
@@ -1055,7 +1055,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-core"
-version = "0.8.2"
+version = "0.8.3"
 dependencies = [
  "anyhow",
  "chrono",
@@ -1074,7 +1074,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-execpolicy"
-version = "0.8.2"
+version = "0.8.3"
 dependencies = [
  "anyhow",
  "deepseek-protocol",
@@ -1083,7 +1083,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-hooks"
-version = "0.8.2"
+version = "0.8.3"
 dependencies = [
  "anyhow",
  "async-trait",
@@ -1097,7 +1097,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-mcp"
-version = "0.8.2"
+version = "0.8.3"
 dependencies = [
  "anyhow",
  "deepseek-protocol",
@@ -1107,7 +1107,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-protocol"
-version = "0.8.2"
+version = "0.8.3"
 dependencies = [
  "serde",
  "serde_json",
@@ -1115,7 +1115,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-secrets"
-version = "0.8.2"
+version = "0.8.3"
 dependencies = [
  "dirs",
  "keyring",
@@ -1128,7 +1128,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-state"
-version = "0.8.2"
+version = "0.8.3"
 dependencies = [
  "anyhow",
  "chrono",
@@ -1140,7 +1140,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-tools"
-version = "0.8.2"
+version = "0.8.3"
 dependencies = [
  "anyhow",
  "async-trait",
@@ -1153,7 +1153,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-tui"
-version = "0.8.2"
+version = "0.8.3"
 dependencies = [
  "anyhow",
  "arboard",
@@ -1214,7 +1214,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-tui-cli"
-version = "0.8.2"
+version = "0.8.3"
 dependencies = [
  "anyhow",
  "chrono",
@@ -1237,7 +1237,7 @@ dependencies = [
 
 [[package]]
 name = "deepseek-tui-core"
-version = "0.8.2"
+version = "0.8.3"
 
 [[package]]
 name = "deranged"
diff --git a/Cargo.toml b/Cargo.toml
index 7d75c363..b22cfdfa 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -19,7 +19,7 @@ default-members = ["crates/cli", "crates/app-server", "crates/tui"]
 resolver = "2"
 
 [workspace.package]
-version = "0.8.2"
+version = "0.8.3"
 edition = "2024"
 license = "MIT"
 repository = "https://github.com/Hmbown/DeepSeek-TUI"
diff --git a/crates/agent/Cargo.toml b/crates/agent/Cargo.toml
index 0745d4b3..ce061459 100644
--- a/crates/agent/Cargo.toml
+++ b/crates/agent/Cargo.toml
@@ -7,5 +7,5 @@ repository.workspace = true
 description = "Model/provider registry and fallback strategy for DeepSeek workspace architecture"
 
 [dependencies]
-deepseek-config = { path = "../config", version = "0.8.2" }
+deepseek-config = { path = "../config", version = "0.8.3" }
 serde.workspace = true
diff --git a/crates/app-server/Cargo.toml b/crates/app-server/Cargo.toml
index d0e0623d..60b1ab8e 100644
--- a/crates/app-server/Cargo.toml
+++ b/crates/app-server/Cargo.toml
@@ -10,15 +10,15 @@ description = "Codex-style app-server transport for DeepSeek workspace architect
 anyhow.workspace = true
 axum.workspace = true
 clap.workspace = true
-deepseek-agent = { path = "../agent", version = "0.8.2" }
-deepseek-config = { path = "../config", version = "0.8.2" }
-deepseek-core = { path = "../core", version = "0.8.2" }
-deepseek-execpolicy = { path = "../execpolicy", version = "0.8.2" }
-deepseek-hooks = { path = "../hooks", version = "0.8.2" }
-deepseek-mcp = { path = "../mcp", version = "0.8.2" }
-deepseek-protocol = { path = "../protocol", version = "0.8.2" }
-deepseek-state = { path = "../state", version = "0.8.2" }
-deepseek-tools = { path = "../tools", version = "0.8.2" }
+deepseek-agent = { path = "../agent", version = "0.8.3" }
+deepseek-config = { path = "../config", version = "0.8.3" }
+deepseek-core = { path = "../core", version = "0.8.3" }
+deepseek-execpolicy = { path = "../execpolicy", version = "0.8.3" }
+deepseek-hooks = { path = "../hooks", version = "0.8.3" }
+deepseek-mcp = { path = "../mcp", version = "0.8.3" }
+deepseek-protocol = { path = "../protocol", version = "0.8.3" }
+deepseek-state = { path = "../state", version = "0.8.3" }
+deepseek-tools = { path = "../tools", version = "0.8.3" }
 serde.workspace = true
 serde_json.workspace = true
 tokio.workspace = true
diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml
index af286a6c..bed047cc 100644
--- a/crates/cli/Cargo.toml
+++ b/crates/cli/Cargo.toml
@@ -14,13 +14,13 @@ path = "src/main.rs"
 anyhow.workspace = true
 clap.workspace = true
 clap_complete.workspace = true
-deepseek-agent = { path = "../agent", version = "0.8.2" }
-deepseek-app-server = { path = "../app-server", version = "0.8.2" }
-deepseek-config = { path = "../config", version = "0.8.2" }
-deepseek-execpolicy = { path = "../execpolicy", version = "0.8.2" }
-deepseek-mcp = { path = "../mcp", version = "0.8.2" }
-deepseek-secrets = { path = "../secrets", version = "0.8.2" }
-deepseek-state = { path = "../state", version = "0.8.2" }
+deepseek-agent = { path = "../agent", version = "0.8.3" }
+deepseek-app-server = { path = "../app-server", version = "0.8.3" }
+deepseek-config = { path = "../config", version = "0.8.3" }
+deepseek-execpolicy = { path = "../execpolicy", version = "0.8.3" }
+deepseek-mcp = { path = "../mcp", version = "0.8.3" }
+deepseek-secrets = { path = "../secrets", version = "0.8.3" }
+deepseek-state = { path = "../state", version = "0.8.3" }
 chrono.workspace = true
 dirs.workspace = true
 serde.workspace = true
diff --git a/crates/config/Cargo.toml b/crates/config/Cargo.toml
index ea0c3151..fea07d63 100644
--- a/crates/config/Cargo.toml
+++ b/crates/config/Cargo.toml
@@ -8,7 +8,7 @@ description = "Config schema and precedence model for DeepSeek workspace archite
 
 [dependencies]
 anyhow.workspace = true
-deepseek-secrets = { path = "../secrets", version = "0.8.2" }
+deepseek-secrets = { path = "../secrets", version = "0.8.3" }
 dirs.workspace = true
 serde.workspace = true
 serde_json.workspace = true
diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml
index c1aac52d..cda119ab 100644
--- a/crates/core/Cargo.toml
+++ b/crates/core/Cargo.toml
@@ -9,14 +9,14 @@ description = "Core runtime boundaries for DeepSeek workspace architecture"
 [dependencies]
 anyhow.workspace = true
 chrono.workspace = true
-deepseek-agent = { path = "../agent", version = "0.8.2" }
-deepseek-config = { path = "../config", version = "0.8.2" }
-deepseek-execpolicy = { path = "../execpolicy", version = "0.8.2" }
-deepseek-hooks = { path = "../hooks", version = "0.8.2" }
-deepseek-mcp = { path = "../mcp", version = "0.8.2" }
-deepseek-protocol = { path = "../protocol", version = "0.8.2" }
-deepseek-state = { path = "../state", version = "0.8.2" }
-deepseek-tools = { path = "../tools", version = "0.8.2" }
+deepseek-agent = { path = "../agent", version = "0.8.3" }
+deepseek-config = { path = "../config", version = "0.8.3" }
+deepseek-execpolicy = { path = "../execpolicy", version = "0.8.3" }
+deepseek-hooks = { path = "../hooks", version = "0.8.3" }
+deepseek-mcp = { path = "../mcp", version = "0.8.3" }
+deepseek-protocol = { path = "../protocol", version = "0.8.3" }
+deepseek-state = { path = "../state", version = "0.8.3" }
+deepseek-tools = { path = "../tools", version = "0.8.3" }
 serde_json.workspace = true
 tokio.workspace = true
 uuid.workspace = true
diff --git a/crates/execpolicy/Cargo.toml b/crates/execpolicy/Cargo.toml
index 1891a5a6..7d15b484 100644
--- a/crates/execpolicy/Cargo.toml
+++ b/crates/execpolicy/Cargo.toml
@@ -8,5 +8,5 @@ description = "Execution policy and approval model parity for DeepSeek workspace
 
 [dependencies]
 anyhow.workspace = true
-deepseek-protocol = { path = "../protocol", version = "0.8.2" }
+deepseek-protocol = { path = "../protocol", version = "0.8.3" }
 serde.workspace = true
diff --git a/crates/hooks/Cargo.toml b/crates/hooks/Cargo.toml
index 0edef814..c570f50b 100644
--- a/crates/hooks/Cargo.toml
+++ b/crates/hooks/Cargo.toml
@@ -10,7 +10,7 @@ description = "Hook dispatch and notifications parity for DeepSeek workspace arc
 anyhow.workspace = true
 async-trait.workspace = true
 chrono.workspace = true
-deepseek-protocol = { path = "../protocol", version = "0.8.2" }
+deepseek-protocol = { path = "../protocol", version = "0.8.3" }
 reqwest.workspace = true
 serde.workspace = true
 serde_json.workspace = true
diff --git a/crates/mcp/Cargo.toml b/crates/mcp/Cargo.toml
index f05ce838..532dbbd4 100644
--- a/crates/mcp/Cargo.toml
+++ b/crates/mcp/Cargo.toml
@@ -8,6 +8,6 @@ description = "MCP server lifecycle and tool proxy compatibility for DeepSeek wo
 
 [dependencies]
 anyhow.workspace = true
-deepseek-protocol = { path = "../protocol", version = "0.8.2" }
+deepseek-protocol = { path = "../protocol", version = "0.8.3" }
 serde.workspace = true
 serde_json.workspace = true
diff --git a/crates/tools/Cargo.toml b/crates/tools/Cargo.toml
index 9edb91c8..6accfd76 100644
--- a/crates/tools/Cargo.toml
+++ b/crates/tools/Cargo.toml
@@ -9,7 +9,7 @@ description = "Tool invocation lifecycle, schema validation, and scheduler paral
 [dependencies]
 anyhow.workspace = true
 async-trait.workspace = true
-deepseek-protocol = { path = "../protocol", version = "0.8.2" }
+deepseek-protocol = { path = "../protocol", version = "0.8.3" }
 serde.workspace = true
 serde_json.workspace = true
 tokio.workspace = true
diff --git a/crates/tui/Cargo.toml b/crates/tui/Cargo.toml
index e880e936..91c0c3fe 100644
--- a/crates/tui/Cargo.toml
+++ b/crates/tui/Cargo.toml
@@ -14,9 +14,9 @@ path = "src/main.rs"
 [dependencies]
 anyhow = "1.0.100"
 arboard = "3.4"
-deepseek-tui-cli = { path = "../cli", version = "0.8.2" }
-deepseek-secrets = { path = "../secrets", version = "0.8.2" }
-deepseek-tools = { path = "../tools", version = "0.8.2" }
+deepseek-tui-cli = { path = "../cli", version = "0.8.3" }
+deepseek-secrets = { path = "../secrets", version = "0.8.3" }
+deepseek-tools = { path = "../tools", version = "0.8.3" }
 async-stream = "0.3.6"
 async-trait = "0.1"
 bytes = "1.11.0"
diff --git a/npm/deepseek-tui/package.json b/npm/deepseek-tui/package.json
index 1154b7ba..bfc940e7 100644
--- a/npm/deepseek-tui/package.json
+++ b/npm/deepseek-tui/package.json
@@ -1,7 +1,7 @@
 {
   "name": "deepseek-tui",
-  "version": "0.8.2",
-  "deepseekBinaryVersion": "0.8.2",
+  "version": "0.8.3",
+  "deepseekBinaryVersion": "0.8.3",
   "description": "Install and run deepseek and deepseek-tui binaries from GitHub release artifacts.",
   "author": "Hmbown",
   "license": "MIT",

From 1042e37fbddfa6140c3748f4371d92c96583c053 Mon Sep 17 00:00:00 2001
From: Hunter Bown <hmbown@gmail.com>
Date: Fri, 1 May 2026 11:06:42 -0500
Subject: [PATCH 16/17] refactor(cli): centralize feature command output

---
 crates/tui/src/commands/mod.rs | 47 +++++++++++++++++++---
 crates/tui/src/features.rs     | 72 +++++++++++++++++++++++++++++-----
 crates/tui/src/main.rs         | 27 +++----------
 3 files changed, 108 insertions(+), 38 deletions(-)

diff --git a/crates/tui/src/commands/mod.rs b/crates/tui/src/commands/mod.rs
index 187d10bf..bfd66bb2 100644
--- a/crates/tui/src/commands/mod.rs
+++ b/crates/tui/src/commands/mod.rs
@@ -303,12 +303,6 @@ pub const COMMANDS: &[CommandInfo] = &[
         description: "Show current system prompt",
         usage: "/system",
     },
-    CommandInfo {
-        name: "context",
-        aliases: &[],
-        description: "Show context window usage",
-        usage: "/context",
-    },
     CommandInfo {
         name: "undo",
         aliases: &[],
@@ -687,6 +681,47 @@ mod tests {
         assert_eq!(links.aliases, &["dashboard", "api"]);
     }
 
+    #[test]
+    fn command_registry_has_unique_names_and_aliases() {
+        let mut names = std::collections::BTreeSet::new();
+        for command in COMMANDS {
+            assert!(
+                names.insert(command.name),
+                "duplicate command name /{}",
+                command.name
+            );
+        }
+
+        let mut aliases = std::collections::BTreeSet::new();
+        for command in COMMANDS {
+            for alias in command.aliases {
+                assert!(
+                    !names.contains(alias),
+                    "alias /{} collides with a command name",
+                    alias
+                );
+                assert!(aliases.insert(*alias), "duplicate command alias /{alias}");
+            }
+        }
+    }
+
+    #[test]
+    fn context_command_opens_inspector_and_keeps_ctx_alias() {
+        let context = COMMANDS
+            .iter()
+            .find(|cmd| cmd.name == "context")
+            .expect("context command should exist");
+        assert_eq!(context.aliases, &["ctx"]);
+        assert!(context.description.contains("inspector"));
+
+        let mut app = create_test_app();
+        let result = execute("/ctx", &mut app);
+        assert!(matches!(
+            result.action,
+            Some(AppAction::OpenContextInspector)
+        ));
+    }
+
     #[test]
     fn execute_config_opens_config_view_action() {
         let mut app = create_test_app();
diff --git a/crates/tui/src/features.rs b/crates/tui/src/features.rs
index 3736ce66..0dbc4f81 100644
--- a/crates/tui/src/features.rs
+++ b/crates/tui/src/features.rs
@@ -1,10 +1,9 @@
-// TODO(integrate): Wire feature flags into engine/tool registration — tracked as future work
 #![allow(dead_code)]
 
 //! Feature flags and metadata for DeepSeek TUI.
 
 use std::collections::{BTreeMap, BTreeSet};
-use std::fmt;
+use std::fmt::{self, Write as _};
 
 use serde::{Deserialize, Serialize};
 
@@ -18,6 +17,18 @@ pub enum Stage {
     Removed,
 }
 
+impl Stage {
+    pub fn as_str(self) -> &'static str {
+        match self {
+            Self::Experimental => "experimental",
+            Self::Beta => "beta",
+            Self::Stable => "stable",
+            Self::Deprecated => "deprecated",
+            Self::Removed => "removed",
+        }
+    }
+}
+
 /// Unique features toggled via configuration.
 #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub enum Feature {
@@ -37,14 +48,7 @@ pub enum Feature {
 
 impl fmt::Display for Stage {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        let label = match self {
-            Self::Experimental => "experimental",
-            Self::Beta => "beta",
-            Self::Stable => "stable",
-            Self::Deprecated => "deprecated",
-            Self::Removed => "removed",
-        };
-        f.write_str(label)
+        f.write_str(self.as_str())
     }
 }
 
@@ -136,6 +140,20 @@ pub fn feature_spec_by_key(key: &str) -> Option<&'static FeatureSpec> {
     FEATURES.iter().find(|spec| spec.key == key)
 }
 
+pub fn render_feature_table(features: &Features) -> String {
+    let mut output = String::from("feature\tstage\tenabled\n");
+    for spec in FEATURES {
+        let _ = writeln!(
+            output,
+            "{}\t{}\t{}",
+            spec.key,
+            spec.stage,
+            features.enabled(spec.id)
+        );
+    }
+    output
+}
+
 /// Deserializable features table for TOML.
 #[derive(Serialize, Deserialize, Debug, Clone, Default, PartialEq)]
 pub struct FeaturesToml {
@@ -190,3 +208,37 @@ pub const FEATURES: &[FeatureSpec] = &[
         default_enabled: true,
     },
 ];
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn apply_map_toggles_known_features_and_ignores_unknown_keys() {
+        let mut features = Features::with_defaults();
+        let entries = BTreeMap::from([
+            ("mcp".to_string(), false),
+            ("shell_tool".to_string(), false),
+            ("not_real".to_string(), false),
+        ]);
+
+        features.apply_map(&entries);
+
+        assert!(!features.enabled(Feature::Mcp));
+        assert!(!features.enabled(Feature::ShellTool));
+        assert_eq!(feature_from_key("not_real"), None);
+    }
+
+    #[test]
+    fn render_feature_table_uses_registry_order_and_effective_state() {
+        let mut features = Features::with_defaults();
+        features.disable(Feature::Mcp);
+
+        let table = render_feature_table(&features);
+        let lines = table.lines().collect::<Vec<_>>();
+
+        assert_eq!(lines.first(), Some(&"feature\tstage\tenabled"));
+        assert!(lines.contains(&"shell_tool\tstable\ttrue"));
+        assert!(lines.contains(&"mcp\texperimental\tfalse"));
+    }
+}
diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs
index f87d08b9..5b21d7f5 100644
--- a/crates/tui/src/main.rs
+++ b/crates/tui/src/main.rs
@@ -63,7 +63,7 @@ mod workspace_trust;
 
 use crate::config::{Config, DEFAULT_TEXT_MODEL, MAX_SUBAGENTS};
 use crate::eval::{EvalHarness, EvalHarnessConfig, ScenarioStepKind};
-use crate::features::Feature;
+use crate::features::{Feature, render_feature_table};
 use crate::llm_client::LlmClient;
 use crate::mcp::{McpConfig, McpPool, McpServerConfig};
 use crate::models::{ContentBlock, Message, MessageRequest, SystemPrompt};
@@ -1876,30 +1876,13 @@ fn run_execpolicy_command(command: ExecpolicyCommand) -> Result<()> {
 
 fn run_features_command(config: &Config, command: FeaturesCli) -> Result<()> {
     match command.command {
-        FeaturesSubcommand::List => run_features_list(config),
+        FeaturesSubcommand::List => {
+            print!("{}", render_feature_table(&config.features()));
+            Ok(())
+        }
     }
 }
 
-fn stage_str(stage: features::Stage) -> &'static str {
-    match stage {
-        features::Stage::Experimental => "experimental",
-        features::Stage::Beta => "beta",
-        features::Stage::Stable => "stable",
-        features::Stage::Deprecated => "deprecated",
-        features::Stage::Removed => "removed",
-    }
-}
-
-fn run_features_list(config: &Config) -> Result<()> {
-    let features = config.features();
-    println!("feature\tstage\tenabled");
-    for spec in features::FEATURES {
-        let enabled = features.enabled(spec.id);
-        println!("{}\t{}\t{enabled}", spec.key, stage_str(spec.stage));
-    }
-    Ok(())
-}
-
 async fn run_models(config: &Config, args: ModelsArgs) -> Result<()> {
     use crate::client::DeepSeekClient;
 

From 997c7f4bcdde12183820a670e46eb7b9b5c06731 Mon Sep 17 00:00:00 2001
From: Hunter Bown <hmbown@gmail.com>
Date: Fri, 1 May 2026 11:06:45 -0500
Subject: [PATCH 17/17] chore(release): verify dual registry publish state

---
 docs/RELEASE_RUNBOOK.md            |  11 +++
 scripts/release/check-published.sh |  95 +++++++++++++++++++++++++
 scripts/release/crates.sh          |  19 +++++
 scripts/release/publish-crates.sh  | 110 ++++++++++++++---------------
 4 files changed, 178 insertions(+), 57 deletions(-)
 create mode 100755 scripts/release/check-published.sh
 create mode 100755 scripts/release/crates.sh

diff --git a/docs/RELEASE_RUNBOOK.md b/docs/RELEASE_RUNBOOK.md
index 5aaabd18..9179fcc8 100644
--- a/docs/RELEASE_RUNBOOK.md
+++ b/docs/RELEASE_RUNBOOK.md
@@ -90,6 +90,17 @@ Set `DEEPSEEK_TUI_VERSION` to the npm package version you are verifying for that
 The CI workflow runs the same tarball install + delegated-entrypoint smoke test
 on Linux, macOS, and Windows.
 
+After publishing, prove the release is visible in both registries:
+
+```bash
+./scripts/release/check-published.sh X.Y.Z
+```
+
+Do not mark a Rust release complete until that command sees `deepseek-tui@X.Y.Z`
+on npm and every `deepseek-*` crate at `X.Y.Z` on crates.io. For a rare
+npm packaging-only release, run with `--allow-npm-binary-mismatch` and keep the
+release notes explicit that no new Rust binary version shipped.
+
 ## Rust Crates Release
 
 1. Update the workspace version in [Cargo.toml](../Cargo.toml).
diff --git a/scripts/release/check-published.sh b/scripts/release/check-published.sh
new file mode 100755
index 00000000..e02a1936
--- /dev/null
+++ b/scripts/release/check-published.sh
@@ -0,0 +1,95 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+repo_root="$(cd "${script_dir}/../.." && pwd)"
+# shellcheck source=scripts/release/crates.sh
+source "${script_dir}/crates.sh"
+
+usage() {
+  cat <<'EOF'
+usage: scripts/release/check-published.sh [--allow-npm-binary-mismatch] [VERSION]
+
+Verifies that a release version is visible on both npm and crates.io.
+Defaults VERSION to the workspace version in Cargo.toml.
+
+Use --allow-npm-binary-mismatch only for npm packaging-only releases where
+the npm package intentionally points at an older GitHub binary release.
+EOF
+}
+
+allow_npm_binary_mismatch=0
+version=""
+
+while (($# > 0)); do
+  case "$1" in
+    --allow-npm-binary-mismatch)
+      allow_npm_binary_mismatch=1
+      ;;
+    -h|--help)
+      usage
+      exit 0
+      ;;
+    *)
+      if [[ -n "${version}" ]]; then
+        usage >&2
+        exit 2
+      fi
+      version="$1"
+      ;;
+  esac
+  shift
+done
+
+cd "${repo_root}"
+
+if [[ -z "${version}" ]]; then
+  version="$(grep -E '^version = "' Cargo.toml | head -n1 | sed -E 's/^version = "([^"]+)".*/\1/')"
+fi
+
+if [[ -z "${version}" ]]; then
+  echo "Could not determine release version." >&2
+  exit 1
+fi
+
+fail=0
+
+echo "Checking published release ${version}..."
+
+if npm_version="$(npm view "deepseek-tui@${version}" version 2>/dev/null)"; then
+  echo "npm deepseek-tui@${npm_version} is published."
+else
+  echo "npm deepseek-tui@${version} is not published." >&2
+  fail=1
+fi
+
+if npm_binary_version="$(npm view "deepseek-tui@${version}" deepseekBinaryVersion 2>/dev/null)"; then
+  if [[ "${npm_binary_version}" == "${version}" ]]; then
+    echo "npm deepseekBinaryVersion=${npm_binary_version}."
+  elif [[ "${allow_npm_binary_mismatch}" == "1" ]]; then
+    echo "npm deepseekBinaryVersion=${npm_binary_version} (allowed packaging-only mismatch)."
+  else
+    echo "npm deepseekBinaryVersion=${npm_binary_version}, expected ${version}." >&2
+    fail=1
+  fi
+elif [[ "${allow_npm_binary_mismatch}" == "1" ]]; then
+  echo "npm deepseekBinaryVersion is absent (allowed packaging-only mismatch)."
+else
+  echo "npm deepseekBinaryVersion is absent for deepseek-tui@${version}." >&2
+  fail=1
+fi
+
+for crate in "${release_crates[@]}"; do
+  if curl -fsSL "https://crates.io/api/v1/crates/${crate}/${version}" >/dev/null 2>&1; then
+    echo "crates.io ${crate}@${version} is published."
+  else
+    echo "crates.io ${crate}@${version} is not published." >&2
+    fail=1
+  fi
+done
+
+if [[ "${fail}" == "0" ]]; then
+  echo "Published release OK: npm deepseek-tui@${version} and ${#release_crates[@]} crates are visible."
+fi
+
+exit "${fail}"
diff --git a/scripts/release/crates.sh b/scripts/release/crates.sh
new file mode 100755
index 00000000..34b0e188
--- /dev/null
+++ b/scripts/release/crates.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+
+# Crates published for each DeepSeek TUI release, in dependency order.
+release_crates=(
+  deepseek-secrets
+  deepseek-config
+  deepseek-protocol
+  deepseek-state
+  deepseek-agent
+  deepseek-execpolicy
+  deepseek-hooks
+  deepseek-mcp
+  deepseek-tools
+  deepseek-core
+  deepseek-app-server
+  deepseek-tui-core
+  deepseek-tui-cli
+  deepseek-tui
+)
diff --git a/scripts/release/publish-crates.sh b/scripts/release/publish-crates.sh
index 2c836124..2c52b800 100755
--- a/scripts/release/publish-crates.sh
+++ b/scripts/release/publish-crates.sh
@@ -1,6 +1,10 @@
 #!/usr/bin/env bash
 set -euo pipefail
 
+script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# shellcheck source=scripts/release/crates.sh
+source "${script_dir}/crates.sh"
+
 mode="${1:-dry-run}"
 case "${mode}" in
   dry-run|publish) ;;
@@ -10,42 +14,22 @@ case "${mode}" in
     ;;
 esac
 
-packages=(
-  deepseek-secrets
-  deepseek-config
-  deepseek-protocol
-  deepseek-state
-  deepseek-agent
-  deepseek-execpolicy
-  deepseek-hooks
-  deepseek-mcp
-  deepseek-tools
-  deepseek-core
-  deepseek-app-server
-  deepseek-tui-core
-  deepseek-tui-cli
-  deepseek-tui
-)
-
-workspace_version="$(
-  python3 - <<'PY'
-import json
-import subprocess
-
-metadata = json.loads(
-    subprocess.check_output(["cargo", "metadata", "--format-version", "1", "--no-deps"])
-)
-workspace_members = set(metadata["workspace_members"])
-for pkg in metadata["packages"]:
-    if pkg["id"] in workspace_members:
-        print(pkg["version"])
-        break
-PY
-)"
+packages=("${release_crates[@]}")
 
+workspace_version=""
 workspace_deepseek_packages=()
-while IFS= read -r workspace_package; do
-  workspace_deepseek_packages+=("${workspace_package}")
+workspace_package_dep_flags=()
+
+while IFS=$'\t' read -r kind name value; do
+  case "${kind}" in
+    version)
+      workspace_version="${name}"
+      ;;
+    crate)
+      workspace_deepseek_packages+=("${name}")
+      workspace_package_dep_flags+=("${value}")
+      ;;
+  esac
 done < <(
   python3 - <<'PY'
 import json
@@ -55,12 +39,34 @@ metadata = json.loads(
     subprocess.check_output(["cargo", "metadata", "--format-version", "1", "--no-deps"])
 )
 workspace_members = set(metadata["workspace_members"])
-for pkg in sorted(metadata["packages"], key=lambda item: item["name"]):
-    if pkg["id"] in workspace_members and pkg["name"].startswith("deepseek-"):
-        print(pkg["name"])
+workspace_packages = [
+    pkg for pkg in metadata["packages"] if pkg["id"] in workspace_members
+]
+workspace_by_name = {pkg["name"]: pkg for pkg in workspace_packages}
+
+versions = sorted({pkg["version"] for pkg in workspace_packages})
+if not versions:
+    raise SystemExit("workspace has no packages")
+if len(versions) != 1:
+    raise SystemExit(f"workspace packages have mixed versions: {', '.join(versions)}")
+print(f"version\t{versions[0]}\t")
+
+for pkg in sorted(workspace_packages, key=lambda item: item["name"]):
+    if not pkg["name"].startswith("deepseek-"):
+        continue
+    has_workspace_dep = any(
+        dep.get("path") and dep["name"] in workspace_by_name
+        for dep in pkg["dependencies"]
+    )
+    print(f"crate\t{pkg['name']}\t{1 if has_workspace_dep else 0}")
 PY
 )
 
+if [[ -z "${workspace_version}" ]]; then
+  echo "Could not determine workspace version." >&2
+  exit 1
+fi
+
 missing_packages=()
 for workspace_package in "${workspace_deepseek_packages[@]}"; do
   found=0
@@ -101,26 +107,16 @@ fi
 
 package_has_workspace_deps() {
   local package_name="$1"
-  python3 - "${package_name}" <<'PY'
-import json
-import subprocess
-import sys
+  local index
+  for ((index = 0; index < ${#workspace_deepseek_packages[@]}; index += 1)); do
+    if [[ "${workspace_deepseek_packages[$index]}" == "${package_name}" ]]; then
+      [[ "${workspace_package_dep_flags[$index]}" == "1" ]]
+      return
+    fi
+  done
 
-package_name = sys.argv[1]
-metadata = json.loads(
-    subprocess.check_output(["cargo", "metadata", "--format-version", "1", "--no-deps"])
-)
-workspace_ids = set(metadata["workspace_members"])
-workspace_packages = {
-    pkg["name"]: pkg for pkg in metadata["packages"] if pkg["id"] in workspace_ids
-}
-package = workspace_packages[package_name]
-has_workspace_dep = any(
-    dep.get("path") and dep["name"] in workspace_packages
-    for dep in package["dependencies"]
-)
-print("1" if has_workspace_dep else "0")
-PY
+  echo "Unknown workspace crate: ${package_name}" >&2
+  return 1
 }
 
 crate_version_exists() {
@@ -149,7 +145,7 @@ wait_for_crate_version() {
 for package in "${packages[@]}"; do
   echo "::group::${mode} ${package}"
   if [[ "${mode}" == "dry-run" ]]; then
-    if [[ "$(package_has_workspace_deps "${package}")" == "1" ]]; then
+    if package_has_workspace_deps "${package}"; then
       cargo package --allow-dirty --locked --list -p "${package}" >/dev/null
       echo "Verified package contents for ${package}; full crates.io dry-run requires workspace dependencies at ${workspace_version} to be published first."
     else