From eac8d82ab85b104449e4e40e391a0cd4798364a3 Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Fri, 1 May 2026 02:36:56 -0500 Subject: [PATCH 01/17] test(palette): cover disabled MCP server case (#197) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Acceptance list called for tests of no-config / healthy / disabled / failed servers. Healthy and failed already had a single dense test (`command_palette_includes_mcp_discovery_and_failed_servers`); no-config is implicit in the existing call sites that pass `None` for the snapshot. Disabled was the actual gap — adds one focused case asserting the `[disabled]` state tag appears in the rendered description so users can see disabled servers in the palette without opening the MCP manager. Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/tui/src/tui/command_palette.rs | 44 +++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/crates/tui/src/tui/command_palette.rs b/crates/tui/src/tui/command_palette.rs index 1c8a57cd..3dedae7f 100644 --- a/crates/tui/src/tui/command_palette.rs +++ b/crates/tui/src/tui/command_palette.rs @@ -1017,6 +1017,50 @@ mod tests { assert_eq!(use_entry.command, "mcp_fs_read"); } + #[test] + fn command_palette_marks_disabled_servers_visibly() { + // The healthy/failed cases are covered above; disabled was the + // remaining gap from #197's acceptance list. Disabled servers must + // appear in the palette with a `[disabled]` state tag so users can + // see them without opening the MCP manager. + let snapshot = crate::mcp::McpManagerSnapshot { + config_path: Path::new("mcp.json").to_path_buf(), + config_exists: true, + restart_required: false, + servers: vec![crate::mcp::McpServerSnapshot { + name: "muted".to_string(), + enabled: false, + required: false, + transport: "stdio".to_string(), + command_or_url: "node disabled.js".to_string(), + connect_timeout: 10, + execute_timeout: 60, + read_timeout: 120, + connected: false, + error: None, + tools: Vec::new(), + resources: Vec::new(), + prompts: Vec::new(), + }], + }; + let entries = build_entries( + Path::new("."), + Path::new("."), + Path::new("mcp.json"), + Some(&snapshot), + ); + + let muted = entries + .iter() + .find(|entry| entry.label == "mcp:muted") + .expect("disabled server should still appear in the palette"); + assert!( + muted.description.contains("[disabled]"), + "expected `[disabled]` state tag in description, got: {}", + muted.description + ); + } + #[test] fn command_palette_emits_actions_not_raw_insertions() { let entries = vec![CommandPaletteEntry { From d8acd6e3cb8ae29a9d0d6e2753ab4ce3d7f140a8 Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Fri, 1 May 2026 02:36:56 -0500 Subject: [PATCH 02/17] fix(skills): use real on-disk path in model-visible block, not frontmatter name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `render_available_skills_context` rendered each skill's file path as `//SKILL.md`. The directory name and the frontmatter `name` can differ — community installs and manually-placed skills routinely have this drift — and when they do, the model is told the file lives at a path that does not exist, so it can't open the SKILL.md it needs to actually use the skill. `Skill` now carries a `path: PathBuf` populated by `discover()` from the real directory entry. Renderer uses it directly. Adds a regression test that creates a skill at `weird-dir-name/SKILL.md` with `name: friendly-name` and asserts the rendered prompt contains the real path and not the fabricated one. Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/tui/src/skills/mod.rs | 66 +++++++++++++++++++++++++++++++++--- 1 file changed, 62 insertions(+), 4 deletions(-) diff --git a/crates/tui/src/skills/mod.rs b/crates/tui/src/skills/mod.rs index 7e01766a..d795e5d6 100644 --- a/crates/tui/src/skills/mod.rs +++ b/crates/tui/src/skills/mod.rs @@ -44,6 +44,11 @@ pub struct Skill { pub name: String, pub description: String, pub body: String, + /// On-disk path to the `SKILL.md` this was loaded from. The directory + /// name can differ from the frontmatter `name` for community installs + /// or manually-placed skills, so callers must use this rather than + /// reconstructing `//SKILL.md`. + pub path: PathBuf, } /// Collection of discovered skills. @@ -70,7 +75,10 @@ impl SkillRegistry { let skill_path = entry.path().join("SKILL.md"); match fs::read_to_string(&skill_path) { Ok(content) => match Self::parse_skill(&skill_path, &content) { - Ok(skill) => registry.skills.push(skill), + Ok(mut skill) => { + skill.path = skill_path.clone(); + registry.skills.push(skill); + } Err(reason) => registry.push_warning(format!( "Failed to parse {}: {reason}", skill_path.display() @@ -137,6 +145,9 @@ impl SkillRegistry { name, description, body, + // Filled in by `discover` after parse succeeds; default to an + // empty path so direct constructors (e.g. tests) compile. + path: PathBuf::new(), }) } @@ -196,16 +207,19 @@ instructions when using a specific skill.\n\n", let mut omitted = 0usize; for skill in skills { - let path = skills_dir.join(&skill.name).join("SKILL.md"); + // Use the real on-disk path captured at discovery — the directory + // name can differ from the frontmatter `name` for community + // installs, in which case `//SKILL.md` would not exist + // and the model would fail to open it. let description = truncate_for_prompt(&skill.description, MAX_SKILL_DESCRIPTION_CHARS); let line = if description.is_empty() { - format!("- {}: (file: {})\n", skill.name, path.display()) + format!("- {}: (file: {})\n", skill.name, skill.path.display()) } else { format!( "- {}: {} (file: {})\n", skill.name, description, - path.display() + skill.path.display() ) }; @@ -336,6 +350,50 @@ mod tests { assert!(rendered.contains("### How to use skills")); } + #[test] + fn render_available_skills_context_uses_real_dir_name_not_frontmatter_name() { + // Regression: when a community-installed or manually-placed skill + // lives in a directory whose name differs from its frontmatter + // `name`, the rendered prompt must point to the real on-disk file + // path, not //SKILL.md (which does + // not exist). + let tmpdir = TempDir::new().unwrap(); + create_skill_dir( + &tmpdir, + "weird-dir-name", + "---\nname: friendly-name\ndescription: drift case\n---\nbody", + ); + + let rendered = crate::skills::render_available_skills_context( + &tmpdir.path().join("skills"), + ) + .expect("skill context"); + + let real_path = tmpdir + .path() + .join("skills") + .join("weird-dir-name") + .join("SKILL.md") + .display() + .to_string(); + let stale_path = tmpdir + .path() + .join("skills") + .join("friendly-name") + .join("SKILL.md") + .display() + .to_string(); + + assert!( + rendered.contains(&real_path), + "expected real on-disk path {real_path:?} in rendered output, got:\n{rendered}" + ); + assert!( + !rendered.contains(&stale_path), + "rendered output must not invent a path under the frontmatter name:\n{rendered}" + ); + } + #[test] fn render_available_skills_context_returns_none_when_empty() { let tmpdir = TempDir::new().unwrap(); From 1512afae69569889231b1f5bb746596aefba090e Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Fri, 1 May 2026 02:46:20 -0500 Subject: [PATCH 03/17] feat(privacy): contract \$HOME to ~ in user-visible display paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Anywhere the TUI, doctor stdout, setup stdout, or onboarding shows a file path, it used to print the absolute form (e.g. /Users//...). On macOS/Linux the home-directory segment reveals the OS account name, which is often the same as a public handle — undesirable for users who share screenshots, screencasts, or paste doctor output into a public help request. Adds `crate::utils::display_path` that contracts a leading $HOME to `~` and falls through unchanged otherwise. Used at every viewer-visible site: doctor: workspace, config.toml, MCP config, all skills dirs, selected skills dir, tools dir, plugins dir setup: workspace, skills/tools/plugins paths and status output TUI: context inspector header, trust-directory onboarding, shell-job cwd (sidebar + detail pager), subagent task header Persisted state, audit log, session checkpoints, and LLM-bound system prompts intentionally keep the absolute path — those need full fidelity to resolve correctly across processes and the LLM provider sees absolute paths anyway by virtue of the workspace summary. `display_path` has 4 tests covering: home contraction, bare-`~` for home itself, untouched-when-unrelated, and a username-prefix regression guard (so `/Users/alice2/...` doesn't get rewritten when $HOME is `/Users/alice`). Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/tui/src/main.rs | 54 +++++----- crates/tui/src/tui/context_inspector.rs | 6 +- .../tui/src/tui/onboarding/trust_directory.rs | 2 +- crates/tui/src/tui/shell_job_routing.rs | 4 +- crates/tui/src/tui/subagent_routing.rs | 5 +- crates/tui/src/utils.rs | 98 +++++++++++++++++++ 6 files changed, 140 insertions(+), 29 deletions(-) diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs index de6544fa..9238975a 100644 --- a/crates/tui/src/main.rs +++ b/crates/tui/src/main.rs @@ -979,7 +979,7 @@ fn run_setup(config: &Config, workspace: &Path, args: SetupArgs) -> Result<()> { "DeepSeek Setup".truecolor(aqua_r, aqua_g, aqua_b).bold() ); println!("{}", "==============".truecolor(sky_r, sky_g, sky_b)); - println!("Workspace: {}", workspace.display()); + println!("Workspace: {}", crate::utils::display_path(workspace)); if run_mcp { let mcp_path = config.mcp_config_path(); @@ -1022,10 +1022,13 @@ fn run_setup(config: &Config, workspace: &Path, args: SetupArgs) -> Result<()> { if args.local { println!( " Local skills dir enabled for this workspace: {}", - skills_dir.display() + crate::utils::display_path(&skills_dir) ); } else { - println!(" Skills dir: {}", skills_dir.display()); + println!( + " Skills dir: {}", + crate::utils::display_path(&skills_dir) + ); } println!(" Next: run the TUI and use `/skills` then `/skill getting-started`."); } @@ -1035,7 +1038,7 @@ fn run_setup(config: &Config, workspace: &Path, args: SetupArgs) -> Result<()> { let (dir, readme_status, example_status) = init_tools_dir(&tools_dir, args.force)?; report_write_status("Tools README", &dir.join("README.md"), readme_status); report_write_status("Example tool", &dir.join("example.sh"), example_status); - println!(" Tools dir: {}", dir.display()); + println!(" Tools dir: {}", crate::utils::display_path(&dir)); println!(" Next: drop scripts here; surface them via skills/MCP when ready."); } @@ -1045,7 +1048,10 @@ fn run_setup(config: &Config, workspace: &Path, args: SetupArgs) -> Result<()> { init_plugins_dir(&plugins_dir, args.force)?; report_write_status("Plugins README", &readme_path, readme_status); report_write_status("Example plugin", &example_path, example_status); - println!(" Plugins dir: {}", plugins_dir.display()); + println!( + " Plugins dir: {}", + crate::utils::display_path(&plugins_dir) + ); println!(" Next: copy the example dir, edit PLUGIN.md, wire via skill/MCP."); } @@ -1200,7 +1206,7 @@ fn run_setup_status(config: &Config, workspace: &Path) -> Result<()> { println!( " · skills: {} at {}", skills_count_for(&skills_dir), - skills_dir.display() + crate::utils::display_path(&skills_dir) ); let tools_dir = default_tools_dir(); @@ -1216,7 +1222,7 @@ fn run_setup_status(config: &Config, workspace: &Path) -> Result<()> { } else { 0 }, - tools_dir.display() + crate::utils::display_path(&tools_dir) ); let plugins_dir = default_plugins_dir(); @@ -1232,7 +1238,7 @@ fn run_setup_status(config: &Config, workspace: &Path) -> Result<()> { } else { 0 }, - plugins_dir.display() + crate::utils::display_path(&plugins_dir) ); let sandbox = crate::sandbox::get_platform_sandbox(); @@ -1348,16 +1354,16 @@ async fn run_doctor(config: &Config, workspace: &Path, config_path_override: Opt println!( " {} config.toml found at {}", "✓".truecolor(aqua_r, aqua_g, aqua_b), - config_path.display() + crate::utils::display_path(&config_path) ); } else { println!( " {} config.toml not found at {} (using defaults/env)", "!".truecolor(sky_r, sky_g, sky_b), - config_path.display() + crate::utils::display_path(&config_path) ); } - println!(" workspace: {}", workspace.display()); + println!(" workspace: {}", crate::utils::display_path(workspace)); // Check API keys println!(); @@ -1477,7 +1483,7 @@ async fn run_doctor(config: &Config, workspace: &Path, config_path_override: Opt println!( " {} MCP config found at {}", "✓".truecolor(aqua_r, aqua_g, aqua_b), - mcp_config_path.display() + crate::utils::display_path(&mcp_config_path) ); match load_mcp_config(&mcp_config_path) { Ok(cfg) if cfg.servers.is_empty() => { @@ -1532,7 +1538,7 @@ async fn run_doctor(config: &Config, workspace: &Path, config_path_override: Opt println!( " {} MCP config not found at {}", "·".dimmed(), - mcp_config_path.display() + crate::utils::display_path(&mcp_config_path) ); println!(" Run `deepseek mcp init` or `deepseek setup --mcp`."); } @@ -1561,14 +1567,14 @@ async fn run_doctor(config: &Config, workspace: &Path, config_path_override: Opt println!( " {} local skills dir found at {} ({} items)", "✓".truecolor(aqua_r, aqua_g, aqua_b), - local_skills_dir.display(), + crate::utils::display_path(&local_skills_dir), describe_dir(&local_skills_dir) ); } else { println!( " {} local skills dir not found at {}", "·".dimmed(), - local_skills_dir.display() + crate::utils::display_path(&local_skills_dir) ); } @@ -1576,14 +1582,14 @@ async fn run_doctor(config: &Config, workspace: &Path, config_path_override: Opt println!( " {} .agents skills dir found at {} ({} items)", "✓".truecolor(aqua_r, aqua_g, aqua_b), - agents_skills_dir.display(), + crate::utils::display_path(&agents_skills_dir), describe_dir(&agents_skills_dir) ); } else { println!( " {} .agents skills dir not found at {}", "·".dimmed(), - agents_skills_dir.display() + crate::utils::display_path(&agents_skills_dir) ); } @@ -1591,21 +1597,21 @@ async fn run_doctor(config: &Config, workspace: &Path, config_path_override: Opt println!( " {} global skills dir found at {} ({} items)", "✓".truecolor(aqua_r, aqua_g, aqua_b), - global_skills_dir.display(), + crate::utils::display_path(&global_skills_dir), describe_dir(&global_skills_dir) ); } else { println!( " {} global skills dir not found at {}", "·".dimmed(), - global_skills_dir.display() + crate::utils::display_path(&global_skills_dir) ); } println!( " {} selected skills dir: {}", "·".dimmed(), - selected_skills_dir.display() + crate::utils::display_path(&selected_skills_dir) ); if !agents_skills_dir.exists() && !local_skills_dir.exists() && !global_skills_dir.exists() { println!(" Run `deepseek setup --skills` (or add --local for ./skills)."); @@ -1620,14 +1626,14 @@ async fn run_doctor(config: &Config, workspace: &Path, config_path_override: Opt println!( " {} tools dir found at {} ({} items)", "✓".truecolor(aqua_r, aqua_g, aqua_b), - tools_dir.display(), + crate::utils::display_path(&tools_dir), count ); } else { println!( " {} tools dir not found at {}", "·".dimmed(), - tools_dir.display() + crate::utils::display_path(&tools_dir) ); println!(" Run `deepseek-tui setup --tools` to scaffold a starter dir."); } @@ -1641,14 +1647,14 @@ async fn run_doctor(config: &Config, workspace: &Path, config_path_override: Opt println!( " {} plugins dir found at {} ({} items)", "✓".truecolor(aqua_r, aqua_g, aqua_b), - plugins_dir.display(), + crate::utils::display_path(&plugins_dir), count ); } else { println!( " {} plugins dir not found at {}", "·".dimmed(), - plugins_dir.display() + crate::utils::display_path(&plugins_dir) ); println!(" Run `deepseek-tui setup --plugins` to scaffold a starter dir."); } diff --git a/crates/tui/src/tui/context_inspector.rs b/crates/tui/src/tui/context_inspector.rs index d0e7a586..c9e5c091 100644 --- a/crates/tui/src/tui/context_inspector.rs +++ b/crates/tui/src/tui/context_inspector.rs @@ -30,7 +30,11 @@ pub fn build_context_inspector_text(app: &App) -> String { let _ = writeln!(out, "Session Context"); let _ = writeln!(out, "---------------"); let _ = writeln!(out, "Model: {}", app.model); - let _ = writeln!(out, "Workspace: {}", app.workspace.display()); + let _ = writeln!( + out, + "Workspace: {}", + crate::utils::display_path(&app.workspace) + ); if let Some(session_id) = app.current_session_id.as_deref() { let _ = writeln!(out, "Session: {}", session_id); } diff --git a/crates/tui/src/tui/onboarding/trust_directory.rs b/crates/tui/src/tui/onboarding/trust_directory.rs index 3d51107a..44c18b7d 100644 --- a/crates/tui/src/tui/onboarding/trust_directory.rs +++ b/crates/tui/src/tui/onboarding/trust_directory.rs @@ -20,7 +20,7 @@ pub fn lines(app: &App) -> Vec> { Style::default().fg(palette::TEXT_PRIMARY), ))); lines.push(Line::from(Span::styled( - format!("Workspace: {}", app.workspace.display()), + format!("Workspace: {}", crate::utils::display_path(&app.workspace)), Style::default().fg(palette::TEXT_MUTED), ))); lines.push(Line::from("")); diff --git a/crates/tui/src/tui/shell_job_routing.rs b/crates/tui/src/tui/shell_job_routing.rs index 428c4351..6c08566f 100644 --- a/crates/tui/src/tui/shell_job_routing.rs +++ b/crates/tui/src/tui/shell_job_routing.rs @@ -52,7 +52,7 @@ pub(super) fn format_shell_job_list(jobs: &[ShellJobSnapshot]) -> String { job.exit_code, task )); - lines.push(format!(" cwd: {}", job.cwd.display())); + lines.push(format!(" cwd: {}", crate::utils::display_path(&job.cwd))); lines.push(format!(" cmd: {}", job.command)); let tail = if !job.stderr_tail.trim().is_empty() { job.stderr_tail.trim() @@ -115,7 +115,7 @@ fn format_shell_job_detail(detail: &ShellJobDetail) -> String { format!("Job: {}", job.id), format!("Status: {}", status_label(&job.status, job.stale)), format!("Command: {}", job.command), - format!("Cwd: {}", job.cwd.display()), + format!("Cwd: {}", crate::utils::display_path(&job.cwd)), format!("Elapsed: {}", format_elapsed(job.elapsed_ms)), format!("Exit Code: {:?}", job.exit_code), format!("Stdin Available: {}", job.stdin_available), diff --git a/crates/tui/src/tui/subagent_routing.rs b/crates/tui/src/tui/subagent_routing.rs index fabc0c7b..c5fb44ec 100644 --- a/crates/tui/src/tui/subagent_routing.rs +++ b/crates/tui/src/tui/subagent_routing.rs @@ -489,7 +489,10 @@ fn format_task_detail(task: &TaskRecord) -> String { lines.push(format!("Status: {}", task_status_label(task.status))); lines.push(format!("Mode: {}", task.mode)); lines.push(format!("Model: {}", task.model)); - lines.push(format!("Workspace: {}", task.workspace.display())); + lines.push(format!( + "Workspace: {}", + crate::utils::display_path(&task.workspace) + )); if let Some(thread_id) = task.thread_id.as_ref() { lines.push(format!("Runtime Thread: {thread_id}")); } diff --git a/crates/tui/src/utils.rs b/crates/tui/src/utils.rs index 1801ca6b..80a8e403 100644 --- a/crates/tui/src/utils.rs +++ b/crates/tui/src/utils.rs @@ -186,6 +186,33 @@ pub fn url_encode(input: &str) -> String { encoded } +/// Render a path for **user-facing display** with the home directory +/// contracted to `~`. Use this in the TUI, doctor/setup stdout, and any +/// other place a viewer might see the output (screenshot, video, +/// pasted-into-issue help). On macOS/Linux the absolute path +/// `/Users//...` or `/home//...` reveals the OS account name, +/// which is often the same as a public handle — undesirable for users +/// who share their terminal. +/// +/// **Do not use** this for paths that get persisted (sessions, audit log) +/// or sent to the LLM provider — those want full fidelity so they +/// resolve correctly across processes. +#[must_use] +pub fn display_path(path: &Path) -> String { + let Some(home) = dirs::home_dir() else { + return path.display().to_string(); + }; + if let Ok(rest) = path.strip_prefix(&home) { + if rest.as_os_str().is_empty() { + return "~".to_string(); + } + // Render with the platform-correct separator after the tilde. + let sep = std::path::MAIN_SEPARATOR; + return format!("~{sep}{}", rest.display()); + } + path.display().to_string() +} + /// Estimate the total character count across message content blocks. #[must_use] pub fn estimate_message_chars(messages: &[Message]) -> usize { @@ -205,3 +232,74 @@ pub fn estimate_message_chars(messages: &[Message]) -> usize { } total } + +#[cfg(test)] +mod tests { + use super::display_path; + use std::path::PathBuf; + + /// Save and restore $HOME inside one test so a panic anywhere can't + /// poison sibling tests that read the env var. + fn with_home(home: &str, f: impl FnOnce() -> R) -> R { + let prev = std::env::var_os("HOME"); + // SAFETY: tests in this crate are run single-threaded with respect + // to env-var mutation by the integration harness, and we restore + // immediately after the closure. + unsafe { std::env::set_var("HOME", home) }; + let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(f)); + match prev { + Some(v) => unsafe { std::env::set_var("HOME", v) }, + None => unsafe { std::env::remove_var("HOME") }, + } + match result { + Ok(v) => v, + Err(p) => std::panic::resume_unwind(p), + } + } + + #[test] + fn display_path_contracts_home_prefix() { + with_home("/Users/alice", || { + assert_eq!( + display_path(&PathBuf::from("/Users/alice/projects/foo")), + format!( + "~{}projects{}foo", + std::path::MAIN_SEPARATOR, + std::path::MAIN_SEPARATOR + ), + ); + }); + } + + #[test] + fn display_path_returns_bare_tilde_for_home_itself() { + with_home("/Users/alice", || { + assert_eq!(display_path(&PathBuf::from("/Users/alice")), "~"); + }); + } + + #[test] + fn display_path_leaves_unrelated_paths_alone() { + with_home("/Users/alice", || { + // Different user — must not get rewritten or share the tilde. + assert_eq!( + display_path(&PathBuf::from("/Users/bob/Code")), + "/Users/bob/Code".to_string() + ); + // System path must stay absolute. + assert_eq!(display_path(&PathBuf::from("/etc/hosts")), "/etc/hosts"); + }); + } + + #[test] + fn display_path_does_not_match_username_prefix() { + // Regression guard: a directory named like the user's home + // *prefix* but not under it must not get rewritten. + with_home("/Users/alice", || { + assert_eq!( + display_path(&PathBuf::from("/Users/alice2/work")), + "/Users/alice2/work" + ); + }); + } +} From 100efced804e86dee119b1bcb86bf9f10e2002ee Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Fri, 1 May 2026 02:53:29 -0500 Subject: [PATCH 04/17] chore: cargo fmt + drop needless borrow flagged by clippy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI on PR #256 flagged two minor lint hits in the privacy lane: - skills/mod.rs: rustfmt wanted the new regression test's `let rendered = …` line collapsed to one chain. - main.rs:1614: `selected_skills_dir` is already `&PathBuf`, so passing `&selected_skills_dir` is a `&&PathBuf` and clippy's `needless_borrow` triggers under `-D warnings`. No behavior change; same coverage and outputs. Re-runs locally: cargo fmt --all -- --check → clean cargo clippy --workspace --all-targets --all-features --locked -- -D warnings → clean Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/tui/src/main.rs | 2 +- crates/tui/src/skills/mod.rs | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs index 9238975a..f87d08b9 100644 --- a/crates/tui/src/main.rs +++ b/crates/tui/src/main.rs @@ -1611,7 +1611,7 @@ async fn run_doctor(config: &Config, workspace: &Path, config_path_override: Opt println!( " {} selected skills dir: {}", "·".dimmed(), - crate::utils::display_path(&selected_skills_dir) + crate::utils::display_path(selected_skills_dir) ); if !agents_skills_dir.exists() && !local_skills_dir.exists() && !global_skills_dir.exists() { println!(" Run `deepseek setup --skills` (or add --local for ./skills)."); diff --git a/crates/tui/src/skills/mod.rs b/crates/tui/src/skills/mod.rs index d795e5d6..d07b57cb 100644 --- a/crates/tui/src/skills/mod.rs +++ b/crates/tui/src/skills/mod.rs @@ -364,10 +364,9 @@ mod tests { "---\nname: friendly-name\ndescription: drift case\n---\nbody", ); - let rendered = crate::skills::render_available_skills_context( - &tmpdir.path().join("skills"), - ) - .expect("skill context"); + let rendered = + crate::skills::render_available_skills_context(&tmpdir.path().join("skills")) + .expect("skill context"); let real_path = tmpdir .path() From df53a221135e63b0d2a60893b21d4f1173c8507c Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Fri, 1 May 2026 02:59:57 -0500 Subject: [PATCH 05/17] test(utils): gate display_path tests to cfg(unix) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The tests set \$HOME to drive `dirs::home_dir()`. On Unix that's the contract dirs uses; on Windows dirs reads %USERPROFILE% first, so setting HOME has no effect and the tests fail. The `display_path` function itself is platform-identical — it delegates to `dirs::home_dir()` for the home prefix and uses `std::path::MAIN_SEPARATOR` for the separator after the tilde. The contraction logic is exercised on macOS/Linux which is sufficient coverage for an abstraction whose platform detail is delegated. If we want Windows-specific assertion coverage in the future, it should either set USERPROFILE alongside HOME or accept an injected home dir. Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/tui/src/utils.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/crates/tui/src/utils.rs b/crates/tui/src/utils.rs index 80a8e403..a1b8bded 100644 --- a/crates/tui/src/utils.rs +++ b/crates/tui/src/utils.rs @@ -233,7 +233,14 @@ pub fn estimate_message_chars(messages: &[Message]) -> usize { total } -#[cfg(test)] +// Tests below set `HOME` to drive `dirs::home_dir()`, which is honored on +// Unix but not on Windows (which reads `USERPROFILE` first). The +// `display_path` contraction logic itself is platform-identical — it +// delegates to `dirs::home_dir()`. Gate to `cfg(unix)` so we cover the +// behavior on the platform whose env-var contract matches the test +// driver, instead of writing platform-specific test scaffolding for a +// pure abstraction. +#[cfg(all(test, unix))] mod tests { use super::display_path; use std::path::PathBuf; From 84da3b7fc6ff1e3b4dcbe0108ae24656ceaf9d33 Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Fri, 1 May 2026 03:09:05 -0500 Subject: [PATCH 06/17] test(rlm): cover bridge batch and depth guard --- crates/tui/src/rlm/bridge.rs | 151 +++++++++++++++++++++++++++++------ crates/tui/src/rlm/turn.rs | 16 ++++ 2 files changed, 141 insertions(+), 26 deletions(-) diff --git a/crates/tui/src/rlm/bridge.rs b/crates/tui/src/rlm/bridge.rs index 0b8e7754..36ec6680 100644 --- a/crates/tui/src/rlm/bridge.rs +++ b/crates/tui/src/rlm/bridge.rs @@ -121,19 +121,8 @@ impl RlmBridge { } async fn dispatch_llm_batch(&self, prompts: Vec, model: Option) -> BatchResp { - if prompts.is_empty() { - return BatchResp { results: vec![] }; - } - if prompts.len() > MAX_BATCH { - return BatchResp { - results: prompts - .iter() - .map(|_| SingleResp { - text: String::new(), - error: Some(format!("batch too large: {} > {MAX_BATCH}", prompts.len())), - }) - .collect(), - }; + if let Some(resp) = batch_guard(prompts.len()) { + return resp; } let model = Arc::new( @@ -201,19 +190,8 @@ impl RlmBridge { } async fn dispatch_rlm_batch(&self, prompts: Vec, model: Option) -> BatchResp { - if prompts.is_empty() { - return BatchResp { results: vec![] }; - } - if prompts.len() > MAX_BATCH { - return BatchResp { - results: prompts - .iter() - .map(|_| SingleResp { - text: String::new(), - error: Some(format!("batch too large: {} > {MAX_BATCH}", prompts.len())), - }) - .collect(), - }; + if let Some(resp) = batch_guard(prompts.len()) { + return resp; } let model = Arc::new(model); @@ -227,6 +205,23 @@ impl RlmBridge { } } +fn batch_guard(prompt_count: usize) -> Option { + if prompt_count == 0 { + return Some(BatchResp { results: vec![] }); + } + if prompt_count > MAX_BATCH { + return Some(BatchResp { + results: (0..prompt_count) + .map(|_| SingleResp { + text: String::new(), + error: Some(format!("batch too large: {prompt_count} > {MAX_BATCH}")), + }) + .collect(), + }); + } + None +} + impl RpcDispatcher for RlmBridge { fn dispatch<'a>( &'a self, @@ -255,3 +250,107 @@ impl RpcDispatcher for RlmBridge { }) } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::config::{Config, ProviderConfig, ProvidersConfig}; + use serde_json::json; + use wiremock::matchers::{method, path}; + use wiremock::{Mock, MockServer, ResponseTemplate}; + + fn client_for(server: &MockServer) -> DeepSeekClient { + let config = Config { + provider: Some("sglang".to_string()), + providers: Some(ProvidersConfig { + sglang: ProviderConfig { + base_url: Some(server.uri()), + ..ProviderConfig::default() + }, + ..ProvidersConfig::default() + }), + ..Config::default() + }; + DeepSeekClient::new(&config).expect("test client") + } + + fn chat_response(text: &str) -> serde_json::Value { + json!({ + "id": "chatcmpl-test", + "model": "test-model", + "choices": [{ + "index": 0, + "message": { + "role": "assistant", + "content": text, + }, + "finish_reason": "stop" + }], + "usage": { + "prompt_tokens": 3, + "completion_tokens": 5 + } + }) + } + + #[test] + fn batch_guard_allows_non_empty_batches_at_the_cap() { + assert!(batch_guard(MAX_BATCH).is_none()); + } + + #[test] + fn batch_guard_returns_empty_response_for_empty_batches() { + let response = batch_guard(0).expect("empty batch should be handled"); + assert!(response.results.is_empty()); + } + + #[test] + fn batch_guard_returns_one_error_per_oversized_prompt() { + let response = batch_guard(MAX_BATCH + 2).expect("oversized batch should be handled"); + assert_eq!(response.results.len(), MAX_BATCH + 2); + assert!(response.results.iter().all(|result| { + result.text.is_empty() + && result + .error + .as_deref() + .is_some_and(|err| err.contains("batch too large")) + })); + } + + #[tokio::test] + async fn rlm_dispatch_at_depth_zero_falls_back_to_plain_llm_query() { + let server = MockServer::start().await; + Mock::given(method("POST")) + .and(path("/v1/responses")) + .respond_with(ResponseTemplate::new(404).set_body_string("responses unavailable")) + .mount(&server) + .await; + Mock::given(method("POST")) + .and(path("/v1/chat/completions")) + .respond_with( + ResponseTemplate::new(200).set_body_json(chat_response("fallback answer")), + ) + .mount(&server) + .await; + + let bridge = RlmBridge::new(client_for(&server), "child-model".to_string(), 0); + let response = bridge + .dispatch(RpcRequest::Rlm { + prompt: "nested prompt".to_string(), + model: Some("override-model".to_string()), + }) + .await; + + match response { + RpcResponse::Single(single) => { + assert_eq!(single.text, "fallback answer"); + assert!(single.error.is_none()); + } + other => panic!("expected single response, got {other:?}"), + } + + let usage = bridge.usage.lock().await; + assert_eq!(usage.input_tokens, 3); + assert_eq!(usage.output_tokens, 5); + } +} diff --git a/crates/tui/src/rlm/turn.rs b/crates/tui/src/rlm/turn.rs index 08d59818..bc73dbf5 100644 --- a/crates/tui/src/rlm/turn.rs +++ b/crates/tui/src/rlm/turn.rs @@ -835,6 +835,22 @@ mod tests { assert!(text.contains("FINAL")); } + #[test] + fn build_metadata_truncates_long_context_without_leaking_tail() { + let secret_tail = "DO_NOT_LEAK_CONTEXT_TAIL"; + let prompt = format!("{}{}", "a".repeat(PROMPT_PREVIEW_LEN + 100), secret_tail); + let msg = build_metadata_message(&prompt, None, 0, None, None); + let text = extract_text_blocks(&msg.content); + + assert!(text.contains(&format!("- Length: {} chars", prompt.chars().count()))); + assert!(text.contains("- Preview: \"")); + assert!(text.contains("...")); + assert!( + !text.contains(secret_tail), + "metadata leaked the non-preview tail of context" + ); + } + #[test] fn build_metadata_with_iteration_shows_previous_code() { let msg = build_metadata_message("Test prompt", None, 3, Some("print('hi')"), Some("hi")); From d2c007833f316c5e9ef95cbfefa6e479e6f5cb22 Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Fri, 1 May 2026 04:10:59 -0500 Subject: [PATCH 07/17] test(rlm): make bridge client seam mockable --- crates/tui/src/llm_client/mod.rs | 5 +- crates/tui/src/rlm/bridge.rs | 205 ++++++++++++++++++++++--------- crates/tui/src/rlm/turn.rs | 76 ++++++++---- 3 files changed, 202 insertions(+), 84 deletions(-) diff --git a/crates/tui/src/llm_client/mod.rs b/crates/tui/src/llm_client/mod.rs index 48b28b18..009f701b 100644 --- a/crates/tui/src/llm_client/mod.rs +++ b/crates/tui/src/llm_client/mod.rs @@ -58,7 +58,10 @@ pub trait LlmClient: Send + Sync { fn model(&self) -> &str; /// Creates a non-streaming message completion - async fn create_message(&self, request: MessageRequest) -> Result; + fn create_message( + &self, + request: MessageRequest, + ) -> impl Future> + Send; /// Creates a streaming message completion /// diff --git a/crates/tui/src/rlm/bridge.rs b/crates/tui/src/rlm/bridge.rs index 36ec6680..6338762b 100644 --- a/crates/tui/src/rlm/bridge.rs +++ b/crates/tui/src/rlm/bridge.rs @@ -13,13 +13,14 @@ use std::sync::Arc; use std::time::Duration; +use std::{future::Future, pin::Pin}; +use anyhow::Result; use futures_util::future::join_all; use tokio::sync::Mutex; -use crate::client::DeepSeekClient; -use crate::llm_client::LlmClient as _; -use crate::models::{ContentBlock, Message, MessageRequest, SystemPrompt, Usage}; +use crate::llm_client::LlmClient; +use crate::models::{ContentBlock, Message, MessageRequest, MessageResponse, SystemPrompt, Usage}; use crate::repl::runtime::{BatchResp, RpcDispatcher, RpcRequest, RpcResponse, SingleResp}; /// Per-child completion timeout — same as the previous sidecar default. @@ -29,18 +30,46 @@ const DEFAULT_CHILD_MAX_TOKENS: u32 = 4096; /// Hard cap on prompts per batch RPC. pub const MAX_BATCH: usize = 16; +/// Object-safe slice of the LLM client interface that the RLM bridge needs. +/// +/// `LlmClient` itself uses native async trait methods, which are not dyn-safe. +/// The bridge only needs non-streaming completions, so this boxed-future shim +/// gives tests a clean mock seam without changing the wider provider trait. +pub(crate) trait RlmLlmClient: Send + Sync { + fn create_message_boxed( + &self, + request: MessageRequest, + ) -> Pin> + Send + '_>>; +} + +impl RlmLlmClient for T +where + T: LlmClient + Send + Sync, +{ + fn create_message_boxed( + &self, + request: MessageRequest, + ) -> Pin> + Send + '_>> { + Box::pin(self.create_message(request)) + } +} + /// State shared with the bridge across all RPC calls in one turn. pub struct RlmBridge { - pub client: DeepSeekClient, - pub child_model: String, + client: Arc, + child_model: String, /// Recursion budget remaining for `Rlm` / `RlmBatch` requests. When /// zero, those requests fall back to plain `Llm` completions. - pub depth_remaining: u32, - pub usage: Arc>, + depth_remaining: u32, + usage: Arc>, } impl RlmBridge { - pub fn new(client: DeepSeekClient, child_model: String, depth_remaining: u32) -> Self { + pub(crate) fn new( + client: Arc, + child_model: String, + depth_remaining: u32, + ) -> Self { Self { client, child_model, @@ -83,7 +112,7 @@ impl RlmBridge { top_p: Some(0.9_f32), }; - let fut = self.client.create_message(request); + let fut = self.client.create_message_boxed(request); let response = match tokio::time::timeout(Duration::from_secs(CHILD_TIMEOUT_SECS), fut).await { Ok(Ok(r)) => r, @@ -165,7 +194,7 @@ impl RlmBridge { // Recursive call. The dyn-erasure on `run_rlm_turn_inner` breaks // the `bridge → turn → bridge` opaque-future cycle. let result = super::turn::run_rlm_turn_inner( - &self.client, + Arc::clone(&self.client), child_model.clone(), prompt, None, @@ -254,43 +283,32 @@ impl RpcDispatcher for RlmBridge { #[cfg(test)] mod tests { use super::*; - use crate::config::{Config, ProviderConfig, ProvidersConfig}; - use serde_json::json; - use wiremock::matchers::{method, path}; - use wiremock::{Mock, MockServer, ResponseTemplate}; + use crate::llm_client::mock::MockLlmClient; - fn client_for(server: &MockServer) -> DeepSeekClient { - let config = Config { - provider: Some("sglang".to_string()), - providers: Some(ProvidersConfig { - sglang: ProviderConfig { - base_url: Some(server.uri()), - ..ProviderConfig::default() - }, - ..ProvidersConfig::default() - }), - ..Config::default() - }; - DeepSeekClient::new(&config).expect("test client") + fn mock_response(text: &str, input_tokens: u32, output_tokens: u32) -> MessageResponse { + MessageResponse { + id: "mock_msg".to_string(), + r#type: "message".to_string(), + role: "assistant".to_string(), + content: vec![ContentBlock::Text { + text: text.to_string(), + cache_control: None, + }], + model: "mock-model".to_string(), + stop_reason: Some("end_turn".to_string()), + stop_sequence: None, + container: None, + usage: Usage { + input_tokens, + output_tokens, + ..Usage::default() + }, + } } - fn chat_response(text: &str) -> serde_json::Value { - json!({ - "id": "chatcmpl-test", - "model": "test-model", - "choices": [{ - "index": 0, - "message": { - "role": "assistant", - "content": text, - }, - "finish_reason": "stop" - }], - "usage": { - "prompt_tokens": 3, - "completion_tokens": 5 - } - }) + fn bridge_for(mock: Arc, depth_remaining: u32) -> RlmBridge { + let client: Arc = mock; + RlmBridge::new(client, "child-model".to_string(), depth_remaining) } #[test] @@ -318,22 +336,89 @@ mod tests { } #[tokio::test] - async fn rlm_dispatch_at_depth_zero_falls_back_to_plain_llm_query() { - let server = MockServer::start().await; - Mock::given(method("POST")) - .and(path("/v1/responses")) - .respond_with(ResponseTemplate::new(404).set_body_string("responses unavailable")) - .mount(&server) - .await; - Mock::given(method("POST")) - .and(path("/v1/chat/completions")) - .respond_with( - ResponseTemplate::new(200).set_body_json(chat_response("fallback answer")), - ) - .mount(&server) + async fn llm_dispatch_uses_trait_backed_mock_client() { + let mock = Arc::new(MockLlmClient::new(Vec::new())); + mock.push_message_response(mock_response("child answer", 7, 11)); + let bridge = bridge_for(Arc::clone(&mock), 1); + + let response = bridge + .dispatch(RpcRequest::Llm { + prompt: "child prompt".to_string(), + model: Some("override-model".to_string()), + max_tokens: Some(123), + system: Some("child system".to_string()), + }) .await; - let bridge = RlmBridge::new(client_for(&server), "child-model".to_string(), 0); + match response { + RpcResponse::Single(single) => { + assert_eq!(single.text, "child answer"); + assert!(single.error.is_none()); + } + other => panic!("expected single response, got {other:?}"), + } + + let captured = mock.captured_requests(); + assert_eq!(captured.len(), 1); + assert_eq!(captured[0].model, "override-model"); + assert_eq!(captured[0].max_tokens, 123); + assert_eq!( + captured[0].system, + Some(SystemPrompt::Text("child system".to_string())) + ); + + let usage = bridge.usage.lock().await; + assert_eq!(usage.input_tokens, 7); + assert_eq!(usage.output_tokens, 11); + } + + #[tokio::test] + async fn llm_batch_dispatch_preserves_result_count_and_usage() { + let mock = Arc::new(MockLlmClient::new(Vec::new())); + mock.push_message_response(mock_response("one", 1, 2)); + mock.push_message_response(mock_response("two", 3, 4)); + mock.push_message_response(mock_response("three", 5, 6)); + let bridge = bridge_for(Arc::clone(&mock), 1); + + let response = bridge + .dispatch(RpcRequest::LlmBatch { + prompts: vec!["a".to_string(), "b".to_string(), "c".to_string()], + model: Some("batch-model".to_string()), + }) + .await; + + match response { + RpcResponse::Batch(batch) => { + let texts: Vec<_> = batch + .results + .iter() + .map(|result| result.text.as_str()) + .collect(); + assert_eq!(texts, ["one", "two", "three"]); + assert!(batch.results.iter().all(|result| result.error.is_none())); + } + other => panic!("expected batch response, got {other:?}"), + } + + let captured = mock.captured_requests(); + assert_eq!(captured.len(), 3); + assert!( + captured + .iter() + .all(|request| request.model == "batch-model") + ); + + let usage = bridge.usage.lock().await; + assert_eq!(usage.input_tokens, 9); + assert_eq!(usage.output_tokens, 12); + } + + #[tokio::test] + async fn rlm_dispatch_at_depth_zero_falls_back_to_plain_llm_query() { + let mock = Arc::new(MockLlmClient::new(Vec::new())); + mock.push_message_response(mock_response("fallback answer", 3, 5)); + let bridge = bridge_for(Arc::clone(&mock), 0); + let response = bridge .dispatch(RpcRequest::Rlm { prompt: "nested prompt".to_string(), @@ -352,5 +437,9 @@ mod tests { let usage = bridge.usage.lock().await; assert_eq!(usage.input_tokens, 3); assert_eq!(usage.output_tokens, 5); + + let captured = mock.captured_requests(); + assert_eq!(captured.len(), 1); + assert_eq!(captured[0].model, "override-model"); } } diff --git a/crates/tui/src/rlm/turn.rs b/crates/tui/src/rlm/turn.rs index bc73dbf5..c97f5ad1 100644 --- a/crates/tui/src/rlm/turn.rs +++ b/crates/tui/src/rlm/turn.rs @@ -2,6 +2,7 @@ //! subprocess + stdin/stdout RPC bridge (no HTTP sidecar). use std::path::PathBuf; +use std::sync::Arc; use std::time::{Duration, Instant}; use tokio::sync::mpsc; @@ -9,11 +10,10 @@ use uuid::Uuid; use crate::client::DeepSeekClient; use crate::core::events::Event; -use crate::llm_client::LlmClient; -use crate::models::{ContentBlock, Message, MessageRequest, Usage}; +use crate::models::{ContentBlock, Message, MessageRequest, SystemPrompt, Usage}; use crate::repl::PythonRuntime; -use super::bridge::RlmBridge; +use super::bridge::{RlmBridge, RlmLlmClient}; use super::prompt::rlm_system_prompt; // --------------------------------------------------------------------------- @@ -99,7 +99,7 @@ pub async fn run_rlm_turn( max_depth: u32, ) -> RlmTurnResult { run_rlm_turn_inner( - client, + Arc::new(client.clone()), model, prompt, None, @@ -122,7 +122,7 @@ pub async fn run_rlm_turn_with_root( max_depth: u32, ) -> RlmTurnResult { run_rlm_turn_inner( - client, + Arc::new(client.clone()), model, prompt, root_prompt, @@ -136,15 +136,15 @@ pub async fn run_rlm_turn_with_root( /// Inner entry point — also used by the bridge when it recurses. Returns /// a boxed future to break the recursive opaque-future-type cycle: /// `run_rlm_turn_inner` → `RlmBridge::dispatch` → `run_rlm_turn_inner`. -pub(crate) fn run_rlm_turn_inner<'a>( - client: &'a DeepSeekClient, +pub(crate) fn run_rlm_turn_inner( + client: Arc, model: String, prompt: String, root_prompt: Option, child_model: String, tx_event: mpsc::Sender, max_depth: u32, -) -> std::pin::Pin + Send + 'a>> { +) -> std::pin::Pin + Send>> { Box::pin(run_rlm_turn_impl( client, model, @@ -161,7 +161,7 @@ pub(crate) fn run_rlm_turn_inner<'a>( // --------------------------------------------------------------------------- async fn run_rlm_turn_impl( - client: &DeepSeekClient, + client: Arc, model: String, prompt: String, root_prompt: Option, @@ -212,7 +212,7 @@ async fn run_rlm_turn_impl( }; // 3. Build the bridge that services llm_query / rlm_query RPCs. - let bridge = RlmBridge::new(client.clone(), child_model.clone(), max_depth); + let bridge = RlmBridge::new(Arc::clone(&client), child_model.clone(), max_depth); let usage_handle = bridge.usage_handle(); let _ = tx_event @@ -262,22 +262,9 @@ async fn run_rlm_turn_impl( .await; // 4a. Root LLM generates code from metadata-only context. - let request = MessageRequest { - model: model.clone(), - messages: messages.clone(), - max_tokens: ROOT_MAX_TOKENS, - system: Some(system.clone()), - tools: None, - tool_choice: None, - metadata: None, - thinking: None, - reasoning_effort: None, - stream: Some(false), - temperature: Some(ROOT_TEMPERATURE), - top_p: Some(0.9_f32), - }; + let request = build_root_request(&model, &messages, &system); - let response = match client.create_message(request).await { + let response = match client.create_message_boxed(request).await { Ok(r) => r, Err(e) => { break 'turn RlmTurnResult { @@ -551,6 +538,23 @@ fn write_context_file(prompt: &str) -> std::io::Result { Ok(path) } +fn build_root_request(model: &str, messages: &[Message], system: &SystemPrompt) -> MessageRequest { + MessageRequest { + model: model.to_string(), + messages: messages.to_vec(), + max_tokens: ROOT_MAX_TOKENS, + system: Some(system.clone()), + tools: None, + tool_choice: None, + metadata: None, + thinking: None, + reasoning_effort: None, + stream: Some(false), + temperature: Some(ROOT_TEMPERATURE), + top_p: Some(0.9_f32), + } +} + /// Build `Metadata(state)` from the paper. Surfaces: /// - the small `root_prompt` (if any) — repeated each iteration /// - `context` length + preview @@ -851,6 +855,28 @@ mod tests { ); } + #[test] + fn build_root_request_keeps_context_tail_out_of_root_payload() { + let secret_tail = "DO_NOT_LEAK_ROOT_REQUEST"; + let prompt = format!("{}{}", "a".repeat(PROMPT_PREVIEW_LEN + 100), secret_tail); + let messages = vec![build_metadata_message( + &prompt, + Some("answer from the long context"), + 0, + None, + None, + )]; + + let request = build_root_request("root-model", &messages, &rlm_system_prompt()); + let payload = serde_json::to_string(&request).expect("request should serialize"); + + assert!(payload.contains(&format!("- Length: {} chars", prompt.chars().count()))); + assert!( + !payload.contains(secret_tail), + "root LLM request leaked the non-preview tail of context" + ); + } + #[test] fn build_metadata_with_iteration_shows_previous_code() { let msg = build_metadata_message("Test prompt", None, 3, Some("print('hi')"), Some("hi")); From 0887a88465cd7610aed747419db48dacd47a08c0 Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Fri, 1 May 2026 05:09:39 -0500 Subject: [PATCH 08/17] refactor(engine): extract tool catalog helpers --- crates/tui/src/core/engine.rs | 426 +-------------------- crates/tui/src/core/engine/tool_catalog.rs | 418 ++++++++++++++++++++ 2 files changed, 428 insertions(+), 416 deletions(-) create mode 100644 crates/tui/src/core/engine/tool_catalog.rs diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index 99a12e30..04bb656a 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -42,7 +42,7 @@ use crate::seam_manager::{SeamConfig, SeamManager}; use crate::tools::plan::{SharedPlanState, new_shared_plan_state}; use crate::tools::shell::{SharedShellManager, new_shared_shell_manager}; use crate::tools::spec::RuntimeToolServices; -use crate::tools::spec::{ApprovalRequirement, ToolError, ToolResult, required_str}; +use crate::tools::spec::{ApprovalRequirement, ToolError, ToolResult}; use crate::tools::subagent::{ Mailbox, SharedSubAgentManager, SubAgentRuntime, SubAgentType, new_shared_subagent_manager, }; @@ -398,14 +398,6 @@ pub(crate) const TOOL_CALL_START_MARKERS: [&str; 5] = [ "", ]; -const MULTI_TOOL_PARALLEL_NAME: &str = "multi_tool_use.parallel"; -const REQUEST_USER_INPUT_NAME: &str = "request_user_input"; -const CODE_EXECUTION_TOOL_NAME: &str = "code_execution"; -const CODE_EXECUTION_TOOL_TYPE: &str = "code_execution_20250825"; -const TOOL_SEARCH_REGEX_NAME: &str = "tool_search_tool_regex"; -const TOOL_SEARCH_REGEX_TYPE: &str = "tool_search_tool_regex_20251119"; -const TOOL_SEARCH_BM25_NAME: &str = "tool_search_tool_bm25"; -const TOOL_SEARCH_BM25_TYPE: &str = "tool_search_tool_bm25_20251119"; pub(crate) const TOOL_CALL_END_MARKERS: [&str; 5] = [ "[/TOOL_CALL]", "", @@ -463,413 +455,6 @@ pub(crate) fn filter_tool_call_delta(delta: &str, in_tool_call: &mut bool) -> St output } -/// Compute the tool input that should be reported when a tool's stream block -/// closes (`ContentBlockStop`). Prefers the parsed `input_buffer` over the -/// initial `input` placeholder so a `ToolCallStarted` event never carries a -/// stale `{}` when args were actually streamed in via `InputJsonDelta`. -/// -/// Order of preference: -/// 1. `input_buffer` parses cleanly → use that. -/// 2. `input_buffer` is empty → fall back to `input` (model embedded args -/// directly in the `ContentBlockStart` frame and sent no deltas). -/// 3. `input_buffer` non-empty but unparseable → fall back to `input` -/// (the per-delta parser has already mirrored the most recent valid -/// partial parse into `tool_state.input`). -fn is_tool_search_tool(name: &str) -> bool { - matches!(name, TOOL_SEARCH_REGEX_NAME | TOOL_SEARCH_BM25_NAME) -} - -fn should_default_defer_tool(name: &str, mode: AppMode) -> bool { - if mode == AppMode::Yolo { - return false; - } - - // Shell tools are kept active in Agent so the model can run verification - // commands (build/test/git/cargo) without first having to discover the - // tool through ToolSearch. Plan mode never registers shell tools. - let always_loaded_in_action_modes = matches!(mode, AppMode::Agent) - && matches!( - name, - "exec_shell" - | "exec_shell_wait" - | "exec_shell_interact" - | "exec_wait" - | "exec_interact" - ); - if always_loaded_in_action_modes { - return false; - } - - !matches!( - name, - "read_file" - | "list_dir" - | "grep_files" - | "file_search" - | "diagnostics" - | "rlm" - | "recall_archive" - | MULTI_TOOL_PARALLEL_NAME - | "update_plan" - | "checklist_write" - | "todo_write" - | "task_create" - | "task_list" - | "task_read" - | "task_gate_run" - | "task_shell_start" - | "task_shell_wait" - | "github_issue_context" - | "github_pr_context" - | REQUEST_USER_INPUT_NAME - ) -} - -fn ensure_advanced_tooling(catalog: &mut Vec) { - if !catalog.iter().any(|t| t.name == CODE_EXECUTION_TOOL_NAME) { - catalog.push(Tool { - tool_type: Some(CODE_EXECUTION_TOOL_TYPE.to_string()), - name: CODE_EXECUTION_TOOL_NAME.to_string(), - description: "Execute Python code in a local sandboxed runtime and return stdout/stderr/return_code as JSON.".to_string(), - input_schema: json!({ - "type": "object", - "properties": { - "code": { "type": "string", "description": "Python source code to execute." } - }, - "required": ["code"] - }), - allowed_callers: Some(vec!["direct".to_string()]), - defer_loading: Some(false), - input_examples: None, - strict: None, - cache_control: None, - }); - } - - if !catalog.iter().any(|t| t.name == TOOL_SEARCH_REGEX_NAME) { - catalog.push(Tool { - tool_type: Some(TOOL_SEARCH_REGEX_TYPE.to_string()), - name: TOOL_SEARCH_REGEX_NAME.to_string(), - description: "Search deferred tool definitions using a regex query and return matching tool references.".to_string(), - input_schema: json!({ - "type": "object", - "properties": { - "query": { "type": "string", "description": "Regex pattern to search tool names/descriptions/schema." } - }, - "required": ["query"] - }), - allowed_callers: Some(vec!["direct".to_string()]), - defer_loading: Some(false), - input_examples: None, - strict: None, - cache_control: None, - }); - } - - if !catalog.iter().any(|t| t.name == TOOL_SEARCH_BM25_NAME) { - catalog.push(Tool { - tool_type: Some(TOOL_SEARCH_BM25_TYPE.to_string()), - name: TOOL_SEARCH_BM25_NAME.to_string(), - description: "Search deferred tool definitions using natural-language matching and return matching tool references.".to_string(), - input_schema: json!({ - "type": "object", - "properties": { - "query": { "type": "string", "description": "Natural language query for tool discovery." } - }, - "required": ["query"] - }), - allowed_callers: Some(vec!["direct".to_string()]), - defer_loading: Some(false), - input_examples: None, - strict: None, - cache_control: None, - }); - } -} - -fn initial_active_tools(catalog: &[Tool]) -> std::collections::HashSet { - let mut active = std::collections::HashSet::new(); - for tool in catalog { - if !tool.defer_loading.unwrap_or(false) || is_tool_search_tool(&tool.name) { - active.insert(tool.name.clone()); - } - } - if active.is_empty() - && !catalog.is_empty() - && let Some(first) = catalog.first() - { - active.insert(first.name.clone()); - } - active -} - -fn active_tool_list_from_catalog( - catalog: &[Tool], - active: &std::collections::HashSet, -) -> Vec { - catalog - .iter() - .filter(|tool| active.contains(&tool.name)) - .cloned() - .collect() -} - -fn active_tools_for_step( - catalog: &[Tool], - active: &std::collections::HashSet, - force_update_plan: bool, -) -> Vec { - // DeepSeek reasoning models reject explicit named tool_choice forcing here, so for - // obvious quick-plan asks we narrow the first-step tool surface to update_plan instead. - if force_update_plan { - let forced: Vec<_> = catalog - .iter() - .filter(|tool| tool.name == "update_plan") - .cloned() - .collect(); - if !forced.is_empty() { - return forced; - } - } - - active_tool_list_from_catalog(catalog, active) -} - -fn tool_search_haystack(tool: &Tool) -> String { - format!( - "{}\n{}\n{}", - tool.name.to_lowercase(), - tool.description.to_lowercase(), - tool.input_schema.to_string().to_lowercase() - ) -} - -fn discover_tools_with_regex(catalog: &[Tool], query: &str) -> Result, ToolError> { - let regex = regex::Regex::new(query) - .map_err(|err| ToolError::invalid_input(format!("Invalid regex query: {err}")))?; - - let mut matches = Vec::new(); - for tool in catalog { - if is_tool_search_tool(&tool.name) { - continue; - } - let hay = tool_search_haystack(tool); - if regex.is_match(&hay) { - matches.push(tool.name.clone()); - } - if matches.len() >= 5 { - break; - } - } - Ok(matches) -} - -fn discover_tools_with_bm25_like(catalog: &[Tool], query: &str) -> Vec { - let terms: Vec = query - .split_whitespace() - .map(|term| term.trim().to_lowercase()) - .filter(|term| !term.is_empty()) - .collect(); - if terms.is_empty() { - return Vec::new(); - } - - let mut scored: Vec<(i64, String)> = Vec::new(); - for tool in catalog { - if is_tool_search_tool(&tool.name) { - continue; - } - let hay = tool_search_haystack(tool); - let mut score = 0i64; - for term in &terms { - if hay.contains(term) { - score += 1; - } - if tool.name.to_lowercase().contains(term) { - score += 2; - } - } - if score > 0 { - scored.push((score, tool.name.clone())); - } - } - scored.sort_by(|a, b| b.0.cmp(&a.0).then_with(|| a.1.cmp(&b.1))); - scored.into_iter().take(5).map(|(_, name)| name).collect() -} - -fn edit_distance(a: &str, b: &str) -> usize { - if a == b { - return 0; - } - if a.is_empty() { - return b.chars().count(); - } - if b.is_empty() { - return a.chars().count(); - } - - let b_chars: Vec = b.chars().collect(); - let mut prev: Vec = (0..=b_chars.len()).collect(); - let mut curr = vec![0usize; b_chars.len() + 1]; - - for (i, a_ch) in a.chars().enumerate() { - curr[0] = i + 1; - for (j, b_ch) in b_chars.iter().enumerate() { - let cost = if a_ch == *b_ch { 0 } else { 1 }; - let delete = prev[j + 1] + 1; - let insert = curr[j] + 1; - let substitute = prev[j] + cost; - curr[j + 1] = delete.min(insert).min(substitute); - } - std::mem::swap(&mut prev, &mut curr); - } - - prev[b_chars.len()] -} - -fn suggest_tool_names(catalog: &[Tool], requested: &str, limit: usize) -> Vec { - let requested = requested.trim().to_ascii_lowercase(); - if requested.is_empty() || limit == 0 { - return Vec::new(); - } - - let mut candidates: Vec<(u8, usize, String)> = Vec::new(); - for tool in catalog { - let candidate = tool.name.to_ascii_lowercase(); - let prefix_match = candidate.starts_with(&requested) || requested.starts_with(&candidate); - let contains_match = candidate.contains(&requested) || requested.contains(&candidate); - let distance = edit_distance(&candidate, &requested); - let close_typo = distance <= 3; - - if !(prefix_match || contains_match || close_typo) { - continue; - } - - let rank = if prefix_match { - 0 - } else if contains_match { - 1 - } else { - 2 - }; - candidates.push((rank, distance, tool.name.clone())); - } - - candidates.sort_by(|a, b| { - a.0.cmp(&b.0) - .then_with(|| a.1.cmp(&b.1)) - .then_with(|| a.2.cmp(&b.2)) - }); - candidates.dedup_by(|a, b| a.2 == b.2); - candidates - .into_iter() - .take(limit) - .map(|(_, _, name)| name) - .collect() -} - -fn missing_tool_error_message(tool_name: &str, catalog: &[Tool]) -> String { - let suggestions = suggest_tool_names(catalog, tool_name, 3); - if suggestions.is_empty() { - return format!( - "Tool '{tool_name}' is not available in the current tool catalog. \ - Verify mode/feature flags, or use {TOOL_SEARCH_BM25_NAME} with a short query." - ); - } - - format!( - "Tool '{tool_name}' is not available in the current tool catalog. \ - Did you mean: {}? You can also use {TOOL_SEARCH_BM25_NAME} to discover tools.", - suggestions.join(", ") - ) -} - -fn maybe_activate_requested_deferred_tool( - tool_name: &str, - catalog: &[Tool], - active_tools: &mut std::collections::HashSet, -) -> bool { - let Some(def) = catalog.iter().find(|def| def.name == tool_name) else { - return false; - }; - - if !def.defer_loading.unwrap_or(false) || active_tools.contains(tool_name) { - return false; - } - - active_tools.insert(tool_name.to_string()) -} - -fn execute_tool_search( - tool_name: &str, - input: &serde_json::Value, - catalog: &[Tool], - active_tools: &mut std::collections::HashSet, -) -> Result { - let query = required_str(input, "query")?; - let discovered = if tool_name == TOOL_SEARCH_REGEX_NAME { - discover_tools_with_regex(catalog, query)? - } else { - discover_tools_with_bm25_like(catalog, query) - }; - - for name in &discovered { - active_tools.insert(name.clone()); - } - - let references = discovered - .iter() - .map(|name| json!({"type": "tool_reference", "tool_name": name})) - .collect::>(); - - let payload = json!({ - "type": "tool_search_tool_search_result", - "tool_references": references, - }); - - Ok(ToolResult { - content: serde_json::to_string(&payload).unwrap_or_else(|_| payload.to_string()), - success: true, - metadata: Some(json!({ - "tool_references": discovered, - })), - }) -} - -async fn execute_code_execution_tool( - input: &serde_json::Value, - workspace: &std::path::Path, -) -> Result { - let code = required_str(input, "code")?; - let mut cmd = tokio::process::Command::new("python3"); - cmd.arg("-c"); - cmd.arg(code); - cmd.current_dir(workspace); - - let output = tokio::time::timeout(Duration::from_secs(120), cmd.output()) - .await - .map_err(|_| ToolError::Timeout { seconds: 120 }) - .and_then(|res| res.map_err(|e| ToolError::execution_failed(e.to_string())))?; - - let stdout = String::from_utf8_lossy(&output.stdout).to_string(); - let stderr = String::from_utf8_lossy(&output.stderr).to_string(); - let return_code = output.status.code().unwrap_or(-1); - let success = output.status.success(); - let payload = json!({ - "type": "code_execution_result", - "stdout": stdout, - "stderr": stderr, - "return_code": return_code, - "content": [], - }); - - Ok(ToolResult { - content: serde_json::to_string(&payload).unwrap_or_else(|_| payload.to_string()), - success, - metadata: Some(payload), - }) -} - fn caller_type_for_tool_use(caller: Option<&ToolCaller>) -> &str { caller.map_or("direct", |c| c.caller_type.as_str()) } @@ -2878,6 +2463,7 @@ pub(crate) fn mock_engine_handle() -> MockEngineHandle { mod approval; mod capacity_flow; mod dispatch; +mod tool_catalog; mod turn_loop; use self::approval::{ApprovalDecision, ApprovalResult, UserInputDecision}; @@ -2887,6 +2473,14 @@ use self::dispatch::{ mcp_tool_is_read_only, parse_parallel_tool_calls, parse_tool_input, should_force_update_plan_first, should_parallelize_tool_batch, should_stop_after_plan_tool, }; +#[cfg(test)] +use self::tool_catalog::TOOL_SEARCH_BM25_NAME; +use self::tool_catalog::{ + CODE_EXECUTION_TOOL_NAME, MULTI_TOOL_PARALLEL_NAME, REQUEST_USER_INPUT_NAME, + active_tools_for_step, ensure_advanced_tooling, execute_code_execution_tool, + execute_tool_search, initial_active_tools, is_tool_search_tool, + maybe_activate_requested_deferred_tool, missing_tool_error_message, should_default_defer_tool, +}; #[cfg(test)] mod tests; diff --git a/crates/tui/src/core/engine/tool_catalog.rs b/crates/tui/src/core/engine/tool_catalog.rs new file mode 100644 index 00000000..011d632e --- /dev/null +++ b/crates/tui/src/core/engine/tool_catalog.rs @@ -0,0 +1,418 @@ +//! Deferred tool catalog and built-in advanced tool helpers. +//! +//! The streaming turn loop owns when tools are offered or executed. This module +//! owns the catalog-level policy around deferred loading, tool search, missing +//! tool suggestions, and the small set of built-in advanced tools that are not +//! registered by the normal runtime tool registry. + +use std::collections::HashSet; +use std::path::Path; +use std::time::Duration; + +use serde_json::json; + +use crate::models::Tool; +use crate::tools::spec::{ToolError, ToolResult, required_str}; +use crate::tui::app::AppMode; + +pub(super) const MULTI_TOOL_PARALLEL_NAME: &str = "multi_tool_use.parallel"; +pub(super) const REQUEST_USER_INPUT_NAME: &str = "request_user_input"; +pub(super) const CODE_EXECUTION_TOOL_NAME: &str = "code_execution"; +const CODE_EXECUTION_TOOL_TYPE: &str = "code_execution_20250825"; +const TOOL_SEARCH_REGEX_NAME: &str = "tool_search_tool_regex"; +const TOOL_SEARCH_REGEX_TYPE: &str = "tool_search_tool_regex_20251119"; +pub(super) const TOOL_SEARCH_BM25_NAME: &str = "tool_search_tool_bm25"; +const TOOL_SEARCH_BM25_TYPE: &str = "tool_search_tool_bm25_20251119"; + +pub(super) fn is_tool_search_tool(name: &str) -> bool { + matches!(name, TOOL_SEARCH_REGEX_NAME | TOOL_SEARCH_BM25_NAME) +} + +pub(super) fn should_default_defer_tool(name: &str, mode: AppMode) -> bool { + if mode == AppMode::Yolo { + return false; + } + + // Shell tools are kept active in Agent so the model can run verification + // commands (build/test/git/cargo) without first having to discover the + // tool through ToolSearch. Plan mode never registers shell tools. + let always_loaded_in_action_modes = matches!(mode, AppMode::Agent) + && matches!( + name, + "exec_shell" + | "exec_shell_wait" + | "exec_shell_interact" + | "exec_wait" + | "exec_interact" + ); + if always_loaded_in_action_modes { + return false; + } + + !matches!( + name, + "read_file" + | "list_dir" + | "grep_files" + | "file_search" + | "diagnostics" + | "rlm" + | "recall_archive" + | MULTI_TOOL_PARALLEL_NAME + | "update_plan" + | "checklist_write" + | "todo_write" + | "task_create" + | "task_list" + | "task_read" + | "task_gate_run" + | "task_shell_start" + | "task_shell_wait" + | "github_issue_context" + | "github_pr_context" + | REQUEST_USER_INPUT_NAME + ) +} + +pub(super) fn ensure_advanced_tooling(catalog: &mut Vec) { + if !catalog.iter().any(|t| t.name == CODE_EXECUTION_TOOL_NAME) { + catalog.push(Tool { + tool_type: Some(CODE_EXECUTION_TOOL_TYPE.to_string()), + name: CODE_EXECUTION_TOOL_NAME.to_string(), + description: "Execute Python code in a local sandboxed runtime and return stdout/stderr/return_code as JSON.".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "code": { "type": "string", "description": "Python source code to execute." } + }, + "required": ["code"] + }), + allowed_callers: Some(vec!["direct".to_string()]), + defer_loading: Some(false), + input_examples: None, + strict: None, + cache_control: None, + }); + } + + if !catalog.iter().any(|t| t.name == TOOL_SEARCH_REGEX_NAME) { + catalog.push(Tool { + tool_type: Some(TOOL_SEARCH_REGEX_TYPE.to_string()), + name: TOOL_SEARCH_REGEX_NAME.to_string(), + description: "Search deferred tool definitions using a regex query and return matching tool references.".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "query": { "type": "string", "description": "Regex pattern to search tool names/descriptions/schema." } + }, + "required": ["query"] + }), + allowed_callers: Some(vec!["direct".to_string()]), + defer_loading: Some(false), + input_examples: None, + strict: None, + cache_control: None, + }); + } + + if !catalog.iter().any(|t| t.name == TOOL_SEARCH_BM25_NAME) { + catalog.push(Tool { + tool_type: Some(TOOL_SEARCH_BM25_TYPE.to_string()), + name: TOOL_SEARCH_BM25_NAME.to_string(), + description: "Search deferred tool definitions using natural-language matching and return matching tool references.".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "query": { "type": "string", "description": "Natural language query for tool discovery." } + }, + "required": ["query"] + }), + allowed_callers: Some(vec!["direct".to_string()]), + defer_loading: Some(false), + input_examples: None, + strict: None, + cache_control: None, + }); + } +} + +pub(super) fn initial_active_tools(catalog: &[Tool]) -> HashSet { + let mut active = HashSet::new(); + for tool in catalog { + if !tool.defer_loading.unwrap_or(false) || is_tool_search_tool(&tool.name) { + active.insert(tool.name.clone()); + } + } + if active.is_empty() + && !catalog.is_empty() + && let Some(first) = catalog.first() + { + active.insert(first.name.clone()); + } + active +} + +fn active_tool_list_from_catalog(catalog: &[Tool], active: &HashSet) -> Vec { + catalog + .iter() + .filter(|tool| active.contains(&tool.name)) + .cloned() + .collect() +} + +pub(super) fn active_tools_for_step( + catalog: &[Tool], + active: &HashSet, + force_update_plan: bool, +) -> Vec { + // DeepSeek reasoning models reject explicit named tool_choice forcing here, + // so for obvious quick-plan asks we narrow the first-step tool surface to + // update_plan instead. + if force_update_plan { + let forced: Vec<_> = catalog + .iter() + .filter(|tool| tool.name == "update_plan") + .cloned() + .collect(); + if !forced.is_empty() { + return forced; + } + } + + active_tool_list_from_catalog(catalog, active) +} + +fn tool_search_haystack(tool: &Tool) -> String { + format!( + "{}\n{}\n{}", + tool.name.to_lowercase(), + tool.description.to_lowercase(), + tool.input_schema.to_string().to_lowercase() + ) +} + +fn discover_tools_with_regex(catalog: &[Tool], query: &str) -> Result, ToolError> { + let regex = regex::Regex::new(query) + .map_err(|err| ToolError::invalid_input(format!("Invalid regex query: {err}")))?; + + let mut matches = Vec::new(); + for tool in catalog { + if is_tool_search_tool(&tool.name) { + continue; + } + let hay = tool_search_haystack(tool); + if regex.is_match(&hay) { + matches.push(tool.name.clone()); + } + if matches.len() >= 5 { + break; + } + } + Ok(matches) +} + +fn discover_tools_with_bm25_like(catalog: &[Tool], query: &str) -> Vec { + let terms: Vec = query + .split_whitespace() + .map(|term| term.trim().to_lowercase()) + .filter(|term| !term.is_empty()) + .collect(); + if terms.is_empty() { + return Vec::new(); + } + + let mut scored: Vec<(i64, String)> = Vec::new(); + for tool in catalog { + if is_tool_search_tool(&tool.name) { + continue; + } + let hay = tool_search_haystack(tool); + let mut score = 0i64; + for term in &terms { + if hay.contains(term) { + score += 1; + } + if tool.name.to_lowercase().contains(term) { + score += 2; + } + } + if score > 0 { + scored.push((score, tool.name.clone())); + } + } + scored.sort_by(|a, b| b.0.cmp(&a.0).then_with(|| a.1.cmp(&b.1))); + scored.into_iter().take(5).map(|(_, name)| name).collect() +} + +fn edit_distance(a: &str, b: &str) -> usize { + if a == b { + return 0; + } + if a.is_empty() { + return b.chars().count(); + } + if b.is_empty() { + return a.chars().count(); + } + + let b_chars: Vec = b.chars().collect(); + let mut prev: Vec = (0..=b_chars.len()).collect(); + let mut curr = vec![0usize; b_chars.len() + 1]; + + for (i, a_ch) in a.chars().enumerate() { + curr[0] = i + 1; + for (j, b_ch) in b_chars.iter().enumerate() { + let cost = if a_ch == *b_ch { 0 } else { 1 }; + let delete = prev[j + 1] + 1; + let insert = curr[j] + 1; + let substitute = prev[j] + cost; + curr[j + 1] = delete.min(insert).min(substitute); + } + std::mem::swap(&mut prev, &mut curr); + } + + prev[b_chars.len()] +} + +fn suggest_tool_names(catalog: &[Tool], requested: &str, limit: usize) -> Vec { + let requested = requested.trim().to_ascii_lowercase(); + if requested.is_empty() || limit == 0 { + return Vec::new(); + } + + let mut candidates: Vec<(u8, usize, String)> = Vec::new(); + for tool in catalog { + let candidate = tool.name.to_ascii_lowercase(); + let prefix_match = candidate.starts_with(&requested) || requested.starts_with(&candidate); + let contains_match = candidate.contains(&requested) || requested.contains(&candidate); + let distance = edit_distance(&candidate, &requested); + let close_typo = distance <= 3; + + if !(prefix_match || contains_match || close_typo) { + continue; + } + + let rank = if prefix_match { + 0 + } else if contains_match { + 1 + } else { + 2 + }; + candidates.push((rank, distance, tool.name.clone())); + } + + candidates.sort_by(|a, b| { + a.0.cmp(&b.0) + .then_with(|| a.1.cmp(&b.1)) + .then_with(|| a.2.cmp(&b.2)) + }); + candidates.dedup_by(|a, b| a.2 == b.2); + candidates + .into_iter() + .take(limit) + .map(|(_, _, name)| name) + .collect() +} + +pub(super) fn missing_tool_error_message(tool_name: &str, catalog: &[Tool]) -> String { + let suggestions = suggest_tool_names(catalog, tool_name, 3); + if suggestions.is_empty() { + return format!( + "Tool '{tool_name}' is not available in the current tool catalog. \ + Verify mode/feature flags, or use {TOOL_SEARCH_BM25_NAME} with a short query." + ); + } + + format!( + "Tool '{tool_name}' is not available in the current tool catalog. \ + Did you mean: {}? You can also use {TOOL_SEARCH_BM25_NAME} to discover tools.", + suggestions.join(", ") + ) +} + +pub(super) fn maybe_activate_requested_deferred_tool( + tool_name: &str, + catalog: &[Tool], + active_tools: &mut HashSet, +) -> bool { + let Some(def) = catalog.iter().find(|def| def.name == tool_name) else { + return false; + }; + + if !def.defer_loading.unwrap_or(false) || active_tools.contains(tool_name) { + return false; + } + + active_tools.insert(tool_name.to_string()) +} + +pub(super) fn execute_tool_search( + tool_name: &str, + input: &serde_json::Value, + catalog: &[Tool], + active_tools: &mut HashSet, +) -> Result { + let query = required_str(input, "query")?; + let discovered = if tool_name == TOOL_SEARCH_REGEX_NAME { + discover_tools_with_regex(catalog, query)? + } else { + discover_tools_with_bm25_like(catalog, query) + }; + + for name in &discovered { + active_tools.insert(name.clone()); + } + + let references = discovered + .iter() + .map(|name| json!({"type": "tool_reference", "tool_name": name})) + .collect::>(); + + let payload = json!({ + "type": "tool_search_tool_search_result", + "tool_references": references, + }); + + Ok(ToolResult { + content: serde_json::to_string(&payload).unwrap_or_else(|_| payload.to_string()), + success: true, + metadata: Some(json!({ + "tool_references": discovered, + })), + }) +} + +pub(super) async fn execute_code_execution_tool( + input: &serde_json::Value, + workspace: &Path, +) -> Result { + let code = required_str(input, "code")?; + let mut cmd = tokio::process::Command::new("python3"); + cmd.arg("-c"); + cmd.arg(code); + cmd.current_dir(workspace); + + let output = tokio::time::timeout(Duration::from_secs(120), cmd.output()) + .await + .map_err(|_| ToolError::Timeout { seconds: 120 }) + .and_then(|res| res.map_err(|e| ToolError::execution_failed(e.to_string())))?; + + let stdout = String::from_utf8_lossy(&output.stdout).to_string(); + let stderr = String::from_utf8_lossy(&output.stderr).to_string(); + let return_code = output.status.code().unwrap_or(-1); + let success = output.status.success(); + let payload = json!({ + "type": "code_execution_result", + "stdout": stdout, + "stderr": stderr, + "return_code": return_code, + "content": [], + }); + + Ok(ToolResult { + content: serde_json::to_string(&payload).unwrap_or_else(|_| payload.to_string()), + success, + metadata: Some(payload), + }) +} From f0fad7aa2ebdb976757ad0d24047c51c1031cf7a Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Fri, 1 May 2026 06:07:59 -0500 Subject: [PATCH 09/17] refactor(engine): modularize turn tool setup --- crates/tui/src/core/engine.rs | 77 +++------------------- crates/tui/src/core/engine/tests.rs | 70 ++++++++++++++++++++ crates/tui/src/core/engine/tool_catalog.rs | 35 ++++++++++ crates/tui/src/core/engine/tool_setup.rs | 52 +++++++++++++++ 4 files changed, 167 insertions(+), 67 deletions(-) create mode 100644 crates/tui/src/core/engine/tool_setup.rs diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index 04bb656a..af3998c6 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -1302,43 +1302,7 @@ impl Engine { let plan_state = self.config.plan_state.clone(); let tool_context = self.build_tool_context(mode, auto_approve); - let mut builder = if mode == AppMode::Plan { - ToolRegistryBuilder::new() - .with_read_only_file_tools() - .with_search_tools() - .with_git_tools() - .with_git_history_tools() - .with_diagnostics_tool() - .with_validation_tools() - .with_runtime_task_tools() - .with_todo_tool(todo_list.clone()) - .with_plan_tool(plan_state.clone()) - } else { - ToolRegistryBuilder::new() - .with_agent_tools(self.session.allow_shell) - .with_todo_tool(todo_list.clone()) - .with_plan_tool(plan_state.clone()) - }; - - builder = builder - .with_review_tool(self.deepseek_client.clone(), self.session.model.clone()) - .with_rlm_tool(self.deepseek_client.clone(), self.session.model.clone()) - .with_user_input_tool() - .with_parallel_tool(); - - if self.config.features.enabled(Feature::ApplyPatch) && mode != AppMode::Plan { - builder = builder.with_patch_tools(); - } - if self.config.features.enabled(Feature::WebSearch) { - builder = builder.with_web_tools(); - } - // Plan mode now keeps shell available — the existing approval flow - // and command-safety classifier gate destructive commands. Writes - // and patches stay blocked above; that's the only "destructive" - // boundary plan mode enforces by tool registration. - if self.config.features.enabled(Feature::ShellTool) && self.session.allow_shell { - builder = builder.with_shell_tools(); - } + let builder = self.build_turn_tool_registry_builder(mode, todo_list, plan_state); // Mailbox for structured sub-agent envelopes (#128/#130). One per // turn: the receiver is drained by a short-lived task that converts @@ -1411,31 +1375,9 @@ impl Engine { } else { Vec::new() }; - let tools = tool_registry.as_ref().map(|registry| { - let mut tools = registry.to_api_tools(); - for tool in &mut tools { - tool.defer_loading = Some(should_default_defer_tool(&tool.name, mode)); - } - let mut mcp_tools = mcp_tools; - for tool in &mut mcp_tools { - if mode == AppMode::Yolo { - tool.defer_loading = Some(false); - continue; - } - - let keep_loaded = matches!( - tool.name.as_str(), - "list_mcp_resources" - | "list_mcp_resource_templates" - | "mcp_read_resource" - | "read_mcp_resource" - | "mcp_get_prompt" - ); - tool.defer_loading = Some(!keep_loaded); - } - tools.extend(mcp_tools); - tools - }); + let tools = tool_registry + .as_ref() + .map(|registry| build_model_tool_catalog(registry.to_api_tools(), mcp_tools, mode)); // Main turn loop let (status, error) = self @@ -2464,6 +2406,7 @@ mod approval; mod capacity_flow; mod dispatch; mod tool_catalog; +mod tool_setup; mod turn_loop; use self::approval::{ApprovalDecision, ApprovalResult, UserInputDecision}; @@ -2473,14 +2416,14 @@ use self::dispatch::{ mcp_tool_is_read_only, parse_parallel_tool_calls, parse_tool_input, should_force_update_plan_first, should_parallelize_tool_batch, should_stop_after_plan_tool, }; -#[cfg(test)] -use self::tool_catalog::TOOL_SEARCH_BM25_NAME; use self::tool_catalog::{ CODE_EXECUTION_TOOL_NAME, MULTI_TOOL_PARALLEL_NAME, REQUEST_USER_INPUT_NAME, - active_tools_for_step, ensure_advanced_tooling, execute_code_execution_tool, - execute_tool_search, initial_active_tools, is_tool_search_tool, - maybe_activate_requested_deferred_tool, missing_tool_error_message, should_default_defer_tool, + active_tools_for_step, build_model_tool_catalog, ensure_advanced_tooling, + execute_code_execution_tool, execute_tool_search, initial_active_tools, is_tool_search_tool, + maybe_activate_requested_deferred_tool, missing_tool_error_message, }; +#[cfg(test)] +use self::tool_catalog::{TOOL_SEARCH_BM25_NAME, should_default_defer_tool}; #[cfg(test)] mod tests; diff --git a/crates/tui/src/core/engine/tests.rs b/crates/tui/src/core/engine/tests.rs index d4194473..8dfc291c 100644 --- a/crates/tui/src/core/engine/tests.rs +++ b/crates/tui/src/core/engine/tests.rs @@ -36,6 +36,20 @@ fn make_plan( } } +fn api_tool(name: &str) -> Tool { + Tool { + tool_type: Some("function".to_string()), + name: name.to_string(), + description: format!("Test tool {name}"), + input_schema: json!({"type": "object"}), + allowed_callers: Some(vec!["direct".to_string()]), + defer_loading: None, + input_examples: None, + strict: None, + cache_control: None, + } +} + #[test] fn engine_handle_cancel_tracks_latest_turn_token() { let (mut engine, handle) = Engine::new(EngineConfig::default(), &Config::default()); @@ -205,6 +219,62 @@ fn non_yolo_mode_retains_default_defer_policy() { )); } +#[test] +fn model_tool_catalog_applies_native_and_mcp_deferral() { + let catalog = build_model_tool_catalog( + vec![ + api_tool("read_file"), + api_tool("exec_shell"), + api_tool("project_map"), + ], + vec![api_tool("list_mcp_resources"), api_tool("mcp_server_write")], + AppMode::Agent, + ); + + let defer_loading = |name: &str| { + catalog + .iter() + .find(|tool| tool.name == name) + .and_then(|tool| tool.defer_loading) + }; + + assert_eq!(defer_loading("read_file"), Some(false)); + assert_eq!(defer_loading("exec_shell"), Some(false)); + assert_eq!(defer_loading("project_map"), Some(true)); + assert_eq!(defer_loading("list_mcp_resources"), Some(false)); + assert_eq!(defer_loading("mcp_server_write"), Some(true)); +} + +#[test] +fn model_tool_catalog_keeps_everything_loaded_in_yolo_mode() { + let catalog = build_model_tool_catalog( + vec![api_tool("project_map")], + vec![api_tool("mcp_server_write")], + AppMode::Yolo, + ); + + assert!(catalog.iter().all(|tool| tool.defer_loading == Some(false))); +} + +#[test] +fn turn_tool_registry_builder_keeps_plan_mode_read_only_for_files() { + let (engine, _handle) = Engine::new(EngineConfig::default(), &Config::default()); + let registry = engine + .build_turn_tool_registry_builder( + AppMode::Plan, + engine.config.todos.clone(), + engine.config.plan_state.clone(), + ) + .build(engine.build_tool_context(AppMode::Plan, false)); + + assert!(registry.contains("read_file")); + assert!(registry.contains("list_dir")); + assert!(!registry.contains("write_file")); + assert!(!registry.contains("edit_file")); + assert!(registry.contains("update_plan")); + assert!(registry.contains("task_create")); +} + #[test] fn agent_mode_can_build_auto_approved_tool_context() { let (engine, _handle) = Engine::new(EngineConfig::default(), &Config::default()); diff --git a/crates/tui/src/core/engine/tool_catalog.rs b/crates/tui/src/core/engine/tool_catalog.rs index 011d632e..2581fd4f 100644 --- a/crates/tui/src/core/engine/tool_catalog.rs +++ b/crates/tui/src/core/engine/tool_catalog.rs @@ -74,6 +74,41 @@ pub(super) fn should_default_defer_tool(name: &str, mode: AppMode) -> bool { ) } +pub(super) fn apply_native_tool_deferral(catalog: &mut [Tool], mode: AppMode) { + for tool in catalog { + tool.defer_loading = Some(should_default_defer_tool(&tool.name, mode)); + } +} + +fn should_keep_mcp_tool_loaded(name: &str) -> bool { + matches!( + name, + "list_mcp_resources" + | "list_mcp_resource_templates" + | "mcp_read_resource" + | "read_mcp_resource" + | "mcp_get_prompt" + ) +} + +pub(super) fn apply_mcp_tool_deferral(catalog: &mut [Tool], mode: AppMode) { + for tool in catalog { + tool.defer_loading = + Some(mode != AppMode::Yolo && !should_keep_mcp_tool_loaded(&tool.name)); + } +} + +pub(super) fn build_model_tool_catalog( + mut native_tools: Vec, + mut mcp_tools: Vec, + mode: AppMode, +) -> Vec { + apply_native_tool_deferral(&mut native_tools, mode); + apply_mcp_tool_deferral(&mut mcp_tools, mode); + native_tools.extend(mcp_tools); + native_tools +} + pub(super) fn ensure_advanced_tooling(catalog: &mut Vec) { if !catalog.iter().any(|t| t.name == CODE_EXECUTION_TOOL_NAME) { catalog.push(Tool { diff --git a/crates/tui/src/core/engine/tool_setup.rs b/crates/tui/src/core/engine/tool_setup.rs new file mode 100644 index 00000000..220d4706 --- /dev/null +++ b/crates/tui/src/core/engine/tool_setup.rs @@ -0,0 +1,52 @@ +//! Per-turn tool registry setup. +//! +//! This keeps mode/feature-specific registry construction out of the send path. + +use super::*; + +impl Engine { + pub(super) fn build_turn_tool_registry_builder( + &self, + mode: AppMode, + todo_list: SharedTodoList, + plan_state: SharedPlanState, + ) -> ToolRegistryBuilder { + let mut builder = if mode == AppMode::Plan { + ToolRegistryBuilder::new() + .with_read_only_file_tools() + .with_search_tools() + .with_git_tools() + .with_git_history_tools() + .with_diagnostics_tool() + .with_validation_tools() + .with_runtime_task_tools() + .with_todo_tool(todo_list) + .with_plan_tool(plan_state) + } else { + ToolRegistryBuilder::new() + .with_agent_tools(self.session.allow_shell) + .with_todo_tool(todo_list) + .with_plan_tool(plan_state) + }; + + builder = builder + .with_review_tool(self.deepseek_client.clone(), self.session.model.clone()) + .with_rlm_tool(self.deepseek_client.clone(), self.session.model.clone()) + .with_user_input_tool() + .with_parallel_tool(); + + if self.config.features.enabled(Feature::ApplyPatch) && mode != AppMode::Plan { + builder = builder.with_patch_tools(); + } + if self.config.features.enabled(Feature::WebSearch) { + builder = builder.with_web_tools(); + } + // Plan mode keeps shell available when the session allows it; command + // safety and approval checks still gate risky commands. + if self.config.features.enabled(Feature::ShellTool) && self.session.allow_shell { + builder = builder.with_shell_tools(); + } + + builder + } +} From 8dd5ed38d7fcfedac2cf600f5bbf97c90f836e33 Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Fri, 1 May 2026 07:09:30 -0500 Subject: [PATCH 10/17] refactor(engine): extract context helpers --- crates/tui/src/core/engine.rs | 279 +------------------- crates/tui/src/core/engine/capacity_flow.rs | 2 + crates/tui/src/core/engine/context.rs | 279 ++++++++++++++++++++ crates/tui/src/core/engine/tests.rs | 2 + 4 files changed, 294 insertions(+), 268 deletions(-) create mode 100644 crates/tui/src/core/engine/context.rs diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index af3998c6..d98e0700 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -22,7 +22,7 @@ use tokio_util::sync::CancellationToken; use crate::client::DeepSeekClient; use crate::compaction::{ - CompactionConfig, compact_messages_safe, estimate_tokens, merge_system_prompts, should_compact, + CompactionConfig, compact_messages_safe, merge_system_prompts, should_compact, }; use crate::config::{Config, DEFAULT_MAX_SUBAGENTS, DEFAULT_TEXT_MODEL}; use crate::cycle_manager::{ @@ -35,7 +35,7 @@ use crate::llm_client::LlmClient; use crate::mcp::McpPool; use crate::models::{ ContentBlock, ContentBlockStart, DEFAULT_CONTEXT_WINDOW_TOKENS, Delta, Message, MessageRequest, - StreamEvent, SystemBlock, SystemPrompt, Tool, ToolCaller, Usage, context_window_for_model, + StreamEvent, SystemPrompt, Tool, ToolCaller, Usage, }; use crate::prompts; use crate::seam_manager::{SeamConfig, SeamManager}; @@ -358,38 +358,6 @@ fn should_transparently_retry_stream( ) -> bool { !any_content_received && transparent_attempts < MAX_TRANSPARENT_STREAM_RETRIES && !cancelled } -/// Max output tokens requested for normal agent turns. Generous on purpose: -/// V4 thinking models can produce tens of thousands of reasoning tokens on -/// hard prompts before the visible reply, and DeepSeek V4 ships with a 1M -/// context window. v0.7.5 keeps this cap fixed instead of silently lowering -/// `max_tokens` near pressure; hard-cycle/preflight checks reserve this budget -/// plus safety headroom before sending the next request. -const TURN_MAX_OUTPUT_TOKENS: u32 = 262_144; -/// Keep this many most recent messages when emergency trimming is required. -const MIN_RECENT_MESSAGES_TO_KEEP: usize = 4; -/// Allow a few emergency recovery attempts before failing the turn. -const MAX_CONTEXT_RECOVERY_ATTEMPTS: u8 = 2; -/// Reserve additional headroom to avoid hitting provider hard limits. -const CONTEXT_HEADROOM_TOKENS: usize = 1024; -/// Hard cap for any tool output inserted into model context. -const TOOL_RESULT_CONTEXT_HARD_LIMIT_CHARS: usize = 12_000; -/// Soft cap for known noisy tools inserted into model context. -const TOOL_RESULT_CONTEXT_SOFT_LIMIT_CHARS: usize = 2_000; -/// Snippet length kept when compacting tool output for model context. -const TOOL_RESULT_CONTEXT_SNIPPET_CHARS: usize = 900; -/// Hard cap for tool output inserted into a large-context model. -const LARGE_CONTEXT_TOOL_RESULT_HARD_LIMIT_CHARS: usize = 180_000; -/// Soft cap for known noisy tools inserted into a large-context model. -const LARGE_CONTEXT_TOOL_RESULT_SOFT_LIMIT_CHARS: usize = 60_000; -/// Snippet length kept when compacting large-context tool output. -const LARGE_CONTEXT_TOOL_RESULT_SNIPPET_CHARS: usize = 40_000; -/// Context window size at which tool output limits can be relaxed. -const LARGE_CONTEXT_WINDOW_TOKENS: u32 = 500_000; -/// Max chars to keep from metadata-provided output summaries. -const TOOL_RESULT_METADATA_SUMMARY_CHARS: usize = 320; -const COMPACTION_SUMMARY_MARKER: &str = "Conversation Summary (Auto-Generated)"; -const WORKING_SET_SUMMARY_MARKER: &str = "## Repo Working Set"; - pub(crate) const TOOL_CALL_START_MARKERS: [&str; 5] = [ "[TOOL_CALL]", " String { } } -fn summarize_text(text: &str, limit: usize) -> String { - if text.chars().count() <= limit { - return text.to_string(); - } - let take = limit.saturating_sub(3); - let mut out: String = text.chars().take(take).collect(); - out.push_str("..."); - out -} - -fn summarize_text_head_tail(text: &str, limit: usize) -> String { - let total = text.chars().count(); - if total <= limit { - return text.to_string(); - } - if limit <= 20 { - return summarize_text(text, limit); - } - - let marker = "\n\n[... output truncated for context ...]\n\n"; - let marker_len = marker.chars().count(); - if limit <= marker_len + 20 { - return summarize_text(text, limit); - } - - let remaining = limit - marker_len; - let head_len = remaining.saturating_mul(2) / 3; - let tail_len = remaining.saturating_sub(head_len); - let head: String = text.chars().take(head_len).collect(); - let tail_vec: Vec = text.chars().rev().take(tail_len).collect(); - let tail: String = tail_vec.into_iter().rev().collect(); - format!("{head}{marker}{tail}") -} - -fn tool_result_is_noisy(tool_name: &str) -> bool { - matches!( - tool_name, - "exec_shell" - | "exec_shell_wait" - | "exec_shell_interact" - | "multi_tool_use.parallel" - | "web_search" - ) -} - -fn tool_result_metadata_summary(metadata: Option<&serde_json::Value>) -> Option { - let obj = metadata?.as_object()?; - for key in ["summary", "stdout_summary", "stderr_summary", "message"] { - if let Some(text) = obj.get(key).and_then(serde_json::Value::as_str) { - let trimmed = text.trim(); - if !trimmed.is_empty() { - return Some(summarize_text(trimmed, TOOL_RESULT_METADATA_SUMMARY_CHARS)); - } - } - } - None -} - -#[derive(Debug, Clone, Copy)] -struct ToolResultContextLimits { - hard_limit_chars: usize, - noisy_soft_limit_chars: usize, - snippet_chars: usize, -} - -fn tool_result_context_limits_for_model(model: &str) -> ToolResultContextLimits { - let is_large_context = - context_window_for_model(model).is_some_and(|window| window >= LARGE_CONTEXT_WINDOW_TOKENS); - - if is_large_context { - ToolResultContextLimits { - hard_limit_chars: LARGE_CONTEXT_TOOL_RESULT_HARD_LIMIT_CHARS, - noisy_soft_limit_chars: LARGE_CONTEXT_TOOL_RESULT_SOFT_LIMIT_CHARS, - snippet_chars: LARGE_CONTEXT_TOOL_RESULT_SNIPPET_CHARS, - } - } else { - ToolResultContextLimits { - hard_limit_chars: TOOL_RESULT_CONTEXT_HARD_LIMIT_CHARS, - noisy_soft_limit_chars: TOOL_RESULT_CONTEXT_SOFT_LIMIT_CHARS, - snippet_chars: TOOL_RESULT_CONTEXT_SNIPPET_CHARS, - } - } -} - -pub(crate) fn compact_tool_result_for_context( - model: &str, - tool_name: &str, - output: &ToolResult, -) -> String { - let raw = output.content.trim(); - if raw.is_empty() { - return String::new(); - } - - let limits = tool_result_context_limits_for_model(model); - let raw_chars = raw.chars().count(); - let should_compact = raw_chars > limits.hard_limit_chars - || (tool_result_is_noisy(tool_name) && raw_chars > limits.noisy_soft_limit_chars); - if !should_compact { - return raw.to_string(); - } - - let snippet = summarize_text_head_tail(raw, limits.snippet_chars); - let omitted = raw_chars.saturating_sub(snippet.chars().count()); - let summary = tool_result_metadata_summary(output.metadata.as_ref()); - - if let Some(summary) = summary { - format!( - "[{tool_name} output compacted to protect context]\nSummary: {summary}\nSnippet: {snippet}\n(Original: {raw_chars} chars, omitted: {omitted} chars.)" - ) - } else { - format!( - "[{tool_name} output compacted to protect context]\nSnippet: {snippet}\n(Original: {raw_chars} chars, omitted: {omitted} chars.)" - ) - } -} - -fn extract_compaction_summary_prompt(prompt: Option) -> Option { - match prompt { - Some(SystemPrompt::Blocks(blocks)) => { - let summary_blocks: Vec<_> = blocks - .into_iter() - .filter(|block| block.text.contains(COMPACTION_SUMMARY_MARKER)) - .collect(); - if summary_blocks.is_empty() { - None - } else { - Some(SystemPrompt::Blocks(summary_blocks)) - } - } - Some(SystemPrompt::Text(text)) => { - if text.contains(COMPACTION_SUMMARY_MARKER) { - Some(SystemPrompt::Text(text)) - } else { - None - } - } - None => None, - } -} - -fn remove_working_set_summary(prompt: Option<&SystemPrompt>) -> Option { - match prompt { - Some(SystemPrompt::Blocks(blocks)) => { - let filtered: Vec = blocks - .iter() - .filter(|block| !block.text.contains(WORKING_SET_SUMMARY_MARKER)) - .cloned() - .collect(); - if filtered.is_empty() { - None - } else { - Some(SystemPrompt::Blocks(filtered)) - } - } - Some(SystemPrompt::Text(text)) => Some(SystemPrompt::Text(text.clone())), - None => None, - } -} - -fn append_working_set_summary( - prompt: Option, - working_set_summary: Option<&str>, -) -> Option { - let Some(summary) = working_set_summary.map(str::trim).filter(|s| !s.is_empty()) else { - return prompt; - }; - let working_set_block = SystemBlock { - block_type: "text".to_string(), - text: summary.to_string(), - cache_control: None, - }; - - match prompt { - Some(SystemPrompt::Text(text)) => Some(SystemPrompt::Blocks(vec![ - SystemBlock { - block_type: "text".to_string(), - text, - cache_control: None, - }, - working_set_block, - ])), - Some(SystemPrompt::Blocks(mut blocks)) => { - blocks.retain(|block| !block.text.contains(WORKING_SET_SUMMARY_MARKER)); - blocks.push(working_set_block); - Some(SystemPrompt::Blocks(blocks)) - } - None => Some(SystemPrompt::Blocks(vec![working_set_block])), - } -} - -fn estimate_text_tokens_conservative(text: &str) -> usize { - text.chars().count().div_ceil(3) -} - -fn estimate_system_tokens_conservative(system: Option<&SystemPrompt>) -> usize { - match system { - Some(SystemPrompt::Text(text)) => estimate_text_tokens_conservative(text), - Some(SystemPrompt::Blocks(blocks)) => blocks - .iter() - .map(|block| estimate_text_tokens_conservative(&block.text)) - .sum(), - None => 0, - } -} - -fn estimate_input_tokens_conservative( - messages: &[Message], - system: Option<&SystemPrompt>, -) -> usize { - let message_tokens = estimate_tokens(messages).saturating_mul(3).div_ceil(2); - let system_tokens = estimate_system_tokens_conservative(system); - let framing_overhead = messages.len().saturating_mul(12).saturating_add(48); - message_tokens - .saturating_add(system_tokens) - .saturating_add(framing_overhead) -} - -fn context_input_budget(model: &str, requested_output_tokens: u32) -> Option { - let window = usize::try_from(context_window_for_model(model)?).ok()?; - let output = usize::try_from(requested_output_tokens).ok()?; - window - .checked_sub(output) - .and_then(|v| v.checked_sub(CONTEXT_HEADROOM_TOKENS)) -} - -fn turn_response_headroom_tokens() -> u64 { - u64::from(TURN_MAX_OUTPUT_TOKENS).saturating_add(CONTEXT_HEADROOM_TOKENS as u64) -} - -fn is_context_length_error_message(message: &str) -> bool { - crate::error_taxonomy::classify_error_message(message) == ErrorCategory::InvalidInput -} - fn emit_tool_audit(event: serde_json::Value) { let Some(path) = std::env::var_os("DEEPSEEK_TOOL_AUDIT_LOG") else { return; @@ -2404,6 +2138,15 @@ pub(crate) fn mock_engine_handle() -> MockEngineHandle { mod approval; mod capacity_flow; +mod context; +pub(crate) use context::compact_tool_result_for_context; +use context::{ + COMPACTION_SUMMARY_MARKER, MAX_CONTEXT_RECOVERY_ATTEMPTS, MIN_RECENT_MESSAGES_TO_KEEP, + TURN_MAX_OUTPUT_TOKENS, append_working_set_summary, context_input_budget, + estimate_input_tokens_conservative, extract_compaction_summary_prompt, + is_context_length_error_message, remove_working_set_summary, summarize_text, + turn_response_headroom_tokens, +}; mod dispatch; mod tool_catalog; mod tool_setup; diff --git a/crates/tui/src/core/engine/capacity_flow.rs b/crates/tui/src/core/engine/capacity_flow.rs index f5fbfe4a..f280e644 100644 --- a/crates/tui/src/core/engine/capacity_flow.rs +++ b/crates/tui/src/core/engine/capacity_flow.rs @@ -7,6 +7,8 @@ use super::*; +use crate::models::context_window_for_model; + impl Engine { pub(super) async fn run_capacity_pre_request_checkpoint( &mut self, diff --git a/crates/tui/src/core/engine/context.rs b/crates/tui/src/core/engine/context.rs new file mode 100644 index 00000000..c1e49686 --- /dev/null +++ b/crates/tui/src/core/engine/context.rs @@ -0,0 +1,279 @@ +//! Context budgeting and prompt-shaping helpers for the engine. +//! +//! These functions are shared by the streaming turn loop, capacity flow, and +//! engine session maintenance code. Keeping them here prevents the top-level +//! engine module from accumulating unrelated context-policy details. + +use crate::compaction::estimate_tokens; +use crate::error_taxonomy::ErrorCategory; +use crate::models::{Message, SystemBlock, SystemPrompt, context_window_for_model}; +use crate::tools::spec::ToolResult; + +/// Max output tokens requested for normal agent turns. Generous on purpose: +/// V4 thinking models can produce tens of thousands of reasoning tokens on +/// hard prompts before the visible reply, and DeepSeek V4 ships with a 1M +/// context window. v0.7.5 keeps this cap fixed instead of silently lowering +/// `max_tokens` near pressure; hard-cycle/preflight checks reserve this budget +/// plus safety headroom before sending the next request. +pub(super) const TURN_MAX_OUTPUT_TOKENS: u32 = 262_144; +/// Keep this many most recent messages when emergency trimming is required. +pub(super) const MIN_RECENT_MESSAGES_TO_KEEP: usize = 4; +/// Allow a few emergency recovery attempts before failing the turn. +pub(super) const MAX_CONTEXT_RECOVERY_ATTEMPTS: u8 = 2; +/// Reserve additional headroom to avoid hitting provider hard limits. +const CONTEXT_HEADROOM_TOKENS: usize = 1024; +/// Hard cap for any tool output inserted into model context. +const TOOL_RESULT_CONTEXT_HARD_LIMIT_CHARS: usize = 12_000; +/// Soft cap for known noisy tools inserted into model context. +const TOOL_RESULT_CONTEXT_SOFT_LIMIT_CHARS: usize = 2_000; +/// Snippet length kept when compacting tool output for model context. +const TOOL_RESULT_CONTEXT_SNIPPET_CHARS: usize = 900; +/// Hard cap for tool output inserted into a large-context model. +const LARGE_CONTEXT_TOOL_RESULT_HARD_LIMIT_CHARS: usize = 180_000; +/// Soft cap for known noisy tools inserted into a large-context model. +const LARGE_CONTEXT_TOOL_RESULT_SOFT_LIMIT_CHARS: usize = 60_000; +/// Snippet length kept when compacting large-context tool output. +const LARGE_CONTEXT_TOOL_RESULT_SNIPPET_CHARS: usize = 40_000; +/// Context window size at which tool output limits can be relaxed. +const LARGE_CONTEXT_WINDOW_TOKENS: u32 = 500_000; +/// Max chars to keep from metadata-provided output summaries. +const TOOL_RESULT_METADATA_SUMMARY_CHARS: usize = 320; + +pub(super) const COMPACTION_SUMMARY_MARKER: &str = "Conversation Summary (Auto-Generated)"; +pub(super) const WORKING_SET_SUMMARY_MARKER: &str = "## Repo Working Set"; + +#[derive(Debug, Clone, Copy)] +struct ToolResultContextLimits { + hard_limit_chars: usize, + noisy_soft_limit_chars: usize, + snippet_chars: usize, +} + +pub(super) fn summarize_text(text: &str, limit: usize) -> String { + if text.chars().count() <= limit { + return text.to_string(); + } + let take = limit.saturating_sub(3); + let mut out: String = text.chars().take(take).collect(); + out.push_str("..."); + out +} + +fn summarize_text_head_tail(text: &str, limit: usize) -> String { + let total = text.chars().count(); + if total <= limit { + return text.to_string(); + } + if limit <= 20 { + return summarize_text(text, limit); + } + + let marker = "\n\n[... output truncated for context ...]\n\n"; + let marker_len = marker.chars().count(); + if limit <= marker_len + 20 { + return summarize_text(text, limit); + } + + let remaining = limit - marker_len; + let head_len = remaining.saturating_mul(2) / 3; + let tail_len = remaining.saturating_sub(head_len); + let head: String = text.chars().take(head_len).collect(); + let tail_vec: Vec = text.chars().rev().take(tail_len).collect(); + let tail: String = tail_vec.into_iter().rev().collect(); + format!("{head}{marker}{tail}") +} + +fn tool_result_is_noisy(tool_name: &str) -> bool { + matches!( + tool_name, + "exec_shell" + | "exec_shell_wait" + | "exec_shell_interact" + | "multi_tool_use.parallel" + | "web_search" + ) +} + +fn tool_result_metadata_summary(metadata: Option<&serde_json::Value>) -> Option { + let obj = metadata?.as_object()?; + for key in ["summary", "stdout_summary", "stderr_summary", "message"] { + if let Some(text) = obj.get(key).and_then(serde_json::Value::as_str) { + let trimmed = text.trim(); + if !trimmed.is_empty() { + return Some(summarize_text(trimmed, TOOL_RESULT_METADATA_SUMMARY_CHARS)); + } + } + } + None +} + +fn tool_result_context_limits_for_model(model: &str) -> ToolResultContextLimits { + let is_large_context = + context_window_for_model(model).is_some_and(|window| window >= LARGE_CONTEXT_WINDOW_TOKENS); + + if is_large_context { + ToolResultContextLimits { + hard_limit_chars: LARGE_CONTEXT_TOOL_RESULT_HARD_LIMIT_CHARS, + noisy_soft_limit_chars: LARGE_CONTEXT_TOOL_RESULT_SOFT_LIMIT_CHARS, + snippet_chars: LARGE_CONTEXT_TOOL_RESULT_SNIPPET_CHARS, + } + } else { + ToolResultContextLimits { + hard_limit_chars: TOOL_RESULT_CONTEXT_HARD_LIMIT_CHARS, + noisy_soft_limit_chars: TOOL_RESULT_CONTEXT_SOFT_LIMIT_CHARS, + snippet_chars: TOOL_RESULT_CONTEXT_SNIPPET_CHARS, + } + } +} + +pub(crate) fn compact_tool_result_for_context( + model: &str, + tool_name: &str, + output: &ToolResult, +) -> String { + let raw = output.content.trim(); + if raw.is_empty() { + return String::new(); + } + + let limits = tool_result_context_limits_for_model(model); + let raw_chars = raw.chars().count(); + let should_compact = raw_chars > limits.hard_limit_chars + || (tool_result_is_noisy(tool_name) && raw_chars > limits.noisy_soft_limit_chars); + if !should_compact { + return raw.to_string(); + } + + let snippet = summarize_text_head_tail(raw, limits.snippet_chars); + let omitted = raw_chars.saturating_sub(snippet.chars().count()); + let summary = tool_result_metadata_summary(output.metadata.as_ref()); + + if let Some(summary) = summary { + format!( + "[{tool_name} output compacted to protect context]\nSummary: {summary}\nSnippet: {snippet}\n(Original: {raw_chars} chars, omitted: {omitted} chars.)" + ) + } else { + format!( + "[{tool_name} output compacted to protect context]\nSnippet: {snippet}\n(Original: {raw_chars} chars, omitted: {omitted} chars.)" + ) + } +} + +pub(super) fn extract_compaction_summary_prompt( + prompt: Option, +) -> Option { + match prompt { + Some(SystemPrompt::Blocks(blocks)) => { + let summary_blocks: Vec<_> = blocks + .into_iter() + .filter(|block| block.text.contains(COMPACTION_SUMMARY_MARKER)) + .collect(); + if summary_blocks.is_empty() { + None + } else { + Some(SystemPrompt::Blocks(summary_blocks)) + } + } + Some(SystemPrompt::Text(text)) => { + if text.contains(COMPACTION_SUMMARY_MARKER) { + Some(SystemPrompt::Text(text)) + } else { + None + } + } + None => None, + } +} + +pub(super) fn remove_working_set_summary(prompt: Option<&SystemPrompt>) -> Option { + match prompt { + Some(SystemPrompt::Blocks(blocks)) => { + let filtered: Vec = blocks + .iter() + .filter(|block| !block.text.contains(WORKING_SET_SUMMARY_MARKER)) + .cloned() + .collect(); + if filtered.is_empty() { + None + } else { + Some(SystemPrompt::Blocks(filtered)) + } + } + Some(SystemPrompt::Text(text)) => Some(SystemPrompt::Text(text.clone())), + None => None, + } +} + +pub(super) fn append_working_set_summary( + prompt: Option, + working_set_summary: Option<&str>, +) -> Option { + let Some(summary) = working_set_summary.map(str::trim).filter(|s| !s.is_empty()) else { + return prompt; + }; + let working_set_block = SystemBlock { + block_type: "text".to_string(), + text: summary.to_string(), + cache_control: None, + }; + + match prompt { + Some(SystemPrompt::Text(text)) => Some(SystemPrompt::Blocks(vec![ + SystemBlock { + block_type: "text".to_string(), + text, + cache_control: None, + }, + working_set_block, + ])), + Some(SystemPrompt::Blocks(mut blocks)) => { + blocks.retain(|block| !block.text.contains(WORKING_SET_SUMMARY_MARKER)); + blocks.push(working_set_block); + Some(SystemPrompt::Blocks(blocks)) + } + None => Some(SystemPrompt::Blocks(vec![working_set_block])), + } +} + +fn estimate_text_tokens_conservative(text: &str) -> usize { + text.chars().count().div_ceil(3) +} + +fn estimate_system_tokens_conservative(system: Option<&SystemPrompt>) -> usize { + match system { + Some(SystemPrompt::Text(text)) => estimate_text_tokens_conservative(text), + Some(SystemPrompt::Blocks(blocks)) => blocks + .iter() + .map(|block| estimate_text_tokens_conservative(&block.text)) + .sum(), + None => 0, + } +} + +pub(super) fn estimate_input_tokens_conservative( + messages: &[Message], + system: Option<&SystemPrompt>, +) -> usize { + let message_tokens = estimate_tokens(messages).saturating_mul(3).div_ceil(2); + let system_tokens = estimate_system_tokens_conservative(system); + let framing_overhead = messages.len().saturating_mul(12).saturating_add(48); + message_tokens + .saturating_add(system_tokens) + .saturating_add(framing_overhead) +} + +pub(super) fn context_input_budget(model: &str, requested_output_tokens: u32) -> Option { + let window = usize::try_from(context_window_for_model(model)?).ok()?; + let output = usize::try_from(requested_output_tokens).ok()?; + window + .checked_sub(output) + .and_then(|v| v.checked_sub(CONTEXT_HEADROOM_TOKENS)) +} + +pub(super) fn turn_response_headroom_tokens() -> u64 { + u64::from(TURN_MAX_OUTPUT_TOKENS).saturating_add(CONTEXT_HEADROOM_TOKENS as u64) +} + +pub(super) fn is_context_length_error_message(message: &str) -> bool { + crate::error_taxonomy::classify_error_message(message) == ErrorCategory::InvalidInput +} diff --git a/crates/tui/src/core/engine/tests.rs b/crates/tui/src/core/engine/tests.rs index 8dfc291c..f4a7f7ed 100644 --- a/crates/tui/src/core/engine/tests.rs +++ b/crates/tui/src/core/engine/tests.rs @@ -1,5 +1,7 @@ use super::*; +use super::context::WORKING_SET_SUMMARY_MARKER; +use crate::models::SystemBlock; use serde_json::json; use std::fs; use std::path::PathBuf; From 8379230ef1fbdc78d72a946fc5da551c32093a2f Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Fri, 1 May 2026 08:07:22 -0500 Subject: [PATCH 11/17] refactor(engine): split tool execution helpers --- crates/tui/src/core/engine.rs | 304 +------------------ crates/tui/src/core/engine/lsp_hooks.rs | 128 ++++++++ crates/tui/src/core/engine/tool_execution.rs | 197 ++++++++++++ 3 files changed, 330 insertions(+), 299 deletions(-) create mode 100644 crates/tui/src/core/engine/lsp_hooks.rs create mode 100644 crates/tui/src/core/engine/tool_execution.rs diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index d98e0700..6e2e5911 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -11,7 +11,6 @@ use std::collections::HashMap; use std::path::PathBuf; use std::sync::{Arc, Mutex as StdMutex}; use std::time::{Duration, Instant}; -use std::{fs::OpenOptions, io::Write}; use anyhow::Result; use futures_util::StreamExt; @@ -427,67 +426,6 @@ fn caller_type_for_tool_use(caller: Option<&ToolCaller>) -> &str { caller.map_or("direct", |c| c.caller_type.as_str()) } -/// #136: derive the file path(s) edited by a tool call. Returns the empty -/// vec for tools that don't modify files. We intentionally only handle the -/// three known edit tools — adding more (e.g. specialized refactor tools) -/// is a one-line change here. -fn edited_paths_for_tool(tool_name: &str, input: &serde_json::Value) -> Vec { - match tool_name { - "edit_file" | "write_file" => { - if let Some(path) = input.get("path").and_then(|v| v.as_str()) { - vec![PathBuf::from(path)] - } else { - Vec::new() - } - } - "apply_patch" => { - // `apply_patch` accepts either a `path` override or a list of - // `files` (each `{path, content}`). We try both shapes. - let mut out = Vec::new(); - if let Some(path) = input.get("path").and_then(|v| v.as_str()) { - out.push(PathBuf::from(path)); - } - if let Some(files) = input.get("files").and_then(|v| v.as_array()) { - for entry in files { - if let Some(path) = entry.get("path").and_then(|v| v.as_str()) { - out.push(PathBuf::from(path)); - } - } - } - // Fallback: parse `---`/`+++` headers from a unified diff payload. - if out.is_empty() - && let Some(patch) = input.get("patch").and_then(|v| v.as_str()) - { - out.extend(parse_patch_paths(patch)); - } - out - } - _ => Vec::new(), - } -} - -/// Lightweight parser for `+++ b/` lines in a unified diff. Used as a -/// fallback when `apply_patch` is invoked with raw `patch` text and no -/// `path`/`files` override. We deliberately keep this dumb — the real -/// `apply_patch` tool already validates the patch shape; we only need a -/// best-effort hint for the LSP hook. -fn parse_patch_paths(patch: &str) -> Vec { - let mut out = Vec::new(); - for line in patch.lines() { - if let Some(rest) = line.strip_prefix("+++ ") { - let trimmed = rest.trim(); - // Strip leading `b/` per git diff conventions. - let path = trimmed.strip_prefix("b/").unwrap_or(trimmed); - // Skip `/dev/null` (deletion). - if path == "/dev/null" { - continue; - } - out.push(PathBuf::from(path)); - } - } - out -} - fn caller_allowed_for_tool(caller: Option<&ToolCaller>, tool_def: Option<&Tool>) -> bool { let requested = caller_type_for_tool_use(caller); if let Some(def) = tool_def @@ -533,23 +471,6 @@ fn format_tool_error(err: &ToolError, tool_name: &str) -> String { } } -fn emit_tool_audit(event: serde_json::Value) { - let Some(path) = std::env::var_os("DEEPSEEK_TOOL_AUDIT_LOG") else { - return; - }; - let line = match serde_json::to_string(&event) { - Ok(line) => line, - Err(_) => return, - }; - let path = PathBuf::from(path); - if let Some(parent) = path.parent() { - let _ = std::fs::create_dir_all(parent); - } - if let Ok(mut file) = OpenOptions::new().create(true).append(true).open(path) { - let _ = writeln!(file, "{line}"); - } -} - impl Engine { fn reset_cancel_token(&mut self) { let token = CancellationToken::new(); @@ -889,57 +810,6 @@ impl Engine { self.emit_session_updated().await; } - /// #136: post-edit hook. Inspects the tool name + input, derives the - /// edited file path, and asks the LSP manager for diagnostics. The - /// rendered block is queued in `pending_lsp_blocks` and flushed to the - /// session message stream just before the next API request. Failure is - /// silent by design — a missing/crashing LSP server must never block - /// the agent. - async fn run_post_edit_lsp_hook(&mut self, tool_name: &str, tool_input: &serde_json::Value) { - if !self.lsp_manager.config().enabled { - return; - } - let paths = edited_paths_for_tool(tool_name, tool_input); - for path in paths { - let absolute = if path.is_absolute() { - path.clone() - } else { - self.session.workspace.join(&path) - }; - // Use a short edit-sequence based on the existing turn counter so - // log output stays correlated even though we do not currently - // batch by sequence. - let seq = self.turn_counter; - if let Some(block) = self.lsp_manager.diagnostics_for(&absolute, seq).await { - self.pending_lsp_blocks.push(block); - } - } - } - - /// Drain `pending_lsp_blocks` into a single synthetic user message so the - /// model sees the diagnostics on its next request. Skips when nothing is - /// pending. The message uses the standard `text` content block shape - /// (the same shape as the post-tool steer messages) so we don't need to - /// invent a new envelope. - async fn flush_pending_lsp_diagnostics(&mut self) { - if self.pending_lsp_blocks.is_empty() { - return; - } - let blocks = std::mem::take(&mut self.pending_lsp_blocks); - let rendered = crate::lsp::render_blocks(&blocks); - if rendered.is_empty() { - return; - } - self.add_session_message(Message { - role: "user".to_string(), - content: vec![ContentBlock::Text { - text: rendered, - cache_control: None, - }], - }) - .await; - } - /// Handle a send message operation #[allow(clippy::too_many_arguments)] async fn handle_send_message( @@ -1550,175 +1420,6 @@ impl Engine { pool.to_api_tools() } - async fn execute_mcp_tool_with_pool( - pool: Arc>, - name: &str, - input: serde_json::Value, - ) -> Result { - let mut pool = pool.lock().await; - let result = pool - .call_tool(name, input) - .await - .map_err(|e| ToolError::execution_failed(format!("MCP tool failed: {e}")))?; - let content = serde_json::to_string_pretty(&result).unwrap_or_else(|_| result.to_string()); - Ok(ToolResult::success(content)) - } - - async fn execute_parallel_tool( - &mut self, - input: serde_json::Value, - tool_registry: Option<&crate::tools::ToolRegistry>, - tool_exec_lock: Arc>, - ) -> Result { - let calls = parse_parallel_tool_calls(&input)?; - let mcp_pool = if calls.iter().any(|(tool, _)| McpPool::is_mcp_tool(tool)) { - Some(self.ensure_mcp_pool().await?) - } else { - None - }; - let Some(registry) = tool_registry else { - return Err(ToolError::not_available( - "tool registry unavailable for multi_tool_use.parallel", - )); - }; - - let mut tasks = FuturesUnordered::new(); - for (tool_name, tool_input) in calls { - if tool_name == MULTI_TOOL_PARALLEL_NAME { - return Err(ToolError::invalid_input( - "multi_tool_use.parallel cannot call itself", - )); - } - if McpPool::is_mcp_tool(&tool_name) { - if !mcp_tool_is_parallel_safe(&tool_name) { - return Err(ToolError::invalid_input(format!( - "Tool '{tool_name}' is an MCP tool and cannot run in parallel. \ - Allowed MCP tools: list_mcp_resources, list_mcp_resource_templates, \ - mcp_read_resource, read_mcp_resource, mcp_get_prompt." - ))); - } - } else { - let Some(spec) = registry.get(&tool_name) else { - return Err(ToolError::not_available(format!( - "tool '{tool_name}' is not registered" - ))); - }; - if !spec.is_read_only() { - return Err(ToolError::invalid_input(format!( - "Tool '{tool_name}' is not read-only and cannot run in parallel" - ))); - } - if spec.approval_requirement() != ApprovalRequirement::Auto { - return Err(ToolError::invalid_input(format!( - "Tool '{tool_name}' requires approval and cannot run in parallel" - ))); - } - if !spec.supports_parallel() { - return Err(ToolError::invalid_input(format!( - "Tool '{tool_name}' does not support parallel execution" - ))); - } - } - - let registry_ref = registry; - let lock = tool_exec_lock.clone(); - let tx_event = self.tx_event.clone(); - let mcp_pool = mcp_pool.clone(); - tasks.push(async move { - let result = Engine::execute_tool_with_lock( - lock, - true, - false, - tx_event, - tool_name.clone(), - tool_input.clone(), - Some(registry_ref), - mcp_pool, - None, - ) - .await; - (tool_name, result) - }); - } - - let mut results = Vec::new(); - while let Some((tool_name, result)) = tasks.next().await { - match result { - Ok(output) => { - let mut error = None; - if !output.success { - error = Some(output.content.clone()); - } - results.push(ParallelToolResultEntry { - tool_name, - success: output.success, - content: output.content, - error, - }); - } - Err(err) => { - let message = format!("{err}"); - results.push(ParallelToolResultEntry { - tool_name, - success: false, - content: format!("Error: {message}"), - error: Some(message), - }); - } - } - } - - ToolResult::json(&ParallelToolResult { results }) - .map_err(|e| ToolError::execution_failed(e.to_string())) - } - - #[allow(clippy::too_many_arguments)] - async fn execute_tool_with_lock( - lock: Arc>, - supports_parallel: bool, - interactive: bool, - tx_event: mpsc::Sender, - tool_name: String, - tool_input: serde_json::Value, - registry: Option<&crate::tools::ToolRegistry>, - mcp_pool: Option>>, - context_override: Option, - ) -> Result { - let _guard = if supports_parallel { - ToolExecGuard::Read(lock.read().await) - } else { - ToolExecGuard::Write(lock.write().await) - }; - - if interactive { - let _ = tx_event.send(Event::PauseEvents).await; - } - - let result = if McpPool::is_mcp_tool(&tool_name) { - if let Some(pool) = mcp_pool { - Engine::execute_mcp_tool_with_pool(pool, &tool_name, tool_input).await - } else { - Err(ToolError::not_available(format!( - "tool '{tool_name}' is not registered" - ))) - } - } else if let Some(registry) = registry { - registry - .execute_full_with_context(&tool_name, tool_input, context_override.as_ref()) - .await - } else { - Err(ToolError::not_available(format!( - "tool '{tool_name}' is not registered" - ))) - }; - - if interactive { - let _ = tx_event.send(Event::ResumeEvents).await; - } - - result - } - /// Handle a turn using the DeepSeek API. #[allow(clippy::too_many_lines)] /// Run the pre-request layered-context checkpoint (#159). Checks whether @@ -2148,7 +1849,9 @@ use context::{ turn_response_headroom_tokens, }; mod dispatch; +mod lsp_hooks; mod tool_catalog; +mod tool_execution; mod tool_setup; mod turn_loop; @@ -2159,6 +1862,8 @@ use self::dispatch::{ mcp_tool_is_read_only, parse_parallel_tool_calls, parse_tool_input, should_force_update_plan_first, should_parallelize_tool_batch, should_stop_after_plan_tool, }; +#[cfg(test)] +use self::lsp_hooks::{edited_paths_for_tool, parse_patch_paths}; use self::tool_catalog::{ CODE_EXECUTION_TOOL_NAME, MULTI_TOOL_PARALLEL_NAME, REQUEST_USER_INPUT_NAME, active_tools_for_step, build_model_tool_catalog, ensure_advanced_tooling, @@ -2167,6 +1872,7 @@ use self::tool_catalog::{ }; #[cfg(test)] use self::tool_catalog::{TOOL_SEARCH_BM25_NAME, should_default_defer_tool}; +use self::tool_execution::emit_tool_audit; #[cfg(test)] mod tests; diff --git a/crates/tui/src/core/engine/lsp_hooks.rs b/crates/tui/src/core/engine/lsp_hooks.rs new file mode 100644 index 00000000..535a869f --- /dev/null +++ b/crates/tui/src/core/engine/lsp_hooks.rs @@ -0,0 +1,128 @@ +//! Post-edit LSP diagnostics hooks for engine tool execution. +//! +//! The turn loop only needs to ask "did a successful edit produce diagnostics?" +//! This module owns the tool-input path extraction and the synthetic diagnostic +//! message injection so the top-level engine module stays focused on session +//! orchestration. + +use std::path::PathBuf; + +use super::*; + +/// #136: derive the file path(s) edited by a tool call. Returns the empty +/// vec for tools that don't modify files. We intentionally only handle the +/// three known edit tools — adding more (e.g. specialized refactor tools) +/// is a one-line change here. +pub(super) fn edited_paths_for_tool(tool_name: &str, input: &serde_json::Value) -> Vec { + match tool_name { + "edit_file" | "write_file" => { + if let Some(path) = input.get("path").and_then(|v| v.as_str()) { + vec![PathBuf::from(path)] + } else { + Vec::new() + } + } + "apply_patch" => { + // `apply_patch` accepts either a `path` override or a list of + // `files` (each `{path, content}`). We try both shapes. + let mut out = Vec::new(); + if let Some(path) = input.get("path").and_then(|v| v.as_str()) { + out.push(PathBuf::from(path)); + } + if let Some(files) = input.get("files").and_then(|v| v.as_array()) { + for entry in files { + if let Some(path) = entry.get("path").and_then(|v| v.as_str()) { + out.push(PathBuf::from(path)); + } + } + } + // Fallback: parse `---`/`+++` headers from a unified diff payload. + if out.is_empty() + && let Some(patch) = input.get("patch").and_then(|v| v.as_str()) + { + out.extend(parse_patch_paths(patch)); + } + out + } + _ => Vec::new(), + } +} + +/// Lightweight parser for `+++ b/` lines in a unified diff. Used as a +/// fallback when `apply_patch` is invoked with raw `patch` text and no +/// `path`/`files` override. We deliberately keep this dumb — the real +/// `apply_patch` tool already validates the patch shape; we only need a +/// best-effort hint for the LSP hook. +pub(super) fn parse_patch_paths(patch: &str) -> Vec { + let mut out = Vec::new(); + for line in patch.lines() { + if let Some(rest) = line.strip_prefix("+++ ") { + let trimmed = rest.trim(); + // Strip leading `b/` per git diff conventions. + let path = trimmed.strip_prefix("b/").unwrap_or(trimmed); + // Skip `/dev/null` (deletion). + if path == "/dev/null" { + continue; + } + out.push(PathBuf::from(path)); + } + } + out +} + +impl Engine { + /// #136: post-edit hook. Inspects the tool name + input, derives the + /// edited file path, and asks the LSP manager for diagnostics. The + /// rendered block is queued in `pending_lsp_blocks` and flushed to the + /// session message stream just before the next API request. Failure is + /// silent by design — a missing/crashing LSP server must never block + /// the agent. + pub(super) async fn run_post_edit_lsp_hook( + &mut self, + tool_name: &str, + tool_input: &serde_json::Value, + ) { + if !self.lsp_manager.config().enabled { + return; + } + let paths = edited_paths_for_tool(tool_name, tool_input); + for path in paths { + let absolute = if path.is_absolute() { + path.clone() + } else { + self.session.workspace.join(&path) + }; + // Use a short edit-sequence based on the existing turn counter so + // log output stays correlated even though we do not currently + // batch by sequence. + let seq = self.turn_counter; + if let Some(block) = self.lsp_manager.diagnostics_for(&absolute, seq).await { + self.pending_lsp_blocks.push(block); + } + } + } + + /// Drain `pending_lsp_blocks` into a single synthetic user message so the + /// model sees the diagnostics on its next request. Skips when nothing is + /// pending. The message uses the standard `text` content block shape + /// (the same shape as the post-tool steer messages) so we don't need to + /// invent a new envelope. + pub(super) async fn flush_pending_lsp_diagnostics(&mut self) { + if self.pending_lsp_blocks.is_empty() { + return; + } + let blocks = std::mem::take(&mut self.pending_lsp_blocks); + let rendered = crate::lsp::render_blocks(&blocks); + if rendered.is_empty() { + return; + } + self.add_session_message(Message { + role: "user".to_string(), + content: vec![ContentBlock::Text { + text: rendered, + cache_control: None, + }], + }) + .await; + } +} diff --git a/crates/tui/src/core/engine/tool_execution.rs b/crates/tui/src/core/engine/tool_execution.rs new file mode 100644 index 00000000..e18e235b --- /dev/null +++ b/crates/tui/src/core/engine/tool_execution.rs @@ -0,0 +1,197 @@ +//! Low-level tool execution helpers for the engine turn loop. +//! +//! This module keeps the mechanics of MCP dispatch, execution locking, and +//! parallel-tool fanout out of `engine.rs`; the turn loop still owns planning, +//! approval, and how tool results are written back into session state. + +use std::{fs::OpenOptions, io::Write}; + +use super::*; + +pub(super) fn emit_tool_audit(event: serde_json::Value) { + let Some(path) = std::env::var_os("DEEPSEEK_TOOL_AUDIT_LOG") else { + return; + }; + let line = match serde_json::to_string(&event) { + Ok(line) => line, + Err(_) => return, + }; + let path = PathBuf::from(path); + if let Some(parent) = path.parent() { + let _ = std::fs::create_dir_all(parent); + } + if let Ok(mut file) = OpenOptions::new().create(true).append(true).open(path) { + let _ = writeln!(file, "{line}"); + } +} + +impl Engine { + pub(super) async fn execute_mcp_tool_with_pool( + pool: Arc>, + name: &str, + input: serde_json::Value, + ) -> Result { + let mut pool = pool.lock().await; + let result = pool + .call_tool(name, input) + .await + .map_err(|e| ToolError::execution_failed(format!("MCP tool failed: {e}")))?; + let content = serde_json::to_string_pretty(&result).unwrap_or_else(|_| result.to_string()); + Ok(ToolResult::success(content)) + } + + pub(super) async fn execute_parallel_tool( + &mut self, + input: serde_json::Value, + tool_registry: Option<&crate::tools::ToolRegistry>, + tool_exec_lock: Arc>, + ) -> Result { + let calls = parse_parallel_tool_calls(&input)?; + let mcp_pool = if calls.iter().any(|(tool, _)| McpPool::is_mcp_tool(tool)) { + Some(self.ensure_mcp_pool().await?) + } else { + None + }; + let Some(registry) = tool_registry else { + return Err(ToolError::not_available( + "tool registry unavailable for multi_tool_use.parallel", + )); + }; + + let mut tasks = FuturesUnordered::new(); + for (tool_name, tool_input) in calls { + if tool_name == MULTI_TOOL_PARALLEL_NAME { + return Err(ToolError::invalid_input( + "multi_tool_use.parallel cannot call itself", + )); + } + if McpPool::is_mcp_tool(&tool_name) { + if !mcp_tool_is_parallel_safe(&tool_name) { + return Err(ToolError::invalid_input(format!( + "Tool '{tool_name}' is an MCP tool and cannot run in parallel. \ + Allowed MCP tools: list_mcp_resources, list_mcp_resource_templates, \ + mcp_read_resource, read_mcp_resource, mcp_get_prompt." + ))); + } + } else { + let Some(spec) = registry.get(&tool_name) else { + return Err(ToolError::not_available(format!( + "tool '{tool_name}' is not registered" + ))); + }; + if !spec.is_read_only() { + return Err(ToolError::invalid_input(format!( + "Tool '{tool_name}' is not read-only and cannot run in parallel" + ))); + } + if spec.approval_requirement() != ApprovalRequirement::Auto { + return Err(ToolError::invalid_input(format!( + "Tool '{tool_name}' requires approval and cannot run in parallel" + ))); + } + if !spec.supports_parallel() { + return Err(ToolError::invalid_input(format!( + "Tool '{tool_name}' does not support parallel execution" + ))); + } + } + + let registry_ref = registry; + let lock = tool_exec_lock.clone(); + let tx_event = self.tx_event.clone(); + let mcp_pool = mcp_pool.clone(); + tasks.push(async move { + let result = Engine::execute_tool_with_lock( + lock, + true, + false, + tx_event, + tool_name.clone(), + tool_input.clone(), + Some(registry_ref), + mcp_pool, + None, + ) + .await; + (tool_name, result) + }); + } + + let mut results = Vec::new(); + while let Some((tool_name, result)) = tasks.next().await { + match result { + Ok(output) => { + let mut error = None; + if !output.success { + error = Some(output.content.clone()); + } + results.push(ParallelToolResultEntry { + tool_name, + success: output.success, + content: output.content, + error, + }); + } + Err(err) => { + let message = format!("{err}"); + results.push(ParallelToolResultEntry { + tool_name, + success: false, + content: format!("Error: {message}"), + error: Some(message), + }); + } + } + } + + ToolResult::json(&ParallelToolResult { results }) + .map_err(|e| ToolError::execution_failed(e.to_string())) + } + + #[allow(clippy::too_many_arguments)] + pub(super) async fn execute_tool_with_lock( + lock: Arc>, + supports_parallel: bool, + interactive: bool, + tx_event: mpsc::Sender, + tool_name: String, + tool_input: serde_json::Value, + registry: Option<&crate::tools::ToolRegistry>, + mcp_pool: Option>>, + context_override: Option, + ) -> Result { + let _guard = if supports_parallel { + ToolExecGuard::Read(lock.read().await) + } else { + ToolExecGuard::Write(lock.write().await) + }; + + if interactive { + let _ = tx_event.send(Event::PauseEvents).await; + } + + let result = if McpPool::is_mcp_tool(&tool_name) { + if let Some(pool) = mcp_pool { + Engine::execute_mcp_tool_with_pool(pool, &tool_name, tool_input).await + } else { + Err(ToolError::not_available(format!( + "tool '{tool_name}' is not registered" + ))) + } + } else if let Some(registry) = registry { + registry + .execute_full_with_context(&tool_name, tool_input, context_override.as_ref()) + .await + } else { + Err(ToolError::not_available(format!( + "tool '{tool_name}' is not registered" + ))) + }; + + if interactive { + let _ = tx_event.send(Event::ResumeEvents).await; + } + + result + } +} From a64bc9bbe553cbac176909da572eb4f6846ef4a2 Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Fri, 1 May 2026 09:09:09 -0500 Subject: [PATCH 12/17] refactor(engine): isolate streaming state helpers --- crates/tui/src/core/engine.rs | 200 ++---------------------- crates/tui/src/core/engine/dispatch.rs | 56 ++++++- crates/tui/src/core/engine/streaming.rs | 137 ++++++++++++++++ 3 files changed, 209 insertions(+), 184 deletions(-) create mode 100644 crates/tui/src/core/engine/streaming.rs diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index 6e2e5911..d69a2b2a 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -32,9 +32,11 @@ use crate::error_taxonomy::{ErrorCategory, ErrorEnvelope, StreamError}; use crate::features::{Feature, Features}; use crate::llm_client::LlmClient; use crate::mcp::McpPool; +#[cfg(test)] +use crate::models::ToolCaller; use crate::models::{ ContentBlock, ContentBlockStart, DEFAULT_CONTEXT_WINDOW_TOKENS, Delta, Message, MessageRequest, - StreamEvent, SystemPrompt, Tool, ToolCaller, Usage, + StreamEvent, SystemPrompt, Tool, Usage, }; use crate::prompts; use crate::seam_manager::{SeamConfig, SeamManager}; @@ -291,185 +293,7 @@ pub struct Engine { pending_lsp_blocks: Vec, } -// === Internal stream helpers === - -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -enum ContentBlockKind { - Text, - Thinking, - ToolUse, -} - -#[derive(Debug, Clone)] -struct ToolUseState { - id: String, - name: String, - input: serde_json::Value, - caller: Option, - input_buffer: String, -} - -/// Maximum time to wait for a single stream chunk before assuming a stall. -/// **This is the idle timeout** — it resets on every SSE chunk, so long -/// thinking turns that ARE producing reasoning_content stay alive. Only a -/// genuine `chunk_timeout` window of silence kills the stream. -const STREAM_CHUNK_TIMEOUT_SECS: u64 = 90; -/// Maximum total bytes of text/thinking content before aborting the stream. -const STREAM_MAX_CONTENT_BYTES: usize = 10 * 1024 * 1024; // 10 MB -/// Sanity backstop for total stream wall-clock duration. **Not** a routine -/// kill switch — `STREAM_CHUNK_TIMEOUT_SECS` (idle) is the primary stall -/// detector. The wall-clock cap is here only to bound pathological cases -/// (e.g. a server that keeps sending heartbeats forever without progress). -/// -/// History: this used to be 300s (5 min) which was too aggressive — V4 -/// thinking turns on hard prompts legitimately exceed 5 minutes wall-clock -/// while still emitting reasoning_content chunks the whole way. Bumped to -/// 30 min in v0.6.6 to address `TODO_FIXES.md` #1. Codex defaults to a -/// per-chunk idle of 300s with no wall-clock cap; we keep both layers but -/// give the wall-clock a generous window so it never fires in practice. -const STREAM_MAX_DURATION_SECS: u64 = 1800; // 30 minutes (was 300s; #103/#1) -/// Hard cap on consecutive recoverable stream errors before we surface a turn -/// failure. Bumped 3 → 5 in v0.6.7 along with the HTTP/2 keepalive defaults -/// (#103) — keepalive should make spurious decode errors rarer, so we can -/// tolerate a longer streak before giving up on the turn. -const MAX_STREAM_ERRORS_BEFORE_FAIL: u32 = 5; -/// Cap on transparent stream-level retries — these only happen when the wire -/// dies before any content was streamed, so DeepSeek hasn't billed us and -/// the user hasn't seen anything. Two attempts is enough to ride out a -/// flaky edge node without amplifying real outages (#103). -const MAX_TRANSPARENT_STREAM_RETRIES: u32 = 2; - -/// Decide whether a stream error is eligible for a transparent retry. -/// -/// True only when ALL three conditions hold: -/// 1. No content has been received on the current attempt — otherwise DeepSeek -/// has already billed us for output tokens and the user has seen partial -/// deltas; resending would double-bill and desync the UI. -/// 2. We still have transparent-retry budget remaining. -/// 3. The turn has not been cancelled. -/// -/// Extracted as a pure function so the four #103 retry cases can be exercised -/// in unit tests without booting the full engine state machine. -fn should_transparently_retry_stream( - any_content_received: bool, - transparent_attempts: u32, - cancelled: bool, -) -> bool { - !any_content_received && transparent_attempts < MAX_TRANSPARENT_STREAM_RETRIES && !cancelled -} -pub(crate) const TOOL_CALL_START_MARKERS: [&str; 5] = [ - "[TOOL_CALL]", - "", -]; - -pub(crate) const TOOL_CALL_END_MARKERS: [&str; 5] = [ - "[/TOOL_CALL]", - "", - "", - "", - "", -]; - -/// Compact one-shot notice emitted when a model attempts to forge a tool-call -/// wrapper in plain text instead of using the API tool channel. The visible -/// content is still scrubbed; this exists so the user can see why their text -/// shrank. -pub(crate) const FAKE_WRAPPER_NOTICE: &str = - "Stripped non-API tool-call wrapper from model output (use the API tool channel)"; - -/// True if `text` contains any of the known fake-wrapper start markers. Used by -/// the streaming loop to decide whether to emit `FAKE_WRAPPER_NOTICE`. -pub(crate) fn contains_fake_tool_wrapper(text: &str) -> bool { - TOOL_CALL_START_MARKERS.iter().any(|m| text.contains(m)) -} - -fn find_first_marker(text: &str, markers: &[&str]) -> Option<(usize, usize)> { - markers - .iter() - .filter_map(|marker| text.find(marker).map(|idx| (idx, marker.len()))) - .min_by_key(|(idx, _)| *idx) -} - -pub(crate) fn filter_tool_call_delta(delta: &str, in_tool_call: &mut bool) -> String { - if delta.is_empty() { - return String::new(); - } - - let mut output = String::new(); - let mut rest = delta; - - loop { - if *in_tool_call { - let Some((idx, len)) = find_first_marker(rest, &TOOL_CALL_END_MARKERS) else { - break; - }; - rest = &rest[idx + len..]; - *in_tool_call = false; - } else { - let Some((idx, len)) = find_first_marker(rest, &TOOL_CALL_START_MARKERS) else { - output.push_str(rest); - break; - }; - output.push_str(&rest[..idx]); - rest = &rest[idx + len..]; - *in_tool_call = true; - } - } - - output -} - -fn caller_type_for_tool_use(caller: Option<&ToolCaller>) -> &str { - caller.map_or("direct", |c| c.caller_type.as_str()) -} - -fn caller_allowed_for_tool(caller: Option<&ToolCaller>, tool_def: Option<&Tool>) -> bool { - let requested = caller_type_for_tool_use(caller); - if let Some(def) = tool_def - && let Some(allowed) = &def.allowed_callers - { - if allowed.is_empty() { - return requested == "direct"; - } - return allowed.iter().any(|item| item == requested); - } - requested == "direct" -} - -fn format_tool_error(err: &ToolError, tool_name: &str) -> String { - match err { - ToolError::InvalidInput { message } => { - format!("Invalid input for tool '{tool_name}': {message}") - } - ToolError::MissingField { field } => { - format!("Tool '{tool_name}' is missing required field '{field}'") - } - ToolError::PathEscape { path } => format!( - "Path escapes workspace: {}. Use a workspace-relative path or enable trust mode.", - path.display() - ), - ToolError::ExecutionFailed { message } => message.clone(), - ToolError::Timeout { seconds } => format!( - "Tool '{tool_name}' timed out after {seconds}s. Try a narrower scope or a longer timeout." - ), - ToolError::NotAvailable { message } => { - let lower = message.to_ascii_lowercase(); - if lower.contains("current tool catalog") || lower.contains("did you mean:") { - message.clone() - } else { - format!( - "Tool '{tool_name}' is not available: {message}. Check mode, feature flags, or tool name." - ) - } - } - ToolError::PermissionDenied { message } => format!( - "Tool '{tool_name}' was denied: {message}. Adjust approval mode or request permission." - ), - } -} +// === Internal tool helpers === impl Engine { fn reset_cancel_token(&mut self) { @@ -1850,6 +1674,7 @@ use context::{ }; mod dispatch; mod lsp_hooks; +mod streaming; mod tool_catalog; mod tool_execution; mod tool_setup; @@ -1858,12 +1683,21 @@ mod turn_loop; use self::approval::{ApprovalDecision, ApprovalResult, UserInputDecision}; use self::dispatch::{ ParallelToolResult, ParallelToolResultEntry, ToolExecGuard, ToolExecOutcome, ToolExecutionPlan, - final_tool_input, mcp_tool_approval_description, mcp_tool_is_parallel_safe, - mcp_tool_is_read_only, parse_parallel_tool_calls, parse_tool_input, - should_force_update_plan_first, should_parallelize_tool_batch, should_stop_after_plan_tool, + caller_allowed_for_tool, caller_type_for_tool_use, final_tool_input, format_tool_error, + mcp_tool_approval_description, mcp_tool_is_parallel_safe, mcp_tool_is_read_only, + parse_parallel_tool_calls, parse_tool_input, should_force_update_plan_first, + should_parallelize_tool_batch, should_stop_after_plan_tool, }; #[cfg(test)] use self::lsp_hooks::{edited_paths_for_tool, parse_patch_paths}; +#[cfg(test)] +use self::streaming::TOOL_CALL_START_MARKERS; +use self::streaming::{ + ContentBlockKind, FAKE_WRAPPER_NOTICE, MAX_STREAM_ERRORS_BEFORE_FAIL, + MAX_TRANSPARENT_STREAM_RETRIES, STREAM_CHUNK_TIMEOUT_SECS, STREAM_MAX_CONTENT_BYTES, + STREAM_MAX_DURATION_SECS, ToolUseState, contains_fake_tool_wrapper, filter_tool_call_delta, + should_transparently_retry_stream, +}; use self::tool_catalog::{ CODE_EXECUTION_TOOL_NAME, MULTI_TOOL_PARALLEL_NAME, REQUEST_USER_INPUT_NAME, active_tools_for_step, build_model_tool_catalog, ensure_advanced_tooling, diff --git a/crates/tui/src/core/engine/dispatch.rs b/crates/tui/src/core/engine/dispatch.rs index 94b8c864..eb39f41f 100644 --- a/crates/tui/src/core/engine/dispatch.rs +++ b/crates/tui/src/core/engine/dispatch.rs @@ -17,7 +17,7 @@ use serde_json::json; -use crate::models::ToolCaller; +use crate::models::{Tool, ToolCaller}; use crate::tools::spec::{ToolError, ToolResult}; use crate::tui::app::AppMode; @@ -71,6 +71,60 @@ pub(super) enum ToolExecGuard<'a> { Write(#[allow(dead_code)] tokio::sync::RwLockWriteGuard<'a, ()>), } +// === Caller policy and errors ======================================== + +pub(super) fn caller_type_for_tool_use(caller: Option<&ToolCaller>) -> &str { + caller.map_or("direct", |c| c.caller_type.as_str()) +} + +pub(super) fn caller_allowed_for_tool( + caller: Option<&ToolCaller>, + tool_def: Option<&Tool>, +) -> bool { + let requested = caller_type_for_tool_use(caller); + if let Some(def) = tool_def + && let Some(allowed) = &def.allowed_callers + { + if allowed.is_empty() { + return requested == "direct"; + } + return allowed.iter().any(|item| item == requested); + } + requested == "direct" +} + +pub(super) fn format_tool_error(err: &ToolError, tool_name: &str) -> String { + match err { + ToolError::InvalidInput { message } => { + format!("Invalid input for tool '{tool_name}': {message}") + } + ToolError::MissingField { field } => { + format!("Tool '{tool_name}' is missing required field '{field}'") + } + ToolError::PathEscape { path } => format!( + "Path escapes workspace: {}. Use a workspace-relative path or enable trust mode.", + path.display() + ), + ToolError::ExecutionFailed { message } => message.clone(), + ToolError::Timeout { seconds } => format!( + "Tool '{tool_name}' timed out after {seconds}s. Try a narrower scope or a longer timeout." + ), + ToolError::NotAvailable { message } => { + let lower = message.to_ascii_lowercase(); + if lower.contains("current tool catalog") || lower.contains("did you mean:") { + message.clone() + } else { + format!( + "Tool '{tool_name}' is not available: {message}. Check mode, feature flags, or tool name." + ) + } + } + ToolError::PermissionDenied { message } => format!( + "Tool '{tool_name}' was denied: {message}. Adjust approval mode or request permission." + ), + } +} + // === Streaming-buffer parsing ========================================= /// Promote a streaming `ToolUseState` to a finalized JSON input. diff --git a/crates/tui/src/core/engine/streaming.rs b/crates/tui/src/core/engine/streaming.rs new file mode 100644 index 00000000..1855dc18 --- /dev/null +++ b/crates/tui/src/core/engine/streaming.rs @@ -0,0 +1,137 @@ +//! Streaming response state and guardrails. +//! +//! This module owns the local state used while decoding one model stream: +//! content block kind tracking, streamed tool-use buffers, transparent retry +//! policy, and scrubbers for text that looks like a forged tool-call wrapper. + +use crate::models::ToolCaller; + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub(super) enum ContentBlockKind { + Text, + Thinking, + ToolUse, +} + +#[derive(Debug, Clone)] +pub(super) struct ToolUseState { + pub(super) id: String, + pub(super) name: String, + pub(super) input: serde_json::Value, + pub(super) caller: Option, + pub(super) input_buffer: String, +} + +/// Maximum time to wait for a single stream chunk before assuming a stall. +/// **This is the idle timeout** — it resets on every SSE chunk, so long +/// thinking turns that ARE producing reasoning_content stay alive. Only a +/// genuine `chunk_timeout` window of silence kills the stream. +pub(super) const STREAM_CHUNK_TIMEOUT_SECS: u64 = 90; +/// Maximum total bytes of text/thinking content before aborting the stream. +pub(super) const STREAM_MAX_CONTENT_BYTES: usize = 10 * 1024 * 1024; // 10 MB +/// Sanity backstop for total stream wall-clock duration. **Not** a routine +/// kill switch — `STREAM_CHUNK_TIMEOUT_SECS` (idle) is the primary stall +/// detector. The wall-clock cap is here only to bound pathological cases +/// (e.g. a server that keeps sending heartbeats forever without progress). +/// +/// History: this used to be 300s (5 min) which was too aggressive — V4 +/// thinking turns on hard prompts legitimately exceed 5 minutes wall-clock +/// while still emitting reasoning_content chunks the whole way. Bumped to +/// 30 min in v0.6.6 to address `TODO_FIXES.md` #1. Codex defaults to a +/// per-chunk idle of 300s with no wall-clock cap; we keep both layers but +/// give the wall-clock a generous window so it never fires in practice. +pub(super) const STREAM_MAX_DURATION_SECS: u64 = 1800; // 30 minutes (was 300s; #103/#1) +/// Hard cap on consecutive recoverable stream errors before we surface a turn +/// failure. Bumped 3 → 5 in v0.6.7 along with the HTTP/2 keepalive defaults +/// (#103) — keepalive should make spurious decode errors rarer, so we can +/// tolerate a longer streak before giving up on the turn. +pub(super) const MAX_STREAM_ERRORS_BEFORE_FAIL: u32 = 5; +/// Cap on transparent stream-level retries — these only happen when the wire +/// dies before any content was streamed, so DeepSeek hasn't billed us and +/// the user hasn't seen anything. Two attempts is enough to ride out a +/// flaky edge node without amplifying real outages (#103). +pub(super) const MAX_TRANSPARENT_STREAM_RETRIES: u32 = 2; + +/// Decide whether a stream error is eligible for a transparent retry. +/// +/// True only when ALL three conditions hold: +/// 1. No content has been received on the current attempt — otherwise DeepSeek +/// has already billed us for output tokens and the user has seen partial +/// deltas; resending would double-bill and desync the UI. +/// 2. We still have transparent-retry budget remaining. +/// 3. The turn has not been cancelled. +/// +/// Extracted as a pure function so the four #103 retry cases can be exercised +/// in unit tests without booting the full engine state machine. +pub(super) fn should_transparently_retry_stream( + any_content_received: bool, + transparent_attempts: u32, + cancelled: bool, +) -> bool { + !any_content_received && transparent_attempts < MAX_TRANSPARENT_STREAM_RETRIES && !cancelled +} + +pub(crate) const TOOL_CALL_START_MARKERS: [&str; 5] = [ + "[TOOL_CALL]", + "", +]; + +pub(crate) const TOOL_CALL_END_MARKERS: [&str; 5] = [ + "[/TOOL_CALL]", + "", + "", + "", + "", +]; + +/// Compact one-shot notice emitted when a model attempts to forge a tool-call +/// wrapper in plain text instead of using the API tool channel. The visible +/// content is still scrubbed; this exists so the user can see why their text +/// shrank. +pub(crate) const FAKE_WRAPPER_NOTICE: &str = + "Stripped non-API tool-call wrapper from model output (use the API tool channel)"; + +/// True if `text` contains any of the known fake-wrapper start markers. Used by +/// the streaming loop to decide whether to emit `FAKE_WRAPPER_NOTICE`. +pub(crate) fn contains_fake_tool_wrapper(text: &str) -> bool { + TOOL_CALL_START_MARKERS.iter().any(|m| text.contains(m)) +} + +fn find_first_marker(text: &str, markers: &[&str]) -> Option<(usize, usize)> { + markers + .iter() + .filter_map(|marker| text.find(marker).map(|idx| (idx, marker.len()))) + .min_by_key(|(idx, _)| *idx) +} + +pub(crate) fn filter_tool_call_delta(delta: &str, in_tool_call: &mut bool) -> String { + if delta.is_empty() { + return String::new(); + } + + let mut output = String::new(); + let mut rest = delta; + + loop { + if *in_tool_call { + let Some((idx, len)) = find_first_marker(rest, &TOOL_CALL_END_MARKERS) else { + break; + }; + rest = &rest[idx + len..]; + *in_tool_call = false; + } else { + let Some((idx, len)) = find_first_marker(rest, &TOOL_CALL_START_MARKERS) else { + output.push_str(rest); + break; + }; + output.push_str(&rest[..idx]); + rest = &rest[idx + len..]; + *in_tool_call = true; + } + } + + output +} From bb88ab912979fc958836b0730ac43c5ae34c4b94 Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Fri, 1 May 2026 09:28:28 -0500 Subject: [PATCH 13/17] fix(cli): make missing-companion-binary error actually helpful (#258) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit @whereiszebra (issue #258) downloaded just \`deepseek-macos-arm64\` from the GitHub Release, ran it, hit: error: deepseek-tui binary not found at /path/to/deepseek-tui. Build workspace default members to install it, or set DEEPSEEK_TUI_BIN to its absolute path. …spent 11 minutes figuring out they also needed \`deepseek-tui-macos-arm64\` sitting next to it, and self-closed with: "Release page does not document that both deepseek-macos-arm64 and deepseek-tui-macos-arm64 must be downloaded together." The dispatcher's error was the wrong message for the population that hits it most often — direct GitHub Release downloaders. "Build workspace default members" is meaningless if you didn't clone the repo. \`DEEPSEEK_TUI_BIN\` is also not what they need. New message lists the three concrete install paths that actually work for a fresh user — npm, cargo, or grab BOTH binaries from the same release page — and keeps the env var override as a final fallback for power users. No logic change; just better text. Existing \`locate_sibling_tui_binary_honours_env_override\` test still passes. Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/cli/src/lib.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/crates/cli/src/lib.rs b/crates/cli/src/lib.rs index 43cd70df..8b4da41d 100644 --- a/crates/cli/src/lib.rs +++ b/crates/cli/src/lib.rs @@ -1111,7 +1111,17 @@ fn locate_sibling_tui_binary() -> Result { // expected name, not "deepseek-tui" on Windows. let expected = current.with_file_name(format!("deepseek-tui{}", std::env::consts::EXE_SUFFIX)); bail!( - "deepseek-tui binary not found at {}. Build workspace default members to install it, or set DEEPSEEK_TUI_BIN to its absolute path.", + "Companion `deepseek-tui` binary not found at {}.\n\ +\n\ +The `deepseek` dispatcher delegates interactive sessions to a sibling \ +`deepseek-tui` binary. To fix this, install one of:\n\ + • npm: npm install -g deepseek-tui (downloads both binaries)\n\ + • cargo: cargo install deepseek-tui-cli deepseek-tui --locked\n\ + • GitHub Releases: download BOTH `deepseek-` AND \ +`deepseek-tui-` from https://github.com/Hmbown/DeepSeek-TUI/releases/latest \ +and place them in the same directory.\n\ +\n\ +Or set DEEPSEEK_TUI_BIN to the absolute path of an existing `deepseek-tui` binary.", expected.display() ); } From a7e629ae4dc38f3f62655e6980e4610ede688206 Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Fri, 1 May 2026 09:34:11 -0500 Subject: [PATCH 14/17] test(parity): scan engine submodules after decomposition refactor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The protocol-recovery contract tests `include_str!`-ed `engine.rs` and asserted the fake-wrapper markers (`[TOOL_CALL]`, ``, …) appeared as string literals in that file. The recent engine decomposition refactor (commits f0fad7aa..a64bc9bb) split engine.rs into `engine/streaming.rs`, `engine/turn_loop.rs`, `engine/dispatch.rs`, `engine/tool_setup.rs`, `engine/tool_execution.rs`, `engine/tool_catalog.rs`, `engine/context.rs`, `engine/approval.rs`, `engine/capacity_flow.rs`, and `engine/lsp_hooks.rs`. The marker literals followed the code into those files, so the original single-file `include_str!` no longer saw them and 4 protocol-recovery tests went red. Switch to an `ENGINE_SOURCES: &[&str]` array of `include_str!`s across engine.rs + every submodule, with a small `any_engine_source_contains` helper. Test bodies are otherwise unchanged. The file-size sanity check on `engine.rs` (>10_000 bytes) still passes — engine.rs is still ~65k bytes after the refactor. Same regression coverage as before; just survives the new file layout. Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/tui/tests/protocol_recovery.rs | 47 ++++++++++++++++++++------- 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/crates/tui/tests/protocol_recovery.rs b/crates/tui/tests/protocol_recovery.rs index 3de0080e..b42f174e 100644 --- a/crates/tui/tests/protocol_recovery.rs +++ b/crates/tui/tests/protocol_recovery.rs @@ -23,7 +23,28 @@ use std::fs; #[allow(dead_code)] mod tool_parser; -const ENGINE_SRC: &str = include_str!("../src/core/engine.rs"); +// `engine.rs` was decomposed into submodules under `core/engine/`. The +// protocol-scrubbing strings the tests below assert on are now spread +// across `engine.rs` and several `engine/*.rs` files. We compile-time +// include each so a contributor moving a marker into a sibling submodule +// does not silently break these regression checks. +const ENGINE_SOURCES: &[&str] = &[ + include_str!("../src/core/engine.rs"), + include_str!("../src/core/engine/streaming.rs"), + include_str!("../src/core/engine/turn_loop.rs"), + include_str!("../src/core/engine/dispatch.rs"), + include_str!("../src/core/engine/tool_setup.rs"), + include_str!("../src/core/engine/tool_execution.rs"), + include_str!("../src/core/engine/tool_catalog.rs"), + include_str!("../src/core/engine/context.rs"), + include_str!("../src/core/engine/approval.rs"), + include_str!("../src/core/engine/capacity_flow.rs"), + include_str!("../src/core/engine/lsp_hooks.rs"), +]; + +fn any_engine_source_contains(needle: &str) -> bool { + ENGINE_SOURCES.iter().any(|src| src.contains(needle)) +} const EXPECTED_START_MARKERS: &[&str] = &[ "[TOOL_CALL]", @@ -46,9 +67,10 @@ fn engine_keeps_known_fake_wrapper_start_markers() { for marker in EXPECTED_START_MARKERS { let needle = format!("\"{marker}\""); assert!( - ENGINE_SRC.contains(&needle), - "engine.rs no longer mentions start marker `{marker}` — protocol \ - scrubbing may have regressed. Searched for {needle:?}." + any_engine_source_contains(&needle), + "no engine source file still mentions start marker `{marker}` — \ + protocol scrubbing may have regressed. Searched for {needle:?} \ + across engine.rs and engine/* submodules." ); } } @@ -58,9 +80,10 @@ fn engine_keeps_known_fake_wrapper_end_markers() { for marker in EXPECTED_END_MARKERS { let needle = format!("\"{marker}\""); assert!( - ENGINE_SRC.contains(&needle), - "engine.rs no longer mentions end marker `{marker}` — protocol \ - scrubbing may have regressed. Searched for {needle:?}." + any_engine_source_contains(&needle), + "no engine source file still mentions end marker `{marker}` — \ + protocol scrubbing may have regressed. Searched for {needle:?} \ + across engine.rs and engine/* submodules." ); } } @@ -71,18 +94,18 @@ fn engine_marker_counts_stay_paired() { // filter able to enter tool-call mode without ever leaving it. Lock the // count to whatever the constants currently declare. assert_eq!(EXPECTED_START_MARKERS.len(), EXPECTED_END_MARKERS.len()); - assert!(ENGINE_SRC.contains("TOOL_CALL_START_MARKERS")); - assert!(ENGINE_SRC.contains("TOOL_CALL_END_MARKERS")); + assert!(any_engine_source_contains("TOOL_CALL_START_MARKERS")); + assert!(any_engine_source_contains("TOOL_CALL_END_MARKERS")); } #[test] fn engine_emits_compact_fake_wrapper_notice() { assert!( - ENGINE_SRC.contains("FAKE_WRAPPER_NOTICE"), - "engine.rs no longer references the protocol-recovery notice constant" + any_engine_source_contains("FAKE_WRAPPER_NOTICE"), + "no engine source file references the protocol-recovery notice constant" ); assert!( - ENGINE_SRC.contains("API tool channel"), + any_engine_source_contains("API tool channel"), "the protocol-recovery notice should mention the API tool channel" ); } From e620e75f99d643a2ab379194dc27e69392f4f689 Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Fri, 1 May 2026 09:46:21 -0500 Subject: [PATCH 15/17] chore: release v0.8.3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps workspace, all internal path-deps, and npm wrapper (version + deepseekBinaryVersion) from 0.8.2 → 0.8.3. Lockfile re-locked offline. CHANGELOG entry summarizing the 0.8.3 lane: skills path bug fix, privacy contraction, helpful missing-companion error (#258), engine decomposition (#227), bridge/persistence/palette test gap closures, crates.io badge, and 10 issue closures. Local v0.8.3 verified at /tmp/deepseek-0.8.3-test/ before publish. Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 44 +++++++++++++++++++++++++++++++++++ Cargo.lock | 28 +++++++++++----------- Cargo.toml | 2 +- crates/agent/Cargo.toml | 2 +- crates/app-server/Cargo.toml | 18 +++++++------- crates/cli/Cargo.toml | 14 +++++------ crates/config/Cargo.toml | 2 +- crates/core/Cargo.toml | 16 ++++++------- crates/execpolicy/Cargo.toml | 2 +- crates/hooks/Cargo.toml | 2 +- crates/mcp/Cargo.toml | 2 +- crates/tools/Cargo.toml | 2 +- crates/tui/Cargo.toml | 6 ++--- npm/deepseek-tui/package.json | 4 ++-- 14 files changed, 94 insertions(+), 50 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 12b10002..b3576469 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,50 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.8.3] - 2026-05-01 + +### Fixed +- **Skills prompt referenced fabricated paths** — `render_available_skills_context` + rendered each skill's file as `//SKILL.md`, + which did not exist when the directory name differed from the frontmatter + `name` (community installs, manually-placed skills). `Skill` now carries the + real path captured at discovery and renders that. +- **Missing-companion error was hostile to direct GitHub Release downloaders** + (#258) — replaced "Build workspace default members to install it" wall of + text with a concrete three-path checklist: `npm install -g deepseek-tui`, + `cargo install deepseek-tui-cli deepseek-tui --locked`, or downloading both + `deepseek-` AND `deepseek-tui-` from the same Release + page. `DEEPSEEK_TUI_BIN` stays as a power-user fallback. + +### Added +- **Privacy: `$HOME` contracts to `~` in viewer-visible paths** — the TUI, + `deepseek doctor`, `deepseek setup`, and onboarding now contract the home + directory to `~` in every path shown on screen, so screenshots, screencasts, + and pasted help output do not leak the OS account name. Persisted state, + audit log, session checkpoints, and LLM-bound system prompts intentionally + keep absolute paths for full fidelity. +- **`crates.io` badge** alongside the CI and npm badges in both English and + Simplified Chinese READMEs. +- **Engine decomposition** (#227) — `core/engine.rs` is split into focused + submodules (`engine/{streaming,turn_loop,dispatch,tool_setup,tool_execution,tool_catalog,context,approval,capacity_flow,lsp_hooks,tests}.rs`). + No behavior change; preparation for the future agent-loop work. + +### Tests +- RLM bridge: `batch_guard` extracted and tested for the empty-batch and + oversize-batch invariants; depth-guard fallback covered (partial #231). +- Persistence: schema-version rejection covered for `load_session`, + `load_offline_queue_state`, `runtime_threads::load_turn`, + `runtime_threads::load_item` (partial #233). +- Command palette: `[disabled]` server description tag (closes the + remaining #197 acceptance gap). +- Protocol-recovery contract tests now scan the engine submodules in + addition to `engine.rs` so the decomposition refactor doesn't silently + hide the fake-wrapper marker assertions. + +### Issue triage +- 10 issues closed with verification commits cited (#247, #235, #197, + #250, #234, #243, #238, #236, #239, #195). + ## [0.8.2] - 2026-05-01 ### Fixed diff --git a/Cargo.lock b/Cargo.lock index fc3c19c7..25b8cc66 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1011,7 +1011,7 @@ dependencies = [ [[package]] name = "deepseek-agent" -version = "0.8.2" +version = "0.8.3" dependencies = [ "deepseek-config", "serde", @@ -1019,7 +1019,7 @@ dependencies = [ [[package]] name = "deepseek-app-server" -version = "0.8.2" +version = "0.8.3" dependencies = [ "anyhow", "axum", @@ -1042,7 +1042,7 @@ dependencies = [ [[package]] name = "deepseek-config" -version = "0.8.2" +version = "0.8.3" dependencies = [ "anyhow", "deepseek-secrets", @@ -1055,7 +1055,7 @@ dependencies = [ [[package]] name = "deepseek-core" -version = "0.8.2" +version = "0.8.3" dependencies = [ "anyhow", "chrono", @@ -1074,7 +1074,7 @@ dependencies = [ [[package]] name = "deepseek-execpolicy" -version = "0.8.2" +version = "0.8.3" dependencies = [ "anyhow", "deepseek-protocol", @@ -1083,7 +1083,7 @@ dependencies = [ [[package]] name = "deepseek-hooks" -version = "0.8.2" +version = "0.8.3" dependencies = [ "anyhow", "async-trait", @@ -1097,7 +1097,7 @@ dependencies = [ [[package]] name = "deepseek-mcp" -version = "0.8.2" +version = "0.8.3" dependencies = [ "anyhow", "deepseek-protocol", @@ -1107,7 +1107,7 @@ dependencies = [ [[package]] name = "deepseek-protocol" -version = "0.8.2" +version = "0.8.3" dependencies = [ "serde", "serde_json", @@ -1115,7 +1115,7 @@ dependencies = [ [[package]] name = "deepseek-secrets" -version = "0.8.2" +version = "0.8.3" dependencies = [ "dirs", "keyring", @@ -1128,7 +1128,7 @@ dependencies = [ [[package]] name = "deepseek-state" -version = "0.8.2" +version = "0.8.3" dependencies = [ "anyhow", "chrono", @@ -1140,7 +1140,7 @@ dependencies = [ [[package]] name = "deepseek-tools" -version = "0.8.2" +version = "0.8.3" dependencies = [ "anyhow", "async-trait", @@ -1153,7 +1153,7 @@ dependencies = [ [[package]] name = "deepseek-tui" -version = "0.8.2" +version = "0.8.3" dependencies = [ "anyhow", "arboard", @@ -1214,7 +1214,7 @@ dependencies = [ [[package]] name = "deepseek-tui-cli" -version = "0.8.2" +version = "0.8.3" dependencies = [ "anyhow", "chrono", @@ -1237,7 +1237,7 @@ dependencies = [ [[package]] name = "deepseek-tui-core" -version = "0.8.2" +version = "0.8.3" [[package]] name = "deranged" diff --git a/Cargo.toml b/Cargo.toml index 7d75c363..b22cfdfa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ default-members = ["crates/cli", "crates/app-server", "crates/tui"] resolver = "2" [workspace.package] -version = "0.8.2" +version = "0.8.3" edition = "2024" license = "MIT" repository = "https://github.com/Hmbown/DeepSeek-TUI" diff --git a/crates/agent/Cargo.toml b/crates/agent/Cargo.toml index 0745d4b3..ce061459 100644 --- a/crates/agent/Cargo.toml +++ b/crates/agent/Cargo.toml @@ -7,5 +7,5 @@ repository.workspace = true description = "Model/provider registry and fallback strategy for DeepSeek workspace architecture" [dependencies] -deepseek-config = { path = "../config", version = "0.8.2" } +deepseek-config = { path = "../config", version = "0.8.3" } serde.workspace = true diff --git a/crates/app-server/Cargo.toml b/crates/app-server/Cargo.toml index d0e0623d..60b1ab8e 100644 --- a/crates/app-server/Cargo.toml +++ b/crates/app-server/Cargo.toml @@ -10,15 +10,15 @@ description = "Codex-style app-server transport for DeepSeek workspace architect anyhow.workspace = true axum.workspace = true clap.workspace = true -deepseek-agent = { path = "../agent", version = "0.8.2" } -deepseek-config = { path = "../config", version = "0.8.2" } -deepseek-core = { path = "../core", version = "0.8.2" } -deepseek-execpolicy = { path = "../execpolicy", version = "0.8.2" } -deepseek-hooks = { path = "../hooks", version = "0.8.2" } -deepseek-mcp = { path = "../mcp", version = "0.8.2" } -deepseek-protocol = { path = "../protocol", version = "0.8.2" } -deepseek-state = { path = "../state", version = "0.8.2" } -deepseek-tools = { path = "../tools", version = "0.8.2" } +deepseek-agent = { path = "../agent", version = "0.8.3" } +deepseek-config = { path = "../config", version = "0.8.3" } +deepseek-core = { path = "../core", version = "0.8.3" } +deepseek-execpolicy = { path = "../execpolicy", version = "0.8.3" } +deepseek-hooks = { path = "../hooks", version = "0.8.3" } +deepseek-mcp = { path = "../mcp", version = "0.8.3" } +deepseek-protocol = { path = "../protocol", version = "0.8.3" } +deepseek-state = { path = "../state", version = "0.8.3" } +deepseek-tools = { path = "../tools", version = "0.8.3" } serde.workspace = true serde_json.workspace = true tokio.workspace = true diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml index af286a6c..bed047cc 100644 --- a/crates/cli/Cargo.toml +++ b/crates/cli/Cargo.toml @@ -14,13 +14,13 @@ path = "src/main.rs" anyhow.workspace = true clap.workspace = true clap_complete.workspace = true -deepseek-agent = { path = "../agent", version = "0.8.2" } -deepseek-app-server = { path = "../app-server", version = "0.8.2" } -deepseek-config = { path = "../config", version = "0.8.2" } -deepseek-execpolicy = { path = "../execpolicy", version = "0.8.2" } -deepseek-mcp = { path = "../mcp", version = "0.8.2" } -deepseek-secrets = { path = "../secrets", version = "0.8.2" } -deepseek-state = { path = "../state", version = "0.8.2" } +deepseek-agent = { path = "../agent", version = "0.8.3" } +deepseek-app-server = { path = "../app-server", version = "0.8.3" } +deepseek-config = { path = "../config", version = "0.8.3" } +deepseek-execpolicy = { path = "../execpolicy", version = "0.8.3" } +deepseek-mcp = { path = "../mcp", version = "0.8.3" } +deepseek-secrets = { path = "../secrets", version = "0.8.3" } +deepseek-state = { path = "../state", version = "0.8.3" } chrono.workspace = true dirs.workspace = true serde.workspace = true diff --git a/crates/config/Cargo.toml b/crates/config/Cargo.toml index ea0c3151..fea07d63 100644 --- a/crates/config/Cargo.toml +++ b/crates/config/Cargo.toml @@ -8,7 +8,7 @@ description = "Config schema and precedence model for DeepSeek workspace archite [dependencies] anyhow.workspace = true -deepseek-secrets = { path = "../secrets", version = "0.8.2" } +deepseek-secrets = { path = "../secrets", version = "0.8.3" } dirs.workspace = true serde.workspace = true serde_json.workspace = true diff --git a/crates/core/Cargo.toml b/crates/core/Cargo.toml index c1aac52d..cda119ab 100644 --- a/crates/core/Cargo.toml +++ b/crates/core/Cargo.toml @@ -9,14 +9,14 @@ description = "Core runtime boundaries for DeepSeek workspace architecture" [dependencies] anyhow.workspace = true chrono.workspace = true -deepseek-agent = { path = "../agent", version = "0.8.2" } -deepseek-config = { path = "../config", version = "0.8.2" } -deepseek-execpolicy = { path = "../execpolicy", version = "0.8.2" } -deepseek-hooks = { path = "../hooks", version = "0.8.2" } -deepseek-mcp = { path = "../mcp", version = "0.8.2" } -deepseek-protocol = { path = "../protocol", version = "0.8.2" } -deepseek-state = { path = "../state", version = "0.8.2" } -deepseek-tools = { path = "../tools", version = "0.8.2" } +deepseek-agent = { path = "../agent", version = "0.8.3" } +deepseek-config = { path = "../config", version = "0.8.3" } +deepseek-execpolicy = { path = "../execpolicy", version = "0.8.3" } +deepseek-hooks = { path = "../hooks", version = "0.8.3" } +deepseek-mcp = { path = "../mcp", version = "0.8.3" } +deepseek-protocol = { path = "../protocol", version = "0.8.3" } +deepseek-state = { path = "../state", version = "0.8.3" } +deepseek-tools = { path = "../tools", version = "0.8.3" } serde_json.workspace = true tokio.workspace = true uuid.workspace = true diff --git a/crates/execpolicy/Cargo.toml b/crates/execpolicy/Cargo.toml index 1891a5a6..7d15b484 100644 --- a/crates/execpolicy/Cargo.toml +++ b/crates/execpolicy/Cargo.toml @@ -8,5 +8,5 @@ description = "Execution policy and approval model parity for DeepSeek workspace [dependencies] anyhow.workspace = true -deepseek-protocol = { path = "../protocol", version = "0.8.2" } +deepseek-protocol = { path = "../protocol", version = "0.8.3" } serde.workspace = true diff --git a/crates/hooks/Cargo.toml b/crates/hooks/Cargo.toml index 0edef814..c570f50b 100644 --- a/crates/hooks/Cargo.toml +++ b/crates/hooks/Cargo.toml @@ -10,7 +10,7 @@ description = "Hook dispatch and notifications parity for DeepSeek workspace arc anyhow.workspace = true async-trait.workspace = true chrono.workspace = true -deepseek-protocol = { path = "../protocol", version = "0.8.2" } +deepseek-protocol = { path = "../protocol", version = "0.8.3" } reqwest.workspace = true serde.workspace = true serde_json.workspace = true diff --git a/crates/mcp/Cargo.toml b/crates/mcp/Cargo.toml index f05ce838..532dbbd4 100644 --- a/crates/mcp/Cargo.toml +++ b/crates/mcp/Cargo.toml @@ -8,6 +8,6 @@ description = "MCP server lifecycle and tool proxy compatibility for DeepSeek wo [dependencies] anyhow.workspace = true -deepseek-protocol = { path = "../protocol", version = "0.8.2" } +deepseek-protocol = { path = "../protocol", version = "0.8.3" } serde.workspace = true serde_json.workspace = true diff --git a/crates/tools/Cargo.toml b/crates/tools/Cargo.toml index 9edb91c8..6accfd76 100644 --- a/crates/tools/Cargo.toml +++ b/crates/tools/Cargo.toml @@ -9,7 +9,7 @@ description = "Tool invocation lifecycle, schema validation, and scheduler paral [dependencies] anyhow.workspace = true async-trait.workspace = true -deepseek-protocol = { path = "../protocol", version = "0.8.2" } +deepseek-protocol = { path = "../protocol", version = "0.8.3" } serde.workspace = true serde_json.workspace = true tokio.workspace = true diff --git a/crates/tui/Cargo.toml b/crates/tui/Cargo.toml index e880e936..91c0c3fe 100644 --- a/crates/tui/Cargo.toml +++ b/crates/tui/Cargo.toml @@ -14,9 +14,9 @@ path = "src/main.rs" [dependencies] anyhow = "1.0.100" arboard = "3.4" -deepseek-tui-cli = { path = "../cli", version = "0.8.2" } -deepseek-secrets = { path = "../secrets", version = "0.8.2" } -deepseek-tools = { path = "../tools", version = "0.8.2" } +deepseek-tui-cli = { path = "../cli", version = "0.8.3" } +deepseek-secrets = { path = "../secrets", version = "0.8.3" } +deepseek-tools = { path = "../tools", version = "0.8.3" } async-stream = "0.3.6" async-trait = "0.1" bytes = "1.11.0" diff --git a/npm/deepseek-tui/package.json b/npm/deepseek-tui/package.json index 1154b7ba..bfc940e7 100644 --- a/npm/deepseek-tui/package.json +++ b/npm/deepseek-tui/package.json @@ -1,7 +1,7 @@ { "name": "deepseek-tui", - "version": "0.8.2", - "deepseekBinaryVersion": "0.8.2", + "version": "0.8.3", + "deepseekBinaryVersion": "0.8.3", "description": "Install and run deepseek and deepseek-tui binaries from GitHub release artifacts.", "author": "Hmbown", "license": "MIT", From 1042e37fbddfa6140c3748f4371d92c96583c053 Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Fri, 1 May 2026 11:06:42 -0500 Subject: [PATCH 16/17] refactor(cli): centralize feature command output --- crates/tui/src/commands/mod.rs | 47 +++++++++++++++++++--- crates/tui/src/features.rs | 72 +++++++++++++++++++++++++++++----- crates/tui/src/main.rs | 27 +++---------- 3 files changed, 108 insertions(+), 38 deletions(-) diff --git a/crates/tui/src/commands/mod.rs b/crates/tui/src/commands/mod.rs index 187d10bf..bfd66bb2 100644 --- a/crates/tui/src/commands/mod.rs +++ b/crates/tui/src/commands/mod.rs @@ -303,12 +303,6 @@ pub const COMMANDS: &[CommandInfo] = &[ description: "Show current system prompt", usage: "/system", }, - CommandInfo { - name: "context", - aliases: &[], - description: "Show context window usage", - usage: "/context", - }, CommandInfo { name: "undo", aliases: &[], @@ -687,6 +681,47 @@ mod tests { assert_eq!(links.aliases, &["dashboard", "api"]); } + #[test] + fn command_registry_has_unique_names_and_aliases() { + let mut names = std::collections::BTreeSet::new(); + for command in COMMANDS { + assert!( + names.insert(command.name), + "duplicate command name /{}", + command.name + ); + } + + let mut aliases = std::collections::BTreeSet::new(); + for command in COMMANDS { + for alias in command.aliases { + assert!( + !names.contains(alias), + "alias /{} collides with a command name", + alias + ); + assert!(aliases.insert(*alias), "duplicate command alias /{alias}"); + } + } + } + + #[test] + fn context_command_opens_inspector_and_keeps_ctx_alias() { + let context = COMMANDS + .iter() + .find(|cmd| cmd.name == "context") + .expect("context command should exist"); + assert_eq!(context.aliases, &["ctx"]); + assert!(context.description.contains("inspector")); + + let mut app = create_test_app(); + let result = execute("/ctx", &mut app); + assert!(matches!( + result.action, + Some(AppAction::OpenContextInspector) + )); + } + #[test] fn execute_config_opens_config_view_action() { let mut app = create_test_app(); diff --git a/crates/tui/src/features.rs b/crates/tui/src/features.rs index 3736ce66..0dbc4f81 100644 --- a/crates/tui/src/features.rs +++ b/crates/tui/src/features.rs @@ -1,10 +1,9 @@ -// TODO(integrate): Wire feature flags into engine/tool registration — tracked as future work #![allow(dead_code)] //! Feature flags and metadata for DeepSeek TUI. use std::collections::{BTreeMap, BTreeSet}; -use std::fmt; +use std::fmt::{self, Write as _}; use serde::{Deserialize, Serialize}; @@ -18,6 +17,18 @@ pub enum Stage { Removed, } +impl Stage { + pub fn as_str(self) -> &'static str { + match self { + Self::Experimental => "experimental", + Self::Beta => "beta", + Self::Stable => "stable", + Self::Deprecated => "deprecated", + Self::Removed => "removed", + } + } +} + /// Unique features toggled via configuration. #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum Feature { @@ -37,14 +48,7 @@ pub enum Feature { impl fmt::Display for Stage { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let label = match self { - Self::Experimental => "experimental", - Self::Beta => "beta", - Self::Stable => "stable", - Self::Deprecated => "deprecated", - Self::Removed => "removed", - }; - f.write_str(label) + f.write_str(self.as_str()) } } @@ -136,6 +140,20 @@ pub fn feature_spec_by_key(key: &str) -> Option<&'static FeatureSpec> { FEATURES.iter().find(|spec| spec.key == key) } +pub fn render_feature_table(features: &Features) -> String { + let mut output = String::from("feature\tstage\tenabled\n"); + for spec in FEATURES { + let _ = writeln!( + output, + "{}\t{}\t{}", + spec.key, + spec.stage, + features.enabled(spec.id) + ); + } + output +} + /// Deserializable features table for TOML. #[derive(Serialize, Deserialize, Debug, Clone, Default, PartialEq)] pub struct FeaturesToml { @@ -190,3 +208,37 @@ pub const FEATURES: &[FeatureSpec] = &[ default_enabled: true, }, ]; + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn apply_map_toggles_known_features_and_ignores_unknown_keys() { + let mut features = Features::with_defaults(); + let entries = BTreeMap::from([ + ("mcp".to_string(), false), + ("shell_tool".to_string(), false), + ("not_real".to_string(), false), + ]); + + features.apply_map(&entries); + + assert!(!features.enabled(Feature::Mcp)); + assert!(!features.enabled(Feature::ShellTool)); + assert_eq!(feature_from_key("not_real"), None); + } + + #[test] + fn render_feature_table_uses_registry_order_and_effective_state() { + let mut features = Features::with_defaults(); + features.disable(Feature::Mcp); + + let table = render_feature_table(&features); + let lines = table.lines().collect::>(); + + assert_eq!(lines.first(), Some(&"feature\tstage\tenabled")); + assert!(lines.contains(&"shell_tool\tstable\ttrue")); + assert!(lines.contains(&"mcp\texperimental\tfalse")); + } +} diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs index f87d08b9..5b21d7f5 100644 --- a/crates/tui/src/main.rs +++ b/crates/tui/src/main.rs @@ -63,7 +63,7 @@ mod workspace_trust; use crate::config::{Config, DEFAULT_TEXT_MODEL, MAX_SUBAGENTS}; use crate::eval::{EvalHarness, EvalHarnessConfig, ScenarioStepKind}; -use crate::features::Feature; +use crate::features::{Feature, render_feature_table}; use crate::llm_client::LlmClient; use crate::mcp::{McpConfig, McpPool, McpServerConfig}; use crate::models::{ContentBlock, Message, MessageRequest, SystemPrompt}; @@ -1876,30 +1876,13 @@ fn run_execpolicy_command(command: ExecpolicyCommand) -> Result<()> { fn run_features_command(config: &Config, command: FeaturesCli) -> Result<()> { match command.command { - FeaturesSubcommand::List => run_features_list(config), + FeaturesSubcommand::List => { + print!("{}", render_feature_table(&config.features())); + Ok(()) + } } } -fn stage_str(stage: features::Stage) -> &'static str { - match stage { - features::Stage::Experimental => "experimental", - features::Stage::Beta => "beta", - features::Stage::Stable => "stable", - features::Stage::Deprecated => "deprecated", - features::Stage::Removed => "removed", - } -} - -fn run_features_list(config: &Config) -> Result<()> { - let features = config.features(); - println!("feature\tstage\tenabled"); - for spec in features::FEATURES { - let enabled = features.enabled(spec.id); - println!("{}\t{}\t{enabled}", spec.key, stage_str(spec.stage)); - } - Ok(()) -} - async fn run_models(config: &Config, args: ModelsArgs) -> Result<()> { use crate::client::DeepSeekClient; From 997c7f4bcdde12183820a670e46eb7b9b5c06731 Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Fri, 1 May 2026 11:06:45 -0500 Subject: [PATCH 17/17] chore(release): verify dual registry publish state --- docs/RELEASE_RUNBOOK.md | 11 +++ scripts/release/check-published.sh | 95 +++++++++++++++++++++++++ scripts/release/crates.sh | 19 +++++ scripts/release/publish-crates.sh | 110 ++++++++++++++--------------- 4 files changed, 178 insertions(+), 57 deletions(-) create mode 100755 scripts/release/check-published.sh create mode 100755 scripts/release/crates.sh diff --git a/docs/RELEASE_RUNBOOK.md b/docs/RELEASE_RUNBOOK.md index 5aaabd18..9179fcc8 100644 --- a/docs/RELEASE_RUNBOOK.md +++ b/docs/RELEASE_RUNBOOK.md @@ -90,6 +90,17 @@ Set `DEEPSEEK_TUI_VERSION` to the npm package version you are verifying for that The CI workflow runs the same tarball install + delegated-entrypoint smoke test on Linux, macOS, and Windows. +After publishing, prove the release is visible in both registries: + +```bash +./scripts/release/check-published.sh X.Y.Z +``` + +Do not mark a Rust release complete until that command sees `deepseek-tui@X.Y.Z` +on npm and every `deepseek-*` crate at `X.Y.Z` on crates.io. For a rare +npm packaging-only release, run with `--allow-npm-binary-mismatch` and keep the +release notes explicit that no new Rust binary version shipped. + ## Rust Crates Release 1. Update the workspace version in [Cargo.toml](../Cargo.toml). diff --git a/scripts/release/check-published.sh b/scripts/release/check-published.sh new file mode 100755 index 00000000..e02a1936 --- /dev/null +++ b/scripts/release/check-published.sh @@ -0,0 +1,95 @@ +#!/usr/bin/env bash +set -euo pipefail + +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +repo_root="$(cd "${script_dir}/../.." && pwd)" +# shellcheck source=scripts/release/crates.sh +source "${script_dir}/crates.sh" + +usage() { + cat <<'EOF' +usage: scripts/release/check-published.sh [--allow-npm-binary-mismatch] [VERSION] + +Verifies that a release version is visible on both npm and crates.io. +Defaults VERSION to the workspace version in Cargo.toml. + +Use --allow-npm-binary-mismatch only for npm packaging-only releases where +the npm package intentionally points at an older GitHub binary release. +EOF +} + +allow_npm_binary_mismatch=0 +version="" + +while (($# > 0)); do + case "$1" in + --allow-npm-binary-mismatch) + allow_npm_binary_mismatch=1 + ;; + -h|--help) + usage + exit 0 + ;; + *) + if [[ -n "${version}" ]]; then + usage >&2 + exit 2 + fi + version="$1" + ;; + esac + shift +done + +cd "${repo_root}" + +if [[ -z "${version}" ]]; then + version="$(grep -E '^version = "' Cargo.toml | head -n1 | sed -E 's/^version = "([^"]+)".*/\1/')" +fi + +if [[ -z "${version}" ]]; then + echo "Could not determine release version." >&2 + exit 1 +fi + +fail=0 + +echo "Checking published release ${version}..." + +if npm_version="$(npm view "deepseek-tui@${version}" version 2>/dev/null)"; then + echo "npm deepseek-tui@${npm_version} is published." +else + echo "npm deepseek-tui@${version} is not published." >&2 + fail=1 +fi + +if npm_binary_version="$(npm view "deepseek-tui@${version}" deepseekBinaryVersion 2>/dev/null)"; then + if [[ "${npm_binary_version}" == "${version}" ]]; then + echo "npm deepseekBinaryVersion=${npm_binary_version}." + elif [[ "${allow_npm_binary_mismatch}" == "1" ]]; then + echo "npm deepseekBinaryVersion=${npm_binary_version} (allowed packaging-only mismatch)." + else + echo "npm deepseekBinaryVersion=${npm_binary_version}, expected ${version}." >&2 + fail=1 + fi +elif [[ "${allow_npm_binary_mismatch}" == "1" ]]; then + echo "npm deepseekBinaryVersion is absent (allowed packaging-only mismatch)." +else + echo "npm deepseekBinaryVersion is absent for deepseek-tui@${version}." >&2 + fail=1 +fi + +for crate in "${release_crates[@]}"; do + if curl -fsSL "https://crates.io/api/v1/crates/${crate}/${version}" >/dev/null 2>&1; then + echo "crates.io ${crate}@${version} is published." + else + echo "crates.io ${crate}@${version} is not published." >&2 + fail=1 + fi +done + +if [[ "${fail}" == "0" ]]; then + echo "Published release OK: npm deepseek-tui@${version} and ${#release_crates[@]} crates are visible." +fi + +exit "${fail}" diff --git a/scripts/release/crates.sh b/scripts/release/crates.sh new file mode 100755 index 00000000..34b0e188 --- /dev/null +++ b/scripts/release/crates.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +# Crates published for each DeepSeek TUI release, in dependency order. +release_crates=( + deepseek-secrets + deepseek-config + deepseek-protocol + deepseek-state + deepseek-agent + deepseek-execpolicy + deepseek-hooks + deepseek-mcp + deepseek-tools + deepseek-core + deepseek-app-server + deepseek-tui-core + deepseek-tui-cli + deepseek-tui +) diff --git a/scripts/release/publish-crates.sh b/scripts/release/publish-crates.sh index 2c836124..2c52b800 100755 --- a/scripts/release/publish-crates.sh +++ b/scripts/release/publish-crates.sh @@ -1,6 +1,10 @@ #!/usr/bin/env bash set -euo pipefail +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +# shellcheck source=scripts/release/crates.sh +source "${script_dir}/crates.sh" + mode="${1:-dry-run}" case "${mode}" in dry-run|publish) ;; @@ -10,42 +14,22 @@ case "${mode}" in ;; esac -packages=( - deepseek-secrets - deepseek-config - deepseek-protocol - deepseek-state - deepseek-agent - deepseek-execpolicy - deepseek-hooks - deepseek-mcp - deepseek-tools - deepseek-core - deepseek-app-server - deepseek-tui-core - deepseek-tui-cli - deepseek-tui -) - -workspace_version="$( - python3 - <<'PY' -import json -import subprocess - -metadata = json.loads( - subprocess.check_output(["cargo", "metadata", "--format-version", "1", "--no-deps"]) -) -workspace_members = set(metadata["workspace_members"]) -for pkg in metadata["packages"]: - if pkg["id"] in workspace_members: - print(pkg["version"]) - break -PY -)" +packages=("${release_crates[@]}") +workspace_version="" workspace_deepseek_packages=() -while IFS= read -r workspace_package; do - workspace_deepseek_packages+=("${workspace_package}") +workspace_package_dep_flags=() + +while IFS=$'\t' read -r kind name value; do + case "${kind}" in + version) + workspace_version="${name}" + ;; + crate) + workspace_deepseek_packages+=("${name}") + workspace_package_dep_flags+=("${value}") + ;; + esac done < <( python3 - <<'PY' import json @@ -55,12 +39,34 @@ metadata = json.loads( subprocess.check_output(["cargo", "metadata", "--format-version", "1", "--no-deps"]) ) workspace_members = set(metadata["workspace_members"]) -for pkg in sorted(metadata["packages"], key=lambda item: item["name"]): - if pkg["id"] in workspace_members and pkg["name"].startswith("deepseek-"): - print(pkg["name"]) +workspace_packages = [ + pkg for pkg in metadata["packages"] if pkg["id"] in workspace_members +] +workspace_by_name = {pkg["name"]: pkg for pkg in workspace_packages} + +versions = sorted({pkg["version"] for pkg in workspace_packages}) +if not versions: + raise SystemExit("workspace has no packages") +if len(versions) != 1: + raise SystemExit(f"workspace packages have mixed versions: {', '.join(versions)}") +print(f"version\t{versions[0]}\t") + +for pkg in sorted(workspace_packages, key=lambda item: item["name"]): + if not pkg["name"].startswith("deepseek-"): + continue + has_workspace_dep = any( + dep.get("path") and dep["name"] in workspace_by_name + for dep in pkg["dependencies"] + ) + print(f"crate\t{pkg['name']}\t{1 if has_workspace_dep else 0}") PY ) +if [[ -z "${workspace_version}" ]]; then + echo "Could not determine workspace version." >&2 + exit 1 +fi + missing_packages=() for workspace_package in "${workspace_deepseek_packages[@]}"; do found=0 @@ -101,26 +107,16 @@ fi package_has_workspace_deps() { local package_name="$1" - python3 - "${package_name}" <<'PY' -import json -import subprocess -import sys + local index + for ((index = 0; index < ${#workspace_deepseek_packages[@]}; index += 1)); do + if [[ "${workspace_deepseek_packages[$index]}" == "${package_name}" ]]; then + [[ "${workspace_package_dep_flags[$index]}" == "1" ]] + return + fi + done -package_name = sys.argv[1] -metadata = json.loads( - subprocess.check_output(["cargo", "metadata", "--format-version", "1", "--no-deps"]) -) -workspace_ids = set(metadata["workspace_members"]) -workspace_packages = { - pkg["name"]: pkg for pkg in metadata["packages"] if pkg["id"] in workspace_ids -} -package = workspace_packages[package_name] -has_workspace_dep = any( - dep.get("path") and dep["name"] in workspace_packages - for dep in package["dependencies"] -) -print("1" if has_workspace_dep else "0") -PY + echo "Unknown workspace crate: ${package_name}" >&2 + return 1 } crate_version_exists() { @@ -149,7 +145,7 @@ wait_for_crate_version() { for package in "${packages[@]}"; do echo "::group::${mode} ${package}" if [[ "${mode}" == "dry-run" ]]; then - if [[ "$(package_has_workspace_deps "${package}")" == "1" ]]; then + if package_has_workspace_deps "${package}"; then cargo package --allow-dirty --locked --list -p "${package}" >/dev/null echo "Verified package contents for ${package}; full crates.io dry-run requires workspace dependencies at ${workspace_version} to be published first." else