diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8d802a45..e6e56eb1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -5,6 +5,8 @@ on: branches: [master, main] pull_request: branches: [master, main] + schedule: + - cron: '31 6 * * 1' env: CARGO_TERM_COLOR: always @@ -67,28 +69,13 @@ jobs: - name: Run Offline Eval Harness run: cargo run -p deepseek-tui --all-features -- eval - build: - name: Build - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest, macos-latest, windows-latest] - steps: - - uses: actions/checkout@v4 - - uses: dtolnay/rust-toolchain@stable - - name: Install Linux system dependencies - if: runner.os == 'Linux' - run: sudo apt-get update && sudo apt-get install -y libdbus-1-dev pkg-config - - uses: Swatinem/rust-cache@v2 - - name: Build - run: cargo build --release - npm-wrapper-smoke: name: npm wrapper smoke + if: github.event_name != 'schedule' runs-on: ${{ matrix.os }} strategy: matrix: - os: [ubuntu-latest, macos-latest, windows-latest] + os: ${{ fromJSON(github.event_name == 'pull_request' && '["ubuntu-latest"]' || '["ubuntu-latest","macos-latest","windows-latest"]') }} steps: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@stable @@ -107,6 +94,7 @@ jobs: # Check documentation builds without warnings docs: name: Documentation + if: github.event_name == 'schedule' runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/crates-publish.yml b/.github/workflows/crates-publish.yml deleted file mode 100644 index c95f8539..00000000 --- a/.github/workflows/crates-publish.yml +++ /dev/null @@ -1,30 +0,0 @@ -name: Publish to Crates.io - -on: - release: - types: [published] - workflow_dispatch: - -jobs: - publish: - name: Publish workspace crates to crates.io - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - uses: dtolnay/rust-toolchain@stable - - name: Install Linux system dependencies - if: runner.os == 'Linux' - run: sudo apt-get 
update && sudo apt-get install -y libdbus-1-dev pkg-config - - uses: Swatinem/rust-cache@v2 - - - name: Verify version matches tag - if: github.event_name == 'release' - run: ./scripts/release/verify-workspace-version.sh "${GITHUB_REF#refs/tags/v}" - - - name: Preflight workspace crate publishes in workspace order - run: ./scripts/release/publish-crates.sh dry-run - - - name: Publish crates.io packages in workspace order - run: ./scripts/release/publish-crates.sh publish - env: - CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }} diff --git a/.github/workflows/parity.yml b/.github/workflows/parity.yml deleted file mode 100644 index 087828d4..00000000 --- a/.github/workflows/parity.yml +++ /dev/null @@ -1,62 +0,0 @@ -name: parity - -on: - pull_request: - push: - branches: - - main - -env: - CARGO_TERM_COLOR: always - RUSTFLAGS: -Dwarnings - -jobs: - parity: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Install Rust toolchain - uses: dtolnay/rust-toolchain@stable - with: - components: clippy, rustfmt - - - name: Install Linux system dependencies - if: runner.os == 'Linux' - run: sudo apt-get update && sudo apt-get install -y libdbus-1-dev pkg-config - - - name: Cache Cargo registry - uses: actions/cache@v4 - with: - path: | - ~/.cargo/registry - ~/.cargo/git - target - key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} - restore-keys: | - ${{ runner.os }}-cargo- - - - name: Format check - run: cargo fmt --all -- --check - - - name: Compile check - run: cargo check --workspace --all-targets --locked - - - name: Clippy - run: cargo clippy --workspace --all-targets --all-features --locked -- -D warnings - - - name: Unit and parity tests - run: cargo test --workspace --all-features --locked - - - name: TUI snapshot parity - run: cargo test -p deepseek-tui-core --test snapshot --locked - - - name: Protocol schema sanity - run: cargo test -p deepseek-protocol --test parity_protocol --locked - - - name: State 
persistence sanity - run: cargo test -p deepseek-state --test parity_state --locked - - - name: Lockfile drift guard - run: git diff --exit-code -- Cargo.lock diff --git a/.github/workflows/publish-npm.yml b/.github/workflows/publish-npm.yml deleted file mode 100644 index 57392f6e..00000000 --- a/.github/workflows/publish-npm.yml +++ /dev/null @@ -1,42 +0,0 @@ -name: Publish npm - -on: - workflow_dispatch: - inputs: - version: - description: 'Package/release version to publish, without the leading v' - required: true - type: string - -env: - CARGO_TERM_COLOR: always - -jobs: - publish: - runs-on: ubuntu-latest - # `release.yml` no longer publishes to npm — see CLAUDE.md "Releases" for - # the manual flow we actually use. This workflow remains as inert plumbing - # that only works if npm Trusted Publishing is configured for it; trigger - # via `gh workflow run publish-npm.yml -f version=X.Y.Z` if you've set - # that up on the npm side. - permissions: - contents: read - id-token: write - steps: - - uses: actions/checkout@v4 - - uses: actions/setup-node@v4 - with: - node-version: '24' - registry-url: 'https://registry.npmjs.org' - - name: Verify package version - working-directory: npm/deepseek-tui - run: | - actual="$(node -p "require('./package.json').version")" - expected="${{ inputs.version }}" - if [ "${actual}" != "${expected}" ]; then - echo "package.json version ${actual} does not match requested ${expected}" >&2 - exit 1 - fi - - name: Publish wrapper to npm - working-directory: npm/deepseek-tui - run: npm publish --access public diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 52cd68db..fab5f071 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -46,6 +46,12 @@ jobs: build: needs: parity + # `parity` is gated to tag-push events. 
On manual `workflow_dispatch`, + # parity is skipped, so let `build` proceed when parity either succeeded + # or was skipped — but never when it actually failed or the run was + # cancelled. Operators using dispatch are expected to have already run + # the same gates locally / via ci.yml on `main`. + if: ${{ !cancelled() && (needs.parity.result == 'success' || needs.parity.result == 'skipped') }} strategy: matrix: include: diff --git a/README.md b/README.md index 5a9c151d..2a24aa5b 100644 --- a/README.md +++ b/README.md @@ -197,7 +197,7 @@ anywhere. ### 📖 Docs refresh README hero updated with intent statement and architecture summary. -ARCHITECTURE.md cleaned up for v0.8.6 (swarm references removed, current +ARCHITECTURE.md cleaned up for v0.8.6 (removed swarm tool surface, current crate map). CONTRIBUTING.md now has a "shape of a PR" section. Full changelog: [CHANGELOG.md](CHANGELOG.md). @@ -307,12 +307,12 @@ Filenames containing multi-byte characters (e.g., `dialogue_line__冰糖.mp3`) no longer panic the `matches_glob` function — byte-index slicing was replaced with `char_indices()` boundary-safe iteration. -### 🔄 Swarm UI reconciliation +### 🔄 Fanout UI reconciliation The fanout card no longer pre-seeds with zero-state workers, eliminating the "0 done · 0 running · 0 failed · N pending" vs sidebar "N running" contradiction. The sidebar now shows "dispatching N" before the first progress -event arrives from a `agent_swarm` invocation. +event arrives from a legacy fanout invocation. Full changelog: [CHANGELOG.md](CHANGELOG.md). diff --git a/config.example.toml b/config.example.toml index fc3993c6..e850b07d 100644 --- a/config.example.toml +++ b/config.example.toml @@ -75,7 +75,11 @@ sandbox_mode = "workspace-write" # read-only | workspace-write | danger-full-acc # auto_allow = ["cargo check", "npm run"] # # auto_allow = [] -max_subagents = 5 # optional (1-20) +max_subagents = 10 # optional (1-20) + +# Optional sub-agent tuning. 
max_concurrent overrides top-level max_subagents. +# [subagents] +# max_concurrent = 10 # Optional managed policy paths (defaults to /etc/deepseek/*.toml on unix): # managed_config_path = "/etc/deepseek/managed_config.toml" @@ -166,6 +170,7 @@ max_subagents = 5 # optional (1-20) [tui] alternate_screen = "auto" # auto | always | never mouse_capture = true # true copies only transcript user/assistant text; false uses raw terminal selection/copy +terminal_probe_timeout_ms = 500 # optional startup terminal-mode timeout (100-5000ms) # ───────────────────────────────────────────────────────────────────────────────── # Feature Flags diff --git a/crates/cli/src/update.rs b/crates/cli/src/update.rs index eb1b83b0..1a299a49 100644 --- a/crates/cli/src/update.rs +++ b/crates/cli/src/update.rs @@ -19,10 +19,8 @@ pub fn run_update() -> Result<()> { println!("Checking for updates..."); println!("Current binary: {}", current_exe.display()); - // Detect platform info - let os = std::env::consts::OS; - let arch = std::env::consts::ARCH; - let binary_name = format!("deepseek-{os}-{arch}"); + let binary_name = + release_asset_stem_for(&current_exe, std::env::consts::OS, std::env::consts::ARCH); // Step 1: Fetch latest release metadata let release = fetch_latest_release()?; @@ -30,22 +28,18 @@ pub fn run_update() -> Result<()> { println!("Latest release: {latest_tag}"); // Step 2: Find the matching asset - let asset = release - .assets - .iter() - .find(|a| a.name.contains(&binary_name)) - .with_context(|| { - format!( - "no asset found for platform {binary_name} in release {latest_tag}. 
\ Available assets: {}", - release - .assets - .iter() - .map(|a| a.name.as_str()) - .collect::>() - .join(", ") - ) - })?; + release + .assets + .iter() + .map(|a| a.name.as_str()) + .collect::>() + .join(", ") + ) + })?; println!("Downloading {}...", asset.name); @@ -90,6 +84,48 @@ pub fn run_update() -> Result<()> { Ok(()) } +pub(crate) fn release_arch_for_rust_arch(arch: &str) -> &str { + match arch { + "aarch64" => "arm64", + "x86_64" => "x64", + other => other, + } +} + +pub(crate) fn binary_prefix_for_exe(current_exe: &Path) -> &'static str { + let exe_name = current_exe + .file_name() + .and_then(|name| name.to_str()) + .unwrap_or("deepseek"); + if exe_name.contains("deepseek-tui") { + "deepseek-tui" + } else { + "deepseek" + } +} + +pub(crate) fn release_asset_stem_for(current_exe: &Path, os: &str, rust_arch: &str) -> String { + let prefix = binary_prefix_for_exe(current_exe); + let arch = release_arch_for_rust_arch(rust_arch); + format!("{prefix}-{os}-{arch}") +} + +pub(crate) fn asset_matches_platform(asset_name: &str, binary_name: &str) -> bool { + if asset_name.ends_with(".sha256") { + return false; + } + asset_name == binary_name + || asset_name == format!("{binary_name}.exe") + || asset_name.starts_with(&format!("{binary_name}.")) +} + +fn select_platform_asset<'a>(release: &'a Release, binary_name: &str) -> Option<&'a Asset> { + release + .assets + .iter() + .find(|asset| asset_matches_platform(&asset.name, binary_name)) +} + /// GitHub release metadata. #[derive(serde::Deserialize, Debug)] struct Release { @@ -245,6 +281,99 @@ fn backup_path_for(target: &Path) -> std::path::PathBuf { mod tests { use super::*; + /// Verify the arch mapping used when constructing asset names. + /// The mapping must use release-asset naming (arm64/x64), not Rust + /// stdlib constants (aarch64/x86_64). 
+ #[test] + fn test_arch_mapping() { + assert_eq!(release_arch_for_rust_arch("aarch64"), "arm64"); + assert_eq!(release_arch_for_rust_arch("x86_64"), "x64"); + // Pass-through for unknown arches + assert_eq!(release_arch_for_rust_arch("riscv64"), "riscv64"); + // The currently-compiled arch maps to a release asset name + let compiled_arch = std::env::consts::ARCH; + let asset_arch = release_arch_for_rust_arch(compiled_arch); + // Must not contain the raw Rust constant names + assert!( + !asset_arch.contains("aarch64") && !asset_arch.contains("x86_64"), + "asset arch '{asset_arch}' still uses raw Rust constant name" + ); + } + + /// Verify binary prefix detection for dispatcher vs TUI binary. + #[test] + fn test_binary_prefix_detection() { + // TUI binary should use deepseek-tui prefix + assert_eq!( + binary_prefix_for_exe(Path::new("deepseek-tui")), + "deepseek-tui" + ); + assert_eq!( + binary_prefix_for_exe(Path::new("deepseek-tui.exe")), + "deepseek-tui" + ); + assert_eq!( + binary_prefix_for_exe(Path::new("/usr/local/bin/deepseek-tui")), + "deepseek-tui" + ); + + // Dispatcher binary should use deepseek prefix + assert_eq!(binary_prefix_for_exe(Path::new("deepseek")), "deepseek"); + assert_eq!(binary_prefix_for_exe(Path::new("deepseek.exe")), "deepseek"); + assert_eq!( + binary_prefix_for_exe(Path::new("/usr/local/bin/deepseek")), + "deepseek" + ); + + // Fallback for unknown names + assert_eq!(binary_prefix_for_exe(Path::new("other-binary")), "deepseek"); + } + + #[test] + fn test_release_asset_stem_for_supported_platforms() { + let cases = [ + ("deepseek", "macos", "aarch64", "deepseek-macos-arm64"), + ("deepseek", "macos", "x86_64", "deepseek-macos-x64"), + ("deepseek", "linux", "x86_64", "deepseek-linux-x64"), + ("deepseek", "windows", "x86_64", "deepseek-windows-x64"), + ( + "deepseek-tui", + "macos", + "aarch64", + "deepseek-tui-macos-arm64", + ), + ("deepseek-tui", "linux", "x86_64", "deepseek-tui-linux-x64"), + ]; + + for (exe, os, arch, expected) in 
cases { + assert_eq!(release_asset_stem_for(Path::new(exe), os, arch), expected); + } + } + + #[test] + fn test_asset_matching_accepts_binary_assets_and_rejects_checksums() { + assert!(asset_matches_platform( + "deepseek-macos-arm64", + "deepseek-macos-arm64" + )); + assert!(asset_matches_platform( + "deepseek-macos-arm64.tar.gz", + "deepseek-macos-arm64" + )); + assert!(asset_matches_platform( + "deepseek-tui-windows-x64.exe", + "deepseek-tui-windows-x64" + )); + assert!(!asset_matches_platform( + "deepseek-tui-windows-x64.exe.sha256", + "deepseek-tui-windows-x64" + )); + assert!(!asset_matches_platform( + "deepseek-macos-aarch64.tar.gz", + "deepseek-macos-arm64" + )); + } + #[test] fn test_sha256_hex_known_value() { let data = b"hello"; @@ -285,4 +414,53 @@ mod tests { let content = std::fs::read_to_string(&target).unwrap(); assert_eq!(content, "fresh binary"); } + + /// Mocked GitHub release payload covering both the dispatcher (`deepseek`) + /// and the legacy TUI (`deepseek-tui`) binaries across our published + /// platform/arch matrix, plus a checksum sibling that must never be picked + /// as the primary binary. 
+ fn mocked_release() -> Release { + let json = r#"{ + "tag_name": "v0.8.8", + "assets": [ + { "name": "deepseek-linux-x64", "browser_download_url": "https://example.invalid/deepseek-linux-x64" }, + { "name": "deepseek-macos-x64", "browser_download_url": "https://example.invalid/deepseek-macos-x64" }, + { "name": "deepseek-macos-arm64", "browser_download_url": "https://example.invalid/deepseek-macos-arm64" }, + { "name": "deepseek-windows-x64.exe", "browser_download_url": "https://example.invalid/deepseek-windows-x64.exe" }, + { "name": "deepseek-windows-x64.exe.sha256", "browser_download_url": "https://example.invalid/deepseek-windows-x64.exe.sha256" }, + { "name": "deepseek-tui-linux-x64", "browser_download_url": "https://example.invalid/deepseek-tui-linux-x64" }, + { "name": "deepseek-tui-macos-x64", "browser_download_url": "https://example.invalid/deepseek-tui-macos-x64" }, + { "name": "deepseek-tui-macos-arm64", "browser_download_url": "https://example.invalid/deepseek-tui-macos-arm64" }, + { "name": "deepseek-tui-windows-x64.exe","browser_download_url": "https://example.invalid/deepseek-tui-windows-x64.exe" } + ] + }"#; + serde_json::from_str(json).expect("mock release JSON") + } + + #[test] + fn mocked_release_selects_dispatcher_asset_for_supported_platforms() { + let release = mocked_release(); + let cases = [ + ("macos", "aarch64", "deepseek-macos-arm64"), + ("macos", "x86_64", "deepseek-macos-x64"), + ("linux", "x86_64", "deepseek-linux-x64"), + ("windows", "x86_64", "deepseek-windows-x64.exe"), + ]; + + for (os, arch, expected) in cases { + let stem = release_asset_stem_for(Path::new("/usr/local/bin/deepseek"), os, arch); + let asset = select_platform_asset(&release, &stem) + .unwrap_or_else(|| panic!("no asset for {os}/{arch} (stem {stem})")); + assert_eq!(asset.name, expected, "{os}/{arch}"); + } + } + + #[test] + fn mocked_release_selects_tui_asset_when_tui_binary_invokes_update() { + let release = mocked_release(); + let stem = + 
release_asset_stem_for(Path::new("/usr/local/bin/deepseek-tui"), "macos", "aarch64"); + let asset = select_platform_asset(&release, &stem).expect("TUI platform asset"); + assert_eq!(asset.name, "deepseek-tui-macos-arm64"); + } } diff --git a/crates/tui/src/config.rs b/crates/tui/src/config.rs index 40d139b7..5ea98fdf 100644 --- a/crates/tui/src/config.rs +++ b/crates/tui/src/config.rs @@ -13,7 +13,7 @@ use crate::audit::log_sensitive_event; use crate::features::{Features, FeaturesToml, is_known_feature_key}; use crate::hooks::HooksConfig; -pub const DEFAULT_MAX_SUBAGENTS: usize = 5; +pub const DEFAULT_MAX_SUBAGENTS: usize = 10; pub const MAX_SUBAGENTS: usize = 20; pub const DEFAULT_TEXT_MODEL: &str = "deepseek-v4-pro"; pub const DEFAULT_NVIDIA_NIM_MODEL: &str = "deepseek-ai/deepseek-v4-pro"; @@ -326,6 +326,9 @@ pub struct RetryConfig { pub struct TuiConfig { pub alternate_screen: Option, pub mouse_capture: Option, + /// Timeout for startup terminal mode/probe calls in milliseconds. + /// Defaults to 500ms when omitted. + pub terminal_probe_timeout_ms: Option, /// Ordered list of footer items the user wants visible. `None` (the field /// missing from `config.toml`) means "use the built-in default order"; an /// empty `Some(vec![])` means "show nothing in the footer". @@ -514,7 +517,7 @@ impl StatusItem { StatusItem::Cost => "running USD total for this session", StatusItem::Status => "what the agent is doing right now", StatusItem::Coherence => "shown only when the engine intervenes", - StatusItem::Agents => "swarm in progress", + StatusItem::Agents => "agents or RLM work in progress", StatusItem::ReasoningReplay => "thinking tokens replayed each turn", StatusItem::Cache => "% of prompt served from cache", StatusItem::ContextPercent => "tokens used / model context window", @@ -644,6 +647,10 @@ pub struct SubagentsConfig { pub custom_model: Option, #[serde(default)] pub models: Option>, + /// Maximum concurrent sub-agents. 
Overrides the top-level max_subagents + /// setting. Clamped to [1, MAX_SUBAGENTS]. + #[serde(default)] + pub max_concurrent: Option, } /// Per-model context tuning. @@ -1263,15 +1270,24 @@ impl Config { } /// Return the maximum number of concurrent sub-agents. + /// Checks [subagents] max_concurrent first, then top-level max_subagents, + /// then falls back to DEFAULT_MAX_SUBAGENTS. #[must_use] pub fn max_subagents(&self) -> usize { + // Check [subagents] max_concurrent first + if let Some(subagents_cfg) = self.subagents.as_ref() + && let Some(max) = subagents_cfg.max_concurrent + { + return max.clamp(1, MAX_SUBAGENTS); + } + // Fall back to top-level max_subagents self.max_subagents .unwrap_or(DEFAULT_MAX_SUBAGENTS) .clamp(1, MAX_SUBAGENTS) } /// Raw sub-agent model override map. Values are validated at spawn time - /// so an invalid role/type model fails before any partial swarm spawn. + /// so an invalid role/type model fails before any partial agent spawn. #[must_use] pub fn subagent_model_overrides(&self) -> HashMap { let mut overrides = HashMap::new(); @@ -2507,6 +2523,47 @@ mod tests { } } + #[test] + fn max_subagents_defaults_to_ten() { + assert_eq!(Config::default().max_subagents(), DEFAULT_MAX_SUBAGENTS); + assert_eq!(DEFAULT_MAX_SUBAGENTS, 10); + } + + #[test] + fn subagents_max_concurrent_overrides_top_level_cap() { + let config = Config { + max_subagents: Some(3), + subagents: Some(SubagentsConfig { + max_concurrent: Some(12), + ..SubagentsConfig::default() + }), + ..Config::default() + }; + + assert_eq!(config.max_subagents(), 12); + } + + #[test] + fn max_subagents_clamps_subagents_max_concurrent() { + let low = Config { + subagents: Some(SubagentsConfig { + max_concurrent: Some(0), + ..SubagentsConfig::default() + }), + ..Config::default() + }; + assert_eq!(low.max_subagents(), 1); + + let high = Config { + subagents: Some(SubagentsConfig { + max_concurrent: Some(MAX_SUBAGENTS + 10), + ..SubagentsConfig::default() + }), + ..Config::default() + }; + 
assert_eq!(high.max_subagents(), MAX_SUBAGENTS); + } + #[test] fn save_api_key_writes_config() -> Result<()> { let _lock = lock_test_env(); diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index 37275833..ba4b3c19 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -505,7 +505,7 @@ impl Engine { .background_runtime(); let result = { - let mut manager = self.subagent_manager.lock().await; + let mut manager = self.subagent_manager.write().await; manager.spawn_background( Arc::clone(&self.subagent_manager), runtime, @@ -537,7 +537,7 @@ impl Engine { } Op::ListSubAgents => { let agents = { - let mut manager = self.subagent_manager.lock().await; + let mut manager = self.subagent_manager.write().await; manager.cleanup(Duration::from_secs(60 * 60)); manager.list() }; diff --git a/crates/tui/src/core/engine/context.rs b/crates/tui/src/core/engine/context.rs index c1e49686..7e80008b 100644 --- a/crates/tui/src/core/engine/context.rs +++ b/crates/tui/src/core/engine/context.rs @@ -107,6 +107,105 @@ fn tool_result_metadata_summary(metadata: Option<&serde_json::Value>) -> Option< None } +fn summarize_subagent_status(status: &serde_json::Value) -> String { + if let Some(raw) = status.as_str() { + return raw.to_string(); + } + if let Some(obj) = status.as_object() + && let Some((kind, value)) = obj.iter().next() + { + if let Some(reason) = value.as_str().filter(|s| !s.trim().is_empty()) { + return format!("{kind}({})", summarize_text(reason.trim(), 120)); + } + return kind.to_string(); + } + status.to_string() +} + +fn summarize_subagent_snapshot(snapshot: &serde_json::Value, index: usize) -> String { + let Some(obj) = snapshot.as_object() else { + return format!( + "- item {index}: {}", + summarize_text(&snapshot.to_string(), 240) + ); + }; + + let agent_id = obj + .get("agent_id") + .and_then(serde_json::Value::as_str) + .unwrap_or("unknown"); + let agent_type = obj + .get("agent_type") + 
.and_then(serde_json::Value::as_str) + .unwrap_or("agent"); + let status = obj + .get("status") + .map(summarize_subagent_status) + .unwrap_or_else(|| "unknown".to_string()); + let objective = obj + .get("assignment") + .and_then(|assignment| assignment.get("objective")) + .and_then(serde_json::Value::as_str) + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(|s| summarize_text(s, 220)); + let result = obj + .get("result") + .and_then(serde_json::Value::as_str) + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(|s| summarize_text(s, 1_600)); + let steps = obj.get("steps_taken").and_then(serde_json::Value::as_u64); + let duration_ms = obj.get("duration_ms").and_then(serde_json::Value::as_u64); + + let mut lines = vec![format!("- {agent_id} ({agent_type}) status={status}")]; + if let Some(objective) = objective { + lines.push(format!(" objective: {objective}")); + } + match result { + Some(result) => lines.push(format!(" result: {result}")), + None => lines.push(" result: not available yet".to_string()), + } + if steps.is_some() || duration_ms.is_some() { + let steps = steps + .map(|n| n.to_string()) + .unwrap_or_else(|| "?".to_string()); + let duration_ms = duration_ms + .map(|n| n.to_string()) + .unwrap_or_else(|| "?".to_string()); + lines.push(format!(" stats: steps={steps}, duration_ms={duration_ms}")); + } + lines.join("\n") +} + +fn compact_subagent_tool_result_for_context(tool_name: &str, raw: &str) -> Option { + if !matches!(tool_name, "agent_result" | "agent_wait" | "wait") { + return None; + } + + let parsed: serde_json::Value = serde_json::from_str(raw).ok()?; + let snapshots: Vec<&serde_json::Value> = match &parsed { + serde_json::Value::Array(items) => items.iter().collect(), + serde_json::Value::Object(_) => vec![&parsed], + _ => return None, + }; + + let mut out = String::from("[sub-agent result summarized for parent context]\n"); + out.push_str("Use `agent_result` again only if you need the full raw payload.\n"); + for (idx, snapshot) in 
snapshots.iter().enumerate() { + if idx >= 8 { + out.push_str(&format!( + "- ... {} more sub-agent result(s) omitted from context summary\n", + snapshots.len().saturating_sub(idx) + )); + break; + } + out.push_str(&summarize_subagent_snapshot(snapshot, idx + 1)); + out.push('\n'); + } + Some(out.trim_end().to_string()) +} + fn tool_result_context_limits_for_model(model: &str) -> ToolResultContextLimits { let is_large_context = context_window_for_model(model).is_some_and(|window| window >= LARGE_CONTEXT_WINDOW_TOKENS); @@ -136,6 +235,10 @@ pub(crate) fn compact_tool_result_for_context( return String::new(); } + if let Some(summary) = compact_subagent_tool_result_for_context(tool_name, raw) { + return summary; + } + let limits = tool_result_context_limits_for_model(model); let raw_chars = raw.chars().count(); let should_compact = raw_chars > limits.hard_limit_chars diff --git a/crates/tui/src/core/engine/tests.rs b/crates/tui/src/core/engine/tests.rs index df7467a1..a84cc926 100644 --- a/crates/tui/src/core/engine/tests.rs +++ b/crates/tui/src/core/engine/tests.rs @@ -451,6 +451,34 @@ fn v4_tool_outputs_keep_large_file_reads_in_context() { assert!(legacy_context.len() < v4_context.len()); } +#[test] +fn subagent_results_are_summarized_before_parent_context_insertion() { + let long_result = "verified detail\n".repeat(1_000); + let output = ToolResult::success( + json!({ + "agent_id": "agent_1234abcd", + "agent_type": "explore", + "assignment": { + "objective": "Inspect the RLM rendering path and report the smallest fix." 
+ }, + "model": "deepseek-v4-flash", + "status": "Completed", + "result": long_result, + "steps_taken": 12, + "duration_ms": 3456 + }) + .to_string(), + ); + + let context = compact_tool_result_for_context("deepseek-v4-pro", "agent_result", &output); + + assert!(context.contains("[sub-agent result summarized for parent context]")); + assert!(context.contains("agent_1234abcd (explore) status=Completed")); + assert!(context.contains("Inspect the RLM rendering path")); + assert!(context.contains("steps=12")); + assert!(context.len() < output.content.len()); +} + #[test] fn refresh_system_prompt_places_working_set_after_stable_prefix() { let tmp = tempdir().expect("tempdir"); diff --git a/crates/tui/src/cycle_manager.rs b/crates/tui/src/cycle_manager.rs index b7b7a19e..60687b3f 100644 --- a/crates/tui/src/cycle_manager.rs +++ b/crates/tui/src/cycle_manager.rs @@ -237,7 +237,7 @@ impl StructuredState { }; let subagent_snapshots = if let Some(handle) = subagents { - let guard = handle.lock().await; + let guard = handle.read().await; guard .list() .into_iter() diff --git a/crates/tui/src/localization.rs b/crates/tui/src/localization.rs index ead59a43..c4323143 100644 --- a/crates/tui/src/localization.rs +++ b/crates/tui/src/localization.rs @@ -765,7 +765,7 @@ fn english(id: MessageId) -> &'static str { MessageId::CmdStatuslineDescription => "Configure which items appear in the footer", MessageId::CmdSubagentsDescription => "List sub-agent status", MessageId::CmdSwarmDescription => { - "Run a multi-agent swarm turn (sequential | mixture | distill | deliberate)" + "Run a multi-agent fanout turn (sequential | mixture | distill | deliberate)" } MessageId::CmdSystemDescription => "Show current system prompt", MessageId::CmdTaskDescription => "Manage background tasks", @@ -873,7 +873,7 @@ fn english(id: MessageId) -> &'static str { } MessageId::KbJumpPlanAgentYolo => "Jump directly to Plan / Agent / YOLO mode", MessageId::KbAltJumpPlanAgentYolo => "Alternative jump to Plan / 
Agent / YOLO mode", - MessageId::KbFocusSidebar => "Focus Plan / Todos / Tasks / Agents / Agents / Auto sidebar", + MessageId::KbFocusSidebar => "Focus Plan / Todos / Tasks / Agents / Auto sidebar", MessageId::KbTogglePlanAgent => "Toggle between Plan and Agent modes", MessageId::KbSessionPicker => "Open the session picker", MessageId::KbPasteAttach => "Paste text or attach a clipboard image", @@ -1037,7 +1037,7 @@ fn japanese(id: MessageId) -> Option<&'static str> { MessageId::CmdStatuslineDescription => "フッターに表示する項目を設定", MessageId::CmdSubagentsDescription => "サブエージェントの状態を一覧表示", MessageId::CmdSwarmDescription => { - "マルチエージェントのスワームターンを実行(sequential | mixture | distill | deliberate)" + "マルチエージェントのファンアウトターンを実行(sequential | mixture | distill | deliberate)" } MessageId::CmdSystemDescription => "現在のシステムプロンプトを表示", MessageId::CmdTaskDescription => "バックグラウンドタスクを管理", @@ -1144,9 +1144,7 @@ fn japanese(id: MessageId) -> Option<&'static str> { } MessageId::KbJumpPlanAgentYolo => "Plan / Agent / YOLO モードに直接ジャンプ", MessageId::KbAltJumpPlanAgentYolo => "Plan / Agent / YOLO モードへの代替ジャンプ", - MessageId::KbFocusSidebar => { - "Plan / Todos / Tasks / Agents / Agents / Auto サイドバーにフォーカス" - } + MessageId::KbFocusSidebar => "Plan / Todos / Tasks / Agents / Auto サイドバーにフォーカス", MessageId::KbTogglePlanAgent => "Plan モードと Agent モードを切り替え", MessageId::KbSessionPicker => "セッションピッカーを開く", MessageId::KbPasteAttach => "テキストを貼り付けまたはクリップボード画像を添付", @@ -1287,7 +1285,7 @@ fn chinese_simplified(id: MessageId) -> Option<&'static str> { MessageId::CmdStatuslineDescription => "配置底栏要显示哪些条目", MessageId::CmdSubagentsDescription => "列出子代理状态", MessageId::CmdSwarmDescription => { - "运行多代理集群轮次(sequential | mixture | distill | deliberate)" + "运行多代理扇出轮次(sequential | mixture | distill | deliberate)" } MessageId::CmdSystemDescription => "显示当前系统提示词", MessageId::CmdTaskDescription => "管理后台任务", @@ -1545,7 +1543,7 @@ fn portuguese_brazil(id: MessageId) -> Option<&'static str> { MessageId::CmdStatuslineDescription => 
"Configurar quais itens aparecem no rodapé", MessageId::CmdSubagentsDescription => "Listar o status dos sub-agentes", MessageId::CmdSwarmDescription => { - "Executar turno de enxame multi-agente (sequential | mixture | distill | deliberate)" + "Executar turno fanout multi-agente (sequential | mixture | distill | deliberate)" } MessageId::CmdSystemDescription => "Exibir o prompt de sistema atual", MessageId::CmdTaskDescription => "Gerenciar tarefas em segundo plano", @@ -1660,9 +1658,7 @@ fn portuguese_brazil(id: MessageId) -> Option<&'static str> { } MessageId::KbJumpPlanAgentYolo => "Pular direto para modo Plan / Agent / YOLO", MessageId::KbAltJumpPlanAgentYolo => "Salto alternativo para modo Plan / Agent / YOLO", - MessageId::KbFocusSidebar => { - "Focar barra lateral Plan / Todos / Tasks / Agents / Agents / Auto" - } + MessageId::KbFocusSidebar => "Focar barra lateral Plan / Todos / Tasks / Agents / Auto", MessageId::KbTogglePlanAgent => "Alternar entre modos Plan e Agent", MessageId::KbSessionPicker => "Abrir seletor de sessões", MessageId::KbPasteAttach => "Colar texto ou anexar imagem da área de transferência", diff --git a/crates/tui/src/main.rs b/crates/tui/src/main.rs index 553f204f..140a9157 100644 --- a/crates/tui/src/main.rs +++ b/crates/tui/src/main.rs @@ -525,7 +525,9 @@ async fn main() -> Result<()> { // Restore the terminal first so the panic message itself, plus the // user's shell after exit, are visible. Best-effort — we may not be // in raw / alt-screen mode if the panic happens pre-TUI. 
+ use crossterm::event::PopKeyboardEnhancementFlags; use crossterm::terminal::{LeaveAlternateScreen, disable_raw_mode}; + let _ = crossterm::execute!(std::io::stdout(), PopKeyboardEnhancementFlags); let _ = disable_raw_mode(); let _ = crossterm::execute!(std::io::stdout(), LeaveAlternateScreen); @@ -3368,6 +3370,7 @@ mod terminal_mode_tests { tui: Some(crate::config::TuiConfig { alternate_screen: None, mouse_capture: Some(false), + terminal_probe_timeout_ms: None, status_items: None, }), ..Config::default() @@ -3391,6 +3394,7 @@ mod terminal_mode_tests { tui: Some(crate::config::TuiConfig { alternate_screen: None, mouse_capture: Some(true), + terminal_probe_timeout_ms: None, status_items: None, }), ..Config::default() diff --git a/crates/tui/src/palette.rs b/crates/tui/src/palette.rs index 7f95b729..874b44ab 100644 --- a/crates/tui/src/palette.rs +++ b/crates/tui/src/palette.rs @@ -109,6 +109,18 @@ pub struct UiTheme { pub composer_bg: Color, pub selection_bg: Color, pub header_bg: Color, + /// Statusline mode colors (agent/yolo/plan) + pub mode_agent: Color, + pub mode_yolo: Color, + pub mode_plan: Color, + /// Statusline status colors + pub status_ready: Color, + pub status_working: Color, + pub status_warning: Color, + /// Statusline text colors + pub text_dim: Color, + pub text_hint: Color, + pub text_muted: Color, } pub const UI_THEME: UiTheme = UiTheme { @@ -116,6 +128,15 @@ pub const UI_THEME: UiTheme = UiTheme { composer_bg: DEEPSEEK_SLATE, selection_bg: SELECTION_BG, header_bg: DEEPSEEK_INK, + mode_agent: MODE_AGENT, + mode_yolo: MODE_YOLO, + mode_plan: MODE_PLAN, + status_ready: TEXT_MUTED, + status_working: DEEPSEEK_SKY, + status_warning: STATUS_WARNING, + text_dim: TEXT_DIM, + text_hint: TEXT_HINT, + text_muted: TEXT_MUTED, }; // === Color depth + brightness helpers (v0.6.6 UI redesign) === diff --git a/crates/tui/src/prompts/base.md b/crates/tui/src/prompts/base.md index a3ccd520..54682ca6 100644 --- a/crates/tui/src/prompts/base.md +++ 
b/crates/tui/src/prompts/base.md @@ -66,7 +66,7 @@ Sub-agents are cheap — DeepSeek V4 Flash costs $0.14/M input. Use them liberal - **Parallel implementation**: After a plan is laid out, spawn one sub-agent per independent leaf task. Each does one thing well; you integrate results. - **Solo tasks**: A single read, a single search, a focused question — do these yourself. Spawning has overhead; one-turn reads are faster direct. - **Sequential work**: If step B depends on step A's output, run A yourself, then decide whether to spawn B based on what A found. Don't pre-spawn dependent work. -- **Max 5 in flight**: The dispatcher caps concurrent sub-agents at 5. When you need more, batch them: spawn 5, wait for completions, spawn the next 5. +- **Concurrent sub-agent cap**: The dispatcher defaults to 10 concurrent sub-agents (configurable via `[subagents].max_concurrent` in `config.toml`, hard ceiling 20). When you need more, batch them: spawn up to the cap, wait for completions, then spawn the next batch. ## Parallel-First Heuristic diff --git a/crates/tui/src/session_manager.rs b/crates/tui/src/session_manager.rs index 33328b87..9c3ab118 100644 --- a/crates/tui/src/session_manager.rs +++ b/crates/tui/src/session_manager.rs @@ -46,6 +46,10 @@ pub struct QueuedSessionMessage { pub struct OfflineQueueState { #[serde(default = "default_queue_schema_version")] pub schema_version: u32, + /// Session ID this queue belongs to. Queue is only restored when + /// resuming the same session to prevent stale messages leaking into new chats. + #[serde(default)] + pub session_id: Option, #[serde(default)] pub messages: Vec, #[serde(default)] @@ -56,6 +60,7 @@ impl Default for OfflineQueueState { fn default() -> Self { Self { schema_version: CURRENT_QUEUE_SCHEMA_VERSION, + session_id: None, messages: Vec::new(), draft: None, } @@ -208,11 +213,17 @@ impl SessionManager { } /// Save offline queue state (queued + draft messages). 
- pub fn save_offline_queue_state(&self, state: &OfflineQueueState) -> std::io::Result { + pub fn save_offline_queue_state( + &self, + state: &OfflineQueueState, + session_id: Option<&str>, + ) -> std::io::Result { let checkpoints = self.sessions_dir.join("checkpoints"); fs::create_dir_all(&checkpoints)?; let path = checkpoints.join("offline_queue.json"); - let content = serde_json::to_string_pretty(state) + let mut state_with_id = state.clone(); + state_with_id.session_id = session_id.map(|s| s.to_string()); + let content = serde_json::to_string_pretty(&state_with_id) .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?; write_atomic(&path, content.as_bytes())?; Ok(path) @@ -894,7 +905,7 @@ mod tests { }; manager - .save_offline_queue_state(&state) + .save_offline_queue_state(&state, Some("test-session")) .expect("save queue state"); let loaded = manager .load_offline_queue_state() diff --git a/crates/tui/src/tools/subagent/mod.rs b/crates/tui/src/tools/subagent/mod.rs index 6bd682ca..25c51535 100644 --- a/crates/tui/src/tools/subagent/mod.rs +++ b/crates/tui/src/tools/subagent/mod.rs @@ -9,7 +9,7 @@ use std::fs; use std::path::{Path, PathBuf}; use std::sync::Arc; use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; -use tokio::sync::Mutex; +use tokio::sync::{Mutex, RwLock}; use anyhow::{Result, anyhow}; use async_trait::async_trait; @@ -526,7 +526,7 @@ impl SubAgentRuntime { /// Return a child runtime that is deliberately detached from the parent /// turn cancellation token. Background sub-agents should keep running when - /// the parent turn is cancelled; explicit agent/swarm cancellation still + /// the parent turn is cancelled; explicit agent cancellation still /// aborts their task handles through the manager. 
#[must_use] pub fn background_runtime(&self) -> Self { @@ -769,13 +769,16 @@ impl SubAgentManager { self.agents .values() .filter(|agent| { + // Exclude non-running statuses if agent.status != SubAgentStatus::Running { return false; } - !agent - .task_handle - .as_ref() - .is_some_and(tokio::task::JoinHandle::is_finished) + // Exclude persisted agents with no task_handle (they're not actually running) + let Some(handle) = agent.task_handle.as_ref() else { + return false; + }; + // Exclude agents whose task has finished (status will be updated to Completed shortly) + !handle.is_finished() }) .count() } @@ -1192,7 +1195,7 @@ impl SubAgentManager { } /// Thread-safe wrapper for `SubAgentManager`. -pub type SharedSubAgentManager = Arc>; +pub type SharedSubAgentManager = Arc>; fn default_state_path(workspace: &Path) -> PathBuf { workspace @@ -1234,7 +1237,7 @@ pub fn new_shared_subagent_manager(workspace: PathBuf, max_agents: usize) -> Sha if let Err(err) = manager.load_state() { eprintln!("Failed to load sub-agent state: {err}"); } - Arc::new(Mutex::new(manager)) + Arc::new(RwLock::new(manager)) } // === Tool Implementations === @@ -1276,9 +1279,10 @@ impl ToolSpec for AgentSpawnTool { fn description(&self) -> &'static str { "Spawn a background sub-agent for a focused task. Returns an agent_id immediately; \ - follow with agent_result to retrieve the final result. Max 5 in flight (each is a \ - full sub-agent loop; cancel or wait if you hit the cap). For parallel one-shot LLM \ - queries, just emit multiple tool calls in one turn — the dispatcher runs them in parallel." + follow with agent_result to retrieve the final result. Default cap of 10 concurrent \ + sub-agents (configurable via `[subagents].max_concurrent`); each is a full sub-agent \ + loop, so cancel or wait if you hit the cap. For parallel one-shot LLM queries, just \ + emit multiple tool calls in one turn — the dispatcher runs them in parallel." 
} fn input_schema(&self) -> Value { @@ -1415,7 +1419,7 @@ impl ToolSpec for AgentSpawnTool { }; child_runtime.model = effective_model.clone(); - let mut manager = self.manager.lock().await; + let mut manager = self.manager.write().await; let result = manager .spawn_background_with_assignment_options( @@ -1526,7 +1530,7 @@ impl ToolSpec for AgentResultTool { let (result, timed_out) = if block { wait_for_result(&self.manager, agent_id, Duration::from_millis(timeout_ms)).await? } else { - let manager = self.manager.lock().await; + let manager = self.manager.read().await; ( manager .get_result(agent_id) @@ -1599,7 +1603,7 @@ impl ToolSpec for AgentCancelTool { async fn execute(&self, input: Value, _context: &ToolContext) -> Result { let agent_id = required_str(&input, "agent_id")?; - let mut manager = self.manager.lock().await; + let mut manager = self.manager.write().await; let result = manager .cancel(agent_id) .map_err(|e| ToolError::execution_failed(format!("Failed to cancel sub-agent: {e}")))?; @@ -1669,7 +1673,7 @@ impl ToolSpec for AgentCloseTool { .or_else(|| input.get("agent_id")) .and_then(|v| v.as_str()) .ok_or_else(|| ToolError::missing_field("id"))?; - let mut manager = self.manager.lock().await; + let mut manager = self.manager.write().await; let result = manager .cancel(agent_id) .map_err(|e| ToolError::execution_failed(format!("Failed to close sub-agent: {e}")))?; @@ -1740,7 +1744,7 @@ impl ToolSpec for AgentResumeTool { .or_else(|| input.get("agent_id")) .and_then(|v| v.as_str()) .ok_or_else(|| ToolError::missing_field("id"))?; - let mut manager = self.manager.lock().await; + let mut manager = self.manager.write().await; let result = manager .resume(Arc::clone(&self.manager), self.runtime.clone(), agent_id) .map_err(|e| ToolError::execution_failed(format!("Failed to resume sub-agent: {e}")))?; @@ -1783,7 +1787,7 @@ impl ToolSpec for AgentListTool { _input: Value, _context: &ToolContext, ) -> Result { - let mut manager = self.manager.lock().await; + let 
mut manager = self.manager.write().await; manager.cleanup(COMPLETED_AGENT_RETENTION); let results = manager.list(); ToolResult::json(&results).map_err(|e| ToolError::execution_failed(e.to_string())) @@ -1862,7 +1866,7 @@ impl ToolSpec for AgentSendInputTool { let message = parse_text_or_items(&input, &["message", "input"], "items", "message")?; let interrupt = optional_bool(&input, "interrupt", false); - let mut manager = self.manager.lock().await; + let mut manager = self.manager.write().await; manager .send_input(agent_id, message, interrupt) .map_err(|e| ToolError::execution_failed(e.to_string()))?; @@ -1965,7 +1969,7 @@ impl ToolSpec for AgentAssignTool { async fn execute(&self, input: Value, _context: &ToolContext) -> Result { let request = parse_assign_request(&input)?; - let mut manager = self.manager.lock().await; + let mut manager = self.manager.write().await; let result = manager .assign( &request.agent_id, @@ -2049,7 +2053,7 @@ impl ToolSpec for AgentWaitTool { .clamp(MIN_WAIT_TIMEOUT_MS, MAX_RESULT_TIMEOUT_MS); let mut ids = parse_wait_ids(&input); if ids.is_empty() { - let manager = self.manager.lock().await; + let manager = self.manager.read().await; ids = manager .list() .into_iter() @@ -2280,7 +2284,7 @@ async fn run_subagent_task(task: SubAgentTask) { ) .await; - let mut manager = task.manager_handle.lock().await; + let mut manager = task.manager_handle.write().await; match &result { Ok(res) => manager.update_from_result(&task.agent_id, res.clone()), Err(err) => manager.update_failed(&task.agent_id, err.to_string()), @@ -2645,7 +2649,7 @@ async fn wait_for_result( loop { let snapshot = { - let manager = manager.lock().await; + let manager = manager.read().await; manager .get_result(agent_id) .map_err(|e| ToolError::execution_failed(e.to_string()))? 
@@ -2672,7 +2676,7 @@ async fn wait_for_agents( loop { let snapshots = { - let manager = manager.lock().await; + let manager = manager.read().await; ids.iter() .map(|id| { manager diff --git a/crates/tui/src/tools/subagent/tests.rs b/crates/tui/src/tools/subagent/tests.rs index af65f7d0..754ba29f 100644 --- a/crates/tui/src/tools/subagent/tests.rs +++ b/crates/tui/src/tools/subagent/tests.rs @@ -165,7 +165,7 @@ fn test_parse_assign_request_requires_update_fields() { #[test] fn test_send_input_schema_does_not_require_message_field() { - let manager = Arc::new(Mutex::new(SubAgentManager::new(PathBuf::from("."), 1))); + let manager = Arc::new(RwLock::new(SubAgentManager::new(PathBuf::from("."), 1))); let schema = AgentSendInputTool::new(manager, "send_input").input_schema(); let required = schema .get("required") @@ -300,7 +300,7 @@ fn test_subagent_tool_registry_reports_unavailable_tools() { #[tokio::test] async fn test_wait_for_result_reports_timeout_when_still_running() { - let manager = Arc::new(Mutex::new(SubAgentManager::new(PathBuf::from("."), 2))); + let manager = Arc::new(RwLock::new(SubAgentManager::new(PathBuf::from("."), 2))); let (input_tx, _input_rx) = mpsc::unbounded_channel(); let agent = SubAgent::new( SubAgentType::Explore, @@ -313,7 +313,7 @@ async fn test_wait_for_result_reports_timeout_when_still_running() { ); let agent_id = agent.id.clone(); { - let mut guard = manager.lock().await; + let mut guard = manager.write().await; guard.agents.insert(agent_id.clone(), agent); } @@ -324,8 +324,38 @@ async fn test_wait_for_result_reports_timeout_when_still_running() { assert_eq!(snapshot.status, SubAgentStatus::Running); } +#[tokio::test] +async fn test_running_count_counts_only_agents_with_live_task_handles() { + let mut manager = SubAgentManager::new(PathBuf::from("."), 1); + let (input_tx, _input_rx) = mpsc::unbounded_channel(); + let mut agent = SubAgent::new( + SubAgentType::Explore, + "prompt".to_string(), + make_assignment(), + 
"deepseek-v4-flash".to_string(), + Some("Blue".to_string()), + Some(vec!["read_file".to_string()]), + input_tx, + ); + agent.status = SubAgentStatus::Running; + let handle = tokio::spawn(async { + tokio::time::sleep(Duration::from_secs(60)).await; + }); + agent.task_handle = Some(handle); + let agent_id = agent.id.clone(); + manager.agents.insert(agent.id.clone(), agent); + + assert_eq!(manager.running_count(), 1); + manager + .agents + .get_mut(&agent_id) + .and_then(|agent| agent.task_handle.take()) + .expect("live task handle") + .abort(); +} + #[test] -fn test_running_count_respects_limit() { +fn test_running_count_ignores_running_status_without_task_handle() { let mut manager = SubAgentManager::new(PathBuf::from("."), 1); let (input_tx, _input_rx) = mpsc::unbounded_channel(); let mut agent = SubAgent::new( @@ -340,7 +370,7 @@ fn test_running_count_respects_limit() { agent.status = SubAgentStatus::Running; manager.agents.insert(agent.id.clone(), agent); - assert_eq!(manager.running_count(), 1); + assert_eq!(manager.running_count(), 0); } #[tokio::test] @@ -543,7 +573,7 @@ fn test_wrap_with_deprecation_notice_preserves_existing_metadata() { #[test] fn test_canonical_agent_send_input_has_no_deprecation() { - let manager = Arc::new(Mutex::new(SubAgentManager::new(PathBuf::from("."), 1))); + let manager = Arc::new(RwLock::new(SubAgentManager::new(PathBuf::from("."), 1))); // The canonical name "agent_send_input" must NOT receive a deprecation notice. // We verify this by inspecting the tool's name — the deprecation branch // only fires when name == "send_input". diff --git a/crates/tui/src/tui/app.rs b/crates/tui/src/tui/app.rs index 7dbc408c..084480c2 100644 --- a/crates/tui/src/tui/app.rs +++ b/crates/tui/src/tui/app.rs @@ -2387,6 +2387,111 @@ impl App { self.needs_redraw = true; } + /// Delete the word before the cursor. 
+ pub fn delete_word_backward(&mut self) { + self.clear_input_history_navigation(); + self.selected_attachment_index = None; + if self.cursor_position == 0 { + return; + } + + let cursor_byte = byte_index_at_char(&self.input, self.cursor_position); + let mut word_start = cursor_byte; + + while word_start > 0 { + let Some((prev, ch)) = self.input[..word_start].char_indices().next_back() else { + break; + }; + if !ch.is_whitespace() { + break; + } + word_start = prev; + } + + while word_start > 0 { + let Some((prev, ch)) = self.input[..word_start].char_indices().next_back() else { + break; + }; + if ch.is_whitespace() { + break; + } + word_start = prev; + } + + if word_start < cursor_byte { + self.input.replace_range(word_start..cursor_byte, ""); + self.cursor_position = char_count(&self.input[..word_start]); + self.slash_menu_hidden = false; + self.mention_menu_hidden = false; + self.mention_menu_selected = 0; + self.needs_redraw = true; + } + } + + /// Delete from the cursor to the start of the line. + pub fn delete_to_start_of_line(&mut self) { + self.clear_input_history_navigation(); + self.selected_attachment_index = None; + if self.cursor_position == 0 { + return; + } + + let cursor_byte = byte_index_at_char(&self.input, self.cursor_position); + // Find the start of the current line (last newline or start of string) + let line_start = self.input[..cursor_byte] + .rfind('\n') + .map(|idx| idx + 1) + .unwrap_or(0); + + if line_start < cursor_byte { + self.input.replace_range(line_start..cursor_byte, ""); + self.cursor_position = char_count(&self.input[..line_start]); + self.slash_menu_hidden = false; + self.mention_menu_hidden = false; + self.mention_menu_selected = 0; + self.needs_redraw = true; + } + } + + /// Delete the word after the cursor. 
+ pub fn delete_word_forward(&mut self) { + self.clear_input_history_navigation(); + self.selected_attachment_index = None; + let cursor_byte = byte_index_at_char(&self.input, self.cursor_position); + if cursor_byte >= self.input.len() { + return; + } + + let mut word_end = cursor_byte; + while word_end < self.input.len() { + let Some(ch) = self.input[word_end..].chars().next() else { + break; + }; + if !ch.is_whitespace() { + break; + } + word_end += ch.len_utf8(); + } + + while word_end < self.input.len() { + let Some(ch) = self.input[word_end..].chars().next() else { + break; + }; + if ch.is_whitespace() { + break; + } + word_end += ch.len_utf8(); + } + + if cursor_byte < word_end { + self.input.replace_range(cursor_byte..word_end, ""); + self.slash_menu_hidden = false; + self.mention_menu_hidden = false; + self.mention_menu_selected = 0; + self.needs_redraw = true; + } + } + /// Cut from the cursor to the end of the current logical line into the /// kill buffer. If the cursor is already at end-of-line and a trailing /// newline exists, that newline is consumed so repeated invocations @@ -3921,6 +4026,54 @@ mod tests { } } + #[test] + fn delete_word_backward_removes_previous_word_only() { + let mut app = App::new(test_options(false), &Config::default()); + app.input = "hello world".to_string(); + app.cursor_position = char_count(&app.input); + + app.delete_word_backward(); + + assert_eq!(app.input, "hello "); + assert_eq!(app.cursor_position, char_count("hello ")); + } + + #[test] + fn delete_word_backward_handles_trailing_space_and_utf8() { + let mut app = App::new(test_options(false), &Config::default()); + app.input = "cafe 你好 ".to_string(); + app.cursor_position = char_count(&app.input); + + app.delete_word_backward(); + + assert_eq!(app.input, "cafe "); + assert_eq!(app.cursor_position, char_count("cafe ")); + } + + #[test] + fn delete_word_forward_handles_leading_space_and_utf8() { + let mut app = App::new(test_options(false), &Config::default()); + 
app.input = "hello 你好 world".to_string(); + app.cursor_position = char_count("hello"); + + app.delete_word_forward(); + + assert_eq!(app.input, "hello world"); + assert_eq!(app.cursor_position, char_count("hello")); + } + + #[test] + fn delete_to_start_of_line_respects_multiline_cursor() { + let mut app = App::new(test_options(false), &Config::default()); + app.input = "first\nsecond line".to_string(); + app.cursor_position = char_count("first\nsecond"); + + app.delete_to_start_of_line(); + + assert_eq!(app.input, "first\n line"); + assert_eq!(app.cursor_position, char_count("first\n")); + } + #[test] fn kill_and_yank_handle_multibyte_utf8() { let mut app = App::new(test_options(false), &Config::default()); diff --git a/crates/tui/src/tui/history.rs b/crates/tui/src/tui/history.rs index dd78af10..fed68a5c 100644 --- a/crates/tui/src/tui/history.rs +++ b/crates/tui/src/tui/history.rs @@ -3099,6 +3099,32 @@ mod tests { ); } + #[test] + fn generic_tool_cell_renders_rlm_with_rlm_label_not_swarm() { + let cell = GenericToolCell { + name: "rlm".to_string(), + status: ToolStatus::Running, + input_summary: Some("task: compare source trees".to_string()), + output: None, + prompts: None, + }; + let lines = cell.lines_with_mode(80, true, super::RenderMode::Live); + let header_visible: String = lines[0] + .spans + .iter() + .map(|s| s.content.as_ref()) + .collect::(); + + assert!( + header_visible.contains(" rlm "), + "RLM card should identify RLM work: {header_visible:?}" + ); + assert!( + !header_visible.contains("swarm"), + "RLM card must not use removed swarm wording: {header_visible:?}" + ); + } + // === Reasoning treatment tests (v0.6.6 UI redesign) === #[test] diff --git a/crates/tui/src/tui/sidebar.rs b/crates/tui/src/tui/sidebar.rs index a49d9269..9cf32914 100644 --- a/crates/tui/src/tui/sidebar.rs +++ b/crates/tui/src/tui/sidebar.rs @@ -22,6 +22,7 @@ use crate::tools::subagent::SubAgentStatus; use crate::tools::todo::TodoStatus; use super::app::{App, SidebarFocus}; 
+use super::history::{HistoryCell, ToolCell, ToolStatus}; use super::subagent_routing::active_fanout_counts; use super::ui::truncate_line_to_width; @@ -386,6 +387,7 @@ fn render_sidebar_subagents(f: &mut Frame, area: Rect, app: &App) { let (fanout_running, fanout_total) = active_fanout_counts(app) .map(|(running, total)| (running, Some(total))) .unwrap_or((0, None)); + let foreground_rlm_running = foreground_rlm_running(app); let summary = SidebarSubagentSummary { cached_total: app.subagent_cache.len(), @@ -393,6 +395,7 @@ fn render_sidebar_subagents(f: &mut Frame, area: Rect, app: &App) { progress_only_count, fanout_total, fanout_running, + foreground_rlm_running, role_counts, }; let lines = subagent_navigator_lines(&summary, content_width); @@ -410,9 +413,22 @@ pub struct SidebarSubagentSummary { pub progress_only_count: usize, pub fanout_total: Option, pub fanout_running: usize, + pub foreground_rlm_running: bool, pub role_counts: std::collections::BTreeMap, } +fn foreground_rlm_running(app: &App) -> bool { + app.active_cell.as_ref().is_some_and(|active| { + active.entries().iter().any(|entry| { + matches!( + entry, + HistoryCell::Tool(ToolCell::Generic(generic)) + if generic.name == "rlm" && generic.status == ToolStatus::Running + ) + }) + }) +} + /// Build the demoted navigator lines from a summary projection. Public /// for the snapshot test in this module. 
pub fn subagent_navigator_lines( @@ -422,7 +438,11 @@ pub fn subagent_navigator_lines( let mut lines: Vec> = Vec::with_capacity(4); let fanout_total = summary.fanout_total.unwrap_or(0); - if summary.cached_total == 0 && summary.progress_only_count == 0 && fanout_total == 0 { + if summary.cached_total == 0 + && summary.progress_only_count == 0 + && fanout_total == 0 + && !summary.foreground_rlm_running + { lines.push(Line::from(Span::styled( "No agents", Style::default().fg(palette::TEXT_MUTED), @@ -471,6 +491,16 @@ pub fn subagent_navigator_lines( ))); } + if summary.foreground_rlm_running { + lines.push(Line::from(vec![ + Span::styled("RLM", Style::default().fg(palette::DEEPSEEK_SKY).bold()), + Span::styled( + " foreground work active", + Style::default().fg(palette::TEXT_DIM), + ), + ])); + } + lines.push(Line::from(Span::styled( "(see transcript card for detail)", Style::default().fg(palette::TEXT_MUTED).italic(), @@ -564,6 +594,7 @@ mod tests { progress_only_count: 0, fanout_total: None, fanout_running: 0, + foreground_rlm_running: false, role_counts, }; let text = lines_to_text(&subagent_navigator_lines(&summary, 64)); @@ -581,13 +612,14 @@ mod tests { } #[test] - fn navigator_uses_fanout_total_when_swarm_has_seeded_slots() { + fn navigator_uses_fanout_total_when_fanout_has_seeded_slots() { let summary = SidebarSubagentSummary { cached_total: 1, cached_running: 1, progress_only_count: 0, fanout_total: Some(6), fanout_running: 1, + foreground_rlm_running: false, role_counts: std::collections::BTreeMap::new(), }; @@ -607,6 +639,7 @@ mod tests { progress_only_count: 0, fanout_total: None, fanout_running: 0, + foreground_rlm_running: false, role_counts, }; let text = lines_to_text(&subagent_navigator_lines(&summary, 32)); @@ -626,6 +659,7 @@ mod tests { progress_only_count: 0, fanout_total: None, fanout_running: 0, + foreground_rlm_running: false, role_counts, }; let lines = subagent_navigator_lines(&summary, 16); @@ -639,4 +673,20 @@ mod tests { "role line 
{role_line:?} exceeded content_width" ); } + + #[test] + fn navigator_shows_foreground_rlm_work_when_no_subagents_exist() { + let summary = SidebarSubagentSummary { + foreground_rlm_running: true, + ..SidebarSubagentSummary::default() + }; + let text = lines_to_text(&subagent_navigator_lines(&summary, 64)); + + assert!(!text[0].contains("No agents"), "header: {:?}", text); + assert!( + text.iter() + .any(|line| line.contains("RLM foreground work active")), + "RLM work must be visible in Agents panel: {text:?}" + ); + } } diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs index 48c74dba..c3112c75 100644 --- a/crates/tui/src/tui/ui.rs +++ b/crates/tui/src/tui/ui.rs @@ -11,7 +11,7 @@ use crossterm::{ event::{ self, DisableBracketedPaste, DisableMouseCapture, EnableBracketedPaste, EnableMouseCapture, Event, KeyCode, KeyEvent, KeyEventKind, KeyModifiers, MouseButton, MouseEvent, - MouseEventKind, + MouseEventKind, PopKeyboardEnhancementFlags, }, execute, terminal::{EnterAlternateScreen, LeaveAlternateScreen, disable_raw_mode, enable_raw_mode}, @@ -130,6 +130,7 @@ const WEB_CONFIG_POLL_MS: u64 = 16; const UI_STATUS_ANIMATION_MS: u64 = 80; const WORKSPACE_CONTEXT_REFRESH_SECS: u64 = 15; const SIDEBAR_VISIBLE_MIN_WIDTH: u16 = 100; +const DEFAULT_TERMINAL_PROBE_TIMEOUT_MS: u64 = 500; /// Run the interactive TUI event loop. 
/// @@ -146,7 +147,29 @@ pub async fn run_tui(config: &Config, options: TuiOptions) -> Result<()> { let use_alt_screen = options.use_alt_screen; let use_mouse_capture = options.use_mouse_capture; let use_bracketed_paste = options.use_bracketed_paste; - enable_raw_mode()?; + + // Terminal probe with timeout to prevent hanging on unresponsive terminals + let probe_timeout = terminal_probe_timeout(config); + let enable_raw = tokio::task::spawn_blocking(move || { + enable_raw_mode().map_err(|e| anyhow::anyhow!("Failed to enable raw mode: {}", e)) + }); + + match tokio::time::timeout(probe_timeout, enable_raw).await { + Ok(inner_result) => { + inner_result??; // propagate both join and raw-mode errors + } + Err(_) => { + tracing::warn!( + "Terminal probe timed out after {}ms - terminal may be unresponsive", + probe_timeout.as_millis() + ); + return Err(anyhow::anyhow!( + "Terminal probe timed out after {}ms", + probe_timeout.as_millis() + )); + } + } + let mut stdout = io::stdout(); if use_alt_screen { execute!(stdout, EnterAlternateScreen)?; @@ -233,17 +256,29 @@ pub async fn run_tui(config: &Config, options: TuiOptions) -> Result<()> { if let Ok(manager) = SessionManager::default_location() { match manager.load_offline_queue_state() { Ok(Some(state)) => { - app.queued_messages = state - .messages - .into_iter() - .map(queued_session_to_ui) - .collect(); - app.queued_draft = state.draft.map(queued_session_to_ui); - if app.status_message.is_none() && app.queued_message_count() > 0 { - app.status_message = Some(format!( - "Recovered {} queued message(s)", - app.queued_message_count() - )); + // Only restore queue if session_id matches (or if we're resuming the same session) + let should_restore = match (&state.session_id, &app.current_session_id) { + (Some(saved_id), Some(current_id)) => saved_id == current_id, + (None, _) => false, // Legacy unscoped queues are stale-risky; fail closed. 
+ (_, None) => false, // No current session - don't restore + }; + + if should_restore { + app.queued_messages = state + .messages + .into_iter() + .map(queued_session_to_ui) + .collect(); + app.queued_draft = state.draft.map(queued_session_to_ui); + if app.status_message.is_none() && app.queued_message_count() > 0 { + app.status_message = Some(format!( + "Restored {} queued message(s) from previous session — ↑ to edit, Ctrl+X to discard", + app.queued_message_count() + )); + } + } else { + // Session mismatch - clear the stale queue + let _ = manager.clear_offline_queue_state(); } } Ok(None) => {} @@ -342,6 +377,7 @@ pub async fn run_tui(config: &Config, options: TuiOptions) -> Result<()> { persistence_actor::persist(PersistRequest::ClearCheckpoint); persistence_actor::persist(PersistRequest::Shutdown); + let _ = execute!(terminal.backend_mut(), PopKeyboardEnhancementFlags); disable_raw_mode()?; if use_alt_screen { execute!(terminal.backend_mut(), LeaveAlternateScreen)?; @@ -357,6 +393,16 @@ pub async fn run_tui(config: &Config, options: TuiOptions) -> Result<()> { result } +fn terminal_probe_timeout(config: &Config) -> Duration { + let timeout_ms = config + .tui + .as_ref() + .and_then(|tui| tui.terminal_probe_timeout_ms) + .unwrap_or(DEFAULT_TERMINAL_PROBE_TIMEOUT_MS) + .clamp(100, 5_000); + Duration::from_millis(timeout_ms) +} + fn build_engine_config(app: &App, config: &Config) -> EngineConfig { EngineConfig { model: app.model.clone(), @@ -2189,6 +2235,41 @@ async fn run_event_loop( } } } + KeyCode::Backspace + if key.modifiers.contains(KeyModifiers::SUPER) + && !app.remove_selected_composer_attachment() => + { + app.delete_to_start_of_line(); + } + KeyCode::Backspace if key.modifiers.contains(KeyModifiers::SUPER) => {} + KeyCode::Backspace + if key.modifiers.contains(KeyModifiers::ALT) + && !app.remove_selected_composer_attachment() => + { + app.delete_word_backward(); + } + KeyCode::Backspace if key.modifiers.contains(KeyModifiers::ALT) => {} + 
KeyCode::Backspace + if key.modifiers.contains(KeyModifiers::CONTROL) + && !app.remove_selected_composer_attachment() => + { + app.delete_word_backward(); + } + KeyCode::Backspace if key.modifiers.contains(KeyModifiers::CONTROL) => {} + KeyCode::Delete + if key.modifiers.contains(KeyModifiers::ALT) + && !app.remove_selected_composer_attachment() => + { + app.delete_word_forward(); + } + KeyCode::Delete if key.modifiers.contains(KeyModifiers::ALT) => {} + KeyCode::Delete + if key.modifiers.contains(KeyModifiers::CONTROL) + && !app.remove_selected_composer_attachment() => + { + app.delete_word_forward(); + } + KeyCode::Delete if key.modifiers.contains(KeyModifiers::CONTROL) => {} KeyCode::Backspace if !app.remove_selected_composer_attachment() => { app.delete_char(); } @@ -2283,6 +2364,11 @@ async fn run_event_loop( KeyCode::Char('u') if key.modifiers.contains(KeyModifiers::CONTROL) => { app.clear_input_recoverable(); } + KeyCode::Char('w') | KeyCode::Char('W') + if key.modifiers.contains(KeyModifiers::CONTROL) => + { + app.delete_word_backward(); + } KeyCode::Char('y') if key.modifiers.contains(KeyModifiers::CONTROL) => { // #379: context-sensitive Ctrl+Y. // When the composer has content → emacs-style yank @@ -2476,7 +2562,7 @@ fn persist_offline_queue_state(app: &App) { draft: app.queued_draft.as_ref().map(queued_ui_to_session), ..OfflineQueueState::default() }; - let _ = manager.save_offline_queue_state(&state); + let _ = manager.save_offline_queue_state(&state, app.current_session_id.as_deref()); } } @@ -5441,12 +5527,12 @@ fn collect_active_tool_status(cell: &HistoryCell, snapshot: &mut ActiveToolStatu snapshot.record(format!("search {}", search.query), search.status, None); } ToolCell::Generic(generic) => { - // Fanout-class dispatch tools represent themselves through the - // FanoutCard + Agents sidebar. 
Counting them again here would - // produce the contradiction the user observed: footer "1 active" - // while the card and sidebar already showed the dispatch's own - // worker counts (#236, #238). Skip them entirely. - if matches!(generic.name.as_str(), "rlm" | "agent_spawn") { + // Sub-agent dispatch represents itself through the DelegateCard + // + Agents sidebar. Counting it again here would duplicate the + // status. RLM is different today: it is a foreground tool call, + // so keep it in the live tool footer until the async RLM + // workbench lands (#513). + if generic.name == "agent_spawn" { return; } snapshot.record(format!("tool {}", generic.name), generic.status, None); @@ -5484,8 +5570,8 @@ fn render_footer_from( footer_state_label(app) } else { // "ready" is the sentinel the widget uses to skip the status segment; - // pair it with TEXT_MUTED for visual neutrality. - ("ready", palette::TEXT_MUTED) + // pair it with theme text_muted for visual neutrality. + ("ready", app.ui_theme.text_muted) }; let coherence = if has(S::Coherence) { @@ -5761,14 +5847,14 @@ fn footer_status_line_spans(app: &App, max_width: usize) -> Vec> { let mut spans = vec![ Span::styled(mode_label.to_string(), Style::default().fg(mode_color)), - Span::styled(sep.to_string(), Style::default().fg(palette::TEXT_DIM)), - Span::styled(model_label, Style::default().fg(palette::TEXT_HINT)), + Span::styled(sep.to_string(), Style::default().fg(app.ui_theme.text_dim)), + Span::styled(model_label, Style::default().fg(app.ui_theme.text_hint)), ]; if show_status { spans.push(Span::styled( sep.to_string(), - Style::default().fg(palette::TEXT_DIM), + Style::default().fg(app.ui_theme.text_dim), )); spans.push(Span::styled( status_label.to_string(), @@ -5781,7 +5867,7 @@ fn footer_status_line_spans(app: &App, max_width: usize) -> Vec> { fn footer_state_label(app: &App) -> (&'static str, ratatui::style::Color) { if app.is_compacting { - return ("compacting \u{238B}", palette::STATUS_WARNING); + return 
("compacting \u{238B}", app.ui_theme.status_warning); } // Note: we deliberately do NOT show a "thinking" label for `is_loading`. // The animated water-spout strip in the footer's spacer is the visual @@ -5790,30 +5876,30 @@ fn footer_state_label(app: &App) -> (&'static str, ratatui::style::Color) { // not strictly reasoning. Sub-agents still surface "working" because // that's a distinct lifecycle the user can act on (open `/agents`). if running_agent_count(app) > 0 { - return ("working", palette::DEEPSEEK_SKY); + return ("working", app.ui_theme.status_working); } if app.queued_draft.is_some() { - return ("draft", palette::TEXT_MUTED); + return ("draft", app.ui_theme.text_muted); } if !app.view_stack.is_empty() { - return ("overlay", palette::TEXT_MUTED); + return ("overlay", app.ui_theme.text_muted); } if !app.input.is_empty() { - return ("draft", palette::TEXT_MUTED); + return ("draft", app.ui_theme.text_muted); } - ("ready", palette::TEXT_MUTED) + ("ready", app.ui_theme.status_ready) } #[allow(dead_code)] fn footer_mode_style(app: &App) -> (&'static str, ratatui::style::Color) { let label = app.mode.as_setting(); let color = match app.mode { - crate::tui::app::AppMode::Agent => palette::MODE_AGENT, - crate::tui::app::AppMode::Yolo => palette::MODE_YOLO, - crate::tui::app::AppMode::Plan => palette::MODE_PLAN, + crate::tui::app::AppMode::Agent => app.ui_theme.mode_agent, + crate::tui::app::AppMode::Yolo => app.ui_theme.mode_yolo, + crate::tui::app::AppMode::Plan => app.ui_theme.mode_plan, }; (label, color) } diff --git a/crates/tui/src/tui/ui/tests.rs b/crates/tui/src/tui/ui/tests.rs index 650bcaf0..0b6687fb 100644 --- a/crates/tui/src/tui/ui/tests.rs +++ b/crates/tui/src/tui/ui/tests.rs @@ -554,6 +554,68 @@ fn active_tool_status_label_summarizes_live_tool_group() { assert!(label.contains("Alt+V")); } +#[test] +fn active_tool_status_label_counts_foreground_rlm_work() { + let mut app = create_test_app(); + app.turn_started_at = Some(Instant::now() - 
Duration::from_secs(5)); + let mut active = ActiveCell::new(); + active.push_tool( + "rlm-1", + HistoryCell::Tool(ToolCell::Generic(GenericToolCell { + name: "rlm".to_string(), + status: ToolStatus::Running, + input_summary: Some("task: compare projects".to_string()), + output: None, + prompts: None, + })), + ); + app.active_cell = Some(active); + + let label = active_tool_status_label(&app).expect("status label"); + + assert!(label.contains("tool rlm"), "label: {label}"); + assert!(label.contains("1 active"), "label: {label}"); +} + +#[test] +fn terminal_probe_timeout_defaults_to_500ms() { + let config = Config::default(); + + assert_eq!(terminal_probe_timeout(&config), Duration::from_millis(500)); +} + +#[test] +fn terminal_probe_timeout_uses_tui_config_and_clamps() { + let mut config = Config { + tui: Some(crate::config::TuiConfig { + alternate_screen: None, + mouse_capture: None, + terminal_probe_timeout_ms: Some(750), + status_items: None, + }), + ..Config::default() + }; + + assert_eq!(terminal_probe_timeout(&config), Duration::from_millis(750)); + + config + .tui + .as_mut() + .expect("tui config") + .terminal_probe_timeout_ms = Some(0); + assert_eq!(terminal_probe_timeout(&config), Duration::from_millis(100)); + + config + .tui + .as_mut() + .expect("tui config") + .terminal_probe_timeout_ms = Some(60_000); + assert_eq!( + terminal_probe_timeout(&config), + Duration::from_millis(5_000) + ); +} + #[test] fn file_mentions_add_local_text_context_to_model_payload() { let tmpdir = TempDir::new().expect("tempdir"); diff --git a/crates/tui/src/tui/views/mod.rs b/crates/tui/src/tui/views/mod.rs index 1ca01983..715a3d62 100644 --- a/crates/tui/src/tui/views/mod.rs +++ b/crates/tui/src/tui/views/mod.rs @@ -1434,7 +1434,7 @@ impl ModalView for SubAgentsView { ]; lines.push(Line::from(Span::styled( - "Sub-agent swarm", + "Sub-agents", Style::default().fg(palette::DEEPSEEK_SKY).bold(), ))); diff --git a/crates/tui/src/tui/widgets/agent_card.rs 
b/crates/tui/src/tui/widgets/agent_card.rs index a8879fdb..f923bdc4 100644 --- a/crates/tui/src/tui/widgets/agent_card.rs +++ b/crates/tui/src/tui/widgets/agent_card.rs @@ -225,7 +225,7 @@ impl FanoutCard { } } - /// Attach a real agent id to the first pending placeholder slot. Swarm + /// Attach a real agent id to the first pending placeholder slot. Fanout /// cards are seeded from task ids before child agents exist; when a child /// starts, this keeps the dot count stable instead of appending a second /// circle for the same unit of work. @@ -283,12 +283,12 @@ impl FanoutCard { let mut lines = Vec::with_capacity(3); let header_status = self.aggregate_status(); let title = format!("{} ({} workers)", self.kind, self.workers.len()); - lines.push(card_header( - ToolFamily::Fanout, - header_status, - &self.kind, - &title, - )); + let family = if self.kind == "rlm" { + ToolFamily::Rlm + } else { + ToolFamily::Fanout + }; + lines.push(card_header(family, header_status, &self.kind, &title)); lines.push(Line::from(vec![ Span::styled(" ", Style::default()), Span::styled( @@ -560,7 +560,7 @@ mod tests { #[test] fn fanout_card_dot_grid_renders_stateful_worker_slots() { - let mut card = FanoutCard::new("swarm") + let mut card = FanoutCard::new("fanout") .with_workers(["w_1", "w_2", "w_3", "w_4", "w_5", "w_6", "w_7"]); card.upsert_worker("w_1", AgentLifecycle::Completed); card.upsert_worker("w_2", AgentLifecycle::Completed); @@ -600,7 +600,7 @@ mod tests { #[test] fn fanout_apply_inserts_unknown_worker_via_child_spawned() { - let mut card = FanoutCard::new("swarm"); + let mut card = FanoutCard::new("fanout"); let msg = MailboxMessage::ChildSpawned { parent_id: "root".into(), child_id: "agent_late".into(), @@ -613,7 +613,7 @@ mod tests { #[test] fn fanout_started_claims_seeded_pending_slot_without_growing_grid() { - let mut card = FanoutCard::new("agent_swarm").with_workers(["task:a", "task:b"]); + let mut card = FanoutCard::new("fanout").with_workers(["task:a", "task:b"]); let 
started = MailboxMessage::started("agent_live", crate::tools::subagent::SubAgentType::General); @@ -628,7 +628,7 @@ mod tests { #[test] fn fanout_apply_transitions_worker_through_lifecycle() { - let mut card = FanoutCard::new("swarm").with_workers(["w_1"]); + let mut card = FanoutCard::new("fanout").with_workers(["w_1"]); let started = MailboxMessage::started("w_1", crate::tools::subagent::SubAgentType::General); apply_to_fanout(&mut card, &started); assert_eq!(card.workers[0].status, AgentLifecycle::Running); @@ -657,7 +657,7 @@ mod tests { ]; for (total, done, expected) in cases { let ids: Vec = (0..*total).map(|i| format!("w_{i}")).collect(); - let mut card = FanoutCard::new("swarm").with_workers(ids.iter().cloned()); + let mut card = FanoutCard::new("fanout").with_workers(ids.iter().cloned()); for id in ids.iter().take(*done) { card.upsert_worker(id, AgentLifecycle::Completed); } diff --git a/crates/tui/src/tui/widgets/footer.rs b/crates/tui/src/tui/widgets/footer.rs index b2f2223b..f960bca3 100644 --- a/crates/tui/src/tui/widgets/footer.rs +++ b/crates/tui/src/tui/widgets/footer.rs @@ -33,6 +33,12 @@ pub struct FooterProps { pub mode_label: &'static str, /// Color used for the mode chip. pub mode_color: Color, + /// Color used for small separators between chips. + pub text_dim_color: Color, + /// Color used for the model label. + pub text_hint_color: Color, + /// Color used for steady secondary chips such as cost. + pub text_muted_color: Color, /// Status label like `"ready"`, `"thinking ⌫"`, `"working"`. When the /// label equals `"ready"` the footer hides the status segment entirely. 
pub state_label: String, @@ -192,11 +198,14 @@ impl FooterProps { cache: Vec>, cost: Vec>, ) -> Self { - let (mode_label, mode_color) = mode_style(app.mode); + let (mode_label, mode_color) = mode_style(app); Self { model: app.model.clone(), mode_label, mode_color, + text_dim_color: app.ui_theme.text_dim, + text_hint_color: app.ui_theme.text_hint, + text_muted_color: app.ui_theme.text_muted, state_label: state_label.to_string(), state_color, coherence, @@ -210,16 +219,16 @@ impl FooterProps { } } -fn mode_style(mode: AppMode) -> (&'static str, Color) { - let label = match mode { +fn mode_style(app: &App) -> (&'static str, Color) { + let label = match app.mode { AppMode::Agent => "agent", AppMode::Yolo => "yolo", AppMode::Plan => "plan", }; - let color = match mode { - AppMode::Agent => palette::MODE_AGENT, - AppMode::Yolo => palette::MODE_YOLO, - AppMode::Plan => palette::MODE_PLAN, + let color = match app.mode { + AppMode::Agent => app.ui_theme.mode_agent, + AppMode::Yolo => app.ui_theme.mode_yolo, + AppMode::Plan => app.ui_theme.mode_plan, }; (label, color) } @@ -389,31 +398,31 @@ impl FooterWidget { if !spans.is_empty() { spans.push(Span::styled( sep.to_string(), - Style::default().fg(palette::TEXT_DIM), + Style::default().fg(self.props.text_dim_color), )); } spans.push(Span::styled( model_label, - Style::default().fg(palette::TEXT_HINT), + Style::default().fg(self.props.text_hint_color), )); } if let Some(cost_text) = cost { if !spans.is_empty() { spans.push(Span::styled( sep.to_string(), - Style::default().fg(palette::TEXT_DIM), + Style::default().fg(self.props.text_dim_color), )); } spans.push(Span::styled( cost_text, - Style::default().fg(palette::TEXT_MUTED), + Style::default().fg(self.props.text_muted_color), )); } if let Some(status_label) = status { if !spans.is_empty() { spans.push(Span::styled( sep.to_string(), - Style::default().fg(palette::TEXT_DIM), + Style::default().fg(self.props.text_dim_color), )); } spans.push(Span::styled( @@ -574,6 +583,9 @@ 
mod tests { assert_eq!(props.state_color, palette::TEXT_MUTED); assert_eq!(props.mode_label, "agent"); assert_eq!(props.mode_color, palette::MODE_AGENT); + assert_eq!(props.text_dim_color, palette::TEXT_DIM); + assert_eq!(props.text_hint_color, palette::TEXT_HINT); + assert_eq!(props.text_muted_color, palette::TEXT_MUTED); assert_eq!(props.model, "deepseek-v4-flash"); assert!(props.coherence.is_empty()); assert!(props.agents.is_empty()); @@ -602,6 +614,22 @@ mod tests { assert_eq!(props.state_color, palette::STATUS_WARNING); } + #[test] + fn from_app_statusline_colors_come_from_ui_theme() { + let mut app = make_app(); + app.ui_theme.mode_agent = Color::Rgb(1, 2, 3); + app.ui_theme.text_dim = Color::Rgb(4, 5, 6); + app.ui_theme.text_hint = Color::Rgb(7, 8, 9); + app.ui_theme.text_muted = Color::Rgb(10, 11, 12); + + let props = idle_props_for(&app); + + assert_eq!(props.mode_color, Color::Rgb(1, 2, 3)); + assert_eq!(props.text_dim_color, Color::Rgb(4, 5, 6)); + assert_eq!(props.text_hint_color, Color::Rgb(7, 8, 9)); + assert_eq!(props.text_muted_color, Color::Rgb(10, 11, 12)); + } + // ---- agents chip wording ---- #[test] fn footer_agents_chip_is_empty_when_no_agents_running() { diff --git a/crates/tui/src/tui/widgets/tool_card.rs b/crates/tui/src/tui/widgets/tool_card.rs index 87db95a2..143262f1 100644 --- a/crates/tui/src/tui/widgets/tool_card.rs +++ b/crates/tui/src/tui/widgets/tool_card.rs @@ -39,6 +39,8 @@ pub enum ToolFamily { Delegate, /// Multi-agent fanout dispatch (rlm). `⋮⋮ fanout`. Fanout, + /// Recursive language model work. `⋮⋮ rlm`. + Rlm, /// Reasoning / chain-of-thought. `… think`. 
Reasoning has its own /// render path (`render_thinking` in `history.rs`); the family is /// declared here for completeness so any future code that reaches for @@ -78,7 +80,7 @@ pub fn tool_family_for_name(name: &str) -> ToolFamily { "exec_shell" | "exec_shell_wait" | "exec_shell_interact" => ToolFamily::Run, "grep_files" | "file_search" | "web_search" | "fetch_url" => ToolFamily::Find, "agent_spawn" => ToolFamily::Delegate, - "rlm" => ToolFamily::Fanout, + "rlm" => ToolFamily::Rlm, _ => ToolFamily::Generic, } } @@ -96,7 +98,9 @@ pub fn tool_header_summary_for_name(name: &str, input_summary: Option<&str>) -> ToolFamily::Read | ToolFamily::Patch => ["path", "file", "target", "content"].as_slice(), ToolFamily::Run => ["command", "cmd", "script"].as_slice(), ToolFamily::Find => ["query", "pattern", "path", "scope"].as_slice(), - ToolFamily::Delegate | ToolFamily::Fanout => ["prompt", "task", "model"].as_slice(), + ToolFamily::Delegate | ToolFamily::Fanout | ToolFamily::Rlm => { + ["prompt", "task", "model"].as_slice() + } ToolFamily::Think | ToolFamily::Generic => { ["query", "path", "command", "prompt"].as_slice() } @@ -137,6 +141,7 @@ pub fn family_glyph(family: ToolFamily) -> &'static str { ToolFamily::Find => "\u{2315}", // ⌕ ToolFamily::Delegate => "\u{25D0}", // ◐ ToolFamily::Fanout => "\u{22EE}\u{22EE}", // ⋮⋮ (two cells) + ToolFamily::Rlm => "\u{22EE}\u{22EE}", // ⋮⋮ (two cells) ToolFamily::Think => "\u{2026}", // … ToolFamily::Generic => "\u{2022}", // • } @@ -153,7 +158,8 @@ pub fn family_label(family: ToolFamily) -> &'static str { ToolFamily::Run => "run", ToolFamily::Find => "find", ToolFamily::Delegate => "delegate", - ToolFamily::Fanout => "swarm", + ToolFamily::Fanout => "fanout", + ToolFamily::Rlm => "rlm", ToolFamily::Think => "think", ToolFamily::Generic => "tool", } @@ -212,7 +218,7 @@ mod tests { assert_eq!(tool_family_for_name("exec_shell"), ToolFamily::Run); assert_eq!(tool_family_for_name("grep_files"), ToolFamily::Find); 
assert_eq!(tool_family_for_name("agent_spawn"), ToolFamily::Delegate); - assert_eq!(tool_family_for_name("rlm"), ToolFamily::Fanout); + assert_eq!(tool_family_for_name("rlm"), ToolFamily::Rlm); assert_eq!( tool_family_for_name("totally_new_tool"), ToolFamily::Generic @@ -252,6 +258,7 @@ mod tests { ToolFamily::Find, ToolFamily::Delegate, ToolFamily::Fanout, + ToolFamily::Rlm, ToolFamily::Think, ToolFamily::Generic, ] { diff --git a/crates/tui/src/utils.rs b/crates/tui/src/utils.rs index fba22ce4..d2b98030 100644 --- a/crates/tui/src/utils.rs +++ b/crates/tui/src/utils.rs @@ -338,14 +338,21 @@ pub fn url_encode(input: &str) -> String { /// resolve correctly across processes. #[must_use] pub fn display_path(path: &Path) -> String { - let Some(home) = dirs::home_dir() else { + display_path_with_home(path, dirs::home_dir().as_deref()) +} + +/// Like [`display_path`] but takes an explicit home directory instead of +/// reading `$HOME` / `dirs::home_dir()`. Used in tests and anywhere the +/// caller already has the home path available. +#[must_use] +pub fn display_path_with_home(path: &Path, home: Option<&Path>) -> String { + let Some(home) = home else { return path.display().to_string(); }; - if let Ok(rest) = path.strip_prefix(&home) { + if let Ok(rest) = path.strip_prefix(home) { if rest.as_os_str().is_empty() { return "~".to_string(); } - // Render with the platform-correct separator after the tilde. let sep = std::path::MAIN_SEPARATOR; return format!("~{sep}{}", rest.display()); } @@ -390,81 +397,75 @@ pub fn estimate_message_chars(messages: &[Message]) -> usize { total } -// Tests below set `HOME` to drive `dirs::home_dir()`, which is honored on -// Unix but not on Windows (which reads `USERPROFILE` first). The -// `display_path` contraction logic itself is platform-identical — it -// delegates to `dirs::home_dir()`. 
Gate to `cfg(unix)` so we cover the -// behavior on the platform whose env-var contract matches the test -// driver, instead of writing platform-specific test scaffolding for a -// pure abstraction. -#[cfg(all(test, unix))] +// Tests use `display_path_with_home` so they never mutate the global `HOME` +// env var. Mutating `HOME` via `std::env::set_var` is not thread-safe; Cargo +// runs tests in parallel by default and CI runners are multi-core, so any test +// that stomps `HOME` will race with tests that *read* it. Using the injected +// helper avoids the race entirely and makes the tests portable to Windows +// without additional platform scaffolding. +#[cfg(test)] mod tests { - use super::display_path; + use super::display_path_with_home; use std::path::PathBuf; - /// Save and restore $HOME inside one test so a panic anywhere can't - /// poison sibling tests that read the env var. - fn with_home(home: &str, f: impl FnOnce() -> R) -> R { - let prev = std::env::var_os("HOME"); - // SAFETY: tests in this crate are run single-threaded with respect - // to env-var mutation by the integration harness, and we restore - // immediately after the closure. 
- unsafe { std::env::set_var("HOME", home) }; - let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(f)); - match prev { - Some(v) => unsafe { std::env::set_var("HOME", v) }, - None => unsafe { std::env::remove_var("HOME") }, - } - match result { - Ok(v) => v, - Err(p) => std::panic::resume_unwind(p), - } + fn home(s: &str) -> Option { + Some(PathBuf::from(s)) } #[test] fn display_path_contracts_home_prefix() { - with_home("/Users/alice", || { - assert_eq!( - display_path(&PathBuf::from("/Users/alice/projects/foo")), - format!( - "~{}projects{}foo", - std::path::MAIN_SEPARATOR, - std::path::MAIN_SEPARATOR - ), - ); - }); + let h = home("/Users/alice"); + assert_eq!( + display_path_with_home(&PathBuf::from("/Users/alice/projects/foo"), h.as_deref()), + format!( + "~{}projects{}foo", + std::path::MAIN_SEPARATOR, + std::path::MAIN_SEPARATOR + ), + ); } #[test] fn display_path_returns_bare_tilde_for_home_itself() { - with_home("/Users/alice", || { - assert_eq!(display_path(&PathBuf::from("/Users/alice")), "~"); - }); + let h = home("/Users/alice"); + assert_eq!( + display_path_with_home(&PathBuf::from("/Users/alice"), h.as_deref()), + "~" + ); } #[test] fn display_path_leaves_unrelated_paths_alone() { - with_home("/Users/alice", || { - // Different user — must not get rewritten or share the tilde. - assert_eq!( - display_path(&PathBuf::from("/Users/bob/Code")), - "/Users/bob/Code".to_string() - ); - // System path must stay absolute. - assert_eq!(display_path(&PathBuf::from("/etc/hosts")), "/etc/hosts"); - }); + let h = home("/Users/alice"); + // Different user — must not get rewritten or share the tilde. + assert_eq!( + display_path_with_home(&PathBuf::from("/Users/bob/Code"), h.as_deref()), + "/Users/bob/Code".to_string() + ); + // System path must stay absolute. 
+ assert_eq!( + display_path_with_home(&PathBuf::from("/etc/hosts"), h.as_deref()), + "/etc/hosts" + ); } #[test] fn display_path_does_not_match_username_prefix() { // Regression guard: a directory named like the user's home // *prefix* but not under it must not get rewritten. - with_home("/Users/alice", || { - assert_eq!( - display_path(&PathBuf::from("/Users/alice2/work")), - "/Users/alice2/work" - ); - }); + let h = home("/Users/alice"); + assert_eq!( + display_path_with_home(&PathBuf::from("/Users/alice2/work"), h.as_deref()), + "/Users/alice2/work" + ); + } + + #[test] + fn display_path_with_no_home_returns_full_path() { + assert_eq!( + display_path_with_home(&PathBuf::from("/some/path"), None), + "/some/path" + ); } } diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 2fb88d6c..6d938108 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -8,7 +8,7 @@ Current boundary note (v0.8.6): - The LSP subsystem (`crates/tui/src/lsp/`) is fully wired into the engine's post-tool-execution path (`core/engine/lsp_hooks.rs`), providing inline diagnostics after every edit_file/apply_patch/write_file. - The swarm agent system was removed in v0.8.5 in favour of sub-agents (agent_spawn) and RLM (rlm_query). - No swarm references remain in the active codebase. + No model-visible swarm tool remains in the active codebase. ## High-Level Overview diff --git a/docs/COMPETITIVE_ANALYSIS.md b/docs/COMPETITIVE_ANALYSIS.md new file mode 100644 index 00000000..3f3dd52f --- /dev/null +++ b/docs/COMPETITIVE_ANALYSIS.md @@ -0,0 +1,317 @@ +# Competitive Analysis: DeepSeek TUI vs OpenCode vs Codex CLI + +Analysis of capabilities across three AI coding agents: OpenCode (`/Volumes/VIXinSSD/opencode`), Codex CLI (`/Volumes/VIXinSSD/codex-main`), and DeepSeek TUI (`/Volumes/VIXinSSD/deepseek-tui`). 
+ +## Tool Matrix + +| Capability | OpenCode | Codex CLI | DeepSeek TUI | +|---|---|---|---| +| File read | ✅ Read | ✅ | ✅ file | +| File write | ✅ Write | ✅ | ✅ file | +| File edit | ✅ Edit (string replace) | ✅ apply_patch (diff format) | ✅ edit_file + apply_patch | +| File glob | ✅ Glob | ✅ | ✅ file_search | +| Code search | ✅ Grep + CodeSearch (Exa) | ✅ | ✅ grep_files + search | +| Shell exec | ✅ Bash | ✅ exec/shell | ✅ shell | +| Web fetch | ✅ WebFetch | ✅ | ✅ fetch_url | +| Web search | ✅ WebSearch | ✅ WebSearchRequest | ✅ web_search | +| Web browse | ❌ | ❌ | ✅ web_run | +| LSP | ✅ Lsp (experimental) | ❌ | ❌ | +| Task/todo tracking | ✅ TodoWrite | ✅ | ✅ todo_write | +| Subagent spawn | ✅ Task | ✅ Collab/SpawnCsv | ✅ agent_spawn | +| Skill system | ✅ Skill (multi-location discovery) | ✅ core-skills | ⚠️ Partial (.deepseek/skills/) | +| Plan mode | ✅ plan-enter/exit | ✅ Plan mode | ✅ Plan mode | +| User question | ✅ Question | ✅ request_user_input | ✅ user_input | +| Patch apply | ✅ apply_patch (custom format) | ✅ apply_patch (diff format) | ✅ apply_patch | +| Data validation | ❌ | ❌ | ✅ validate_data | +| Finance | ❌ | ❌ | ✅ finance | +| Git ops | Via Bash tool | ✅ git-utils | ✅ git module | +| GitHub ops | Via Bash (gh) | ✅ | ✅ github | +| Test running | ❌ | ✅ | ✅ test_runner | +| Automation | ❌ | ❌ | ✅ automation | +| Code review | ❌ | ✅ GuardianApproval | ✅ review | +| Recall/archive | ❌ | ❌ | ✅ recall_archive | +| Diagnostics | ❌ | ✅ | ✅ diagnostics | +| Revert turn | ❌ | ❌ | ✅ revert_turn | +| Image generation | ❌ | ✅ ImageGeneration | ❌ | +| Browser use | ❌ | ✅ BrowserUse | ❌ (web_run is headless) | +| Computer use | ❌ | ✅ ComputerUse | ❌ | +| Realtime voice | ❌ | ✅ RealtimeConversation | ❌ | + +--- + +## High Priority Gaps + +These are capabilities that would most directly improve DeepSeek TUI's effectiveness as a coding agent. + +### 1. 
LSP Integration + +**What it is:** A model-callable tool that queries Language Server Protocol servers for code intelligence — go-to-definition, find references, hover (type info), document symbols, workspace symbols, call hierarchy, and implementations. + +**Why it matters:** The single biggest capability gap. Every codebase exploration currently costs shell `rg` calls and sequential file reads. With LSP, the agent can jump to definitions, find all callers of a function, and inspect types in a single tool call. Estimated 30–50% reduction in exploration turns for structured codebases. + +**OpenCode implementation:** `packages/opencode/src/tool/lsp.ts` exposes nine operations with file/line/character parameters. The tool prompts are in `tool/lsp.txt`. LSP servers must be configured per file type. + +``` +Supported operations: +- goToDefinition +- findReferences +- hover +- documentSymbol +- workspaceSymbol +- goToImplementation +- prepareCallHierarchy +- incomingCalls +- outgoingCalls +``` + +**What DeepSeek TUI would need:** A new `lsp.rs` tool in `crates/tui/src/tools/`, integration with tower-lsp or lsp-server crate, and per-language server configuration. + +### 2. Granular Permission System + +**What it is:** Allow/deny/ask rules keyed on tool name × file path pattern, with wildcard support, home-directory expansion, and cascading to pending requests. + +**Why it matters:** The current all-or-nothing approval model creates friction. Users can't express "always allow reads in `src/` but always ask for `.env` files." The ability to permanently approve a pattern reduces approval fatigue by 60–80% over a long session. 
+ +**OpenCode implementation:** `packages/opencode/src/permission/index.ts` implements: + +- `Action`: `allow | deny | ask` +- `Rule`: `{ permission: string, pattern: string, action: Action }` +- `Ruleset`: ordered list of rules with last-match-wins semantics +- Pattern expansion for `~/`, `$HOME/` +- Wildcard matching on both permission names and path patterns +- Reply modes: `once` (approve this one call), `always` (approve pattern forever), `reject` (deny this one) +- Automatic cascading: an "always" reply auto-resolves pending requests for the same session +- Distinct error types: `DeniedError` (rule-based), `RejectedError` (user said no), `CorrectedError` (user said no with feedback) + +Agent definitions inherit permission rulesets that can be user-overridden: +```typescript +build: { + permission: merge(defaults, { question: "allow", plan_enter: "allow" }, user), +} +plan: { + permission: merge(defaults, { edit: { "*": "deny" } }, user), +} +explore: { + permission: merge(defaults, { "*": "deny", grep: "allow", read: "allow", ... }, user), +} +``` + +**What DeepSeek TUI would need:** A permission rule engine covering the same dimensions (tool name × path pattern × action), persistence to disk, and hook integration so approval decisions can cascade. + +### 3. Lifecycle Hooks + +**What it is:** User-defined shell commands or plugin functions that fire on specific lifecycle events — before a tool executes, after it completes, when permission is requested, at session start, when the user submits a prompt, and at session stop. + +**Why it matters:** Hooks are the escape hatch that lets users enforce invariants without polluting the system prompt. "Always run `cargo fmt` after writing a `.rs` file." "Warn me before any `rm -rf`." "Log every shell command to a file." They are composable, auditable, and don't consume context window tokens. 
+ +**Codex CLI implementation:** `codex-rs/hooks/` defines six event types with typed request/response payloads: + +| Event | When it fires | Payload | +|---|---|---| +| `PreToolUse` | Before tool execution | tool name, input params, sandbox state | +| `PostToolUse` | After tool execution | tool name, input, success/failure, duration, output preview | +| `PermissionRequest` | When model requests permission | permission type, justification | +| `SessionStart` | New session begins | session ID, cwd, source (new/resume) | +| `UserPromptSubmit` | User sends a message | prompt text | +| `Stop` | Session ending | reason | + +Each hook handler supports: +- `matcher`: optional regex to filter which tool calls trigger the hook +- `command`: shell command to run +- `timeout_sec`: maximum runtime +- `status_message`: shown to the user while the hook runs +- `source_path` + `source`: tracks where the hook was defined (project hooks.json, user config, plugin) +- Hooks can return `Success`, `FailedContinue`, or `FailedAbort` (blocks the operation) + +**What DeepSeek TUI would need:** Extend `crates/hooks/` to support the full event surface, add matcher-based filtering, and provide a `hooks.json` discovery mechanism similar to Codex CLI's. + +### 4. Persistent Memories + +**What it is:** Automatic extraction of user preferences, project conventions, and past decisions from conversations, stored as retrievable memories that are injected into new sessions. + +**Why it matters:** Across a long debugging session, the agent rediscovers the same facts: "this project uses Rust edition 2024," "tests run with `cargo test --workspace`," "the user prefers 4-space indentation." A memory system compounds value — each session builds on prior knowledge rather than starting from zero. 
+ +**Codex CLI implementation:** The `MemoryTool` feature (experimental, behind `/experimental` menu) enables: +- Memory generation: the model creates structured memories from conversation content +- Memory retrieval: relevant memories are injected into new conversation context +- The `Chronicle` feature adds passive screen-context memories via a sidecar process +- Memories are stored in SQLite and surfaced in the TUI via `/memories` command + +**What DeepSeek TUI would need:** A memory extraction prompt, a vector or keyword-based retrieval system, and storage in the existing session/state infrastructure. + +### 5. Skill Auto-Discovery + +**What it is:** Automatic scanning of multiple locations for `SKILL.md` files that provide domain-specific instructions, scripts, and references. Skills are injected into the conversation on demand via a `skill` tool. + +**Why it matters:** Skills are how the community packages expertise. A "Rust refactoring" skill, a "Docker deployment" skill, a "GitHub Actions" skill — each provides specialized instructions without bloating the main system prompt. OpenCode's multi-location discovery means skills can be project-local, user-global, or pulled from URLs. + +**OpenCode implementation:** `packages/opencode/src/skill/index.ts` scans: + +1. `~/.claude/skills/**/SKILL.md` (Claude Code compatibility) +2. `~/.agents/skills/**/SKILL.md` (Agents SDK compatibility) +3. Parent directories from cwd to workspace root for `.claude/skills/` and `.agents/skills/` +4. Project config directories for `{skill,skills}/**/SKILL.md` +5. User-configured paths (with `~/` expansion) +6. User-configured URLs (pulled via discovery module) + +Skills are parsed for YAML frontmatter (`name`, `description`) and Markdown content. Duplicate names warn but don't error. Skills respect agent permissions — an agent can only load skills its permission ruleset allows. 
+ +**What DeepSeek TUI would need:** Extend the existing `~/.deepseek/skills/` discovery to parent-directory walking, Claude Code compatibility paths, and URL-based skill sources. Add YAML frontmatter parsing. + +--- + +## Medium Priority Gaps + +These would meaningfully improve the agent experience but are less urgent. + +### 6. Agent Profiles with Permission Inheritance + +**What it is:** Named agent types (build, plan, general, explore) that inherit different tool permission sets. Users can define custom agents with specific models, temperatures, system prompts, and permission rules. + +**OpenCode implementation:** `packages/opencode/src/agent/agent.ts`: + +- `build`: full-access with ask on sensitive paths +- `plan`: all edit tools denied, plan-exit allowed, plan file writes in `.opencode/plans/` allowed +- `general`: subagent-only, todo-write denied +- `explore`: read-only, grep/glob/read/bash/webfetch/websearch allowed +- Plus hidden agents for internal tasks (compaction, title generation, summarization) + +Each agent carries its own `model`, `temperature`, `topP`, `prompt`, and `permission` ruleset. A `generate` function creates new agent configs dynamically from user descriptions. + +**What DeepSeek TUI would need:** Extend the mode system (Plan/Agent/YOLO) to support named agent profiles with per-profile tool filtering and model configuration. + +### 7. Shell Sandboxing + +**What it is:** OS-level sandbox enforcement for shell commands — network restrictions, filesystem read-only mounts, allowed/disallowed paths. 
+ +**Codex CLI implementation:** `codex-rs/sandboxing/`: + +- macOS: Seatbelt (`sandboxing/src/seatbelt.rs`) with `.sbpl` policy files +- Linux: bubblewrap (default) or Landlock (legacy fallback) +- Windows: restricted token +- Configurable sandbox policies per command +- Integration tests can detect they're running under sandbox and early-exit + +**What DeepSeek TUI would need:** Extend `crates/execpolicy/` to support platform-specific sandbox enforcement. Start with macOS Seatbelt (most DeepSeek TUI users are on macOS). + +### 8. Tool Search / Deferred MCP Tool Exposure + +**What it is:** Instead of dumping all MCP tools into the system prompt (bloating context), expose a `tool_search` function that the model calls to discover relevant tools by name or description. + +**Codex CLI implementation:** `ToolSearch` feature (stable, default-enabled). `ToolSearchAlwaysDeferMcpTools` goes further — never exposes MCP tools directly, always requires search. This is critical when MCP servers expose hundreds of tools. + +**What DeepSeek TUI would need:** `tool_search_tool_regex` and `tool_search_tool_bm25` already exist as deferred tool discovery mechanisms. Extend them to gate MCP tool exposure behind on-demand search. + +### 9. ExecPolicy / Command Approval Rules + +**What it is:** A policy engine that evaluates shell commands against user-defined rules — prefix allowlists, network restrictions, pattern matching — and auto-approves, denies, or escalates. + +**Codex CLI implementation:** `codex-rs/execpolicy/src/`: + +- `Policy`: ordered list of `Rule` entries +- `Rule`: prefix patterns (e.g., allow `cargo build*`, deny `rm *`) +- `NetworkRule`: protocol-level network restrictions +- `MatchOptions`: controls rule evaluation behavior +- `Evaluation`: result of policy evaluation against a command + +Rules can be amended at runtime via `blocking_append_allow_prefix_rule`. 
+ +**What DeepSeek TUI would need:** Extend `crates/execpolicy/` to support prefix rules, network rules, and runtime policy amendments. + +### 10. Dynamic Agent Generation + +**What it is:** On-the-fly generation of new agent configurations from natural language descriptions. + +**OpenCode implementation:** The `generate` function in `agent.ts` takes a description like "code reviewer that only reads files and reports issues" and returns an `{ identifier, whenToUse, systemPrompt }` object using a structured LLM call. Generated agent identifiers are chosen to avoid colliding with existing agent names. + +**What DeepSeek TUI would need:** A model-callable tool or slash command that generates agent configs from descriptions and registers them for the session. + +### 11. Streaming Patch Events + +**What it is:** Structured progress events streamed while the model is generating `apply_patch` input, giving the user real-time feedback on what files will change. + +**Codex CLI implementation:** `ApplyPatchStreamingEvents` feature (under development) streams file-level progress as the model produces patch hunks. The `StreamingPatchParser` in `apply-patch/src/streaming_parser.rs` handles incremental parsing. + +**What DeepSeek TUI would need:** Extend `apply_patch.rs` to emit progress events during streaming model output. + +--- + +## Lower Priority Gaps + +Specialized features that are valuable but less critical for core coding workflow. + +| Capability | Where | Notes | +|---|---|---| +| Image Generation | Codex CLI `ImageGeneration` | Niche for coding; useful for documentation diagrams | +| Browser Use | Codex CLI `BrowserUse` | Interactive browser automation (click, type, screenshot). DeepSeek TUI has `web_run` for headless | +| Computer Use | Codex CLI `ComputerUse` | Full desktop automation. Desktop-app-gated | +| Realtime Voice | Codex CLI `RealtimeConversation` | Voice conversation mode. 
Experimental | +| Unified PTY Exec | Codex CLI `UnifiedExec` | Single PTY-backed shell with state snapshotting across turns | +| Artifacts | Codex CLI `Artifact` | Native artifact rendering tools | +| Goals | Codex CLI `Goals` | Persistent thread goals that survive compaction and session restarts | +| Git Commit Attribution | Codex CLI `CodexGitCommit` | Model instructions for proper commit attribution | +| CSV Agent Spawning | Codex CLI `SpawnCsv` | CSV-backed parallel agent job distribution | +| Shell Snapshotting | Codex CLI `ShellSnapshot` | Save/restore shell state across turns | +| Prevent Idle Sleep | Codex CLI `PreventIdleSleep` | Keep machine awake during long-running agent tasks | + +--- + +## Architectural Patterns + +### OpenCode + +**Client/Server Architecture:** The TUI is one client; the server can be driven remotely from a mobile app, desktop app, or web console. This decouples the agent runtime from the UI layer. + +**Plugin System:** `packages/opencode/src/plugin/` supports hot-loadable JS/TS plugins that add tools, models, auth providers, and chat middleware. Plugins receive a typed context with tool execution, auth, and filesystem access. + +**Multi-Provider:** Not coupled to any single AI provider. Models are configured with provider IDs and resolved through a provider registry. OAuth support for OpenAI Codex (ChatGPT subscription integration) in `plugin/codex.ts`. + +**Config Layering:** Config is loaded from multiple sources (global, project, env vars) and merged with well-defined precedence. + +### Codex CLI + +**App-Server Protocol:** `codex-rs/app-server-protocol/` defines a versioned RPC protocol (v2) between the TUI frontend and the agent backend. All new API development goes through v2 with strict naming conventions (`*Params`/`*Response`/`*Notification`, `resource/method` RPC naming). 
+ +**Feature Flag System:** `codex-rs/features/` centralizes 60+ feature flags with lifecycle stages (UnderDevelopment, Experimental, Stable, Deprecated, Removed). Features have metadata (menu name, description, announcement text) and can carry custom config structs. + +**Bazel + Cargo Dual Build:** Codex CLI uses both Cargo (for development) and Bazel (for CI/release). The `find_resource!` macro and `cargo_bin()` helper abstract over runfile differences. + +**Snapshot Testing:** `codex-rs/tui/` extensively uses `insta` for UI snapshot tests. Any UI change requires corresponding snapshot coverage. + +**Core Modularity:** Explicit resistance to adding code to `codex-core`. New functionality goes into purpose-built crates (`codex-apply-patch`, `codex-memories`, `codex-sandboxing`) rather than growing the core crate. + +### DeepSeek TUI + +**RLM (Recursive Language Model):** Unique in this space. A sandboxed Python REPL where a sub-LLM can call helpers (`llm_query`, `llm_query_batched`, `rlm_query`) for batch processing, chunking, and recursive critique. Neither competitor has an equivalent. + +**Durable Tasks:** Restart-aware persistent task objects with evidence tracking (gate runs, PR attempts, timeline). Designed for long-running autonomous work that survives restarts. + +**Automations:** Scheduled recurring tasks with cron-style RRULE recurrence. Unique among the three. 
+ +--- + +## What DeepSeek TUI Already Excels At + +- **RLM** — batch/bulk LLM processing in a Python sandbox; no equivalent in either competitor +- **Finance** — live stock/crypto quotes; unique in this space +- **Automations** — scheduled recurring tasks with cron rules +- **Durable tasks** — restart-aware with evidence tracking and gate verification +- **Turn revert** — undo workspace changes per turn via side-git snapshots +- **Data validation** — JSON/TOML validation tool +- **Web run** — headless browser interaction (Codex CLI has Browser Use but it's desktop-app-gated) +- **Parallel tool execution** — explicitly modeled as infrastructure +- **Git/GitHub operations** — comprehensive git module with blame, log, diff, status plus full GitHub API via gh +- **Project map** — high-level project structure generation + +--- + +## Recommended Implementation Order + +1. **LSP tool** — single biggest capability gap. Estimated 30–50% reduction in codebase exploration turns. +2. **Path-pattern permissions** — reduces approval fatigue by 60–80% over long sessions. +3. **Persistent memory** — compounds value across sessions; foundational for long-running projects. +4. **Pre/Post-tool-use hooks** — escape hatch for user-defined guardrails without system prompt bloat. +5. **Skill auto-discovery** — enables community skill ecosystem and Claude Code compatibility. +6. **Agent profiles** — named agent types with model/permission inheritance. +7. **Tool search for MCP** — keeps context window manageable when connecting to MCP servers with many tools. +8. **Shell sandboxing** — security improvement, starting with macOS Seatbelt. diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index bbe717a7..48f62f91 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -217,15 +217,17 @@ If you are upgrading from older releases: - `sandbox_mode` (string, optional): `read-only`, `workspace-write`, `danger-full-access`, `external-sandbox`. 
- `managed_config_path` (string, optional): managed config file loaded after user/env config. - `requirements_path` (string, optional): requirements file used to enforce allowed approval/sandbox values. -- `max_subagents` (int, optional): defaults to `5` and is clamped to `1..=20`. +- `max_subagents` (int, optional): defaults to `10` and is clamped to `1..=20`. - `subagents.*` (optional): per-role/type model defaults for `agent_spawn` and - `agent_swarm`. Explicit tool `model` values win, then role/type overrides, - then the parent runtime model. Supported convenience keys are + related sub-agent tools. Explicit tool `model` values win, then role/type + overrides, then the parent runtime model. Supported convenience keys are `default_model`, `worker_model`, `explorer_model`, `awaiter_model`, - `review_model`, `custom_model`; `[subagents.models]` accepts lower-case role - or type keys such as `worker`, `explorer`, `general`, `explore`, `plan`, and - `review`. Values must normalize to a supported DeepSeek model id before any - swarm worker is spawned. + `review_model`, `custom_model`, and `max_concurrent`. The + `[subagents] max_concurrent` value overrides top-level `max_subagents` and is + also clamped to `1..=20`. `[subagents.models]` accepts lower-case role or type + keys such as `worker`, `explorer`, `general`, `explore`, `plan`, and + `review`. Values must normalize to a supported DeepSeek model id before an + agent is spawned. - `skills_dir` (string, optional): defaults to `~/.deepseek/skills` (each skill is a directory containing `SKILL.md`). Workspace-local `.agents/skills` or `./skills` are preferred when present. - `mcp_config_path` (string, optional): defaults to `~/.deepseek/mcp.json`. It is visible in `/config` and can be changed from the TUI. The new path is @@ -272,6 +274,7 @@ If you are upgrading from older releases: - `[capacity].fallback_default_prior` (float, default `3.8`) - `tui.alternate_screen` (string, optional): `auto`, `always`, or `never`. 
`auto` disables the alternate screen in Zellij; `--no-alt-screen` forces inline mode. Set `never` or run with `--no-alt-screen` when you want real terminal scrollback. - `tui.mouse_capture` (bool, optional, default `true` when the alternate screen is active): enable internal mouse scrolling, transcript selection, and right-click context actions. TUI-owned drag selection copies only user/assistant transcript text. Set this to `false` or run with `--no-mouse-capture` for raw terminal selection. +- `tui.terminal_probe_timeout_ms` (int, optional, default `500`): startup terminal-mode probe timeout in milliseconds. Values are clamped to `100..=5000`; timeout emits a warning and aborts startup instead of hanging indefinitely. - `hooks` (optional): lifecycle hooks configuration (see `config.example.toml`). - `features.*` (optional): feature flag overrides (see below). diff --git a/docs/RELEASE_RUNBOOK.md b/docs/RELEASE_RUNBOOK.md index 9179fcc8..18662d3d 100644 --- a/docs/RELEASE_RUNBOOK.md +++ b/docs/RELEASE_RUNBOOK.md @@ -103,10 +103,18 @@ release notes explicit that no new Rust binary version shipped. ## Rust Crates Release +Crate publishing to crates.io is **manual** — there is no automated +`crates-publish` GitHub workflow. Operators run the helpers in +`scripts/release/` from a developer workstation that has `cargo login` +configured. + 1. Update the workspace version in [Cargo.toml](../Cargo.toml). -2. Tag the release as `vX.Y.Z`. -3. Let `.github/workflows/crates-publish.yml` verify the workspace version and dry-run each crate. -4. Publish crates in this order: +2. Run `./scripts/release/check-versions.sh` and + `./scripts/release/publish-crates.sh dry-run` locally; both must be clean. +3. Tag the release as `vX.Y.Z` (typically by pushing the version bump to + `main` and letting `auto-tag.yml` create the tag — see the npm wrapper + release section below for the `RELEASE_TAG_PAT` requirement). +4. 
Publish crates in this order with `./scripts/release/publish-crates.sh publish`: - `deepseek-secrets` - `deepseek-config` - `deepseek-protocol` @@ -166,11 +174,11 @@ npm publish --access public ### Why not automated? - `release.yml`'s old `publish-npm` job used `secrets.NPM_TOKEN`, but npm's 2FA-by-default policy means a publish token must be either an automation token with "Bypass 2FA for token authentication" enabled OR an account-level 2FA-disabled state. We don't have either configured. -- The `publish-npm.yml` workflow remains as inert plumbing for a future move to npm Trusted Publishing (OIDC). It only fires on `workflow_dispatch` and only works once Trusted Publishing is configured for *that* workflow filename on the npm side. +- The standalone `publish-npm.yml` and `crates-publish.yml` workflows have been removed; no inert automation plumbing remains. A future move to npm Trusted Publishing (OIDC) would re-introduce a dedicated workflow at that point. ### If you fix the token later -To re-enable automated publish: provision an npm automation token with "Bypass 2FA for token authentication" enabled, store it as repo secret `NPM_TOKEN`, and revert this section's "manual" framing along with re-adding the `publish-npm` job in `release.yml`. +To re-enable automated publish: provision an npm automation token with "Bypass 2FA for token authentication" enabled and store it as a repo secret (or set up npm Trusted Publishing via OIDC, which needs no stored token), then re-add a `publish-npm` job to `release.yml` (or a dedicated workflow) and revert this section's "manual" framing. 
## Recovery and Rollback diff --git a/docs/TOOL_SURFACE.md b/docs/TOOL_SURFACE.md index 8545734d..a8674895 100644 --- a/docs/TOOL_SURFACE.md +++ b/docs/TOOL_SURFACE.md @@ -147,7 +147,7 @@ reflect very different cost classes: | Tool | What each child does | Wall-clock | Token cost | Cap | |---|---|---|---|---| -| `agent_spawn` | Full sub-agent loop (planning, tool calls, multi-turn streaming, can spawn children) | minutes | thousands of tokens | 5 in flight | +| `agent_spawn` | Full sub-agent loop (planning, tool calls, multi-turn streaming, can spawn children) | minutes | thousands of tokens | 10 in flight by default (`[subagents].max_concurrent`, hard ceiling 20) | | `rlm_query` | One-shot non-streaming Chat Completions call to `deepseek-v4-flash` | seconds | ~hundreds of tokens | 16 per call | The caps appear in each tool's description and error messages so the model