From af22a5c7b1d624767efa20d468c53d66de1d5d80 Mon Sep 17 00:00:00 2001 From: LinQ Date: Sun, 10 May 2026 18:53:50 +0100 Subject: [PATCH] fix(working-set): cap file-index walk to bound first-turn latency (#697) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `WorkingSet::build_file_index` walks the workspace tree (depth 6) plus all `DISCOVERY_ALWAYS_DIRS` (depth 5) the first time `fuzzy_resolve` is called. On huge workspaces that walk dominates the first turn's wall clock, surfacing as the ~10-second `Working...` hang reported in #697. Adds a `FILE_INDEX_MAX_ENTRIES = 50_000` cap. Once 50K (file or directory) entries have been indexed, the walk stops early and the truncated index is returned; the warning is logged when the cap is hit during the main workspace walk. A surplus entry simply isn't fuzzy-resolvable; literal paths still resolve via the existing fallback so functionality is preserved on outsized workspaces. 50K is well above any realistic project's depth-6 entry count, so for typical users the cap is a no-op. The existing `working_set` tests (26/26) still pass — this is purely a defensive upper bound on a path that previously had none. 
Refs #697 Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/tui/src/working_set.rs | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/crates/tui/src/working_set.rs b/crates/tui/src/working_set.rs index c2a5f14d..2c792971 100644 --- a/crates/tui/src/working_set.rs +++ b/crates/tui/src/working_set.rs @@ -93,9 +93,18 @@ impl Workspace { fn build_file_index(&self) -> HashMap> { let mut index: HashMap> = HashMap::new(); + let mut total: usize = 0; let builder = discovery_walk_builder(&self.root, Some(6)); for entry in builder.build().flatten() { + if total >= FILE_INDEX_MAX_ENTRIES { + tracing::warn!( + target: "working_set", + limit = FILE_INDEX_MAX_ENTRIES, + "file-index discovery hit the entry cap; truncating to keep first-turn latency bounded (#697)" + ); + return index; + } if entry .file_type() .is_some_and(|ft| ft.is_file() || ft.is_dir()) @@ -105,11 +114,15 @@ impl Workspace { .entry(name) .or_default() .push(entry.path().to_path_buf()); + total += 1; } } // Also index AI-tool dot-directories with gitignore disabled. for dir_name in DISCOVERY_ALWAYS_DIRS { + if total >= FILE_INDEX_MAX_ENTRIES { + break; + } let dot_dir = self.root.join(dir_name); if !dot_dir.is_dir() { continue; @@ -122,6 +135,9 @@ impl Workspace { .ignore(false) .max_depth(Some(5)); for entry in dot_builder.build().flatten() { + if total >= FILE_INDEX_MAX_ENTRIES { + break; + } // Exclude machine-generated bulk (e.g. .deepseek/snapshots/). if path_is_excluded_from_discovery(&self.root, entry.path()) { continue; @@ -135,6 +151,7 @@ impl Workspace { .entry(name) .or_default() .push(entry.path().to_path_buf()); + total += 1; } } } @@ -207,6 +224,15 @@ impl Workspace { /// monorepos. const COMPLETIONS_WALK_DEPTH: usize = 6; +/// Hard cap on the number of `(file or directory)` entries indexed by +/// [`WorkingSet::build_file_index`]. 
The fuzzy-resolve index is a +/// convenience for [`WorkingSet::fuzzy_resolve`]; missing entries fall +/// back to literal-path resolution. Capping here keeps the first +/// `fuzzy_resolve` call bounded on huge workspaces (#697 reported a +/// ~10s hang on the first turn). For typical projects 50K is well +/// above the actual entry count and the cap is a no-op. +const FILE_INDEX_MAX_ENTRIES: usize = 50_000; + /// Directories that must remain discoverable for `@`-mention completion and /// fuzzy file resolution even when excluded by `.gitignore`. AI-tool /// convention directories (`.deepseek/`, `.cursor/`, `.claude/`, `.agents/`)