fix(working-set): cap file-index walk to bound first-turn latency (#697)

`WorkingSet::build_file_index` walks the workspace tree (depth 6) plus
all `DISCOVERY_ALWAYS_DIRS` (depth 5) the first time `fuzzy_resolve` is
called. On huge workspaces that walk dominates the first turn's wall
clock, surfacing as the ~10-second `Working...` hang reported in #697.

Adds a `FILE_INDEX_MAX_ENTRIES = 50_000` cap. Once 50K file or directory
entries have been indexed, the walk stops and the partial index is
returned; the main workspace walk logs a warning when it is cut short.
Entries beyond the cap simply aren't fuzzy-resolvable; literal paths
still resolve via the existing fallback, so functionality is preserved
on outsized workspaces.

50K is well above any realistic project's depth-6 entry count, so for
typical users the cap is a no-op. The existing `working_set` tests
(26/26) still pass — this is purely a defensive upper bound on a path
that previously had none.

Refs #697
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
LinQ
2026-05-10 18:53:50 +01:00
committed by Hunter Bown
parent 60347b8940
commit af22a5c7b1
+26
View File
@@ -93,9 +93,18 @@ impl Workspace {
fn build_file_index(&self) -> HashMap<String, Vec<PathBuf>> {
let mut index: HashMap<String, Vec<PathBuf>> = HashMap::new();
let mut total: usize = 0;
let builder = discovery_walk_builder(&self.root, Some(6));
for entry in builder.build().flatten() {
if total >= FILE_INDEX_MAX_ENTRIES {
tracing::warn!(
target: "working_set",
limit = FILE_INDEX_MAX_ENTRIES,
"file-index discovery hit the entry cap; truncating to keep first-turn latency bounded (#697)"
);
return index;
}
if entry
.file_type()
.is_some_and(|ft| ft.is_file() || ft.is_dir())
@@ -105,11 +114,15 @@ impl Workspace {
.entry(name)
.or_default()
.push(entry.path().to_path_buf());
total += 1;
}
}
// Also index AI-tool dot-directories with gitignore disabled.
for dir_name in DISCOVERY_ALWAYS_DIRS {
if total >= FILE_INDEX_MAX_ENTRIES {
break;
}
let dot_dir = self.root.join(dir_name);
if !dot_dir.is_dir() {
continue;
@@ -122,6 +135,9 @@ impl Workspace {
.ignore(false)
.max_depth(Some(5));
for entry in dot_builder.build().flatten() {
if total >= FILE_INDEX_MAX_ENTRIES {
break;
}
// Exclude machine-generated bulk (e.g. .deepseek/snapshots/).
if path_is_excluded_from_discovery(&self.root, entry.path()) {
continue;
@@ -135,6 +151,7 @@ impl Workspace {
.entry(name)
.or_default()
.push(entry.path().to_path_buf());
total += 1;
}
}
}
@@ -207,6 +224,15 @@ impl Workspace {
/// monorepos.
const COMPLETIONS_WALK_DEPTH: usize = 6;
// NOTE(review): the diff hunk headers show `build_file_index` under
// `impl Workspace`, while the links below name `WorkingSet` — confirm the
// intra-doc links resolve.
/// Hard cap on the number of file and directory entries indexed by
/// [`WorkingSet::build_file_index`].
///
/// The budget is shared: entries from the workspace walk and from the
/// `DISCOVERY_ALWAYS_DIRS` walks count against the same running total, and
/// indexing stops as soon as that total reaches this value.
///
/// The fuzzy-resolve index is a convenience for
/// [`WorkingSet::fuzzy_resolve`]; missing entries fall back to
/// literal-path resolution. Capping here keeps the first `fuzzy_resolve`
/// call bounded on huge workspaces (#697 reported a ~10s hang on the first
/// turn). For typical projects 50K is well above the actual entry count
/// and the cap is a no-op.
const FILE_INDEX_MAX_ENTRIES: usize = 50_000;
/// Directories that must remain discoverable for `@`-mention completion and
/// fuzzy file resolution even when excluded by `.gitignore`. AI-tool
/// convention directories (`.deepseek/`, `.cursor/`, `.claude/`, `.agents/`)