fix(working-set): cap file-index walk to bound first-turn latency (#697)
`WorkingSet::build_file_index` walks the workspace tree (depth 6) plus all `DISCOVERY_ALWAYS_DIRS` (depth 5) the first time `fuzzy_resolve` is called. On huge workspaces that walk dominates the first turn's wall clock, surfacing as the ~10-second `Working...` hang reported in #697. Adds a `FILE_INDEX_MAX_ENTRIES = 50_000` cap. When the walk produces more than 50K (file or directory) entries the index is returned early with a warning. A surplus entry simply isn't fuzzy-resolvable; literal paths still resolve via the existing fallback so functionality is preserved on outsized workspaces. 50K is well above any realistic project's depth-6 entry count, so for typical users the cap is a no-op. The existing `working_set` tests (26/26) still pass — this is purely a defensive upper bound on a path that previously had none. Refs #697 Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -93,9 +93,18 @@ impl Workspace {
|
||||
|
||||
fn build_file_index(&self) -> HashMap<String, Vec<PathBuf>> {
|
||||
let mut index: HashMap<String, Vec<PathBuf>> = HashMap::new();
|
||||
let mut total: usize = 0;
|
||||
let builder = discovery_walk_builder(&self.root, Some(6));
|
||||
|
||||
for entry in builder.build().flatten() {
|
||||
if total >= FILE_INDEX_MAX_ENTRIES {
|
||||
tracing::warn!(
|
||||
target: "working_set",
|
||||
limit = FILE_INDEX_MAX_ENTRIES,
|
||||
"file-index discovery hit the entry cap; truncating to keep first-turn latency bounded (#697)"
|
||||
);
|
||||
return index;
|
||||
}
|
||||
if entry
|
||||
.file_type()
|
||||
.is_some_and(|ft| ft.is_file() || ft.is_dir())
|
||||
@@ -105,11 +114,15 @@ impl Workspace {
|
||||
.entry(name)
|
||||
.or_default()
|
||||
.push(entry.path().to_path_buf());
|
||||
total += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Also index AI-tool dot-directories with gitignore disabled.
|
||||
for dir_name in DISCOVERY_ALWAYS_DIRS {
|
||||
if total >= FILE_INDEX_MAX_ENTRIES {
|
||||
break;
|
||||
}
|
||||
let dot_dir = self.root.join(dir_name);
|
||||
if !dot_dir.is_dir() {
|
||||
continue;
|
||||
@@ -122,6 +135,9 @@ impl Workspace {
|
||||
.ignore(false)
|
||||
.max_depth(Some(5));
|
||||
for entry in dot_builder.build().flatten() {
|
||||
if total >= FILE_INDEX_MAX_ENTRIES {
|
||||
break;
|
||||
}
|
||||
// Exclude machine-generated bulk (e.g. .deepseek/snapshots/).
|
||||
if path_is_excluded_from_discovery(&self.root, entry.path()) {
|
||||
continue;
|
||||
@@ -135,6 +151,7 @@ impl Workspace {
|
||||
.entry(name)
|
||||
.or_default()
|
||||
.push(entry.path().to_path_buf());
|
||||
total += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -207,6 +224,15 @@ impl Workspace {
|
||||
/// monorepos.
|
||||
const COMPLETIONS_WALK_DEPTH: usize = 6;
|
||||
|
||||
/// Hard cap on the number of `(file or directory)` entries indexed by
|
||||
/// [`WorkingSet::build_file_index`]. The fuzzy-resolve index is a
|
||||
/// convenience for [`WorkingSet::fuzzy_resolve`]; missing entries fall
|
||||
/// back to literal-path resolution. Capping here keeps the first
|
||||
/// `fuzzy_resolve` call bounded on huge workspaces (#697 reported a
|
||||
/// ~10s hang on the first turn). For typical projects 50K is well
|
||||
/// above the actual entry count and the cap is a no-op.
|
||||
const FILE_INDEX_MAX_ENTRIES: usize = 50_000;
|
||||
|
||||
/// Directories that must remain discoverable for `@`-mention completion and
|
||||
/// fuzzy file resolution even when excluded by `.gitignore`. AI-tool
|
||||
/// convention directories (`.deepseek/`, `.cursor/`, `.claude/`, `.agents/`)
|
||||
|
||||
Reference in New Issue
Block a user