perf+fix(file_mention): address Gemini and Devin code-review findings
Five issues from the PR #105 review pass: 1. Lazy file-tree index for fuzzy_resolve (HIGH, Gemini) The previous `fuzzy_resolve` walked the workspace up to depth 6 on every miss. A user typing several non-existent paths in one message could trigger multiple disk-intensive walks. Replace with a `OnceLock`-backed basename → paths index, built once on first miss and reused thereafter. 2. Cache cwd at construction (MEDIUM, Gemini) `std::env::current_dir()` was a syscall on every `resolve` call (up to 8 mentions per message). Capture once in `Workspace::new()` and store in the struct. 3. Include directories in fuzzy match (MEDIUM, Gemini) The mention system supports directory listings, but fuzzy_resolve was restricted to `is_file()`. Allow directories too. 4. Drop `Path::canonicalize` from the mention loop (MEDIUM, Gemini) `Workspace::resolve` already returns absolute paths when the workspace root is absolute (always true in TUI use). Removed the per-mention `canonicalize` syscall on the message-send hot path. The rare symlink-aliasing dedup miss is an acceptable cost. 5. Gate <missing-file> blocks through dedup set (Devin) The `Err` arm in `local_context_from_file_mentions` pushed a `<missing-file>` block before reaching the `seen.insert` check, so the same non-existent mention typed twice produced duplicate blocks and wasted prompt tokens. Restructured the loop so all blocks (existing AND missing) flow through the dedup gate. Bonus: replaced the test that was mutating the global cwd (race-prone across tests) with one that constructs `Workspace::with_cwd` explicitly. Added a second test exercising the lazy-index + directory-match paths. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -303,24 +303,38 @@ fn local_context_from_file_mentions(input: &str, workspace: &Path) -> Option<Str
|
||||
let mut blocks = Vec::new();
|
||||
let mut seen = std::collections::HashSet::new();
|
||||
let ws = Workspace::new(workspace.to_path_buf());
|
||||
|
||||
|
||||
for mention in mentions.into_iter().take(MAX_FILE_MENTIONS_PER_MESSAGE) {
|
||||
let (path, display_path) = match ws.resolve(&mention) {
|
||||
// `Workspace::resolve` already returns absolute paths when the root
|
||||
// is absolute (TUI always runs from an absolute workspace), so we
|
||||
// skip `canonicalize()` here — it's per-mention I/O on the
|
||||
// message-send hot path. Accept the rare symlink-aliasing dedup
|
||||
// miss as the cost of avoiding a syscall (Gemini code-review).
|
||||
let (path, display_path, exists) = match ws.resolve(&mention) {
|
||||
Ok(p) => {
|
||||
let d = p.canonicalize().unwrap_or_else(|_| p.clone()).display().to_string();
|
||||
(p, d)
|
||||
},
|
||||
let d = p.display().to_string();
|
||||
(p, d, true)
|
||||
}
|
||||
Err(p) => {
|
||||
let d = p.display().to_string();
|
||||
blocks.push(format!("<missing-file mention=\"@{mention}\" path=\"{d}\" />"));
|
||||
continue;
|
||||
(p, d, false)
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// Gate every block — including <missing-file> — through the dedup
|
||||
// set so a user typing the same non-existent file twice doesn't
|
||||
// waste tokens on duplicate missing-file blocks (Devin code-review).
|
||||
if !seen.insert(display_path.clone()) {
|
||||
continue;
|
||||
}
|
||||
blocks.push(render_file_mention_context(&mention, &path, &display_path));
|
||||
|
||||
if exists {
|
||||
blocks.push(render_file_mention_context(&mention, &path, &display_path));
|
||||
} else {
|
||||
blocks.push(format!(
|
||||
"<missing-file mention=\"@{mention}\" path=\"{display_path}\" />"
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
if blocks.is_empty() {
|
||||
@@ -402,8 +416,6 @@ fn trim_unquoted_mention(raw: &str) -> &str {
|
||||
trimmed
|
||||
}
|
||||
|
||||
|
||||
|
||||
fn render_file_mention_context(raw: &str, path: &Path, display_path: &str) -> String {
|
||||
if !path.exists() {
|
||||
return format!("<missing-file mention=\"@{raw}\" path=\"{display_path}\" />");
|
||||
|
||||
@@ -18,14 +18,33 @@ use std::path::{Path, PathBuf};
|
||||
use std::sync::OnceLock;
|
||||
|
||||
/// Repo-aware resolver for `@`-mentions and file pickers.
|
||||
#[derive(Debug, Clone)]
|
||||
///
|
||||
/// `cwd` is captured at construction; if the host's current directory changes
|
||||
/// during a session, build a fresh `Workspace`. Fuzzy lookups are backed by a
|
||||
/// lazy basename → paths index built once on first miss and reused for the
|
||||
/// rest of the session — without it, every mis-typed mention triggered a full
|
||||
/// `WalkBuilder` traversal up to depth 6 (Gemini code-review feedback).
|
||||
#[derive(Debug)]
|
||||
pub struct Workspace {
|
||||
pub root: PathBuf,
|
||||
cwd: Option<PathBuf>,
|
||||
file_index: OnceLock<HashMap<String, Vec<PathBuf>>>,
|
||||
}
|
||||
|
||||
impl Workspace {
|
||||
pub fn new(root: PathBuf) -> Self {
|
||||
Self { root }
|
||||
Self::with_cwd(root, std::env::current_dir().ok())
|
||||
}
|
||||
|
||||
/// Construct with an explicit cwd. Used by tests that need deterministic
|
||||
/// resolution against a known directory without depending on (and
|
||||
/// mutating) the process's real working directory.
|
||||
pub fn with_cwd(root: PathBuf, cwd: Option<PathBuf>) -> Self {
|
||||
Self {
|
||||
root,
|
||||
cwd,
|
||||
file_index: OnceLock::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Two-pass resolution: workspace, then cwd, then fuzzy fallback.
|
||||
@@ -43,14 +62,13 @@ impl Workspace {
|
||||
return Ok(ws_path);
|
||||
}
|
||||
|
||||
if let Ok(cwd) = std::env::current_dir() {
|
||||
if let Some(cwd) = self.cwd.as_ref() {
|
||||
let cwd_path = cwd.join(&path);
|
||||
if cwd_path.exists() {
|
||||
return Ok(cwd_path);
|
||||
}
|
||||
}
|
||||
|
||||
// Fuzzy fallback
|
||||
if let Some(fuzzy) = self.fuzzy_resolve(&path) {
|
||||
return Ok(fuzzy);
|
||||
}
|
||||
@@ -64,17 +82,40 @@ impl Workspace {
|
||||
return None;
|
||||
}
|
||||
|
||||
let index = self.file_index.get_or_init(|| self.build_file_index());
|
||||
index.get(&needle).and_then(|paths| paths.first()).cloned()
|
||||
}
|
||||
|
||||
fn build_file_index(&self) -> HashMap<String, Vec<PathBuf>> {
|
||||
let mut index: HashMap<String, Vec<PathBuf>> = HashMap::new();
|
||||
let mut builder = WalkBuilder::new(&self.root);
|
||||
builder.hidden(true).follow_links(false).max_depth(Some(6));
|
||||
|
||||
for entry in builder.build().flatten() {
|
||||
if entry.file_type().map_or(false, |ft| ft.is_file()) {
|
||||
if entry.file_name().to_string_lossy().to_lowercase() == needle {
|
||||
return Some(entry.path().to_path_buf());
|
||||
}
|
||||
if entry
|
||||
.file_type()
|
||||
.is_some_and(|ft| ft.is_file() || ft.is_dir())
|
||||
{
|
||||
let name = entry.file_name().to_string_lossy().to_lowercase();
|
||||
index
|
||||
.entry(name)
|
||||
.or_default()
|
||||
.push(entry.path().to_path_buf());
|
||||
}
|
||||
}
|
||||
None
|
||||
index
|
||||
}
|
||||
}
|
||||
|
||||
impl Clone for Workspace {
|
||||
fn clone(&self) -> Self {
|
||||
// Don't carry the cached file_index — clones get a fresh OnceLock so
|
||||
// they don't pin a stale snapshot of the previous owner's tree.
|
||||
Self {
|
||||
root: self.root.clone(),
|
||||
cwd: self.cwd.clone(),
|
||||
file_index: OnceLock::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -869,31 +910,51 @@ mod tests {
|
||||
#[test]
|
||||
fn workspace_resolve_respects_cwd_and_workspace() {
|
||||
let tmp = TempDir::new().unwrap();
|
||||
let ws = Workspace::new(tmp.path().to_path_buf());
|
||||
|
||||
|
||||
let sub = tmp.path().join("sub");
|
||||
std::fs::create_dir_all(&sub).unwrap();
|
||||
|
||||
let bar = sub.join("bar.txt");
|
||||
std::fs::write(&bar, "bar").unwrap();
|
||||
|
||||
|
||||
let nested = tmp.path().join("nested/deep");
|
||||
std::fs::create_dir_all(&nested).unwrap();
|
||||
let file_md = nested.join("file.md");
|
||||
std::fs::write(&file_md, "md").unwrap();
|
||||
|
||||
// Simulate CWD being /tmp/foo/sub
|
||||
let old_cwd = std::env::current_dir().unwrap();
|
||||
std::env::set_current_dir(&sub).unwrap();
|
||||
|
||||
// Test 1: type @bar.txt from sub/ -> should resolve to sub/bar.txt
|
||||
// Construct with an explicit cwd so the test doesn't race with other
|
||||
// tests that mutate the real process cwd.
|
||||
let ws = Workspace::with_cwd(tmp.path().to_path_buf(), Some(sub.clone()));
|
||||
|
||||
// Test 1: @bar.txt with cwd=sub → resolves via the cwd pass.
|
||||
let res1 = ws.resolve("bar.txt").unwrap();
|
||||
assert_eq!(res1.canonicalize().unwrap_or(res1), bar.canonicalize().unwrap_or(bar));
|
||||
|
||||
// Test 2: type @nested/deep/file.md from sub/ -> should fall back to workspace root
|
||||
assert_eq!(
|
||||
res1.canonicalize().unwrap_or(res1),
|
||||
bar.canonicalize().unwrap_or(bar)
|
||||
);
|
||||
|
||||
// Test 2: @nested/deep/file.md → falls through to workspace root.
|
||||
let res2 = ws.resolve("nested/deep/file.md").unwrap();
|
||||
assert_eq!(res2.canonicalize().unwrap_or(res2), file_md.canonicalize().unwrap_or(file_md));
|
||||
|
||||
std::env::set_current_dir(old_cwd).unwrap();
|
||||
assert_eq!(
|
||||
res2.canonicalize().unwrap_or(res2),
|
||||
file_md.canonicalize().unwrap_or(file_md)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fuzzy_index_finds_files_and_directories() {
|
||||
let tmp = TempDir::new().unwrap();
|
||||
std::fs::create_dir_all(tmp.path().join("a/b/target_dir")).unwrap();
|
||||
std::fs::write(tmp.path().join("a/b/needle.rs"), "fn main(){}").unwrap();
|
||||
|
||||
let ws = Workspace::with_cwd(tmp.path().to_path_buf(), None);
|
||||
|
||||
// Basename-only mention triggers fuzzy fallback for both files and dirs.
|
||||
let f = ws.resolve("needle.rs").unwrap();
|
||||
assert!(f.ends_with("a/b/needle.rs"));
|
||||
let d = ws.resolve("target_dir").unwrap();
|
||||
assert!(d.ends_with("a/b/target_dir"));
|
||||
|
||||
// Index was populated exactly once (subsequent lookups reuse it).
|
||||
assert!(ws.file_index.get().is_some());
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user