From 4fe3bc37bc903fbf7b2d2432eebabadae8c93e20 Mon Sep 17 00:00:00 2001
From: Hunter Bown
Date: Mon, 4 May 2026 03:06:04 -0500
Subject: [PATCH] feat(tui): file @-mention frecency ranking (#441)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When the user @-mentions a file, score it; on the next mention popup,
re-sort completions so files mentioned often + recently float to the
top. Never-mentioned candidates fall back to the workspace ranker's
order without surprises.

* New `tui/file_frecency.rs` module:
  - `FrecencyRecord { path, count, last_used }`, persisted as a JSONL
    append at `~/.deepseek/file-frecency.jsonl`.
  - `record_mention(path)` bumps the count, stamps the time, appends a
    line, and evicts to a 1000-entry cap (matches the issue's
    acceptance criterion). Eviction drops the lowest-scored entries.
  - `rerank_by_frecency(candidates)` scores each record as
    `count * exp(-ln(2) * age / HALF_LIFE)` (7-day half-life — same as
    the OPENCODE source) and stable-sorts the candidate list.
* Wired into `find_file_mention_completions` so the menu shows
  re-ranked entries automatically.
* Wired into both confirmation paths: `apply_mention_menu_selection`
  (Enter / Tab on the popup) and `try_autocomplete_file_mention`'s
  unique-match shortcut.

I/O is best-effort: a missing home directory, a permission failure, or
a corrupt JSONL line gets silently skipped — frecency loss is never
worth blocking the user's autocomplete.

Two unit tests cover the core: rerank floats a hot path above
never-mentioned ones (and preserves the original order for ties), and
score decay drops a stale-but-popular entry below a fresh one after
~8 half-lives.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 crates/tui/src/tui/file_frecency.rs | 261 ++++++++++++++++++++++++++++
 crates/tui/src/tui/file_mention.rs  |   8 +
 crates/tui/src/tui/mod.rs           |   1 +
 3 files changed, 270 insertions(+)
 create mode 100644 crates/tui/src/tui/file_frecency.rs

diff --git a/crates/tui/src/tui/file_frecency.rs b/crates/tui/src/tui/file_frecency.rs
new file mode 100644
index 00000000..5129d695
--- /dev/null
+++ b/crates/tui/src/tui/file_frecency.rs
@@ -0,0 +1,261 @@
+//! @-mention frecency tracking (#441).
+//!
+//! Records every file the user @-mentions with a timestamp and mention count,
+//! decays the score over time so a file that was hot last week ranks below
+//! one mentioned 5 minutes ago, and re-orders mention-popup completions by
+//! the resulting score. Persisted as a single JSONL file at
+//! `~/.deepseek/file-frecency.jsonl` so frecency survives restarts.
+//!
+//! Append-only on disk, folded in memory: the loader replays every line
+//! into a `HashMap<String, FrecencyRecord>` keyed by workspace-relative
+//! path, with later lines replacing earlier ones (this module never
+//! rewrites the file). The in-memory map is capped at 1000 entries, evicting
+//! the lowest-scored on overflow — same heuristic the OPENCODE source uses.
+
+use std::collections::HashMap;
+use std::fs::OpenOptions;
+use std::io::Write;
+use std::path::PathBuf;
+use std::sync::{Mutex, OnceLock};
+use std::time::{SystemTime, UNIX_EPOCH};
+
+use serde::{Deserialize, Serialize};
+
+/// Hard cap on the number of paths we track (the acceptance criterion for
+/// #441). Older / lower-scored entries are evicted when the map exceeds
+/// this.
+const FRECENCY_CAP: usize = 1000;
+
+/// Half-life of a frecency score, in seconds. After this many seconds the
+/// score has decayed to ½ of its peak. 7 days is OPENCODE's default — long
+/// enough that a commonly-edited file stays sticky across a workweek but
+/// short enough that yesterday's deep-dive doesn't haunt you forever.
+const HALF_LIFE_SECS: f64 = 7.0 * 24.0 * 60.0 * 60.0;
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+struct FrecencyRecord {
+    /// Workspace-relative path string.
+    path: String,
+    /// Total mentions over the lifetime of the entry.
+    count: u32,
+    /// Unix timestamp (seconds) of the last mention.
+    last_used: u64,
+}
+
+#[derive(Debug, Default)]
+struct Store {
+    by_path: HashMap<String, FrecencyRecord>, // keyed by `FrecencyRecord::path`
+    persisted_path: Option<PathBuf>, // JSONL log location; `None` disables persistence
+    loaded: bool, // true once the on-disk replay has been attempted
+}
+
+/// Process-wide frecency store, created empty on first access.
+fn store() -> &'static Mutex<Store> {
+    static STORE: OnceLock<Mutex<Store>> = OnceLock::new();
+    STORE.get_or_init(|| Mutex::new(Store::default()))
+}
+
+fn default_path() -> Option<PathBuf> {
+    dirs::home_dir().map(|h| h.join(".deepseek").join("file-frecency.jsonl"))
+}
+
+/// Current Unix time in whole seconds; 0 if the clock reads before the epoch.
+fn now_secs() -> u64 {
+    SystemTime::now()
+        .duration_since(UNIX_EPOCH)
+        .map_or(0, |d| d.as_secs())
+}
+
+/// Time-decayed frecency score, in arbitrary units: the mention count times
+/// `2^(-age / HALF_LIFE_SECS)`, where age is the time since `last_used`. A
+/// record keeps ~3% of its count after 5 half-lives, under 0.1% after 10.
+fn decayed_score(record: &FrecencyRecord, now: u64) -> f64 {
+    let age_secs = now.saturating_sub(record.last_used) as f64; // future stamps count as age 0
+    let lambda = std::f64::consts::LN_2 / HALF_LIFE_SECS;
+    f64::from(record.count) * (-lambda * age_secs).exp()
+}
+
+/// Lazily replay the on-disk log into `store`; only the first call does work.
+/// Blank or unparsable lines are skipped and later lines win per path.
+fn ensure_loaded(store: &mut Store) {
+    if store.loaded {
+        return;
+    }
+    store.loaded = true; // set first so a failed load is never retried
+    let Some(path) = default_path() else {
+        return;
+    };
+    store.persisted_path = Some(path.clone());
+    let Ok(text) = std::fs::read_to_string(&path) else {
+        return;
+    };
+    let records = text
+        .lines()
+        .filter_map(|line| serde_json::from_str::<FrecencyRecord>(line).ok());
+    for record in records {
+        store.by_path.insert(record.path.clone(), record);
+    }
+    evict_to_cap(store, now_secs()); // the log is never compacted, so re-cap here
+}
+
+/// Drop the lowest-scored records until at most `FRECENCY_CAP` remain.
+fn evict_to_cap(store: &mut Store, now: u64) {
+    let excess = store.by_path.len().saturating_sub(FRECENCY_CAP);
+    if excess == 0 {
+        return;
+    }
+    let mut scored: Vec<(String, f64)> = store
+        .by_path
+        .iter()
+        .map(|(k, v)| (k.clone(), decayed_score(v, now)))
+        .collect();
+    scored.sort_by(|a, b| a.1.total_cmp(&b.1));
+    for (key, _) in scored.into_iter().take(excess) {
+        store.by_path.remove(&key);
+    }
+}
+
+/// Append `record` as one JSON line to `path`, creating parent dirs as needed.
+fn append_record_line(path: &std::path::Path, record: &FrecencyRecord) -> std::io::Result<()> {
+    if let Some(parent) = path.parent() {
+        std::fs::create_dir_all(parent)?;
+    }
+    let mut file = OpenOptions::new().create(true).append(true).open(path)?;
+    let line = serde_json::to_string(record).map_err(std::io::Error::other)?;
+    writeln!(file, "{line}")
+}
+
+/// Record one mention of `path` (a workspace-relative path string). Updates
+/// the in-memory store, persists a single JSONL line, and evicts the lowest-
+/// scored entry if we just exceeded the cap. Best-effort: I/O failures are
+/// logged and swallowed — losing a frecency datapoint is never worth
+/// failing the user's `@` autocomplete.
+pub fn record_mention(path: &str) {
+    if path.is_empty() {
+        return;
+    }
+    let store = store();
+    let Ok(mut store) = store.lock() else {
+        return;
+    };
+    ensure_loaded(&mut store);
+    let now = now_secs();
+    let entry = store
+        .by_path
+        .entry(path.to_string())
+        .or_insert_with(|| FrecencyRecord {
+            path: path.to_string(),
+            count: 0,
+            last_used: now,
+        });
+    entry.count = entry.count.saturating_add(1);
+    entry.last_used = now;
+    let snapshot = entry.clone();
+    if let Some(persisted_path) = store.persisted_path.as_ref()
+        && let Err(err) = append_record_line(persisted_path, &snapshot)
+    {
+        tracing::debug!(target: "frecency", "persist failed: {err}");
+    }
+    evict_to_cap(&mut store, now);
+}
+
+/// Re-sort a candidate list by frecency score (highest first), preserving
+/// the original order for ties so the underlying ranker's choices aren't
+/// upended. Candidates the store has never seen score zero — they end up
+/// at the bottom of the sort, which means a one-time mention will start
+/// floating to the top after first use.
+#[must_use]
+pub fn rerank_by_frecency(candidates: Vec<String>) -> Vec<String> {
+    if candidates.len() <= 1 {
+        return candidates;
+    }
+    let store = store();
+    let Ok(mut store) = store.lock() else {
+        return candidates;
+    };
+    ensure_loaded(&mut store);
+    let now = now_secs();
+    let mut scored: Vec<(usize, String, f64)> = candidates
+        .into_iter()
+        .enumerate()
+        .map(|(idx, path)| {
+            let score = store
+                .by_path
+                .get(&path)
+                .map(|r| decayed_score(r, now))
+                .unwrap_or(0.0);
+            (idx, path, score)
+        })
+        .collect();
+    // Stable sort on (-score, original-index): ties keep the underlying
+    // ranker's order.
+    scored.sort_by(|a, b| {
+        b.2.partial_cmp(&a.2)
+            .unwrap_or(std::cmp::Ordering::Equal)
+            .then_with(|| a.0.cmp(&b.0))
+    });
+    scored.into_iter().map(|(_, path, _)| path).collect()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Recently mentioned paths win against never-mentioned ones; never-mentioned
+    /// preserve their original ranker order.
+    #[test]
+    fn rerank_floats_recent_paths_to_the_top() {
+        // Use the global store; reset its state so we don't leak across tests.
+        let store = super::store();
+        let mut s = store.lock().unwrap();
+        s.by_path.clear();
+        s.loaded = true; // skip on-disk replay
+        s.persisted_path = None; // skip persistence
+        let now = super::now_secs();
+        s.by_path.insert(
+            "src/popular.rs".into(),
+            FrecencyRecord {
+                path: "src/popular.rs".into(),
+                count: 8,
+                last_used: now,
+            },
+        );
+        drop(s);
+
+        let order = super::rerank_by_frecency(vec![
+            "README.md".to_string(),
+            "src/popular.rs".to_string(),
+            "Cargo.toml".to_string(),
+        ]);
+        assert_eq!(order[0], "src/popular.rs");
+        // README.md was first in original order; Cargo.toml second. Both score 0
+        // so the original relative order survives.
+        assert_eq!(order[1], "README.md");
+        assert_eq!(order[2], "Cargo.toml");
+    }
+
+    /// Decayed score drops below a freshly-used entry after enough half-lives
+    /// that count alone can't carry the older one. With a 7-day half-life,
+    /// 8 weeks gives 8 half-lives → ~256× decay; an entry mentioned twice
+    /// today comfortably beats one mentioned 50× two months ago.
+    #[test]
+    fn old_entries_decay_below_recent_ones() {
+        let now: u64 = 7 * 24 * 60 * 60 * 8; // 8 weeks (8 half-lives)
+        let stale = FrecencyRecord {
+            path: "x".into(),
+            count: 50,
+            last_used: 0,
+        };
+        let fresh = FrecencyRecord {
+            path: "y".into(),
+            count: 2,
+            last_used: now,
+        };
+        assert!(
+            super::decayed_score(&fresh, now) > super::decayed_score(&stale, now),
+            "fresh={}, stale={}",
+            super::decayed_score(&fresh, now),
+            super::decayed_score(&stale, now)
+        );
+    }
+}
diff --git a/crates/tui/src/tui/file_mention.rs b/crates/tui/src/tui/file_mention.rs
index 540920e6..e70e49ed 100644
--- a/crates/tui/src/tui/file_mention.rs
+++ b/crates/tui/src/tui/file_mention.rs
@@ -148,6 +148,9 @@ pub fn find_file_mention_completions(
     limit: usize,
 ) -> Vec<String> {
     let entries = workspace.completions(partial, limit);
+    // #441: re-rank by frecency so files the user mentions a lot float up.
+    // Never-mentioned candidates fall back to the workspace ranker's order.
+    let entries = super::file_frecency::rerank_by_frecency(entries);
     tracing::debug!(
         target: "deepseek_tui::file_mention",
         partial = %partial,
@@ -215,6 +218,9 @@ pub fn apply_mention_menu_selection(app: &mut App, entries: &[String]) -> bool {
         .mention_menu_selected
         .min(entries.len().saturating_sub(1));
     let replacement = &entries[selected_idx];
+    // #441: bump this path's frecency before we splice it in. Persistence is
+    // a best-effort synchronous append whose failures are only logged.
+    super::file_frecency::record_mention(replacement);
     replace_file_mention(app, byte_start, &partial, replacement);
     app.mention_menu_hidden = false;
     app.status_message = Some(format!("Attached @{replacement}"));
@@ -239,6 +245,8 @@ pub fn try_autocomplete_file_mention(app: &mut App) -> bool {
         return true;
     }
     if candidates.len() == 1 {
+        // #441: a unique-match completion is also a "mention" for ranking.
+        super::file_frecency::record_mention(&candidates[0]);
         replace_file_mention(app, byte_start, &partial, &candidates[0]);
         app.status_message = Some(format!("Attached @{}", candidates[0]));
         return true;
diff --git a/crates/tui/src/tui/mod.rs b/crates/tui/src/tui/mod.rs
index 5b9c689d..354b1a18 100644
--- a/crates/tui/src/tui/mod.rs
+++ b/crates/tui/src/tui/mod.rs
@@ -13,6 +13,7 @@ pub mod context_menu;
 pub mod diff_render;
 pub mod event_broker;
 pub mod external_editor;
+pub mod file_frecency;
 pub mod file_mention;
 pub mod file_picker;
 pub mod file_tree;