feat(tui): file @-mention frecency ranking (#441)

When the user @-mentions a file, score it; on the next mention popup,
re-sort completions so files mentioned often + recently float to the top.
Never-mentioned candidates fall back to the workspace ranker's order
without surprises.

* New `tui/file_frecency.rs` module:
  - `FrecencyRecord { path, count, last_used }`, persisted as a JSONL
    append at `~/.deepseek/file-frecency.jsonl`.
  - `record_mention(path)` bumps the count, stamps the time, appends a
    line, and evicts to a 1000-entry cap (matches the issue's acceptance
    criterion). Eviction drops the lowest-scored entries.
  - `rerank_by_frecency(candidates)` decays each record's score by
    `count * exp(-ln(2) * age / HALF_LIFE)` (7-day half-life — same as
    the OPENCODE source) and stable-sorts the candidate list.
* Wired into `find_file_mention_completions` so the menu shows
  re-ranked entries automatically.
* Wired into both confirmation paths: `apply_mention_menu_selection`
  (Enter / Tab on the popup) and `try_autocomplete_file_mention`'s
  unique-match shortcut.

I/O is best-effort: a missing home directory, a permission failure,
or a corrupt JSONL line gets silently skipped — frecency loss is never
worth blocking the user's autocomplete.

Two unit tests cover the core: rerank floats a hot path above
never-mentioned ones (and preserves the original order for ties), and
score decay drops a stale-but-popular entry below a fresh one after
~8 half-lives.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hunter Bown
2026-05-04 03:06:04 -05:00
parent 59e1dd4e99
commit 4fe3bc37bc
3 changed files with 270 additions and 0 deletions
+261
View File
@@ -0,0 +1,261 @@
//! @-mention frecency tracking (#441).
//!
//! Records every file the user @-mentions with a timestamp and click count,
//! decays the score over time so a file that was hot last week ranks below
//! one mentioned 5 minutes ago, and re-orders mention-popup completions by
//! the resulting score. Persisted as a single JSONL file at
//! `~/.deepseek/file-frecency.jsonl` so frecency survives restarts.
//!
//! Append-only on the wire, compacted in memory: the loader replays every
//! line into a `HashMap<String, FrecencyEntry>` keyed by repo-relative path,
//! folding duplicates into the last record. We cap the in-memory map at
//! 1000 entries and evict the lowest-scored on overflow — same heuristic
//! the OPENCODE source uses.
use std::collections::HashMap;
use std::fs::OpenOptions;
use std::io::Write;
use std::path::PathBuf;
use std::sync::{Mutex, OnceLock};
use std::time::{SystemTime, UNIX_EPOCH};
use serde::{Deserialize, Serialize};
/// Hard cap on the number of paths we track (the acceptance criterion for
/// #441). Older / lower-scored entries are evicted when the map exceeds
/// this.
const FRECENCY_CAP: usize = 1000;
/// Half-life of a frecency score, in seconds. After this many seconds the
/// score has decayed to ½ of its peak. 7 days is OPENCODE's default — long
/// enough that a commonly-edited file stays sticky across a workweek but
/// short enough that yesterday's deep-dive doesn't haunt you forever.
const HALF_LIFE_SECS: f64 = 7.0 * 24.0 * 60.0 * 60.0;
#[derive(Debug, Clone, Serialize, Deserialize)]
struct FrecencyRecord {
/// Workspace-relative path string.
path: String,
/// Total mentions over the lifetime of the entry.
count: u32,
/// Unix timestamp (seconds) of the last mention.
last_used: u64,
}
#[derive(Debug, Default)]
struct Store {
by_path: HashMap<String, FrecencyRecord>,
persisted_path: Option<PathBuf>,
loaded: bool,
}
fn store() -> &'static Mutex<Store> {
static STORE: OnceLock<Mutex<Store>> = OnceLock::new();
STORE.get_or_init(|| Mutex::new(Store::default()))
}
fn default_path() -> Option<PathBuf> {
dirs::home_dir().map(|h| h.join(".deepseek").join("file-frecency.jsonl"))
}
fn now_secs() -> u64 {
SystemTime::now()
.duration_since(UNIX_EPOCH)
.map(|d| d.as_secs())
.unwrap_or(0)
}
/// Time-decayed frecency score for a record, in arbitrary units. Mentions
/// count linearly; the whole sum is multiplied by an exponential decay
/// factor based on time since `last_used`. Records older than ~5 half-lives
/// score effectively zero.
fn decayed_score(record: &FrecencyRecord, now: u64) -> f64 {
let age_secs = now.saturating_sub(record.last_used) as f64;
let lambda = std::f64::consts::LN_2 / HALF_LIFE_SECS;
(record.count as f64) * (-lambda * age_secs).exp()
}
fn ensure_loaded(store: &mut Store) {
if store.loaded {
return;
}
store.loaded = true;
let Some(path) = default_path() else {
return;
};
store.persisted_path = Some(path.clone());
let Ok(text) = std::fs::read_to_string(&path) else {
return;
};
for line in text.lines() {
if line.trim().is_empty() {
continue;
}
let Ok(record) = serde_json::from_str::<FrecencyRecord>(line) else {
continue;
};
store.by_path.insert(record.path.clone(), record);
}
}
fn evict_to_cap(store: &mut Store, now: u64) {
if store.by_path.len() <= FRECENCY_CAP {
return;
}
let target = FRECENCY_CAP;
let mut scored: Vec<(String, f64)> = store
.by_path
.iter()
.map(|(k, v)| (k.clone(), decayed_score(v, now)))
.collect();
scored.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal));
let drop_count = store.by_path.len().saturating_sub(target);
for (key, _) in scored.iter().take(drop_count) {
store.by_path.remove(key);
}
}
fn append_record_line(path: &PathBuf, record: &FrecencyRecord) -> std::io::Result<()> {
if let Some(parent) = path.parent() {
std::fs::create_dir_all(parent)?;
}
let mut file = OpenOptions::new().create(true).append(true).open(path)?;
let line = serde_json::to_string(record).map_err(std::io::Error::other)?;
writeln!(file, "{line}")?;
Ok(())
}
/// Record one mention of `path` (a workspace-relative path string). Updates
/// the in-memory store, persists a single JSONL line, and evicts the lowest-
/// scored entry if we just exceeded the cap. Best-effort: I/O failures are
/// logged and swallowed — losing a frecency datapoint is never worth
/// failing the user's `@` autocomplete.
pub fn record_mention(path: &str) {
if path.is_empty() {
return;
}
let store = store();
let Ok(mut store) = store.lock() else {
return;
};
ensure_loaded(&mut store);
let now = now_secs();
let entry = store
.by_path
.entry(path.to_string())
.or_insert_with(|| FrecencyRecord {
path: path.to_string(),
count: 0,
last_used: now,
});
entry.count = entry.count.saturating_add(1);
entry.last_used = now;
let snapshot = entry.clone();
if let Some(persisted_path) = store.persisted_path.clone()
&& let Err(err) = append_record_line(&persisted_path, &snapshot)
{
tracing::debug!(target: "frecency", "persist failed: {err}");
}
evict_to_cap(&mut store, now);
}
/// Re-sort a candidate list by frecency score (highest first), preserving
/// the original order for ties so the underlying ranker's choices aren't
/// upended. Candidates the store has never seen score zero — they end up
/// at the bottom of the sort, which means a one-time mention will start
/// floating to the top after first use.
#[must_use]
pub fn rerank_by_frecency(candidates: Vec<String>) -> Vec<String> {
if candidates.len() <= 1 {
return candidates;
}
let store = store();
let Ok(mut store) = store.lock() else {
return candidates;
};
ensure_loaded(&mut store);
let now = now_secs();
let mut scored: Vec<(usize, String, f64)> = candidates
.into_iter()
.enumerate()
.map(|(idx, path)| {
let score = store
.by_path
.get(&path)
.map(|r| decayed_score(r, now))
.unwrap_or(0.0);
(idx, path, score)
})
.collect();
// Stable sort on (-score, original-index): ties keep the underlying
// ranker's order.
scored.sort_by(|a, b| {
b.2.partial_cmp(&a.2)
.unwrap_or(std::cmp::Ordering::Equal)
.then_with(|| a.0.cmp(&b.0))
});
scored.into_iter().map(|(_, path, _)| path).collect()
}
#[cfg(test)]
mod tests {
use super::*;
/// Recently mentioned paths win against never-mentioned ones; never-mentioned
/// preserve their original ranker order.
#[test]
fn rerank_floats_recent_paths_to_the_top() {
// Use the global store; reset its state so we don't leak across tests.
let store = super::store();
let mut s = store.lock().unwrap();
s.by_path.clear();
s.loaded = true; // skip on-disk replay
s.persisted_path = None; // skip persistence
let now = super::now_secs();
s.by_path.insert(
"src/popular.rs".into(),
FrecencyRecord {
path: "src/popular.rs".into(),
count: 8,
last_used: now,
},
);
drop(s);
let order = super::rerank_by_frecency(vec![
"README.md".to_string(),
"src/popular.rs".to_string(),
"Cargo.toml".to_string(),
]);
assert_eq!(order[0], "src/popular.rs");
// README.md was first in original order; Cargo.toml second. Both score 0
// so the original relative order survives.
assert_eq!(order[1], "README.md");
assert_eq!(order[2], "Cargo.toml");
}
/// Decayed score drops below a freshly-used entry after enough half-lives
/// that count alone can't carry the older one. With a 7-day half-life,
/// 8 weeks gives 8 half-lives → ~256× decay; an entry mentioned twice
/// today comfortably beats one mentioned 50× two months ago.
#[test]
fn old_entries_decay_below_recent_ones() {
let now: u64 = 7 * 24 * 60 * 60 * 8; // 8 weeks (8 half-lives)
let stale = FrecencyRecord {
path: "x".into(),
count: 50,
last_used: 0,
};
let fresh = FrecencyRecord {
path: "y".into(),
count: 2,
last_used: now,
};
assert!(
super::decayed_score(&fresh, now) > super::decayed_score(&stale, now),
"fresh={}, stale={}",
super::decayed_score(&fresh, now),
super::decayed_score(&stale, now)
);
}
}
+8
View File
@@ -148,6 +148,9 @@ pub fn find_file_mention_completions(
limit: usize,
) -> Vec<String> {
let entries = workspace.completions(partial, limit);
// #441: re-rank by frecency so files the user mentions a lot float up.
// Never-mentioned candidates fall back to the workspace ranker's order.
let entries = super::file_frecency::rerank_by_frecency(entries);
tracing::debug!(
target: "deepseek_tui::file_mention",
partial = %partial,
@@ -215,6 +218,9 @@ pub fn apply_mention_menu_selection(app: &mut App, entries: &[String]) -> bool {
.mention_menu_selected
.min(entries.len().saturating_sub(1));
let replacement = &entries[selected_idx];
// #441: bump this path's frecency before we splice it in. The store
// persists asynchronously, so this never blocks input handling.
super::file_frecency::record_mention(replacement);
replace_file_mention(app, byte_start, &partial, replacement);
app.mention_menu_hidden = false;
app.status_message = Some(format!("Attached @{replacement}"));
@@ -239,6 +245,8 @@ pub fn try_autocomplete_file_mention(app: &mut App) -> bool {
return true;
}
if candidates.len() == 1 {
// #441: a unique-match completion is also a "mention" for ranking.
super::file_frecency::record_mention(&candidates[0]);
replace_file_mention(app, byte_start, &partial, &candidates[0]);
app.status_message = Some(format!("Attached @{}", candidates[0]));
return true;
+1
View File
@@ -13,6 +13,7 @@ pub mod context_menu;
pub mod diff_render;
pub mod event_broker;
pub mod external_editor;
pub mod file_frecency;
pub mod file_mention;
pub mod file_picker;
pub mod file_tree;