perf(history): cache output_rows and selected_output_indices per cell

output_rows (in tui::history) walks the raw tool output, ANSI-strips
each line, classifies path/URL-like rows, and wraps the rest to the
current viewport width. selected_output_indices then computes the
head/tail/importance subset that the compact Live view shows. Both
functions are pure, but they are called on every render frame for
every visible tool cell. For a 4 KB tool output on a 120 FPS render
loop that is 2-6 redundant walks per frame, per cell, and both
functions are reached from a non-trivial number of cells across
exec, tool, command, and review history.

Add tui::output_rows_cache, a thread-local, content-addressed cache
keyed on (content_hash, width) for the rows and (content_hash, width,
line_limit) for the indices. The cache stores the wrapped
Vec<OutputRow> plus a per-line-limit map of selected indices on a
single entry, so a single key lookup satisfies both render steps.

render_preserved_output_mode now consults the cache for both the
rows and the indices; on a hit, neither the per-line ANSI strip nor
the importance-ranking pass runs. The cache is bounded (default
capacity 256) with insertion-order eviction. The OutputRow struct
becomes pub, with pub fields and PartialEq + Eq derives, so the cache
module can store, hash, and compare it from outside tui::history.

Tests: 6 new unit tests cover the hit/miss path, width invalidation,
content invalidation, indices per-line_limit caching, capacity
eviction, and hash stability. The wider tui::history test suite (68
tests) still passes.
This commit is contained in:
HUQIANTAO
2026-06-03 19:05:20 +08:00
committed by Hunter B
parent c0b36824c2
commit 3b0ef3f63c
3 changed files with 361 additions and 6 deletions
+16 -6
View File
@@ -2614,10 +2614,10 @@ fn render_exec_output_mode(
render_preserved_output_mode(output, width, line_limit, mode, "output")
}
#[derive(Debug, Clone)]
struct OutputRow {
text: String,
intact: bool,
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct OutputRow {
pub text: String,
pub intact: bool,
}
fn render_preserved_output_mode(
@@ -2636,7 +2636,12 @@ fn render_preserved_output_mode(
return lines;
}
let all_lines = output_rows(output, width);
let content_hash = crate::tui::output_rows_cache::hash_str(output);
let (all_lines, _rows_hash) = crate::tui::output_rows_cache::get_or_compute_rows(
output,
width,
|| output_rows(output, width),
);
if matches!(mode, RenderMode::Transcript) {
// Full-content path: emit every wrapped line with no head/tail split,
@@ -2652,7 +2657,12 @@ fn render_preserved_output_mode(
return lines;
}
let selected = selected_output_indices(&all_lines, line_limit);
let selected = crate::tui::output_rows_cache::get_or_compute_indices(
content_hash,
width,
line_limit,
|| selected_output_indices(&all_lines, line_limit),
);
let mut previous: Option<usize> = None;
for (rendered_idx, idx) in selected.iter().copied().enumerate() {
if let Some(prev) = previous {
+1
View File
@@ -45,6 +45,7 @@ pub mod model_picker;
pub mod mouse_ui;
pub mod notifications;
pub mod onboarding;
pub mod output_rows_cache;
pub mod osc8;
pub mod pager;
pub mod paste;
+344
View File
@@ -0,0 +1,344 @@
//! Memoization for the per-cell tool-output shaping pipeline.
//!
//! `output_rows` (in `tui::history`) walks the raw tool output, ANSI-strips
//! each line, classifies path/URL-like rows, and wraps the rest to the
//! current viewport width. `selected_output_indices` then computes the
//! head/tail/importance subset that the compact "Live" view shows. Both
//! functions are pure functions of `(output, width)` and `(rows,
//! line_limit)`, but they are called on every render frame for every
//! visible tool cell. For a 4 KB output on a 120 FPS render loop, that
//! is 2-6 redundant walks per frame, per cell.
//!
//! This module adds a thread-local, content-addressed cache in front of
//! the two pure functions. Each thread gets its own cache, backed by a
//! small `HashMap` keyed on `(content_hash, width)`; every entry holds the
//! wrapped rows plus a per-`line_limit` map of selected indices.
//! Insertion-order (FIFO) eviction keeps the number of entries bounded.
//!
//! ## When the cache is a win
//!
//! - Long tool cells that are scrolled into view repeatedly (the model
//! often re-asks for the same `read_file` after a partial failure).
//! - The whole transcript re-rendering at 120 FPS while streaming: the
//! finalized tool cells below the live tail are unchanged on every
//! frame, so their `output_rows` and `selected_output_indices` calls
//! are pure cache hits.
//! - Terminal resizes still invalidate correctly because `width` is part
//! of the key.
//!
//! ## When the cache misses
//!
//! - New tool output (different `content_hash`).
//! - First render of a cell (cache is cold).
//! - Terminal width changed since the last render.
use std::cell::RefCell;
use std::collections::hash_map::DefaultHasher;
use std::collections::{HashMap, VecDeque};
use std::hash::{Hash, Hasher};
use crate::tui::history::OutputRow;
/// Default number of `(content_hash, width)` entries the cache keeps
/// before the oldest-inserted entry is evicted.
///
/// NOTE(review): the bound is on entry count, not bytes. Each entry owns a
/// full copy of its wrapped rows, so the original sizing estimate ("well
/// under a megabyte" for a 5,000-line transcript at 200 cells) only holds
/// while individual tool outputs stay small — confirm against real usage.
const DEFAULT_CAPACITY: usize = 256;
/// One cached shaping result, stored under a single `RowsKey`.
///
/// Holds the wrapped rows together with every set of selected indices that
/// has been stored for them, so the rows are kept once and shared across
/// all line limits. Indices for a given `line_limit` are added the first
/// time that limit is requested while the entry is present.
#[derive(Debug, Clone)]
struct CacheEntry {
/// Wrapped rows exactly as supplied when the entry was created.
rows: Vec<OutputRow>,
/// Hash of `rows` as supplied by the creator (the cache passes
/// `hash_rows(&rows)`); stored so hits can return it without rehashing.
rows_hash: u64,
/// Map of `line_limit -> selected indices`. Grows by one entry per
/// distinct line limit requested; it has no separate size bound.
/// NOTE(review): the original comment read "typically 13", which looks
/// like a garbled "1-3" — confirm the expected number of limits.
selected_by_limit: HashMap<usize, Vec<usize>>,
}
impl CacheEntry {
    /// Creates an entry for freshly computed `rows` and their precomputed
    /// `rows_hash`. No selected indices are stored yet.
    fn new(rows: Vec<OutputRow>, rows_hash: u64) -> Self {
        let selected_by_limit = HashMap::new();
        Self {
            rows,
            rows_hash,
            selected_by_limit,
        }
    }
}
/// Count-bounded cache mapping a `RowsKey` to its `CacheEntry`.
///
/// Eviction is insertion-order (FIFO), not LRU: when the map already holds
/// `capacity` entries, the oldest-inserted key is dropped before a new one
/// is added. Cache hits never touch `insertion_order`, so re-rendering the
/// same cell on every frame neither refreshes that cell's position nor
/// churns unrelated entries.
#[derive(Debug)]
struct OutputRowsCacheInner {
/// Maximum number of entries kept in `by_key`; `with_capacity` clamps
/// this to at least 1.
capacity: usize,
/// Cached entries, looked up by `(content_hash, width)`.
by_key: HashMap<RowsKey, CacheEntry>,
/// Keys in the order they were inserted; the front is evicted first.
insertion_order: VecDeque<RowsKey>,
}
/// Lookup key for one cached shaping result.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
struct RowsKey {
/// 64-bit content hash of the raw tool output. Identical bytes always
/// produce the same hash. Distinct outputs can in principle collide;
/// lookups compare only this hash, never the original text, so a
/// collision would return the other output's rows.
content_hash: u64,
/// Terminal width used for wrapping. A different width is a different
/// key, so a resize misses and recomputes.
width: u16,
}
impl OutputRowsCacheInner {
    /// Creates an empty cache sized to `DEFAULT_CAPACITY`.
    fn new() -> Self {
        Self::with_capacity(DEFAULT_CAPACITY)
    }

    /// Creates an empty cache holding at most `capacity` entries.
    /// A requested capacity of zero is raised to one.
    fn with_capacity(capacity: usize) -> Self {
        let slots = std::cmp::max(capacity, 1);
        Self {
            capacity: slots,
            by_key: HashMap::with_capacity(slots),
            insertion_order: VecDeque::with_capacity(slots),
        }
    }

    /// Returns the wrapped rows stored under `(content_hash, width)` along
    /// with their row hash, running `compute` only on a miss.
    ///
    /// A hit hands back a clone of the stored rows and leaves the eviction
    /// order untouched. A miss stores the freshly computed rows, first
    /// dropping the oldest-inserted entry if the cache is already full.
    fn get_or_compute_rows<F>(
        &mut self,
        content_hash: u64,
        width: u16,
        compute: F,
    ) -> (Vec<OutputRow>, u64)
    where
        F: FnOnce() -> Vec<OutputRow>,
    {
        let key = RowsKey { content_hash, width };

        if let Some(hit) = self.by_key.get(&key) {
            return (hit.rows.clone(), hit.rows_hash);
        }

        let rows = compute();
        let rows_hash = hash_rows(&rows);

        // Make room before inserting: FIFO, one victim per insertion.
        if self.by_key.len() >= self.capacity {
            let victim = self.insertion_order.pop_front();
            if let Some(oldest) = victim {
                self.by_key.remove(&oldest);
            }
        }

        self.by_key
            .insert(key, CacheEntry::new(rows.clone(), rows_hash));
        self.insertion_order.push_back(key);

        (rows, rows_hash)
    }

    /// Returns the selected indices stored for `line_limit` on the entry
    /// keyed by `(content_hash, width)`, running `compute` when they are
    /// not stored yet.
    ///
    /// The result is remembered only if a rows entry for that key is
    /// present; when the entry is absent (never inserted, or already
    /// evicted) the computed indices are returned without being stored.
    fn get_or_compute_indices<F>(
        &mut self,
        content_hash: u64,
        width: u16,
        line_limit: usize,
        compute: F,
    ) -> Vec<usize>
    where
        F: FnOnce() -> Vec<usize>,
    {
        let key = RowsKey { content_hash, width };

        let stored = self
            .by_key
            .get(&key)
            .and_then(|entry| entry.selected_by_limit.get(&line_limit))
            .cloned();
        if let Some(indices) = stored {
            return indices;
        }

        let indices = compute();
        if let Some(entry) = self.by_key.get_mut(&key) {
            entry.selected_by_limit.insert(line_limit, indices.clone());
        }
        indices
    }
}
thread_local! {
/// Per-thread cache instance; despite the `GLOBAL_` name, nothing is
/// shared across threads. Every thread that calls into this module gets
/// its own `OutputRowsCacheInner` at the default capacity, so no lock is
/// needed and `RefCell` supplies the interior mutability.
/// NOTE(review): the original comment states that the TUI render loop
/// runs on a single thread — confirm, since rendering from several
/// threads would warm a separate cache on each.
static GLOBAL_CACHE: RefCell<OutputRowsCacheInner> =
RefCell::new(OutputRowsCacheInner::new());
}
/// Replaces the calling thread's cache with a fresh, empty one at the
/// default capacity. Compiled only under `cfg(test)`, so it is available
/// to tests but not to production code paths.
#[cfg(test)]
pub fn reset_for_tests() {
    GLOBAL_CACHE.with(|cell| {
        let mut slot = cell.borrow_mut();
        *slot = OutputRowsCacheInner::new();
    });
}
/// Look up (or compute) the wrapped output rows for `output` at `width`.
/// Returns a fresh `Vec<OutputRow>` plus its `rows_hash`. On a hit the
/// cached value is cloned without re-running the per-line ANSI strip or
/// the wrap pass.
pub fn get_or_compute_rows<F>(output: &str, width: u16, compute: F) -> (Vec<OutputRow>, u64)
where
F: FnOnce() -> Vec<OutputRow>,
{
let content_hash = hash_str(output);
GLOBAL_CACHE.with(|c| c.borrow_mut().get_or_compute_rows(content_hash, width, compute))
}
/// Look up (or compute) the selected indices for a previously-cached
/// rows payload at the given `line_limit`. `content_hash` is the same
/// 64-bit content hash that was passed to [`get_or_compute_rows`].
pub fn get_or_compute_indices<F>(
content_hash: u64,
width: u16,
line_limit: usize,
compute: F,
) -> Vec<usize>
where
F: FnOnce() -> Vec<usize>,
{
GLOBAL_CACHE.with(|c| {
c.borrow_mut()
.get_or_compute_indices(content_hash, width, line_limit, compute)
})
}
/// Hashes a tool output string to the 64-bit value used as the cache's
/// content key. Equal strings always yield equal hashes within a process.
pub fn hash_str(s: &str) -> u64 {
    let mut state = DefaultHasher::new();
    Hash::hash(s, &mut state);
    Hasher::finish(&state)
}
/// Hashes a slice of rows: the row count first, then each row's text and
/// `intact` flag in order. The cache calls this once per miss and stores
/// the result alongside the rows.
fn hash_rows(rows: &[OutputRow]) -> u64 {
    let mut state = DefaultHasher::new();
    rows.len().hash(&mut state);
    rows.iter().for_each(|row| {
        row.text.hash(&mut state);
        row.intact.hash(&mut state);
    });
    state.finish()
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a non-intact row with the given text.
    fn row(text: &str) -> OutputRow {
        OutputRow {
            text: text.to_string(),
            intact: false,
        }
    }

    #[test]
    fn cache_hit_returns_cached_rows() {
        reset_for_tests();
        let calls = std::cell::Cell::new(0u32);
        let compute = || {
            calls.set(calls.get() + 1);
            vec![row("hello"), row("world")]
        };
        let (a, hash_a) = get_or_compute_rows("payload", 80, compute);
        let (b, hash_b) = get_or_compute_rows("payload", 80, || {
            calls.set(calls.get() + 1);
            vec![row("hello"), row("world")]
        });
        assert_eq!(calls.get(), 1, "second call should hit the cache");
        assert_eq!(a, b);
        assert_eq!(hash_a, hash_b);
    }

    #[test]
    fn different_width_invalidates_rows() {
        reset_for_tests();
        let calls = std::cell::Cell::new(0u32);
        let make = || {
            calls.set(calls.get() + 1);
            vec![row("hello")]
        };
        let _ = get_or_compute_rows("payload", 80, make);
        let _ = get_or_compute_rows("payload", 120, make);
        assert_eq!(calls.get(), 2, "different width must miss the cache");
    }

    #[test]
    fn different_output_invalidates_rows() {
        reset_for_tests();
        let calls = std::cell::Cell::new(0u32);
        let make = || {
            calls.set(calls.get() + 1);
            vec![row("x")]
        };
        let _ = get_or_compute_rows("payload-a", 80, make);
        let _ = get_or_compute_rows("payload-b", 80, make);
        assert_eq!(calls.get(), 2);
    }

    #[test]
    fn indices_cached_per_line_limit() {
        reset_for_tests();
        let (rows, _rows_hash) = get_or_compute_rows("payload", 80, || {
            vec![row("a"), row("b"), row("c"), row("d"), row("e")]
        });
        assert_eq!(rows.len(), 5);
        let content_hash = hash_str("payload");
        let mut calls = 0;
        let pick_two_a = get_or_compute_indices(content_hash, 80, 2, || {
            calls += 1;
            vec![0usize, 4]
        });
        let pick_two_b = get_or_compute_indices(content_hash, 80, 2, || {
            calls += 1;
            vec![0usize, 4]
        });
        assert_eq!(calls, 1, "second lookup with same limit hits the cache");
        assert_eq!(pick_two_a, pick_two_b);
        assert_eq!(pick_two_a, vec![0, 4]);
        // Different line_limit must miss and recompute.
        let _ = get_or_compute_indices(content_hash, 80, 3, || {
            calls += 1;
            vec![0usize, 1, 4]
        });
        assert_eq!(calls, 2);
    }

    #[test]
    fn indices_without_rows_entry_are_recomputed() {
        reset_for_tests();
        // No rows were ever stored for this hash, so there is no entry to
        // attach the indices to and every call has to run `compute`.
        let content_hash = hash_str("never-inserted");
        let mut calls = 0;
        for _ in 0..2 {
            let picked = get_or_compute_indices(content_hash, 80, 2, || {
                calls += 1;
                vec![0usize, 1]
            });
            assert_eq!(picked, vec![0, 1]);
        }
        assert_eq!(calls, 2, "indices are only stored alongside a rows entry");
    }

    #[test]
    fn capacity_evicts_oldest() {
        // Build a private cache so we can size it tightly.
        let mut cache = OutputRowsCacheInner::with_capacity(2);
        let _ = cache.get_or_compute_rows(1, 80, || vec![row("a")]);
        let _ = cache.get_or_compute_rows(2, 80, || vec![row("b")]);
        let _ = cache.get_or_compute_rows(3, 80, || vec![row("c")]);
        assert_eq!(cache.by_key.len(), 2);
        assert_eq!(cache.insertion_order.len(), 2);

        // The two newest entries must have survived; without this check the
        // test would also pass for a cache that evicts everything.
        let mut survivor_calls = 0;
        for hash in [2u64, 3] {
            let _ = cache.get_or_compute_rows(hash, 80, || {
                survivor_calls += 1;
                vec![row("unexpected")]
            });
        }
        assert_eq!(survivor_calls, 0, "newest entries must still hit");

        // The first entry (hash 1) should have been evicted.
        let mut compute_calls = 0;
        let _ = cache.get_or_compute_rows(1, 80, || {
            compute_calls += 1;
            vec![row("a")]
        });
        assert_eq!(compute_calls, 1, "evicted entry must miss");
        assert_eq!(cache.by_key.len(), 2, "cache must stay at capacity");
        assert_eq!(cache.insertion_order.len(), 2);
    }

    #[test]
    fn zero_capacity_is_clamped_to_one() {
        let mut cache = OutputRowsCacheInner::with_capacity(0);
        assert_eq!(cache.capacity, 1);
        let _ = cache.get_or_compute_rows(1, 80, || vec![row("a")]);
        let mut calls = 0;
        let _ = cache.get_or_compute_rows(1, 80, || {
            calls += 1;
            vec![row("a")]
        });
        assert_eq!(calls, 0, "the single slot must still serve hits");
    }

    #[test]
    fn hash_str_stable_for_identical_input() {
        assert_eq!(hash_str("hello"), hash_str("hello"));
        assert_ne!(hash_str("hello"), hash_str("world"));
    }

    #[test]
    fn hash_rows_depends_on_text_and_intact_flag() {
        let plain = vec![row("a")];
        let intact = vec![OutputRow {
            text: "a".to_string(),
            intact: true,
        }];
        assert_eq!(hash_rows(&plain), hash_rows(&[row("a")]));
        assert_ne!(hash_rows(&plain), hash_rows(&intact));
        assert_ne!(hash_rows(&plain), hash_rows(&[row("b")]));
    }
}