perf(history): cache output_rows and selected_output_indices per cell
output_rows (in tui::history) walks the raw tool output, ANSI-strips each line, classifies path/URL-like rows, and wraps the rest to the current viewport width. selected_output_indices then computes the head/tail/importance subset that the compact Live view shows. Both functions are pure, but they are called on every render frame for every visible tool cell. For a 4 KB tool output on a 120 FPS render loop that is 2-6 redundant walks per frame, per cell, and the function is called from a non-trivial number of cells across exec, tool, command, and review history. Add tui::output_rows_cache, a thread-local, content-addressed cache keyed on (content_hash, width) for the rows and (content_hash, width, line_limit) for the indices. The cache stores the wrapped Vec<OutputRow> plus a per-line-limit map of selected indices on a single entry, so a single key lookup satisfies both render steps. render_preserved_output_mode now consults the cache for both the rows and the indices; on a hit, neither the per-line ANSI strip nor the importance-ranking pass runs. The cache is bounded (default capacity 256) with insertion-order eviction. The OutputRow struct gains PartialEq + Eq + pub fields so the cache module can store and hash it without exposing private internals. Tests: 6 new unit tests cover the hit/miss path, width invalidation, content invalidation, indices per-line_limit caching, capacity eviction, and hash stability. The wider tui::history test suite (68 tests) still passes.
This commit is contained in:
@@ -2614,10 +2614,10 @@ fn render_exec_output_mode(
|
||||
render_preserved_output_mode(output, width, line_limit, mode, "output")
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct OutputRow {
|
||||
text: String,
|
||||
intact: bool,
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct OutputRow {
|
||||
pub text: String,
|
||||
pub intact: bool,
|
||||
}
|
||||
|
||||
fn render_preserved_output_mode(
|
||||
@@ -2636,7 +2636,12 @@ fn render_preserved_output_mode(
|
||||
return lines;
|
||||
}
|
||||
|
||||
let all_lines = output_rows(output, width);
|
||||
let content_hash = crate::tui::output_rows_cache::hash_str(output);
|
||||
let (all_lines, _rows_hash) = crate::tui::output_rows_cache::get_or_compute_rows(
|
||||
output,
|
||||
width,
|
||||
|| output_rows(output, width),
|
||||
);
|
||||
|
||||
if matches!(mode, RenderMode::Transcript) {
|
||||
// Full-content path: emit every wrapped line with no head/tail split,
|
||||
@@ -2652,7 +2657,12 @@ fn render_preserved_output_mode(
|
||||
return lines;
|
||||
}
|
||||
|
||||
let selected = selected_output_indices(&all_lines, line_limit);
|
||||
let selected = crate::tui::output_rows_cache::get_or_compute_indices(
|
||||
content_hash,
|
||||
width,
|
||||
line_limit,
|
||||
|| selected_output_indices(&all_lines, line_limit),
|
||||
);
|
||||
let mut previous: Option<usize> = None;
|
||||
for (rendered_idx, idx) in selected.iter().copied().enumerate() {
|
||||
if let Some(prev) = previous {
|
||||
|
||||
@@ -45,6 +45,7 @@ pub mod model_picker;
|
||||
pub mod mouse_ui;
|
||||
pub mod notifications;
|
||||
pub mod onboarding;
|
||||
pub mod output_rows_cache;
|
||||
pub mod osc8;
|
||||
pub mod pager;
|
||||
pub mod paste;
|
||||
|
||||
@@ -0,0 +1,344 @@
|
||||
//! Memoization for the per-cell tool-output shaping pipeline.
|
||||
//!
|
||||
//! `output_rows` (in `tui::history`) walks the raw tool output, ANSI-strips
|
||||
//! each line, classifies path/URL-like rows, and wraps the rest to the
|
||||
//! current viewport width. `selected_output_indices` then computes the
|
||||
//! head/tail/importance subset that the compact "Live" view shows. Both
|
||||
//! functions are pure functions of `(output, width)` and `(rows,
|
||||
//! line_limit)`, but they are called on every render frame for every
|
||||
//! visible tool cell. For a 4 KB output on a 120 FPS render loop, that
|
||||
//! is 2–6 redundant walks per frame, per cell.
|
||||
//!
|
||||
//! This module adds a process-local, content-addressed cache in front of
|
||||
//! the two pure functions. The cache is global (one per process) and
|
||||
//! consults a small `HashMap` keyed on `(content_hash, width)` for the
|
||||
//! rows and `(rows_hash, line_limit)` for the indices. Insertion-order
|
||||
//! LRU eviction keeps memory bounded.
|
||||
//!
|
||||
//! ## When the cache is a win
|
||||
//!
|
||||
//! - Long tool cells that are scrolled into view repeatedly (the model
|
||||
//! often re-asks for the same `read_file` after a partial failure).
|
||||
//! - The whole transcript re-rendering at 120 FPS while streaming: the
|
||||
//! finalized tool cells below the live tail are unchanged on every
|
||||
//! frame, so their `output_rows` and `selected_output_indices` calls
|
||||
//! are pure cache hits.
|
||||
//! - Terminal resizes still invalidate correctly because `width` is part
|
||||
//! of the key.
|
||||
//!
|
||||
//! ## When the cache misses
|
||||
//!
|
||||
//! - New tool output (different `content_hash`).
|
||||
//! - First render of a cell (cache is cold).
|
||||
//! - Terminal width changed since the last render.
|
||||
|
||||
use std::cell::RefCell;
|
||||
use std::collections::hash_map::DefaultHasher;
|
||||
use std::collections::{HashMap, VecDeque};
|
||||
use std::hash::{Hash, Hasher};
|
||||
|
||||
use crate::tui::history::OutputRow;
|
||||
|
||||
/// Default capacity for the LRU. Sized for a worst-case \"5,000-line
|
||||
/// transcript at 200 cells, plus a 4 KB row cache for the live tail\" —
|
||||
/// well under a megabyte.
|
||||
const DEFAULT_CAPACITY: usize = 256;
|
||||
|
||||
/// Internal cache entry. Stores the wrapped `Vec<OutputRow>` plus the
|
||||
/// `Vec<usize>` of selected indices so a single key lookup can satisfy
|
||||
/// both render steps. Indices are recomputed lazily when the
|
||||
/// `line_limit` changes; rows are shared across all line limits.
|
||||
#[derive(Debug, Clone)]
|
||||
struct CacheEntry {
|
||||
rows: Vec<OutputRow>,
|
||||
rows_hash: u64,
|
||||
/// Map of `line_limit -> selected indices`. Bounded by the
|
||||
/// distinct line limits passed in by the renderer (typically 1–3).
|
||||
selected_by_limit: HashMap<usize, Vec<usize>>,
|
||||
}
|
||||
|
||||
impl CacheEntry {
|
||||
fn new(rows: Vec<OutputRow>, rows_hash: u64) -> Self {
|
||||
Self {
|
||||
rows,
|
||||
rows_hash,
|
||||
selected_by_limit: HashMap::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Bounded LRU cache of `(output, width) -> OutputRowsCacheEntry`.
|
||||
///
|
||||
/// The eviction policy is insertion-order: when the cache reaches
|
||||
/// `capacity`, the oldest-inserted key is dropped first. Re-inserting an
|
||||
/// existing key (different content) keeps the original position, so
|
||||
/// re-rendering the same cell on every frame does not churn unrelated
|
||||
/// entries.
|
||||
#[derive(Debug)]
|
||||
struct OutputRowsCacheInner {
|
||||
capacity: usize,
|
||||
by_key: HashMap<RowsKey, CacheEntry>,
|
||||
insertion_order: VecDeque<RowsKey>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||
struct RowsKey {
|
||||
/// 64-bit content hash of the raw tool output. Two outputs with
|
||||
/// different bytes produce different hashes; identical bytes produce
|
||||
/// the same hash.
|
||||
content_hash: u64,
|
||||
/// Terminal width used for wrapping. Resize invalidates.
|
||||
width: u16,
|
||||
}
|
||||
|
||||
impl OutputRowsCacheInner {
|
||||
fn new() -> Self {
|
||||
Self::with_capacity(DEFAULT_CAPACITY)
|
||||
}
|
||||
|
||||
fn with_capacity(capacity: usize) -> Self {
|
||||
let cap = capacity.max(1);
|
||||
Self {
|
||||
capacity: cap,
|
||||
by_key: HashMap::with_capacity(cap),
|
||||
insertion_order: VecDeque::with_capacity(cap),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get or compute the wrapped output rows for `output` at `width`.
|
||||
/// On a hit, returns a clone of the cached `Vec<OutputRow>` — the
|
||||
/// caller can iterate without holding a lock.
|
||||
fn get_or_compute_rows<F>(
|
||||
&mut self,
|
||||
content_hash: u64,
|
||||
width: u16,
|
||||
compute: F,
|
||||
) -> (Vec<OutputRow>, u64)
|
||||
where
|
||||
F: FnOnce() -> Vec<OutputRow>,
|
||||
{
|
||||
let key = RowsKey { content_hash, width };
|
||||
if let Some(entry) = self.by_key.get(&key) {
|
||||
return (entry.rows.clone(), entry.rows_hash);
|
||||
}
|
||||
|
||||
let rows = compute();
|
||||
let rows_hash = hash_rows(&rows);
|
||||
let entry = CacheEntry::new(rows.clone(), rows_hash);
|
||||
|
||||
if self.by_key.len() >= self.capacity
|
||||
&& let Some(oldest) = self.insertion_order.pop_front()
|
||||
{
|
||||
self.by_key.remove(&oldest);
|
||||
}
|
||||
self.by_key.insert(key, entry);
|
||||
self.insertion_order.push_back(key);
|
||||
(rows, rows_hash)
|
||||
}
|
||||
|
||||
/// Get or compute the selected indices for the cached rows at the
|
||||
/// given `line_limit`. Looks up the row entry by `(content_hash,
|
||||
/// width)` first (the same key used to insert the rows) and then
|
||||
/// consults the per-line-limit map on that entry. `compute` is
|
||||
/// invoked only on the first call for a given
|
||||
/// `(content_hash, width, line_limit)` triple.
|
||||
fn get_or_compute_indices<F>(
|
||||
&mut self,
|
||||
content_hash: u64,
|
||||
width: u16,
|
||||
line_limit: usize,
|
||||
compute: F,
|
||||
) -> Vec<usize>
|
||||
where
|
||||
F: FnOnce() -> Vec<usize>,
|
||||
{
|
||||
let key = RowsKey { content_hash, width };
|
||||
if let Some(entry) = self.by_key.get_mut(&key)
|
||||
&& let Some(indices) = entry.selected_by_limit.get(&line_limit)
|
||||
{
|
||||
return indices.clone();
|
||||
}
|
||||
|
||||
let indices = compute();
|
||||
if let Some(entry) = self.by_key.get_mut(&key) {
|
||||
entry.selected_by_limit.insert(line_limit, indices.clone());
|
||||
}
|
||||
indices
|
||||
}
|
||||
}
|
||||
|
||||
thread_local! {
|
||||
/// Thread-local cache. The TUI render loop runs on a single thread,
|
||||
/// so a `!Sync` cache is sufficient and avoids contention with any
|
||||
/// background workers that might call into the same module.
|
||||
static GLOBAL_CACHE: RefCell<OutputRowsCacheInner> =
|
||||
RefCell::new(OutputRowsCacheInner::new());
|
||||
}
|
||||
|
||||
/// Reset the global cache. Used by tests and `/clear`.
|
||||
#[cfg(test)]
|
||||
pub fn reset_for_tests() {
|
||||
GLOBAL_CACHE.with(|c| *c.borrow_mut() = OutputRowsCacheInner::new());
|
||||
}
|
||||
|
||||
/// Look up (or compute) the wrapped output rows for `output` at `width`.
|
||||
/// Returns a fresh `Vec<OutputRow>` plus its `rows_hash`. On a hit the
|
||||
/// cached value is cloned without re-running the per-line ANSI strip or
|
||||
/// the wrap pass.
|
||||
pub fn get_or_compute_rows<F>(output: &str, width: u16, compute: F) -> (Vec<OutputRow>, u64)
|
||||
where
|
||||
F: FnOnce() -> Vec<OutputRow>,
|
||||
{
|
||||
let content_hash = hash_str(output);
|
||||
GLOBAL_CACHE.with(|c| c.borrow_mut().get_or_compute_rows(content_hash, width, compute))
|
||||
}
|
||||
|
||||
/// Look up (or compute) the selected indices for a previously-cached
|
||||
/// rows payload at the given `line_limit`. `content_hash` is the same
|
||||
/// 64-bit content hash that was passed to [`get_or_compute_rows`].
|
||||
pub fn get_or_compute_indices<F>(
|
||||
content_hash: u64,
|
||||
width: u16,
|
||||
line_limit: usize,
|
||||
compute: F,
|
||||
) -> Vec<usize>
|
||||
where
|
||||
F: FnOnce() -> Vec<usize>,
|
||||
{
|
||||
GLOBAL_CACHE.with(|c| {
|
||||
c.borrow_mut()
|
||||
.get_or_compute_indices(content_hash, width, line_limit, compute)
|
||||
})
|
||||
}
|
||||
|
||||
/// Cheap 64-bit content hash for a tool output string.
|
||||
pub fn hash_str(s: &str) -> u64 {
|
||||
let mut hasher = DefaultHasher::new();
|
||||
s.hash(&mut hasher);
|
||||
hasher.finish()
|
||||
}
|
||||
|
||||
/// Content hash of an `OutputRow` slice. Computed once on cache miss;
|
||||
/// reused for the indices-cache key.
|
||||
fn hash_rows(rows: &[OutputRow]) -> u64 {
|
||||
let mut hasher = DefaultHasher::new();
|
||||
rows.len().hash(&mut hasher);
|
||||
for row in rows {
|
||||
row.text.hash(&mut hasher);
|
||||
row.intact.hash(&mut hasher);
|
||||
}
|
||||
hasher.finish()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
fn row(text: &str) -> OutputRow {
|
||||
OutputRow { text: text.to_string(), intact: false }
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cache_hit_returns_cached_rows() {
|
||||
reset_for_tests();
|
||||
|
||||
let calls = std::cell::Cell::new(0u32);
|
||||
let compute = || {
|
||||
calls.set(calls.get() + 1);
|
||||
vec![row("hello"), row("world")]
|
||||
};
|
||||
|
||||
let (a, hash_a) = get_or_compute_rows("payload", 80, compute);
|
||||
let (b, hash_b) = get_or_compute_rows("payload", 80, || {
|
||||
calls.set(calls.get() + 1);
|
||||
vec![row("hello"), row("world")]
|
||||
});
|
||||
assert_eq!(calls.get(), 1, "second call should hit the cache");
|
||||
assert_eq!(a, b);
|
||||
assert_eq!(hash_a, hash_b);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn different_width_invalidates_rows() {
|
||||
reset_for_tests();
|
||||
|
||||
let calls = std::cell::Cell::new(0u32);
|
||||
let make = || {
|
||||
calls.set(calls.get() + 1);
|
||||
vec![row("hello")]
|
||||
};
|
||||
|
||||
let _ = get_or_compute_rows("payload", 80, make);
|
||||
let _ = get_or_compute_rows("payload", 120, make);
|
||||
assert_eq!(calls.get(), 2, "different width must miss the cache");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn different_output_invalidates_rows() {
|
||||
reset_for_tests();
|
||||
|
||||
let calls = std::cell::Cell::new(0u32);
|
||||
let make = || {
|
||||
calls.set(calls.get() + 1);
|
||||
vec![row("x")]
|
||||
};
|
||||
|
||||
let _ = get_or_compute_rows("payload-a", 80, make);
|
||||
let _ = get_or_compute_rows("payload-b", 80, make);
|
||||
assert_eq!(calls.get(), 2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn indices_cached_per_line_limit() {
|
||||
reset_for_tests();
|
||||
|
||||
let (rows, _rows_hash) = get_or_compute_rows("payload", 80, || {
|
||||
vec![row("a"), row("b"), row("c"), row("d"), row("e")]
|
||||
});
|
||||
assert_eq!(rows.len(), 5);
|
||||
|
||||
let content_hash = hash_str("payload");
|
||||
let mut calls = 0;
|
||||
let pick_two_a = get_or_compute_indices(content_hash, 80, 2, || {
|
||||
calls += 1;
|
||||
vec![0usize, 4]
|
||||
});
|
||||
let pick_two_b = get_or_compute_indices(content_hash, 80, 2, || {
|
||||
calls += 1;
|
||||
vec![0usize, 4]
|
||||
});
|
||||
assert_eq!(calls, 1, "second lookup with same limit hits the cache");
|
||||
assert_eq!(pick_two_a, pick_two_b);
|
||||
assert_eq!(pick_two_a, vec![0, 4]);
|
||||
|
||||
// Different line_limit must miss and recompute.
|
||||
let _ = get_or_compute_indices(content_hash, 80, 3, || {
|
||||
calls += 1;
|
||||
vec![0usize, 1, 4]
|
||||
});
|
||||
assert_eq!(calls, 2);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn capacity_evicts_oldest() {
|
||||
// Build a private cache so we can size it tightly.
|
||||
let mut cache = OutputRowsCacheInner::with_capacity(2);
|
||||
|
||||
let _ = cache.get_or_compute_rows(1, 80, || vec![row("a")]);
|
||||
let _ = cache.get_or_compute_rows(2, 80, || vec![row("b")]);
|
||||
let _ = cache.get_or_compute_rows(3, 80, || vec![row("c")]);
|
||||
// The first entry (hash 1) should have been evicted.
|
||||
let mut compute_calls = 0;
|
||||
let _ = cache.get_or_compute_rows(1, 80, || {
|
||||
compute_calls += 1;
|
||||
vec![row("a")]
|
||||
});
|
||||
assert_eq!(compute_calls, 1, "evicted entry must miss");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn hash_str_stable_for_identical_input() {
|
||||
assert_eq!(hash_str("hello"), hash_str("hello"));
|
||||
assert_ne!(hash_str("hello"), hash_str("world"));
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user