From 3b0ef3f63c3f7a0bf4b351d84fe45e18378e3d27 Mon Sep 17 00:00:00 2001 From: HUQIANTAO <58421104+HUQIANTAO@users.noreply.github.com> Date: Wed, 3 Jun 2026 19:05:20 +0800 Subject: [PATCH] perf(history): cache output_rows and selected_output_indices per cell output_rows (in tui::history) walks the raw tool output, ANSI-strips each line, classifies path/URL-like rows, and wraps the rest to the current viewport width. selected_output_indices then computes the head/tail/importance subset that the compact Live view shows. Both functions are pure, but they are called on every render frame for every visible tool cell. For a 4 KB tool output on a 120 FPS render loop that is 2-6 redundant walks per frame, per cell, and the function is called from a non-trivial number of cells across exec, tool, command, and review history. Add tui::output_rows_cache, a thread-local, content-addressed cache keyed on (content_hash, width) for the rows and (content_hash, width, line_limit) for the indices. The cache stores the wrapped Vec plus a per-line-limit map of selected indices on a single entry, so a single key lookup satisfies both render steps. render_preserved_output_mode now consults the cache for both the rows and the indices; on a hit, neither the per-line ANSI strip nor the importance-ranking pass runs. The cache is bounded (default capacity 256) with insertion-order eviction. The OutputRow struct becomes pub, with pub fields and PartialEq + Eq derives, so the cache module can construct it, hash its fields, and compare rows in tests. Tests: 6 new unit tests cover the hit/miss path, width invalidation, content invalidation, indices per-line_limit caching, capacity eviction, and hash stability. The wider tui::history test suite (68 tests) still passes. 
--- crates/tui/src/tui/history.rs | 22 +- crates/tui/src/tui/mod.rs | 1 + crates/tui/src/tui/output_rows_cache.rs | 344 ++++++++++++++++++++++++ 3 files changed, 361 insertions(+), 6 deletions(-) create mode 100644 crates/tui/src/tui/output_rows_cache.rs diff --git a/crates/tui/src/tui/history.rs b/crates/tui/src/tui/history.rs index 9ef96eb5..4ac287e8 100644 --- a/crates/tui/src/tui/history.rs +++ b/crates/tui/src/tui/history.rs @@ -2614,10 +2614,10 @@ fn render_exec_output_mode( render_preserved_output_mode(output, width, line_limit, mode, "output") } -#[derive(Debug, Clone)] -struct OutputRow { - text: String, - intact: bool, +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct OutputRow { + pub text: String, + pub intact: bool, } fn render_preserved_output_mode( @@ -2636,7 +2636,12 @@ fn render_preserved_output_mode( return lines; } - let all_lines = output_rows(output, width); + let content_hash = crate::tui::output_rows_cache::hash_str(output); + let (all_lines, _rows_hash) = crate::tui::output_rows_cache::get_or_compute_rows( + output, + width, + || output_rows(output, width), + ); if matches!(mode, RenderMode::Transcript) { // Full-content path: emit every wrapped line with no head/tail split, @@ -2652,7 +2657,12 @@ fn render_preserved_output_mode( return lines; } - let selected = selected_output_indices(&all_lines, line_limit); + let selected = crate::tui::output_rows_cache::get_or_compute_indices( + content_hash, + width, + line_limit, + || selected_output_indices(&all_lines, line_limit), + ); let mut previous: Option = None; for (rendered_idx, idx) in selected.iter().copied().enumerate() { if let Some(prev) = previous { diff --git a/crates/tui/src/tui/mod.rs b/crates/tui/src/tui/mod.rs index af2d8996..8be2dc94 100644 --- a/crates/tui/src/tui/mod.rs +++ b/crates/tui/src/tui/mod.rs @@ -45,6 +45,7 @@ pub mod model_picker; pub mod mouse_ui; pub mod notifications; pub mod onboarding; +pub mod output_rows_cache; pub mod osc8; pub mod pager; pub mod paste; diff 
--git a/crates/tui/src/tui/output_rows_cache.rs b/crates/tui/src/tui/output_rows_cache.rs
new file mode 100644
index 00000000..441b99c9
--- /dev/null
+++ b/crates/tui/src/tui/output_rows_cache.rs
@@ -0,0 +1,394 @@
+//! Memoization for the per-cell tool-output shaping pipeline.
+//!
+//! `output_rows` (in `tui::history`) walks the raw tool output, ANSI-strips
+//! each line, classifies path/URL-like rows, and wraps the rest to the
+//! current viewport width. `selected_output_indices` then computes the
+//! head/tail/importance subset that the compact "Live" view shows. They
+//! are pure functions of `(output, width)` and `(rows, line_limit)`
+//! respectively, but they are called on every render frame for every
+//! visible tool cell. For a 4 KB output on a 120 FPS render loop, that
+//! is 2–6 redundant walks per frame, per cell.
+//!
+//! This module puts a thread-local, content-addressed cache in front of
+//! the two pure functions. Rows are keyed on `(content_hash, width)`.
+//! The selected indices live on the same entry in a per-`line_limit`
+//! map, so they are effectively keyed on
+//! `(content_hash, width, line_limit)`. Eviction is insertion-order
+//! (FIFO), which bounds the number of entries; a hit does not refresh an
+//! entry's position, so this is not an LRU.
+//!
+//! ## When the cache is a win
+//!
+//! - Long tool cells that are scrolled into view repeatedly (the model
+//!   often re-asks for the same `read_file` after a partial failure).
+//! - The whole transcript re-rendering at 120 FPS while streaming: the
+//!   finalized tool cells below the live tail are unchanged on every
+//!   frame, so their `output_rows` and `selected_output_indices` calls
+//!   are pure cache hits.
+//! - Terminal resizes still invalidate correctly because `width` is part
+//!   of the key.
+//!
+//! ## When the cache misses
+//!
+//! - New tool output (different `content_hash`).
+//! - First render of a cell (cache is cold).
+//! - Terminal width changed since the last render.
+//! - The entry was evicted to make room for newer entries.
+//!
+//! ## Costs that remain on a hit
+//!
+//! A hit still hashes the full output string to build the key and clones
+//! the cached vectors for the caller. Only the strip/classify/wrap pass
+//! and the index-selection pass are skipped.
+
+use std::cell::RefCell;
+use std::collections::hash_map::DefaultHasher;
+use std::collections::{HashMap, VecDeque};
+use std::hash::{Hash, Hasher};
+
+use crate::tui::history::OutputRow;
+
+/// Default number of `(content, width)` entries kept per thread before
+/// the oldest-inserted entry is evicted. Each entry owns a full copy of
+/// its wrapped rows, so memory use scales with this capacity and with
+/// the size of the cached outputs.
+const DEFAULT_CAPACITY: usize = 256;
+
+/// Internal cache entry. Stores the wrapped `Vec<OutputRow>` plus the
+/// selected-index vectors derived from it, so a single key lookup can
+/// satisfy both render steps. Rows are shared across all line limits;
+/// indices are computed lazily, once per distinct `line_limit`.
+#[derive(Debug, Clone)]
+struct CacheEntry {
+    rows: Vec<OutputRow>,
+    /// Hash of `rows`, handed back to callers together with the rows.
+    /// It is not part of any cache key.
+    rows_hash: u64,
+    /// Map of `line_limit -> selected indices`. Gains one element per
+    /// distinct line limit requested for this entry.
+    selected_by_limit: HashMap<usize, Vec<usize>>,
+}
+
+impl CacheEntry {
+    fn new(rows: Vec<OutputRow>, rows_hash: u64) -> Self {
+        Self {
+            rows,
+            rows_hash,
+            selected_by_limit: HashMap::new(),
+        }
+    }
+}
+
+/// Bounded cache of `(content_hash, width) -> CacheEntry`.
+///
+/// The eviction policy is insertion-order (FIFO): once `capacity`
+/// entries exist, inserting a new key drops the oldest-inserted key
+/// first. Hits never touch `insertion_order`, so re-rendering the same
+/// cell on every frame does not churn unrelated entries.
+#[derive(Debug)]
+struct OutputRowsCacheInner {
+    capacity: usize,
+    by_key: HashMap<RowsKey, CacheEntry>,
+    insertion_order: VecDeque<RowsKey>,
+}
+
+/// Lookup key for one cached set of wrapped rows.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+struct RowsKey {
+    /// 64-bit content hash of the raw tool output. Identical bytes
+    /// always produce the same hash. Two different outputs that collide
+    /// on all 64 bits would share an entry; that case is not detected.
+    content_hash: u64,
+    /// Terminal width used for wrapping. Resize invalidates.
+    width: u16,
+}
+
+impl OutputRowsCacheInner {
+    /// Creates an empty cache bounded at [`DEFAULT_CAPACITY`] entries.
+    fn new() -> Self {
+        Self::with_capacity(DEFAULT_CAPACITY)
+    }
+
+    /// Creates an empty cache bounded at `capacity` entries. A capacity
+    /// of zero is raised to one.
+    fn with_capacity(capacity: usize) -> Self {
+        let cap = capacity.max(1);
+        Self {
+            capacity: cap,
+            by_key: HashMap::with_capacity(cap),
+            insertion_order: VecDeque::with_capacity(cap),
+        }
+    }
+
+    /// Get or compute the wrapped output rows for `content_hash` at
+    /// `width`, returned together with their `rows_hash`.
+    ///
+    /// On a hit the cached rows are cloned and `compute` is not called.
+    /// On a miss `compute` runs once, the oldest-inserted entry is
+    /// evicted if the cache is full, and the new rows are stored.
+    fn get_or_compute_rows<F>(
+        &mut self,
+        content_hash: u64,
+        width: u16,
+        compute: F,
+    ) -> (Vec<OutputRow>, u64)
+    where
+        F: FnOnce() -> Vec<OutputRow>,
+    {
+        let key = RowsKey { content_hash, width };
+        if let Some(entry) = self.by_key.get(&key) {
+            return (entry.rows.clone(), entry.rows_hash);
+        }
+
+        let rows = compute();
+        let rows_hash = hash_rows(&rows);
+
+        // Make room first so the map never grows past `capacity`.
+        if self.by_key.len() >= self.capacity
+            && let Some(oldest) = self.insertion_order.pop_front()
+        {
+            self.by_key.remove(&oldest);
+        }
+        self.by_key.insert(key, CacheEntry::new(rows.clone(), rows_hash));
+        self.insertion_order.push_back(key);
+        (rows, rows_hash)
+    }
+
+    /// Get or compute the selected indices for the rows cached under
+    /// `(content_hash, width)` at the given `line_limit`.
+    ///
+    /// `compute` runs on the first call for a given
+    /// `(content_hash, width, line_limit)` triple and its result is
+    /// stored on the rows entry. If no rows entry exists for the key
+    /// (never inserted, or already evicted), `compute` runs on every
+    /// call and nothing is stored.
+    fn get_or_compute_indices<F>(
+        &mut self,
+        content_hash: u64,
+        width: u16,
+        line_limit: usize,
+        compute: F,
+    ) -> Vec<usize>
+    where
+        F: FnOnce() -> Vec<usize>,
+    {
+        let key = RowsKey { content_hash, width };
+        // Indices are stored on the rows entry; without one there is
+        // nowhere to cache the result.
+        let Some(entry) = self.by_key.get_mut(&key) else {
+            return compute();
+        };
+        if let Some(indices) = entry.selected_by_limit.get(&line_limit) {
+            return indices.clone();
+        }
+
+        let indices = compute();
+        entry.selected_by_limit.insert(line_limit, indices.clone());
+        indices
+    }
+}
+
+thread_local! {
+    /// Per-thread cache instance; despite the name, every thread that
+    /// calls into this module gets its own. The cache is meant for the
+    /// TUI render loop, and keeping it thread-local avoids locking and
+    /// contention with any other thread that calls the same functions.
+    static GLOBAL_CACHE: RefCell<OutputRowsCacheInner> =
+        RefCell::new(OutputRowsCacheInner::new());
+}
+
+/// Replace the current thread's cache with a fresh, empty one. Compiled
+/// for tests only.
+#[cfg(test)]
+pub fn reset_for_tests() {
+    GLOBAL_CACHE.with(|c| *c.borrow_mut() = OutputRowsCacheInner::new());
+}
+
+/// Look up (or compute) the wrapped output rows for `output` at `width`.
+///
+/// Returns a fresh `Vec<OutputRow>` plus its `rows_hash`. On a hit the
+/// cached value is cloned without re-running the per-line ANSI strip or
+/// the wrap pass; `output` is still hashed on every call to build the
+/// key.
+///
+/// # Panics
+///
+/// Panics if `compute` re-enters either lookup function on this thread:
+/// the cache stays mutably borrowed while `compute` runs.
+pub fn get_or_compute_rows<F>(output: &str, width: u16, compute: F) -> (Vec<OutputRow>, u64)
+where
+    F: FnOnce() -> Vec<OutputRow>,
+{
+    let content_hash = hash_str(output);
+    GLOBAL_CACHE.with(|c| c.borrow_mut().get_or_compute_rows(content_hash, width, compute))
+}
+
+/// Look up (or compute) the selected indices for a previously-cached
+/// rows payload at the given `line_limit`.
+///
+/// `content_hash` must be [`hash_str`] of the same output string that was
+/// passed to [`get_or_compute_rows`], and `width` the same width. If no
+/// rows are cached under that pair, `compute` runs and its result is
+/// returned without being stored.
+///
+/// # Panics
+///
+/// Panics if `compute` re-enters either lookup function on this thread:
+/// the cache stays mutably borrowed while `compute` runs.
+pub fn get_or_compute_indices<F>(
+    content_hash: u64,
+    width: u16,
+    line_limit: usize,
+    compute: F,
+) -> Vec<usize>
+where
+    F: FnOnce() -> Vec<usize>,
+{
+    GLOBAL_CACHE.with(|c| {
+        c.borrow_mut()
+            .get_or_compute_indices(content_hash, width, line_limit, compute)
+    })
+}
+
+/// 64-bit content hash for a tool output string. The value is only used
+/// as an in-memory cache key and should not be persisted.
+pub fn hash_str(s: &str) -> u64 {
+    let mut hasher = DefaultHasher::new();
+    s.hash(&mut hasher);
+    hasher.finish()
+}
+
+/// Content hash of an `OutputRow` slice, covering the row count and each
+/// row's `text` and `intact` fields. Computed once per cache miss and
+/// stored on the entry.
+fn hash_rows(rows: &[OutputRow]) -> u64 {
+    let mut hasher = DefaultHasher::new();
+    rows.len().hash(&mut hasher);
+    for row in rows {
+        row.text.hash(&mut hasher);
+        row.intact.hash(&mut hasher);
+    }
+    hasher.finish()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn row(text: &str) -> OutputRow {
+        OutputRow { text: text.to_string(), intact: false }
+    }
+
+    #[test]
+    fn cache_hit_returns_cached_rows() {
+        reset_for_tests();
+
+        let calls = std::cell::Cell::new(0u32);
+        let compute = || {
+            calls.set(calls.get() + 1);
+            vec![row("hello"), row("world")]
+        };
+
+        let (a, hash_a) = get_or_compute_rows("payload", 80, compute);
+        let (b, hash_b) = get_or_compute_rows("payload", 80, || {
+            calls.set(calls.get() + 1);
+            vec![row("hello"), row("world")]
+        });
+        assert_eq!(calls.get(), 1, "second call should hit the cache");
+        assert_eq!(a, b);
+        assert_eq!(hash_a, hash_b);
+    }
+
+    #[test]
+    fn different_width_invalidates_rows() {
+        reset_for_tests();
+
+        let calls = std::cell::Cell::new(0u32);
+        let make = || {
+            calls.set(calls.get() + 1);
+            vec![row("hello")]
+        };
+
+        let _ = get_or_compute_rows("payload", 80, make);
+        let _ = get_or_compute_rows("payload", 120, make);
+        assert_eq!(calls.get(), 2, "different width must miss the cache");
+    }
+
+    #[test]
+    fn different_output_invalidates_rows() {
+        reset_for_tests();
+
+        let calls = std::cell::Cell::new(0u32);
+        let make = || {
+            calls.set(calls.get() + 1);
+            vec![row("x")]
+        };
+
+        let _ = get_or_compute_rows("payload-a", 80, make);
+        let _ = get_or_compute_rows("payload-b", 80, make);
+        assert_eq!(calls.get(), 2);
+    }
+
+    #[test]
+    fn indices_cached_per_line_limit() {
+        reset_for_tests();
+
+        let (rows, _rows_hash) = get_or_compute_rows("payload", 80, || {
+            vec![row("a"), row("b"), row("c"), row("d"), row("e")]
+        });
+        assert_eq!(rows.len(), 5);
+
+        let content_hash = hash_str("payload");
+        let mut calls = 0;
+        let pick_two_a = get_or_compute_indices(content_hash, 80, 2, || {
+            calls += 1;
+            vec![0usize, 4]
+        });
+        let pick_two_b = get_or_compute_indices(content_hash, 80, 2, || {
+            calls += 1;
+            vec![0usize, 4]
+        });
+        assert_eq!(calls, 1, "second lookup with same limit hits the cache");
+        assert_eq!(pick_two_a, pick_two_b);
+        assert_eq!(pick_two_a, vec![0, 4]);
+
+        // Different line_limit must miss and recompute.
+        let _ = get_or_compute_indices(content_hash, 80, 3, || {
+            calls += 1;
+            vec![0usize, 1, 4]
+        });
+        assert_eq!(calls, 2);
+    }
+
+    #[test]
+    fn capacity_evicts_oldest() {
+        // Build a private cache so we can size it tightly.
+        let mut cache = OutputRowsCacheInner::with_capacity(2);
+
+        let _ = cache.get_or_compute_rows(1, 80, || vec![row("a")]);
+        let _ = cache.get_or_compute_rows(2, 80, || vec![row("b")]);
+        let _ = cache.get_or_compute_rows(3, 80, || vec![row("c")]);
+
+        // The newest entry (hash 3) must still be cached.
+        let mut newest_calls = 0;
+        let _ = cache.get_or_compute_rows(3, 80, || {
+            newest_calls += 1;
+            vec![row("c")]
+        });
+        assert_eq!(newest_calls, 0, "newest entry must still hit");
+
+        // The first entry (hash 1) should have been evicted.
+        let mut compute_calls = 0;
+        let _ = cache.get_or_compute_rows(1, 80, || {
+            compute_calls += 1;
+            vec![row("a")]
+        });
+        assert_eq!(compute_calls, 1, "evicted entry must miss");
+    }
+
+    #[test]
+    fn hash_str_stable_for_identical_input() {
+        assert_eq!(hash_str("hello"), hash_str("hello"));
+        assert_ne!(hash_str("hello"), hash_str("world"));
+    }
+}