From 60f8e7d62ec26526949ea8585982230bdca2610e Mon Sep 17 00:00:00 2001 From: Hunter B Date: Wed, 3 Jun 2026 20:06:08 -0700 Subject: [PATCH] refactor(web_run): split cache locks for page reads Harvested from PR #2502 by @HUQIANTAO Co-authored-by: HUQIANTAO <58421104+HUQIANTAO@users.noreply.github.com> --- CHANGELOG.md | 10 +- Cargo.lock | 1 + crates/tui/Cargo.toml | 1 + crates/tui/src/tools/web_run.rs | 159 +++++++++++++++++++++++++++----- docs/V0_9_0_EXECUTION_MAP.md | 11 +-- 5 files changed, 150 insertions(+), 32 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ed5e141d..9ffec202 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,10 +15,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 reach beyond that default listing up to a bounded index, and list requests above the visible cap fail explicitly instead of silently truncating. +### Changed + +- Split `web_run` session/page cache state so cached page reads use shared + page handles and do not serialize through the mutation path. The harvest also + adds panic-safe state write-back and serializes cache-mutating unit tests so + the global web cache remains stable under normal Cargo test parallelism. + ### Community Thanks to **@cyq1017** for the restore-listing implementation (#2513) and -**@wywsoor** for the broader macOS/iTerm rollback UX report (#2494). +**@wywsoor** for the broader macOS/iTerm rollback UX report (#2494), and +**@HUQIANTAO** for the `web_run` lock-splitting work (#2502). ## [0.8.53] - 2026-06-03 diff --git a/Cargo.lock b/Cargo.lock index 139ac2a7..1bb14640 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1014,6 +1014,7 @@ dependencies = [ "multimap", "objc2", "objc2-foundation", + "parking_lot", "pdf-extract", "portable-pty", "pretty_assertions", diff --git a/crates/tui/Cargo.toml b/crates/tui/Cargo.toml index 6a577538..ed9e8270 100644 --- a/crates/tui/Cargo.toml +++ b/crates/tui/Cargo.toml @@ -77,6 +77,7 @@ portable-pty = "0.9" zeroize = "1.8.2" ignore = "0.4" image = { version = "0.25", default-features = false, features = ["png"] } +parking_lot = "0.12" pdf-extract = "0.7" tar = "0.4" flate2 = "1.1" diff --git a/crates/tui/src/tools/web_run.rs b/crates/tui/src/tools/web_run.rs index edf1f56f..f1495d36 100644 --- a/crates/tui/src/tools/web_run.rs +++ b/crates/tui/src/tools/web_run.rs @@ -15,9 +15,11 @@ use serde::{Deserialize, Serialize}; use serde_json::{Value, json}; use std::collections::{HashMap, VecDeque}; use std::hash::{Hash, Hasher}; -use std::sync::{Mutex, OnceLock}; +use std::sync::{Arc, OnceLock}; use std::time::{Duration, Instant}; +use parking_lot::{RwLock, RwLockWriteGuard}; + const MAX_RESULTS: usize = 10; const DEFAULT_TIMEOUT_MS: u64 = 15_000; const DEFAULT_OPEN_TIMEOUT_MS: u64 = 20_000; @@ -26,7 +28,13 @@ const MAX_PAGES_PER_SESSION: usize = 256; const WEB_RUN_SESSION_TTL: Duration = Duration::from_secs(30 * 60); const USER_AGENT: &str = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Safari/605.1.15"; -static WEB_RUN_STATE: OnceLock> = OnceLock::new(); +static WEB_RUN_STATE: OnceLock = OnceLock::new(); + +#[derive(Default)] +struct WebRunCache { + sessions: RwLock>, + pages: RwLock>, +} #[derive(Default)] struct WebRunState { @@ -53,7 +61,7 @@ impl Default for WebRunSessionState { #[derive(Debug, Clone)] struct StoredWebPage { namespace: String, - page: WebPage, + page: Arc, } impl WebRunState { @@ -148,22 +156,13 @@ impl WebRunState { ref_id.to_string(), StoredWebPage { namespace: namespace.to_string(), - page, + page: Arc::new(page), }, ); for evicted_ref in evicted_refs { self.pages.remove(&evicted_ref); } } - - fn get_page(&mut self, ref_id: &str) -> Option { - self.cleanup(); - let stored = self.pages.get(ref_id)?.clone(); - if let Some(session) = self.sessions.get_mut(&stored.namespace) { - session.last_access = Instant::now(); - } - Some(stored.page) - } } #[derive(Debug, Clone, Serialize)] @@ -576,7 +575,7 @@ impl ToolSpec for WebRunTool { let page = resolve_or_fetch_page(&ref_id, DEFAULT_OPEN_TIMEOUT_MS, context).await?; view_counter += 1; let view_ref = format!("{scope}turn{turn}view{view_counter}"); - store_page(&context.state_namespace, &view_ref, page.clone()); + store_page(&context.state_namespace, &view_ref, (*page).clone()); let view = render_view(&view_ref, &page, lineno, response_length); views.push(view); @@ -607,7 +606,7 @@ impl ToolSpec for WebRunTool { resolve_or_fetch_page(&target, DEFAULT_OPEN_TIMEOUT_MS, context).await?; click_counter += 1; let click_ref = format!("{scope}turn{turn}click{click_counter}"); - store_page(&context.state_namespace, &click_ref, fetched.clone()); + store_page(&context.state_namespace, &click_ref, (*fetched).clone()); let view = render_view(&click_ref, &fetched, 1, response_length); views.push(view); } @@ -653,12 +652,60 @@ impl ToolSpec for WebRunTool { } fn with_state(f: impl FnOnce(&mut WebRunState) -> T) -> T { - let lock = WEB_RUN_STATE.get_or_init(|| Mutex::new(WebRunState::default())); - let mut state = lock - .lock() - .expect("web run state mutex should not be poisoned"); - state.cleanup(); - f(&mut state) + let cache = WEB_RUN_STATE.get_or_init(WebRunCache::default); + let sessions = cache.sessions.write(); + let pages = cache.pages.write(); + let mut guard = WebRunStateWriteBack::new(sessions, pages); + guard.state_mut().cleanup(); + let result = f(guard.state_mut()); + guard.write_back(); + result +} + +struct WebRunStateWriteBack<'a> { + sessions: RwLockWriteGuard<'a, HashMap>, + pages: RwLockWriteGuard<'a, HashMap>, + state: Option, +} + +impl<'a> WebRunStateWriteBack<'a> { + fn new( + mut sessions: RwLockWriteGuard<'a, HashMap>, + mut pages: RwLockWriteGuard<'a, HashMap>, + ) -> Self { + let state = WebRunState { + sessions: std::mem::take(&mut *sessions), + pages: std::mem::take(&mut *pages), + }; + Self { + sessions, + pages, + state: Some(state), + } + } + + fn state_mut(&mut self) -> &mut WebRunState { + self.state + .as_mut() + .expect("web run state should be present until write-back") + } + + fn write_back(mut self) { + self.restore(); + } + + fn restore(&mut self) { + if let Some(state) = self.state.take() { + *self.sessions = state.sessions; + *self.pages = state.pages; + } + } +} + +impl Drop for WebRunStateWriteBack<'_> { + fn drop(&mut self) { + self.restore(); + } } fn scoped_ref_prefix(namespace: &str) -> String { @@ -673,8 +720,19 @@ fn store_page(namespace: &str, ref_id: &str, page: WebPage) { }); } -fn get_page(ref_id: &str) -> Option { - with_state(|state| state.get_page(ref_id)) +fn get_page(ref_id: &str) -> Option> { + let cache = WEB_RUN_STATE.get_or_init(WebRunCache::default); + let stored = { + let pages = cache.pages.read(); + pages.get(ref_id).cloned() + }?; + { + let mut sessions = cache.sessions.write(); + if let Some(session) = sessions.get_mut(&stored.namespace) { + session.last_access = Instant::now(); + } + } + Some(stored.page) } #[cfg(test)] @@ -693,13 +751,13 @@ async fn resolve_or_fetch_page( ref_id: &str, timeout_ms: u64, context: &ToolContext, -) -> Result { +) -> Result, ToolError> { if let Some(page) = get_page(ref_id) { return Ok(page); } if looks_like_url(ref_id) { check_network_policy(ref_id, context)?; - return fetch_page(ref_id, timeout_ms).await; + return fetch_page(ref_id, timeout_ms).await.map(Arc::new); } Err(ToolError::invalid_input(format!( "Unknown ref_id '{ref_id}'" @@ -1637,6 +1695,15 @@ fn url_encode(input: &str) -> String { mod tests { use super::*; use std::path::PathBuf; + use std::sync::{Mutex, MutexGuard}; + + static WEB_RUN_TEST_LOCK: Mutex<()> = Mutex::new(()); + + fn lock_web_run_test_state() -> MutexGuard<'static, ()> { + WEB_RUN_TEST_LOCK + .lock() + .unwrap_or_else(|poisoned| poisoned.into_inner()) + } fn sample_page(url: &str) -> WebPage { WebPage { @@ -1729,6 +1796,7 @@ mod tests { #[test] fn scoped_ref_prefix_is_session_specific() { + let _lock = lock_web_run_test_state(); reset_web_run_state(); let alpha = scoped_ref_prefix("session-alpha"); let beta = scoped_ref_prefix("session-beta"); @@ -1741,6 +1809,7 @@ mod tests { #[test] fn stored_pages_do_not_cross_scoped_sessions() { + let _lock = lock_web_run_test_state(); reset_web_run_state(); let shared_suffix = "turn1search1"; let ref_alpha = format!("{}{}", scoped_ref_prefix("session-alpha"), shared_suffix); @@ -1756,8 +1825,23 @@ mod tests { assert!(get_page(&ref_beta).is_none()); } + #[test] + fn cached_page_reads_share_page_arc() { + let _lock = lock_web_run_test_state(); + reset_web_run_state(); + let namespace = "session-alpha"; + let ref_id = format!("{}turn0search1", scoped_ref_prefix(namespace)); + store_page(namespace, &ref_id, sample_page("https://example.com/alpha")); + + let first = get_page(&ref_id).expect("first page read"); + let second = get_page(&ref_id).expect("second page read"); + + assert!(Arc::ptr_eq(&first, &second)); + } + #[test] fn turn_counters_are_scoped_per_session() { + let _lock = lock_web_run_test_state(); reset_web_run_state(); assert_eq!(next_turn_for_namespace("session-alpha"), 0); @@ -1765,8 +1849,33 @@ mod tests { assert_eq!(next_turn_for_namespace("session-beta"), 0); } + #[test] + fn with_state_restores_cache_after_panic() { + let _lock = lock_web_run_test_state(); + reset_web_run_state(); + let namespace = "session-alpha"; + let ref_id = format!("{}turn0search1", scoped_ref_prefix(namespace)); + store_page(namespace, &ref_id, sample_page("https://example.com/alpha")); + + let panic_result = std::panic::catch_unwind(|| { + with_state(|state| { + let session = state + .sessions + .get_mut(namespace) + .expect("session should exist"); + session.next_turn = 42; + panic!("exercise web_run write-back guard"); + }); + }); + + assert!(panic_result.is_err()); + assert!(get_page(&ref_id).is_some()); + assert_eq!(next_turn_for_namespace(namespace), 42); + } + #[test] fn stale_session_pages_are_evicted() { + let _lock = lock_web_run_test_state(); reset_web_run_state(); let namespace = "session-alpha"; let ref_id = format!("{}turn0search1", scoped_ref_prefix(namespace)); diff --git a/docs/V0_9_0_EXECUTION_MAP.md b/docs/V0_9_0_EXECUTION_MAP.md index 41c4cc55..5611bb1d 100644 --- a/docs/V0_9_0_EXECUTION_MAP.md +++ b/docs/V0_9_0_EXECUTION_MAP.md @@ -46,6 +46,7 @@ harvest/stewardship commits: | #2530 mention depth-cap hint | Already present in the current v0.9 stack as `a97675824` and `29f57665e`. | `cargo test -p codewhale-tui --locked try_autocomplete_file_mention_no_match` passed. | | #2513 restore snapshot listing | Manually harvested as `bb39cf169` with explicit `/restore list 101` cap rejection. | `cargo test -p codewhale-tui --locked restore_`; `cargo fmt --all -- --check`; `cargo clippy -p codewhale-tui --locked -- -D warnings` passed. Keep #2494 open because this is only the restore-listing slice. | | #2576 PrefixCacheChange first-freeze event | Already present in the current v0.9 stack through `29acb87a9d`. | `cargo test -p codewhale-tui --locked prefix_cache` passed. Do not close until this integration branch is public or merged. | +| #2502 web_run RwLock split | Manually harvested with panic-safe state write-back, `Arc` cache reads, and serialized cache tests. | `cargo test -p codewhale-tui --locked web_run`; `cargo clippy -p codewhale-tui --locked -- -D warnings`; `cargo fmt --all -- --check` passed. | ## PR Harvest Queue @@ -69,7 +70,7 @@ harvest/stewardship commits: | #2491 typed ask permissions schema | Conflicting | Prior memory says safe candidate; verify current permissions work first. | | #2498 Windows shell process trees | Conflicting | Prior memory says safe candidate; review for #2721 stabilization. | | #2501 in-process LLM response cache | Conflicting | Defer; cache key risks noted in prior review. | -| #2502 web_run RwLock split | Mergeable | Review lock/panic safety before merge. | +| #2502 web_run RwLock split | Mergeable | Manually harvested with panic-safety and shared cached-page reads; close/comment after branch is public. | | #2505 subagent cap accounting | Draft/conflicting | Compare with current subagent cap tests before harvest. | | #2506 provider path suffix overrides | Draft/conflicting | Partly superseded by current provider path-suffix support; verify. | | #2507 stream chunk timeout config | Draft/conflicting | Defer unless stabilization needs it. | @@ -124,11 +125,9 @@ Issue count should drop through evidence-backed consolidation, not bulk closing. ## Immediate Next Actions -1. Review and harvest #2502 after adding panic-safety coverage for the web_run - state split. -2. Review #2517, #2520, and #2522 for prompt/cache implications after #2687 +1. Review #2517, #2520, and #2522 for prompt/cache implications after #2687 was deferred. -3. Prepare public comments for #2708, #2513, #2530, #2576, #2581, #2627, +2. Prepare public comments for #2708, #2502, #2513, #2530, #2576, #2581, #2627, #2634, #2636, #2687, and already-harvested performance PRs. -4. Start file decomposition Phase 1 only after the PR harvest table has no +3. Start file decomposition Phase 1 only after the PR harvest table has no unknown high-priority provider/prompt/cache branches.