From cef1632d6ae53c84da731e6a7b54fc2be4976991 Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Sun, 31 May 2026 03:34:01 -0700 Subject: [PATCH] feat(cache): track cache warmup keys (#2424) Harvested from #2391 with thanks to @wplll. Tracks a cache warmup key across provider, model, base URL, static prefix, tool catalog, project pack, and skills hashes; records base URL and tool catalog from completed turns; reports whether /cache inspect still matches the last warmup key; and computes the warmup key from the actual warmup request sent to the provider. Validation: - cargo fmt --all -- --check - git diff --check - CARGO_TARGET_DIR=/Volumes/VIXinSSD/codewhale-target/harvest-2391-rebase cargo test -p codewhale-tui warmup_status --all-features - CARGO_TARGET_DIR=/Volumes/VIXinSSD/codewhale-target/harvest-2391-rebase cargo test -p codewhale-tui cache_inspect --all-features --- crates/tui/src/client.rs | 7 +- crates/tui/src/client/chat.rs | 46 +++++++++++ crates/tui/src/commands/core.rs | 6 ++ crates/tui/src/commands/debug.rs | 130 +++++++++++++++++++++++++++++- crates/tui/src/core/engine.rs | 10 +++ crates/tui/src/core/events.rs | 2 + crates/tui/src/runtime_api.rs | 5 ++ crates/tui/src/runtime_threads.rs | 11 +++ crates/tui/src/tui/app.rs | 7 +- crates/tui/src/tui/ui.rs | 27 +++++-- crates/tui/src/tui/ui/tests.rs | 1 + 11 files changed, 241 insertions(+), 11 deletions(-) diff --git a/crates/tui/src/client.rs b/crates/tui/src/client.rs index ac4f5941..8ab01a81 100644 --- a/crates/tui/src/client.rs +++ b/crates/tui/src/client.rs @@ -568,6 +568,11 @@ fn build_default_headers( } impl DeepSeekClient { + /// Returns the API base URL used by this client. + pub fn base_url(&self) -> &str { + &self.base_url + } + /// Translate text to the requested target language using a focused /// non-streaming chat completion call on the supplied model. 
/// @@ -1118,7 +1123,7 @@ impl DeepSeekClient { mod chat; -pub(crate) use chat::PromptInspection; +pub(crate) use chat::{CacheWarmupKey, PromptInspection}; pub(crate) fn inspect_prompt_for_request(request: &MessageRequest) -> PromptInspection { chat::inspect_prompt_for_request(request) diff --git a/crates/tui/src/client/chat.rs b/crates/tui/src/client/chat.rs index 0d3d04cc..2656ec29 100644 --- a/crates/tui/src/client/chat.rs +++ b/crates/tui/src/client/chat.rs @@ -561,6 +561,52 @@ pub(crate) struct PromptInspection { pub layers: Vec, } +/// Identifies the stable prefix that a cache warmup primes. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +pub(crate) struct CacheWarmupKey { + pub provider: String, + pub model: String, + pub base_url: String, + pub static_prefix_hash: String, + pub tool_catalog_hash: String, + pub project_pack_hash: String, + pub skills_hash: String, +} + +impl CacheWarmupKey { + pub(crate) fn from_inspection( + provider: &str, + model: &str, + base_url: &str, + inspection: &PromptInspection, + ) -> Self { + Self { + provider: provider.to_string(), + model: model.to_string(), + base_url: base_url.to_string(), + static_prefix_hash: inspection.base_static_prefix_hash.clone(), + tool_catalog_hash: inspection.tool_catalog_hash.clone(), + project_pack_hash: layer_hash(inspection, "Project context pack"), + skills_hash: layer_hash(inspection, "Skills"), + } + } + + pub(crate) fn hash_short(&self) -> String { + let json = serde_json::to_string(self).unwrap_or_default(); + let hash = sha256_hex(json.as_bytes()); + hash[..hash.len().min(12)].to_string() + } +} + +fn layer_hash(inspection: &PromptInspection, name: &str) -> String { + inspection + .layers + .iter() + .find(|layer| layer.name == name) + .map(|layer| layer.sha256.clone()) + .unwrap_or_default() +} + #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub(crate) struct PromptLayerInspection { pub name: String, diff --git a/crates/tui/src/commands/core.rs 
b/crates/tui/src/commands/core.rs index a7d9bec1..1154338d 100644 --- a/crates/tui/src/commands/core.rs +++ b/crates/tui/src/commands/core.rs @@ -102,7 +102,9 @@ pub(crate) fn reset_conversation_state(app: &mut App) -> bool { app.session.last_reasoning_replay_tokens = None; app.session.turn_cache_history.clear(); app.session.last_cache_inspection = None; + app.session.last_warmup_key = None; app.session.last_tool_catalog = None; + app.session.last_base_url = None; todos_cleared } @@ -559,7 +561,9 @@ mod tests { app.session.last_prompt_cache_hit_tokens = Some(70); app.session.last_prompt_cache_miss_tokens = Some(30); app.session.last_reasoning_replay_tokens = Some(12); + app.session.last_warmup_key = None; app.session.last_tool_catalog = Some(Vec::new()); + app.session.last_base_url = Some("https://api.deepseek.com".to_string()); app.session.last_cache_inspection = Some(PromptInspection { base_static_prefix_hash: "base".to_string(), full_request_prefix_hash: "full".to_string(), @@ -591,7 +595,9 @@ mod tests { assert_eq!(app.session.last_reasoning_replay_tokens, None); assert!(app.session.turn_cache_history.is_empty()); assert_eq!(app.session.last_cache_inspection, None); + assert_eq!(app.session.last_warmup_key, None); assert_eq!(app.session.last_tool_catalog, None); + assert_eq!(app.session.last_base_url, None); } #[test] diff --git a/crates/tui/src/commands/debug.rs b/crates/tui/src/commands/debug.rs index 709a1967..8ed16820 100644 --- a/crates/tui/src/commands/debug.rs +++ b/crates/tui/src/commands/debug.rs @@ -5,7 +5,7 @@ use std::time::Instant; use super::CommandResult; -use crate::client::{PromptInspection, inspect_prompt_for_request}; +use crate::client::{CacheWarmupKey, PromptInspection, inspect_prompt_for_request}; use crate::compaction::estimate_input_tokens_conservative; use crate::dependencies::{ExternalTool, Git}; use crate::localization::{Locale, MessageId, tr}; @@ -194,10 +194,32 @@ fn format_cache_inspect(app: &mut App, verbose: bool, json_mode: 
bool) -> String }; let inspection = inspect_prompt_for_request(&request); let previous = app.session.last_cache_inspection.as_ref(); + let current_warmup_key = CacheWarmupKey::from_inspection( + &format!("{:?}", app.api_provider), + &app.model, + app.session.last_base_url.as_deref().unwrap_or_default(), + &inspection, + ); + let warmup_status = + format_warmup_status(app.session.last_warmup_key.as_ref(), &current_warmup_key); if json_mode { - let output = serde_json::to_string_pretty(&inspection).unwrap_or_else(|_| { - "{\"error\":\"cache inspection serialization failed\"}".to_string() - }); + let output = serde_json::to_value(&inspection) + .and_then(|mut value| { + if let serde_json::Value::Object(ref mut object) = value { + object.insert( + "current_warmup_key".to_string(), + serde_json::to_value(&current_warmup_key)?, + ); + object.insert( + "warmup_status".to_string(), + serde_json::Value::String(warmup_status.trim_end().to_string()), + ); + } + serde_json::to_string_pretty(&value) + }) + .unwrap_or_else(|_| { + "{\"error\":\"cache inspection serialization failed\"}".to_string() + }); app.session.last_cache_inspection = Some(inspection); return output; } @@ -223,6 +245,7 @@ fn format_cache_inspect(app: &mut App, verbose: bool, json_mode: bool) -> String )); out.push_str(&format_static_prefix_status(previous, &inspection)); out.push_str(&format_first_divergence(previous, &inspection)); + out.push_str(&warmup_status); let total_tokens: usize = inspection .layers .iter() @@ -275,6 +298,56 @@ fn format_cache_inspect(app: &mut App, verbose: bool, json_mode: bool) -> String out } +fn format_warmup_status(last_warmup: Option<&CacheWarmupKey>, current: &CacheWarmupKey) -> String { + match last_warmup { + None => format!( + "Warmup status: no previous warmup (current key: {})\n", + current.hash_short() + ), + Some(previous) if previous == current => { + format!( + "Warmup status: valid (key {} matches)\n", + current.hash_short() + ) + } + Some(previous) => { + let mut reasons = 
Vec::new(); + if previous.provider != current.provider { + reasons.push("provider changed"); + } + if previous.model != current.model { + reasons.push("model changed"); + } + if previous.base_url != current.base_url { + reasons.push("base URL changed"); + } + if previous.static_prefix_hash != current.static_prefix_hash { + reasons.push("static prefix changed"); + } + if previous.tool_catalog_hash != current.tool_catalog_hash { + reasons.push("tool catalog changed"); + } + if previous.project_pack_hash != current.project_pack_hash { + reasons.push("project pack changed"); + } + if previous.skills_hash != current.skills_hash { + reasons.push("skills changed"); + } + let reason_text = if reasons.is_empty() { + "unknown prefix input changed".to_string() + } else { + reasons.join(", ") + }; + format!( + "Warmup status: invalid ({} -> {}; {})\n", + previous.hash_short(), + current.hash_short(), + reason_text + ) + } + } +} + fn format_verbose_diff(previous: &PromptInspection, current: &PromptInspection) -> String { let mut out = String::new(); let max_len = previous.layers.len().max(current.layers.len()); @@ -889,6 +962,12 @@ mod tests { let parsed: serde_json::Value = serde_json::from_str(&msg).expect("valid json"); assert_eq!(parsed["tool_catalog_hash"].as_str().unwrap().len(), 64); + assert!( + parsed["warmup_status"] + .as_str() + .is_some_and(|status| status.starts_with("Warmup status: no previous warmup")) + ); + assert!(parsed["current_warmup_key"].is_object()); let tool_layer = parsed["layers"] .as_array() .unwrap() @@ -899,6 +978,49 @@ mod tests { assert!(tool_layer["token_estimate"].as_u64().unwrap() > 0); } + fn warmup_key(model: &str, static_hash: &str) -> CacheWarmupKey { + CacheWarmupKey { + provider: "Deepseek".to_string(), + model: model.to_string(), + base_url: "https://api.deepseek.com".to_string(), + static_prefix_hash: static_hash.to_string(), + tool_catalog_hash: "tool".to_string(), + project_pack_hash: "project".to_string(), + skills_hash: 
"skills".to_string(), + } + } + + #[test] + fn warmup_status_reports_valid_matching_key() { + let key = warmup_key("deepseek-v4-pro", "static-a"); + let result = format_warmup_status(Some(&key), &key); + assert!(result.contains("Warmup status: valid"), "got: {result}"); + } + + #[test] + fn warmup_status_reports_invalidation_reason() { + let previous = warmup_key("deepseek-v4-pro", "static-a"); + let current = warmup_key("deepseek-v4-flash", "static-b"); + let result = format_warmup_status(Some(&previous), &current); + assert!(result.contains("Warmup status: invalid"), "got: {result}"); + assert!(result.contains("model changed"), "got: {result}"); + assert!(result.contains("static prefix changed"), "got: {result}"); + } + + #[test] + fn warmup_status_reports_project_and_skills_reasons() { + let previous = warmup_key("deepseek-v4-pro", "static-a"); + let mut current = previous.clone(); + current.project_pack_hash = "project-b".to_string(); + current.skills_hash = "skills-b".to_string(); + + let result = format_warmup_status(Some(&previous), &current); + + assert!(result.contains("project pack changed"), "got: {result}"); + assert!(result.contains("skills changed"), "got: {result}"); + assert!(!result.contains("; )"), "got: {result}"); + } + #[test] fn cache_inspect_rejects_json_verbose_combo() { let mut app = create_test_app(); diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index a2327772..4f1b822b 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -1072,6 +1072,7 @@ impl Engine { status: TurnOutcomeStatus::Failed, error: Some(message), tool_catalog: None, + base_url: None, }) .await; return; @@ -1256,6 +1257,10 @@ impl Engine { ) }); let tool_catalog_for_event = tools.clone(); + let base_url_for_event = self + .deepseek_client + .as_ref() + .map(|client| client.base_url().to_string()); // Main turn loop let (status, error) = self @@ -1290,6 +1295,7 @@ impl Engine { status, error, tool_catalog: tool_catalog_for_event, + 
base_url: base_url_for_event, }) .await; @@ -1336,6 +1342,7 @@ impl Engine { status: TurnOutcomeStatus::Failed, error: Some(message), tool_catalog: None, + base_url: None, }) .await; return; @@ -1414,6 +1421,7 @@ impl Engine { status: turn_status, error: turn_error, tool_catalog: None, + base_url: None, }) .await; } @@ -1438,6 +1446,7 @@ impl Engine { status: TurnOutcomeStatus::Failed, error: Some(message), tool_catalog: None, + base_url: None, }) .await; return; @@ -1493,6 +1502,7 @@ impl Engine { status, error, tool_catalog: None, + base_url: None, }) .await; } diff --git a/crates/tui/src/core/events.rs b/crates/tui/src/core/events.rs index 6827f64b..6db2a27e 100644 --- a/crates/tui/src/core/events.rs +++ b/crates/tui/src/core/events.rs @@ -94,6 +94,8 @@ pub enum Event { error: Option, /// Tool catalog sent with this turn's model request. tool_catalog: Option>, + /// API base URL used by this turn's client. + base_url: Option<String>, }, /// Context compaction started. diff --git a/crates/tui/src/runtime_api.rs b/crates/tui/src/runtime_api.rs index 719f562a..8602f574 100644 --- a/crates/tui/src/runtime_api.rs +++ b/crates/tui/src/runtime_api.rs @@ -2917,6 +2917,7 @@ mod tests { status: TurnOutcomeStatus::Completed, error: None, tool_catalog: None, + base_url: None, }) .await; } @@ -2931,6 +2932,7 @@ mod tests { status: TurnOutcomeStatus::Completed, error: None, tool_catalog: None, + base_url: None, }) .await; } @@ -3088,6 +3090,7 @@ mod tests { status: TurnOutcomeStatus::Completed, error: None, tool_catalog: None, + base_url: None, }) .await; }); @@ -3221,6 +3224,7 @@ mod tests { status: TurnOutcomeStatus::Completed, error: None, tool_catalog: None, + base_url: None, }) .await; }); @@ -3441,6 +3445,7 @@ mod tests { status: TurnOutcomeStatus::Completed, error: None, tool_catalog: None, + base_url: None, }) .await; }); diff --git a/crates/tui/src/runtime_threads.rs b/crates/tui/src/runtime_threads.rs index 3ccbb327..bd82071b 100644 --- a/crates/tui/src/runtime_threads.rs 
+++ b/crates/tui/src/runtime_threads.rs @@ -3713,6 +3713,7 @@ mod tests { status: TurnOutcomeStatus::Completed, error: None, tool_catalog: None, + base_url: None, }) .await; } @@ -4006,6 +4007,7 @@ mod tests { status: TurnOutcomeStatus::Completed, error: None, tool_catalog: None, + base_url: None, }) .await; if turn_index >= 2 { @@ -4244,6 +4246,7 @@ mod tests { status: TurnOutcomeStatus::Completed, error: None, tool_catalog: None, + base_url: None, }) .await?; @@ -4341,6 +4344,7 @@ mod tests { status: TurnOutcomeStatus::Completed, error: None, tool_catalog: None, + base_url: None, }) .await?; Ok(()) @@ -4420,6 +4424,7 @@ mod tests { status: TurnOutcomeStatus::Completed, error: None, tool_catalog: None, + base_url: None, }) .await?; Ok(()) @@ -4484,6 +4489,7 @@ mod tests { status: TurnOutcomeStatus::Completed, error: None, tool_catalog: None, + base_url: None, }) .await?; @@ -4613,6 +4619,7 @@ mod tests { status: TurnOutcomeStatus::Completed, error: None, tool_catalog: None, + base_url: None, }) .await?; Ok(()) @@ -4694,6 +4701,7 @@ mod tests { status: TurnOutcomeStatus::Completed, error: None, tool_catalog: None, + base_url: None, }) .await?; @@ -4756,6 +4764,7 @@ mod tests { status: TurnOutcomeStatus::Completed, error: None, tool_catalog: None, + base_url: None, }) .await; } @@ -4867,6 +4876,7 @@ mod tests { status: TurnOutcomeStatus::Completed, error: None, tool_catalog: None, + base_url: None, }) .await; } @@ -4898,6 +4908,7 @@ mod tests { status: TurnOutcomeStatus::Completed, error: None, tool_catalog: None, + base_url: None, }) .await; } diff --git a/crates/tui/src/tui/app.rs b/crates/tui/src/tui/app.rs index e5e4fe39..9ff82990 100644 --- a/crates/tui/src/tui/app.rs +++ b/crates/tui/src/tui/app.rs @@ -10,7 +10,7 @@ use serde_json::Value; use thiserror::Error; use crate::artifacts::ArtifactRecord; -use crate::client::PromptInspection; +use crate::client::{CacheWarmupKey, PromptInspection}; use crate::compaction::CompactionConfig; use crate::config::{ 
ApiProvider, Config, DEFAULT_TEXT_MODEL, SavedCredential, has_api_key, save_api_key, @@ -1046,11 +1046,14 @@ pub struct SessionState { pub total_output_tokens: u32, pub turn_cache_history: VecDeque, pub last_cache_inspection: Option<PromptInspection>, + pub last_warmup_key: Option<CacheWarmupKey>, /// Tool catalog from the most recent model request. /// /// `/cache inspect` uses this to inspect the same tool schema bytes /// that were eligible for the provider's prefix cache. pub last_tool_catalog: Option>, + /// API base URL used by the most recent model request or cache warmup. + pub last_base_url: Option<String>, } /// Sidebar hover state for mouse tooltip support. @@ -1092,7 +1095,9 @@ impl Default for SessionState { total_output_tokens: 0, turn_cache_history: VecDeque::new(), last_cache_inspection: None, + last_warmup_key: None, last_tool_catalog: None, + last_base_url: None, } } } diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs index 2502f88f..eeec9ffd 100644 --- a/crates/tui/src/tui/ui.rs +++ b/crates/tui/src/tui/ui.rs @@ -35,7 +35,10 @@ use windows::Win32::System::Console::{GetConsoleMode, GetStdHandle, SetConsoleMo use crate::audit::log_sensitive_event; use crate::automation_manager::{AutomationManager, AutomationSchedulerConfig, spawn_scheduler}; -use crate::client::{DeepSeekClient, build_cache_warmup_request}; +use crate::client::{ + CacheWarmupKey, DeepSeekClient, PromptInspection, build_cache_warmup_request, + inspect_prompt_for_request, +}; use crate::commands; use crate::compaction::estimate_input_tokens_conservative; use crate::config::{ @@ -1515,8 +1518,10 @@ async fn run_event_loop( status, error, tool_catalog, + base_url, } => { app.session.last_tool_catalog = tool_catalog; + app.session.last_base_url = base_url; let was_locally_cancelled = app.suppress_stream_events_until_turn_complete; app.suppress_stream_events_until_turn_complete = false; app.active_allowed_tools = None; @@ -3975,8 +3980,9 @@ async fn fetch_available_models(config: &Config) -> Result> { Ok(ids) } 
-async fn run_cache_warmup(app: &App, config: &Config) -> Result<Usage> { +async fn run_cache_warmup(app: &App, config: &Config) -> Result<(Usage, String, PromptInspection)> { let client = DeepSeekClient::new(config)?; + let base_url = client.base_url().to_string(); let reasoning_effort = if app.reasoning_effort == ReasoningEffort::Auto { app.last_effective_reasoning_effort .and_then(ReasoningEffort::api_value) @@ -3989,7 +3995,7 @@ async fn run_cache_warmup(app: &App, config: &Config) -> Result<Usage> { messages: app.api_messages.clone(), max_tokens: 1024, system: app.system_prompt.clone(), - tools: None, + tools: app.session.last_tool_catalog.clone(), tool_choice: None, metadata: None, thinking: None, @@ -3999,9 +4005,10 @@ async fn run_cache_warmup(app: &App, config: &Config) -> Result<Usage> { top_p: None, }; let warmup = build_cache_warmup_request(&request); + let inspection = inspect_prompt_for_request(&warmup); let response = tokio::time::timeout(Duration::from_secs(45), client.create_message(warmup)).await??; - Ok(response.usage) + Ok((response.usage, base_url, inspection)) } // `format_*` chip/message builders moved to `tui/format_helpers.rs`. @@ -5075,8 +5082,18 @@ async fn apply_command_result( AppAction::CacheWarmup => { app.status_message = Some("Warming DeepSeek cache...".to_string()); match run_cache_warmup(app, config).await { - Ok(usage) => { + Ok((usage, base_url, inspection)) => { + app.session.last_base_url = Some(base_url.clone()); + app.session.last_warmup_key = Some(CacheWarmupKey::from_inspection( + &format!("{:?}", app.api_provider), + &app.model, + &base_url, + &inspection, + )); let mut message = format_helpers::cache_warmup_result(&usage); + if let Some(key) = app.session.last_warmup_key.as_ref() { + message.push_str(&format!("\nWarmup key: {}", key.hash_short())); + } // Append prefix-cache stability info. 
if app.prefix_checks_total > 0 { let changes = app.prefix_change_count; diff --git a/crates/tui/src/tui/ui/tests.rs b/crates/tui/src/tui/ui/tests.rs index 39760ef0..a666712a 100644 --- a/crates/tui/src/tui/ui/tests.rs +++ b/crates/tui/src/tui/ui/tests.rs @@ -3060,6 +3060,7 @@ fn local_cancel_marks_late_stream_events_for_suppression() { status: crate::core::events::TurnOutcomeStatus::Interrupted, error: None, tool_catalog: None, + base_url: None, } )); assert!(!suppress_engine_event_after_local_cancel(