feat(cache): track cache warmup keys (#2424)

Harvested from #2391 with thanks to @wplll.

Tracks a cache warmup key across provider, model, base URL, static prefix, tool catalog, project pack, and skills hashes; records base URL and tool catalog from completed turns; reports whether /cache inspect still matches the last warmup key; and computes the warmup key from the actual warmup request sent to the provider.

Validation:
- cargo fmt --all -- --check
- git diff --check
- CARGO_TARGET_DIR=/Volumes/VIXinSSD/codewhale-target/harvest-2391-rebase cargo test -p codewhale-tui warmup_status --all-features
- CARGO_TARGET_DIR=/Volumes/VIXinSSD/codewhale-target/harvest-2391-rebase cargo test -p codewhale-tui cache_inspect --all-features
This commit is contained in:
Hunter Bown
2026-05-31 03:34:01 -07:00
committed by GitHub
parent 51906511c5
commit cef1632d6a
11 changed files with 241 additions and 11 deletions
+6 -1
View File
@@ -568,6 +568,11 @@ fn build_default_headers(
}
impl DeepSeekClient {
/// Returns the API base URL used by this client.
///
/// The returned slice borrows from the client; convert it with `to_string()`
/// when the value has to outlive that borrow.
pub fn base_url(&self) -> &str {
&self.base_url
}
/// Translate text to the requested target language using a focused
/// non-streaming chat completion call on the supplied model.
///
@@ -1118,7 +1123,7 @@ impl DeepSeekClient {
mod chat;
pub(crate) use chat::PromptInspection;
pub(crate) use chat::{CacheWarmupKey, PromptInspection};
pub(crate) fn inspect_prompt_for_request(request: &MessageRequest) -> PromptInspection {
chat::inspect_prompt_for_request(request)
+46
View File
@@ -561,6 +561,52 @@ pub(crate) struct PromptInspection {
pub layers: Vec<PromptLayerInspection>,
}
/// Identifies the stable prefix that a cache warmup primes.
///
/// Two keys are equal only when every field matches (derived `PartialEq`).
/// `hash_short` digests the serialized struct, so renaming or reordering
/// fields changes the short hash that is shown to the user.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub(crate) struct CacheWarmupKey {
/// Provider label supplied by the caller; the visible call sites pass the
/// `Debug` rendering of the configured API provider.
pub provider: String,
/// Model name supplied by the caller.
pub model: String,
/// API base URL supplied by the caller; may be empty when none is known.
pub base_url: String,
/// Copied from the inspection's `base_static_prefix_hash`.
pub static_prefix_hash: String,
/// Copied from the inspection's `tool_catalog_hash`.
pub tool_catalog_hash: String,
/// Hash of the inspection layer named "Project context pack"; empty when
/// the inspection has no such layer.
pub project_pack_hash: String,
/// Hash of the inspection layer named "Skills"; empty when the inspection
/// has no such layer.
pub skills_hash: String,
}
impl CacheWarmupKey {
    /// Builds a key from the request identity and a prompt inspection.
    ///
    /// `provider`, `model` and `base_url` are copied as given. The static
    /// prefix and tool catalog hashes are cloned from `inspection`; the
    /// project pack and skills hashes are looked up by layer name through
    /// `layer_hash`.
    pub(crate) fn from_inspection(
        provider: &str,
        model: &str,
        base_url: &str,
        inspection: &PromptInspection,
    ) -> Self {
        let project_pack_hash = layer_hash(inspection, "Project context pack");
        let skills_hash = layer_hash(inspection, "Skills");
        Self {
            provider: provider.to_owned(),
            model: model.to_owned(),
            base_url: base_url.to_owned(),
            static_prefix_hash: inspection.base_static_prefix_hash.clone(),
            tool_catalog_hash: inspection.tool_catalog_hash.clone(),
            project_pack_hash,
            skills_hash,
        }
    }

    /// Returns at most the first 12 characters of the SHA-256 hex digest of
    /// this key's JSON serialization.
    pub(crate) fn hash_short(&self) -> String {
        // A serialization failure falls back to digesting the empty string
        // instead of panicking.
        let serialized = serde_json::to_string(self).unwrap_or_default();
        let digest = sha256_hex(serialized.as_bytes());
        let keep = digest.len().min(12);
        digest[..keep].to_owned()
    }
}
/// Returns the `sha256` of the first layer in `inspection` whose name equals
/// `name`, or an empty string when no layer has that name.
fn layer_hash(inspection: &PromptInspection, name: &str) -> String {
    for candidate in &inspection.layers {
        if candidate.name == name {
            return candidate.sha256.clone();
        }
    }
    String::new()
}
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub(crate) struct PromptLayerInspection {
pub name: String,
+6
View File
@@ -102,7 +102,9 @@ pub(crate) fn reset_conversation_state(app: &mut App) -> bool {
app.session.last_reasoning_replay_tokens = None;
app.session.turn_cache_history.clear();
app.session.last_cache_inspection = None;
app.session.last_warmup_key = None;
app.session.last_tool_catalog = None;
app.session.last_base_url = None;
todos_cleared
}
@@ -559,7 +561,9 @@ mod tests {
app.session.last_prompt_cache_hit_tokens = Some(70);
app.session.last_prompt_cache_miss_tokens = Some(30);
app.session.last_reasoning_replay_tokens = Some(12);
app.session.last_warmup_key = None;
app.session.last_tool_catalog = Some(Vec::new());
app.session.last_base_url = Some("https://api.deepseek.com".to_string());
app.session.last_cache_inspection = Some(PromptInspection {
base_static_prefix_hash: "base".to_string(),
full_request_prefix_hash: "full".to_string(),
@@ -591,7 +595,9 @@ mod tests {
assert_eq!(app.session.last_reasoning_replay_tokens, None);
assert!(app.session.turn_cache_history.is_empty());
assert_eq!(app.session.last_cache_inspection, None);
assert_eq!(app.session.last_warmup_key, None);
assert_eq!(app.session.last_tool_catalog, None);
assert_eq!(app.session.last_base_url, None);
}
#[test]
+126 -4
View File
@@ -5,7 +5,7 @@
use std::time::Instant;
use super::CommandResult;
use crate::client::{PromptInspection, inspect_prompt_for_request};
use crate::client::{CacheWarmupKey, PromptInspection, inspect_prompt_for_request};
use crate::compaction::estimate_input_tokens_conservative;
use crate::dependencies::{ExternalTool, Git};
use crate::localization::{Locale, MessageId, tr};
@@ -194,10 +194,32 @@ fn format_cache_inspect(app: &mut App, verbose: bool, json_mode: bool) -> String
};
let inspection = inspect_prompt_for_request(&request);
let previous = app.session.last_cache_inspection.as_ref();
let current_warmup_key = CacheWarmupKey::from_inspection(
&format!("{:?}", app.api_provider),
&app.model,
app.session.last_base_url.as_deref().unwrap_or_default(),
&inspection,
);
let warmup_status =
format_warmup_status(app.session.last_warmup_key.as_ref(), &current_warmup_key);
if json_mode {
let output = serde_json::to_string_pretty(&inspection).unwrap_or_else(|_| {
"{\"error\":\"cache inspection serialization failed\"}".to_string()
});
let output = serde_json::to_value(&inspection)
.and_then(|mut value| {
if let serde_json::Value::Object(ref mut object) = value {
object.insert(
"current_warmup_key".to_string(),
serde_json::to_value(&current_warmup_key)?,
);
object.insert(
"warmup_status".to_string(),
serde_json::Value::String(warmup_status.trim_end().to_string()),
);
}
serde_json::to_string_pretty(&value)
})
.unwrap_or_else(|_| {
"{\"error\":\"cache inspection serialization failed\"}".to_string()
});
app.session.last_cache_inspection = Some(inspection);
return output;
}
@@ -223,6 +245,7 @@ fn format_cache_inspect(app: &mut App, verbose: bool, json_mode: bool) -> String
));
out.push_str(&format_static_prefix_status(previous, &inspection));
out.push_str(&format_first_divergence(previous, &inspection));
out.push_str(&warmup_status);
let total_tokens: usize = inspection
.layers
.iter()
@@ -275,6 +298,56 @@ fn format_cache_inspect(app: &mut App, verbose: bool, json_mode: bool) -> String
out
}
/// Renders a one-line, newline-terminated summary of whether `current` still
/// matches the key recorded at the last warmup.
///
/// - No recorded key: reports that there was no previous warmup.
/// - Equal keys: reports the warmup as valid.
/// - Different keys: reports it as invalid, shows both short hashes, and
///   lists every field that differs in declaration order.
fn format_warmup_status(last_warmup: Option<&CacheWarmupKey>, current: &CacheWarmupKey) -> String {
    let Some(previous) = last_warmup else {
        return format!(
            "Warmup status: no previous warmup (current key: {})\n",
            current.hash_short()
        );
    };

    if previous == current {
        return format!(
            "Warmup status: valid (key {} matches)\n",
            current.hash_short()
        );
    }

    // Each entry pairs "did this field change?" with the label to report.
    let comparisons = [
        (previous.provider != current.provider, "provider changed"),
        (previous.model != current.model, "model changed"),
        (previous.base_url != current.base_url, "base URL changed"),
        (
            previous.static_prefix_hash != current.static_prefix_hash,
            "static prefix changed",
        ),
        (
            previous.tool_catalog_hash != current.tool_catalog_hash,
            "tool catalog changed",
        ),
        (
            previous.project_pack_hash != current.project_pack_hash,
            "project pack changed",
        ),
        (
            previous.skills_hash != current.skills_hash,
            "skills changed",
        ),
    ];
    let reasons: Vec<&str> = comparisons
        .iter()
        .filter(|(differs, _)| *differs)
        .map(|(_, label)| *label)
        .collect();

    // Fallback for keys that compare unequal without any listed field differing.
    let reason_text = if reasons.is_empty() {
        "unknown prefix input changed".to_string()
    } else {
        reasons.join(", ")
    };

    format!(
        "Warmup status: invalid ({} -> {}; {})\n",
        previous.hash_short(),
        current.hash_short(),
        reason_text
    )
}
fn format_verbose_diff(previous: &PromptInspection, current: &PromptInspection) -> String {
let mut out = String::new();
let max_len = previous.layers.len().max(current.layers.len());
@@ -889,6 +962,12 @@ mod tests {
let parsed: serde_json::Value = serde_json::from_str(&msg).expect("valid json");
assert_eq!(parsed["tool_catalog_hash"].as_str().unwrap().len(), 64);
assert!(
parsed["warmup_status"]
.as_str()
.is_some_and(|status| status.starts_with("Warmup status: no previous warmup"))
);
assert!(parsed["current_warmup_key"].is_object());
let tool_layer = parsed["layers"]
.as_array()
.unwrap()
@@ -899,6 +978,49 @@ mod tests {
assert!(tool_layer["token_estimate"].as_u64().unwrap() > 0);
}
/// Test fixture: a key with fixed provider, base URL, tool, project and
/// skills values, varying only in `model` and the static prefix hash.
fn warmup_key(model: &str, static_hash: &str) -> CacheWarmupKey {
    CacheWarmupKey {
        provider: String::from("Deepseek"),
        model: model.to_owned(),
        base_url: String::from("https://api.deepseek.com"),
        static_prefix_hash: static_hash.to_owned(),
        tool_catalog_hash: String::from("tool"),
        project_pack_hash: String::from("project"),
        skills_hash: String::from("skills"),
    }
}
/// A key compared against itself must be reported as a still-valid warmup.
#[test]
fn warmup_status_reports_valid_matching_key() {
    let same = warmup_key("deepseek-v4-pro", "static-a");
    let status = format_warmup_status(Some(&same), &same);
    assert!(status.contains("Warmup status: valid"), "got: {status}");
}
/// Changing the model and the static prefix must mark the warmup invalid and
/// name both changes.
#[test]
fn warmup_status_reports_invalidation_reason() {
    let warmed = warmup_key("deepseek-v4-pro", "static-a");
    let now = warmup_key("deepseek-v4-flash", "static-b");
    let status = format_warmup_status(Some(&warmed), &now);
    for expected in [
        "Warmup status: invalid",
        "model changed",
        "static prefix changed",
    ] {
        assert!(status.contains(expected), "got: {status}");
    }
}
/// Project pack and skills changes must each be named, and the reason list
/// must never render empty.
#[test]
fn warmup_status_reports_project_and_skills_reasons() {
    let warmed = warmup_key("deepseek-v4-pro", "static-a");
    let now = CacheWarmupKey {
        project_pack_hash: "project-b".to_string(),
        skills_hash: "skills-b".to_string(),
        ..warmed.clone()
    };
    let status = format_warmup_status(Some(&warmed), &now);
    for expected in ["project pack changed", "skills changed"] {
        assert!(status.contains(expected), "got: {status}");
    }
    assert!(!status.contains("; )"), "got: {status}");
}
#[test]
fn cache_inspect_rejects_json_verbose_combo() {
let mut app = create_test_app();
+10
View File
@@ -1072,6 +1072,7 @@ impl Engine {
status: TurnOutcomeStatus::Failed,
error: Some(message),
tool_catalog: None,
base_url: None,
})
.await;
return;
@@ -1256,6 +1257,10 @@ impl Engine {
)
});
let tool_catalog_for_event = tools.clone();
let base_url_for_event = self
.deepseek_client
.as_ref()
.map(|client| client.base_url().to_string());
// Main turn loop
let (status, error) = self
@@ -1290,6 +1295,7 @@ impl Engine {
status,
error,
tool_catalog: tool_catalog_for_event,
base_url: base_url_for_event,
})
.await;
@@ -1336,6 +1342,7 @@ impl Engine {
status: TurnOutcomeStatus::Failed,
error: Some(message),
tool_catalog: None,
base_url: None,
})
.await;
return;
@@ -1414,6 +1421,7 @@ impl Engine {
status: turn_status,
error: turn_error,
tool_catalog: None,
base_url: None,
})
.await;
}
@@ -1438,6 +1446,7 @@ impl Engine {
status: TurnOutcomeStatus::Failed,
error: Some(message),
tool_catalog: None,
base_url: None,
})
.await;
return;
@@ -1493,6 +1502,7 @@ impl Engine {
status,
error,
tool_catalog: None,
base_url: None,
})
.await;
}
+2
View File
@@ -94,6 +94,8 @@ pub enum Event {
error: Option<String>,
/// Tool catalog sent with this turn's model request.
tool_catalog: Option<Vec<Tool>>,
/// API base URL used by this turn's client.
base_url: Option<String>,
},
/// Context compaction started.
+5
View File
@@ -2917,6 +2917,7 @@ mod tests {
status: TurnOutcomeStatus::Completed,
error: None,
tool_catalog: None,
base_url: None,
})
.await;
}
@@ -2931,6 +2932,7 @@ mod tests {
status: TurnOutcomeStatus::Completed,
error: None,
tool_catalog: None,
base_url: None,
})
.await;
}
@@ -3088,6 +3090,7 @@ mod tests {
status: TurnOutcomeStatus::Completed,
error: None,
tool_catalog: None,
base_url: None,
})
.await;
});
@@ -3221,6 +3224,7 @@ mod tests {
status: TurnOutcomeStatus::Completed,
error: None,
tool_catalog: None,
base_url: None,
})
.await;
});
@@ -3441,6 +3445,7 @@ mod tests {
status: TurnOutcomeStatus::Completed,
error: None,
tool_catalog: None,
base_url: None,
})
.await;
});
+11
View File
@@ -3713,6 +3713,7 @@ mod tests {
status: TurnOutcomeStatus::Completed,
error: None,
tool_catalog: None,
base_url: None,
})
.await;
}
@@ -4006,6 +4007,7 @@ mod tests {
status: TurnOutcomeStatus::Completed,
error: None,
tool_catalog: None,
base_url: None,
})
.await;
if turn_index >= 2 {
@@ -4244,6 +4246,7 @@ mod tests {
status: TurnOutcomeStatus::Completed,
error: None,
tool_catalog: None,
base_url: None,
})
.await?;
@@ -4341,6 +4344,7 @@ mod tests {
status: TurnOutcomeStatus::Completed,
error: None,
tool_catalog: None,
base_url: None,
})
.await?;
Ok(())
@@ -4420,6 +4424,7 @@ mod tests {
status: TurnOutcomeStatus::Completed,
error: None,
tool_catalog: None,
base_url: None,
})
.await?;
Ok(())
@@ -4484,6 +4489,7 @@ mod tests {
status: TurnOutcomeStatus::Completed,
error: None,
tool_catalog: None,
base_url: None,
})
.await?;
@@ -4613,6 +4619,7 @@ mod tests {
status: TurnOutcomeStatus::Completed,
error: None,
tool_catalog: None,
base_url: None,
})
.await?;
Ok(())
@@ -4694,6 +4701,7 @@ mod tests {
status: TurnOutcomeStatus::Completed,
error: None,
tool_catalog: None,
base_url: None,
})
.await?;
@@ -4756,6 +4764,7 @@ mod tests {
status: TurnOutcomeStatus::Completed,
error: None,
tool_catalog: None,
base_url: None,
})
.await;
}
@@ -4867,6 +4876,7 @@ mod tests {
status: TurnOutcomeStatus::Completed,
error: None,
tool_catalog: None,
base_url: None,
})
.await;
}
@@ -4898,6 +4908,7 @@ mod tests {
status: TurnOutcomeStatus::Completed,
error: None,
tool_catalog: None,
base_url: None,
})
.await;
}
+6 -1
View File
@@ -10,7 +10,7 @@ use serde_json::Value;
use thiserror::Error;
use crate::artifacts::ArtifactRecord;
use crate::client::PromptInspection;
use crate::client::{CacheWarmupKey, PromptInspection};
use crate::compaction::CompactionConfig;
use crate::config::{
ApiProvider, Config, DEFAULT_TEXT_MODEL, SavedCredential, has_api_key, save_api_key,
@@ -1046,11 +1046,14 @@ pub struct SessionState {
pub total_output_tokens: u32,
pub turn_cache_history: VecDeque<TurnCacheRecord>,
pub last_cache_inspection: Option<PromptInspection>,
pub last_warmup_key: Option<CacheWarmupKey>,
/// Tool catalog from the most recent model request.
///
/// `/cache inspect` uses this to inspect the same tool schema bytes
/// that were eligible for the provider's prefix cache.
pub last_tool_catalog: Option<Vec<Tool>>,
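/// API base URL reported by the most recent completed turn or cache warmup.
/// `None` until one is recorded, and cleared again when a turn completes
/// without reporting a base URL.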
pub last_base_url: Option<String>,
}
/// Sidebar hover state for mouse tooltip support.
@@ -1092,7 +1095,9 @@ impl Default for SessionState {
total_output_tokens: 0,
turn_cache_history: VecDeque::new(),
last_cache_inspection: None,
last_warmup_key: None,
last_tool_catalog: None,
last_base_url: None,
}
}
}
+22 -5
View File
@@ -35,7 +35,10 @@ use windows::Win32::System::Console::{GetConsoleMode, GetStdHandle, SetConsoleMo
use crate::audit::log_sensitive_event;
use crate::automation_manager::{AutomationManager, AutomationSchedulerConfig, spawn_scheduler};
use crate::client::{DeepSeekClient, build_cache_warmup_request};
use crate::client::{
CacheWarmupKey, DeepSeekClient, PromptInspection, build_cache_warmup_request,
inspect_prompt_for_request,
};
use crate::commands;
use crate::compaction::estimate_input_tokens_conservative;
use crate::config::{
@@ -1515,8 +1518,10 @@ async fn run_event_loop(
status,
error,
tool_catalog,
base_url,
} => {
app.session.last_tool_catalog = tool_catalog;
app.session.last_base_url = base_url;
let was_locally_cancelled = app.suppress_stream_events_until_turn_complete;
app.suppress_stream_events_until_turn_complete = false;
app.active_allowed_tools = None;
@@ -3975,8 +3980,9 @@ async fn fetch_available_models(config: &Config) -> Result<Vec<String>> {
Ok(ids)
}
async fn run_cache_warmup(app: &App, config: &Config) -> Result<Usage> {
async fn run_cache_warmup(app: &App, config: &Config) -> Result<(Usage, String, PromptInspection)> {
let client = DeepSeekClient::new(config)?;
let base_url = client.base_url().to_string();
let reasoning_effort = if app.reasoning_effort == ReasoningEffort::Auto {
app.last_effective_reasoning_effort
.and_then(ReasoningEffort::api_value)
@@ -3989,7 +3995,7 @@ async fn run_cache_warmup(app: &App, config: &Config) -> Result<Usage> {
messages: app.api_messages.clone(),
max_tokens: 1024,
system: app.system_prompt.clone(),
tools: None,
tools: app.session.last_tool_catalog.clone(),
tool_choice: None,
metadata: None,
thinking: None,
@@ -3999,9 +4005,10 @@ async fn run_cache_warmup(app: &App, config: &Config) -> Result<Usage> {
top_p: None,
};
let warmup = build_cache_warmup_request(&request);
let inspection = inspect_prompt_for_request(&warmup);
let response =
tokio::time::timeout(Duration::from_secs(45), client.create_message(warmup)).await??;
Ok(response.usage)
Ok((response.usage, base_url, inspection))
}
// `format_*` chip/message builders moved to `tui/format_helpers.rs`.
@@ -5075,8 +5082,18 @@ async fn apply_command_result(
AppAction::CacheWarmup => {
app.status_message = Some("Warming DeepSeek cache...".to_string());
match run_cache_warmup(app, config).await {
Ok(usage) => {
Ok((usage, base_url, inspection)) => {
app.session.last_base_url = Some(base_url.clone());
app.session.last_warmup_key = Some(CacheWarmupKey::from_inspection(
&format!("{:?}", app.api_provider),
&app.model,
&base_url,
&inspection,
));
let mut message = format_helpers::cache_warmup_result(&usage);
if let Some(key) = app.session.last_warmup_key.as_ref() {
message.push_str(&format!("\nWarmup key: {}", key.hash_short()));
}
// Append prefix-cache stability info.
if app.prefix_checks_total > 0 {
let changes = app.prefix_change_count;
+1
View File
@@ -3060,6 +3060,7 @@ fn local_cancel_marks_late_stream_events_for_suppression() {
status: crate::core::events::TurnOutcomeStatus::Interrupted,
error: None,
tool_catalog: None,
base_url: None,
}
));
assert!(!suppress_engine_event_after_local_cancel(