feat(cache): expose real tool catalog in cache inspect (#2423)

Harvested from #2390 with thanks to @wplll.

Records the actual tool catalog used by the last model request, reports that catalog in /cache inspect JSON and text output, and includes review hardening for stale catalog clearing, JSON/verbose flag handling, and CJK-aware token estimates.

Validation:
- cargo fmt --all -- --check
- CARGO_TARGET_DIR=/Volumes/VIXinSSD/codewhale-target/harvest-2390 cargo test -p codewhale-tui cache_inspect --all-features
This commit is contained in:
Hunter Bown
2026-05-31 03:24:00 -07:00
committed by GitHub
parent 58e45d384e
commit 51906511c5
10 changed files with 269 additions and 14 deletions
+25 -6
View File
@@ -9,6 +9,7 @@ use std::pin::Pin;
use std::time::Duration;
use anyhow::{Context, Result};
use serde::{Deserialize, Serialize};
use serde_json::{Value, json};
use sha2::{Digest, Sha256};
use tokio::time::timeout as tokio_timeout;
@@ -551,24 +552,29 @@ const TOOL_RESULT_DEDUP_MIN_CHARS: usize = 1_024;
/// up with tiny `gh auth status` and `cat package.json` files.
const TOOL_RESULT_SHA_PERSIST_MIN_CHARS: usize = 1_024;
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
/// Result of inspecting one model request for cache diagnostics: the prefix
/// hashes plus a per-layer breakdown of the prompt.
pub(crate) struct PromptInspection {
/// Hash of the base static prefix of the request.
pub base_static_prefix_hash: String,
/// Hash of the full request prefix.
pub full_request_prefix_hash: String,
/// Hash of the rendered tool catalog JSON, or empty when no tools were supplied.
pub tool_catalog_hash: String,
/// Per-layer size and hash details for the inspected prompt.
pub layers: Vec<PromptLayerInspection>,
}
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
/// Size, hash, and stability details for a single layer of an inspected prompt.
pub(crate) struct PromptLayerInspection {
/// Display name of the layer (for example `"Tool catalog"`).
pub name: String,
/// Stability classification assigned to the layer.
pub stability: PromptLayerStability,
/// Length of the layer content counted in `char`s.
pub char_len: usize,
/// Length of the layer content in bytes.
pub byte_len: usize,
/// Rough token estimate for quick before/after cache-hit reports.
///
/// Zero for empty content, about one token per four characters for
/// ASCII-only content (minimum one), and one token per character otherwise.
pub token_estimate: usize,
/// Hash of the layer content bytes, as produced by `sha256_hex`.
pub sha256: String,
/// Extra details for tool-result layers; `prompt_layer` initializes this to `None`.
pub tool_result: Option<ToolResultInspection>,
/// Extra details for turn-metadata layers; `prompt_layer` initializes this to `None`.
pub turn_meta: Option<TurnMetaInspection>,
}
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub(crate) struct ToolResultInspection {
pub original_chars: usize,
pub sent_chars: usize,
@@ -576,7 +582,7 @@ pub(crate) struct ToolResultInspection {
pub deduplicated: bool,
}
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub(crate) struct TurnMetaInspection {
pub original_chars: usize,
pub sent_chars: usize,
@@ -584,7 +590,7 @@ pub(crate) struct TurnMetaInspection {
pub sha256: String,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub(crate) enum PromptLayerStability {
Static,
History,
@@ -605,6 +611,7 @@ fn inspect_wire_request(tools: Option<&[Tool]>, messages: &[Value]) -> PromptIns
let mut layers = Vec::new();
let mut base_static_prefix_parts = Vec::new();
let mut full_request_prefix_parts = Vec::new();
let mut tool_catalog_hash = String::new();
let mut start_index = 0;
if let Some(message) = messages.first() {
@@ -628,6 +635,7 @@ fn inspect_wire_request(tools: Option<&[Tool]>, messages: &[Value]) -> PromptIns
}
if let Some(tool_catalog) = tool_catalog_for_inspect(tools) {
tool_catalog_hash = sha256_hex(tool_catalog.as_bytes());
base_static_prefix_parts.push(tool_catalog.clone());
full_request_prefix_parts.push(tool_catalog.clone());
layers.push(prompt_layer(
@@ -669,6 +677,7 @@ fn inspect_wire_request(tools: Option<&[Tool]>, messages: &[Value]) -> PromptIns
PromptInspection {
base_static_prefix_hash: sha256_hex(base_static_prefix.as_bytes()),
full_request_prefix_hash: sha256_hex(full_request_prefix.as_bytes()),
tool_catalog_hash,
layers,
}
}
@@ -840,10 +849,20 @@ fn prompt_layer(
stability: PromptLayerStability,
content: &str,
) -> PromptLayerInspection {
let char_len = content.chars().count();
let token_estimate = if char_len == 0 {
0
} else if content.is_ascii() {
(char_len / 4).max(1)
} else {
char_len.max(1)
};
PromptLayerInspection {
name,
stability,
char_len: content.chars().count(),
char_len,
byte_len: content.len(),
token_estimate,
sha256: sha256_hex(content.as_bytes()),
tool_result: None,
turn_meta: None,
+4
View File
@@ -102,6 +102,7 @@ pub(crate) fn reset_conversation_state(app: &mut App) -> bool {
app.session.last_reasoning_replay_tokens = None;
app.session.turn_cache_history.clear();
app.session.last_cache_inspection = None;
app.session.last_tool_catalog = None;
todos_cleared
}
@@ -558,9 +559,11 @@ mod tests {
app.session.last_prompt_cache_hit_tokens = Some(70);
app.session.last_prompt_cache_miss_tokens = Some(30);
app.session.last_reasoning_replay_tokens = Some(12);
app.session.last_tool_catalog = Some(Vec::new());
app.session.last_cache_inspection = Some(PromptInspection {
base_static_prefix_hash: "base".to_string(),
full_request_prefix_hash: "full".to_string(),
tool_catalog_hash: String::new(),
layers: Vec::new(),
});
app.push_turn_cache_record(TurnCacheRecord {
@@ -588,6 +591,7 @@ mod tests {
assert_eq!(app.session.last_reasoning_replay_tokens, None);
assert!(app.session.turn_cache_history.is_empty());
assert_eq!(app.session.last_cache_inspection, None);
assert_eq!(app.session.last_tool_catalog, None);
}
#[test]
+203 -6
View File
@@ -140,8 +140,11 @@ pub fn context(_app: &mut App) -> CommandResult {
/// Renders a fixed-width table the user can paste into a bug report.
pub fn cache(app: &mut App, arg: Option<&str>) -> CommandResult {
let arg = arg.map(str::trim).filter(|s| !s.is_empty());
if matches!(arg, Some("inspect")) {
return CommandResult::message(format_cache_inspect(app));
if let Some(flags) = arg.and_then(|a| a.strip_prefix("inspect")) {
let flags = flags.trim();
let verbose = flags.split_whitespace().any(|flag| flag == "--verbose");
let json_mode = flags.split_whitespace().any(|flag| flag == "--json");
return CommandResult::message(format_cache_inspect(app, verbose, json_mode));
}
if matches!(arg, Some("warmup")) {
return CommandResult::action(AppAction::CacheWarmup);
@@ -163,7 +166,11 @@ pub fn cache(app: &mut App, arg: Option<&str>) -> CommandResult {
CommandResult::message(format_cache_history(app, count, app.ui_locale))
}
fn format_cache_inspect(app: &mut App) -> String {
fn format_cache_inspect(app: &mut App, verbose: bool, json_mode: bool) -> String {
if verbose && json_mode {
return "cache inspect: --json and --verbose cannot be combined".to_string();
}
let reasoning_effort = if app.reasoning_effort == crate::tui::app::ReasoningEffort::Auto {
app.last_effective_reasoning_effort
.and_then(crate::tui::app::ReasoningEffort::api_value)
@@ -176,7 +183,7 @@ fn format_cache_inspect(app: &mut App) -> String {
messages: app.api_messages.clone(),
max_tokens: 0,
system: app.system_prompt.clone(),
tools: None,
tools: app.session.last_tool_catalog.clone(),
tool_choice: None,
metadata: None,
thinking: None,
@@ -187,6 +194,13 @@ fn format_cache_inspect(app: &mut App) -> String {
};
let inspection = inspect_prompt_for_request(&request);
let previous = app.session.last_cache_inspection.as_ref();
if json_mode {
let output = serde_json::to_string_pretty(&inspection).unwrap_or_else(|_| {
"{\"error\":\"cache inspection serialization failed\"}".to_string()
});
app.session.last_cache_inspection = Some(inspection);
return output;
}
let mut out = String::new();
out.push_str("Cache Inspect\n");
@@ -199,16 +213,32 @@ fn format_cache_inspect(app: &mut App) -> String {
"Full request prefix hash: {}\n",
inspection.full_request_prefix_hash
));
out.push_str(&format!(
"Tool catalog hash: {}\n",
if inspection.tool_catalog_hash.is_empty() {
"(no tools registered)".to_string()
} else {
inspection.tool_catalog_hash.clone()
}
));
out.push_str(&format_static_prefix_status(previous, &inspection));
out.push_str(&format_first_divergence(previous, &inspection));
let total_tokens: usize = inspection
.layers
.iter()
.map(|layer| layer.token_estimate)
.sum();
out.push_str(&format!("Estimated reusable tokens: ~{total_tokens}\n"));
out.push('\n');
for layer in &inspection.layers {
let mut line = format!(
"{}: {}, chars={}, hash={}\n",
"{}: {}, chars={}, bytes={}, ~{}tok, hash={}\n",
layer.name,
layer.stability.label(),
layer.char_len,
layer.byte_len,
layer.token_estimate,
layer.sha256
);
if let Some(tool_result) = &layer.tool_result {
@@ -233,10 +263,70 @@ fn format_cache_inspect(app: &mut App) -> String {
}
out.push_str(&line);
}
if verbose {
out.push_str("\nVerbose diff\n");
if let Some(previous) = previous {
out.push_str(&format_verbose_diff(previous, &inspection));
} else {
out.push_str("No previous inspection to compare against.\n");
}
}
app.session.last_cache_inspection = Some(inspection);
out
}
/// Builds the layer-by-layer comparison shown by `/cache inspect --verbose`.
///
/// Layers are paired by position. Every position produces one status line
/// (`unchanged`, `changed`, `added`, or `removed`). A `changed` layer is
/// followed by one detail line for each differing field among name,
/// stability, character count, and content hash.
fn format_verbose_diff(previous: &PromptInspection, current: &PromptInspection) -> String {
    let max_len = previous.layers.len().max(current.layers.len());
    let mut report = String::new();

    for index in 0..max_len {
        // Resolve the one-sided cases first so the rest of the loop body only
        // deals with a layer that exists in both inspections.
        let (prev, curr) = match (previous.layers.get(index), current.layers.get(index)) {
            (Some(prev), Some(curr)) => (prev, curr),
            (None, Some(curr)) => {
                report.push_str(&format!(" [{index}] {} added\n", curr.name));
                continue;
            }
            (Some(prev), None) => {
                report.push_str(&format!(" [{index}] {} removed\n", prev.name));
                continue;
            }
            (None, None) => unreachable!("index is within max_len"),
        };

        if prev == curr {
            report.push_str(&format!(" [{index}] {} unchanged\n", curr.name));
            continue;
        }

        report.push_str(&format!(" [{index}] {} changed\n", curr.name));

        if prev.name != curr.name {
            let detail = format!(" name: {} -> {}\n", prev.name, curr.name);
            report.push_str(&detail);
        }
        if prev.stability != curr.stability {
            let detail = format!(
                " stability: {} -> {}\n",
                prev.stability.label(),
                curr.stability.label()
            );
            report.push_str(&detail);
        }
        if prev.char_len != curr.char_len {
            // Signed delta so growth and shrinkage both render with a sign.
            let delta = curr.char_len as i64 - prev.char_len as i64;
            let detail = format!(
                " chars: {} -> {} ({:+})\n",
                prev.char_len, curr.char_len, delta
            );
            report.push_str(&detail);
        }
        if prev.sha256 != curr.sha256 {
            let detail = format!(
                " hash: {} -> {}\n",
                short_hash(&prev.sha256),
                short_hash(&curr.sha256)
            );
            report.push_str(&detail);
        }
    }

    report
}
/// Returns at most the first 12 characters of `hash` for compact display.
///
/// Inputs shorter than 12 characters are returned unchanged. Truncation is
/// done on a character boundary rather than a raw byte offset, so a value
/// that is not plain ASCII hex cannot trigger a slicing panic. For ASCII
/// digests the result is the same 12-byte prefix as before.
fn short_hash(hash: &str) -> &str {
    const SHORT_HASH_CHARS: usize = 12;
    // The byte offset of the 13th character (if any) is where the prefix ends.
    hash.char_indices()
        .nth(SHORT_HASH_CHARS)
        .map_or(hash, |(end, _)| &hash[..end])
}
/// Render a prefix-cache stability and health summary for `/cache stats`.
///
/// Surfaces the current prefix fingerprint, stability ratio, change history,
@@ -560,7 +650,7 @@ fn humanize_age(d: std::time::Duration) -> String {
mod tests {
use super::*;
use crate::config::Config;
use crate::models::{ContentBlock, Message, SystemBlock};
use crate::models::{ContentBlock, Message, SystemBlock, Tool};
use crate::tui::app::{App, TuiOptions};
use crate::tui::history::{GenericToolCell, ToolCell, ToolStatus};
use std::path::PathBuf;
@@ -594,6 +684,25 @@ mod tests {
app
}
/// Builds a minimal strict `function` tool named `name` whose input schema
/// is an object with a single string `path` property.
fn test_tool(name: &str) -> Tool {
    let description = format!("{name} test tool");
    let input_schema = serde_json::json!({
        "type": "object",
        "properties": {
            "path": {"type": "string"}
        }
    });

    Tool {
        tool_type: Some(String::from("function")),
        name: name.to_owned(),
        description,
        input_schema,
        allowed_callers: None,
        defer_loading: Some(false),
        input_examples: None,
        strict: Some(true),
        cache_control: None,
    }
}
#[test]
fn test_tokens_shows_usage_info() {
let mut app = create_test_app();
@@ -737,6 +846,94 @@ mod tests {
assert!(!msg.contains("SECRET_USER_TASK"));
}
/// The text report must reflect the tool catalog recorded from the last
/// request rather than claiming that no tools were registered.
#[test]
fn cache_inspect_uses_last_request_tool_catalog() {
    let mut app = create_test_app();
    app.session.last_tool_catalog = Some(vec![test_tool("read_file")]);
    app.system_prompt = Some(SystemPrompt::Text(String::from("Base policy")));
    let user_turn = Message {
        role: String::from("user"),
        content: vec![ContentBlock::Text {
            text: String::from("Current task"),
            cache_control: None,
        }],
    };
    app.api_messages.push(user_turn);

    let result = cache(&mut app, Some("inspect"));
    let msg = result.message.expect("inspect output");

    // Fragments that must appear once a catalog is present.
    for fragment in [
        "Tool catalog hash: ",
        "Tool catalog: static",
        "bytes=",
        "~",
    ] {
        assert!(msg.contains(fragment), "got: {msg}");
    }
    assert!(!msg.contains("(no tools registered)"), "got: {msg}");
}
/// JSON output must carry a 64-character tool catalog hash and non-zero
/// byte and token sizes for the tool catalog layer.
#[test]
fn cache_inspect_json_reports_tool_catalog_hash_and_layer_sizes() {
    let mut app = create_test_app();
    app.session.last_tool_catalog = Some(vec![test_tool("read_file")]);
    app.system_prompt = Some(SystemPrompt::Text(String::from("Base policy")));
    let user_turn = Message {
        role: String::from("user"),
        content: vec![ContentBlock::Text {
            text: String::from("Current task"),
            cache_control: None,
        }],
    };
    app.api_messages.push(user_turn);

    let result = cache(&mut app, Some("inspect --json"));
    let msg = result.message.expect("inspect json output");
    let parsed: serde_json::Value = serde_json::from_str(&msg).expect("valid json");

    let catalog_hash = parsed["tool_catalog_hash"].as_str().unwrap();
    assert_eq!(catalog_hash.len(), 64);

    let layers = parsed["layers"].as_array().unwrap();
    let tool_layer = layers
        .iter()
        .find(|layer| layer["name"] == "Tool catalog")
        .expect("tool catalog layer");
    let byte_len = tool_layer["byte_len"].as_u64().unwrap();
    let token_estimate = tool_layer["token_estimate"].as_u64().unwrap();
    assert!(byte_len > 0);
    assert!(token_estimate > 0);
}
/// Passing both `--json` and `--verbose` yields the fixed rejection message
/// instead of an inspection report.
#[test]
fn cache_inspect_rejects_json_verbose_combo() {
    let expected = "cache inspect: --json and --verbose cannot be combined";
    let mut app = create_test_app();

    let result = cache(&mut app, Some("inspect --json --verbose"));
    let msg = result.message.expect("inspect output");

    assert_eq!(msg, expected);
}
/// For a non-ASCII (CJK) system prompt the token estimate must equal the
/// character count of the system layer.
#[test]
fn cache_inspect_json_uses_cjk_aware_token_estimate() {
    let mut app = create_test_app();
    app.system_prompt = Some(SystemPrompt::Text(String::from("缓存命中测试")));

    let result = cache(&mut app, Some("inspect --json"));
    let msg = result.message.expect("inspect json output");
    let parsed: serde_json::Value = serde_json::from_str(&msg).expect("valid json");

    let layers = parsed["layers"].as_array().unwrap();
    let system_layer = layers
        .iter()
        .find(|layer| layer["name"] == "Global system prefix")
        .expect("system layer");

    let token_estimate = system_layer["token_estimate"].as_u64();
    let char_len = system_layer["char_len"].as_u64();
    assert_eq!(token_estimate, char_len);
}
#[test]
fn cache_inspect_reports_divergence_from_previous_request() {
let mut app = create_test_app();
+7
View File
@@ -1071,6 +1071,7 @@ impl Engine {
usage: turn.usage.clone(),
status: TurnOutcomeStatus::Failed,
error: Some(message),
tool_catalog: None,
})
.await;
return;
@@ -1254,6 +1255,7 @@ impl Engine {
&self.config.tools_always_load,
)
});
let tool_catalog_for_event = tools.clone();
// Main turn loop
let (status, error) = self
@@ -1287,6 +1289,7 @@ impl Engine {
usage: turn.usage,
status,
error,
tool_catalog: tool_catalog_for_event,
})
.await;
@@ -1332,6 +1335,7 @@ impl Engine {
usage: zero_usage,
status: TurnOutcomeStatus::Failed,
error: Some(message),
tool_catalog: None,
})
.await;
return;
@@ -1409,6 +1413,7 @@ impl Engine {
usage: zero_usage,
status: turn_status,
error: turn_error,
tool_catalog: None,
})
.await;
}
@@ -1432,6 +1437,7 @@ impl Engine {
usage: zero_usage,
status: TurnOutcomeStatus::Failed,
error: Some(message),
tool_catalog: None,
})
.await;
return;
@@ -1486,6 +1492,7 @@ impl Engine {
usage: zero_usage,
status,
error,
tool_catalog: None,
})
.await;
}
+3 -1
View File
@@ -9,7 +9,7 @@ use serde_json::Value;
use crate::core::coherence::CoherenceState;
use crate::error_taxonomy::ErrorEnvelope;
use crate::models::{Message, SystemPrompt, Usage};
use crate::models::{Message, SystemPrompt, Tool, Usage};
use crate::tools::spec::{ToolError, ToolResult};
use crate::tools::subagent::SubAgentResult;
use crate::tools::user_input::UserInputRequest;
@@ -92,6 +92,8 @@ pub enum Event {
usage: Usage,
status: TurnOutcomeStatus,
error: Option<String>,
/// Tool catalog sent with this turn's model request.
tool_catalog: Option<Vec<Tool>>,
},
/// Context compaction started.
+5
View File
@@ -2916,6 +2916,7 @@ mod tests {
},
status: TurnOutcomeStatus::Completed,
error: None,
tool_catalog: None,
})
.await;
}
@@ -2929,6 +2930,7 @@ mod tests {
},
status: TurnOutcomeStatus::Completed,
error: None,
tool_catalog: None,
})
.await;
}
@@ -3085,6 +3087,7 @@ mod tests {
},
status: TurnOutcomeStatus::Completed,
error: None,
tool_catalog: None,
})
.await;
});
@@ -3217,6 +3220,7 @@ mod tests {
},
status: TurnOutcomeStatus::Completed,
error: None,
tool_catalog: None,
})
.await;
});
@@ -3436,6 +3440,7 @@ mod tests {
},
status: TurnOutcomeStatus::Completed,
error: None,
tool_catalog: None,
})
.await;
});
+12
View File
@@ -2887,6 +2887,7 @@ impl RuntimeThreadManager {
usage,
status,
error,
..
} => {
turn_usage = Some(usage);
turn_status = match status {
@@ -3711,6 +3712,7 @@ mod tests {
},
status: TurnOutcomeStatus::Completed,
error: None,
tool_catalog: None,
})
.await;
}
@@ -4003,6 +4005,7 @@ mod tests {
},
status: TurnOutcomeStatus::Completed,
error: None,
tool_catalog: None,
})
.await;
if turn_index >= 2 {
@@ -4240,6 +4243,7 @@ mod tests {
},
status: TurnOutcomeStatus::Completed,
error: None,
tool_catalog: None,
})
.await?;
@@ -4336,6 +4340,7 @@ mod tests {
usage: Usage::default(),
status: TurnOutcomeStatus::Completed,
error: None,
tool_catalog: None,
})
.await?;
Ok(())
@@ -4414,6 +4419,7 @@ mod tests {
usage: Usage::default(),
status: TurnOutcomeStatus::Completed,
error: None,
tool_catalog: None,
})
.await?;
Ok(())
@@ -4477,6 +4483,7 @@ mod tests {
usage: Usage::default(),
status: TurnOutcomeStatus::Completed,
error: None,
tool_catalog: None,
})
.await?;
@@ -4605,6 +4612,7 @@ mod tests {
usage: Usage::default(),
status: TurnOutcomeStatus::Completed,
error: None,
tool_catalog: None,
})
.await?;
Ok(())
@@ -4685,6 +4693,7 @@ mod tests {
},
status: TurnOutcomeStatus::Completed,
error: None,
tool_catalog: None,
})
.await?;
@@ -4746,6 +4755,7 @@ mod tests {
},
status: TurnOutcomeStatus::Completed,
error: None,
tool_catalog: None,
})
.await;
}
@@ -4856,6 +4866,7 @@ mod tests {
},
status: TurnOutcomeStatus::Completed,
error: None,
tool_catalog: None,
})
.await;
}
@@ -4886,6 +4897,7 @@ mod tests {
},
status: TurnOutcomeStatus::Completed,
error: None,
tool_catalog: None,
})
.await;
}
+7 -1
View File
@@ -20,7 +20,7 @@ use crate::core::coherence::CoherenceState;
use crate::cycle_manager::{CycleBriefing, CycleConfig};
use crate::hooks::{HookContext, HookEvent, HookExecutor, HookResult};
use crate::localization::{Locale, MessageId, resolve_locale, tr};
use crate::models::{Message, SystemPrompt, compaction_threshold_for_model_and_effort};
use crate::models::{Message, SystemPrompt, Tool, compaction_threshold_for_model_and_effort};
use crate::palette::{self, UiTheme};
use crate::pricing::{CostCurrency, CostEstimate};
use crate::session_manager::SessionContextReference;
@@ -1046,6 +1046,11 @@ pub struct SessionState {
pub total_output_tokens: u32,
pub turn_cache_history: VecDeque<TurnCacheRecord>,
pub last_cache_inspection: Option<PromptInspection>,
/// Tool catalog from the most recent model request.
///
/// `/cache inspect` uses this to inspect the same tool schema bytes
/// that were eligible for the provider's prefix cache.
pub last_tool_catalog: Option<Vec<Tool>>,
}
/// Sidebar hover state for mouse tooltip support.
@@ -1087,6 +1092,7 @@ impl Default for SessionState {
total_output_tokens: 0,
turn_cache_history: VecDeque::new(),
last_cache_inspection: None,
last_tool_catalog: None,
}
}
}
+2
View File
@@ -1514,7 +1514,9 @@ async fn run_event_loop(
usage,
status,
error,
tool_catalog,
} => {
app.session.last_tool_catalog = tool_catalog;
let was_locally_cancelled = app.suppress_stream_events_until_turn_complete;
app.suppress_stream_events_until_turn_complete = false;
app.active_allowed_tools = None;
+1
View File
@@ -3059,6 +3059,7 @@ fn local_cancel_marks_late_stream_events_for_suppression() {
usage: Usage::default(),
status: crate::core::events::TurnOutcomeStatus::Interrupted,
error: None,
tool_catalog: None,
}
));
assert!(!suppress_engine_event_after_local_cancel(