From 5ccfe91bff866974dad52255c07ff4bff8afea17 Mon Sep 17 00:00:00 2001 From: Zhiping <2716057626@qq.com> Date: Mon, 11 May 2026 03:43:21 +0800 Subject: [PATCH] fix: cache usage unknown shown as 0, and add truthful reporting prompt (#false-claim) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two independent fixes: 1. **Prompt truthful reporting** (base.md): add explicit rules for honest outcome reporting — if a tool fails/returns-empty say so; if cache usage is unobserved treat it as unknown/null, not 0. 2. **Cache usage u64 → Option<u64>** (session.rs): when the API does not report cache hit/miss tokens, the cumulative SessionUsage defaulted to 0. Models interpreted this as "no cache hits" rather than "unknown". Changing to Option<u64> ensures absent cache data serializes as null in the model context. Tests added for all four cases: starts None, stays None when API omits cache, accumulates correctly when API reports cache, and preserves an explicit zero reported by the API. --- crates/tui/src/core/session.rs | 83 +++++++++++++++++++++++++++++++--- crates/tui/src/prompts/base.md | 4 ++ 2 files changed, 81 insertions(+), 6 deletions(-) diff --git a/crates/tui/src/core/session.rs b/crates/tui/src/core/session.rs index ad0bdb18..65ea5f16 100644 --- a/crates/tui/src/core/session.rs +++ b/crates/tui/src/core/session.rs @@ -90,10 +90,12 @@ pub struct Session { pub struct SessionUsage { pub input_tokens: u64, pub output_tokens: u64, - #[allow(dead_code)] - pub cache_creation_input_tokens: u64, - #[allow(dead_code)] - pub cache_read_input_tokens: u64, + /// Cache creation (miss) tokens. `None` when never observed by the API — + /// do NOT display as 0, which would be indistinguishable from "no misses". + pub cache_creation_input_tokens: Option<u64>, + /// Cache read (hit) tokens. `None` when never observed by the API — + /// do NOT display as 0, which would be indistinguishable from "no hits".
+ pub cache_read_input_tokens: Option<u64>, } impl SessionUsage { @@ -102,10 +104,12 @@ impl SessionUsage { self.input_tokens += u64::from(usage.input_tokens); self.output_tokens += u64::from(usage.output_tokens); if let Some(tokens) = usage.prompt_cache_miss_tokens { - self.cache_creation_input_tokens += u64::from(tokens); + self.cache_creation_input_tokens = + Some(self.cache_creation_input_tokens.unwrap_or(0) + u64::from(tokens)); } if let Some(tokens) = usage.prompt_cache_hit_tokens { - self.cache_read_input_tokens += u64::from(tokens); + self.cache_read_input_tokens = + Some(self.cache_read_input_tokens.unwrap_or(0) + u64::from(tokens)); } } } @@ -165,3 +169,70 @@ impl Session { .rebuild_from_messages(&self.messages, &self.workspace); } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn session_usage_cache_starts_none() { + let usage = SessionUsage::default(); + assert!(usage.cache_creation_input_tokens.is_none()); + assert!(usage.cache_read_input_tokens.is_none()); + } + + #[test] + fn session_usage_cache_remains_none_when_api_omits_cache() { + let mut usage = SessionUsage::default(); + let api_usage = Usage { + input_tokens: 100, + output_tokens: 50, + prompt_cache_hit_tokens: None, + prompt_cache_miss_tokens: None, + reasoning_tokens: None, + reasoning_replay_tokens: None, + server_tool_use: None, + }; + usage.add(&api_usage); + assert!(usage.cache_creation_input_tokens.is_none()); + assert!(usage.cache_read_input_tokens.is_none()); + } + + #[test] + fn session_usage_cache_accumulates_when_reported() { + let mut usage = SessionUsage::default(); + let api_usage = Usage { + input_tokens: 100, + output_tokens: 50, + prompt_cache_hit_tokens: Some(30), + prompt_cache_miss_tokens: Some(70), + reasoning_tokens: None, + reasoning_replay_tokens: None, + server_tool_use: None, + }; + usage.add(&api_usage); + assert_eq!(usage.cache_read_input_tokens, Some(30)); + assert_eq!(usage.cache_creation_input_tokens, Some(70)); + usage.add(&api_usage);
assert_eq!(usage.cache_read_input_tokens, Some(60)); + assert_eq!(usage.cache_creation_input_tokens, Some(140)); + } + + #[test] + fn session_usage_cache_preserves_explicit_zero() { + let mut usage = SessionUsage::default(); + let api_usage = Usage { + input_tokens: 100, + output_tokens: 50, + prompt_cache_hit_tokens: Some(0), // explicit zero from provider + prompt_cache_miss_tokens: Some(1234), + reasoning_tokens: None, + reasoning_replay_tokens: None, + server_tool_use: None, + }; + usage.add(&api_usage); + // 0 is a valid observed value, must NOT be converted to None + assert_eq!(usage.cache_read_input_tokens, Some(0)); + assert_eq!(usage.cache_creation_input_tokens, Some(1234)); + } +} diff --git a/crates/tui/src/prompts/base.md b/crates/tui/src/prompts/base.md index 6f248541..bbca6b5e 100644 --- a/crates/tui/src/prompts/base.md +++ b/crates/tui/src/prompts/base.md @@ -60,6 +60,10 @@ After every tool call that produces a result you'll act on, verify before procee Don't claim a change worked until you've observed evidence. Don't trust memory over live tool output. +**Report outcomes faithfully.** If a tool call fails or returns no data, say so. If you did not run a verification step, say that — don't imply it succeeded. Never claim "all tests pass" when output shows failures. State what actually happened, not what you expected. + +When the API does not report cache usage (`prompt_cache_hit_tokens` or `prompt_cache_miss_tokens` are absent/`null`), treat cache status as **unknown** — not zero. Do not report "cache miss" or "cache hit rate 0%" for unobserved metrics. + ## Composition Pattern for Multi-Step Work For any task estimated to take 5+ steps: