fix: cache usage unknown shown as 0, and add truthful reporting prompt (#false-claim)

Two independent fixes:

1. **Prompt truthful reporting** (base.md): add explicit rules for honest
   outcome reporting — if a tool fails/returns-empty say so; if cache
   usage is unobserved treat it as unknown/null, not 0.

2. **Cache usage u64 → Option<u64>** (session.rs): when the API does
   not report cache hit/miss tokens, the cumulative SessionUsage
   defaulted to 0. Models interpreted this as "no cache hits" rather
   than "unknown". Changing to Option<u64> ensures absent cache data
   serializes as null in the model context.

Tests added for four cases: starts None, stays None when API omits
cache, accumulates correctly when API reports cache, and preserves an
explicit zero reported by the provider as Some(0) rather than None.
This commit is contained in:
Zhiping
2026-05-11 03:43:21 +08:00
committed by Hunter Bown
parent ca5204e311
commit 5ccfe91bff
2 changed files with 81 additions and 6 deletions
+77 -6
View File
@@ -90,10 +90,12 @@ pub struct Session {
/// Cumulative token usage, built up by folding per-response API usage
/// reports into these counters via `add`.
pub struct SessionUsage {
/// Running total of input tokens over every usage report added so far.
pub input_tokens: u64,
/// Running total of output tokens over every usage report added so far.
pub output_tokens: u64,
// NOTE(review): the two plain `u64` cache fields below appear to be the
// pre-change declarations shown by the diff; they are superseded by the
// `Option<u64>` fields of the same name that follow — confirm against the
// actual file before relying on them.
#[allow(dead_code)]
pub cache_creation_input_tokens: u64,
#[allow(dead_code)]
pub cache_read_input_tokens: u64,
/// Cache creation (miss) tokens, summed from `prompt_cache_miss_tokens`.
/// `None` until the API reports a value; an explicitly reported 0 is kept
/// as `Some(0)`. Do NOT display `None` as 0, which would be
/// indistinguishable from "no misses".
pub cache_creation_input_tokens: Option<u64>,
/// Cache read (hit) tokens, summed from `prompt_cache_hit_tokens`.
/// `None` until the API reports a value; an explicitly reported 0 is kept
/// as `Some(0)`. Do NOT display `None` as 0, which would be
/// indistinguishable from "no hits".
pub cache_read_input_tokens: Option<u64>,
}
impl SessionUsage {
@@ -102,10 +104,12 @@ impl SessionUsage {
self.input_tokens += u64::from(usage.input_tokens);
self.output_tokens += u64::from(usage.output_tokens);
if let Some(tokens) = usage.prompt_cache_miss_tokens {
self.cache_creation_input_tokens += u64::from(tokens);
self.cache_creation_input_tokens =
Some(self.cache_creation_input_tokens.unwrap_or(0) + u64::from(tokens));
}
if let Some(tokens) = usage.prompt_cache_hit_tokens {
self.cache_read_input_tokens += u64::from(tokens);
self.cache_read_input_tokens =
Some(self.cache_read_input_tokens.unwrap_or(0) + u64::from(tokens));
}
}
}
@@ -165,3 +169,70 @@ impl Session {
.rebuild_from_messages(&self.messages, &self.workspace);
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A defaulted `SessionUsage` carries no cache data: both counters are
    /// `None`, not `Some(0)`.
    #[test]
    fn session_usage_cache_starts_none() {
        let fresh = SessionUsage::default();

        assert_eq!(fresh.cache_creation_input_tokens, None);
        assert_eq!(fresh.cache_read_input_tokens, None);
    }

    /// Adding a report that omits both cache fields must leave the cache
    /// counters at `None`.
    #[test]
    fn session_usage_cache_remains_none_when_api_omits_cache() {
        let report = Usage {
            input_tokens: 100,
            output_tokens: 50,
            prompt_cache_hit_tokens: None,
            prompt_cache_miss_tokens: None,
            reasoning_tokens: None,
            reasoning_replay_tokens: None,
            server_tool_use: None,
        };

        let mut totals = SessionUsage::default();
        totals.add(&report);

        assert_eq!(totals.cache_creation_input_tokens, None);
        assert_eq!(totals.cache_read_input_tokens, None);
    }

    /// Reported cache tokens are summed across successive `add` calls.
    #[test]
    fn session_usage_cache_accumulates_when_reported() {
        let report = Usage {
            input_tokens: 100,
            output_tokens: 50,
            prompt_cache_hit_tokens: Some(30),
            prompt_cache_miss_tokens: Some(70),
            reasoning_tokens: None,
            reasoning_replay_tokens: None,
            server_tool_use: None,
        };

        let mut totals = SessionUsage::default();

        // Expected running totals after the first and second `add` of the
        // same report.
        for (expected_hits, expected_misses) in [(30, 70), (60, 140)] {
            totals.add(&report);
            assert_eq!(totals.cache_read_input_tokens, Some(expected_hits));
            assert_eq!(totals.cache_creation_input_tokens, Some(expected_misses));
        }
    }

    /// A zero explicitly reported by the provider is an observed value and
    /// must be stored as `Some(0)`, never collapsed into `None`.
    #[test]
    fn session_usage_cache_preserves_explicit_zero() {
        let report = Usage {
            input_tokens: 100,
            output_tokens: 50,
            prompt_cache_hit_tokens: Some(0), // explicit zero from provider
            prompt_cache_miss_tokens: Some(1234),
            reasoning_tokens: None,
            reasoning_replay_tokens: None,
            server_tool_use: None,
        };

        let mut totals = SessionUsage::default();
        totals.add(&report);

        // 0 is a valid observed value, must NOT be converted to None
        assert_eq!(totals.cache_read_input_tokens, Some(0));
        assert_eq!(totals.cache_creation_input_tokens, Some(1234));
    }
}
+4
View File
@@ -60,6 +60,10 @@ After every tool call that produces a result you'll act on, verify before procee
Don't claim a change worked until you've observed evidence. Don't trust memory over live tool output.
**Report outcomes faithfully.** If a tool call fails or returns no data, say so. If you did not run a verification step, say that — don't imply it succeeded. Never claim "all tests pass" when output shows failures. State what actually happened, not what you expected.
When the API does not report cache usage (`prompt_cache_hit_tokens` or `prompt_cache_miss_tokens` are absent/`null`), treat cache status as **unknown** — not zero. Do not report "cache miss" or "cache hit rate 0%" for unobserved metrics.
## Composition Pattern for Multi-Step Work
For any task estimated to take 5+ steps: