fix: cache usage unknown shown as 0, and add truthful reporting prompt (#false-claim)
Two independent fixes: 1. **Truthful-reporting prompt** (base.md): add explicit rules for honest outcome reporting — if a tool fails or returns no data, say so; if cache usage is unobserved, treat it as unknown/null, not 0. 2. **Cache usage u64 → Option<u64>** (session.rs): when the API does not report cache hit/miss tokens, the cumulative SessionUsage counters stayed at their default of 0. Models interpreted this as "no cache hits" rather than "unknown". Changing the fields to Option<u64> ensures absent cache data serializes as null in the model context. Tests added for four cases: starts None, stays None when the API omits cache data, accumulates correctly when the API reports cache data, and preserves an explicitly reported zero.
This commit is contained in:
@@ -90,10 +90,12 @@ pub struct Session {
|
||||
pub struct SessionUsage {
|
||||
/// Running total of input tokens, accumulated from each API usage record.
pub input_tokens: u64,
|
||||
/// Running total of output tokens, accumulated from each API usage record.
pub output_tokens: u64,
|
||||
// NOTE(review): this rendering shows both sides of the diff without +/- markers.
// The two `u64` cache fields below appear to be the removed (pre-change) lines,
// superseded by the `Option<u64>` fields further down — confirm against the real file.
#[allow(dead_code)]
|
||||
pub cache_creation_input_tokens: u64,
|
||||
#[allow(dead_code)]
|
||||
pub cache_read_input_tokens: u64,
|
||||
/// Cache creation (miss) tokens, summed from `prompt_cache_miss_tokens`.
|
||||
/// `None` until the API has reported a value — do NOT display as 0, which would be indistinguishable from "no misses".
|
||||
pub cache_creation_input_tokens: Option<u64>,
|
||||
/// Cache read (hit) tokens, summed from `prompt_cache_hit_tokens`.
|
||||
/// `None` until the API has reported a value — do NOT display as 0, which would be indistinguishable from "no hits".
|
||||
pub cache_read_input_tokens: Option<u64>,
|
||||
}
|
||||
|
||||
impl SessionUsage {
|
||||
@@ -102,10 +104,12 @@ impl SessionUsage {
|
||||
self.input_tokens += u64::from(usage.input_tokens);
|
||||
self.output_tokens += u64::from(usage.output_tokens);
|
||||
if let Some(tokens) = usage.prompt_cache_miss_tokens {
|
||||
self.cache_creation_input_tokens += u64::from(tokens);
|
||||
self.cache_creation_input_tokens =
|
||||
Some(self.cache_creation_input_tokens.unwrap_or(0) + u64::from(tokens));
|
||||
}
|
||||
if let Some(tokens) = usage.prompt_cache_hit_tokens {
|
||||
self.cache_read_input_tokens += u64::from(tokens);
|
||||
self.cache_read_input_tokens =
|
||||
Some(self.cache_read_input_tokens.unwrap_or(0) + u64::from(tokens));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -165,3 +169,70 @@ impl Session {
|
||||
.rebuild_from_messages(&self.messages, &self.workspace);
|
||||
}
|
||||
}
|
||||
|
||||
// Unit tests for how `SessionUsage` tracks its optional cache token counters:
// unknown (`None`) must stay distinct from an observed count, including an observed 0.
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
// A default-constructed `SessionUsage` has no cache data: both counters are `None`.
#[test]
|
||||
fn session_usage_cache_starts_none() {
|
||||
let usage = SessionUsage::default();
|
||||
assert!(usage.cache_creation_input_tokens.is_none());
|
||||
assert!(usage.cache_read_input_tokens.is_none());
|
||||
}
|
||||
|
||||
// Adding a usage record whose cache fields are both `None` must leave the
// session's cache counters as `None` rather than turning them into `Some(0)`.
#[test]
|
||||
fn session_usage_cache_remains_none_when_api_omits_cache() {
|
||||
let mut usage = SessionUsage::default();
|
||||
let api_usage = Usage {
|
||||
input_tokens: 100,
|
||||
output_tokens: 50,
|
||||
prompt_cache_hit_tokens: None,
|
||||
prompt_cache_miss_tokens: None,
|
||||
reasoning_tokens: None,
|
||||
reasoning_replay_tokens: None,
|
||||
server_tool_use: None,
|
||||
};
|
||||
usage.add(&api_usage);
|
||||
assert!(usage.cache_creation_input_tokens.is_none());
|
||||
assert!(usage.cache_read_input_tokens.is_none());
|
||||
}
|
||||
|
||||
// Reported cache values are recorded on the first `add` and summed on later ones:
// hit tokens feed `cache_read_input_tokens`, miss tokens feed `cache_creation_input_tokens`.
#[test]
|
||||
fn session_usage_cache_accumulates_when_reported() {
|
||||
let mut usage = SessionUsage::default();
|
||||
let api_usage = Usage {
|
||||
input_tokens: 100,
|
||||
output_tokens: 50,
|
||||
prompt_cache_hit_tokens: Some(30),
|
||||
prompt_cache_miss_tokens: Some(70),
|
||||
reasoning_tokens: None,
|
||||
reasoning_replay_tokens: None,
|
||||
server_tool_use: None,
|
||||
};
|
||||
usage.add(&api_usage);
|
||||
assert_eq!(usage.cache_read_input_tokens, Some(30));
|
||||
assert_eq!(usage.cache_creation_input_tokens, Some(70));
|
||||
// Adding the same record a second time doubles both totals.
usage.add(&api_usage);
|
||||
assert_eq!(usage.cache_read_input_tokens, Some(60));
|
||||
assert_eq!(usage.cache_creation_input_tokens, Some(140));
|
||||
}
|
||||
|
||||
// A hit count of `Some(0)` reported by the provider is an observation and must be
// kept as `Some(0)`, not collapsed into the "unknown" state `None`.
#[test]
|
||||
fn session_usage_cache_preserves_explicit_zero() {
|
||||
let mut usage = SessionUsage::default();
|
||||
let api_usage = Usage {
|
||||
input_tokens: 100,
|
||||
output_tokens: 50,
|
||||
prompt_cache_hit_tokens: Some(0), // explicit zero from provider
|
||||
prompt_cache_miss_tokens: Some(1234),
|
||||
reasoning_tokens: None,
|
||||
reasoning_replay_tokens: None,
|
||||
server_tool_use: None,
|
||||
};
|
||||
usage.add(&api_usage);
|
||||
// 0 is a valid observed value, must NOT be converted to None
|
||||
assert_eq!(usage.cache_read_input_tokens, Some(0));
|
||||
assert_eq!(usage.cache_creation_input_tokens, Some(1234));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -60,6 +60,10 @@ After every tool call that produces a result you'll act on, verify before procee
|
||||
|
||||
Don't claim a change worked until you've observed evidence. Don't trust memory over live tool output.
|
||||
|
||||
**Report outcomes faithfully.** If a tool call fails or returns no data, say so. If you did not run a verification step, say that — don't imply it succeeded. Never claim "all tests pass" when output shows failures. State what actually happened, not what you expected.
|
||||
|
||||
When the API does not report cache usage (`prompt_cache_hit_tokens` or `prompt_cache_miss_tokens` is absent or `null`), treat cache status as **unknown** — not zero. Do not report "cache miss" or "cache hit rate 0%" for unobserved metrics.
|
||||
|
||||
## Composition Pattern for Multi-Step Work
|
||||
|
||||
For any task estimated to take 5+ steps:
|
||||
|
||||
Reference in New Issue
Block a user