fix: cache usage unknown shown as 0, and add truthful reporting prompt (#false-claim)

Two independent fixes: 1. **Prompt truthful reporting** (base.md): add explicit rules for honest outcome reporting — if a tool fails or returns no data, say so; if cache usage is unobserved, treat it as unknown/null, not 0. 2. **Cache usage u64 → Option<u64>** (session.rs): when the API did not report cache hit/miss tokens, the cumulative SessionUsage counters stayed at their default of 0, which models interpreted as "no cache hits" rather than "unknown". Changing the fields to Option<u64> keeps absent cache data as None, so it serializes as null in the model context instead of 0. Tests added for four cases: starts None, stays None when the API omits cache data, accumulates correctly when the API reports cache data, and preserves an explicit zero reported by the provider.
2026-05-11 03:43:21 +08:00
parent ca5204e311
commit 5ccfe91bff
2 changed files with 81 additions and 6 deletions
@@ -90,10 +90,12 @@ pub struct Session {
 pub struct SessionUsage {
    pub input_tokens: u64,
    pub output_tokens: u64,
-    #[allow(dead_code)]
-    pub cache_creation_input_tokens: u64,
-    #[allow(dead_code)]
-    pub cache_read_input_tokens: u64,
+    /// Cache creation (miss) tokens. `None` until the API reports a value —
+    /// do NOT display `None` as 0, which would be indistinguishable from "no misses".
+    pub cache_creation_input_tokens: Option<u64>,
+    /// Cache read (hit) tokens. `None` until the API reports a value —
+    /// do NOT display `None` as 0, which would be indistinguishable from "no hits".
+    pub cache_read_input_tokens: Option<u64>,
 }

 impl SessionUsage {
@@ -102,10 +104,12 @@ impl SessionUsage {
        self.input_tokens += u64::from(usage.input_tokens);
        self.output_tokens += u64::from(usage.output_tokens);
        if let Some(tokens) = usage.prompt_cache_miss_tokens {
-            self.cache_creation_input_tokens += u64::from(tokens);
+            self.cache_creation_input_tokens =
+                Some(self.cache_creation_input_tokens.unwrap_or(0) + u64::from(tokens));
        }
        if let Some(tokens) = usage.prompt_cache_hit_tokens {
-            self.cache_read_input_tokens += u64::from(tokens);
+            self.cache_read_input_tokens =
+                Some(self.cache_read_input_tokens.unwrap_or(0) + u64::from(tokens));
        }
    }
 }
@@ -165,3 +169,70 @@ impl Session {
            .rebuild_from_messages(&self.messages, &self.workspace);
    }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn session_usage_cache_starts_none() {
+        let usage = SessionUsage::default();
+        assert!(usage.cache_creation_input_tokens.is_none());
+        assert!(usage.cache_read_input_tokens.is_none());
+    }
+
+    #[test]
+    fn session_usage_cache_remains_none_when_api_omits_cache() {
+        let mut usage = SessionUsage::default();
+        let api_usage = Usage {
+            input_tokens: 100,
+            output_tokens: 50,
+            prompt_cache_hit_tokens: None,
+            prompt_cache_miss_tokens: None,
+            reasoning_tokens: None,
+            reasoning_replay_tokens: None,
+            server_tool_use: None,
+        };
+        usage.add(&api_usage);
+        assert!(usage.cache_creation_input_tokens.is_none());
+        assert!(usage.cache_read_input_tokens.is_none());
+    }
+
+    #[test]
+    fn session_usage_cache_accumulates_when_reported() {
+        let mut usage = SessionUsage::default();
+        let api_usage = Usage {
+            input_tokens: 100,
+            output_tokens: 50,
+            prompt_cache_hit_tokens: Some(30),
+            prompt_cache_miss_tokens: Some(70),
+            reasoning_tokens: None,
+            reasoning_replay_tokens: None,
+            server_tool_use: None,
+        };
+        usage.add(&api_usage);
+        assert_eq!(usage.cache_read_input_tokens, Some(30));
+        assert_eq!(usage.cache_creation_input_tokens, Some(70));
+        usage.add(&api_usage);
+        assert_eq!(usage.cache_read_input_tokens, Some(60));
+        assert_eq!(usage.cache_creation_input_tokens, Some(140));
+    }
+
+    #[test]
+    fn session_usage_cache_preserves_explicit_zero() {
+        let mut usage = SessionUsage::default();
+        let api_usage = Usage {
+            input_tokens: 100,
+            output_tokens: 50,
+            prompt_cache_hit_tokens: Some(0), // explicit zero from provider
+            prompt_cache_miss_tokens: Some(1234),
+            reasoning_tokens: None,
+            reasoning_replay_tokens: None,
+            server_tool_use: None,
+        };
+        usage.add(&api_usage);
+        // 0 is a valid observed value, must NOT be converted to None
+        assert_eq!(usage.cache_read_input_tokens, Some(0));
+        assert_eq!(usage.cache_creation_input_tokens, Some(1234));
+    }
+}
@@ -60,6 +60,10 @@ After every tool call that produces a result you'll act on, verify before procee

 Don't claim a change worked until you've observed evidence. Don't trust memory over live tool output.

+**Report outcomes faithfully.** If a tool call fails or returns no data, say so. If you did not run a verification step, say that — don't imply it succeeded. Never claim "all tests pass" when output shows failures. State what actually happened, not what you expected.
+
+When the API does not report cache usage (`prompt_cache_hit_tokens` or `prompt_cache_miss_tokens` is absent or `null`), treat the corresponding cache metric as **unknown** — not zero. Do not report "cache miss" or "cache hit rate 0%" for metrics that were never observed.
+
 ## Composition Pattern for Multi-Step Work

 For any task estimated to take 5+ steps: