From a326ef28910f4300edb4f479bd968781654ecc92 Mon Sep 17 00:00:00 2001
From: Hunter Bown <hmbown@gmail.com>
Date: Sun, 26 Apr 2026 17:48:57 -0500
Subject: [PATCH] fix(tui): context-usage % no longer drops after multi-round
 turns (#115)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

User reported: "the context % at the top is pretty inconsistent — like I
just had a message where it was 31% then I sent another message and it
went to 9%? not sure how that works......"

Root cause: `context_usage_snapshot` preferred `app.last_prompt_tokens`
(reported, from `Event::TurnComplete.usage`) over the estimate computed
from `app.api_messages`. The engine populates that usage via
`turn.add_usage`, which SUMS `input_tokens` across every round in a turn:

  ```
  pub fn add_usage(&mut self, usage: &Usage) {
      self.usage.input_tokens += usage.input_tokens;
      ...
  }
  ```

So a multi-round tool-call turn reports a value much larger than the
actual context window state (e.g., 150k from round 1 + 160k from round 2
= 310k, displayed as 31% of 1M), then the next single-round turn drops
back to a single round's input_tokens (e.g., 90k, displayed as 9%).

Fix: prefer the estimate, which is computed from the current
`api_messages` and is monotonic with respect to conversation growth.
Reported tokens are used only as a fallback when no estimate is
available (e.g., immediately after a session restore). Also clamp `used`
to the model's context window so the ratio never exceeds 100%.

`is_reported_context_inflated` is no longer in the primary path; kept
behind `#[allow(dead_code)]` because existing tests still exercise it
and a future heuristic may want to distinguish "obviously inflated
reported tokens" from healthy reports.

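Regression test
`context_usage_does_not_drop_when_reported_shrinks_after_multi_round_turn`
reproduces the pattern behind the 31% → 9% report: reported tokens
inflated by a multi-round turn, then shrinking on the next single-round
turn, while `api_messages` stays unchanged.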

Fixes #115.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 crates/tui/src/tui/ui.rs       | 43 +++++++++++++++++-----------------
 crates/tui/src/tui/ui/tests.rs | 41 ++++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+), 22 deletions(-)

diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs
index 3497f981..20cc9c00 100644
--- a/crates/tui/src/tui/ui.rs
+++ b/crates/tui/src/tui/ui.rs
@@ -3660,28 +3660,22 @@ fn context_usage_snapshot(app: &App) -> Option<(i64, u32, f64)> {
         .map(|tokens| tokens.max(0));
     let estimated = estimated_context_tokens(app).map(|tokens| tokens.max(0));
 
-    let used = if app.is_loading {
-        match (estimated, reported) {
-            (Some(estimated), _) => estimated,
-            (None, Some(reported)) => reported,
-            (None, None) => return None,
-        }
-    } else {
-        match (reported, estimated) {
-            (Some(reported), Some(estimated))
-                if reported > max_i64 && estimated > 0 && estimated <= max_i64 =>
-            {
-                estimated
-            }
-            (Some(reported), Some(estimated))
-                if is_reported_context_inflated(reported, estimated) =>
-            {
-                estimated
-            }
-            (Some(reported), _) => reported,
-            (None, Some(estimated)) => estimated,
-            (None, None) => return None,
-        }
+    // Always prefer the estimated current-context size (computed from
+    // `app.api_messages`) when we have it. Reported `last_prompt_tokens`
+    // comes from `Event::TurnComplete.usage`, which the engine builds with
+    // `turn.add_usage` — that SUMS input_tokens across every round in the
+    // turn, so a multi-round tool-call turn reports a value much larger
+    // than the actual context window state, then the next single-round
+    // turn drops back to a single round's input_tokens. User-visible %
+    // was bouncing 31% → 9% (#115) because of this. The estimate is
+    // monotonic wrt conversation growth, which is what a "context filling
+    // up" indicator should show. We still consult `reported` only as a
+    // fallback when no estimate is available (e.g., immediately after a
+    // session restore before the api_messages are populated).
+    let used = match (estimated, reported) {
+        (Some(estimated), _) => estimated.min(max_i64),
+        (None, Some(reported)) => reported.min(max_i64),
+        (None, None) => return None,
     };
 
     let max_f64 = f64::from(max);
@@ -3690,6 +3684,11 @@ fn context_usage_snapshot(app: &App) -> Option<(i64, u32, f64)> {
     Some((used, max, percent))
 }
 
+/// Retained as a callable utility — `context_usage_snapshot` no longer uses
+/// it directly (#115 makes the estimate the primary signal), but tests in
+/// `ui/tests.rs` still exercise it and a future heuristic may want to
+/// distinguish "obviously inflated reported tokens" from healthy reports.
+#[allow(dead_code)]
 fn is_reported_context_inflated(reported: i64, estimated: i64) -> bool {
     const MIN_ABSOLUTE_GAP: i64 = 4_096;
     if estimated <= 0 || reported <= estimated {
diff --git a/crates/tui/src/tui/ui/tests.rs b/crates/tui/src/tui/ui/tests.rs
index c8a4844d..a1a8cd9e 100644
--- a/crates/tui/src/tui/ui/tests.rs
+++ b/crates/tui/src/tui/ui/tests.rs
@@ -542,6 +542,47 @@ fn context_usage_snapshot_prefers_estimate_when_reported_is_inflated_by_old_reas
     assert!(percent < 2.0);
 }
 
+/// Regression for #115. `last_prompt_tokens` can be inflated after a
+/// multi-round tool-call turn (`turn.add_usage` sums `input_tokens` with
+/// `+=`) and then shrink on the next single-round turn, which made the
+/// user-visible % bounce 31% → 9%. This test keeps `api_messages` fixed,
+/// swaps the reported value between 410_000 and 15_000, and asserts that
+/// the percent from `context_usage_snapshot` moves by less than 1.0 — it
+/// should track the estimate, not the reported value.
+#[test]
+fn context_usage_does_not_drop_when_reported_shrinks_after_multi_round_turn() {
+    let mut app = create_test_app();
+    app.api_messages = vec![Message {
+        role: "user".to_string(),
+        content: vec![ContentBlock::Text {
+            text: "context ".repeat(2_000), // 16_000 bytes of text; presumably ~14k estimated tokens — TODO confirm
+            cache_control: None,
+        }],
+    }];
+
+    // Simulate a multi-round turn whose reported usage is the sum of two
+    // rounds' input_tokens (e.g., 200k + 210k = 410k).
+    app.last_prompt_tokens = Some(410_000);
+    let (_, _, percent_after_multi_round) =
+        context_usage_snapshot(&app).expect("usage available");
+
+    // The next turn is a single round on the same conversation: the
+    // reported value drops to one round's worth even though
+    // `api_messages` (and so the actual context) is unchanged.
+    app.last_prompt_tokens = Some(15_000);
+    let (_, _, percent_after_single_round) =
+        context_usage_snapshot(&app).expect("usage available");
+
+    // The displayed % should reflect the conversation size (estimated
+    // from api_messages), NOT the wildly variable reported value.
+    let drift = (percent_after_multi_round - percent_after_single_round).abs();
+    assert!(
+        drift < 1.0,
+        "displayed % should not jump because reported tokens varied across rounds; \
+         after-multi-round={percent_after_multi_round:.2} after-single-round={percent_after_single_round:.2}"
+    );
+}
+
 #[test]
 fn context_usage_snapshot_prefers_live_estimate_while_loading() {
     let mut app = create_test_app();