fix(tui): context-usage % no longer drops after multi-round turns (#115)

User reported: "the context % at the top is pretty inconsistent — like I just had a message where it was 31% then I sent another message and it went to 9%? not sure how that works......"

Root cause: when the app was not loading, `context_usage_snapshot` preferred `app.last_prompt_tokens` (reported, from `Event::TurnComplete.usage`) over the estimate computed from `app.api_messages`, unless one of the inflation guards fired. The engine populates that usage via `turn.add_usage`, which SUMS `input_tokens` across every round in a turn:

```
pub fn add_usage(&mut self, usage: &Usage) {
    self.usage.input_tokens += usage.input_tokens;
    ...
}
```

So a multi-round tool-call turn reports a value much larger than the actual context window state (e.g., 200k from round 1 + 210k from round 2 = 410k, displayed as 41% of 1M), then the next single-round turn drops back to a single round's input_tokens (e.g., 90k, displayed as 9%).

Fix: prefer the estimate, which is computed from the current `api_messages` and is monotonic with respect to conversation growth. Reported tokens are used as a fallback only when no estimate is available (e.g., immediately after a session restore). Also clamp `used` to the model's context window so the ratio never exceeds 100%.

`is_reported_context_inflated` is no longer in the primary path; it is kept behind `#[allow(dead_code)]` because existing tests still exercise it and a future heuristic may want to distinguish "obviously inflated reported tokens" from healthy reports.

Regression test `context_usage_does_not_drop_when_reported_shrinks_after_multi_round_turn` exercises the exact 31% → 9% scenario the user hit.

Fixes #115.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-26 17:48:57 -05:00
parent e5954160fb
commit a326ef2891
2 changed files with 62 additions and 22 deletions
@@ -3660,28 +3660,22 @@ fn context_usage_snapshot(app: &App) -> Option<(i64, u32, f64)> {
        .map(|tokens| tokens.max(0));
    let estimated = estimated_context_tokens(app).map(|tokens| tokens.max(0));

-    let used = if app.is_loading {
-        match (estimated, reported) {
-            (Some(estimated), _) => estimated,
-            (None, Some(reported)) => reported,
-            (None, None) => return None,
-        }
-    } else {
-        match (reported, estimated) {
-            (Some(reported), Some(estimated))
-                if reported > max_i64 && estimated > 0 && estimated <= max_i64 =>
-            {
-                estimated
-            }
-            (Some(reported), Some(estimated))
-                if is_reported_context_inflated(reported, estimated) =>
-            {
-                estimated
-            }
-            (Some(reported), _) => reported,
-            (None, Some(estimated)) => estimated,
-            (None, None) => return None,
-        }
+    // Always prefer the estimated current-context size (computed from
+    // `app.api_messages`) when we have it. Reported `last_prompt_tokens`
+    // comes from `Event::TurnComplete.usage`, which the engine builds with
+    // `turn.add_usage` — that SUMS input_tokens across every round in the
+    // turn, so a multi-round tool-call turn reports a value much larger
+    // than the actual context window state, then the next single-round
+    // turn drops back to a single round's input_tokens. User-visible %
+    // was bouncing 31% → 9% (#115) because of this. The estimate is
+    // monotonic wrt conversation growth, which is what a "context filling
+    // up" indicator should show. We still consult `reported` only as a
+    // fallback when no estimate is available (e.g., immediately after a
+    // session restore before the api_messages are populated).
+    let used = match (estimated, reported) {
+        (Some(estimated), _) => estimated.min(max_i64),
+        (None, Some(reported)) => reported.min(max_i64),
+        (None, None) => return None,
    };

    let max_f64 = f64::from(max);
@@ -3690,6 +3684,11 @@ fn context_usage_snapshot(app: &App) -> Option<(i64, u32, f64)> {
    Some((used, max, percent))
 }

+/// Retained as a callable utility — `context_usage_snapshot` no longer uses
+/// it directly (#115 makes the estimate the primary signal), but tests in
+/// `ui/tests.rs` still exercise it and a future heuristic may want to
+/// distinguish "obviously inflated reported tokens" from healthy reports.
+#[allow(dead_code)]
 fn is_reported_context_inflated(reported: i64, estimated: i64) -> bool {
    const MIN_ABSOLUTE_GAP: i64 = 4_096;
    if estimated <= 0 || reported <= estimated {
@@ -542,6 +542,47 @@ fn context_usage_snapshot_prefers_estimate_when_reported_is_inflated_by_old_reas
    assert!(percent < 2.0);
 }

+/// Regression for #115. The engine sums `input_tokens` across every round
+/// of a turn (`turn.add_usage` does `+=`), so a multi-round tool-call turn
+/// reports a value much larger than the actual context window state, then
+/// the next single-round turn drops back to a single round's input_tokens.
+/// User-visible % was bouncing 31% → 9% because of this. The fix is to
+/// prefer the estimated current-context size, which is monotonic wrt
+/// conversation growth.
+#[test]
+fn context_usage_does_not_drop_when_reported_shrinks_after_multi_round_turn() {
+    let mut app = create_test_app();
+    app.api_messages = vec![Message {
+        role: "user".to_string(),
+        content: vec![ContentBlock::Text {
+            text: "context ".repeat(2_000), // ~14k tokens estimated
+            cache_control: None,
+        }],
+    }];
+
+    // Simulate a multi-round turn that summed two rounds' input_tokens
+    // (e.g., 200k + 210k from a long thinking + tool-call sequence).
+    app.last_prompt_tokens = Some(410_000);
+    let (_, _, percent_after_multi_round) =
+        context_usage_snapshot(&app).expect("usage available");
+
+    // Now the next turn is a single round on the same conversation —
+    // reported drops to one round's worth even though the actual context
+    // hasn't shrunk.
+    app.last_prompt_tokens = Some(15_000);
+    let (_, _, percent_after_single_round) =
+        context_usage_snapshot(&app).expect("usage available");
+
+    // The displayed % should reflect the conversation size (estimated
+    // from api_messages), NOT the wildly variable reported value.
+    let drift = (percent_after_multi_round - percent_after_single_round).abs();
+    assert!(
+        drift < 1.0,
+        "displayed % should not jump because reported tokens varied across rounds; \
+         after-multi-round={percent_after_multi_round:.2} after-single-round={percent_after_single_round:.2}"
+    );
+}
+
 #[test]
 fn context_usage_snapshot_prefers_live_estimate_while_loading() {
    let mut app = create_test_app();