fix(tui): context-usage % no longer drops after multi-round turns (#115)

User reported: "the context % at the top is pretty inconsistent — like I
just had a message where it was 31% then I sent another message and it
went to 9%? not sure how that works......"

Root cause: `context_usage_snapshot` preferred `app.last_prompt_tokens`
(reported, from `Event::TurnComplete.usage`) over the estimate computed
from `app.api_messages`. The engine populates that usage via
`turn.add_usage`, which SUMS `input_tokens` across every round in a turn:

  ```
  pub fn add_usage(&mut self, usage: &Usage) {
      self.usage.input_tokens += usage.input_tokens;
      ...
  }
  ```

So a multi-round tool-call turn reports a value much larger than the
actual context window state (e.g., 200k from round 1 + 210k from round 2
= 410k, displayed as 41% of 1M; the user's 31% corresponds to a ~310k
sum), then the next single-round turn drops back to a single round's
input_tokens (e.g., 90k displayed as 9%).

Fix: prefer the estimate, which is computed from the current
`api_messages` and is monotonic with respect to conversation growth.
Reported tokens are used only as a fallback when no estimate is
available (e.g., immediately after a session restore). Also clamp `used`
to the model's context window so the ratio never exceeds 100%.

`is_reported_context_inflated` is no longer in the primary path; kept
behind `#[allow(dead_code)]` because existing tests still exercise it
and a future heuristic may want to distinguish "obviously inflated
reported tokens" from healthy reports.

Regression test
`context_usage_does_not_drop_when_reported_shrinks_after_multi_round_turn`
exercises the exact 31% → 9% scenario the user hit.

Fixes #115.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hunter Bown
2026-04-26 17:48:57 -05:00
parent e5954160fb
commit a326ef2891
2 changed files with 62 additions and 22 deletions
+21 -22
View File
@@ -3660,28 +3660,22 @@ fn context_usage_snapshot(app: &App) -> Option<(i64, u32, f64)> {
.map(|tokens| tokens.max(0));
let estimated = estimated_context_tokens(app).map(|tokens| tokens.max(0));
let used = if app.is_loading {
match (estimated, reported) {
(Some(estimated), _) => estimated,
(None, Some(reported)) => reported,
(None, None) => return None,
}
} else {
match (reported, estimated) {
(Some(reported), Some(estimated))
if reported > max_i64 && estimated > 0 && estimated <= max_i64 =>
{
estimated
}
(Some(reported), Some(estimated))
if is_reported_context_inflated(reported, estimated) =>
{
estimated
}
(Some(reported), _) => reported,
(None, Some(estimated)) => estimated,
(None, None) => return None,
}
// Always prefer the estimated current-context size (computed from
// `app.api_messages`) when we have it. Reported `last_prompt_tokens`
// comes from `Event::TurnComplete.usage`, which the engine builds with
// `turn.add_usage` — that SUMS input_tokens across every round in the
// turn, so a multi-round tool-call turn reports a value much larger
// than the actual context window state, then the next single-round
// turn drops back to a single round's input_tokens. User-visible %
// was bouncing 31% → 9% (#115) because of this. The estimate is
// monotonic wrt conversation growth, which is what a "context filling
// up" indicator should show. We still consult `reported` only as a
// fallback when no estimate is available (e.g., immediately after a
// session restore before the api_messages are populated).
let used = match (estimated, reported) {
(Some(estimated), _) => estimated.min(max_i64),
(None, Some(reported)) => reported.min(max_i64),
(None, None) => return None,
};
let max_f64 = f64::from(max);
@@ -3690,6 +3684,11 @@ fn context_usage_snapshot(app: &App) -> Option<(i64, u32, f64)> {
Some((used, max, percent))
}
/// Retained as a callable utility — `context_usage_snapshot` no longer uses
/// it directly (#115 makes the estimate the primary signal), but tests in
/// `ui/tests.rs` still exercise it and a future heuristic may want to
/// distinguish "obviously inflated reported tokens" from healthy reports.
#[allow(dead_code)]
fn is_reported_context_inflated(reported: i64, estimated: i64) -> bool {
const MIN_ABSOLUTE_GAP: i64 = 4_096;
if estimated <= 0 || reported <= estimated {
+41
View File
@@ -542,6 +542,47 @@ fn context_usage_snapshot_prefers_estimate_when_reported_is_inflated_by_old_reas
assert!(percent < 2.0);
}
/// Regression test for #115: the displayed context percentage must follow
/// the conversation held in `api_messages`, not `last_prompt_tokens`.
///
/// The reported figure is accumulated across every round of a turn
/// (`turn.add_usage` uses `+=`), so it balloons after a multi-round
/// tool-call turn and collapses again after the next single-round turn.
/// That made the user-visible percentage bounce 31% → 9%. Here the
/// conversation is left untouched while the reported value swings, and the
/// percentage is required to stay put.
#[test]
fn context_usage_does_not_drop_when_reported_shrinks_after_multi_round_turn() {
    let mut app = create_test_app();

    // A single user message carrying 16,000 characters of filler; it is the
    // only input to the estimate and is identical for both snapshots.
    let filler = ContentBlock::Text {
        text: "context ".repeat(2_000),
        cache_control: None,
    };
    app.api_messages = vec![Message {
        role: String::from("user"),
        content: vec![filler],
    }];

    // Snapshot 1: the reported value is the sum of two rounds' input_tokens
    // (e.g., 200k + 210k from a long thinking + tool-call sequence).
    app.last_prompt_tokens = Some(410_000);
    let percent_after_multi_round = context_usage_snapshot(&app)
        .expect("usage available")
        .2;

    // Snapshot 2: a single-round turn on the same conversation, so the
    // reported value shrinks even though the real context has not.
    app.last_prompt_tokens = Some(15_000);
    let percent_after_single_round = context_usage_snapshot(&app)
        .expect("usage available")
        .2;

    // Both snapshots must agree to within one percentage point.
    let is_stable = (percent_after_multi_round - percent_after_single_round).abs() < 1.0;
    assert!(
        is_stable,
        "displayed % should not jump because reported tokens varied across rounds; \
         after-multi-round={percent_after_multi_round:.2} after-single-round={percent_after_single_round:.2}"
    );
}
#[test]
fn context_usage_snapshot_prefers_live_estimate_while_loading() {
let mut app = create_test_app();