From a326ef28910f4300edb4f479bd968781654ecc92 Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Sun, 26 Apr 2026 17:48:57 -0500 Subject: [PATCH] fix(tui): context-usage % no longer drops after multi-round turns (#115) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User reported: "the context % at the top is pretty inconsistent — like I just had a message where it was 31% then I sent another message and it went to 9%? not sure how that works......" Root cause: `context_usage_snapshot` preferred `app.last_prompt_tokens` (reported, from `Event::TurnComplete.usage`) over the estimate computed from `app.api_messages`. The engine populates that usage via `turn.add_usage`, which SUMS `input_tokens` across every round in a turn: ``` pub fn add_usage(&mut self, usage: &Usage) { self.usage.input_tokens += usage.input_tokens; ... } ``` So a multi-round tool-call turn reports a value much larger than the actual context window state (e.g., 150k from round 1 + 160k from round 2 = 310k displayed as 31% of 1M), then the next single-round turn drops back to a single round's input_tokens (e.g., 90k displayed as 9%). Fix: prefer the estimate, which is computed from the current `api_messages` and is monotonic wrt conversation growth. Reported tokens are used as a fallback only when no estimate is available (e.g., immediately after a session restore). Also clamp `used` to the model's context window so the ratio never exceeds 100%. `is_reported_context_inflated` is no longer in the primary path; kept behind `#[allow(dead_code)]` because existing tests still exercise it and a future heuristic may want to distinguish "obviously inflated reported tokens" from healthy reports. Regression test `context_usage_does_not_drop_when_reported_shrinks_after_multi_round_turn` exercises the exact 31% → 9% scenario the user hit. Fixes #115. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/tui/src/tui/ui.rs | 43 +++++++++++++++++----------------- crates/tui/src/tui/ui/tests.rs | 41 ++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 22 deletions(-) diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs index 3497f981..20cc9c00 100644 --- a/crates/tui/src/tui/ui.rs +++ b/crates/tui/src/tui/ui.rs @@ -3660,28 +3660,22 @@ fn context_usage_snapshot(app: &App) -> Option<(i64, u32, f64)> { .map(|tokens| tokens.max(0)); let estimated = estimated_context_tokens(app).map(|tokens| tokens.max(0)); - let used = if app.is_loading { - match (estimated, reported) { - (Some(estimated), _) => estimated, - (None, Some(reported)) => reported, - (None, None) => return None, - } - } else { - match (reported, estimated) { - (Some(reported), Some(estimated)) - if reported > max_i64 && estimated > 0 && estimated <= max_i64 => - { - estimated - } - (Some(reported), Some(estimated)) - if is_reported_context_inflated(reported, estimated) => - { - estimated - } - (Some(reported), _) => reported, - (None, Some(estimated)) => estimated, - (None, None) => return None, - } + // Always prefer the estimated current-context size (computed from + // `app.api_messages`) when we have it. Reported `last_prompt_tokens` + // comes from `Event::TurnComplete.usage`, which the engine builds with + // `turn.add_usage` — that SUMS input_tokens across every round in the + // turn, so a multi-round tool-call turn reports a value much larger + // than the actual context window state, then the next single-round + // turn drops back to a single round's input_tokens. User-visible % + // was bouncing 31% → 9% (#115) because of this. The estimate is + // monotonic wrt conversation growth, which is what a "context filling + // up" indicator should show. We still consult `reported` only as a + // fallback when no estimate is available (e.g., immediately after a + // session restore before the api_messages are populated). 
+ let used = match (estimated, reported) { + (Some(estimated), _) => estimated.min(max_i64), + (None, Some(reported)) => reported.min(max_i64), + (None, None) => return None, }; let max_f64 = f64::from(max); @@ -3690,6 +3684,11 @@ fn context_usage_snapshot(app: &App) -> Option<(i64, u32, f64)> { Some((used, max, percent)) } +/// Retained as a callable utility — `context_usage_snapshot` no longer uses +/// it directly (#115 makes the estimate the primary signal), but tests in +/// `ui/tests.rs` still exercise it and a future heuristic may want to +/// distinguish "obviously inflated reported tokens" from healthy reports. +#[allow(dead_code)] fn is_reported_context_inflated(reported: i64, estimated: i64) -> bool { const MIN_ABSOLUTE_GAP: i64 = 4_096; if estimated <= 0 || reported <= estimated { diff --git a/crates/tui/src/tui/ui/tests.rs b/crates/tui/src/tui/ui/tests.rs index c8a4844d..a1a8cd9e 100644 --- a/crates/tui/src/tui/ui/tests.rs +++ b/crates/tui/src/tui/ui/tests.rs @@ -542,6 +542,47 @@ fn context_usage_snapshot_prefers_estimate_when_reported_is_inflated_by_old_reas assert!(percent < 2.0); } +/// Regression for #115. The engine sums `input_tokens` across every round +/// of a turn (`turn.add_usage` does `+=`), so a multi-round tool-call turn +/// reports a value much larger than the actual context window state, then +/// the next single-round turn drops back to a single round's input_tokens. +/// User-visible % was bouncing 31% → 9% because of this. The fix is to +/// prefer the estimated current-context size, which is monotonic wrt +/// conversation growth. 
+#[test] +fn context_usage_does_not_drop_when_reported_shrinks_after_multi_round_turn() { + let mut app = create_test_app(); + app.api_messages = vec![Message { + role: "user".to_string(), + content: vec![ContentBlock::Text { + text: "context ".repeat(2_000), // ~14k tokens estimated + cache_control: None, + }], + }]; + + // Simulate a multi-round turn that summed two rounds' input_tokens + // (e.g., 200k + 210k from a long thinking + tool-call sequence). + app.last_prompt_tokens = Some(410_000); + let (_, _, percent_after_multi_round) = + context_usage_snapshot(&app).expect("usage available"); + + // Now the next turn is a single round on the same conversation — + // reported drops to one round's worth even though the actual context + // hasn't shrunk. + app.last_prompt_tokens = Some(15_000); + let (_, _, percent_after_single_round) = + context_usage_snapshot(&app).expect("usage available"); + + // The displayed % should reflect the conversation size (estimated + // from api_messages), NOT the wildly variable reported value. + let drift = (percent_after_multi_round - percent_after_single_round).abs(); + assert!( + drift < 1.0, + "displayed % should not jump because reported tokens varied across rounds; \ + after-multi-round={percent_after_multi_round:.2} after-single-round={percent_after_single_round:.2}" + ); +} + #[test] fn context_usage_snapshot_prefers_live_estimate_while_loading() { let mut app = create_test_app();