fix(tui): context-usage % no longer drops after multi-round turns (#115)
User reported: "the context % at the top is pretty inconsistent — like I
just had a message where it was 31% then I sent another message and it
went to 9%? not sure how that works......"
Root cause: `context_usage_snapshot` preferred `app.last_prompt_tokens`
(reported, from `Event::TurnComplete.usage`) over the estimate computed
from `app.api_messages`. The engine populates that usage via
`turn.add_usage`, which SUMS `input_tokens` across every round in a turn:
```
pub fn add_usage(&mut self, usage: &Usage) {
self.usage.input_tokens += usage.input_tokens;
...
}
```
So a multi-round tool-call turn reports a value much larger than the
actual context window state (e.g., 150k from round 1 + 160k from round 2
= 310k, displayed as 31% of 1M), and the next single-round turn then drops
back to a single round's `input_tokens` (e.g., 90k, displayed as 9%).
Fix: prefer the estimate, which is computed from the current
`api_messages` and is monotonic with respect to conversation growth. The
reported token count is used only as a fallback when no estimate is
available (e.g., immediately after a session restore). Also clamp `used`
to the model's context window so the ratio never exceeds 100%.
`is_reported_context_inflated` is no longer called by
`context_usage_snapshot`; it is kept behind `#[allow(dead_code)]` because
existing tests still exercise it and a future heuristic may want to
distinguish "obviously inflated reported tokens" from healthy reports.
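Regression test
`context_usage_does_not_drop_when_reported_shrinks_after_multi_round_turn`
reproduces the mechanism behind the user's 31% → 9% report: the reported
token count shrinks after a multi-round turn while `api_messages` is
unchanged, and the displayed % must not move.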
Fixes #115.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+21
-22
@@ -3660,28 +3660,22 @@ fn context_usage_snapshot(app: &App) -> Option<(i64, u32, f64)> {
|
||||
.map(|tokens| tokens.max(0));
|
||||
let estimated = estimated_context_tokens(app).map(|tokens| tokens.max(0));
|
||||
|
||||
let used = if app.is_loading {
|
||||
match (estimated, reported) {
|
||||
(Some(estimated), _) => estimated,
|
||||
(None, Some(reported)) => reported,
|
||||
(None, None) => return None,
|
||||
}
|
||||
} else {
|
||||
match (reported, estimated) {
|
||||
(Some(reported), Some(estimated))
|
||||
if reported > max_i64 && estimated > 0 && estimated <= max_i64 =>
|
||||
{
|
||||
estimated
|
||||
}
|
||||
(Some(reported), Some(estimated))
|
||||
if is_reported_context_inflated(reported, estimated) =>
|
||||
{
|
||||
estimated
|
||||
}
|
||||
(Some(reported), _) => reported,
|
||||
(None, Some(estimated)) => estimated,
|
||||
(None, None) => return None,
|
||||
}
|
||||
// Always prefer the estimated current-context size (computed from
|
||||
// `app.api_messages`) when we have it. Reported `last_prompt_tokens`
|
||||
// comes from `Event::TurnComplete.usage`, which the engine builds with
|
||||
// `turn.add_usage` — that SUMS input_tokens across every round in the
|
||||
// turn, so a multi-round tool-call turn reports a value much larger
|
||||
// than the actual context window state, then the next single-round
|
||||
// turn drops back to a single round's input_tokens. User-visible %
|
||||
// was bouncing 31% → 9% (#115) because of this. The estimate is
|
||||
// monotonic wrt conversation growth, which is what a "context filling
|
||||
// up" indicator should show. We still consult `reported` only as a
|
||||
// fallback when no estimate is available (e.g., immediately after a
|
||||
// session restore before the api_messages are populated).
|
||||
let used = match (estimated, reported) {
|
||||
(Some(estimated), _) => estimated.min(max_i64),
|
||||
(None, Some(reported)) => reported.min(max_i64),
|
||||
(None, None) => return None,
|
||||
};
|
||||
|
||||
let max_f64 = f64::from(max);
|
||||
@@ -3690,6 +3684,11 @@ fn context_usage_snapshot(app: &App) -> Option<(i64, u32, f64)> {
|
||||
Some((used, max, percent))
|
||||
}
|
||||
|
||||
/// Retained as a callable utility — `context_usage_snapshot` no longer uses
|
||||
/// it directly (#115 makes the estimate the primary signal), but tests in
|
||||
/// `ui/tests.rs` still exercise it and a future heuristic may want to
|
||||
/// distinguish "obviously inflated reported tokens" from healthy reports.
|
||||
#[allow(dead_code)]
|
||||
fn is_reported_context_inflated(reported: i64, estimated: i64) -> bool {
|
||||
const MIN_ABSOLUTE_GAP: i64 = 4_096;
|
||||
if estimated <= 0 || reported <= estimated {
|
||||
|
||||
@@ -542,6 +542,47 @@ fn context_usage_snapshot_prefers_estimate_when_reported_is_inflated_by_old_reas
|
||||
assert!(percent < 2.0);
|
||||
}
|
||||
|
||||
/// Regression for #115. The engine sums `input_tokens` across every round
|
||||
/// of a turn (`turn.add_usage` does `+=`), so a multi-round tool-call turn
|
||||
/// reports a value much larger than the actual context window state, then
|
||||
/// the next single-round turn drops back to a single round's input_tokens.
|
||||
/// User-visible % was bouncing 31% → 9% because of this. The fix is to
|
||||
/// prefer the estimated current-context size, which is monotonic wrt
|
||||
/// conversation growth.
|
||||
#[test]
|
||||
fn context_usage_does_not_drop_when_reported_shrinks_after_multi_round_turn() {
|
||||
let mut app = create_test_app();
|
||||
app.api_messages = vec![Message {
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentBlock::Text {
|
||||
text: "context ".repeat(2_000), // ~14k tokens estimated
|
||||
cache_control: None,
|
||||
}],
|
||||
}];
|
||||
|
||||
// Simulate a multi-round turn that summed two rounds' input_tokens
|
||||
// (e.g., 200k + 210k from a long thinking + tool-call sequence).
|
||||
app.last_prompt_tokens = Some(410_000);
|
||||
let (_, _, percent_after_multi_round) =
|
||||
context_usage_snapshot(&app).expect("usage available");
|
||||
|
||||
// Now the next turn is a single round on the same conversation —
|
||||
// reported drops to one round's worth even though the actual context
|
||||
// hasn't shrunk.
|
||||
app.last_prompt_tokens = Some(15_000);
|
||||
let (_, _, percent_after_single_round) =
|
||||
context_usage_snapshot(&app).expect("usage available");
|
||||
|
||||
// The displayed % should reflect the conversation size (estimated
|
||||
// from api_messages), NOT the wildly variable reported value.
|
||||
let drift = (percent_after_multi_round - percent_after_single_round).abs();
|
||||
assert!(
|
||||
drift < 1.0,
|
||||
"displayed % should not jump because reported tokens varied across rounds; \
|
||||
after-multi-round={percent_after_multi_round:.2} after-single-round={percent_after_single_round:.2}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn context_usage_snapshot_prefers_live_estimate_while_loading() {
|
||||
let mut app = create_test_app();
|
||||
|
||||
Reference in New Issue
Block a user