diff --git a/crates/tui/src/tools/rlm.rs b/crates/tui/src/tools/rlm.rs index 80a13138..5cc641ab 100644 --- a/crates/tui/src/tools/rlm.rs +++ b/crates/tui/src/tools/rlm.rs @@ -296,13 +296,24 @@ impl ToolSpec for RlmTool { }) .collect(); + // The `child_*` keys are the contract the engine reads in + // `tool_routing::accrue_child_token_cost_if_any` to roll + // sub-LLM token usage into the session-cost counter. RLM + // spawns its own DeepSeek calls under `child_model`; without + // this accrual the dashboard under-reports a session that + // uses RLM heavily by 10-20× because only the parent turn's + // tokens hit `accrue_session_cost` (#524). let metadata = json!({ "iterations": result.iterations, "duration_ms": result.duration.as_millis() as u64, "input_tokens": result.usage.input_tokens, "output_tokens": result.usage.output_tokens, - "termination": format!("{:?}", result.termination).to_lowercase(), + "child_input_tokens": result.usage.input_tokens, + "child_output_tokens": result.usage.output_tokens, + "child_prompt_cache_hit_tokens": result.usage.prompt_cache_hit_tokens, + "child_prompt_cache_miss_tokens": result.usage.prompt_cache_miss_tokens, "child_model": child_model, + "termination": format!("{:?}", result.termination).to_lowercase(), "max_depth": max_depth, "total_rpcs": result.total_rpcs, "trace": trace_json, diff --git a/crates/tui/src/tui/app.rs b/crates/tui/src/tui/app.rs index 6250e9c7..a1eec4c2 100644 --- a/crates/tui/src/tui/app.rs +++ b/crates/tui/src/tui/app.rs @@ -756,12 +756,12 @@ pub struct App { pub submit_pending_steers_after_interrupt: bool, /// Start time for current turn pub turn_started_at: Option, - /// When this `App` instance was constructed (#448). Used to render - /// the footer's `worked Nh Mm` indicator. Resets per launch — we - /// deliberately don't try to persist across full restarts because - /// "since I sat down" is the more useful framing than wall-clock - /// session age. 
-    pub session_started_at: Instant,
+    /// Sum of completed turn durations for this `App` instance (#448
+    /// follow-up). Drives the footer's `worked Nh Mm` chip so the
+    /// label reflects actual model work, not wall-clock since launch.
+    /// Incremented on `TurnComplete` from the elapsed time of the
+    /// just-finished turn. Resets per launch.
+    pub cumulative_turn_duration: std::time::Duration,
     /// Current runtime turn id (if known).
     pub runtime_turn_id: Option,
     /// Current runtime turn status (if known).
@@ -1207,7 +1207,7 @@ impl App {
             rejected_steers: VecDeque::new(),
             submit_pending_steers_after_interrupt: false,
             turn_started_at: None,
-            session_started_at: Instant::now(),
+            cumulative_turn_duration: std::time::Duration::ZERO,
             runtime_turn_id: None,
             runtime_turn_status: None,
             workspace_context: None,
diff --git a/crates/tui/src/tui/tool_routing.rs b/crates/tui/src/tui/tool_routing.rs
index 27ae3b67..2e946d59 100644
--- a/crates/tui/src/tui/tool_routing.rs
+++ b/crates/tui/src/tui/tool_routing.rs
@@ -330,6 +330,61 @@ fn store_tool_detail_output(
 }
 
+/// Inspect a tool's success metadata for the `child_*` token-usage
+/// fields that tools spawning their own LLM calls populate (e.g.
+/// `rlm`). Roll any reported child-token cost into the session's
+/// running sub-agent cost counter so the footer total reflects all
+/// tokens the user is actually billed for, not just the parent turn's
+/// tokens.
+///
+/// Without this hook, an RLM-heavy session shows a fraction of the
+/// real spend because the parent turn's `Usage` only counts the
+/// orchestrator's tokens, not the dozens of `deepseek-v4-flash` child
+/// rounds RLM fans out under the hood (#524).
+fn accrue_child_token_cost_if_any(app: &mut App, result: &Result) {
+    let Ok(tool_result) = result else { return };
+    let Some(metadata) = tool_result.metadata.as_ref() else {
+        return;
+    };
+    let Some(model) = metadata
+        .get("child_model")
+        .and_then(serde_json::Value::as_str)
+    else {
+        return;
+    };
+    let input_tokens = metadata
+        .get("child_input_tokens")
+        .and_then(serde_json::Value::as_u64)
+        .unwrap_or(0);
+    let output_tokens = metadata
+        .get("child_output_tokens")
+        .and_then(serde_json::Value::as_u64)
+        .unwrap_or(0);
+    if input_tokens == 0 && output_tokens == 0 {
+        return;
+    }
+    let prompt_cache_hit_tokens = metadata
+        .get("child_prompt_cache_hit_tokens")
+        .and_then(serde_json::Value::as_u64)
+        .map(|v| u32::try_from(v).unwrap_or(u32::MAX));
+    let prompt_cache_miss_tokens = metadata
+        .get("child_prompt_cache_miss_tokens")
+        .and_then(serde_json::Value::as_u64)
+        .map(|v| u32::try_from(v).unwrap_or(u32::MAX));
+    let usage = crate::models::Usage {
+        input_tokens: u32::try_from(input_tokens).unwrap_or(u32::MAX),
+        output_tokens: u32::try_from(output_tokens).unwrap_or(u32::MAX),
+        prompt_cache_hit_tokens,
+        prompt_cache_miss_tokens,
+        reasoning_tokens: None,
+        reasoning_replay_tokens: None,
+        server_tool_use: None,
+    };
+    if let Some(cost) = crate::pricing::calculate_turn_cost_from_usage(model, &usage) {
+        app.accrue_subagent_cost(cost);
+    }
+}
+
 #[allow(clippy::too_many_lines)]
 pub(super) fn handle_tool_call_complete(
     app: &mut App,
     id: &str,
@@ -339,6 +394,11 @@ pub(super) fn handle_tool_call_complete(
     if app.ignored_tool_calls.remove(id) {
         return;
     }
+    // Roll any child-LLM token usage the tool reports into the
+    // session-cost counter. Runs unconditionally so future tools that
+    // spawn their own LLM calls (RLM, summarizers, retrieval helpers)
+    // get accrued without needing a per-tool hook (#524).
+ accrue_child_token_cost_if_any(app, result); // Exploring entries land in the per-tool map regardless of whether they // live in the active cell or in finalized history; the path is the same. diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs index 342e2356..83f35ca2 100644 --- a/crates/tui/src/tui/ui.rs +++ b/crates/tui/src/tui/ui.rs @@ -148,13 +148,26 @@ pub async fn run_tui(config: &Config, options: TuiOptions) -> Result<()> { let use_mouse_capture = options.use_mouse_capture; let use_bracketed_paste = options.use_bracketed_paste; - // Apply OSC 8 hyperlink toggle from config; default `true`. + // Apply OSC 8 hyperlink toggle from config. + // + // Default-off on Windows because legacy `cmd.exe` and pre-Win11 + // PowerShell consoles don't always honor the OSC 8 string + // terminator (`ESC \`) cleanly — emitting the escape can leave + // stray bytes that eat the leading column of the next line and + // duplicate the composer panel during scroll. Reported on a + // Windows session (issue forthcoming, screenshot showed + // "eepseek-v4-flash" with the leading `d` consumed and three + // overlapping composer panels). Mac/Linux still default-on; users + // on a Windows console that *does* support OSC 8 (Windows + // Terminal, Alacritty, WezTerm) can opt back in via + // `[tui] osc8_links = true`. + let osc8_default_on = !cfg!(windows); crate::tui::osc8::set_enabled( config .tui .as_ref() .and_then(|tui| tui.osc8_links) - .unwrap_or(true), + .unwrap_or(osc8_default_on), ); // Terminal probe with timeout to prevent hanging on unresponsive terminals @@ -845,6 +858,13 @@ async fn run_event_loop( let turn_elapsed = app.turn_started_at.map(|t| t.elapsed()).unwrap_or_default(); app.turn_started_at = None; + // Roll the just-finished turn's elapsed time into the + // cumulative session work-time (#448 follow-up). The + // footer's `worked Nh Mm` chip reads this so the + // label reflects actual model work, not idle + // uptime since launch. 
+ app.cumulative_turn_duration = + app.cumulative_turn_duration.saturating_add(turn_elapsed); // Stream lock applies per-turn; clear it so the next // turn's chunks pull the view down again until the // user opts out by scrolling up. diff --git a/crates/tui/src/tui/widgets/footer.rs b/crates/tui/src/tui/widgets/footer.rs index 535d5e58..77224088 100644 --- a/crates/tui/src/tui/widgets/footer.rs +++ b/crates/tui/src/tui/widgets/footer.rs @@ -55,10 +55,11 @@ pub struct FooterProps { /// MCP server health chip spans (empty when no MCP servers configured). /// Populated lazily — see [`footer_mcp_chip`]. (#502) pub mcp: Vec>, - /// Cumulative session-elapsed chip spans ("worked 3h 12m"). Empty - /// for the first minute of a session so a fresh launch doesn't - /// flash a `worked 5s` indicator. Populated by [`footer_worked_chip`] - /// from `App::session_started_at`. (#448) + /// Cumulative model-work chip spans ("worked 3h 12m"). Sums the + /// elapsed time of completed turns (from `App::cumulative_turn_duration`), + /// **not** wall-clock since launch — an idle TUI shouldn't claim + /// it's been "working." Empty until cumulative turn time crosses + /// 60s. Populated by [`footer_worked_chip`]. (#448) pub worked: Vec>, /// Snapshot of the global retry-status surface (#499). Sampled once /// at props-build time and rendered as a foreground banner on the @@ -267,9 +268,12 @@ impl FooterProps { .as_ref() .map(|s| s.servers.iter().filter(|server| server.connected).count()); let mcp = footer_mcp_chip(mcp_connected, mcp_configured); - // #448: cumulative-elapsed chip. Sampled at props-build time - // (matches the `retry` capture pattern) so render is pure. - let worked = footer_worked_chip(app.session_started_at.elapsed()); + // #448: cumulative work-time chip. Sums actual turn durations + // (set on `TurnComplete`) rather than wall-clock uptime — a TUI + // that's been open and idle for 4 minutes shouldn't claim + // "worked 4m". 
The chip stays empty until enough turns add up
+        // to cross the 60s threshold inside `footer_worked_chip`.
+        let worked = footer_worked_chip(app.cumulative_turn_duration);
         Self {
             model: app.model.clone(),
             mode_label,
@@ -708,13 +712,48 @@ mod tests {
         assert!(props.cache.is_empty());
         assert!(props.cost.is_empty());
         assert!(props.reasoning_replay.is_empty());
-        // #448: fresh apps don't get a `worked` chip until the
-        // session has been alive for >= 60s. A test app built right
-        // before this assertion is well under that threshold.
+        // #448: fresh apps don't get a `worked` chip until completed
+        // turns have added up to >= 60s of model work. A freshly-built
+        // App has cumulative_turn_duration == 0 so the chip is empty.
         assert!(props.worked.is_empty());
         assert!(props.toast.is_none());
     }
 
+    #[test]
+    fn worked_chip_tracks_completed_turn_time_not_session_uptime() {
+        // Regression test for the v0.8.8 takedown: the chip used to
+        // read `App::session_started_at.elapsed()`, so a TUI that had
+        // been open and idle for several minutes claimed "worked 3m"
+        // even though no turn had ever fired. The chip now sources
+        // from `App::cumulative_turn_duration`, which is only ever
+        // incremented on `TurnComplete`. Pin both directions:
+        //
+        // 1. cumulative == 0 (no turn finished yet) → empty
+        // 2. cumulative crosses 60s (real work) → label shows
+        // 3. wall-clock since launch is irrelevant → not consulted
+        let mut app = make_app();
+        // The whole point: cumulative_turn_duration starts at zero,
+        // so however long the TUI has been open the chip stays empty
+        // until a turn actually completes and adds time.
+        let props = idle_props_for(&app);
+        assert!(
+            props.worked.is_empty(),
+            "idle app with zero cumulative turn time must not show worked chip"
+        );
+
+        // A real turn finishes for 90s of model work — chip lights up.
+        // (`humanize_duration` keeps both units when both are non-zero,
+        // so 90s renders as `1m 30s`, not `1m`.)
+        app.cumulative_turn_duration = std::time::Duration::from_secs(90);
+        let props = idle_props_for(&app);
+        let text: String = props
+            .worked
+            .iter()
+            .map(|s| s.content.as_ref())
+            .collect::<String>();
+        assert_eq!(text, "worked 1m 30s");
+    }
+
     #[test]
     fn footer_worked_chip_hidden_below_one_minute() {
         use std::time::Duration;