diff --git a/crates/tui/src/client.rs b/crates/tui/src/client.rs index b3473eed..26ce3266 100644 --- a/crates/tui/src/client.rs +++ b/crates/tui/src/client.rs @@ -809,7 +809,7 @@ impl LlmClient for DeepSeekClient { // misses a case (e.g. a session restored from disk, a sub-agent // adding messages directly, or a cached prefix mismatch), this pass // still produces a valid request. - sanitize_thinking_mode_messages( + let replay_input_tokens = sanitize_thinking_mode_messages( &mut body, &request.model, request.reasoning_effort.as_deref(), @@ -907,7 +907,7 @@ impl LlmClient for DeepSeekClient { // Stream complete } else if let Ok(chunk_json) = serde_json::from_str::(&data) { // Parse the SSE chunk into stream events - for event in parse_sse_chunk( + for mut event in parse_sse_chunk( &chunk_json, &mut content_index, &mut text_started, @@ -915,6 +915,19 @@ impl LlmClient for DeepSeekClient { &mut tool_indices, is_reasoning_model, ) { + // Stamp the client-side replay-token estimate + // onto the final usage so the UI can surface + // it (#30). We compute it pre-request and + // overlay it on the server-reported usage at + // stream completion. + if let Some(tokens) = replay_input_tokens + && let StreamEvent::MessageDelta { + usage: Some(usage), + .. + } = &mut event + { + usage.reasoning_replay_tokens = Some(tokens); + } yield Ok(event); } } @@ -1690,13 +1703,15 @@ fn map_tool_choice_for_chat(choice: &Value) -> Option { /// budget is being spent re-sending prior thinking traces (V4 §5.1.1 /// "Interleaved Thinking" requires the full trace to be replayed across user /// message boundaries in tool-calling sessions). -fn sanitize_thinking_mode_messages(body: &mut Value, model: &str, effort: Option<&str>) { +fn sanitize_thinking_mode_messages( + body: &mut Value, + model: &str, + effort: Option<&str>, +) -> Option { if !should_replay_reasoning_content(model, effort) { - return; + return None; } - let Some(messages) = body.get_mut("messages").and_then(Value::as_array_mut) else { - return; - }; + let messages = body.get_mut("messages").and_then(Value::as_array_mut)?; let mut substitutions: u32 = 0; let mut replay_chars: u64 = 0; let mut replay_messages: u32 = 0; @@ -1728,14 +1743,16 @@ fn sanitize_thinking_mode_messages(body: &mut Value, model: &str, effort: Option "Final sanitizer: {substitutions} assistant message(s) needed reasoning_content placeholder", )); } - if replay_messages > 0 { - // ~4 chars/token is the standard rough estimate; DeepSeek tokens skew - // a touch shorter on Chinese/code but this is order-of-magnitude info. - let approx_tokens = replay_chars / 4; - logging::info(format!( - "Reasoning-content replay: {replay_messages} assistant message(s), ~{approx_tokens} input tokens ({replay_chars} chars) being re-sent in this request", - )); + if replay_messages == 0 { + return None; } + // ~4 chars/token is the standard rough estimate; DeepSeek tokens skew + // a touch shorter on Chinese/code but this is order-of-magnitude info. + let approx_tokens = (replay_chars / 4).min(u64::from(u32::MAX)) as u32; + logging::info(format!( + "Reasoning-content replay: {replay_messages} assistant message(s), ~{approx_tokens} input tokens ({replay_chars} chars) being re-sent in this request", + )); + Some(approx_tokens) } /// Sums the byte length of `reasoning_content` across all assistant messages in @@ -2034,6 +2051,7 @@ fn parse_usage(usage: Option<&Value>) -> Usage { prompt_cache_hit_tokens, prompt_cache_miss_tokens, reasoning_tokens, + reasoning_replay_tokens: None, server_tool_use, } } @@ -3463,7 +3481,11 @@ mod tests { ] }); - sanitize_thinking_mode_messages(&mut body, "deepseek-v4-pro", Some("max")); + let approx_tokens = + sanitize_thinking_mode_messages(&mut body, "deepseek-v4-pro", Some("max")) + .expect("multi-turn thinking-mode conversation should report replay tokens"); + // ~4 chars/token; 46 bytes of reasoning -> 11 tokens. + assert_eq!(approx_tokens, 11); let chars = count_reasoning_replay_chars(&body); // "I need to call tool A first." (28) + "Now I call tool B." (18) = 46 @@ -3483,6 +3505,21 @@ mod tests { assert_eq!(assistant_with_reasoning, 2); } + /// Issue #30: when no thinking-mode replay applies (non-thinking model or + /// empty conversation), the sanitizer returns `None` so the footer chip + /// stays hidden. + #[test] + fn sanitize_thinking_mode_returns_none_for_non_thinking_model() { + let mut body = json!({ + "model": "deepseek-chat", + "messages": [ + { "role": "user", "content": "hi" } + ] + }); + let result = sanitize_thinking_mode_messages(&mut body, "deepseek-chat", None); + assert!(result.is_none()); + } + #[test] fn sanitize_thinking_mode_counts_substituted_placeholder() { // An assistant tool-call message is missing reasoning_content; the diff --git a/crates/tui/src/models.rs b/crates/tui/src/models.rs index 67a85f75..93f7c920 100644 --- a/crates/tui/src/models.rs +++ b/crates/tui/src/models.rs @@ -191,6 +191,12 @@ pub struct Usage { pub prompt_cache_miss_tokens: Option, #[serde(skip_serializing_if = "Option::is_none")] pub reasoning_tokens: Option, + /// Approximate input tokens spent re-sending prior `reasoning_content` + /// across user-message boundaries in DeepSeek V4 thinking-mode tool-calling + /// turns (V4 §5.1.1 "Interleaved Thinking"). Estimated client-side at + /// ~4 chars/token from the outgoing request body, before the model sees it. + #[serde(skip_serializing_if = "Option::is_none")] + pub reasoning_replay_tokens: Option, #[serde(skip_serializing_if = "Option::is_none")] pub server_tool_use: Option, } diff --git a/crates/tui/src/tui/app.rs b/crates/tui/src/tui/app.rs index 98d871da..079b0512 100644 --- a/crates/tui/src/tui/app.rs +++ b/crates/tui/src/tui/app.rs @@ -506,6 +506,10 @@ pub struct App { pub last_prompt_cache_hit_tokens: Option, /// DeepSeek context-cache miss tokens from the last API call. pub last_prompt_cache_miss_tokens: Option, + /// Approximate input tokens spent re-sending prior `reasoning_content` on + /// the last thinking-mode tool-calling turn (V4 §5.1.1 "Interleaved + /// Thinking"). Computed client-side at ~4 chars/token. + pub last_reasoning_replay_tokens: Option, /// Cached git context snapshot for the footer. pub workspace_context: Option, /// Timestamp for cached workspace context. @@ -778,6 +782,7 @@ impl App { last_completion_tokens: None, last_prompt_cache_hit_tokens: None, last_prompt_cache_miss_tokens: None, + last_reasoning_replay_tokens: None, workspace_context: None, workspace_context_refreshed_at: None, task_panel: Vec::new(), diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs index 60968ae7..863daf1d 100644 --- a/crates/tui/src/tui/ui.rs +++ b/crates/tui/src/tui/ui.rs @@ -16,6 +16,7 @@ use crossterm::{ execute, terminal::{EnterAlternateScreen, LeaveAlternateScreen, disable_raw_mode, enable_raw_mode}, }; +use ignore::WalkBuilder; use ratatui::{ Frame, Terminal, backend::CrosstermBackend, @@ -164,6 +165,7 @@ pub async fn run_tui(config: &Config, options: TuiOptions) -> Result<()> { app.last_completion_tokens = None; app.last_prompt_cache_hit_tokens = None; app.last_prompt_cache_miss_tokens = None; + app.last_reasoning_replay_tokens = None; if let Some(prompt) = saved.system_prompt { app.system_prompt = Some(SystemPrompt::Text(prompt)); } @@ -540,6 +542,7 @@ async fn run_event_loop( app.last_completion_tokens = Some(usage.output_tokens); app.last_prompt_cache_hit_tokens = usage.prompt_cache_hit_tokens; app.last_prompt_cache_miss_tokens = usage.prompt_cache_miss_tokens; + app.last_reasoning_replay_tokens = usage.reasoning_replay_tokens; if let Some(error) = error { app.status_message = Some(format!("Turn failed: {error}")); } @@ -1266,6 +1269,9 @@ async fn run_event_loop( if try_autocomplete_slash_command(app) { continue; } + if try_autocomplete_file_mention(app) { + continue; + } app.cycle_mode(); } KeyCode::BackTab => { @@ -1610,6 +1616,167 @@ fn try_autocomplete_slash_command(app: &mut App) -> bool { true } +/// Maximum file-mention completion candidates to consider per keypress. Caps +/// the cost of walking large workspaces; subsequent keystrokes narrow further. +const FILE_MENTION_COMPLETION_LIMIT: usize = 64; + +/// Maximum directory depth walked when completing a file mention. Mirrors the +/// existing `project_tree` cutoff and keeps Tab snappy in deep monorepos. +const FILE_MENTION_COMPLETION_DEPTH: usize = 6; + +/// If the cursor sits inside a `@` token in the input, return the +/// byte offset where the `@` starts (so we can splice in a completion) and +/// the partial path the user has typed so far. The token stops at whitespace +/// or the end of input. Returns `None` when the cursor is outside any mention +/// or the token is empty (`@` with nothing after it). +fn partial_file_mention_at_cursor(input: &str, cursor_chars: usize) -> Option<(usize, String)> { + let chars: Vec = input.chars().collect(); + if cursor_chars > chars.len() { + return None; + } + // Walk left from the cursor until we find an `@` or a whitespace; if + // whitespace comes first the cursor isn't inside a mention. + let mut start_chars = cursor_chars; + while start_chars > 0 { + let prev = chars[start_chars - 1]; + if prev == '@' { + start_chars -= 1; + break; + } + if prev.is_whitespace() { + return None; + } + start_chars -= 1; + } + if start_chars == cursor_chars || chars.get(start_chars) != Some(&'@') { + return None; + } + // Confirm the `@` itself is at a valid mention boundary. + if !is_file_mention_start(&chars, start_chars) { + return None; + } + // Consume from the `@` to the next whitespace (the end of the token). + let mut end_chars = start_chars + 1; + while end_chars < chars.len() && !chars[end_chars].is_whitespace() { + end_chars += 1; + } + let partial: String = chars[start_chars + 1..end_chars].iter().collect(); + let byte_start: usize = chars[..start_chars].iter().map(|c| c.len_utf8()).sum(); + Some((byte_start, partial)) +} + +/// Walk the workspace and return relative paths whose representation matches +/// the partial mention. A file matches when its case-insensitive relative +/// path either starts with the partial or contains it as a substring; the +/// former rank earlier so a partial like `docs/de` resolves to +/// `docs/deepseek_v4.pdf` before any path that merely contains those bytes. +fn find_file_mention_completions(workspace: &Path, partial: &str, limit: usize) -> Vec { + if limit == 0 { + return Vec::new(); + } + let needle = partial.to_lowercase(); + let mut prefix_hits: Vec = Vec::new(); + let mut substring_hits: Vec = Vec::new(); + + let mut builder = WalkBuilder::new(workspace); + builder + .hidden(true) + .follow_links(false) + .max_depth(Some(FILE_MENTION_COMPLETION_DEPTH)); + + for entry in builder.build().flatten() { + if prefix_hits.len() + substring_hits.len() >= limit { + break; + } + let path = entry.path(); + let Ok(rel) = path.strip_prefix(workspace) else { + continue; + }; + let rel_str = rel.to_string_lossy().replace('\\', "/"); + if rel_str.is_empty() { + continue; + } + let is_dir = entry.file_type().is_some_and(|ft| ft.is_dir()); + let candidate = if is_dir { + format!("{rel_str}/") + } else { + rel_str.clone() + }; + let lower = candidate.to_lowercase(); + if needle.is_empty() || lower.starts_with(&needle) { + prefix_hits.push(candidate); + } else if lower.contains(&needle) { + substring_hits.push(candidate); + } + } + + prefix_hits.sort(); + substring_hits.sort(); + prefix_hits.extend(substring_hits); + prefix_hits.truncate(limit); + prefix_hits +} + +/// Tab-completion handler for `@file` mentions. Mirrors the slash-command +/// flow: a single match is applied directly; multiple matches with a longer +/// shared prefix extend the partial; otherwise the first few candidates are +/// surfaced via the status line. Returns true when the input was modified or +/// a suggestion was offered, so the caller can short-circuit other handlers. +fn try_autocomplete_file_mention(app: &mut App) -> bool { + let Some((byte_start, partial)) = + partial_file_mention_at_cursor(&app.input, app.cursor_position) + else { + return false; + }; + let workspace = app.workspace.clone(); + let candidates = + find_file_mention_completions(&workspace, &partial, FILE_MENTION_COMPLETION_LIMIT); + if candidates.is_empty() { + app.status_message = Some(format!("No files match @{partial}")); + return true; + } + if candidates.len() == 1 { + replace_file_mention(app, byte_start, &partial, &candidates[0]); + app.status_message = Some(format!("Attached @{}", candidates[0])); + return true; + } + let candidate_refs: Vec<&str> = candidates.iter().map(String::as_str).collect(); + let shared = longest_common_prefix(&candidate_refs); + if shared.len() > partial.len() { + replace_file_mention(app, byte_start, &partial, shared); + app.status_message = Some(format!("@{shared}…")); + return true; + } + let preview = candidates + .iter() + .take(5) + .map(|c| format!("@{c}")) + .collect::>() + .join(", "); + app.status_message = Some(format!("Matches: {preview}")); + true +} + +/// Splice a completion into the input, replacing the `@` token at +/// `byte_start` with `@`. Cursor moves to the end of the new +/// token so further keystrokes extend (or escape via space) naturally. +fn replace_file_mention(app: &mut App, byte_start: usize, partial: &str, replacement: &str) { + let original_token_len = '@'.len_utf8() + partial.len(); + let original_token_end = byte_start + original_token_len; + let mut new_input = + String::with_capacity(app.input.len() - original_token_len + 1 + replacement.len()); + new_input.push_str(&app.input[..byte_start]); + new_input.push('@'); + new_input.push_str(replacement); + if original_token_end < app.input.len() { + new_input.push_str(&app.input[original_token_end..]); + } + let new_cursor_chars = + app.input[..byte_start].chars().count() + 1 + replacement.chars().count(); + app.input = new_input; + app.cursor_position = new_cursor_chars; +} + fn longest_common_prefix<'a>(values: &[&'a str]) -> &'a str { let Some(first) = values.first().copied() else { return ""; @@ -2152,6 +2319,7 @@ async fn dispatch_user_message( app.last_completion_tokens = None; app.last_prompt_cache_hit_tokens = None; app.last_prompt_cache_miss_tokens = None; + app.last_reasoning_replay_tokens = None; // Persist immediately so abrupt termination can recover this in-flight turn. persist_checkpoint(app); @@ -3592,8 +3760,9 @@ fn render_footer(f: &mut Frame, area: Rect, app: &mut App) { fn footer_auxiliary_spans(app: &App, max_width: usize) -> Vec> { // Context % is already shown in the header signal bar — don't // duplicate it in the footer. The footer carries unique info only: - // coherence state, cache hit rate, and session cost. + // coherence state, reasoning replay tokens, cache hit rate, and session cost. let coherence_spans = footer_coherence_spans(app); + let replay_spans = footer_reasoning_replay_spans(app); let cache_spans = footer_cache_spans(app); let cost_spans = if app.session_cost > 0.001 { vec![Span::styled( @@ -3604,11 +3773,12 @@ fn footer_auxiliary_spans(app: &App, max_width: usize) -> Vec> { Vec::new() }; - let parts: Vec<&Vec>> = [&coherence_spans, &cache_spans, &cost_spans] - .iter() - .filter(|spans| !spans.is_empty()) - .copied() - .collect(); + let parts: Vec<&Vec>> = + [&coherence_spans, &replay_spans, &cache_spans, &cost_spans] + .iter() + .filter(|spans| !spans.is_empty()) + .copied() + .collect(); // Try to fit as many parts as possible, dropping from the end. for end in (0..=parts.len()).rev() { @@ -3660,6 +3830,30 @@ fn footer_cache_spans(app: &App) -> Vec> { )] } +/// Render a footer chip showing the size of the `reasoning_content` block +/// replayed on the most recent thinking-mode tool-calling turn (#30). +/// +/// Stays hidden when the count is zero (non-thinking models, first turn, or +/// turns with no tool calls). When replay tokens dominate the input budget +/// (>50%), the chip turns warning-coloured so users notice that thinking +/// replay is the main consumer of context. +fn footer_reasoning_replay_spans(app: &App) -> Vec> { + let Some(replay) = app.last_reasoning_replay_tokens else { + return Vec::new(); + }; + if replay == 0 { + return Vec::new(); + } + let label = format!("rsn {}", format_token_count_compact(u64::from(replay))); + let color = match app.last_prompt_tokens { + Some(input) if input > 0 && f64::from(replay) / f64::from(input) > 0.5 => { + palette::STATUS_WARNING + } + _ => palette::TEXT_MUTED, + }; + vec![Span::styled(label, Style::default().fg(color))] +} + fn footer_toast_spans( toast: &crate::tui::app::StatusToast, max_width: usize, diff --git a/crates/tui/src/tui/ui/tests.rs b/crates/tui/src/tui/ui/tests.rs index 0b0468b8..2f22ddef 100644 --- a/crates/tui/src/tui/ui/tests.rs +++ b/crates/tui/src/tui/ui/tests.rs @@ -451,6 +451,34 @@ fn footer_auxiliary_spans_show_cache_and_cost_when_roomy() { ); } +#[test] +fn footer_auxiliary_spans_show_reasoning_replay_chip() { + // Issue #30: when a thinking-mode tool-calling turn replays prior + // reasoning_content, the footer surfaces the approximate input-token + // cost so users can see why their context filled up. + let mut app = create_test_app(); + app.last_prompt_tokens = Some(48_000); + app.last_reasoning_replay_tokens = Some(8_200); + + let spans = footer_auxiliary_spans(&app, 64); + let text = spans_text(&spans); + assert!( + text.contains("rsn 8.2k"), + "expected replay chip, got {text:?}" + ); +} + +#[test] +fn footer_auxiliary_spans_hide_reasoning_replay_when_zero() { + let mut app = create_test_app(); + app.last_prompt_tokens = Some(48_000); + app.last_reasoning_replay_tokens = Some(0); + + let spans = footer_auxiliary_spans(&app, 64); + let text = spans_text(&spans); + assert!(!text.contains("rsn"), "zero replay must not render chip"); +} + #[test] fn context_usage_snapshot_prefers_estimate_when_reported_exceeds_window() { let mut app = create_test_app(); @@ -895,3 +923,125 @@ fn jump_to_adjacent_tool_cell_finds_next_and_previous() { SearchDirection::Backward )); } + +#[test] +fn partial_file_mention_finds_token_under_cursor() { + // Cursor in middle of `@docs/de` should be detected as a partial mention. + let input = "look at @docs/de please"; + let cursor = "look at @docs/de".chars().count(); + let (start, partial) = partial_file_mention_at_cursor(input, cursor) + .expect("cursor inside mention should yield a partial"); + assert_eq!(start, "look at ".len(), "byte_start of @ in input"); + assert_eq!(partial, "docs/de"); +} + +#[test] +fn partial_file_mention_returns_none_when_cursor_outside() { + let input = "look at @docs/de please"; + // Cursor after "please" — past the whitespace following the mention. + let cursor = input.chars().count(); + assert!(partial_file_mention_at_cursor(input, cursor).is_none()); + + // Cursor before the `@` — not inside any mention either. + let early_cursor = "look".chars().count(); + assert!(partial_file_mention_at_cursor(input, early_cursor).is_none()); +} + +#[test] +fn partial_file_mention_handles_email_addresses() { + // The `@` in `user@example.com` is preceded by a non-boundary char so + // it's not treated as a file-mention. + let input = "ping user@example.com now"; + let cursor = "ping user@example.com".chars().count(); + assert!(partial_file_mention_at_cursor(input, cursor).is_none()); +} + +#[test] +fn file_mention_completion_finds_unique_match() { + let tmpdir = TempDir::new().expect("tempdir"); + std::fs::write(tmpdir.path().join("README.md"), "readme").unwrap(); + std::fs::create_dir_all(tmpdir.path().join("docs")).unwrap(); + std::fs::write(tmpdir.path().join("docs/deepseek_v4.pdf"), b"%PDF-").unwrap(); + + let matches = find_file_mention_completions(tmpdir.path(), "docs/de", 16); + assert_eq!(matches, vec!["docs/deepseek_v4.pdf".to_string()]); +} + +#[test] +fn file_mention_completion_ranks_prefix_before_substring() { + let tmpdir = TempDir::new().expect("tempdir"); + std::fs::write(tmpdir.path().join("README.md"), "x").unwrap(); + std::fs::create_dir_all(tmpdir.path().join("nested")).unwrap(); + std::fs::write(tmpdir.path().join("nested/README.md"), "x").unwrap(); + + let matches = find_file_mention_completions(tmpdir.path(), "README", 16); + // Top-level README (prefix match) outranks the nested one (substring). + assert_eq!(matches.first().map(String::as_str), Some("README.md")); +} + +#[test] +fn try_autocomplete_file_mention_unique_replaces_partial() { + let tmpdir = TempDir::new().expect("tempdir"); + std::fs::create_dir_all(tmpdir.path().join("docs")).unwrap(); + std::fs::write(tmpdir.path().join("docs/deepseek_v4.pdf"), b"%PDF-").unwrap(); + + let mut app = create_test_app(); + app.workspace = tmpdir.path().to_path_buf(); + app.input = "summarize @docs/de".to_string(); + app.cursor_position = app.input.chars().count(); + + assert!(try_autocomplete_file_mention(&mut app)); + assert_eq!(app.input, "summarize @docs/deepseek_v4.pdf"); + assert_eq!(app.cursor_position, app.input.chars().count()); +} + +#[test] +fn try_autocomplete_file_mention_extends_to_common_prefix() { + let tmpdir = TempDir::new().expect("tempdir"); + std::fs::create_dir_all(tmpdir.path().join("crates/tui")).unwrap(); + std::fs::write(tmpdir.path().join("crates/tui/lib.rs"), "//").unwrap(); + std::fs::write(tmpdir.path().join("crates/tui/main.rs"), "//").unwrap(); + + let mut app = create_test_app(); + app.workspace = tmpdir.path().to_path_buf(); + app.input = "@crates/tui/".to_string(); + app.cursor_position = app.input.chars().count(); + + assert!(try_autocomplete_file_mention(&mut app)); + // Both files share the `crates/tui/` prefix and one more letter is + // not unique (`l` vs `m`), so the partial extends to the common prefix + // unchanged here, with the status surfacing both candidates. + assert!(app.input.starts_with("@crates/tui/")); + let preview = app + .status_message + .as_deref() + .expect("status message should describe candidates"); + assert!(preview.contains("@crates/tui/lib.rs")); + assert!(preview.contains("@crates/tui/main.rs")); +} + +#[test] +fn try_autocomplete_file_mention_no_match_reports_status() { + let tmpdir = TempDir::new().expect("tempdir"); + std::fs::write(tmpdir.path().join("README.md"), "x").unwrap(); + + let mut app = create_test_app(); + app.workspace = tmpdir.path().to_path_buf(); + app.input = "@nonexistent_xyz".to_string(); + app.cursor_position = app.input.chars().count(); + + assert!(try_autocomplete_file_mention(&mut app)); + assert_eq!(app.input, "@nonexistent_xyz"); + assert_eq!( + app.status_message.as_deref(), + Some("No files match @nonexistent_xyz") + ); +} + +#[test] +fn try_autocomplete_file_mention_returns_false_outside_mention() { + let mut app = create_test_app(); + app.input = "no mention here".to_string(); + app.cursor_position = app.input.chars().count(); + assert!(!try_autocomplete_file_mention(&mut app)); +}