From 754e8bd4688f8e3384cb3876b154cb5156ec7bee Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Mon, 4 May 2026 09:55:16 -0500 Subject: [PATCH] fix(v0.8.10): cache-aware compaction and onboarding paste --- crates/tui/src/compaction.rs | 152 +++++++++++++++++++++++++++------ crates/tui/src/tui/ui.rs | 17 +++- crates/tui/src/tui/ui/tests.rs | 10 +++ 3 files changed, 148 insertions(+), 31 deletions(-) diff --git a/crates/tui/src/compaction.rs b/crates/tui/src/compaction.rs index e3f87e23..cda69077 100644 --- a/crates/tui/src/compaction.rs +++ b/crates/tui/src/compaction.rs @@ -60,6 +60,7 @@ const LARGE_CONTEXT_SUMMARY_INPUT_HEAD_CHARS: usize = 72_000; const LARGE_CONTEXT_SUMMARY_INPUT_TAIL_CHARS: usize = 36_000; const LARGE_CONTEXT_SUMMARY_MAX_TOKENS: u32 = 2_048; const LARGE_CONTEXT_WINDOW_TOKENS: u32 = 500_000; +const CACHE_ALIGNED_SUMMARY_CONTEXT_BUDGET_PERCENT: usize = 85; #[derive(Debug, Clone, Copy)] struct SummaryInputLimits { @@ -819,6 +820,92 @@ async fn create_summary( model: &str, ) -> Result { let limits = summary_input_limits_for_model(model); + let request = if should_use_cache_aligned_summary(model, messages) { + build_cache_aligned_summary_request(model, messages, limits) + } else { + build_formatted_summary_request(model, messages, limits) + }; + + let response = client.create_message(request).await?; + // Compaction summary calls are billed by DeepSeek; route the + // tokens through the side-channel so the dashboard total + // matches the website (#526). + crate::cost_status::report(&response.model, &response.usage); + + // Extract text from response + let summary = response + .content + .iter() + .filter_map(|block| match block { + ContentBlock::Text { text, .. } => Some(text.clone()), + _ => None, + }) + .collect::>() + .join("\n"); + + Ok(summary) +} + +fn should_use_cache_aligned_summary(model: &str, messages: &[Message]) -> bool { + let Some(window) = context_window_for_model(model) else { + return false; + }; + if window < LARGE_CONTEXT_WINDOW_TOKENS { + return false; + } + + let budget = usize::try_from(window).unwrap_or(usize::MAX) + * CACHE_ALIGNED_SUMMARY_CONTEXT_BUDGET_PERCENT + / 100; + let summary_prompt_tokens = 512usize; + estimate_tokens(messages).saturating_add(summary_prompt_tokens) <= budget +} + +fn summary_instruction(word_limit: usize) -> String { + format!( + "Summarize the conversation above in a concise but comprehensive way. \ + Preserve key information, decisions made, exact file paths, commands, \ + errors, and tool-result facts needed to continue the work. \ + Tool outputs may be abbreviated only when they are repetitive. \ + Keep it under {word_limit} words." + ) +} + +fn build_cache_aligned_summary_request( + model: &str, + messages: &[Message], + limits: SummaryInputLimits, +) -> MessageRequest { + let mut request_messages = messages.to_vec(); + request_messages.push(Message { + role: "user".to_string(), + content: vec![ContentBlock::Text { + text: summary_instruction(limits.word_limit), + cache_control: None, + }], + }); + + MessageRequest { + model: model.to_string(), + messages: request_messages, + max_tokens: limits.max_tokens, + system: None, + tools: None, + tool_choice: None, + metadata: None, + thinking: None, + reasoning_effort: None, + stream: Some(false), + temperature: Some(0.3), + top_p: None, + } +} + +fn build_formatted_summary_request( + model: &str, + messages: &[Message], + limits: SummaryInputLimits, +) -> MessageRequest { // Format messages for summarization let mut conversation_text = String::new(); for msg in messages { @@ -861,18 +948,14 @@ async fn create_summary( format!("{head}\n\n[... {omitted} characters omitted before summary ...]\n\n{tail}"); } - let request = MessageRequest { + MessageRequest { model: model.to_string(), messages: vec![Message { role: "user".to_string(), content: vec![ContentBlock::Text { text: format!( - "Summarize the following conversation in a concise but comprehensive way. \ - Preserve key information, decisions made, exact file paths, commands, \ - errors, and tool-result facts needed to continue the work. \ - Tool outputs may be abbreviated only when they are repetitive. \ - Keep it under {} words.\n\n---\n\n{conversation_text}", - limits.word_limit + "{}\n\n---\n\n{conversation_text}", + summary_instruction(limits.word_limit) ), cache_control: None, }], @@ -889,26 +972,7 @@ async fn create_summary( stream: Some(false), temperature: Some(0.3), top_p: None, - }; - - let response = client.create_message(request).await?; - // Compaction summary calls are billed by DeepSeek; route the - // tokens through the side-channel so the dashboard total - // matches the website (#526). - crate::cost_status::report(&response.model, &response.usage); - - // Extract text from response - let summary = response - .content - .iter() - .filter_map(|block| match block { - ContentBlock::Text { text, .. } => Some(text.clone()), - _ => None, - }) - .collect::>() - .join("\n"); - - Ok(summary) + } } /// Extract workflow context from messages (files touched, tasks, etc.) @@ -1113,6 +1177,40 @@ mod tests { assert!(v4.max_tokens > legacy.max_tokens); } + #[test] + fn cache_aligned_summary_is_used_for_v4_scale_contexts() { + let messages = vec![msg("user", "Please edit crates/tui/src/compaction.rs")]; + + assert!(should_use_cache_aligned_summary( + "deepseek-v4-flash", + &messages + )); + assert!(!should_use_cache_aligned_summary( + "deepseek-v3.2-128k", + &messages + )); + } + + #[test] + fn cache_aligned_summary_request_preserves_message_prefix() { + let messages = vec![ + msg("user", "Please edit crates/tui/src/compaction.rs"), + msg("assistant", "I will inspect the file."), + ]; + let limits = summary_input_limits_for_model("deepseek-v4-pro"); + let request = build_cache_aligned_summary_request("deepseek-v4-pro", &messages, limits); + + assert_eq!(request.system, None); + assert_eq!(&request.messages[..messages.len()], &messages[..]); + assert_eq!(request.messages.len(), messages.len() + 1); + let last = request.messages.last().expect("summary instruction"); + assert_eq!(last.role, "user"); + assert!(matches!( + &last.content[..], + [ContentBlock::Text { text, .. }] if text.contains("conversation above") + )); + } + #[test] fn estimate_tokens_empty_messages() { let messages: Vec = vec![]; diff --git a/crates/tui/src/tui/ui.rs b/crates/tui/src/tui/ui.rs index 23f1588d..62bc845f 100644 --- a/crates/tui/src/tui/ui.rs +++ b/crates/tui/src/tui/ui.rs @@ -232,6 +232,7 @@ pub async fn run_tui(config: &Config, options: TuiOptions) -> Result<()> { } let backend = CrosstermBackend::new(stdout); let mut terminal = Terminal::new(backend)?; + terminal.clear()?; let event_broker = EventBroker::new(); // Local mutable copy so runtime config flips (e.g. `/provider` switch) @@ -1721,10 +1722,6 @@ async fn run_event_loop( app.delete_api_key_char(); sync_api_key_validation_status(app, false); } - KeyCode::Char(c) if app.onboarding == OnboardingState::ApiKey => { - app.insert_api_key_char(c); - sync_api_key_validation_status(app, false); - } KeyCode::Char('v') | KeyCode::Char('V') if is_paste_shortcut(&key) && app.onboarding == OnboardingState::ApiKey => { @@ -1732,6 +1729,12 @@ async fn run_event_loop( app.paste_api_key_from_clipboard(); sync_api_key_validation_status(app, false); } + KeyCode::Char(c) + if app.onboarding == OnboardingState::ApiKey && is_text_input_key(&key) => + { + app.insert_api_key_char(c); + sync_api_key_validation_status(app, false); + } _ => {} } continue; @@ -7234,6 +7237,12 @@ fn is_paste_shortcut(key: &KeyEvent) -> bool { key.modifiers.contains(KeyModifiers::CONTROL) } +fn is_text_input_key(key: &KeyEvent) -> bool { + !key.modifiers.contains(KeyModifiers::CONTROL) + && !key.modifiers.contains(KeyModifiers::ALT) + && !key.modifiers.contains(KeyModifiers::SUPER) +} + fn is_ctrl_h_backspace(key: &KeyEvent) -> bool { matches!(key.code, KeyCode::Char('h')) && key.modifiers.contains(KeyModifiers::CONTROL) diff --git a/crates/tui/src/tui/ui/tests.rs b/crates/tui/src/tui/ui/tests.rs index 8394e9c3..5a2e69dc 100644 --- a/crates/tui/src/tui/ui/tests.rs +++ b/crates/tui/src/tui/ui/tests.rs @@ -1521,6 +1521,16 @@ fn api_key_validation_warns_without_blocking_unusual_formats() { )); } +#[test] +fn api_key_paste_shortcut_is_not_plain_text_input() { + let ctrl_v = KeyEvent::new(KeyCode::Char('v'), KeyModifiers::CONTROL); + assert!(is_paste_shortcut(&ctrl_v)); + assert!(!is_text_input_key(&ctrl_v)); + + let shifted = KeyEvent::new(KeyCode::Char('A'), KeyModifiers::SHIFT); + assert!(is_text_input_key(&shifted)); +} + #[test] fn jump_to_adjacent_tool_cell_finds_next_and_previous() { let mut app = create_test_app();