fix(v0.8.10): cache-aware compaction and onboarding paste

This commit is contained in:
Hunter Bown
2026-05-04 09:55:16 -05:00
parent 874e8b4b78
commit 754e8bd468
3 changed files with 148 additions and 31 deletions
+125 -27
View File
@@ -60,6 +60,7 @@ const LARGE_CONTEXT_SUMMARY_INPUT_HEAD_CHARS: usize = 72_000;
const LARGE_CONTEXT_SUMMARY_INPUT_TAIL_CHARS: usize = 36_000;
const LARGE_CONTEXT_SUMMARY_MAX_TOKENS: u32 = 2_048;
const LARGE_CONTEXT_WINDOW_TOKENS: u32 = 500_000;
const CACHE_ALIGNED_SUMMARY_CONTEXT_BUDGET_PERCENT: usize = 85;
#[derive(Debug, Clone, Copy)]
struct SummaryInputLimits {
@@ -819,6 +820,92 @@ async fn create_summary(
model: &str,
) -> Result<String> {
let limits = summary_input_limits_for_model(model);
let request = if should_use_cache_aligned_summary(model, messages) {
build_cache_aligned_summary_request(model, messages, limits)
} else {
build_formatted_summary_request(model, messages, limits)
};
let response = client.create_message(request).await?;
// Compaction summary calls are billed by DeepSeek; route the
// tokens through the side-channel so the dashboard total
// matches the website (#526).
crate::cost_status::report(&response.model, &response.usage);
// Extract text from response
let summary = response
.content
.iter()
.filter_map(|block| match block {
ContentBlock::Text { text, .. } => Some(text.clone()),
_ => None,
})
.collect::<Vec<_>>()
.join("\n");
Ok(summary)
}
fn should_use_cache_aligned_summary(model: &str, messages: &[Message]) -> bool {
let Some(window) = context_window_for_model(model) else {
return false;
};
if window < LARGE_CONTEXT_WINDOW_TOKENS {
return false;
}
let budget = usize::try_from(window).unwrap_or(usize::MAX)
* CACHE_ALIGNED_SUMMARY_CONTEXT_BUDGET_PERCENT
/ 100;
let summary_prompt_tokens = 512usize;
estimate_tokens(messages).saturating_add(summary_prompt_tokens) <= budget
}
fn summary_instruction(word_limit: usize) -> String {
format!(
"Summarize the conversation above in a concise but comprehensive way. \
Preserve key information, decisions made, exact file paths, commands, \
errors, and tool-result facts needed to continue the work. \
Tool outputs may be abbreviated only when they are repetitive. \
Keep it under {word_limit} words."
)
}
fn build_cache_aligned_summary_request(
model: &str,
messages: &[Message],
limits: SummaryInputLimits,
) -> MessageRequest {
let mut request_messages = messages.to_vec();
request_messages.push(Message {
role: "user".to_string(),
content: vec![ContentBlock::Text {
text: summary_instruction(limits.word_limit),
cache_control: None,
}],
});
MessageRequest {
model: model.to_string(),
messages: request_messages,
max_tokens: limits.max_tokens,
system: None,
tools: None,
tool_choice: None,
metadata: None,
thinking: None,
reasoning_effort: None,
stream: Some(false),
temperature: Some(0.3),
top_p: None,
}
}
fn build_formatted_summary_request(
model: &str,
messages: &[Message],
limits: SummaryInputLimits,
) -> MessageRequest {
// Format messages for summarization
let mut conversation_text = String::new();
for msg in messages {
@@ -861,18 +948,14 @@ async fn create_summary(
format!("{head}\n\n[... {omitted} characters omitted before summary ...]\n\n{tail}");
}
let request = MessageRequest {
MessageRequest {
model: model.to_string(),
messages: vec![Message {
role: "user".to_string(),
content: vec![ContentBlock::Text {
text: format!(
"Summarize the following conversation in a concise but comprehensive way. \
Preserve key information, decisions made, exact file paths, commands, \
errors, and tool-result facts needed to continue the work. \
Tool outputs may be abbreviated only when they are repetitive. \
Keep it under {} words.\n\n---\n\n{conversation_text}",
limits.word_limit
"{}\n\n---\n\n{conversation_text}",
summary_instruction(limits.word_limit)
),
cache_control: None,
}],
@@ -889,26 +972,7 @@ async fn create_summary(
stream: Some(false),
temperature: Some(0.3),
top_p: None,
};
let response = client.create_message(request).await?;
// Compaction summary calls are billed by DeepSeek; route the
// tokens through the side-channel so the dashboard total
// matches the website (#526).
crate::cost_status::report(&response.model, &response.usage);
// Extract text from response
let summary = response
.content
.iter()
.filter_map(|block| match block {
ContentBlock::Text { text, .. } => Some(text.clone()),
_ => None,
})
.collect::<Vec<_>>()
.join("\n");
Ok(summary)
}
}
/// Extract workflow context from messages (files touched, tasks, etc.)
@@ -1113,6 +1177,40 @@ mod tests {
assert!(v4.max_tokens > legacy.max_tokens);
}
#[test]
fn cache_aligned_summary_is_used_for_v4_scale_contexts() {
let messages = vec![msg("user", "Please edit crates/tui/src/compaction.rs")];
assert!(should_use_cache_aligned_summary(
"deepseek-v4-flash",
&messages
));
assert!(!should_use_cache_aligned_summary(
"deepseek-v3.2-128k",
&messages
));
}
#[test]
fn cache_aligned_summary_request_preserves_message_prefix() {
let messages = vec![
msg("user", "Please edit crates/tui/src/compaction.rs"),
msg("assistant", "I will inspect the file."),
];
let limits = summary_input_limits_for_model("deepseek-v4-pro");
let request = build_cache_aligned_summary_request("deepseek-v4-pro", &messages, limits);
assert_eq!(request.system, None);
assert_eq!(&request.messages[..messages.len()], &messages[..]);
assert_eq!(request.messages.len(), messages.len() + 1);
let last = request.messages.last().expect("summary instruction");
assert_eq!(last.role, "user");
assert!(matches!(
&last.content[..],
[ContentBlock::Text { text, .. }] if text.contains("conversation above")
));
}
#[test]
fn estimate_tokens_empty_messages() {
let messages: Vec<Message> = vec![];
+13 -4
View File
@@ -232,6 +232,7 @@ pub async fn run_tui(config: &Config, options: TuiOptions) -> Result<()> {
}
let backend = CrosstermBackend::new(stdout);
let mut terminal = Terminal::new(backend)?;
terminal.clear()?;
let event_broker = EventBroker::new();
// Local mutable copy so runtime config flips (e.g. `/provider` switch)
@@ -1721,10 +1722,6 @@ async fn run_event_loop(
app.delete_api_key_char();
sync_api_key_validation_status(app, false);
}
KeyCode::Char(c) if app.onboarding == OnboardingState::ApiKey => {
app.insert_api_key_char(c);
sync_api_key_validation_status(app, false);
}
KeyCode::Char('v') | KeyCode::Char('V')
if is_paste_shortcut(&key) && app.onboarding == OnboardingState::ApiKey =>
{
@@ -1732,6 +1729,12 @@ async fn run_event_loop(
app.paste_api_key_from_clipboard();
sync_api_key_validation_status(app, false);
}
KeyCode::Char(c)
if app.onboarding == OnboardingState::ApiKey && is_text_input_key(&key) =>
{
app.insert_api_key_char(c);
sync_api_key_validation_status(app, false);
}
_ => {}
}
continue;
@@ -7234,6 +7237,12 @@ fn is_paste_shortcut(key: &KeyEvent) -> bool {
key.modifiers.contains(KeyModifiers::CONTROL)
}
fn is_text_input_key(key: &KeyEvent) -> bool {
!key.modifiers.contains(KeyModifiers::CONTROL)
&& !key.modifiers.contains(KeyModifiers::ALT)
&& !key.modifiers.contains(KeyModifiers::SUPER)
}
fn is_ctrl_h_backspace(key: &KeyEvent) -> bool {
matches!(key.code, KeyCode::Char('h'))
&& key.modifiers.contains(KeyModifiers::CONTROL)
+10
View File
@@ -1521,6 +1521,16 @@ fn api_key_validation_warns_without_blocking_unusual_formats() {
));
}
#[test]
fn api_key_paste_shortcut_is_not_plain_text_input() {
let ctrl_v = KeyEvent::new(KeyCode::Char('v'), KeyModifiers::CONTROL);
assert!(is_paste_shortcut(&ctrl_v));
assert!(!is_text_input_key(&ctrl_v));
let shifted = KeyEvent::new(KeyCode::Char('A'), KeyModifiers::SHIFT);
assert!(is_text_input_key(&shifted));
}
#[test]
fn jump_to_adjacent_tool_cell_finds_next_and_previous() {
let mut app = create_test_app();