fix(cache): store turn metadata on user messages

Closes #934

Squashed from #951 after rebase onto current main, local verification, and green CI.
This commit is contained in:
Hunter Bown
2026-05-07 02:30:05 -05:00
committed by GitHub
parent 7396532e47
commit 018e47daa6
5 changed files with 160 additions and 148 deletions
+35 -7
View File
@@ -829,6 +829,40 @@ impl Engine {
self.emit_session_updated().await;
}
fn turn_metadata_block(&self) -> ContentBlock {
let today = chrono::Local::now().format("%Y-%m-%d").to_string();
let working_set_summary = self
.session
.working_set
.summary_block(&self.config.workspace)
.map(|s| s.trim().to_string())
.filter(|s| !s.is_empty());
let summary = if let Some(working_set_summary) = working_set_summary {
format!("Current local date: {today}\n{working_set_summary}")
} else {
format!("Current local date: {today}")
};
ContentBlock::Text {
text: format!("<turn_meta>\n{summary}\n</turn_meta>"),
cache_control: None,
}
}
fn user_text_message_with_turn_metadata(&self, text: String) -> Message {
Message {
role: "user".to_string(),
content: vec![
self.turn_metadata_block(),
ContentBlock::Text {
text,
cache_control: None,
},
],
}
}
/// Handle a send message operation
#[allow(clippy::too_many_arguments)]
async fn handle_send_message(
@@ -908,13 +942,7 @@ impl Engine {
let force_update_plan_first = should_force_update_plan_first(mode, &content);
// Add user message to session
let user_msg = Message {
role: "user".to_string(),
content: vec![ContentBlock::Text {
text: content,
cache_control: None,
}],
};
let user_msg = self.user_text_message_with_turn_metadata(content);
self.session.add_message(user_msg);
self.session.model = model;
+2 -8
View File
@@ -116,13 +116,7 @@ impl Engine {
if rendered.is_empty() {
return;
}
self.add_session_message(Message {
role: "user".to_string(),
content: vec![ContentBlock::Text {
text: rendered,
cache_control: None,
}],
})
.await;
self.add_session_message(self.user_text_message_with_turn_metadata(rendered))
.await;
}
}
+68 -36
View File
@@ -702,13 +702,9 @@ fn working_set_reaches_model_as_turn_metadata() {
.session
.working_set
.observe_user_message("please inspect src/lib.rs", tmp.path());
engine.session.add_message(Message {
role: "user".to_string(),
content: vec![ContentBlock::Text {
text: "please inspect src/lib.rs".to_string(),
cache_control: None,
}],
});
let user_msg =
engine.user_text_message_with_turn_metadata("please inspect src/lib.rs".to_string());
engine.session.add_message(user_msg);
let messages = engine.messages_with_turn_metadata();
let first_block = messages
@@ -731,13 +727,8 @@ fn turn_metadata_includes_current_local_date_without_working_set() {
..Default::default()
};
let (mut engine, _handle) = Engine::new(config, &Config::default());
engine.session.add_message(Message {
role: "user".to_string(),
content: vec![ContentBlock::Text {
text: "what is today's date?".to_string(),
cache_control: None,
}],
});
let user_msg = engine.user_text_message_with_turn_metadata("what is today's date?".to_string());
engine.session.add_message(user_msg);
let messages = engine.messages_with_turn_metadata();
let first_block = messages
@@ -753,14 +744,50 @@ fn turn_metadata_includes_current_local_date_without_working_set() {
assert!(text.contains(&format!("Current local date: {today}")));
}
#[test]
fn messages_with_turn_metadata_preserves_stored_messages_for_prefix_cache() {
let tmp = tempdir().expect("tempdir");
fs::create_dir_all(tmp.path().join("src")).expect("mkdir");
fs::write(tmp.path().join("src/lib.rs"), "pub fn sample() {}").expect("write");
let config = EngineConfig {
workspace: tmp.path().to_path_buf(),
..Default::default()
};
let (mut engine, _handle) = Engine::new(config, &Config::default());
engine
.session
.working_set
.observe_user_message("inspect src/lib.rs", tmp.path());
let first_user = engine.user_text_message_with_turn_metadata("inspect src/lib.rs".to_string());
engine.session.add_message(first_user.clone());
let first_request = engine.messages_with_turn_metadata();
assert_eq!(first_request, engine.session.messages);
engine.session.add_message(Message {
role: "assistant".to_string(),
content: vec![ContentBlock::Text {
text: "I inspected it.".to_string(),
cache_control: None,
}],
});
engine
.session
.working_set
.observe_user_message("now summarize it", tmp.path());
let second_user = engine.user_text_message_with_turn_metadata("now summarize it".to_string());
engine.session.add_message(second_user);
let second_request = engine.messages_with_turn_metadata();
assert_eq!(second_request, engine.session.messages);
assert_eq!(second_request.first(), Some(&first_user));
}
/// v0.8.11 regression: tool-result messages serialize to role="tool" on
/// the wire but are stored as role="user" internally. Prepending
/// `<turn_meta>` text onto a tool-result message broke the
/// assistant→tool_result invariant and caused HTTP 400 from DeepSeek's
/// API ("insufficient tool messages following tool_calls"). The fix:
/// inject only into messages that have a Text content block and no
/// ToolResult blocks; mid-turn (tool-result is the trailing user
/// message) the injection skips.
/// the wire but are stored as role="user" internally. `<turn_meta>` must
/// be stored only on actual user-text messages, not retroactively added
/// to tool-result messages at request time.
#[test]
fn turn_metadata_skips_tool_result_messages() {
let tmp = tempdir().expect("tempdir");
@@ -778,13 +805,8 @@ fn turn_metadata_skips_tool_result_messages() {
.observe_user_message("inspect src/lib.rs", tmp.path());
// Real user message — should be eligible for injection.
engine.session.add_message(Message {
role: "user".to_string(),
content: vec![ContentBlock::Text {
text: "inspect src/lib.rs".to_string(),
cache_control: None,
}],
});
let user_msg = engine.user_text_message_with_turn_metadata("inspect src/lib.rs".to_string());
engine.session.add_message(user_msg);
// Assistant tool-call.
engine.session.add_message(Message {
role: "assistant".to_string(),
@@ -818,7 +840,7 @@ fn turn_metadata_skips_tool_result_messages() {
Some(ContentBlock::ToolResult { .. })
));
// The earlier real user message receives the turn_meta prefix.
// The earlier real user message already carries the turn_meta prefix.
let real_user = messages.first().expect("first user message");
assert_eq!(real_user.role, "user");
let ContentBlock::Text { text, .. } = real_user.content.first().expect("user text content")
@@ -830,10 +852,8 @@ fn turn_metadata_skips_tool_result_messages() {
}
/// When the turn is mid-execution and the trailing user message is a
/// tool result, no turn_meta is injected at all (rather than landing on
/// some earlier user message and confusing the API's tool-call
/// continuity check). The working_set surfaces again on the next
/// genuine user prompt.
/// tool result, no turn_meta is injected at request time. The working_set
/// surfaces again on the next stored user-text message.
#[test]
fn turn_metadata_skips_when_only_tool_results_trail() {
let tmp = tempdir().expect("tempdir");
@@ -1840,12 +1860,24 @@ async fn post_edit_hook_injects_diagnostics_message_before_next_request() {
let last = engine.session.messages.last().expect("message appended");
assert_eq!(last.role, "user");
let text = match &last.content[0] {
let meta = match &last.content[0] {
crate::models::ContentBlock::Text { text, .. } => text.clone(),
other => panic!("expected text block, got {other:?}"),
};
assert!(text.contains("<diagnostics file=\""));
assert!(text.contains("ERROR [1:14] expected i32, found &str"));
assert!(meta.starts_with("<turn_meta>\n"));
let diagnostic_text = last
.content
.iter()
.find_map(|block| match block {
crate::models::ContentBlock::Text { text, .. }
if text.contains("<diagnostics file=\"") =>
{
Some(text)
}
_ => None,
})
.expect("diagnostics text block");
assert!(diagnostic_text.contains("ERROR [1:14] expected i32, found &str"));
}
#[tokio::test]
+54 -96
View File
@@ -55,14 +55,8 @@ impl Engine {
self.session
.working_set
.observe_user_message(&steer, &self.session.workspace);
self.add_session_message(Message {
role: "user".to_string(),
content: vec![ContentBlock::Text {
text: steer.clone(),
cache_control: None,
}],
})
.await;
self.add_session_message(self.user_text_message_with_turn_metadata(steer.clone()))
.await;
let _ = self
.tx_event
.send(Event::status(format!(
@@ -821,14 +815,8 @@ impl Engine {
self.session
.working_set
.observe_user_message(&steer, &self.session.workspace);
self.add_session_message(Message {
role: "user".to_string(),
content: vec![ContentBlock::Text {
text: steer,
cache_control: None,
}],
})
.await;
self.add_session_message(self.user_text_message_with_turn_metadata(steer))
.await;
}
turn.next_step();
continue;
@@ -881,13 +869,9 @@ impl Engine {
self.session
.working_set
.observe_user_message(&trimmed, &self.session.workspace);
self.add_session_message(Message {
role: "user".to_string(),
content: vec![ContentBlock::Text {
text: trimmed.clone(),
cache_control: None,
}],
})
self.add_session_message(
self.user_text_message_with_turn_metadata(trimmed.clone()),
)
.await;
let _ = self
.tx_event
@@ -968,13 +952,9 @@ impl Engine {
} else {
format!("[REPL round {round_num} output]\n{}", round.stdout)
};
self.add_session_message(Message {
role: "user".to_string(),
content: vec![ContentBlock::Text {
text: feedback,
cache_control: None,
}],
})
self.add_session_message(
self.user_text_message_with_turn_metadata(feedback),
)
.await;
}
Err(e) => {
@@ -984,15 +964,11 @@ impl Engine {
"REPL round {round_num} failed: {e}"
)))
.await;
self.add_session_message(Message {
role: "user".to_string(),
content: vec![ContentBlock::Text {
text: format!(
"[REPL round {round_num} execution failed]\n{e}"
),
cache_control: None,
}],
})
self.add_session_message(
self.user_text_message_with_turn_metadata(format!(
"[REPL round {round_num} execution failed]\n{e}"
)),
)
.await;
}
}
@@ -1756,14 +1732,8 @@ impl Engine {
self.session
.working_set
.observe_user_message(&steer, &self.session.workspace);
self.add_session_message(Message {
role: "user".to_string(),
content: vec![ContentBlock::Text {
text: steer,
cache_control: None,
}],
})
.await;
self.add_session_message(self.user_text_message_with_turn_metadata(steer))
.await;
}
}
@@ -1800,54 +1770,11 @@ impl Engine {
}
pub(super) fn messages_with_turn_metadata(&self) -> Vec<Message> {
let today = chrono::Local::now().format("%Y-%m-%d").to_string();
let working_set_summary = self
.session
.working_set
.summary_block(&self.config.workspace)
.map(|s| s.trim().to_string())
.filter(|s| !s.is_empty());
let summary = if let Some(working_set_summary) = working_set_summary {
format!("Current local date: {today}\n{working_set_summary}")
} else {
format!("Current local date: {today}")
};
let mut messages = self.session.messages.clone();
// v0.8.11 hotfix: tool-result messages are stored as role="user" in
// our internal representation but serialize to role="tool" on the
// wire. Prepending a Text block onto a tool-result message breaks
// the assistant→tool_result invariant — the API rejects the request
// with `"insufficient tool messages following tool_calls"`. Inject
// only into actual user-typed messages, recognizable by having at
// least one Text content block (and no ToolResult blocks).
let Some(last_user) = messages.iter_mut().rev().find(|message| {
message.role == "user"
&& message
.content
.iter()
.all(|block| !matches!(block, ContentBlock::ToolResult { .. }))
&& message
.content
.iter()
.any(|block| matches!(block, ContentBlock::Text { .. }))
}) else {
// No real user message in the trailing slice (e.g. mid-turn
// after a tool call). Skip injection — the working_set will
// surface again on the next genuine user prompt.
return messages;
};
let turn_meta = format!("<turn_meta>\n{summary}\n</turn_meta>");
last_user.content.insert(
0,
ContentBlock::Text {
text: turn_meta,
cache_control: None,
},
);
messages
// `<turn_meta>` is stored on user-text messages when the message is
// appended. Do not rewrite historical messages at request time: doing
// so makes the API prefix differ from the bytes sent in earlier turns
// and destroys DeepSeek's KV prefix cache reuse.
self.session.messages.clone()
}
}
@@ -1887,7 +1814,11 @@ fn resolve_auto_effort(reasoning_effort: Option<&str>, messages: &[Message]) ->
.iter()
.filter_map(|block| {
if let ContentBlock::Text { text, .. } = block {
Some(text.as_str())
if is_turn_metadata_text(text) {
None
} else {
Some(text.as_str())
}
} else {
None
}
@@ -1915,6 +1846,10 @@ fn resolve_auto_effort(reasoning_effort: Option<&str>, messages: &[Message]) ->
}
}
fn is_turn_metadata_text(text: &str) -> bool {
text.trim_start().starts_with("<turn_meta>")
}
#[cfg(test)]
mod tests {
use super::*;
@@ -1935,4 +1870,27 @@ mod tests {
assert!(text.contains("<deepseek:subagent.done>"));
assert!(text.contains("Build passed"));
}
#[test]
fn resolve_auto_effort_ignores_stored_turn_metadata() {
let messages = vec![Message {
role: "user".to_string(),
content: vec![
ContentBlock::Text {
text: "<turn_meta>\nRecent errors: src/failing.rs\n</turn_meta>".to_string(),
cache_control: None,
},
ContentBlock::Text {
text: "hello".to_string(),
cache_control: None,
},
],
}];
assert_eq!(
resolve_auto_effort(Some("auto"), &messages),
Some("high".to_string()),
"auto thinking should classify the user request, not stored metadata"
);
}
}
+1 -1
View File
@@ -448,7 +448,7 @@ pub fn system_prompt_for_mode_with_context_skills_session_and_approval(
If you notice context is getting long (>80%), proactively suggest using `/compact` to the user.\n\n\
### Prompt-cache awareness\n\n\
DeepSeek caches the longest *byte-stable prefix* of every request and charges roughly 100× less for cache-hit tokens than miss tokens. The system prompt above is layered most-static-first specifically so the prefix stays stable turn-over-turn. To keep cache hits high:\n\
- **Working set location:** the current repo working set is injected into the latest user message inside a `<turn_meta>` block. Treat it as high-priority turn metadata, not as a stable system-prompt section.\n\
- **Working set location:** the current repo working set is stored on new user messages inside a `<turn_meta>` block. Treat it as high-priority turn metadata, not as a stable system-prompt section.\n\
- **Append, don't reorder.** New context goes at the end (latest user / tool messages). Reshuffling earlier messages or rewriting their content invalidates the cache for everything after the change.\n\
- **Don't paraphrase quoted content.** If you've already read a file, refer to it by path or line range instead of re-quoting it with different formatting.\n\
- **Use `/compact` as a hard reset, not a tweak.** Compaction is meant for when the cache is already losing it intentionally rewrites the prefix to a shorter summary. Don't trigger it for small wins.\n\