fix(cache): store turn metadata on user messages
Closes #934 Squashed from #951 after rebase onto current main, local verification, and green CI.
This commit is contained in:
@@ -829,6 +829,40 @@ impl Engine {
|
||||
self.emit_session_updated().await;
|
||||
}
|
||||
|
||||
fn turn_metadata_block(&self) -> ContentBlock {
|
||||
let today = chrono::Local::now().format("%Y-%m-%d").to_string();
|
||||
let working_set_summary = self
|
||||
.session
|
||||
.working_set
|
||||
.summary_block(&self.config.workspace)
|
||||
.map(|s| s.trim().to_string())
|
||||
.filter(|s| !s.is_empty());
|
||||
|
||||
let summary = if let Some(working_set_summary) = working_set_summary {
|
||||
format!("Current local date: {today}\n{working_set_summary}")
|
||||
} else {
|
||||
format!("Current local date: {today}")
|
||||
};
|
||||
|
||||
ContentBlock::Text {
|
||||
text: format!("<turn_meta>\n{summary}\n</turn_meta>"),
|
||||
cache_control: None,
|
||||
}
|
||||
}
|
||||
|
||||
fn user_text_message_with_turn_metadata(&self, text: String) -> Message {
|
||||
Message {
|
||||
role: "user".to_string(),
|
||||
content: vec![
|
||||
self.turn_metadata_block(),
|
||||
ContentBlock::Text {
|
||||
text,
|
||||
cache_control: None,
|
||||
},
|
||||
],
|
||||
}
|
||||
}
|
||||
|
||||
/// Handle a send message operation
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn handle_send_message(
|
||||
@@ -908,13 +942,7 @@ impl Engine {
|
||||
let force_update_plan_first = should_force_update_plan_first(mode, &content);
|
||||
|
||||
// Add user message to session
|
||||
let user_msg = Message {
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentBlock::Text {
|
||||
text: content,
|
||||
cache_control: None,
|
||||
}],
|
||||
};
|
||||
let user_msg = self.user_text_message_with_turn_metadata(content);
|
||||
self.session.add_message(user_msg);
|
||||
|
||||
self.session.model = model;
|
||||
|
||||
@@ -116,13 +116,7 @@ impl Engine {
|
||||
if rendered.is_empty() {
|
||||
return;
|
||||
}
|
||||
self.add_session_message(Message {
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentBlock::Text {
|
||||
text: rendered,
|
||||
cache_control: None,
|
||||
}],
|
||||
})
|
||||
.await;
|
||||
self.add_session_message(self.user_text_message_with_turn_metadata(rendered))
|
||||
.await;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -702,13 +702,9 @@ fn working_set_reaches_model_as_turn_metadata() {
|
||||
.session
|
||||
.working_set
|
||||
.observe_user_message("please inspect src/lib.rs", tmp.path());
|
||||
engine.session.add_message(Message {
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentBlock::Text {
|
||||
text: "please inspect src/lib.rs".to_string(),
|
||||
cache_control: None,
|
||||
}],
|
||||
});
|
||||
let user_msg =
|
||||
engine.user_text_message_with_turn_metadata("please inspect src/lib.rs".to_string());
|
||||
engine.session.add_message(user_msg);
|
||||
|
||||
let messages = engine.messages_with_turn_metadata();
|
||||
let first_block = messages
|
||||
@@ -731,13 +727,8 @@ fn turn_metadata_includes_current_local_date_without_working_set() {
|
||||
..Default::default()
|
||||
};
|
||||
let (mut engine, _handle) = Engine::new(config, &Config::default());
|
||||
engine.session.add_message(Message {
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentBlock::Text {
|
||||
text: "what is today's date?".to_string(),
|
||||
cache_control: None,
|
||||
}],
|
||||
});
|
||||
let user_msg = engine.user_text_message_with_turn_metadata("what is today's date?".to_string());
|
||||
engine.session.add_message(user_msg);
|
||||
|
||||
let messages = engine.messages_with_turn_metadata();
|
||||
let first_block = messages
|
||||
@@ -753,14 +744,50 @@ fn turn_metadata_includes_current_local_date_without_working_set() {
|
||||
assert!(text.contains(&format!("Current local date: {today}")));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn messages_with_turn_metadata_preserves_stored_messages_for_prefix_cache() {
|
||||
let tmp = tempdir().expect("tempdir");
|
||||
fs::create_dir_all(tmp.path().join("src")).expect("mkdir");
|
||||
fs::write(tmp.path().join("src/lib.rs"), "pub fn sample() {}").expect("write");
|
||||
|
||||
let config = EngineConfig {
|
||||
workspace: tmp.path().to_path_buf(),
|
||||
..Default::default()
|
||||
};
|
||||
let (mut engine, _handle) = Engine::new(config, &Config::default());
|
||||
engine
|
||||
.session
|
||||
.working_set
|
||||
.observe_user_message("inspect src/lib.rs", tmp.path());
|
||||
|
||||
let first_user = engine.user_text_message_with_turn_metadata("inspect src/lib.rs".to_string());
|
||||
engine.session.add_message(first_user.clone());
|
||||
let first_request = engine.messages_with_turn_metadata();
|
||||
assert_eq!(first_request, engine.session.messages);
|
||||
|
||||
engine.session.add_message(Message {
|
||||
role: "assistant".to_string(),
|
||||
content: vec![ContentBlock::Text {
|
||||
text: "I inspected it.".to_string(),
|
||||
cache_control: None,
|
||||
}],
|
||||
});
|
||||
engine
|
||||
.session
|
||||
.working_set
|
||||
.observe_user_message("now summarize it", tmp.path());
|
||||
let second_user = engine.user_text_message_with_turn_metadata("now summarize it".to_string());
|
||||
engine.session.add_message(second_user);
|
||||
|
||||
let second_request = engine.messages_with_turn_metadata();
|
||||
assert_eq!(second_request, engine.session.messages);
|
||||
assert_eq!(second_request.first(), Some(&first_user));
|
||||
}
|
||||
|
||||
/// v0.8.11 regression: tool-result messages serialize to role="tool" on
|
||||
/// the wire but are stored as role="user" internally. Prepending
|
||||
/// `<turn_meta>` text onto a tool-result message broke the
|
||||
/// assistant→tool_result invariant and caused HTTP 400 from DeepSeek's
|
||||
/// API ("insufficient tool messages following tool_calls"). The fix:
|
||||
/// inject only into messages that have a Text content block and no
|
||||
/// ToolResult blocks; mid-turn (tool-result is the trailing user
|
||||
/// message) the injection skips.
|
||||
/// the wire but are stored as role="user" internally. `<turn_meta>` must
|
||||
/// be stored only on actual user-text messages, not retroactively added
|
||||
/// to tool-result messages at request time.
|
||||
#[test]
|
||||
fn turn_metadata_skips_tool_result_messages() {
|
||||
let tmp = tempdir().expect("tempdir");
|
||||
@@ -778,13 +805,8 @@ fn turn_metadata_skips_tool_result_messages() {
|
||||
.observe_user_message("inspect src/lib.rs", tmp.path());
|
||||
|
||||
// Real user message — should be eligible for injection.
|
||||
engine.session.add_message(Message {
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentBlock::Text {
|
||||
text: "inspect src/lib.rs".to_string(),
|
||||
cache_control: None,
|
||||
}],
|
||||
});
|
||||
let user_msg = engine.user_text_message_with_turn_metadata("inspect src/lib.rs".to_string());
|
||||
engine.session.add_message(user_msg);
|
||||
// Assistant tool-call.
|
||||
engine.session.add_message(Message {
|
||||
role: "assistant".to_string(),
|
||||
@@ -818,7 +840,7 @@ fn turn_metadata_skips_tool_result_messages() {
|
||||
Some(ContentBlock::ToolResult { .. })
|
||||
));
|
||||
|
||||
// The earlier real user message receives the turn_meta prefix.
|
||||
// The earlier real user message already carries the turn_meta prefix.
|
||||
let real_user = messages.first().expect("first user message");
|
||||
assert_eq!(real_user.role, "user");
|
||||
let ContentBlock::Text { text, .. } = real_user.content.first().expect("user text content")
|
||||
@@ -830,10 +852,8 @@ fn turn_metadata_skips_tool_result_messages() {
|
||||
}
|
||||
|
||||
/// When the turn is mid-execution and the trailing user message is a
|
||||
/// tool result, no turn_meta is injected at all (rather than landing on
|
||||
/// some earlier user message and confusing the API's tool-call
|
||||
/// continuity check). The working_set surfaces again on the next
|
||||
/// genuine user prompt.
|
||||
/// tool result, no turn_meta is injected at request time. The working_set
|
||||
/// surfaces again on the next stored user-text message.
|
||||
#[test]
|
||||
fn turn_metadata_skips_when_only_tool_results_trail() {
|
||||
let tmp = tempdir().expect("tempdir");
|
||||
@@ -1840,12 +1860,24 @@ async fn post_edit_hook_injects_diagnostics_message_before_next_request() {
|
||||
|
||||
let last = engine.session.messages.last().expect("message appended");
|
||||
assert_eq!(last.role, "user");
|
||||
let text = match &last.content[0] {
|
||||
let meta = match &last.content[0] {
|
||||
crate::models::ContentBlock::Text { text, .. } => text.clone(),
|
||||
other => panic!("expected text block, got {other:?}"),
|
||||
};
|
||||
assert!(text.contains("<diagnostics file=\""));
|
||||
assert!(text.contains("ERROR [1:14] expected i32, found &str"));
|
||||
assert!(meta.starts_with("<turn_meta>\n"));
|
||||
let diagnostic_text = last
|
||||
.content
|
||||
.iter()
|
||||
.find_map(|block| match block {
|
||||
crate::models::ContentBlock::Text { text, .. }
|
||||
if text.contains("<diagnostics file=\"") =>
|
||||
{
|
||||
Some(text)
|
||||
}
|
||||
_ => None,
|
||||
})
|
||||
.expect("diagnostics text block");
|
||||
assert!(diagnostic_text.contains("ERROR [1:14] expected i32, found &str"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
||||
@@ -55,14 +55,8 @@ impl Engine {
|
||||
self.session
|
||||
.working_set
|
||||
.observe_user_message(&steer, &self.session.workspace);
|
||||
self.add_session_message(Message {
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentBlock::Text {
|
||||
text: steer.clone(),
|
||||
cache_control: None,
|
||||
}],
|
||||
})
|
||||
.await;
|
||||
self.add_session_message(self.user_text_message_with_turn_metadata(steer.clone()))
|
||||
.await;
|
||||
let _ = self
|
||||
.tx_event
|
||||
.send(Event::status(format!(
|
||||
@@ -821,14 +815,8 @@ impl Engine {
|
||||
self.session
|
||||
.working_set
|
||||
.observe_user_message(&steer, &self.session.workspace);
|
||||
self.add_session_message(Message {
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentBlock::Text {
|
||||
text: steer,
|
||||
cache_control: None,
|
||||
}],
|
||||
})
|
||||
.await;
|
||||
self.add_session_message(self.user_text_message_with_turn_metadata(steer))
|
||||
.await;
|
||||
}
|
||||
turn.next_step();
|
||||
continue;
|
||||
@@ -881,13 +869,9 @@ impl Engine {
|
||||
self.session
|
||||
.working_set
|
||||
.observe_user_message(&trimmed, &self.session.workspace);
|
||||
self.add_session_message(Message {
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentBlock::Text {
|
||||
text: trimmed.clone(),
|
||||
cache_control: None,
|
||||
}],
|
||||
})
|
||||
self.add_session_message(
|
||||
self.user_text_message_with_turn_metadata(trimmed.clone()),
|
||||
)
|
||||
.await;
|
||||
let _ = self
|
||||
.tx_event
|
||||
@@ -968,13 +952,9 @@ impl Engine {
|
||||
} else {
|
||||
format!("[REPL round {round_num} output]\n{}", round.stdout)
|
||||
};
|
||||
self.add_session_message(Message {
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentBlock::Text {
|
||||
text: feedback,
|
||||
cache_control: None,
|
||||
}],
|
||||
})
|
||||
self.add_session_message(
|
||||
self.user_text_message_with_turn_metadata(feedback),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
Err(e) => {
|
||||
@@ -984,15 +964,11 @@ impl Engine {
|
||||
"REPL round {round_num} failed: {e}"
|
||||
)))
|
||||
.await;
|
||||
self.add_session_message(Message {
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentBlock::Text {
|
||||
text: format!(
|
||||
"[REPL round {round_num} execution failed]\n{e}"
|
||||
),
|
||||
cache_control: None,
|
||||
}],
|
||||
})
|
||||
self.add_session_message(
|
||||
self.user_text_message_with_turn_metadata(format!(
|
||||
"[REPL round {round_num} execution failed]\n{e}"
|
||||
)),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
}
|
||||
@@ -1756,14 +1732,8 @@ impl Engine {
|
||||
self.session
|
||||
.working_set
|
||||
.observe_user_message(&steer, &self.session.workspace);
|
||||
self.add_session_message(Message {
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentBlock::Text {
|
||||
text: steer,
|
||||
cache_control: None,
|
||||
}],
|
||||
})
|
||||
.await;
|
||||
self.add_session_message(self.user_text_message_with_turn_metadata(steer))
|
||||
.await;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1800,54 +1770,11 @@ impl Engine {
|
||||
}
|
||||
|
||||
pub(super) fn messages_with_turn_metadata(&self) -> Vec<Message> {
|
||||
let today = chrono::Local::now().format("%Y-%m-%d").to_string();
|
||||
let working_set_summary = self
|
||||
.session
|
||||
.working_set
|
||||
.summary_block(&self.config.workspace)
|
||||
.map(|s| s.trim().to_string())
|
||||
.filter(|s| !s.is_empty());
|
||||
|
||||
let summary = if let Some(working_set_summary) = working_set_summary {
|
||||
format!("Current local date: {today}\n{working_set_summary}")
|
||||
} else {
|
||||
format!("Current local date: {today}")
|
||||
};
|
||||
|
||||
let mut messages = self.session.messages.clone();
|
||||
// v0.8.11 hotfix: tool-result messages are stored as role="user" in
|
||||
// our internal representation but serialize to role="tool" on the
|
||||
// wire. Prepending a Text block onto a tool-result message breaks
|
||||
// the assistant→tool_result invariant — the API rejects the request
|
||||
// with `"insufficient tool messages following tool_calls"`. Inject
|
||||
// only into actual user-typed messages, recognizable by having at
|
||||
// least one Text content block (and no ToolResult blocks).
|
||||
let Some(last_user) = messages.iter_mut().rev().find(|message| {
|
||||
message.role == "user"
|
||||
&& message
|
||||
.content
|
||||
.iter()
|
||||
.all(|block| !matches!(block, ContentBlock::ToolResult { .. }))
|
||||
&& message
|
||||
.content
|
||||
.iter()
|
||||
.any(|block| matches!(block, ContentBlock::Text { .. }))
|
||||
}) else {
|
||||
// No real user message in the trailing slice (e.g. mid-turn
|
||||
// after a tool call). Skip injection — the working_set will
|
||||
// surface again on the next genuine user prompt.
|
||||
return messages;
|
||||
};
|
||||
|
||||
let turn_meta = format!("<turn_meta>\n{summary}\n</turn_meta>");
|
||||
last_user.content.insert(
|
||||
0,
|
||||
ContentBlock::Text {
|
||||
text: turn_meta,
|
||||
cache_control: None,
|
||||
},
|
||||
);
|
||||
messages
|
||||
// `<turn_meta>` is stored on user-text messages when the message is
|
||||
// appended. Do not rewrite historical messages at request time: doing
|
||||
// so makes the API prefix differ from the bytes sent in earlier turns
|
||||
// and destroys DeepSeek's KV prefix cache reuse.
|
||||
self.session.messages.clone()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1887,7 +1814,11 @@ fn resolve_auto_effort(reasoning_effort: Option<&str>, messages: &[Message]) ->
|
||||
.iter()
|
||||
.filter_map(|block| {
|
||||
if let ContentBlock::Text { text, .. } = block {
|
||||
Some(text.as_str())
|
||||
if is_turn_metadata_text(text) {
|
||||
None
|
||||
} else {
|
||||
Some(text.as_str())
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
@@ -1915,6 +1846,10 @@ fn resolve_auto_effort(reasoning_effort: Option<&str>, messages: &[Message]) ->
|
||||
}
|
||||
}
|
||||
|
||||
fn is_turn_metadata_text(text: &str) -> bool {
|
||||
text.trim_start().starts_with("<turn_meta>")
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
@@ -1935,4 +1870,27 @@ mod tests {
|
||||
assert!(text.contains("<deepseek:subagent.done>"));
|
||||
assert!(text.contains("Build passed"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn resolve_auto_effort_ignores_stored_turn_metadata() {
|
||||
let messages = vec![Message {
|
||||
role: "user".to_string(),
|
||||
content: vec![
|
||||
ContentBlock::Text {
|
||||
text: "<turn_meta>\nRecent errors: src/failing.rs\n</turn_meta>".to_string(),
|
||||
cache_control: None,
|
||||
},
|
||||
ContentBlock::Text {
|
||||
text: "hello".to_string(),
|
||||
cache_control: None,
|
||||
},
|
||||
],
|
||||
}];
|
||||
|
||||
assert_eq!(
|
||||
resolve_auto_effort(Some("auto"), &messages),
|
||||
Some("high".to_string()),
|
||||
"auto thinking should classify the user request, not stored metadata"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -448,7 +448,7 @@ pub fn system_prompt_for_mode_with_context_skills_session_and_approval(
|
||||
If you notice context is getting long (>80%), proactively suggest using `/compact` to the user.\n\n\
|
||||
### Prompt-cache awareness\n\n\
|
||||
DeepSeek caches the longest *byte-stable prefix* of every request and charges roughly 100× less for cache-hit tokens than miss tokens. The system prompt above is layered most-static-first specifically so the prefix stays stable turn-over-turn. To keep cache hits high:\n\
|
||||
- **Working set location:** the current repo working set is injected into the latest user message inside a `<turn_meta>` block. Treat it as high-priority turn metadata, not as a stable system-prompt section.\n\
|
||||
- **Working set location:** the current repo working set is stored on new user messages inside a `<turn_meta>` block. Treat it as high-priority turn metadata, not as a stable system-prompt section.\n\
|
||||
- **Append, don't reorder.** New context goes at the end (latest user / tool messages). Reshuffling earlier messages or rewriting their content invalidates the cache for everything after the change.\n\
|
||||
- **Don't paraphrase quoted content.** If you've already read a file, refer to it by path or line range instead of re-quoting it with different formatting.\n\
|
||||
- **Use `/compact` as a hard reset, not a tweak.** Compaction is meant for when the cache is already losing — it intentionally rewrites the prefix to a shorter summary. Don't trigger it for small wins.\n\
|
||||
|
||||
Reference in New Issue
Block a user