diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs
index dc1cb2fe..de6f0e59 100644
--- a/crates/tui/src/core/engine.rs
+++ b/crates/tui/src/core/engine.rs
@@ -829,6 +829,40 @@ impl Engine {
self.emit_session_updated().await;
}
+ fn turn_metadata_block(&self) -> ContentBlock {
+ let today = chrono::Local::now().format("%Y-%m-%d").to_string();
+ let working_set_summary = self
+ .session
+ .working_set
+ .summary_block(&self.config.workspace)
+ .map(|s| s.trim().to_string())
+ .filter(|s| !s.is_empty());
+
+ let summary = if let Some(working_set_summary) = working_set_summary {
+ format!("Current local date: {today}\n{working_set_summary}")
+ } else {
+ format!("Current local date: {today}")
+ };
+
+ ContentBlock::Text {
+ text: format!("\n{summary}\n"),
+ cache_control: None,
+ }
+ }
+
+ fn user_text_message_with_turn_metadata(&self, text: String) -> Message {
+ Message {
+ role: "user".to_string(),
+ content: vec![
+ self.turn_metadata_block(),
+ ContentBlock::Text {
+ text,
+ cache_control: None,
+ },
+ ],
+ }
+ }
+
/// Handle a send message operation
#[allow(clippy::too_many_arguments)]
async fn handle_send_message(
@@ -908,13 +942,7 @@ impl Engine {
let force_update_plan_first = should_force_update_plan_first(mode, &content);
// Add user message to session
- let user_msg = Message {
- role: "user".to_string(),
- content: vec![ContentBlock::Text {
- text: content,
- cache_control: None,
- }],
- };
+ let user_msg = self.user_text_message_with_turn_metadata(content);
self.session.add_message(user_msg);
self.session.model = model;
diff --git a/crates/tui/src/core/engine/lsp_hooks.rs b/crates/tui/src/core/engine/lsp_hooks.rs
index 535a869f..1e6da746 100644
--- a/crates/tui/src/core/engine/lsp_hooks.rs
+++ b/crates/tui/src/core/engine/lsp_hooks.rs
@@ -116,13 +116,7 @@ impl Engine {
if rendered.is_empty() {
return;
}
- self.add_session_message(Message {
- role: "user".to_string(),
- content: vec![ContentBlock::Text {
- text: rendered,
- cache_control: None,
- }],
- })
- .await;
+ self.add_session_message(self.user_text_message_with_turn_metadata(rendered))
+ .await;
}
}
diff --git a/crates/tui/src/core/engine/tests.rs b/crates/tui/src/core/engine/tests.rs
index 1971875c..c4e9c598 100644
--- a/crates/tui/src/core/engine/tests.rs
+++ b/crates/tui/src/core/engine/tests.rs
@@ -702,13 +702,9 @@ fn working_set_reaches_model_as_turn_metadata() {
.session
.working_set
.observe_user_message("please inspect src/lib.rs", tmp.path());
- engine.session.add_message(Message {
- role: "user".to_string(),
- content: vec![ContentBlock::Text {
- text: "please inspect src/lib.rs".to_string(),
- cache_control: None,
- }],
- });
+ let user_msg =
+ engine.user_text_message_with_turn_metadata("please inspect src/lib.rs".to_string());
+ engine.session.add_message(user_msg);
let messages = engine.messages_with_turn_metadata();
let first_block = messages
@@ -731,13 +727,8 @@ fn turn_metadata_includes_current_local_date_without_working_set() {
..Default::default()
};
let (mut engine, _handle) = Engine::new(config, &Config::default());
- engine.session.add_message(Message {
- role: "user".to_string(),
- content: vec![ContentBlock::Text {
- text: "what is today's date?".to_string(),
- cache_control: None,
- }],
- });
+ let user_msg = engine.user_text_message_with_turn_metadata("what is today's date?".to_string());
+ engine.session.add_message(user_msg);
let messages = engine.messages_with_turn_metadata();
let first_block = messages
@@ -753,14 +744,50 @@ fn turn_metadata_includes_current_local_date_without_working_set() {
assert!(text.contains(&format!("Current local date: {today}")));
}
+#[test]
+fn messages_with_turn_metadata_preserves_stored_messages_for_prefix_cache() {
+ let tmp = tempdir().expect("tempdir");
+ fs::create_dir_all(tmp.path().join("src")).expect("mkdir");
+ fs::write(tmp.path().join("src/lib.rs"), "pub fn sample() {}").expect("write");
+
+ let config = EngineConfig {
+ workspace: tmp.path().to_path_buf(),
+ ..Default::default()
+ };
+ let (mut engine, _handle) = Engine::new(config, &Config::default());
+ engine
+ .session
+ .working_set
+ .observe_user_message("inspect src/lib.rs", tmp.path());
+
+ let first_user = engine.user_text_message_with_turn_metadata("inspect src/lib.rs".to_string());
+ engine.session.add_message(first_user.clone());
+ let first_request = engine.messages_with_turn_metadata();
+ assert_eq!(first_request, engine.session.messages);
+
+ engine.session.add_message(Message {
+ role: "assistant".to_string(),
+ content: vec![ContentBlock::Text {
+ text: "I inspected it.".to_string(),
+ cache_control: None,
+ }],
+ });
+ engine
+ .session
+ .working_set
+ .observe_user_message("now summarize it", tmp.path());
+ let second_user = engine.user_text_message_with_turn_metadata("now summarize it".to_string());
+ engine.session.add_message(second_user);
+
+ let second_request = engine.messages_with_turn_metadata();
+ assert_eq!(second_request, engine.session.messages);
+ assert_eq!(second_request.first(), Some(&first_user));
+}
+
/// v0.8.11 regression: tool-result messages serialize to role="tool" on
-/// the wire but are stored as role="user" internally. Prepending
-/// `` text onto a tool-result message broke the
-/// assistant→tool_result invariant and caused HTTP 400 from DeepSeek's
-/// API ("insufficient tool messages following tool_calls"). The fix:
-/// inject only into messages that have a Text content block and no
-/// ToolResult blocks; mid-turn (tool-result is the trailing user
-/// message) the injection skips.
+/// the wire but are stored as role="user" internally. `` must
+/// be stored only on actual user-text messages, not retroactively added
+/// to tool-result messages at request time.
#[test]
fn turn_metadata_skips_tool_result_messages() {
let tmp = tempdir().expect("tempdir");
@@ -778,13 +805,8 @@ fn turn_metadata_skips_tool_result_messages() {
.observe_user_message("inspect src/lib.rs", tmp.path());
// Real user message — should be eligible for injection.
- engine.session.add_message(Message {
- role: "user".to_string(),
- content: vec![ContentBlock::Text {
- text: "inspect src/lib.rs".to_string(),
- cache_control: None,
- }],
- });
+ let user_msg = engine.user_text_message_with_turn_metadata("inspect src/lib.rs".to_string());
+ engine.session.add_message(user_msg);
// Assistant tool-call.
engine.session.add_message(Message {
role: "assistant".to_string(),
@@ -818,7 +840,7 @@ fn turn_metadata_skips_tool_result_messages() {
Some(ContentBlock::ToolResult { .. })
));
- // The earlier real user message receives the turn_meta prefix.
+ // The earlier real user message already carries the turn_meta prefix.
let real_user = messages.first().expect("first user message");
assert_eq!(real_user.role, "user");
let ContentBlock::Text { text, .. } = real_user.content.first().expect("user text content")
@@ -830,10 +852,8 @@ fn turn_metadata_skips_tool_result_messages() {
}
/// When the turn is mid-execution and the trailing user message is a
-/// tool result, no turn_meta is injected at all (rather than landing on
-/// some earlier user message and confusing the API's tool-call
-/// continuity check). The working_set surfaces again on the next
-/// genuine user prompt.
+/// tool result, no turn_meta is injected at request time. The working_set
+/// surfaces again on the next stored user-text message.
#[test]
fn turn_metadata_skips_when_only_tool_results_trail() {
let tmp = tempdir().expect("tempdir");
@@ -1840,12 +1860,24 @@ async fn post_edit_hook_injects_diagnostics_message_before_next_request() {
let last = engine.session.messages.last().expect("message appended");
assert_eq!(last.role, "user");
- let text = match &last.content[0] {
+ let meta = match &last.content[0] {
crate::models::ContentBlock::Text { text, .. } => text.clone(),
other => panic!("expected text block, got {other:?}"),
};
- assert!(text.contains("\n"));
+ let diagnostic_text = last
+ .content
+ .iter()
+ .find_map(|block| match block {
+ crate::models::ContentBlock::Text { text, .. }
+ if text.contains("
+ {
+ Some(text)
+ }
+ _ => None,
+ })
+ .expect("diagnostics text block");
+ assert!(diagnostic_text.contains("ERROR [1:14] expected i32, found &str"));
}
#[tokio::test]
diff --git a/crates/tui/src/core/engine/turn_loop.rs b/crates/tui/src/core/engine/turn_loop.rs
index 55b57896..4b374f34 100644
--- a/crates/tui/src/core/engine/turn_loop.rs
+++ b/crates/tui/src/core/engine/turn_loop.rs
@@ -55,14 +55,8 @@ impl Engine {
self.session
.working_set
.observe_user_message(&steer, &self.session.workspace);
- self.add_session_message(Message {
- role: "user".to_string(),
- content: vec![ContentBlock::Text {
- text: steer.clone(),
- cache_control: None,
- }],
- })
- .await;
+ self.add_session_message(self.user_text_message_with_turn_metadata(steer.clone()))
+ .await;
let _ = self
.tx_event
.send(Event::status(format!(
@@ -821,14 +815,8 @@ impl Engine {
self.session
.working_set
.observe_user_message(&steer, &self.session.workspace);
- self.add_session_message(Message {
- role: "user".to_string(),
- content: vec![ContentBlock::Text {
- text: steer,
- cache_control: None,
- }],
- })
- .await;
+ self.add_session_message(self.user_text_message_with_turn_metadata(steer))
+ .await;
}
turn.next_step();
continue;
@@ -881,13 +869,9 @@ impl Engine {
self.session
.working_set
.observe_user_message(&trimmed, &self.session.workspace);
- self.add_session_message(Message {
- role: "user".to_string(),
- content: vec![ContentBlock::Text {
- text: trimmed.clone(),
- cache_control: None,
- }],
- })
+ self.add_session_message(
+ self.user_text_message_with_turn_metadata(trimmed.clone()),
+ )
.await;
let _ = self
.tx_event
@@ -968,13 +952,9 @@ impl Engine {
} else {
format!("[REPL round {round_num} output]\n{}", round.stdout)
};
- self.add_session_message(Message {
- role: "user".to_string(),
- content: vec![ContentBlock::Text {
- text: feedback,
- cache_control: None,
- }],
- })
+ self.add_session_message(
+ self.user_text_message_with_turn_metadata(feedback),
+ )
.await;
}
Err(e) => {
@@ -984,15 +964,11 @@ impl Engine {
"REPL round {round_num} failed: {e}"
)))
.await;
- self.add_session_message(Message {
- role: "user".to_string(),
- content: vec![ContentBlock::Text {
- text: format!(
- "[REPL round {round_num} execution failed]\n{e}"
- ),
- cache_control: None,
- }],
- })
+ self.add_session_message(
+ self.user_text_message_with_turn_metadata(format!(
+ "[REPL round {round_num} execution failed]\n{e}"
+ )),
+ )
.await;
}
}
@@ -1756,14 +1732,8 @@ impl Engine {
self.session
.working_set
.observe_user_message(&steer, &self.session.workspace);
- self.add_session_message(Message {
- role: "user".to_string(),
- content: vec![ContentBlock::Text {
- text: steer,
- cache_control: None,
- }],
- })
- .await;
+ self.add_session_message(self.user_text_message_with_turn_metadata(steer))
+ .await;
}
}
@@ -1800,54 +1770,11 @@ impl Engine {
}
pub(super) fn messages_with_turn_metadata(&self) -> Vec {
- let today = chrono::Local::now().format("%Y-%m-%d").to_string();
- let working_set_summary = self
- .session
- .working_set
- .summary_block(&self.config.workspace)
- .map(|s| s.trim().to_string())
- .filter(|s| !s.is_empty());
-
- let summary = if let Some(working_set_summary) = working_set_summary {
- format!("Current local date: {today}\n{working_set_summary}")
- } else {
- format!("Current local date: {today}")
- };
-
- let mut messages = self.session.messages.clone();
- // v0.8.11 hotfix: tool-result messages are stored as role="user" in
- // our internal representation but serialize to role="tool" on the
- // wire. Prepending a Text block onto a tool-result message breaks
- // the assistant→tool_result invariant — the API rejects the request
- // with `"insufficient tool messages following tool_calls"`. Inject
- // only into actual user-typed messages, recognizable by having at
- // least one Text content block (and no ToolResult blocks).
- let Some(last_user) = messages.iter_mut().rev().find(|message| {
- message.role == "user"
- && message
- .content
- .iter()
- .all(|block| !matches!(block, ContentBlock::ToolResult { .. }))
- && message
- .content
- .iter()
- .any(|block| matches!(block, ContentBlock::Text { .. }))
- }) else {
- // No real user message in the trailing slice (e.g. mid-turn
- // after a tool call). Skip injection — the working_set will
- // surface again on the next genuine user prompt.
- return messages;
- };
-
- let turn_meta = format!("\n{summary}\n");
- last_user.content.insert(
- 0,
- ContentBlock::Text {
- text: turn_meta,
- cache_control: None,
- },
- );
- messages
+ // `` is stored on user-text messages when the message is
+ // appended. Do not rewrite historical messages at request time: doing
+ // so makes the API prefix differ from the bytes sent in earlier turns
+ // and destroys DeepSeek's KV prefix cache reuse.
+ self.session.messages.clone()
}
}
@@ -1887,7 +1814,11 @@ fn resolve_auto_effort(reasoning_effort: Option<&str>, messages: &[Message]) ->
.iter()
.filter_map(|block| {
if let ContentBlock::Text { text, .. } = block {
- Some(text.as_str())
+ if is_turn_metadata_text(text) {
+ None
+ } else {
+ Some(text.as_str())
+ }
} else {
None
}
@@ -1915,6 +1846,10 @@ fn resolve_auto_effort(reasoning_effort: Option<&str>, messages: &[Message]) ->
}
}
+fn is_turn_metadata_text(text: &str) -> bool {
+ text.trim_start().starts_with("")
+}
+
#[cfg(test)]
mod tests {
use super::*;
@@ -1935,4 +1870,27 @@ mod tests {
assert!(text.contains(""));
assert!(text.contains("Build passed"));
}
+
+ #[test]
+ fn resolve_auto_effort_ignores_stored_turn_metadata() {
+ let messages = vec![Message {
+ role: "user".to_string(),
+ content: vec![
+ ContentBlock::Text {
+ text: "\nRecent errors: src/failing.rs\n".to_string(),
+ cache_control: None,
+ },
+ ContentBlock::Text {
+ text: "hello".to_string(),
+ cache_control: None,
+ },
+ ],
+ }];
+
+ assert_eq!(
+ resolve_auto_effort(Some("auto"), &messages),
+ Some("high".to_string()),
+ "auto thinking should classify the user request, not stored metadata"
+ );
+ }
}
diff --git a/crates/tui/src/prompts.rs b/crates/tui/src/prompts.rs
index d77ca63a..b482f89f 100644
--- a/crates/tui/src/prompts.rs
+++ b/crates/tui/src/prompts.rs
@@ -448,7 +448,7 @@ pub fn system_prompt_for_mode_with_context_skills_session_and_approval(
If you notice context is getting long (>80%), proactively suggest using `/compact` to the user.\n\n\
### Prompt-cache awareness\n\n\
DeepSeek caches the longest *byte-stable prefix* of every request and charges roughly 100× less for cache-hit tokens than miss tokens. The system prompt above is layered most-static-first specifically so the prefix stays stable turn-over-turn. To keep cache hits high:\n\
- - **Working set location:** the current repo working set is injected into the latest user message inside a `` block. Treat it as high-priority turn metadata, not as a stable system-prompt section.\n\
+ - **Working set location:** the current repo working set is stored on new user messages inside a `` block. Treat it as high-priority turn metadata, not as a stable system-prompt section.\n\
- **Append, don't reorder.** New context goes at the end (latest user / tool messages). Reshuffling earlier messages or rewriting their content invalidates the cache for everything after the change.\n\
- **Don't paraphrase quoted content.** If you've already read a file, refer to it by path or line range instead of re-quoting it with different formatting.\n\
- **Use `/compact` as a hard reset, not a tweak.** Compaction is meant for when the cache is already losing — it intentionally rewrites the prefix to a shorter summary. Don't trigger it for small wins.\n\