diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index dc1cb2fe..de6f0e59 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -829,6 +829,40 @@ impl Engine { self.emit_session_updated().await; } + fn turn_metadata_block(&self) -> ContentBlock { + let today = chrono::Local::now().format("%Y-%m-%d").to_string(); + let working_set_summary = self + .session + .working_set + .summary_block(&self.config.workspace) + .map(|s| s.trim().to_string()) + .filter(|s| !s.is_empty()); + + let summary = if let Some(working_set_summary) = working_set_summary { + format!("Current local date: {today}\n{working_set_summary}") + } else { + format!("Current local date: {today}") + }; + + ContentBlock::Text { + text: format!("\n{summary}\n"), + cache_control: None, + } + } + + fn user_text_message_with_turn_metadata(&self, text: String) -> Message { + Message { + role: "user".to_string(), + content: vec![ + self.turn_metadata_block(), + ContentBlock::Text { + text, + cache_control: None, + }, + ], + } + } + /// Handle a send message operation #[allow(clippy::too_many_arguments)] async fn handle_send_message( @@ -908,13 +942,7 @@ impl Engine { let force_update_plan_first = should_force_update_plan_first(mode, &content); // Add user message to session - let user_msg = Message { - role: "user".to_string(), - content: vec![ContentBlock::Text { - text: content, - cache_control: None, - }], - }; + let user_msg = self.user_text_message_with_turn_metadata(content); self.session.add_message(user_msg); self.session.model = model; diff --git a/crates/tui/src/core/engine/lsp_hooks.rs b/crates/tui/src/core/engine/lsp_hooks.rs index 535a869f..1e6da746 100644 --- a/crates/tui/src/core/engine/lsp_hooks.rs +++ b/crates/tui/src/core/engine/lsp_hooks.rs @@ -116,13 +116,7 @@ impl Engine { if rendered.is_empty() { return; } - self.add_session_message(Message { - role: "user".to_string(), - content: vec![ContentBlock::Text { - text: rendered, - cache_control: None, - }], - }) - .await; + self.add_session_message(self.user_text_message_with_turn_metadata(rendered)) + .await; } } diff --git a/crates/tui/src/core/engine/tests.rs b/crates/tui/src/core/engine/tests.rs index 1971875c..c4e9c598 100644 --- a/crates/tui/src/core/engine/tests.rs +++ b/crates/tui/src/core/engine/tests.rs @@ -702,13 +702,9 @@ fn working_set_reaches_model_as_turn_metadata() { .session .working_set .observe_user_message("please inspect src/lib.rs", tmp.path()); - engine.session.add_message(Message { - role: "user".to_string(), - content: vec![ContentBlock::Text { - text: "please inspect src/lib.rs".to_string(), - cache_control: None, - }], - }); + let user_msg = + engine.user_text_message_with_turn_metadata("please inspect src/lib.rs".to_string()); + engine.session.add_message(user_msg); let messages = engine.messages_with_turn_metadata(); let first_block = messages @@ -731,13 +727,8 @@ fn turn_metadata_includes_current_local_date_without_working_set() { ..Default::default() }; let (mut engine, _handle) = Engine::new(config, &Config::default()); - engine.session.add_message(Message { - role: "user".to_string(), - content: vec![ContentBlock::Text { - text: "what is today's date?".to_string(), - cache_control: None, - }], - }); + let user_msg = engine.user_text_message_with_turn_metadata("what is today's date?".to_string()); + engine.session.add_message(user_msg); let messages = engine.messages_with_turn_metadata(); let first_block = messages @@ -753,14 +744,50 @@ fn turn_metadata_includes_current_local_date_without_working_set() { assert!(text.contains(&format!("Current local date: {today}"))); } +#[test] +fn messages_with_turn_metadata_preserves_stored_messages_for_prefix_cache() { + let tmp = tempdir().expect("tempdir"); + fs::create_dir_all(tmp.path().join("src")).expect("mkdir"); + fs::write(tmp.path().join("src/lib.rs"), "pub fn sample() {}").expect("write"); + + let config = EngineConfig { + workspace: tmp.path().to_path_buf(), + ..Default::default() + }; + let (mut engine, _handle) = Engine::new(config, &Config::default()); + engine + .session + .working_set + .observe_user_message("inspect src/lib.rs", tmp.path()); + + let first_user = engine.user_text_message_with_turn_metadata("inspect src/lib.rs".to_string()); + engine.session.add_message(first_user.clone()); + let first_request = engine.messages_with_turn_metadata(); + assert_eq!(first_request, engine.session.messages); + + engine.session.add_message(Message { + role: "assistant".to_string(), + content: vec![ContentBlock::Text { + text: "I inspected it.".to_string(), + cache_control: None, + }], + }); + engine + .session + .working_set + .observe_user_message("now summarize it", tmp.path()); + let second_user = engine.user_text_message_with_turn_metadata("now summarize it".to_string()); + engine.session.add_message(second_user); + + let second_request = engine.messages_with_turn_metadata(); + assert_eq!(second_request, engine.session.messages); + assert_eq!(second_request.first(), Some(&first_user)); +} + /// v0.8.11 regression: tool-result messages serialize to role="tool" on -/// the wire but are stored as role="user" internally. Prepending -/// `` text onto a tool-result message broke the -/// assistant→tool_result invariant and caused HTTP 400 from DeepSeek's -/// API ("insufficient tool messages following tool_calls"). The fix: -/// inject only into messages that have a Text content block and no -/// ToolResult blocks; mid-turn (tool-result is the trailing user -/// message) the injection skips. +/// the wire but are stored as role="user" internally. `` must +/// be stored only on actual user-text messages, not retroactively added +/// to tool-result messages at request time. #[test] fn turn_metadata_skips_tool_result_messages() { let tmp = tempdir().expect("tempdir"); @@ -778,13 +805,8 @@ fn turn_metadata_skips_tool_result_messages() { .observe_user_message("inspect src/lib.rs", tmp.path()); // Real user message — should be eligible for injection. - engine.session.add_message(Message { - role: "user".to_string(), - content: vec![ContentBlock::Text { - text: "inspect src/lib.rs".to_string(), - cache_control: None, - }], - }); + let user_msg = engine.user_text_message_with_turn_metadata("inspect src/lib.rs".to_string()); + engine.session.add_message(user_msg); // Assistant tool-call. engine.session.add_message(Message { role: "assistant".to_string(), @@ -818,7 +840,7 @@ fn turn_metadata_skips_tool_result_messages() { Some(ContentBlock::ToolResult { .. }) )); - // The earlier real user message receives the turn_meta prefix. + // The earlier real user message already carries the turn_meta prefix. let real_user = messages.first().expect("first user message"); assert_eq!(real_user.role, "user"); let ContentBlock::Text { text, .. } = real_user.content.first().expect("user text content") @@ -830,10 +852,8 @@ fn turn_metadata_skips_tool_result_messages() { } /// When the turn is mid-execution and the trailing user message is a -/// tool result, no turn_meta is injected at all (rather than landing on -/// some earlier user message and confusing the API's tool-call -/// continuity check). The working_set surfaces again on the next -/// genuine user prompt. +/// tool result, no turn_meta is injected at request time. The working_set +/// surfaces again on the next stored user-text message. #[test] fn turn_metadata_skips_when_only_tool_results_trail() { let tmp = tempdir().expect("tempdir"); @@ -1840,12 +1860,24 @@ async fn post_edit_hook_injects_diagnostics_message_before_next_request() { let last = engine.session.messages.last().expect("message appended"); assert_eq!(last.role, "user"); - let text = match &last.content[0] { + let meta = match &last.content[0] { crate::models::ContentBlock::Text { text, .. } => text.clone(), other => panic!("expected text block, got {other:?}"), }; - assert!(text.contains("\n")); + let diagnostic_text = last + .content + .iter() + .find_map(|block| match block { + crate::models::ContentBlock::Text { text, .. } + if text.contains(" + { + Some(text) + } + _ => None, + }) + .expect("diagnostics text block"); + assert!(diagnostic_text.contains("ERROR [1:14] expected i32, found &str")); } #[tokio::test] diff --git a/crates/tui/src/core/engine/turn_loop.rs b/crates/tui/src/core/engine/turn_loop.rs index 55b57896..4b374f34 100644 --- a/crates/tui/src/core/engine/turn_loop.rs +++ b/crates/tui/src/core/engine/turn_loop.rs @@ -55,14 +55,8 @@ impl Engine { self.session .working_set .observe_user_message(&steer, &self.session.workspace); - self.add_session_message(Message { - role: "user".to_string(), - content: vec![ContentBlock::Text { - text: steer.clone(), - cache_control: None, - }], - }) - .await; + self.add_session_message(self.user_text_message_with_turn_metadata(steer.clone())) + .await; let _ = self .tx_event .send(Event::status(format!( @@ -821,14 +815,8 @@ impl Engine { self.session .working_set .observe_user_message(&steer, &self.session.workspace); - self.add_session_message(Message { - role: "user".to_string(), - content: vec![ContentBlock::Text { - text: steer, - cache_control: None, - }], - }) - .await; + self.add_session_message(self.user_text_message_with_turn_metadata(steer)) + .await; } turn.next_step(); continue; @@ -881,13 +869,9 @@ impl Engine { self.session .working_set .observe_user_message(&trimmed, &self.session.workspace); - self.add_session_message(Message { - role: "user".to_string(), - content: vec![ContentBlock::Text { - text: trimmed.clone(), - cache_control: None, - }], - }) + self.add_session_message( + self.user_text_message_with_turn_metadata(trimmed.clone()), + ) .await; let _ = self .tx_event @@ -968,13 +952,9 @@ impl Engine { } else { format!("[REPL round {round_num} output]\n{}", round.stdout) }; - self.add_session_message(Message { - role: "user".to_string(), - content: vec![ContentBlock::Text { - text: feedback, - cache_control: None, - }], - }) + self.add_session_message( + self.user_text_message_with_turn_metadata(feedback), + ) .await; } Err(e) => { @@ -984,15 +964,11 @@ impl Engine { "REPL round {round_num} failed: {e}" ))) .await; - self.add_session_message(Message { - role: "user".to_string(), - content: vec![ContentBlock::Text { - text: format!( - "[REPL round {round_num} execution failed]\n{e}" - ), - cache_control: None, - }], - }) + self.add_session_message( + self.user_text_message_with_turn_metadata(format!( + "[REPL round {round_num} execution failed]\n{e}" + )), + ) .await; } } @@ -1756,14 +1732,8 @@ impl Engine { self.session .working_set .observe_user_message(&steer, &self.session.workspace); - self.add_session_message(Message { - role: "user".to_string(), - content: vec![ContentBlock::Text { - text: steer, - cache_control: None, - }], - }) - .await; + self.add_session_message(self.user_text_message_with_turn_metadata(steer)) + .await; } } @@ -1800,54 +1770,11 @@ impl Engine { } pub(super) fn messages_with_turn_metadata(&self) -> Vec { - let today = chrono::Local::now().format("%Y-%m-%d").to_string(); - let working_set_summary = self - .session - .working_set - .summary_block(&self.config.workspace) - .map(|s| s.trim().to_string()) - .filter(|s| !s.is_empty()); - - let summary = if let Some(working_set_summary) = working_set_summary { - format!("Current local date: {today}\n{working_set_summary}") - } else { - format!("Current local date: {today}") - }; - - let mut messages = self.session.messages.clone(); - // v0.8.11 hotfix: tool-result messages are stored as role="user" in - // our internal representation but serialize to role="tool" on the - // wire. Prepending a Text block onto a tool-result message breaks - // the assistant→tool_result invariant — the API rejects the request - // with `"insufficient tool messages following tool_calls"`. Inject - // only into actual user-typed messages, recognizable by having at - // least one Text content block (and no ToolResult blocks). - let Some(last_user) = messages.iter_mut().rev().find(|message| { - message.role == "user" - && message - .content - .iter() - .all(|block| !matches!(block, ContentBlock::ToolResult { .. })) - && message - .content - .iter() - .any(|block| matches!(block, ContentBlock::Text { .. })) - }) else { - // No real user message in the trailing slice (e.g. mid-turn - // after a tool call). Skip injection — the working_set will - // surface again on the next genuine user prompt. - return messages; - }; - - let turn_meta = format!("\n{summary}\n"); - last_user.content.insert( - 0, - ContentBlock::Text { - text: turn_meta, - cache_control: None, - }, - ); - messages + // `` is stored on user-text messages when the message is + // appended. Do not rewrite historical messages at request time: doing + // so makes the API prefix differ from the bytes sent in earlier turns + // and destroys DeepSeek's KV prefix cache reuse. + self.session.messages.clone() } } @@ -1887,7 +1814,11 @@ fn resolve_auto_effort(reasoning_effort: Option<&str>, messages: &[Message]) -> .iter() .filter_map(|block| { if let ContentBlock::Text { text, .. } = block { - Some(text.as_str()) + if is_turn_metadata_text(text) { + None + } else { + Some(text.as_str()) + } } else { None } @@ -1915,6 +1846,10 @@ fn resolve_auto_effort(reasoning_effort: Option<&str>, messages: &[Message]) -> } } +fn is_turn_metadata_text(text: &str) -> bool { + text.trim_start().starts_with("") +} + #[cfg(test)] mod tests { use super::*; @@ -1935,4 +1870,27 @@ mod tests { assert!(text.contains("")); assert!(text.contains("Build passed")); } + + #[test] + fn resolve_auto_effort_ignores_stored_turn_metadata() { + let messages = vec![Message { + role: "user".to_string(), + content: vec![ + ContentBlock::Text { + text: "\nRecent errors: src/failing.rs\n".to_string(), + cache_control: None, + }, + ContentBlock::Text { + text: "hello".to_string(), + cache_control: None, + }, + ], + }]; + + assert_eq!( + resolve_auto_effort(Some("auto"), &messages), + Some("high".to_string()), + "auto thinking should classify the user request, not stored metadata" + ); + } } diff --git a/crates/tui/src/prompts.rs b/crates/tui/src/prompts.rs index d77ca63a..b482f89f 100644 --- a/crates/tui/src/prompts.rs +++ b/crates/tui/src/prompts.rs @@ -448,7 +448,7 @@ pub fn system_prompt_for_mode_with_context_skills_session_and_approval( If you notice context is getting long (>80%), proactively suggest using `/compact` to the user.\n\n\ ### Prompt-cache awareness\n\n\ DeepSeek caches the longest *byte-stable prefix* of every request and charges roughly 100× less for cache-hit tokens than miss tokens. The system prompt above is layered most-static-first specifically so the prefix stays stable turn-over-turn. To keep cache hits high:\n\ - - **Working set location:** the current repo working set is injected into the latest user message inside a `` block. Treat it as high-priority turn metadata, not as a stable system-prompt section.\n\ + - **Working set location:** the current repo working set is stored on new user messages inside a `` block. Treat it as high-priority turn metadata, not as a stable system-prompt section.\n\ - **Append, don't reorder.** New context goes at the end (latest user / tool messages). Reshuffling earlier messages or rewriting their content invalidates the cache for everything after the change.\n\ - **Don't paraphrase quoted content.** If you've already read a file, refer to it by path or line range instead of re-quoting it with different formatting.\n\ - **Use `/compact` as a hard reset, not a tweak.** Compaction is meant for when the cache is already losing — it intentionally rewrites the prefix to a shorter summary. Don't trigger it for small wins.\n\