From 3e2c8329336dfcbb3ba6c47dc8093210c35f0edb Mon Sep 17 00:00:00 2001
From: Hunter Bown <hmbown@gmail.com>
Date: Thu, 7 May 2026 04:45:55 -0500
Subject: [PATCH] fix(api): narrow reasoning replay policy (#1009)

---
 crates/tui/src/client.rs      | 133 +++++++++++++++++++++++++++++++++-
 crates/tui/src/client/chat.rs |  46 +++++++-----
 2 files changed, 156 insertions(+), 23 deletions(-)

diff --git a/crates/tui/src/client.rs b/crates/tui/src/client.rs
index b5a0eb56..be262748 100644
--- a/crates/tui/src/client.rs
+++ b/crates/tui/src/client.rs
@@ -1073,7 +1073,7 @@ mod tests {
     }
 
     #[test]
-    fn chat_messages_keep_reasoning_content_on_all_assistant_messages() {
+    fn chat_messages_keep_current_turn_reasoning_content() {
         let message = Message {
             role: "assistant".to_string(),
             content: vec![
@@ -1098,7 +1098,58 @@ mod tests {
         assert_eq!(
             assistant.get("reasoning_content").and_then(Value::as_str),
             Some("plan"),
-            "thinking-mode models must keep reasoning_content on all assistant messages"
+            "thinking-mode models keep reasoning_content while still in the current turn"
+        );
+    }
+
+    #[test]
+    fn chat_messages_replay_tool_round_reasoning_before_new_user_turn() {
+        let messages = vec![
+            Message {
+                role: "user".to_string(),
+                content: vec![ContentBlock::Text {
+                    text: "Need the date".to_string(),
+                    cache_control: None,
+                }],
+            },
+            Message {
+                role: "assistant".to_string(),
+                content: vec![
+                    ContentBlock::Thinking {
+                        thinking: "Need to call a tool".to_string(),
+                    },
+                    ContentBlock::ToolUse {
+                        id: "tool-1".to_string(),
+                        name: "get_date".to_string(),
+                        input: json!({}),
+                        caller: None,
+                    },
+                ],
+            },
+            Message {
+                role: "user".to_string(),
+                content: vec![ContentBlock::ToolResult {
+                    tool_use_id: "tool-1".to_string(),
+                    content: "2026-04-23".to_string(),
+                    is_error: None,
+                    content_blocks: None,
+                }],
+            },
+        ];
+        let out = build_chat_messages(None, &messages, "deepseek-v4-pro");
+        let tool_assistant = out
+            .iter()
+            .find(|value| {
+                value.get("role").and_then(Value::as_str) == Some("assistant")
+                    && value.get("tool_calls").is_some()
+            })
+            .expect("tool-call assistant message");
+        assert_eq!(
+            tool_assistant
+                .get("reasoning_content")
+                .and_then(Value::as_str),
+            Some("Need to call a tool"),
+            "thinking-mode tool sub-turns must replay reasoning_content until the tool chain finishes"
         );
     }
 
@@ -1163,7 +1214,54 @@ mod tests {
                 .get("reasoning_content")
                 .and_then(Value::as_str),
             Some("Need to call a tool"),
-            "thinking-mode tool rounds must replay reasoning_content on later requests"
+            "tool-call reasoning_content must be replayed across later user turns"
+        );
+    }
+
+    #[test]
+    fn chat_messages_omit_prior_non_tool_reasoning_after_new_user_turn() {
+        let messages = vec![
+            Message {
+                role: "user".to_string(),
+                content: vec![ContentBlock::Text {
+                    text: "Explain it".to_string(),
+                    cache_control: None,
+                }],
+            },
+            Message {
+                role: "assistant".to_string(),
+                content: vec![
+                    ContentBlock::Thinking {
+                        thinking: "Internal explanation plan".to_string(),
+                    },
+                    ContentBlock::Text {
+                        text: "Final answer".to_string(),
+                        cache_control: None,
+                    },
+                ],
+            },
+            Message {
+                role: "user".to_string(),
+                content: vec![ContentBlock::Text {
+                    text: "Next question".to_string(),
+                    cache_control: None,
+                }],
+            },
+        ];
+
+        let out = build_chat_messages(None, &messages, "deepseek-v4-pro");
+        let assistant = out
+            .iter()
+            .find(|value| value.get("role").and_then(Value::as_str) == Some("assistant"))
+            .expect("assistant message");
+
+        assert_eq!(
+            assistant.get("content").and_then(Value::as_str),
+            Some("Final answer")
+        );
+        assert!(
+            assistant.get("reasoning_content").is_none(),
+            "non-tool reasoning from previous turns should not be replayed"
         );
     }
 
@@ -2000,6 +2098,35 @@ mod tests {
         assert_eq!(chars, 19);
     }
 
+    #[test]
+    fn sanitize_thinking_mode_keeps_tool_call_placeholder_after_new_user_turn() {
+        let mut body = json!({
+            "model": "deepseek-v4-pro",
+            "messages": [
+                { "role": "user", "content": "step 1" },
+                {
+                    "role": "assistant",
+                    "content": "",
+                    "tool_calls": [{ "id": "1", "type": "function" }]
+                },
+                { "role": "tool", "tool_call_id": "1", "content": "ok" },
+                { "role": "user", "content": "step 2" }
+            ]
+        });
+
+        sanitize_thinking_mode_messages(&mut body, "deepseek-v4-pro", Some("max"));
+
+        let messages = body["messages"].as_array().unwrap();
+        let assistant = messages
+            .iter()
+            .find(|m| m["role"] == "assistant")
+            .expect("assistant tool-call message");
+        assert_eq!(
+            assistant.get("reasoning_content").and_then(Value::as_str),
+            Some("(reasoning omitted)")
+        );
+    }
+
     #[test]
     fn token_bucket_enforces_delay_when_empty() {
         let now = Instant::now();
diff --git a/crates/tui/src/client/chat.rs b/crates/tui/src/client/chat.rs
index cd854ec2..3f8cfefc 100644
--- a/crates/tui/src/client/chat.rs
+++ b/crates/tui/src/client/chat.rs
@@ -437,13 +437,16 @@ fn build_chat_messages_with_reasoning(
         }));
     }
 
-    for message in messages.iter() {
+    for (message_index, message) in messages.iter().enumerate() {
         let role = message.role.as_str();
         let mut text_parts = Vec::new();
         let mut thinking_parts = Vec::new();
         let mut tool_calls = Vec::new();
         let mut tool_call_ids = Vec::new();
         let mut tool_results: Vec<(String, Value)> = Vec::new();
+        let later_user_turn = messages[message_index + 1..]
+            .iter()
+            .any(message_starts_user_turn);
 
         for block in &message.content {
             match block {
@@ -499,19 +502,14 @@ fn build_chat_messages_with_reasoning(
             let mut reasoning_content = thinking_parts.join("\n");
             let has_text = !content.trim().is_empty();
             let has_tool_calls = !tool_calls.is_empty();
-            // DeepSeek thinking-mode rule: every assistant message in the
-            // conversation must carry its `reasoning_content` when thinking
-            // is enabled. The docs say non-tool-call messages' reasoning is
-            // "ignored", but the API still validates presence and rejects
-            // with a 400 if any assistant message is missing it. If reasoning
-            // was lost (e.g. a session checkpoint from before this rule was
-            // enforced, or a sub-turn with no streamed reasoning text),
-            // substitute a non-empty placeholder so the API accepts the
-            // request.
-            let include_reasoning_for_turn = include_reasoning;
+            // DeepSeek thinking-mode tool calls must replay `reasoning_content`
+            // on subsequent requests. Non-tool assistant reasoning can be
+            // omitted once a later real user text message starts a new turn.
+            let include_reasoning_for_turn =
+                include_reasoning && (has_tool_calls || !later_user_turn);
             let mut has_reasoning =
                 include_reasoning_for_turn && !reasoning_content.trim().is_empty();
-            if include_reasoning_for_turn && !has_reasoning {
+            if include_reasoning_for_turn && has_tool_calls && !has_reasoning {
                 logging::warn(
                     "Substituting placeholder reasoning_content for DeepSeek tool-call assistant message",
                 );
@@ -696,6 +694,14 @@ fn build_chat_messages_with_reasoning(
     out
 }
 
+fn message_starts_user_turn(message: &Message) -> bool {
+    message.role == "user"
+        && message.content.iter().any(|block| match block {
+            ContentBlock::Text { text, .. } => !text.trim().is_empty(),
+            _ => false,
+        })
+}
+
 pub(super) fn tool_to_chat(tool: &Tool) -> Value {
     let mut value = json!({
         "type": "function",
@@ -766,16 +772,15 @@ fn map_tool_choice_for_chat(choice: &Value) -> Option<Value> {
 }
 
 /// Final-pass sanitizer over the outgoing chat-completions JSON payload.
-/// Forces a non-empty `reasoning_content` onto every `assistant` message that
-/// carries `tool_calls`, when the model + effort combination requires it.
-/// DeepSeek's thinking-mode API rejects such messages with a 400 error;
-/// substituting a placeholder keeps the conversation chain intact.
+/// Forces a non-empty `reasoning_content` onto assistant messages that carry
+/// `tool_calls`, when the model + effort combination requires it. DeepSeek's
+/// thinking-mode API rejects such messages with a 400 error; substituting a
+/// placeholder keeps the conversation chain intact. Non-tool assistant
+/// reasoning can stay omitted once a later user text turn begins.
 ///
 /// Also tallies the size of all replayed `reasoning_content` and logs it, so
 /// users on `RUST_LOG=deepseek_tui=debug` can see how much of their input
-/// budget is being spent re-sending prior thinking traces (V4 §5.1.1
-/// "Interleaved Thinking" requires the full trace to be replayed across user
-/// message boundaries in tool-calling sessions).
+/// budget is being spent re-sending prior thinking traces.
 pub(super) fn sanitize_thinking_mode_messages(
     body: &mut Value,
     model: &str,
@@ -792,11 +797,12 @@ pub(super) fn sanitize_thinking_mode_messages(
         if msg.get("role").and_then(Value::as_str) != Some("assistant") {
             continue;
         }
+        let has_tool_calls = msg.get("tool_calls").is_some();
         let needs_placeholder = msg
             .get("reasoning_content")
             .and_then(Value::as_str)
             .is_none_or(|s| s.trim().is_empty());
-        if needs_placeholder {
+        if has_tool_calls && needs_placeholder {
             msg["reasoning_content"] = json!("(reasoning omitted)");
             substitutions = substitutions.saturating_add(1);
             logging::warn(format!(