fix(api): narrow reasoning replay policy (#1009)

2026-05-07 04:45:55 -05:00
parent 17a010aecc
commit 3e2c832933
2 changed files with 156 additions and 23 deletions
@@ -1073,7 +1073,7 @@ mod tests {
    }

    #[test]
-    fn chat_messages_keep_reasoning_content_on_all_assistant_messages() {
+    fn chat_messages_keep_current_turn_reasoning_content() {
        let message = Message {
            role: "assistant".to_string(),
            content: vec![
@@ -1098,7 +1098,58 @@ mod tests {
        assert_eq!(
            assistant.get("reasoning_content").and_then(Value::as_str),
            Some("plan"),
-            "thinking-mode models must keep reasoning_content on all assistant messages"
+            "thinking-mode models keep reasoning_content while still in the current turn"
+        );
+    }
+
+    /// A tool-call assistant message whose tool result is the most recent
+    /// message (no new user text has arrived yet) must still carry its
+    /// thinking text as `reasoning_content`.
+    #[test]
+    fn chat_messages_replay_tool_round_reasoning_before_new_user_turn() {
+        let user_request = Message {
+            role: "user".to_string(),
+            content: vec![ContentBlock::Text {
+                text: "Need the date".to_string(),
+                cache_control: None,
+            }],
+        };
+        let assistant_tool_call = Message {
+            role: "assistant".to_string(),
+            content: vec![
+                ContentBlock::Thinking {
+                    thinking: "Need to call a tool".to_string(),
+                },
+                ContentBlock::ToolUse {
+                    id: "tool-1".to_string(),
+                    name: "get_date".to_string(),
+                    input: json!({}),
+                    caller: None,
+                },
+            ],
+        };
+        // The tool result travels in a user-role message with no text block.
+        let tool_result = Message {
+            role: "user".to_string(),
+            content: vec![ContentBlock::ToolResult {
+                tool_use_id: "tool-1".to_string(),
+                content: "2026-04-23".to_string(),
+                is_error: None,
+                content_blocks: None,
+            }],
+        };
+        let messages = vec![user_request, assistant_tool_call, tool_result];
+
+        let out = build_chat_messages(None, &messages, "deepseek-v4-pro");
+
+        // Pick out the assistant entry that carries the tool call.
+        let tool_assistant = out
+            .iter()
+            .filter(|entry| entry.get("role").and_then(Value::as_str) == Some("assistant"))
+            .find(|entry| entry.get("tool_calls").is_some())
+            .expect("tool-call assistant message");
+        let replayed = tool_assistant
+            .get("reasoning_content")
+            .and_then(Value::as_str);
+        assert_eq!(
+            replayed,
+            Some("Need to call a tool"),
+            "thinking-mode tool sub-turns must replay reasoning_content until the tool chain finishes"
        );
    }

@@ -1163,7 +1214,54 @@ mod tests {
                .get("reasoning_content")
                .and_then(Value::as_str),
            Some("Need to call a tool"),
-            "thinking-mode tool rounds must replay reasoning_content on later requests"
+            "tool-call reasoning_content must be replayed across later user turns"
+        );
+    }
+
+    /// Once a later user text message exists, the thinking attached to an
+    /// earlier text-only assistant reply must be left out of the payload while
+    /// the reply's answer text is kept.
+    #[test]
+    fn chat_messages_omit_prior_non_tool_reasoning_after_new_user_turn() {
+        // Builder for the two plain-text user messages used by this fixture.
+        let user_text = |text: &str| Message {
+            role: "user".to_string(),
+            content: vec![ContentBlock::Text {
+                text: text.to_string(),
+                cache_control: None,
+            }],
+        };
+        let first_reply = Message {
+            role: "assistant".to_string(),
+            content: vec![
+                ContentBlock::Thinking {
+                    thinking: "Internal explanation plan".to_string(),
+                },
+                ContentBlock::Text {
+                    text: "Final answer".to_string(),
+                    cache_control: None,
+                },
+            ],
+        };
+        let messages = vec![
+            user_text("Explain it"),
+            first_reply,
+            user_text("Next question"),
+        ];
+
+        let out = build_chat_messages(None, &messages, "deepseek-v4-pro");
+        let assistant = out
+            .iter()
+            .find(|entry| entry.get("role").and_then(Value::as_str) == Some("assistant"))
+            .expect("assistant message");
+
+        let answer = assistant.get("content").and_then(Value::as_str);
+        assert_eq!(answer, Some("Final answer"));
+        assert!(
+            assistant.get("reasoning_content").is_none(),
+            "non-tool reasoning from previous turns should not be replayed"
        );
    }

@@ -2000,6 +2098,35 @@ mod tests {
        assert_eq!(chars, 19);
    }

+    /// A tool-call assistant message that has no `reasoning_content` receives
+    /// the placeholder from the sanitizer even when a later user message
+    /// follows the tool result.
+    #[test]
+    fn sanitize_thinking_mode_keeps_tool_call_placeholder_after_new_user_turn() {
+        let conversation = json!([
+            { "role": "user", "content": "step 1" },
+            {
+                "role": "assistant",
+                "content": "",
+                "tool_calls": [{ "id": "1", "type": "function" }]
+            },
+            { "role": "tool", "tool_call_id": "1", "content": "ok" },
+            { "role": "user", "content": "step 2" }
+        ]);
+        let mut body = json!({
+            "model": "deepseek-v4-pro",
+            "messages": conversation,
+        });
+
+        sanitize_thinking_mode_messages(&mut body, "deepseek-v4-pro", Some("max"));
+
+        // Read back whatever the sanitizer left on the only assistant entry.
+        let placeholder = body["messages"]
+            .as_array()
+            .unwrap()
+            .iter()
+            .find(|m| m["role"] == "assistant")
+            .expect("assistant tool-call message")
+            .get("reasoning_content")
+            .and_then(Value::as_str);
+        assert_eq!(placeholder, Some("(reasoning omitted)"));
+    }
+
    #[test]
    fn token_bucket_enforces_delay_when_empty() {
        let now = Instant::now();
@@ -437,13 +437,16 @@ fn build_chat_messages_with_reasoning(
        }));
    }

-    for message in messages.iter() {
+    for (message_index, message) in messages.iter().enumerate() {
        let role = message.role.as_str();
        let mut text_parts = Vec::new();
        let mut thinking_parts = Vec::new();
        let mut tool_calls = Vec::new();
        let mut tool_call_ids = Vec::new();
        let mut tool_results: Vec<(String, Value)> = Vec::new();
+        let later_user_turn = messages[message_index + 1..]
+            .iter()
+            .any(message_starts_user_turn);

        for block in &message.content {
            match block {
@@ -499,19 +502,14 @@ fn build_chat_messages_with_reasoning(
            let mut reasoning_content = thinking_parts.join("\n");
            let has_text = !content.trim().is_empty();
            let has_tool_calls = !tool_calls.is_empty();
-            // DeepSeek thinking-mode rule: every assistant message in the
-            // conversation must carry its `reasoning_content` when thinking
-            // is enabled. The docs say non-tool-call messages' reasoning is
-            // "ignored", but the API still validates presence and rejects
-            // with a 400 if any assistant message is missing it. If reasoning
-            // was lost (e.g. a session checkpoint from before this rule was
-            // enforced, or a sub-turn with no streamed reasoning text),
-            // substitute a non-empty placeholder so the API accepts the
-            // request.
-            let include_reasoning_for_turn = include_reasoning;
+            // DeepSeek thinking-mode tool calls must replay `reasoning_content`
+            // on subsequent requests. Non-tool assistant reasoning can be
+            // omitted once a later real user text message starts a new turn.
+            let include_reasoning_for_turn =
+                include_reasoning && (has_tool_calls || !later_user_turn);
            let mut has_reasoning =
                include_reasoning_for_turn && !reasoning_content.trim().is_empty();
-            if include_reasoning_for_turn && !has_reasoning {
+            if include_reasoning_for_turn && has_tool_calls && !has_reasoning {
                logging::warn(
                    "Substituting placeholder reasoning_content for DeepSeek tool-call assistant message",
                );
@@ -696,6 +694,14 @@ fn build_chat_messages_with_reasoning(
    out
 }

+/// Reports whether `message` opens a new user turn.
+///
+/// A message qualifies only when its role is `"user"` and at least one of its
+/// content blocks is a `ContentBlock::Text` whose text is non-blank after
+/// trimming. User-role messages made up solely of other block kinds (for
+/// example tool results) therefore do not count.
+fn message_starts_user_turn(message: &Message) -> bool {
+    if message.role != "user" {
+        return false;
+    }
+    message.content.iter().any(|block| {
+        matches!(block, ContentBlock::Text { text, .. } if !text.trim().is_empty())
+    })
+}
+
 pub(super) fn tool_to_chat(tool: &Tool) -> Value {
    let mut value = json!({
        "type": "function",
@@ -766,16 +772,15 @@ fn map_tool_choice_for_chat(choice: &Value) -> Option<Value> {
 }

 /// Final-pass sanitizer over the outgoing chat-completions JSON payload.
-/// Forces a non-empty `reasoning_content` onto every `assistant` message that
-/// carries `tool_calls`, when the model + effort combination requires it.
-/// DeepSeek's thinking-mode API rejects such messages with a 400 error;
-/// substituting a placeholder keeps the conversation chain intact.
+/// Forces a non-empty `reasoning_content` onto assistant messages that carry
+/// `tool_calls`, when the model + effort combination requires it. DeepSeek's
+/// thinking-mode API rejects such messages with a 400 error; substituting a
+/// placeholder keeps the conversation chain intact. Non-tool assistant
+/// reasoning can stay omitted once a later user text turn begins.
 ///
 /// Also tallies the size of all replayed `reasoning_content` and logs it, so
 /// users on `RUST_LOG=deepseek_tui=debug` can see how much of their input
-/// budget is being spent re-sending prior thinking traces (V4 §5.1.1
-/// "Interleaved Thinking" requires the full trace to be replayed across user
-/// message boundaries in tool-calling sessions).
+/// budget is being spent re-sending prior thinking traces.
 pub(super) fn sanitize_thinking_mode_messages(
    body: &mut Value,
    model: &str,
@@ -792,11 +797,12 @@ pub(super) fn sanitize_thinking_mode_messages(
        if msg.get("role").and_then(Value::as_str) != Some("assistant") {
            continue;
        }
+        let has_tool_calls = msg.get("tool_calls").is_some();
        let needs_placeholder = msg
            .get("reasoning_content")
            .and_then(Value::as_str)
            .is_none_or(|s| s.trim().is_empty());
-        if needs_placeholder {
+        if has_tool_calls && needs_placeholder {
            msg["reasoning_content"] = json!("(reasoning omitted)");
            substitutions = substitutions.saturating_add(1);
            logging::warn(format!(