From 3e2c8329336dfcbb3ba6c47dc8093210c35f0edb Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Thu, 7 May 2026 04:45:55 -0500 Subject: [PATCH] fix(api): narrow reasoning replay policy (#1009) --- crates/tui/src/client.rs | 133 +++++++++++++++++++++++++++++++++- crates/tui/src/client/chat.rs | 46 +++++++----- 2 files changed, 156 insertions(+), 23 deletions(-) diff --git a/crates/tui/src/client.rs b/crates/tui/src/client.rs index b5a0eb56..be262748 100644 --- a/crates/tui/src/client.rs +++ b/crates/tui/src/client.rs @@ -1073,7 +1073,7 @@ mod tests { } #[test] - fn chat_messages_keep_reasoning_content_on_all_assistant_messages() { + fn chat_messages_keep_current_turn_reasoning_content() { let message = Message { role: "assistant".to_string(), content: vec![ @@ -1098,7 +1098,58 @@ mod tests { assert_eq!( assistant.get("reasoning_content").and_then(Value::as_str), Some("plan"), - "thinking-mode models must keep reasoning_content on all assistant messages" + "thinking-mode models keep reasoning_content while still in the current turn" + ); + } + + #[test] + fn chat_messages_replay_tool_round_reasoning_before_new_user_turn() { + let messages = vec![ + Message { + role: "user".to_string(), + content: vec![ContentBlock::Text { + text: "Need the date".to_string(), + cache_control: None, + }], + }, + Message { + role: "assistant".to_string(), + content: vec![ + ContentBlock::Thinking { + thinking: "Need to call a tool".to_string(), + }, + ContentBlock::ToolUse { + id: "tool-1".to_string(), + name: "get_date".to_string(), + input: json!({}), + caller: None, + }, + ], + }, + Message { + role: "user".to_string(), + content: vec![ContentBlock::ToolResult { + tool_use_id: "tool-1".to_string(), + content: "2026-04-23".to_string(), + is_error: None, + content_blocks: None, + }], + }, + ]; + let out = build_chat_messages(None, &messages, "deepseek-v4-pro"); + let tool_assistant = out + .iter() + .find(|value| { + value.get("role").and_then(Value::as_str) == Some("assistant") + && value.get("tool_calls").is_some() + }) + .expect("tool-call assistant message"); + assert_eq!( + tool_assistant + .get("reasoning_content") + .and_then(Value::as_str), + Some("Need to call a tool"), + "thinking-mode tool sub-turns must replay reasoning_content until the tool chain finishes" ); } @@ -1163,7 +1214,54 @@ mod tests { .get("reasoning_content") .and_then(Value::as_str), Some("Need to call a tool"), - "thinking-mode tool rounds must replay reasoning_content on later requests" + "tool-call reasoning_content must be replayed across later user turns" + ); + } + + #[test] + fn chat_messages_omit_prior_non_tool_reasoning_after_new_user_turn() { + let messages = vec![ + Message { + role: "user".to_string(), + content: vec![ContentBlock::Text { + text: "Explain it".to_string(), + cache_control: None, + }], + }, + Message { + role: "assistant".to_string(), + content: vec![ + ContentBlock::Thinking { + thinking: "Internal explanation plan".to_string(), + }, + ContentBlock::Text { + text: "Final answer".to_string(), + cache_control: None, + }, + ], + }, + Message { + role: "user".to_string(), + content: vec![ContentBlock::Text { + text: "Next question".to_string(), + cache_control: None, + }], + }, + ]; + + let out = build_chat_messages(None, &messages, "deepseek-v4-pro"); + let assistant = out + .iter() + .find(|value| value.get("role").and_then(Value::as_str) == Some("assistant")) + .expect("assistant message"); + + assert_eq!( + assistant.get("content").and_then(Value::as_str), + Some("Final answer") + ); + assert!( + assistant.get("reasoning_content").is_none(), + "non-tool reasoning from previous turns should not be replayed" ); } @@ -2000,6 +2098,35 @@ mod tests { assert_eq!(chars, 19); } + #[test] + fn sanitize_thinking_mode_keeps_tool_call_placeholder_after_new_user_turn() { + let mut body = json!({ + "model": "deepseek-v4-pro", + "messages": [ + { "role": "user", "content": "step 1" }, + { + "role": "assistant", + "content": "", + "tool_calls": [{ "id": "1", "type": "function" }] + }, + { "role": "tool", "tool_call_id": "1", "content": "ok" }, + { "role": "user", "content": "step 2" } + ] + }); + + sanitize_thinking_mode_messages(&mut body, "deepseek-v4-pro", Some("max")); + + let messages = body["messages"].as_array().unwrap(); + let assistant = messages + .iter() + .find(|m| m["role"] == "assistant") + .expect("assistant tool-call message"); + assert_eq!( + assistant.get("reasoning_content").and_then(Value::as_str), + Some("(reasoning omitted)") + ); + } + #[test] fn token_bucket_enforces_delay_when_empty() { let now = Instant::now(); diff --git a/crates/tui/src/client/chat.rs b/crates/tui/src/client/chat.rs index cd854ec2..3f8cfefc 100644 --- a/crates/tui/src/client/chat.rs +++ b/crates/tui/src/client/chat.rs @@ -437,13 +437,16 @@ fn build_chat_messages_with_reasoning( })); } - for message in messages.iter() { + for (message_index, message) in messages.iter().enumerate() { let role = message.role.as_str(); let mut text_parts = Vec::new(); let mut thinking_parts = Vec::new(); let mut tool_calls = Vec::new(); let mut tool_call_ids = Vec::new(); let mut tool_results: Vec<(String, Value)> = Vec::new(); + let later_user_turn = messages[message_index + 1..] + .iter() + .any(message_starts_user_turn); for block in &message.content { match block { @@ -499,19 +502,14 @@ fn build_chat_messages_with_reasoning( let mut reasoning_content = thinking_parts.join("\n"); let has_text = !content.trim().is_empty(); let has_tool_calls = !tool_calls.is_empty(); - // DeepSeek thinking-mode rule: every assistant message in the - // conversation must carry its `reasoning_content` when thinking - // is enabled. The docs say non-tool-call messages' reasoning is - // "ignored", but the API still validates presence and rejects - // with a 400 if any assistant message is missing it. If reasoning - // was lost (e.g. a session checkpoint from before this rule was - // enforced, or a sub-turn with no streamed reasoning text), - // substitute a non-empty placeholder so the API accepts the - // request. - let include_reasoning_for_turn = include_reasoning; + // DeepSeek thinking-mode tool calls must replay `reasoning_content` + // on subsequent requests. Non-tool assistant reasoning can be + // omitted once a later real user text message starts a new turn. + let include_reasoning_for_turn = + include_reasoning && (has_tool_calls || !later_user_turn); let mut has_reasoning = include_reasoning_for_turn && !reasoning_content.trim().is_empty(); - if include_reasoning_for_turn && !has_reasoning { + if include_reasoning_for_turn && has_tool_calls && !has_reasoning { logging::warn( "Substituting placeholder reasoning_content for DeepSeek tool-call assistant message", ); @@ -696,6 +694,14 @@ fn build_chat_messages_with_reasoning( out } +fn message_starts_user_turn(message: &Message) -> bool { + message.role == "user" + && message.content.iter().any(|block| match block { + ContentBlock::Text { text, .. } => !text.trim().is_empty(), + _ => false, + }) +} + pub(super) fn tool_to_chat(tool: &Tool) -> Value { let mut value = json!({ "type": "function", @@ -766,16 +772,15 @@ fn map_tool_choice_for_chat(choice: &Value) -> Option { } /// Final-pass sanitizer over the outgoing chat-completions JSON payload. -/// Forces a non-empty `reasoning_content` onto every `assistant` message that -/// carries `tool_calls`, when the model + effort combination requires it. -/// DeepSeek's thinking-mode API rejects such messages with a 400 error; -/// substituting a placeholder keeps the conversation chain intact. +/// Forces a non-empty `reasoning_content` onto assistant messages that carry +/// `tool_calls`, when the model + effort combination requires it. DeepSeek's +/// thinking-mode API rejects such messages with a 400 error; substituting a +/// placeholder keeps the conversation chain intact. Non-tool assistant +/// reasoning can stay omitted once a later user text turn begins. /// /// Also tallies the size of all replayed `reasoning_content` and logs it, so /// users on `RUST_LOG=deepseek_tui=debug` can see how much of their input -/// budget is being spent re-sending prior thinking traces (V4 ยง5.1.1 -/// "Interleaved Thinking" requires the full trace to be replayed across user -/// message boundaries in tool-calling sessions). +/// budget is being spent re-sending prior thinking traces. pub(super) fn sanitize_thinking_mode_messages( body: &mut Value, model: &str, @@ -792,11 +797,12 @@ pub(super) fn sanitize_thinking_mode_messages( if msg.get("role").and_then(Value::as_str) != Some("assistant") { continue; } + let has_tool_calls = msg.get("tool_calls").is_some(); let needs_placeholder = msg .get("reasoning_content") .and_then(Value::as_str) .is_none_or(|s| s.trim().is_empty()); - if needs_placeholder { + if has_tool_calls && needs_placeholder { msg["reasoning_content"] = json!("(reasoning omitted)"); substitutions = substitutions.saturating_add(1); logging::warn(format!(