fix(api): narrow reasoning replay policy (#1009)

This commit is contained in:
Hunter Bown
2026-05-07 04:45:55 -05:00
committed by GitHub
parent 17a010aecc
commit 3e2c832933
2 changed files with 156 additions and 23 deletions
+130 -3
View File
@@ -1073,7 +1073,7 @@ mod tests {
}
#[test]
fn chat_messages_keep_reasoning_content_on_all_assistant_messages() {
fn chat_messages_keep_current_turn_reasoning_content() {
let message = Message {
role: "assistant".to_string(),
content: vec![
@@ -1098,7 +1098,58 @@ mod tests {
assert_eq!(
assistant.get("reasoning_content").and_then(Value::as_str),
Some("plan"),
"thinking-mode models must keep reasoning_content on all assistant messages"
"thinking-mode models keep reasoning_content while still in the current turn"
);
}
#[test]
fn chat_messages_replay_tool_round_reasoning_before_new_user_turn() {
let messages = vec![
Message {
role: "user".to_string(),
content: vec![ContentBlock::Text {
text: "Need the date".to_string(),
cache_control: None,
}],
},
Message {
role: "assistant".to_string(),
content: vec![
ContentBlock::Thinking {
thinking: "Need to call a tool".to_string(),
},
ContentBlock::ToolUse {
id: "tool-1".to_string(),
name: "get_date".to_string(),
input: json!({}),
caller: None,
},
],
},
Message {
role: "user".to_string(),
content: vec![ContentBlock::ToolResult {
tool_use_id: "tool-1".to_string(),
content: "2026-04-23".to_string(),
is_error: None,
content_blocks: None,
}],
},
];
let out = build_chat_messages(None, &messages, "deepseek-v4-pro");
let tool_assistant = out
.iter()
.find(|value| {
value.get("role").and_then(Value::as_str) == Some("assistant")
&& value.get("tool_calls").is_some()
})
.expect("tool-call assistant message");
assert_eq!(
tool_assistant
.get("reasoning_content")
.and_then(Value::as_str),
Some("Need to call a tool"),
"thinking-mode tool sub-turns must replay reasoning_content until the tool chain finishes"
);
}
@@ -1163,7 +1214,54 @@ mod tests {
.get("reasoning_content")
.and_then(Value::as_str),
Some("Need to call a tool"),
"thinking-mode tool rounds must replay reasoning_content on later requests"
"tool-call reasoning_content must be replayed across later user turns"
);
}
#[test]
fn chat_messages_omit_prior_non_tool_reasoning_after_new_user_turn() {
let messages = vec![
Message {
role: "user".to_string(),
content: vec![ContentBlock::Text {
text: "Explain it".to_string(),
cache_control: None,
}],
},
Message {
role: "assistant".to_string(),
content: vec![
ContentBlock::Thinking {
thinking: "Internal explanation plan".to_string(),
},
ContentBlock::Text {
text: "Final answer".to_string(),
cache_control: None,
},
],
},
Message {
role: "user".to_string(),
content: vec![ContentBlock::Text {
text: "Next question".to_string(),
cache_control: None,
}],
},
];
let out = build_chat_messages(None, &messages, "deepseek-v4-pro");
let assistant = out
.iter()
.find(|value| value.get("role").and_then(Value::as_str) == Some("assistant"))
.expect("assistant message");
assert_eq!(
assistant.get("content").and_then(Value::as_str),
Some("Final answer")
);
assert!(
assistant.get("reasoning_content").is_none(),
"non-tool reasoning from previous turns should not be replayed"
);
}
@@ -2000,6 +2098,35 @@ mod tests {
assert_eq!(chars, 19);
}
#[test]
fn sanitize_thinking_mode_keeps_tool_call_placeholder_after_new_user_turn() {
let mut body = json!({
"model": "deepseek-v4-pro",
"messages": [
{ "role": "user", "content": "step 1" },
{
"role": "assistant",
"content": "",
"tool_calls": [{ "id": "1", "type": "function" }]
},
{ "role": "tool", "tool_call_id": "1", "content": "ok" },
{ "role": "user", "content": "step 2" }
]
});
sanitize_thinking_mode_messages(&mut body, "deepseek-v4-pro", Some("max"));
let messages = body["messages"].as_array().unwrap();
let assistant = messages
.iter()
.find(|m| m["role"] == "assistant")
.expect("assistant tool-call message");
assert_eq!(
assistant.get("reasoning_content").and_then(Value::as_str),
Some("(reasoning omitted)")
);
}
#[test]
fn token_bucket_enforces_delay_when_empty() {
let now = Instant::now();
+26 -20
View File
@@ -437,13 +437,16 @@ fn build_chat_messages_with_reasoning(
}));
}
for message in messages.iter() {
for (message_index, message) in messages.iter().enumerate() {
let role = message.role.as_str();
let mut text_parts = Vec::new();
let mut thinking_parts = Vec::new();
let mut tool_calls = Vec::new();
let mut tool_call_ids = Vec::new();
let mut tool_results: Vec<(String, Value)> = Vec::new();
let later_user_turn = messages[message_index + 1..]
.iter()
.any(message_starts_user_turn);
for block in &message.content {
match block {
@@ -499,19 +502,14 @@ fn build_chat_messages_with_reasoning(
let mut reasoning_content = thinking_parts.join("\n");
let has_text = !content.trim().is_empty();
let has_tool_calls = !tool_calls.is_empty();
// DeepSeek thinking-mode rule: every assistant message in the
// conversation must carry its `reasoning_content` when thinking
// is enabled. The docs say non-tool-call messages' reasoning is
// "ignored", but the API still validates presence and rejects
// with a 400 if any assistant message is missing it. If reasoning
// was lost (e.g. a session checkpoint from before this rule was
// enforced, or a sub-turn with no streamed reasoning text),
// substitute a non-empty placeholder so the API accepts the
// request.
let include_reasoning_for_turn = include_reasoning;
// DeepSeek thinking-mode tool calls must replay `reasoning_content`
// on subsequent requests. Non-tool assistant reasoning can be
// omitted once a later real user text message starts a new turn.
let include_reasoning_for_turn =
include_reasoning && (has_tool_calls || !later_user_turn);
let mut has_reasoning =
include_reasoning_for_turn && !reasoning_content.trim().is_empty();
if include_reasoning_for_turn && !has_reasoning {
if include_reasoning_for_turn && has_tool_calls && !has_reasoning {
logging::warn(
"Substituting placeholder reasoning_content for DeepSeek tool-call assistant message",
);
@@ -696,6 +694,14 @@ fn build_chat_messages_with_reasoning(
out
}
fn message_starts_user_turn(message: &Message) -> bool {
message.role == "user"
&& message.content.iter().any(|block| match block {
ContentBlock::Text { text, .. } => !text.trim().is_empty(),
_ => false,
})
}
pub(super) fn tool_to_chat(tool: &Tool) -> Value {
let mut value = json!({
"type": "function",
@@ -766,16 +772,15 @@ fn map_tool_choice_for_chat(choice: &Value) -> Option<Value> {
}
/// Final-pass sanitizer over the outgoing chat-completions JSON payload.
/// Forces a non-empty `reasoning_content` onto every `assistant` message that
/// carries `tool_calls`, when the model + effort combination requires it.
/// DeepSeek's thinking-mode API rejects such messages with a 400 error;
/// substituting a placeholder keeps the conversation chain intact.
/// Forces a non-empty `reasoning_content` onto assistant messages that carry
/// `tool_calls`, when the model + effort combination requires it. DeepSeek's
/// thinking-mode API rejects such messages with a 400 error; substituting a
/// placeholder keeps the conversation chain intact. Non-tool assistant
/// reasoning can stay omitted once a later user text turn begins.
///
/// Also tallies the size of all replayed `reasoning_content` and logs it, so
/// users on `RUST_LOG=deepseek_tui=debug` can see how much of their input
/// budget is being spent re-sending prior thinking traces (V4 §5.1.1
/// "Interleaved Thinking" requires the full trace to be replayed across user
/// message boundaries in tool-calling sessions).
/// budget is being spent re-sending prior thinking traces.
pub(super) fn sanitize_thinking_mode_messages(
body: &mut Value,
model: &str,
@@ -792,11 +797,12 @@ pub(super) fn sanitize_thinking_mode_messages(
if msg.get("role").and_then(Value::as_str) != Some("assistant") {
continue;
}
let has_tool_calls = msg.get("tool_calls").is_some();
let needs_placeholder = msg
.get("reasoning_content")
.and_then(Value::as_str)
.is_none_or(|s| s.trim().is_empty());
if needs_placeholder {
if has_tool_calls && needs_placeholder {
msg["reasoning_content"] = json!("(reasoning omitted)");
substitutions = substitutions.saturating_add(1);
logging::warn(format!(