fix(api): narrow reasoning replay policy (#1009)
This commit is contained in:
+130
-3
@@ -1073,7 +1073,7 @@ mod tests {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn chat_messages_keep_reasoning_content_on_all_assistant_messages() {
|
||||
fn chat_messages_keep_current_turn_reasoning_content() {
|
||||
let message = Message {
|
||||
role: "assistant".to_string(),
|
||||
content: vec![
|
||||
@@ -1098,7 +1098,58 @@ mod tests {
|
||||
assert_eq!(
|
||||
assistant.get("reasoning_content").and_then(Value::as_str),
|
||||
Some("plan"),
|
||||
"thinking-mode models must keep reasoning_content on all assistant messages"
|
||||
"thinking-mode models keep reasoning_content while still in the current turn"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn chat_messages_replay_tool_round_reasoning_before_new_user_turn() {
|
||||
let messages = vec![
|
||||
Message {
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentBlock::Text {
|
||||
text: "Need the date".to_string(),
|
||||
cache_control: None,
|
||||
}],
|
||||
},
|
||||
Message {
|
||||
role: "assistant".to_string(),
|
||||
content: vec![
|
||||
ContentBlock::Thinking {
|
||||
thinking: "Need to call a tool".to_string(),
|
||||
},
|
||||
ContentBlock::ToolUse {
|
||||
id: "tool-1".to_string(),
|
||||
name: "get_date".to_string(),
|
||||
input: json!({}),
|
||||
caller: None,
|
||||
},
|
||||
],
|
||||
},
|
||||
Message {
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentBlock::ToolResult {
|
||||
tool_use_id: "tool-1".to_string(),
|
||||
content: "2026-04-23".to_string(),
|
||||
is_error: None,
|
||||
content_blocks: None,
|
||||
}],
|
||||
},
|
||||
];
|
||||
let out = build_chat_messages(None, &messages, "deepseek-v4-pro");
|
||||
let tool_assistant = out
|
||||
.iter()
|
||||
.find(|value| {
|
||||
value.get("role").and_then(Value::as_str) == Some("assistant")
|
||||
&& value.get("tool_calls").is_some()
|
||||
})
|
||||
.expect("tool-call assistant message");
|
||||
assert_eq!(
|
||||
tool_assistant
|
||||
.get("reasoning_content")
|
||||
.and_then(Value::as_str),
|
||||
Some("Need to call a tool"),
|
||||
"thinking-mode tool sub-turns must replay reasoning_content until the tool chain finishes"
|
||||
);
|
||||
}
|
||||
|
||||
@@ -1163,7 +1214,54 @@ mod tests {
|
||||
.get("reasoning_content")
|
||||
.and_then(Value::as_str),
|
||||
Some("Need to call a tool"),
|
||||
"thinking-mode tool rounds must replay reasoning_content on later requests"
|
||||
"tool-call reasoning_content must be replayed across later user turns"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn chat_messages_omit_prior_non_tool_reasoning_after_new_user_turn() {
|
||||
let messages = vec![
|
||||
Message {
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentBlock::Text {
|
||||
text: "Explain it".to_string(),
|
||||
cache_control: None,
|
||||
}],
|
||||
},
|
||||
Message {
|
||||
role: "assistant".to_string(),
|
||||
content: vec![
|
||||
ContentBlock::Thinking {
|
||||
thinking: "Internal explanation plan".to_string(),
|
||||
},
|
||||
ContentBlock::Text {
|
||||
text: "Final answer".to_string(),
|
||||
cache_control: None,
|
||||
},
|
||||
],
|
||||
},
|
||||
Message {
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentBlock::Text {
|
||||
text: "Next question".to_string(),
|
||||
cache_control: None,
|
||||
}],
|
||||
},
|
||||
];
|
||||
|
||||
let out = build_chat_messages(None, &messages, "deepseek-v4-pro");
|
||||
let assistant = out
|
||||
.iter()
|
||||
.find(|value| value.get("role").and_then(Value::as_str) == Some("assistant"))
|
||||
.expect("assistant message");
|
||||
|
||||
assert_eq!(
|
||||
assistant.get("content").and_then(Value::as_str),
|
||||
Some("Final answer")
|
||||
);
|
||||
assert!(
|
||||
assistant.get("reasoning_content").is_none(),
|
||||
"non-tool reasoning from previous turns should not be replayed"
|
||||
);
|
||||
}
|
||||
|
||||
@@ -2000,6 +2098,35 @@ mod tests {
|
||||
assert_eq!(chars, 19);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sanitize_thinking_mode_keeps_tool_call_placeholder_after_new_user_turn() {
|
||||
let mut body = json!({
|
||||
"model": "deepseek-v4-pro",
|
||||
"messages": [
|
||||
{ "role": "user", "content": "step 1" },
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "",
|
||||
"tool_calls": [{ "id": "1", "type": "function" }]
|
||||
},
|
||||
{ "role": "tool", "tool_call_id": "1", "content": "ok" },
|
||||
{ "role": "user", "content": "step 2" }
|
||||
]
|
||||
});
|
||||
|
||||
sanitize_thinking_mode_messages(&mut body, "deepseek-v4-pro", Some("max"));
|
||||
|
||||
let messages = body["messages"].as_array().unwrap();
|
||||
let assistant = messages
|
||||
.iter()
|
||||
.find(|m| m["role"] == "assistant")
|
||||
.expect("assistant tool-call message");
|
||||
assert_eq!(
|
||||
assistant.get("reasoning_content").and_then(Value::as_str),
|
||||
Some("(reasoning omitted)")
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn token_bucket_enforces_delay_when_empty() {
|
||||
let now = Instant::now();
|
||||
|
||||
@@ -437,13 +437,16 @@ fn build_chat_messages_with_reasoning(
|
||||
}));
|
||||
}
|
||||
|
||||
for message in messages.iter() {
|
||||
for (message_index, message) in messages.iter().enumerate() {
|
||||
let role = message.role.as_str();
|
||||
let mut text_parts = Vec::new();
|
||||
let mut thinking_parts = Vec::new();
|
||||
let mut tool_calls = Vec::new();
|
||||
let mut tool_call_ids = Vec::new();
|
||||
let mut tool_results: Vec<(String, Value)> = Vec::new();
|
||||
let later_user_turn = messages[message_index + 1..]
|
||||
.iter()
|
||||
.any(message_starts_user_turn);
|
||||
|
||||
for block in &message.content {
|
||||
match block {
|
||||
@@ -499,19 +502,14 @@ fn build_chat_messages_with_reasoning(
|
||||
let mut reasoning_content = thinking_parts.join("\n");
|
||||
let has_text = !content.trim().is_empty();
|
||||
let has_tool_calls = !tool_calls.is_empty();
|
||||
// DeepSeek thinking-mode rule: every assistant message in the
|
||||
// conversation must carry its `reasoning_content` when thinking
|
||||
// is enabled. The docs say non-tool-call messages' reasoning is
|
||||
// "ignored", but the API still validates presence and rejects
|
||||
// with a 400 if any assistant message is missing it. If reasoning
|
||||
// was lost (e.g. a session checkpoint from before this rule was
|
||||
// enforced, or a sub-turn with no streamed reasoning text),
|
||||
// substitute a non-empty placeholder so the API accepts the
|
||||
// request.
|
||||
let include_reasoning_for_turn = include_reasoning;
|
||||
// DeepSeek thinking-mode tool calls must replay `reasoning_content`
|
||||
// on subsequent requests. Non-tool assistant reasoning can be
|
||||
// omitted once a later real user text message starts a new turn.
|
||||
let include_reasoning_for_turn =
|
||||
include_reasoning && (has_tool_calls || !later_user_turn);
|
||||
let mut has_reasoning =
|
||||
include_reasoning_for_turn && !reasoning_content.trim().is_empty();
|
||||
if include_reasoning_for_turn && !has_reasoning {
|
||||
if include_reasoning_for_turn && has_tool_calls && !has_reasoning {
|
||||
logging::warn(
|
||||
"Substituting placeholder reasoning_content for DeepSeek tool-call assistant message",
|
||||
);
|
||||
@@ -696,6 +694,14 @@ fn build_chat_messages_with_reasoning(
|
||||
out
|
||||
}
|
||||
|
||||
fn message_starts_user_turn(message: &Message) -> bool {
|
||||
message.role == "user"
|
||||
&& message.content.iter().any(|block| match block {
|
||||
ContentBlock::Text { text, .. } => !text.trim().is_empty(),
|
||||
_ => false,
|
||||
})
|
||||
}
|
||||
|
||||
pub(super) fn tool_to_chat(tool: &Tool) -> Value {
|
||||
let mut value = json!({
|
||||
"type": "function",
|
||||
@@ -766,16 +772,15 @@ fn map_tool_choice_for_chat(choice: &Value) -> Option<Value> {
|
||||
}
|
||||
|
||||
/// Final-pass sanitizer over the outgoing chat-completions JSON payload.
|
||||
/// Forces a non-empty `reasoning_content` onto every `assistant` message that
|
||||
/// carries `tool_calls`, when the model + effort combination requires it.
|
||||
/// DeepSeek's thinking-mode API rejects such messages with a 400 error;
|
||||
/// substituting a placeholder keeps the conversation chain intact.
|
||||
/// Forces a non-empty `reasoning_content` onto assistant messages that carry
|
||||
/// `tool_calls`, when the model + effort combination requires it. DeepSeek's
|
||||
/// thinking-mode API rejects such messages with a 400 error; substituting a
|
||||
/// placeholder keeps the conversation chain intact. Non-tool assistant
|
||||
/// reasoning can stay omitted once a later user text turn begins.
|
||||
///
|
||||
/// Also tallies the size of all replayed `reasoning_content` and logs it, so
|
||||
/// users on `RUST_LOG=deepseek_tui=debug` can see how much of their input
|
||||
/// budget is being spent re-sending prior thinking traces (V4 §5.1.1
|
||||
/// "Interleaved Thinking" requires the full trace to be replayed across user
|
||||
/// message boundaries in tool-calling sessions).
|
||||
/// budget is being spent re-sending prior thinking traces.
|
||||
pub(super) fn sanitize_thinking_mode_messages(
|
||||
body: &mut Value,
|
||||
model: &str,
|
||||
@@ -792,11 +797,12 @@ pub(super) fn sanitize_thinking_mode_messages(
|
||||
if msg.get("role").and_then(Value::as_str) != Some("assistant") {
|
||||
continue;
|
||||
}
|
||||
let has_tool_calls = msg.get("tool_calls").is_some();
|
||||
let needs_placeholder = msg
|
||||
.get("reasoning_content")
|
||||
.and_then(Value::as_str)
|
||||
.is_none_or(|s| s.trim().is_empty());
|
||||
if needs_placeholder {
|
||||
if has_tool_calls && needs_placeholder {
|
||||
msg["reasoning_content"] = json!("(reasoning omitted)");
|
||||
substitutions = substitutions.saturating_add(1);
|
||||
logging::warn(format!(
|
||||
|
||||
Reference in New Issue
Block a user