fix(client): strip reasoning content for generic providers

Fixes #1542.
This commit is contained in:
Hunter Bown
2026-05-13 00:17:14 -05:00
parent bad58784e4
commit ac01b225e7
2 changed files with 171 additions and 10 deletions
+119 -7
View File
@@ -1085,7 +1085,8 @@ pub(crate) fn build_cache_warmup_request(request: &MessageRequest) -> MessageReq
mod tests { mod tests {
use super::*; use super::*;
use crate::client::chat::{ use crate::client::chat::{
build_chat_messages, build_chat_messages_for_request, count_reasoning_replay_chars, build_chat_messages, build_chat_messages_for_request,
build_chat_messages_for_request_and_provider, count_reasoning_replay_chars,
parse_chat_message, parse_sse_chunk, sanitize_thinking_mode_messages, tool_to_chat, parse_chat_message, parse_sse_chunk, sanitize_thinking_mode_messages, tool_to_chat,
tool_to_chat_for_base_url, tool_to_chat_for_base_url,
}; };
@@ -1258,6 +1259,62 @@ mod tests {
); );
} }
#[test]
fn generic_openai_provider_drops_deepseek_reasoning_content() {
    // Regression test for #1542: the same assistant turn (thinking + text) is
    // serialized once for the native DeepSeek provider and once for a generic
    // OpenAI-compatible provider; only the former may carry `reasoning_content`.

    // Returns the first serialized message whose `role` is "assistant".
    fn assistant_of(messages: &[Value]) -> &Value {
        messages
            .iter()
            .find(|message| message.get("role").and_then(Value::as_str) == Some("assistant"))
            .expect("assistant message")
    }

    let assistant_turn = Message {
        role: String::from("assistant"),
        content: vec![
            ContentBlock::Thinking {
                thinking: String::from("plan"),
            },
            ContentBlock::Text {
                text: String::from("done"),
                cache_control: None,
            },
        ],
    };
    let request = MessageRequest {
        model: String::from("deepseek-v4-pro"),
        messages: vec![assistant_turn],
        max_tokens: 16,
        system: None,
        tools: None,
        tool_choice: None,
        metadata: None,
        thinking: None,
        reasoning_effort: Some(String::from("max")),
        stream: None,
        temperature: None,
        top_p: None,
    };

    // Native DeepSeek: the thinking block is replayed as `reasoning_content`.
    let native_payload =
        build_chat_messages_for_request_and_provider(&request, ApiProvider::Deepseek);
    let native_reasoning = assistant_of(&native_payload)
        .get("reasoning_content")
        .and_then(Value::as_str);
    assert_eq!(native_reasoning, Some("plan"));

    // Generic OpenAI-compatible: the text survives, the reasoning field does not.
    let generic_payload =
        build_chat_messages_for_request_and_provider(&request, ApiProvider::Openai);
    let generic_assistant = assistant_of(&generic_payload);
    assert_eq!(
        generic_assistant.get("content").and_then(Value::as_str),
        Some("done")
    );
    assert!(
        generic_assistant.get("reasoning_content").is_none(),
        "generic OpenAI-compatible providers reject DeepSeek-only reasoning_content (#1542)"
    );
}
#[test] #[test]
fn chat_messages_replay_tool_round_reasoning_before_new_user_turn() { fn chat_messages_replay_tool_round_reasoning_before_new_user_turn() {
let messages = vec![ let messages = vec![
@@ -2529,9 +2586,13 @@ mod tests {
] ]
}); });
let approx_tokens = let approx_tokens = sanitize_thinking_mode_messages(
sanitize_thinking_mode_messages(&mut body, "deepseek-v4-pro", Some("max")) &mut body,
.expect("multi-turn thinking-mode conversation should report replay tokens"); "deepseek-v4-pro",
Some("max"),
ApiProvider::Deepseek,
)
.expect("multi-turn thinking-mode conversation should report replay tokens");
// ~4 chars/token; 46 bytes of reasoning -> 11 tokens. // ~4 chars/token; 46 bytes of reasoning -> 11 tokens.
assert_eq!(approx_tokens, 11); assert_eq!(approx_tokens, 11);
@@ -2564,7 +2625,12 @@ mod tests {
{ "role": "user", "content": "hi" } { "role": "user", "content": "hi" }
] ]
}); });
let result = sanitize_thinking_mode_messages(&mut body, "deepseek-v4-flash", None); let result = sanitize_thinking_mode_messages(
&mut body,
"deepseek-v4-flash",
None,
ApiProvider::Deepseek,
);
// reasoning_effort is None → no thinking injection, result is None // reasoning_effort is None → no thinking injection, result is None
assert!(result.is_none()); assert!(result.is_none());
} }
@@ -2587,13 +2653,54 @@ mod tests {
] ]
}); });
sanitize_thinking_mode_messages(&mut body, "deepseek-v4-pro", Some("max")); sanitize_thinking_mode_messages(
&mut body,
"deepseek-v4-pro",
Some("max"),
ApiProvider::Deepseek,
);
let chars = count_reasoning_replay_chars(&body); let chars = count_reasoning_replay_chars(&body);
// "(reasoning omitted)" is 19 bytes. // "(reasoning omitted)" is 19 bytes.
assert_eq!(chars, 19); assert_eq!(chars, 19);
} }
#[test]
fn sanitize_thinking_mode_skips_generic_openai_provider() {
    // Regression test for #1542: with a generic OpenAI-compatible provider the
    // sanitizer must return `None` and must not add `reasoning_content` to an
    // assistant tool-call message, even for a DeepSeek model name with effort set.
    let mut payload = json!({
        "model": "deepseek-v4-pro",
        "messages": [
            { "role": "user", "content": "hi" },
            {
                "role": "assistant",
                "content": "",
                "tool_calls": [{ "id": "1", "type": "function" }]
            }
        ]
    });

    let replay_tokens = sanitize_thinking_mode_messages(
        &mut payload,
        "deepseek-v4-pro",
        Some("max"),
        ApiProvider::Openai,
    );
    assert!(replay_tokens.is_none());

    // A missing or non-array `messages` yields no candidates, so the same
    // `expect` fires as when no assistant entry exists.
    let assistant_entry = payload["messages"]
        .as_array()
        .into_iter()
        .flatten()
        .find(|entry| entry["role"] == "assistant")
        .expect("assistant message");
    assert!(
        assistant_entry.get("reasoning_content").is_none(),
        "generic OpenAI-compatible provider payload must not get reasoning_content (#1542)"
    );
}
#[test] #[test]
fn sanitize_thinking_mode_keeps_tool_call_placeholder_after_new_user_turn() { fn sanitize_thinking_mode_keeps_tool_call_placeholder_after_new_user_turn() {
let mut body = json!({ let mut body = json!({
@@ -2610,7 +2717,12 @@ mod tests {
] ]
}); });
sanitize_thinking_mode_messages(&mut body, "deepseek-v4-pro", Some("max")); sanitize_thinking_mode_messages(
&mut body,
"deepseek-v4-pro",
Some("max"),
ApiProvider::Deepseek,
);
let messages = body["messages"].as_array().unwrap(); let messages = body["messages"].as_array().unwrap();
let assistant = messages let assistant = messages
+52 -3
View File
@@ -56,6 +56,7 @@ fn stream_idle_timeout() -> Duration {
Duration::from_secs(secs) Duration::from_secs(secs)
} }
use crate::config::ApiProvider;
use crate::llm_client::StreamEventBox; use crate::llm_client::StreamEventBox;
use crate::logging; use crate::logging;
use crate::models::{ use crate::models::{
@@ -75,7 +76,7 @@ impl DeepSeekClient {
&self, &self,
request: &MessageRequest, request: &MessageRequest,
) -> Result<MessageResponse> { ) -> Result<MessageResponse> {
let messages = build_chat_messages_for_request(request); let messages = build_chat_messages_for_request_and_provider(request, self.api_provider);
let mut body = json!({ let mut body = json!({
"model": request.model, "model": request.model,
"messages": messages, "messages": messages,
@@ -145,7 +146,7 @@ impl DeepSeekClient {
request: MessageRequest, request: MessageRequest,
) -> Result<StreamEventBox> { ) -> Result<StreamEventBox> {
// Try true SSE streaming via chat completions (widely supported) // Try true SSE streaming via chat completions (widely supported)
let messages = build_chat_messages_for_request(&request); let messages = build_chat_messages_for_request_and_provider(&request, self.api_provider);
let mut body = json!({ let mut body = json!({
"model": request.model, "model": request.model,
"messages": messages, "messages": messages,
@@ -193,6 +194,7 @@ impl DeepSeekClient {
&mut body, &mut body,
&request.model, &request.model,
request.reasoning_effort.as_deref(), request.reasoning_effort.as_deref(),
self.api_provider,
); );
let url = api_url(&self.base_url, "chat/completions"); let url = api_url(&self.base_url, "chat/completions");
@@ -412,10 +414,18 @@ pub(super) fn build_chat_messages(
) )
} }
#[cfg(test)]
pub(super) fn build_chat_messages_for_request(request: &MessageRequest) -> Vec<Value> { pub(super) fn build_chat_messages_for_request(request: &MessageRequest) -> Vec<Value> {
PromptBuilder::for_request(request).build() PromptBuilder::for_request(request).build()
} }
/// Builds the chat-completions `messages` array for `request`, shaped for the
/// given `provider`.
///
/// Delegates to [`PromptBuilder::build_for_provider`], which decides per
/// provider whether reasoning content is replayed into the payload.
pub(super) fn build_chat_messages_for_request_and_provider(
    request: &MessageRequest,
    provider: ApiProvider,
) -> Vec<Value> {
    let builder = PromptBuilder::for_request(request);
    builder.build_for_provider(provider)
}
pub(crate) fn inspect_prompt_for_request(request: &MessageRequest) -> PromptInspection { pub(crate) fn inspect_prompt_for_request(request: &MessageRequest) -> PromptInspection {
PromptBuilder::for_request(request).inspect() PromptBuilder::for_request(request).inspect()
} }
@@ -441,6 +451,7 @@ impl<'a> PromptBuilder<'a> {
} }
} }
#[cfg(test)]
fn build(self) -> Vec<Value> { fn build(self) -> Vec<Value> {
build_chat_messages_with_reasoning( build_chat_messages_with_reasoning(
self.system, self.system,
@@ -451,6 +462,20 @@ impl<'a> PromptBuilder<'a> {
) )
} }
/// Consumes the builder and produces the chat messages for `provider`.
///
/// Reasoning replay is enabled only when
/// `should_replay_reasoning_content_for_provider` approves the combination of
/// provider, model and reasoning effort.
fn build_for_provider(self, provider: ApiProvider) -> Vec<Value> {
    let replay_reasoning =
        should_replay_reasoning_content_for_provider(provider, self.model, self.reasoning_effort);
    // NOTE(review): the meaning of the trailing `false` flag is defined by
    // `build_chat_messages_with_reasoning`, which is outside this view.
    build_chat_messages_with_reasoning(
        self.system,
        self.messages,
        self.model,
        replay_reasoning,
        false,
    )
}
fn inspect(self) -> PromptInspection { fn inspect(self) -> PromptInspection {
let messages = build_chat_messages_with_reasoning( let messages = build_chat_messages_with_reasoning(
self.system, self.system,
@@ -1445,8 +1470,9 @@ pub(super) fn sanitize_thinking_mode_messages(
body: &mut Value, body: &mut Value,
model: &str, model: &str,
effort: Option<&str>, effort: Option<&str>,
provider: ApiProvider,
) -> Option<u32> { ) -> Option<u32> {
if !should_replay_reasoning_content(model, effort) { if !should_replay_reasoning_content_for_provider(provider, model, effort) {
return None; return None;
} }
let messages = body.get_mut("messages").and_then(Value::as_array_mut)?; let messages = body.get_mut("messages").and_then(Value::as_array_mut)?;
@@ -1604,6 +1630,29 @@ fn should_replay_reasoning_content(model: &str, effort: Option<&str>) -> bool {
requires_reasoning_content(model) requires_reasoning_content(model)
} }
/// Provider-aware wrapper around `should_replay_reasoning_content`.
///
/// Returns `false` outright when `provider` does not accept the
/// `reasoning_content` field; otherwise defers to the model/effort check.
fn should_replay_reasoning_content_for_provider(
    provider: ApiProvider,
    model: &str,
    effort: Option<&str>,
) -> bool {
    // Short-circuit: the model/effort check only runs for accepting providers.
    provider_accepts_reasoning_content(provider) && should_replay_reasoning_content(model, effort)
}
/// Reports whether `provider` may be sent the `reasoning_content` message field.
///
/// Returns `true` only for the providers listed below; every other
/// `ApiProvider` variant yields `false`, so that field is never replayed to it
/// (see #1542: generic OpenAI-compatible providers reject it).
fn provider_accepts_reasoning_content(provider: ApiProvider) -> bool {
// Allow-list: variants not named here fall through to `false`.
matches!(
provider,
ApiProvider::Deepseek
| ApiProvider::DeepseekCN
| ApiProvider::Openrouter
| ApiProvider::Novita
| ApiProvider::Fireworks
| ApiProvider::Sglang
)
}
fn has_deepseek_r_series_marker(model_lower: &str) -> bool { fn has_deepseek_r_series_marker(model_lower: &str) -> bool {
const PREFIX: &str = "deepseek-r"; const PREFIX: &str = "deepseek-r";
model_lower.match_indices(PREFIX).any(|(idx, _)| { model_lower.match_indices(PREFIX).any(|(idx, _)| {