fix(client): strip reasoning content for generic providers
Fixes #1542.
This commit is contained in:
+119
-7
@@ -1085,7 +1085,8 @@ pub(crate) fn build_cache_warmup_request(request: &MessageRequest) -> MessageReq
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::client::chat::{
|
||||
build_chat_messages, build_chat_messages_for_request, count_reasoning_replay_chars,
|
||||
build_chat_messages, build_chat_messages_for_request,
|
||||
build_chat_messages_for_request_and_provider, count_reasoning_replay_chars,
|
||||
parse_chat_message, parse_sse_chunk, sanitize_thinking_mode_messages, tool_to_chat,
|
||||
tool_to_chat_for_base_url,
|
||||
};
|
||||
@@ -1258,6 +1259,62 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
/// Regression test for #1542: the same request must carry `reasoning_content`
/// on the assistant message when built for `ApiProvider::Deepseek`, and must
/// omit it when built for `ApiProvider::Openai`.
#[test]
fn generic_openai_provider_drops_deepseek_reasoning_content() {
    /// Returns the first message in `messages` whose `role` is `"assistant"`.
    fn assistant_of(messages: &[Value]) -> &Value {
        messages
            .iter()
            .find(|value| value.get("role").and_then(Value::as_str) == Some("assistant"))
            .expect("assistant message")
    }

    // A single assistant turn holding a thinking block followed by visible text.
    let assistant_turn = Message {
        role: "assistant".to_string(),
        content: vec![
            ContentBlock::Thinking {
                thinking: "plan".to_string(),
            },
            ContentBlock::Text {
                text: "done".to_string(),
                cache_control: None,
            },
        ],
    };
    let request = MessageRequest {
        model: "deepseek-v4-pro".to_string(),
        messages: vec![assistant_turn],
        max_tokens: 16,
        system: None,
        tools: None,
        tool_choice: None,
        metadata: None,
        thinking: None,
        reasoning_effort: Some("max".to_string()),
        stream: None,
        temperature: None,
        top_p: None,
    };

    // DeepSeek provider: the thinking text is replayed as `reasoning_content`.
    let deepseek_messages =
        build_chat_messages_for_request_and_provider(&request, ApiProvider::Deepseek);
    let native_reasoning = assistant_of(&deepseek_messages)
        .get("reasoning_content")
        .and_then(Value::as_str);
    assert_eq!(native_reasoning, Some("plan"));

    // OpenAI provider: the text content survives, the reasoning field does not.
    let openai_messages =
        build_chat_messages_for_request_and_provider(&request, ApiProvider::Openai);
    let generic_assistant = assistant_of(&openai_messages);
    let generic_content = generic_assistant.get("content").and_then(Value::as_str);
    assert_eq!(generic_content, Some("done"));
    assert!(
        generic_assistant.get("reasoning_content").is_none(),
        "generic OpenAI-compatible providers reject DeepSeek-only reasoning_content (#1542)"
    );
}
|
||||
|
||||
#[test]
|
||||
fn chat_messages_replay_tool_round_reasoning_before_new_user_turn() {
|
||||
let messages = vec![
|
||||
@@ -2529,9 +2586,13 @@ mod tests {
|
||||
]
|
||||
});
|
||||
|
||||
let approx_tokens =
|
||||
sanitize_thinking_mode_messages(&mut body, "deepseek-v4-pro", Some("max"))
|
||||
.expect("multi-turn thinking-mode conversation should report replay tokens");
|
||||
let approx_tokens = sanitize_thinking_mode_messages(
|
||||
&mut body,
|
||||
"deepseek-v4-pro",
|
||||
Some("max"),
|
||||
ApiProvider::Deepseek,
|
||||
)
|
||||
.expect("multi-turn thinking-mode conversation should report replay tokens");
|
||||
// ~4 chars/token; 46 bytes of reasoning -> 11 tokens.
|
||||
assert_eq!(approx_tokens, 11);
|
||||
|
||||
@@ -2564,7 +2625,12 @@ mod tests {
|
||||
{ "role": "user", "content": "hi" }
|
||||
]
|
||||
});
|
||||
let result = sanitize_thinking_mode_messages(&mut body, "deepseek-v4-flash", None);
|
||||
let result = sanitize_thinking_mode_messages(
|
||||
&mut body,
|
||||
"deepseek-v4-flash",
|
||||
None,
|
||||
ApiProvider::Deepseek,
|
||||
);
|
||||
// reasoning_effort is None → no thinking injection, result is None
|
||||
assert!(result.is_none());
|
||||
}
|
||||
@@ -2587,13 +2653,54 @@ mod tests {
|
||||
]
|
||||
});
|
||||
|
||||
sanitize_thinking_mode_messages(&mut body, "deepseek-v4-pro", Some("max"));
|
||||
sanitize_thinking_mode_messages(
|
||||
&mut body,
|
||||
"deepseek-v4-pro",
|
||||
Some("max"),
|
||||
ApiProvider::Deepseek,
|
||||
);
|
||||
|
||||
let chars = count_reasoning_replay_chars(&body);
|
||||
// "(reasoning omitted)" is 19 bytes.
|
||||
assert_eq!(chars, 19);
|
||||
}
|
||||
|
||||
/// Regression test for #1542: with `ApiProvider::Openai`,
/// `sanitize_thinking_mode_messages` must return `None` and must not add a
/// `reasoning_content` field to the assistant tool-call message.
#[test]
fn sanitize_thinking_mode_skips_generic_openai_provider() {
    // Assistant turn with empty content and a tool call, and no reasoning attached.
    let user_turn = json!({ "role": "user", "content": "hi" });
    let assistant_turn = json!({
        "role": "assistant",
        "content": "",
        "tool_calls": [{ "id": "1", "type": "function" }]
    });
    let mut body = json!({
        "model": "deepseek-v4-pro",
        "messages": [user_turn, assistant_turn]
    });

    let result = sanitize_thinking_mode_messages(
        &mut body,
        "deepseek-v4-pro",
        Some("max"),
        ApiProvider::Openai,
    );
    assert!(result.is_none());

    // The payload must still contain the assistant message, without reasoning.
    let assistant = body["messages"]
        .as_array()
        .into_iter()
        .flatten()
        .find(|message| message["role"] == "assistant")
        .expect("assistant message");
    assert!(
        assistant.get("reasoning_content").is_none(),
        "generic OpenAI-compatible provider payload must not get reasoning_content (#1542)"
    );
}
|
||||
|
||||
#[test]
|
||||
fn sanitize_thinking_mode_keeps_tool_call_placeholder_after_new_user_turn() {
|
||||
let mut body = json!({
|
||||
@@ -2610,7 +2717,12 @@ mod tests {
|
||||
]
|
||||
});
|
||||
|
||||
sanitize_thinking_mode_messages(&mut body, "deepseek-v4-pro", Some("max"));
|
||||
sanitize_thinking_mode_messages(
|
||||
&mut body,
|
||||
"deepseek-v4-pro",
|
||||
Some("max"),
|
||||
ApiProvider::Deepseek,
|
||||
);
|
||||
|
||||
let messages = body["messages"].as_array().unwrap();
|
||||
let assistant = messages
|
||||
|
||||
@@ -56,6 +56,7 @@ fn stream_idle_timeout() -> Duration {
|
||||
Duration::from_secs(secs)
|
||||
}
|
||||
|
||||
use crate::config::ApiProvider;
|
||||
use crate::llm_client::StreamEventBox;
|
||||
use crate::logging;
|
||||
use crate::models::{
|
||||
@@ -75,7 +76,7 @@ impl DeepSeekClient {
|
||||
&self,
|
||||
request: &MessageRequest,
|
||||
) -> Result<MessageResponse> {
|
||||
let messages = build_chat_messages_for_request(request);
|
||||
let messages = build_chat_messages_for_request_and_provider(request, self.api_provider);
|
||||
let mut body = json!({
|
||||
"model": request.model,
|
||||
"messages": messages,
|
||||
@@ -145,7 +146,7 @@ impl DeepSeekClient {
|
||||
request: MessageRequest,
|
||||
) -> Result<StreamEventBox> {
|
||||
// Try true SSE streaming via chat completions (widely supported)
|
||||
let messages = build_chat_messages_for_request(&request);
|
||||
let messages = build_chat_messages_for_request_and_provider(&request, self.api_provider);
|
||||
let mut body = json!({
|
||||
"model": request.model,
|
||||
"messages": messages,
|
||||
@@ -193,6 +194,7 @@ impl DeepSeekClient {
|
||||
&mut body,
|
||||
&request.model,
|
||||
request.reasoning_effort.as_deref(),
|
||||
self.api_provider,
|
||||
);
|
||||
|
||||
let url = api_url(&self.base_url, "chat/completions");
|
||||
@@ -412,10 +414,18 @@ pub(super) fn build_chat_messages(
|
||||
)
|
||||
}
|
||||
|
||||
/// Test-only helper: builds the chat-completions message array for `request`
/// through [`PromptBuilder::build`], without taking a provider into account.
#[cfg(test)]
pub(super) fn build_chat_messages_for_request(request: &MessageRequest) -> Vec<Value> {
    let builder = PromptBuilder::for_request(request);
    builder.build()
}
|
||||
|
||||
pub(super) fn build_chat_messages_for_request_and_provider(
|
||||
request: &MessageRequest,
|
||||
provider: ApiProvider,
|
||||
) -> Vec<Value> {
|
||||
PromptBuilder::for_request(request).build_for_provider(provider)
|
||||
}
|
||||
|
||||
pub(crate) fn inspect_prompt_for_request(request: &MessageRequest) -> PromptInspection {
|
||||
PromptBuilder::for_request(request).inspect()
|
||||
}
|
||||
@@ -441,6 +451,7 @@ impl<'a> PromptBuilder<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
fn build(self) -> Vec<Value> {
|
||||
build_chat_messages_with_reasoning(
|
||||
self.system,
|
||||
@@ -451,6 +462,20 @@ impl<'a> PromptBuilder<'a> {
|
||||
)
|
||||
}
|
||||
|
||||
fn build_for_provider(self, provider: ApiProvider) -> Vec<Value> {
|
||||
build_chat_messages_with_reasoning(
|
||||
self.system,
|
||||
self.messages,
|
||||
self.model,
|
||||
should_replay_reasoning_content_for_provider(
|
||||
provider,
|
||||
self.model,
|
||||
self.reasoning_effort,
|
||||
),
|
||||
false,
|
||||
)
|
||||
}
|
||||
|
||||
fn inspect(self) -> PromptInspection {
|
||||
let messages = build_chat_messages_with_reasoning(
|
||||
self.system,
|
||||
@@ -1445,8 +1470,9 @@ pub(super) fn sanitize_thinking_mode_messages(
|
||||
body: &mut Value,
|
||||
model: &str,
|
||||
effort: Option<&str>,
|
||||
provider: ApiProvider,
|
||||
) -> Option<u32> {
|
||||
if !should_replay_reasoning_content(model, effort) {
|
||||
if !should_replay_reasoning_content_for_provider(provider, model, effort) {
|
||||
return None;
|
||||
}
|
||||
let messages = body.get_mut("messages").and_then(Value::as_array_mut)?;
|
||||
@@ -1604,6 +1630,29 @@ fn should_replay_reasoning_content(model: &str, effort: Option<&str>) -> bool {
|
||||
requires_reasoning_content(model)
|
||||
}
|
||||
|
||||
fn should_replay_reasoning_content_for_provider(
|
||||
provider: ApiProvider,
|
||||
model: &str,
|
||||
effort: Option<&str>,
|
||||
) -> bool {
|
||||
if !provider_accepts_reasoning_content(provider) {
|
||||
return false;
|
||||
}
|
||||
should_replay_reasoning_content(model, effort)
|
||||
}
|
||||
|
||||
fn provider_accepts_reasoning_content(provider: ApiProvider) -> bool {
|
||||
matches!(
|
||||
provider,
|
||||
ApiProvider::Deepseek
|
||||
| ApiProvider::DeepseekCN
|
||||
| ApiProvider::Openrouter
|
||||
| ApiProvider::Novita
|
||||
| ApiProvider::Fireworks
|
||||
| ApiProvider::Sglang
|
||||
)
|
||||
}
|
||||
|
||||
fn has_deepseek_r_series_marker(model_lower: &str) -> bool {
|
||||
const PREFIX: &str = "deepseek-r";
|
||||
model_lower.match_indices(PREFIX).any(|(idx, _)| {
|
||||
|
||||
Reference in New Issue
Block a user