fix(client): strip reasoning content for generic providers
Fixes #1542.
This commit is contained in:
+119
-7
@@ -1085,7 +1085,8 @@ pub(crate) fn build_cache_warmup_request(request: &MessageRequest) -> MessageReq
|
|||||||
mod tests {
|
mod tests {
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::client::chat::{
|
use crate::client::chat::{
|
||||||
build_chat_messages, build_chat_messages_for_request, count_reasoning_replay_chars,
|
build_chat_messages, build_chat_messages_for_request,
|
||||||
|
build_chat_messages_for_request_and_provider, count_reasoning_replay_chars,
|
||||||
parse_chat_message, parse_sse_chunk, sanitize_thinking_mode_messages, tool_to_chat,
|
parse_chat_message, parse_sse_chunk, sanitize_thinking_mode_messages, tool_to_chat,
|
||||||
tool_to_chat_for_base_url,
|
tool_to_chat_for_base_url,
|
||||||
};
|
};
|
||||||
@@ -1258,6 +1259,62 @@ mod tests {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Regression test for #1542: the same request is rendered for two providers.
// The DeepSeek rendering must carry the assistant's thinking text as
// `reasoning_content`; the generic OpenAI rendering must keep the visible
// text and omit `reasoning_content` entirely.
#[test]
|
||||||
|
fn generic_openai_provider_drops_deepseek_reasoning_content() {
|
||||||
|
// One assistant turn holding a Thinking block ("plan") and a Text block ("done").
let request = MessageRequest {
|
||||||
|
model: "deepseek-v4-pro".to_string(),
|
||||||
|
messages: vec![Message {
|
||||||
|
role: "assistant".to_string(),
|
||||||
|
content: vec![
|
||||||
|
ContentBlock::Thinking {
|
||||||
|
thinking: "plan".to_string(),
|
||||||
|
},
|
||||||
|
ContentBlock::Text {
|
||||||
|
text: "done".to_string(),
|
||||||
|
cache_control: None,
|
||||||
|
},
|
||||||
|
],
|
||||||
|
}],
|
||||||
|
max_tokens: 16,
|
||||||
|
system: None,
|
||||||
|
tools: None,
|
||||||
|
tool_choice: None,
|
||||||
|
metadata: None,
|
||||||
|
thinking: None,
|
||||||
|
reasoning_effort: Some("max".to_string()),
|
||||||
|
stream: None,
|
||||||
|
temperature: None,
|
||||||
|
top_p: None,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Native DeepSeek provider: the thinking text is expected under `reasoning_content`.
let deepseek =
|
||||||
|
build_chat_messages_for_request_and_provider(&request, ApiProvider::Deepseek);
|
||||||
|
let native_assistant = deepseek
|
||||||
|
.iter()
|
||||||
|
.find(|value| value.get("role").and_then(Value::as_str) == Some("assistant"))
|
||||||
|
.expect("assistant message");
|
||||||
|
assert_eq!(
|
||||||
|
native_assistant
|
||||||
|
.get("reasoning_content")
|
||||||
|
.and_then(Value::as_str),
|
||||||
|
Some("plan")
|
||||||
|
);
|
||||||
|
|
||||||
|
// Generic OpenAI provider: same request, but only the visible text may remain.
let openai = build_chat_messages_for_request_and_provider(&request, ApiProvider::Openai);
|
||||||
|
let generic_assistant = openai
|
||||||
|
.iter()
|
||||||
|
.find(|value| value.get("role").and_then(Value::as_str) == Some("assistant"))
|
||||||
|
.expect("assistant message");
|
||||||
|
assert_eq!(
|
||||||
|
generic_assistant.get("content").and_then(Value::as_str),
|
||||||
|
Some("done")
|
||||||
|
);
|
||||||
|
// The key must be absent altogether, not merely null or empty.
assert!(
|
||||||
|
generic_assistant.get("reasoning_content").is_none(),
|
||||||
|
"generic OpenAI-compatible providers reject DeepSeek-only reasoning_content (#1542)"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn chat_messages_replay_tool_round_reasoning_before_new_user_turn() {
|
fn chat_messages_replay_tool_round_reasoning_before_new_user_turn() {
|
||||||
let messages = vec![
|
let messages = vec![
|
||||||
@@ -2529,9 +2586,13 @@ mod tests {
|
|||||||
]
|
]
|
||||||
});
|
});
|
||||||
|
|
||||||
let approx_tokens =
|
let approx_tokens = sanitize_thinking_mode_messages(
|
||||||
sanitize_thinking_mode_messages(&mut body, "deepseek-v4-pro", Some("max"))
|
&mut body,
|
||||||
.expect("multi-turn thinking-mode conversation should report replay tokens");
|
"deepseek-v4-pro",
|
||||||
|
Some("max"),
|
||||||
|
ApiProvider::Deepseek,
|
||||||
|
)
|
||||||
|
.expect("multi-turn thinking-mode conversation should report replay tokens");
|
||||||
// ~4 chars/token; 46 bytes of reasoning -> 11 tokens.
|
// ~4 chars/token; 46 bytes of reasoning -> 11 tokens.
|
||||||
assert_eq!(approx_tokens, 11);
|
assert_eq!(approx_tokens, 11);
|
||||||
|
|
||||||
@@ -2564,7 +2625,12 @@ mod tests {
|
|||||||
{ "role": "user", "content": "hi" }
|
{ "role": "user", "content": "hi" }
|
||||||
]
|
]
|
||||||
});
|
});
|
||||||
let result = sanitize_thinking_mode_messages(&mut body, "deepseek-v4-flash", None);
|
let result = sanitize_thinking_mode_messages(
|
||||||
|
&mut body,
|
||||||
|
"deepseek-v4-flash",
|
||||||
|
None,
|
||||||
|
ApiProvider::Deepseek,
|
||||||
|
);
|
||||||
// reasoning_effort is None → no thinking injection, result is None
|
// reasoning_effort is None → no thinking injection, result is None
|
||||||
assert!(result.is_none());
|
assert!(result.is_none());
|
||||||
}
|
}
|
||||||
@@ -2587,13 +2653,54 @@ mod tests {
|
|||||||
]
|
]
|
||||||
});
|
});
|
||||||
|
|
||||||
sanitize_thinking_mode_messages(&mut body, "deepseek-v4-pro", Some("max"));
|
sanitize_thinking_mode_messages(
|
||||||
|
&mut body,
|
||||||
|
"deepseek-v4-pro",
|
||||||
|
Some("max"),
|
||||||
|
ApiProvider::Deepseek,
|
||||||
|
);
|
||||||
|
|
||||||
let chars = count_reasoning_replay_chars(&body);
|
let chars = count_reasoning_replay_chars(&body);
|
||||||
// "(reasoning omitted)" is 19 bytes.
|
// "(reasoning omitted)" is 19 bytes.
|
||||||
assert_eq!(chars, 19);
|
assert_eq!(chars, 19);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Regression test for #1542: with `ApiProvider::Openai`,
// `sanitize_thinking_mode_messages` must return `None` and must not add a
// `reasoning_content` key to an assistant tool-call message, even though the
// model name and effort are the same "deepseek-v4-pro" / "max" pair used by
// the DeepSeek-provider tests in this module.
#[test]
|
||||||
|
fn sanitize_thinking_mode_skips_generic_openai_provider() {
|
||||||
|
// Assistant turn with empty content, a tool call, and no reasoning_content.
let mut body = json!({
|
||||||
|
"model": "deepseek-v4-pro",
|
||||||
|
"messages": [
|
||||||
|
{ "role": "user", "content": "hi" },
|
||||||
|
{
|
||||||
|
"role": "assistant",
|
||||||
|
"content": "",
|
||||||
|
"tool_calls": [{ "id": "1", "type": "function" }]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
});
|
||||||
|
|
||||||
|
let result = sanitize_thinking_mode_messages(
|
||||||
|
&mut body,
|
||||||
|
"deepseek-v4-pro",
|
||||||
|
Some("max"),
|
||||||
|
ApiProvider::Openai,
|
||||||
|
);
|
||||||
|
|
||||||
|
// No replay-token estimate is reported for the generic provider.
assert!(result.is_none());
|
||||||
|
let assistant = body["messages"]
|
||||||
|
.as_array()
|
||||||
|
.and_then(|messages| {
|
||||||
|
messages
|
||||||
|
.iter()
|
||||||
|
.find(|message| message["role"] == "assistant")
|
||||||
|
})
|
||||||
|
.expect("assistant message");
|
||||||
|
// The payload must be left without any reasoning_content key.
assert!(
|
||||||
|
assistant.get("reasoning_content").is_none(),
|
||||||
|
"generic OpenAI-compatible provider payload must not get reasoning_content (#1542)"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn sanitize_thinking_mode_keeps_tool_call_placeholder_after_new_user_turn() {
|
fn sanitize_thinking_mode_keeps_tool_call_placeholder_after_new_user_turn() {
|
||||||
let mut body = json!({
|
let mut body = json!({
|
||||||
@@ -2610,7 +2717,12 @@ mod tests {
|
|||||||
]
|
]
|
||||||
});
|
});
|
||||||
|
|
||||||
sanitize_thinking_mode_messages(&mut body, "deepseek-v4-pro", Some("max"));
|
sanitize_thinking_mode_messages(
|
||||||
|
&mut body,
|
||||||
|
"deepseek-v4-pro",
|
||||||
|
Some("max"),
|
||||||
|
ApiProvider::Deepseek,
|
||||||
|
);
|
||||||
|
|
||||||
let messages = body["messages"].as_array().unwrap();
|
let messages = body["messages"].as_array().unwrap();
|
||||||
let assistant = messages
|
let assistant = messages
|
||||||
|
|||||||
@@ -56,6 +56,7 @@ fn stream_idle_timeout() -> Duration {
|
|||||||
Duration::from_secs(secs)
|
Duration::from_secs(secs)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
use crate::config::ApiProvider;
|
||||||
use crate::llm_client::StreamEventBox;
|
use crate::llm_client::StreamEventBox;
|
||||||
use crate::logging;
|
use crate::logging;
|
||||||
use crate::models::{
|
use crate::models::{
|
||||||
@@ -75,7 +76,7 @@ impl DeepSeekClient {
|
|||||||
&self,
|
&self,
|
||||||
request: &MessageRequest,
|
request: &MessageRequest,
|
||||||
) -> Result<MessageResponse> {
|
) -> Result<MessageResponse> {
|
||||||
let messages = build_chat_messages_for_request(request);
|
let messages = build_chat_messages_for_request_and_provider(request, self.api_provider);
|
||||||
let mut body = json!({
|
let mut body = json!({
|
||||||
"model": request.model,
|
"model": request.model,
|
||||||
"messages": messages,
|
"messages": messages,
|
||||||
@@ -145,7 +146,7 @@ impl DeepSeekClient {
|
|||||||
request: MessageRequest,
|
request: MessageRequest,
|
||||||
) -> Result<StreamEventBox> {
|
) -> Result<StreamEventBox> {
|
||||||
// Try true SSE streaming via chat completions (widely supported)
|
// Try true SSE streaming via chat completions (widely supported)
|
||||||
let messages = build_chat_messages_for_request(&request);
|
let messages = build_chat_messages_for_request_and_provider(&request, self.api_provider);
|
||||||
let mut body = json!({
|
let mut body = json!({
|
||||||
"model": request.model,
|
"model": request.model,
|
||||||
"messages": messages,
|
"messages": messages,
|
||||||
@@ -193,6 +194,7 @@ impl DeepSeekClient {
|
|||||||
&mut body,
|
&mut body,
|
||||||
&request.model,
|
&request.model,
|
||||||
request.reasoning_effort.as_deref(),
|
request.reasoning_effort.as_deref(),
|
||||||
|
self.api_provider,
|
||||||
);
|
);
|
||||||
|
|
||||||
let url = api_url(&self.base_url, "chat/completions");
|
let url = api_url(&self.base_url, "chat/completions");
|
||||||
@@ -412,10 +414,18 @@ pub(super) fn build_chat_messages(
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
pub(super) fn build_chat_messages_for_request(request: &MessageRequest) -> Vec<Value> {
|
pub(super) fn build_chat_messages_for_request(request: &MessageRequest) -> Vec<Value> {
|
||||||
PromptBuilder::for_request(request).build()
|
PromptBuilder::for_request(request).build()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Builds the chat-completions `messages` array for `request`, tailored to
/// `provider`.
///
/// Thin wrapper: constructs a `PromptBuilder` from the request and delegates
/// to its `build_for_provider` method, returning the resulting JSON values.
pub(super) fn build_chat_messages_for_request_and_provider(
|
||||||
|
request: &MessageRequest,
|
||||||
|
provider: ApiProvider,
|
||||||
|
) -> Vec<Value> {
|
||||||
|
PromptBuilder::for_request(request).build_for_provider(provider)
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) fn inspect_prompt_for_request(request: &MessageRequest) -> PromptInspection {
|
pub(crate) fn inspect_prompt_for_request(request: &MessageRequest) -> PromptInspection {
|
||||||
PromptBuilder::for_request(request).inspect()
|
PromptBuilder::for_request(request).inspect()
|
||||||
}
|
}
|
||||||
@@ -441,6 +451,7 @@ impl<'a> PromptBuilder<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
fn build(self) -> Vec<Value> {
|
fn build(self) -> Vec<Value> {
|
||||||
build_chat_messages_with_reasoning(
|
build_chat_messages_with_reasoning(
|
||||||
self.system,
|
self.system,
|
||||||
@@ -451,6 +462,20 @@ impl<'a> PromptBuilder<'a> {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Consumes the builder and renders the chat messages for `provider`.
///
/// The reasoning-replay flag handed to `build_chat_messages_with_reasoning`
/// is computed by `should_replay_reasoning_content_for_provider` from the
/// provider, model and reasoning effort, rather than from model and effort
/// alone.
fn build_for_provider(self, provider: ApiProvider) -> Vec<Value> {
|
||||||
|
build_chat_messages_with_reasoning(
|
||||||
|
self.system,
|
||||||
|
self.messages,
|
||||||
|
self.model,
|
||||||
|
should_replay_reasoning_content_for_provider(
|
||||||
|
provider,
|
||||||
|
self.model,
|
||||||
|
self.reasoning_effort,
|
||||||
|
),
|
||||||
|
// NOTE(review): the meaning of this trailing flag is defined by
// `build_chat_messages_with_reasoning`, which is not visible here — confirm.
false,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
fn inspect(self) -> PromptInspection {
|
fn inspect(self) -> PromptInspection {
|
||||||
let messages = build_chat_messages_with_reasoning(
|
let messages = build_chat_messages_with_reasoning(
|
||||||
self.system,
|
self.system,
|
||||||
@@ -1445,8 +1470,9 @@ pub(super) fn sanitize_thinking_mode_messages(
|
|||||||
body: &mut Value,
|
body: &mut Value,
|
||||||
model: &str,
|
model: &str,
|
||||||
effort: Option<&str>,
|
effort: Option<&str>,
|
||||||
|
provider: ApiProvider,
|
||||||
) -> Option<u32> {
|
) -> Option<u32> {
|
||||||
if !should_replay_reasoning_content(model, effort) {
|
if !should_replay_reasoning_content_for_provider(provider, model, effort) {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
let messages = body.get_mut("messages").and_then(Value::as_array_mut)?;
|
let messages = body.get_mut("messages").and_then(Value::as_array_mut)?;
|
||||||
@@ -1604,6 +1630,29 @@ fn should_replay_reasoning_content(model: &str, effort: Option<&str>) -> bool {
|
|||||||
requires_reasoning_content(model)
|
requires_reasoning_content(model)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Provider-aware variant of `should_replay_reasoning_content`.
///
/// Returns `false` immediately when `provider_accepts_reasoning_content`
/// rejects `provider`; otherwise returns whatever
/// `should_replay_reasoning_content(model, effort)` decides.
fn should_replay_reasoning_content_for_provider(
|
||||||
|
provider: ApiProvider,
|
||||||
|
model: &str,
|
||||||
|
effort: Option<&str>,
|
||||||
|
) -> bool {
|
||||||
|
// The provider gate runs first, so model and effort are never consulted
// for a provider that does not accept the field.
if !provider_accepts_reasoning_content(provider) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
should_replay_reasoning_content(model, effort)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Allow-list of providers that may be sent `reasoning_content`.
///
/// Returns `true` only for the six variants listed below. Every other
/// `ApiProvider` variant returns `false`, including any variant added later
/// without being listed here, because `matches!` has an implicit catch-all.
fn provider_accepts_reasoning_content(provider: ApiProvider) -> bool {
|
||||||
|
matches!(
|
||||||
|
provider,
|
||||||
|
ApiProvider::Deepseek
|
||||||
|
| ApiProvider::DeepseekCN
|
||||||
|
| ApiProvider::Openrouter
|
||||||
|
| ApiProvider::Novita
|
||||||
|
| ApiProvider::Fireworks
|
||||||
|
| ApiProvider::Sglang
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
fn has_deepseek_r_series_marker(model_lower: &str) -> bool {
|
fn has_deepseek_r_series_marker(model_lower: &str) -> bool {
|
||||||
const PREFIX: &str = "deepseek-r";
|
const PREFIX: &str = "deepseek-r";
|
||||||
model_lower.match_indices(PREFIX).any(|(idx, _)| {
|
model_lower.match_indices(PREFIX).any(|(idx, _)| {
|
||||||
|
|||||||
Reference in New Issue
Block a user