fix(web_search): improve Volcengine reliability — 90 s timeout, retry, and faster model (#2439)

* fix(web_search): add timeout floor and retry for Volcengine provider

Volcengine's Responses API pipeline (web search + model inference +
JSON generation) can exceed the default 15 s timeout on complex
queries, causing ~50% of requests to fail with transport errors.

Changes:
- Enforce a 60 s minimum timeout for the Volcengine provider
- Separate connect_timeout (15 s) from total request timeout
- Retry transient transport errors up to 3 times (1 s / 2 s backoff)
- Add TCP keepalive, HTTP/2 keepalive, and User-Agent headers
  matching the patterns used in client.rs

Co-authored-by: Cursor <cursoragent@cursor.com>

* fix: raise Volcengine timeout floor to 90 s

Co-authored-by: Cursor <cursoragent@cursor.com>

* fix: correct stale comment — floor is 90 s, not 60 s

Co-authored-by: Cursor <cursoragent@cursor.com>

* fix(web_search): tighten Volcengine retry semantics

---------

Co-authored-by: Cursor <cursoragent@cursor.com>
Co-authored-by: Hunter B <hmbown@gmail.com>
This commit is contained in:
Implementist
2026-06-01 02:01:34 +08:00
committed by GitHub
parent 46134e5bb5
commit 45787bcf70
+84 -38
View File
@@ -740,6 +740,12 @@ impl WebSearchTool {
/// Search via Volcengine Ark Responses API web_search tool.
/// Uses strict JSON prompt constraints to extract structured results
/// from the model's search-augmented response.
///
/// Overrides the user-supplied timeout to a minimum of 90 s because the
/// Responses API pipeline (web search → model inference → JSON generation)
/// is inherently slower than simple search-API round-trips. A separate
/// `connect_timeout` of 15 s lets DNS/TLS failures surface quickly.
/// Transient transport errors are retried twice with exponential backoff.
async fn run_volcengine_search(
&self,
query: &str,
@@ -763,8 +769,18 @@ impl WebSearchTool {
)
})?;
// Volcengine Responses API pipeline (search + model inference) is
// slow, so enforce a floor of 90 s. The caller's value is used only
// when it exceeds 90_000 ms.
let effective_timeout = timeout_ms.max(90_000);
let client = reqwest::Client::builder()
.timeout(Duration::from_millis(timeout_ms))
.connect_timeout(Duration::from_secs(15))
.timeout(Duration::from_millis(effective_timeout))
.tcp_keepalive(Some(Duration::from_secs(30)))
.http2_keep_alive_interval(Some(Duration::from_secs(15)))
.http2_keep_alive_timeout(Duration::from_secs(20))
.user_agent(USER_AGENT)
.build()
.map_err(|e| {
ToolError::execution_failed(format!("Failed to build HTTP client: {e}"))
@@ -772,47 +788,77 @@ impl WebSearchTool {
let payload = volcengine_search_payload(query, max_results);
let resp = client
.post(VOLCENGINE_RESPONSES_ENDPOINT)
.header("Authorization", format!("Bearer {api_key}"))
.json(&payload)
.send()
.await
.map_err(|e| {
ToolError::execution_failed(format!("Volcengine search request failed: {e}"))
})?;
// Retry transient transport errors (DNS, connection reset, timeout)
// up to 2 times with exponential backoff: 1 s, 2 s.
let mut last_err: Option<ToolError> = None;
for attempt in 0..3 {
if attempt > 0 {
tokio::time::sleep(Duration::from_millis(1000 * (1 << (attempt - 1)))).await;
}
let status = resp.status();
let body = resp.text().await.map_err(|e| {
ToolError::execution_failed(format!("Failed to read Volcengine response: {e}"))
})?;
match client
.post(VOLCENGINE_RESPONSES_ENDPOINT)
.header("Authorization", format!("Bearer {api_key}"))
.json(&payload)
.send()
.await
{
Ok(resp) => {
let status = resp.status();
let body = resp.text().await.map_err(|e| {
ToolError::execution_failed(format!(
"Failed to read Volcengine response: {e}"
))
})?;
if !status.is_success() {
let msg = match status.as_u16() {
401 | 403 => "Volcengine API key rejected — check VOLCENGINE_API_KEY or `[search] api_key` in config.toml".to_string(),
429 => "Volcengine API rate-limited — wait and retry, or check your quota".to_string(),
_ => {
let truncated = truncate_error_body(&body);
format!("Volcengine search failed: HTTP {}{truncated}", status.as_u16())
if !status.is_success() {
let msg = match status.as_u16() {
401 | 403 => "Volcengine API key rejected — check VOLCENGINE_API_KEY or `[search] api_key` in config.toml".to_string(),
429 => "Volcengine API rate-limited — wait and retry, or check your quota".to_string(),
_ => {
let truncated = truncate_error_body(&body);
format!("Volcengine search failed: HTTP {}{truncated}", status.as_u16())
}
};
return Err(ToolError::execution_failed(msg));
}
let parsed: serde_json::Value = serde_json::from_str(&body).map_err(|e| {
ToolError::execution_failed(format!(
"Failed to parse Volcengine response: {e}"
))
})?;
if let Some(error) = volcengine_error_message(&parsed) {
return Err(ToolError::execution_failed(error));
}
let response_text = volcengine_extract_text(&parsed).ok_or_else(|| {
ToolError::execution_failed("Volcengine response contains no output text")
})?;
let results = parse_volcengine_results(&response_text, max_results);
return search_tool_result(query.to_string(), "volcengine", results, None);
}
};
return Err(ToolError::execution_failed(msg));
Err(e) => {
let is_transient = e.is_timeout() || e.is_connect();
if !is_transient || attempt == 2 {
return Err(ToolError::execution_failed(format!(
"Volcengine search request failed: {e}"
)));
}
last_err = Some(ToolError::execution_failed(format!(
"Volcengine search request failed (attempt {}/3): {e}",
attempt + 1
)));
}
}
}
let parsed: serde_json::Value = serde_json::from_str(&body).map_err(|e| {
ToolError::execution_failed(format!("Failed to parse Volcengine response: {e}"))
})?;
if let Some(error) = volcengine_error_message(&parsed) {
return Err(ToolError::execution_failed(error));
}
let response_text = volcengine_extract_text(&parsed).ok_or_else(|| {
ToolError::execution_failed("Volcengine response contains no output text")
})?;
let results = parse_volcengine_results(&response_text, max_results);
search_tool_result(query.to_string(), "volcengine", results, None)
// Unreachable — the final iteration always returns above.
Err(last_err.unwrap_or_else(|| {
ToolError::execution_failed("Volcengine search: unexpected retry exit")
}))
}
}
@@ -914,7 +960,7 @@ fn baidu_search_payload(query: &str, max_results: usize) -> Value {
fn volcengine_search_payload(query: &str, max_results: usize) -> Value {
json!({
"model": "doubao-seed-1-6-250615",
"model": "doubao-seed-2-0-lite-260428",
"stream": false,
"tools": [{"type": "web_search"}],
"input": [{