fix(web_search): improve Volcengine reliability — 90 s timeout, retry, and faster model (#2439)
* fix(web_search): add timeout floor and retry for Volcengine provider Volcengine's Responses API pipeline (web search + model inference + JSON generation) can exceed the default 15 s timeout on complex queries, causing ~50% of requests to fail with transport errors. Changes: - Enforce a 60 s minimum timeout for the Volcengine provider - Separate connect_timeout (15 s) from total request timeout - Retry transient transport errors up to 3 times (1 s / 2 s backoff) - Add TCP keepalive, HTTP/2 keepalive, and User-Agent headers matching the patterns used in client.rs Co-authored-by: Cursor <cursoragent@cursor.com> * fix: raise Volcengine timeout floor to 90 s Co-authored-by: Cursor <cursoragent@cursor.com> * fix: correct stale comment — floor is 90 s, not 60 s Co-authored-by: Cursor <cursoragent@cursor.com> * fix(web_search): tighten Volcengine retry semantics --------- Co-authored-by: Cursor <cursoragent@cursor.com> Co-authored-by: Hunter B <hmbown@gmail.com>
This commit is contained in:
@@ -740,6 +740,12 @@ impl WebSearchTool {
|
||||
/// Search via Volcengine Ark Responses API web_search tool.
|
||||
/// Uses strict JSON prompt constraints to extract structured results
|
||||
/// from the model's search-augmented response.
|
||||
///
|
||||
/// Overrides the user-supplied timeout to a minimum of 90 s because the
|
||||
/// Responses API pipeline (web search → model inference → JSON generation)
|
||||
/// is inherently slower than simple search-API round-trips. A separate
|
||||
/// `connect_timeout` of 15 s lets DNS/TLS failures surface quickly.
|
||||
/// Transient transport errors are retried twice with exponential backoff.
|
||||
async fn run_volcengine_search(
|
||||
&self,
|
||||
query: &str,
|
||||
@@ -763,8 +769,18 @@ impl WebSearchTool {
|
||||
)
|
||||
})?;
|
||||
|
||||
// Volcengine Responses API pipeline (search + model inference) is
|
||||
// slow, so enforce a floor of 90 s. The caller's value is used only
|
||||
// when it exceeds 90_000 ms.
|
||||
let effective_timeout = timeout_ms.max(90_000);
|
||||
|
||||
let client = reqwest::Client::builder()
|
||||
.timeout(Duration::from_millis(timeout_ms))
|
||||
.connect_timeout(Duration::from_secs(15))
|
||||
.timeout(Duration::from_millis(effective_timeout))
|
||||
.tcp_keepalive(Some(Duration::from_secs(30)))
|
||||
.http2_keep_alive_interval(Some(Duration::from_secs(15)))
|
||||
.http2_keep_alive_timeout(Duration::from_secs(20))
|
||||
.user_agent(USER_AGENT)
|
||||
.build()
|
||||
.map_err(|e| {
|
||||
ToolError::execution_failed(format!("Failed to build HTTP client: {e}"))
|
||||
@@ -772,47 +788,77 @@ impl WebSearchTool {
|
||||
|
||||
let payload = volcengine_search_payload(query, max_results);
|
||||
|
||||
let resp = client
|
||||
.post(VOLCENGINE_RESPONSES_ENDPOINT)
|
||||
.header("Authorization", format!("Bearer {api_key}"))
|
||||
.json(&payload)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| {
|
||||
ToolError::execution_failed(format!("Volcengine search request failed: {e}"))
|
||||
})?;
|
||||
// Retry transient transport errors (DNS, connection reset, timeout)
|
||||
// up to 2 times with exponential backoff: 1 s, 2 s.
|
||||
let mut last_err: Option<ToolError> = None;
|
||||
for attempt in 0..3 {
|
||||
if attempt > 0 {
|
||||
tokio::time::sleep(Duration::from_millis(1000 * (1 << (attempt - 1)))).await;
|
||||
}
|
||||
|
||||
let status = resp.status();
|
||||
let body = resp.text().await.map_err(|e| {
|
||||
ToolError::execution_failed(format!("Failed to read Volcengine response: {e}"))
|
||||
})?;
|
||||
match client
|
||||
.post(VOLCENGINE_RESPONSES_ENDPOINT)
|
||||
.header("Authorization", format!("Bearer {api_key}"))
|
||||
.json(&payload)
|
||||
.send()
|
||||
.await
|
||||
{
|
||||
Ok(resp) => {
|
||||
let status = resp.status();
|
||||
let body = resp.text().await.map_err(|e| {
|
||||
ToolError::execution_failed(format!(
|
||||
"Failed to read Volcengine response: {e}"
|
||||
))
|
||||
})?;
|
||||
|
||||
if !status.is_success() {
|
||||
let msg = match status.as_u16() {
|
||||
401 | 403 => "Volcengine API key rejected — check VOLCENGINE_API_KEY or `[search] api_key` in config.toml".to_string(),
|
||||
429 => "Volcengine API rate-limited — wait and retry, or check your quota".to_string(),
|
||||
_ => {
|
||||
let truncated = truncate_error_body(&body);
|
||||
format!("Volcengine search failed: HTTP {} — {truncated}", status.as_u16())
|
||||
if !status.is_success() {
|
||||
let msg = match status.as_u16() {
|
||||
401 | 403 => "Volcengine API key rejected — check VOLCENGINE_API_KEY or `[search] api_key` in config.toml".to_string(),
|
||||
429 => "Volcengine API rate-limited — wait and retry, or check your quota".to_string(),
|
||||
_ => {
|
||||
let truncated = truncate_error_body(&body);
|
||||
format!("Volcengine search failed: HTTP {} — {truncated}", status.as_u16())
|
||||
}
|
||||
};
|
||||
return Err(ToolError::execution_failed(msg));
|
||||
}
|
||||
|
||||
let parsed: serde_json::Value = serde_json::from_str(&body).map_err(|e| {
|
||||
ToolError::execution_failed(format!(
|
||||
"Failed to parse Volcengine response: {e}"
|
||||
))
|
||||
})?;
|
||||
|
||||
if let Some(error) = volcengine_error_message(&parsed) {
|
||||
return Err(ToolError::execution_failed(error));
|
||||
}
|
||||
|
||||
let response_text = volcengine_extract_text(&parsed).ok_or_else(|| {
|
||||
ToolError::execution_failed("Volcengine response contains no output text")
|
||||
})?;
|
||||
|
||||
let results = parse_volcengine_results(&response_text, max_results);
|
||||
return search_tool_result(query.to_string(), "volcengine", results, None);
|
||||
}
|
||||
};
|
||||
return Err(ToolError::execution_failed(msg));
|
||||
Err(e) => {
|
||||
let is_transient = e.is_timeout() || e.is_connect();
|
||||
if !is_transient || attempt == 2 {
|
||||
return Err(ToolError::execution_failed(format!(
|
||||
"Volcengine search request failed: {e}"
|
||||
)));
|
||||
}
|
||||
last_err = Some(ToolError::execution_failed(format!(
|
||||
"Volcengine search request failed (attempt {}/3): {e}",
|
||||
attempt + 1
|
||||
)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let parsed: serde_json::Value = serde_json::from_str(&body).map_err(|e| {
|
||||
ToolError::execution_failed(format!("Failed to parse Volcengine response: {e}"))
|
||||
})?;
|
||||
|
||||
if let Some(error) = volcengine_error_message(&parsed) {
|
||||
return Err(ToolError::execution_failed(error));
|
||||
}
|
||||
|
||||
let response_text = volcengine_extract_text(&parsed).ok_or_else(|| {
|
||||
ToolError::execution_failed("Volcengine response contains no output text")
|
||||
})?;
|
||||
|
||||
let results = parse_volcengine_results(&response_text, max_results);
|
||||
search_tool_result(query.to_string(), "volcengine", results, None)
|
||||
// Unreachable — the final iteration always returns above.
|
||||
Err(last_err.unwrap_or_else(|| {
|
||||
ToolError::execution_failed("Volcengine search: unexpected retry exit")
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -914,7 +960,7 @@ fn baidu_search_payload(query: &str, max_results: usize) -> Value {
|
||||
|
||||
fn volcengine_search_payload(query: &str, max_results: usize) -> Value {
|
||||
json!({
|
||||
"model": "doubao-seed-1-6-250615",
|
||||
"model": "doubao-seed-2-0-lite-260428",
|
||||
"stream": false,
|
||||
"tools": [{"type": "web_search"}],
|
||||
"input": [{
|
||||
|
||||
Reference in New Issue
Block a user