feat(web_search): add Volcengine Ark search provider
Add Volcengine (火山引擎) as a new SearchProvider in web_search tool.
Uses Volcengine's Responses API with `tools: [{type: "web_search"}]`
and strict JSON prompt constraints to extract structured search results.
- Free tier: 20K queries/month per API key
- API key resolution: [search] api_key → VOLCENGINE_API_KEY →
VOLCENGINE_ARK_API_KEY → ARK_API_KEY env vars
- Select via `DEEPSEEK_SEARCH_PROVIDER=volcengine` or
`[search] provider = "volcengine"` in config.toml
Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
+3
-1
@@ -308,7 +308,7 @@ max_subagents = 10 # optional (1-20)
|
||||
# API-backed search.
|
||||
#
|
||||
# [search]
|
||||
# provider = "duckduckgo" # duckduckgo | bing | tavily | bocha | metaso | baidu
|
||||
# provider = "duckduckgo" # duckduckgo | bing | tavily | bocha | metaso | baidu | volcengine
|
||||
# # duckduckgo: HTML scrape with Bing fallback
|
||||
# # bing: HTML scrape, no API key
|
||||
# # tavily: https://tavily.com — AI search, needs api_key
|
||||
@@ -316,6 +316,8 @@ max_subagents = 10 # optional (1-20)
|
||||
# # metaso: https://metaso.cn — 秘塔AI搜索,每天 100 次免费
|
||||
# # 设置 METASO_API_KEY 或 [search] api_key 可提升额度
|
||||
# # baidu: 百度 AI Search via qianfan.baidubce.com,需 api_key
|
||||
# # volcengine: 火山引擎 Ark web_search (免费 2 万次/月), 需 api_key
|
||||
# # 也回退到 VOLCENGINE_API_KEY / VOLCENGINE_ARK_API_KEY / ARK_API_KEY 环境变量
|
||||
# api_key = "YOUR_SEARCH_KEY" # required for tavily, bocha, and baidu; optional for metaso
|
||||
# # WARNING: treat config.toml like a secret file when
|
||||
# # storing API keys. Prefer env vars for local smoke tests.
|
||||
|
||||
@@ -755,6 +755,12 @@ pub enum SearchProvider {
|
||||
alias = "baidu-ai-search"
|
||||
)]
|
||||
Baidu,
|
||||
/// Volcengine Ark web_search via Responses API. Requires api_key.
|
||||
/// Free tier: 20K queries/month per API key. Falls back to
|
||||
/// `VOLCENGINE_API_KEY` / `VOLCENGINE_ARK_API_KEY` / `ARK_API_KEY`
|
||||
/// env vars when `[search] api_key` is not set.
|
||||
#[serde(alias = "volcengine", alias = "ark", alias = "volc")]
|
||||
Volcengine,
|
||||
}
|
||||
|
||||
impl SearchProvider {
|
||||
@@ -769,6 +775,7 @@ impl SearchProvider {
|
||||
"baidu" | "baidu-search" | "baidu_search" | "baidu-ai-search" | "baidu_ai_search" => {
|
||||
Some(Self::Baidu)
|
||||
}
|
||||
"volcengine" | "ark" | "volc" | "volcengine-ark" => Some(Self::Volcengine),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
@@ -782,6 +789,7 @@ impl SearchProvider {
|
||||
Self::Bocha => "bocha",
|
||||
Self::Metaso => "metaso",
|
||||
Self::Baidu => "baidu",
|
||||
Self::Volcengine => "volcengine",
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -813,12 +821,13 @@ pub struct SearchProviderResolution {
|
||||
/// Web search provider configuration (`[search]` table in config.toml).
|
||||
#[derive(Debug, Clone, Deserialize, Default)]
|
||||
pub struct SearchConfig {
|
||||
/// Search provider: `bing` | `duckduckgo` | `tavily` | `bocha` | `metaso` | `baidu`. Default: `duckduckgo`.
|
||||
/// Search provider: `bing` | `duckduckgo` | `tavily` | `bocha` | `metaso` | `baidu` | `volcengine`. Default: `duckduckgo`.
|
||||
#[serde(default)]
|
||||
pub provider: Option<SearchProvider>,
|
||||
/// API key for Tavily, Bocha, Metaso, or Baidu. Not required for Bing or DuckDuckGo.
|
||||
/// API key for Tavily, Bocha, Metaso, Baidu, or Volcengine. Not required for Bing or DuckDuckGo.
|
||||
/// Metaso also falls back to `METASO_API_KEY` env var, then a built-in default.
|
||||
/// Baidu also falls back to `BAIDU_SEARCH_API_KEY` env var.
|
||||
/// Volcengine also falls back to `VOLCENGINE_API_KEY` / `VOLCENGINE_ARK_API_KEY` / `ARK_API_KEY` env vars.
|
||||
#[serde(default)]
|
||||
pub api_key: Option<String>,
|
||||
}
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
//! Web search tool backed by multiple providers: Bing HTML scrape, DuckDuckGo
|
||||
//! (HTML scrape with Bing fallback), Tavily API, Bocha (博查) API,
|
||||
//! Metaso API (<https://metaso.cn>), and Baidu AI Search.
|
||||
//! Metaso API (<https://metaso.cn>), Baidu AI Search, and Volcengine Ark.
|
||||
//!
|
||||
//! This is the primary web search surface for agents. For browsing workflows
|
||||
//! (page open, click, screenshot) use a direct URL approach instead.
|
||||
//!
|
||||
//! Set `[search]` in config.toml to switch providers:
|
||||
//! provider = "duckduckgo" # or tavily/bocha/metaso/baidu
|
||||
//! provider = "duckduckgo" # or tavily/bocha/metaso/baidu/volcengine
|
||||
//! api_key = "tvly-..."
|
||||
|
||||
use super::spec::{
|
||||
@@ -28,6 +28,7 @@ const TAVILY_ENDPOINT: &str = "https://api.tavily.com/search";
|
||||
const BOCHA_ENDPOINT: &str = "https://api.bochaai.com/v1/ai/search";
|
||||
const METASO_ENDPOINT: &str = "https://metaso.cn/api/v1";
|
||||
const BAIDU_ENDPOINT: &str = "https://qianfan.baidubce.com/v2/ai_search/web_search";
|
||||
const VOLCENGINE_RESPONSES_ENDPOINT: &str = "https://ark.cn-beijing.volces.com/api/v3/responses";
|
||||
/// Intentionally public default key provided by Metaso for open-source/community use.
|
||||
/// Last-resort fallback after config and env var. Rate-limited to ~100 searches/day.
|
||||
const METASO_DEFAULT_API_KEY: &str = "mk-E384C1DD5E8501BB7EFE27C949AFDE5B";
|
||||
@@ -226,6 +227,13 @@ impl ToolSpec for WebSearchTool {
|
||||
.run_baidu_search(&query, max_results, timeout_ms, context)
|
||||
.await;
|
||||
}
|
||||
SearchProvider::Volcengine => {
|
||||
let decider = context.network_policy.as_ref();
|
||||
check_policy(decider, "ark.cn-beijing.volces.com")?;
|
||||
return self
|
||||
.run_volcengine_search(&query, max_results, timeout_ms, context)
|
||||
.await;
|
||||
}
|
||||
SearchProvider::Bing | SearchProvider::DuckDuckGo => {}
|
||||
}
|
||||
|
||||
@@ -728,6 +736,84 @@ impl WebSearchTool {
|
||||
let results = parse_baidu_results(&parsed, max_results);
|
||||
search_tool_result(query.to_string(), "baidu", results, None)
|
||||
}
|
||||
|
||||
/// Search via Volcengine Ark Responses API web_search tool.
|
||||
/// Uses strict JSON prompt constraints to extract structured results
|
||||
/// from the model's search-augmented response.
|
||||
async fn run_volcengine_search(
|
||||
&self,
|
||||
query: &str,
|
||||
max_results: usize,
|
||||
timeout_ms: u64,
|
||||
context: &ToolContext,
|
||||
) -> Result<ToolResult, ToolError> {
|
||||
let volc_key = std::env::var("VOLCENGINE_API_KEY").ok();
|
||||
let volc_ark_key = std::env::var("VOLCENGINE_ARK_API_KEY").ok();
|
||||
let ark_key = std::env::var("ARK_API_KEY").ok();
|
||||
let api_key = context
|
||||
.search_api_key
|
||||
.as_deref()
|
||||
.or(volc_key.as_deref())
|
||||
.or(volc_ark_key.as_deref())
|
||||
.or(ark_key.as_deref())
|
||||
.ok_or_else(|| {
|
||||
ToolError::execution_failed(
|
||||
"Volcengine search requires an API key. Set `[search] api_key`, \
|
||||
or DEEPSEEK_SEARCH_API_KEY, or VOLCENGINE_API_KEY env var.",
|
||||
)
|
||||
})?;
|
||||
|
||||
let client = reqwest::Client::builder()
|
||||
.timeout(Duration::from_millis(timeout_ms))
|
||||
.build()
|
||||
.map_err(|e| {
|
||||
ToolError::execution_failed(format!("Failed to build HTTP client: {e}"))
|
||||
})?;
|
||||
|
||||
let payload = volcengine_search_payload(query, max_results);
|
||||
|
||||
let resp = client
|
||||
.post(VOLCENGINE_RESPONSES_ENDPOINT)
|
||||
.header("Authorization", format!("Bearer {api_key}"))
|
||||
.json(&payload)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| {
|
||||
ToolError::execution_failed(format!("Volcengine search request failed: {e}"))
|
||||
})?;
|
||||
|
||||
let status = resp.status();
|
||||
let body = resp.text().await.map_err(|e| {
|
||||
ToolError::execution_failed(format!("Failed to read Volcengine response: {e}"))
|
||||
})?;
|
||||
|
||||
if !status.is_success() {
|
||||
let msg = match status.as_u16() {
|
||||
401 | 403 => "Volcengine API key rejected — check VOLCENGINE_API_KEY or `[search] api_key` in config.toml".to_string(),
|
||||
429 => "Volcengine API rate-limited — wait and retry, or check your quota".to_string(),
|
||||
_ => {
|
||||
let truncated = truncate_error_body(&body);
|
||||
format!("Volcengine search failed: HTTP {} — {truncated}", status.as_u16())
|
||||
}
|
||||
};
|
||||
return Err(ToolError::execution_failed(msg));
|
||||
}
|
||||
|
||||
let parsed: serde_json::Value = serde_json::from_str(&body).map_err(|e| {
|
||||
ToolError::execution_failed(format!("Failed to parse Volcengine response: {e}"))
|
||||
})?;
|
||||
|
||||
if let Some(error) = volcengine_error_message(&parsed) {
|
||||
return Err(ToolError::execution_failed(error));
|
||||
}
|
||||
|
||||
let response_text = volcengine_extract_text(&parsed).ok_or_else(|| {
|
||||
ToolError::execution_failed("Volcengine response contains no output text")
|
||||
})?;
|
||||
|
||||
let results = parse_volcengine_results(&response_text, max_results);
|
||||
search_tool_result(query.to_string(), "volcengine", results, None)
|
||||
}
|
||||
}
|
||||
|
||||
fn truncate_error_body(body: &str) -> String {
|
||||
@@ -826,6 +912,118 @@ fn baidu_search_payload(query: &str, max_results: usize) -> Value {
|
||||
})
|
||||
}
|
||||
|
||||
fn volcengine_search_payload(query: &str, max_results: usize) -> Value {
|
||||
json!({
|
||||
"model": "doubao-seed-1-6-250615",
|
||||
"stream": false,
|
||||
"tools": [{"type": "web_search"}],
|
||||
"input": [{
|
||||
"role": "user",
|
||||
"content": [{
|
||||
"type": "input_text",
|
||||
"text": format!(
|
||||
"Search the web for: {query}\n\n\
|
||||
CRITICAL: Respond ONLY with a valid JSON object. No markdown, no explanation.\n\
|
||||
Schema: {{\"results\":[{{\"title\":\"...\",\"url\":\"https://...\",\"snippet\":\"...\"}}]}}\n\
|
||||
- results: 1-{max_results} most relevant pages\n\
|
||||
- title: page title (required)\n\
|
||||
- url: full URL starting with https:// (required)\n\
|
||||
- snippet: 1-2 sentence factual summary (required)\n\
|
||||
- If zero results: {{\"results\":[]}}\n\
|
||||
- Your entire response must be valid, parseable JSON."
|
||||
)
|
||||
}]
|
||||
}]
|
||||
})
|
||||
}
|
||||
|
||||
/// Extracts the model's text response from a Volcengine Responses API output.
|
||||
fn volcengine_extract_text(parsed: &Value) -> Option<String> {
|
||||
parsed
|
||||
.get("output")
|
||||
.and_then(|v| v.as_array())
|
||||
.into_iter()
|
||||
.flat_map(|arr| arr.iter().rev())
|
||||
.find(|item| item.get("type").and_then(|t| t.as_str()) == Some("message"))
|
||||
.and_then(|msg| msg.get("content").and_then(|c| c.as_array()))
|
||||
.and_then(|content| content.first())
|
||||
.and_then(|c| c.get("text").and_then(|t| t.as_str()))
|
||||
.map(|s| s.to_string())
|
||||
}
|
||||
|
||||
/// Checks for business-logic errors in a Volcengine Responses API response.
|
||||
fn volcengine_error_message(parsed: &Value) -> Option<String> {
|
||||
let error = parsed.get("error")?;
|
||||
let code = error
|
||||
.get("code")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("unknown");
|
||||
let message = error
|
||||
.get("message")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("no details");
|
||||
Some(format!("Volcengine API error (code {code}: {message})"))
|
||||
}
|
||||
|
||||
/// Parses Volcengine model-generated JSON results into `WebSearchEntry` items.
|
||||
fn parse_volcengine_results(response_text: &str, max_results: usize) -> Vec<WebSearchEntry> {
|
||||
let json_text = extract_json_block(response_text).unwrap_or(response_text);
|
||||
|
||||
let parsed: Value = match serde_json::from_str(json_text) {
|
||||
Ok(v) => v,
|
||||
Err(_) => return Vec::new(),
|
||||
};
|
||||
|
||||
parsed
|
||||
.get("results")
|
||||
.and_then(|v| v.as_array())
|
||||
.into_iter()
|
||||
.flat_map(|arr| arr.iter())
|
||||
.filter_map(|item| {
|
||||
let title = item
|
||||
.get("title")
|
||||
.and_then(|s| s.as_str())?
|
||||
.trim();
|
||||
let url = item
|
||||
.get("url")
|
||||
.and_then(|s| s.as_str())?
|
||||
.trim();
|
||||
if title.is_empty() || url.is_empty() {
|
||||
return None;
|
||||
}
|
||||
let snippet = item
|
||||
.get("snippet")
|
||||
.and_then(|s| s.as_str())
|
||||
.map(str::trim)
|
||||
.filter(|s| !s.is_empty())
|
||||
.map(ToString::to_string);
|
||||
Some(WebSearchEntry {
|
||||
title: title.to_string(),
|
||||
url: url.to_string(),
|
||||
snippet,
|
||||
})
|
||||
})
|
||||
.take(max_results)
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Attempts to extract a JSON block from text that may be wrapped in
|
||||
/// markdown fences (```json ... ```) or contain surrounding commentary.
|
||||
fn extract_json_block(text: &str) -> Option<&str> {
|
||||
if let Some(start) = text.find("```json") {
|
||||
let inner = &text[start + 7..];
|
||||
if let Some(end) = inner.find("```") {
|
||||
return Some(inner[..end].trim());
|
||||
}
|
||||
}
|
||||
if let Some(start) = text.find('{') {
|
||||
if let Some(end) = text.rfind('}') {
|
||||
return Some(&text[start..=end]);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn extract_search_query(input: &Value) -> Result<String, ToolError> {
|
||||
for key in ["query", "q"] {
|
||||
if let Some(value) = input.get(key) {
|
||||
|
||||
Reference in New Issue
Block a user