From 70ef86c600b158094b774e6cb61c6d18a5adbc75 Mon Sep 17 00:00:00 2001 From: implecao Date: Sun, 31 May 2026 18:12:44 +0800 Subject: [PATCH] feat(web_search): add Volcengine Ark search provider MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Volcengine (火山引擎) as a new SearchProvider in web_search tool. Uses Volcengine's Responses API with `tools: [{type: "web_search"}]` and strict JSON prompt constraints to extract structured search results. - Free tier: 20K queries/month per API key - API key resolution: [search] api_key → VOLCENGINE_API_KEY → VOLCENGINE_ARK_API_KEY → ARK_API_KEY env vars - Select via `DEEPSEEK_SEARCH_PROVIDER=volcengine` or `[search] provider = "volcengine"` in config.toml Co-authored-by: Cursor --- config.example.toml | 4 +- crates/tui/src/config.rs | 13 +- crates/tui/src/tools/web_search.rs | 202 ++++++++++++++++++++++++++++- 3 files changed, 214 insertions(+), 5 deletions(-) diff --git a/config.example.toml b/config.example.toml index a7ff5e09..f9dcd733 100644 --- a/config.example.toml +++ b/config.example.toml @@ -308,7 +308,7 @@ max_subagents = 10 # optional (1-20) # API-backed search. 
# # [search] -# provider = "duckduckgo" # duckduckgo | bing | tavily | bocha | metaso | baidu +# provider = "duckduckgo" # duckduckgo | bing | tavily | bocha | metaso | baidu | volcengine # # duckduckgo: HTML scrape with Bing fallback # # bing: HTML scrape, no API key # # tavily: https://tavily.com — AI search, needs api_key @@ -316,6 +316,8 @@ max_subagents = 10 # optional (1-20) # # metaso: https://metaso.cn — 秘塔AI搜索,每天 100 次免费 # # 设置 METASO_API_KEY 或 [search] api_key 可提升额度 # # baidu: 百度 AI Search via qianfan.baidubce.com,需 api_key +# # volcengine: 火山引擎 Ark web_search (免费 2 万次/月), 需 api_key +# # 也回退到 VOLCENGINE_API_KEY / VOLCENGINE_ARK_API_KEY / ARK_API_KEY 环境变量 # api_key = "YOUR_SEARCH_KEY" # required for tavily, bocha, and baidu; optional for metaso # # WARNING: treat config.toml like a secret file when # # storing API keys. Prefer env vars for local smoke tests. diff --git a/crates/tui/src/config.rs b/crates/tui/src/config.rs index 1d1ae7d9..a77a12f2 100644 --- a/crates/tui/src/config.rs +++ b/crates/tui/src/config.rs @@ -755,6 +755,12 @@ pub enum SearchProvider { alias = "baidu-ai-search" )] Baidu, + /// Volcengine Ark web_search via Responses API. Requires api_key. + /// Free tier: 20K queries/month per API key. Falls back to + /// `VOLCENGINE_API_KEY` / `VOLCENGINE_ARK_API_KEY` / `ARK_API_KEY` + /// env vars when `[search] api_key` is not set. 
+ #[serde(alias = "volcengine", alias = "ark", alias = "volc")] + Volcengine, } impl SearchProvider { @@ -769,6 +775,7 @@ impl SearchProvider { "baidu" | "baidu-search" | "baidu_search" | "baidu-ai-search" | "baidu_ai_search" => { Some(Self::Baidu) } + "volcengine" | "ark" | "volc" | "volcengine-ark" => Some(Self::Volcengine), _ => None, } } @@ -782,6 +789,7 @@ impl SearchProvider { Self::Bocha => "bocha", Self::Metaso => "metaso", Self::Baidu => "baidu", + Self::Volcengine => "volcengine", } } } @@ -813,12 +821,13 @@ pub struct SearchProviderResolution { /// Web search provider configuration (`[search]` table in config.toml). #[derive(Debug, Clone, Deserialize, Default)] pub struct SearchConfig { - /// Search provider: `bing` | `duckduckgo` | `tavily` | `bocha` | `metaso` | `baidu`. Default: `duckduckgo`. + /// Search provider: `bing` | `duckduckgo` | `tavily` | `bocha` | `metaso` | `baidu` | `volcengine`. Default: `duckduckgo`. #[serde(default)] pub provider: Option, - /// API key for Tavily, Bocha, Metaso, or Baidu. Not required for Bing or DuckDuckGo. + /// API key for Tavily, Bocha, Metaso, Baidu, or Volcengine. Not required for Bing or DuckDuckGo. /// Metaso also falls back to `METASO_API_KEY` env var, then a built-in default. /// Baidu also falls back to `BAIDU_SEARCH_API_KEY` env var. + /// Volcengine also falls back to `VOLCENGINE_API_KEY` / `VOLCENGINE_ARK_API_KEY` / `ARK_API_KEY` env vars. #[serde(default)] pub api_key: Option, } diff --git a/crates/tui/src/tools/web_search.rs b/crates/tui/src/tools/web_search.rs index 140ffc24..edaa6050 100644 --- a/crates/tui/src/tools/web_search.rs +++ b/crates/tui/src/tools/web_search.rs @@ -1,12 +1,12 @@ //! Web search tool backed by multiple providers: Bing HTML scrape, DuckDuckGo //! (HTML scrape with Bing fallback), Tavily API, Bocha (博查) API, -//! Metaso API (), and Baidu AI Search. +//! Metaso API (), Baidu AI Search, and Volcengine Ark. //! //! This is the primary web search surface for agents. 
For browsing workflows //! (page open, click, screenshot) use a direct URL approach instead. //! //! Set `[search]` in config.toml to switch providers: -//! provider = "duckduckgo" # or tavily/bocha/metaso/baidu +//! provider = "duckduckgo" # or tavily/bocha/metaso/baidu/volcengine //! api_key = "tvly-..." use super::spec::{ @@ -28,6 +28,7 @@ const TAVILY_ENDPOINT: &str = "https://api.tavily.com/search"; const BOCHA_ENDPOINT: &str = "https://api.bochaai.com/v1/ai/search"; const METASO_ENDPOINT: &str = "https://metaso.cn/api/v1"; const BAIDU_ENDPOINT: &str = "https://qianfan.baidubce.com/v2/ai_search/web_search"; +const VOLCENGINE_RESPONSES_ENDPOINT: &str = "https://ark.cn-beijing.volces.com/api/v3/responses"; /// Intentionally public default key provided by Metaso for open-source/community use. /// Last-resort fallback after config and env var. Rate-limited to ~100 searches/day. const METASO_DEFAULT_API_KEY: &str = "mk-E384C1DD5E8501BB7EFE27C949AFDE5B"; @@ -226,6 +227,13 @@ impl ToolSpec for WebSearchTool { .run_baidu_search(&query, max_results, timeout_ms, context) .await; } + SearchProvider::Volcengine => { + let decider = context.network_policy.as_ref(); + check_policy(decider, "ark.cn-beijing.volces.com")?; + return self + .run_volcengine_search(&query, max_results, timeout_ms, context) + .await; + } SearchProvider::Bing | SearchProvider::DuckDuckGo => {} } @@ -728,6 +736,84 @@ impl WebSearchTool { let results = parse_baidu_results(&parsed, max_results); search_tool_result(query.to_string(), "baidu", results, None) } + + /// Search via Volcengine Ark Responses API web_search tool. + /// Uses strict JSON prompt constraints to extract structured results + /// from the model's search-augmented response. 
+    async fn run_volcengine_search(
+        &self,
+        query: &str,
+        max_results: usize,
+        timeout_ms: u64,
+        context: &ToolContext,
+    ) -> Result<ToolResult, ToolError> {
+        let non_empty_env = |name: &str| std::env::var(name).ok().filter(|v| !v.is_empty());
+        let env_key = non_empty_env("VOLCENGINE_API_KEY")
+            .or_else(|| non_empty_env("VOLCENGINE_ARK_API_KEY"))
+            .or_else(|| non_empty_env("ARK_API_KEY"));
+        let api_key = context
+            .search_api_key
+            .as_deref()
+            .or(env_key.as_deref())
+            .ok_or_else(|| {
+                ToolError::execution_failed(
+                    "Volcengine search requires an API key. Set `[search] api_key`, \
+                     DEEPSEEK_SEARCH_API_KEY, VOLCENGINE_API_KEY, VOLCENGINE_ARK_API_KEY, \
+                     or ARK_API_KEY.",
+                )
+            })?;
+
+        let client = reqwest::Client::builder()
+            .timeout(Duration::from_millis(timeout_ms))
+            .build()
+            .map_err(|e| {
+                ToolError::execution_failed(format!("Failed to build HTTP client: {e}"))
+            })?;
+
+        let payload = volcengine_search_payload(query, max_results);
+
+        let resp = client
+            .post(VOLCENGINE_RESPONSES_ENDPOINT)
+            .header("Authorization", format!("Bearer {api_key}"))
+            .json(&payload)
+            .send()
+            .await
+            .map_err(|e| {
+                ToolError::execution_failed(format!("Volcengine search request failed: {e}"))
+            })?;
+        // Read the body before checking the status so HTTP errors can quote it.
+        let status = resp.status();
+        let body = resp.text().await.map_err(|e| {
+            ToolError::execution_failed(format!("Failed to read Volcengine response: {e}"))
+        })?;
+
+        if !status.is_success() {
+            let msg = match status.as_u16() {
+                401 | 403 => "Volcengine API key rejected — check VOLCENGINE_API_KEY or `[search] api_key` in config.toml".to_string(),
+                429 => "Volcengine API rate-limited — wait and retry, or check your quota".to_string(),
+                _ => {
+                    let truncated = truncate_error_body(&body);
+                    format!("Volcengine search failed: HTTP {} — {truncated}", status.as_u16())
+                }
+            };
+            return Err(ToolError::execution_failed(msg));
+        }
+
+        let parsed: serde_json::Value = serde_json::from_str(&body).map_err(|e| {
+            ToolError::execution_failed(format!("Failed to parse Volcengine response: {e}"))
+        })?;
+        // Even with a 2xx status, surface any `error` object in the JSON body.
+        if let Some(error) = volcengine_error_message(&parsed) {
+            return Err(ToolError::execution_failed(error));
+        }
+
+        let response_text = volcengine_extract_text(&parsed).ok_or_else(|| {
+            ToolError::execution_failed("Volcengine response contains no output text")
+        })?;
+        // The model's text is parsed leniently: malformed JSON yields zero results.
+        let results = parse_volcengine_results(&response_text, max_results);
+        search_tool_result(query.to_string(), "volcengine", results, None)
+    }
 }
 
 fn truncate_error_body(body: &str) -> String {
@@ -826,6 +912,118 @@ fn baidu_search_payload(query: &str, max_results: usize) -> Value {
     })
 }
 
+fn volcengine_search_payload(query: &str, max_results: usize) -> Value {
+    json!({
+        "model": "doubao-seed-1-6-250615",
+        "stream": false,
+        "tools": [{"type": "web_search"}],
+        "input": [{
+            "role": "user",
+            "content": [{
+                "type": "input_text",
+                "text": format!(
+                    "Search the web for: {query}\n\n\
+                     CRITICAL: Respond ONLY with a valid JSON object. No markdown, no explanation.\n\
+                     Schema: {{\"results\":[{{\"title\":\"...\",\"url\":\"https://...\",\"snippet\":\"...\"}}]}}\n\
+                     - results: 1-{max_results} most relevant pages\n\
+                     - title: page title (required)\n\
+                     - url: full URL starting with https:// (required)\n\
+                     - snippet: 1-2 sentence factual summary (required)\n\
+                     - If zero results: {{\"results\":[]}}\n\
+                     - Your entire response must be valid, parseable JSON."
+                )
+            }]
+        }]
+    })
+}
+
+/// Returns the first text part of the last `message` item in the response's `output` array.
+fn volcengine_extract_text(parsed: &Value) -> Option<String> {
+    parsed
+        .get("output")
+        .and_then(|v| v.as_array())
+        .into_iter()
+        .flat_map(|arr| arr.iter().rev())
+        .find(|item| item.get("type").and_then(|t| t.as_str()) == Some("message"))
+        .and_then(|msg| msg.get("content").and_then(|c| c.as_array()))
+        .into_iter()
+        .flat_map(|content| content.iter())
+        .find_map(|part| part.get("text")?.as_str().map(str::to_string))
+}
+
+/// Checks for business-logic errors in a Volcengine Responses API response.
+fn volcengine_error_message(parsed: &Value) -> Option<String> {
+    let error = parsed.get("error").filter(|e| !e.is_null())?; // `"error": null` is not a failure
+    let code = error
+        .get("code")
+        .and_then(|v| v.as_str())
+        .unwrap_or("unknown");
+    let message = error
+        .get("message")
+        .and_then(|v| v.as_str())
+        .unwrap_or("no details");
+    Some(format!("Volcengine API error (code {code}: {message})"))
+}
+
+/// Parses model-generated JSON into up to `max_results` entries, skipping any without title or URL.
+fn parse_volcengine_results(response_text: &str, max_results: usize) -> Vec<WebSearchEntry> {
+    let json_text = extract_json_block(response_text).unwrap_or(response_text);
+    // Malformed JSON is treated as "no results" rather than an error.
+    let parsed: Value = match serde_json::from_str(json_text) {
+        Ok(v) => v,
+        Err(_) => return Vec::new(),
+    };
+
+    parsed
+        .get("results")
+        .and_then(|v| v.as_array())
+        .into_iter()
+        .flat_map(|arr| arr.iter())
+        .filter_map(|item| {
+            let title = item
+                .get("title")
+                .and_then(|s| s.as_str())?
+                .trim();
+            let url = item
+                .get("url")
+                .and_then(|s| s.as_str())?
+                .trim();
+            if title.is_empty() || url.is_empty() {
+                return None;
+            }
+            let snippet = item
+                .get("snippet")
+                .and_then(|s| s.as_str())
+                .map(str::trim)
+                .filter(|s| !s.is_empty())
+                .map(ToString::to_string);
+            Some(WebSearchEntry {
+                title: title.to_string(),
+                url: url.to_string(),
+                snippet,
+            })
+        })
+        .take(max_results)
+        .collect()
+}
+
+/// Returns the JSON payload inside `text`: the body of a ```json fence if one is
+/// closed, else the span from the first `{` to the last `}`; `None` if neither exists.
+fn extract_json_block(text: &str) -> Option<&str> {
+    // Prefer the body of a closed ```json fence.
+    if let Some((_, after_open)) = text.split_once("```json") {
+        if let Some((fenced, _)) = after_open.split_once("```") {
+            return Some(fenced.trim());
+        }
+    }
+    // Fallback: take the widest `{` … `}` span so surrounding commentary is
+    // dropped. The closing brace must come after the opening one; slicing
+    // `start..=end` with `end < start` would panic on model output like "} {".
+    let start = text.find('{')?;
+    let end = text.rfind('}').filter(|&end| end > start)?;
+    Some(&text[start..=end])
+}
+
 fn extract_search_query(input: &Value) -> Result<String, ToolError> {
     for key in ["query", "q"] {
         if let Some(value) = input.get(key) {