feat: add baidu web search backend
This commit is contained in:
@@ -661,6 +661,9 @@ pub enum SearchProvider {
|
||||
/// or `METASO_API_KEY` env var; configurable via `[search] api_key`.
|
||||
#[serde(alias = "metaso")]
|
||||
Metaso,
|
||||
/// Baidu AI Search API (<https://qianfan.baidubce.com>). Requires api_key.
|
||||
#[serde(alias = "baidu-search", alias = "baidu_ai_search")]
|
||||
Baidu,
|
||||
}
|
||||
|
||||
impl SearchProvider {
|
||||
@@ -671,6 +674,9 @@ impl SearchProvider {
|
||||
"duckduckgo" | "duck-duck-go" | "duck_duck_go" | "ddg" => Some(Self::DuckDuckGo),
|
||||
"tavily" => Some(Self::Tavily),
|
||||
"bocha" => Some(Self::Bocha),
|
||||
"metaso" => Some(Self::Metaso),
|
||||
"baidu" | "baidu-search" | "baidu_search" | "baidu-ai-search"
|
||||
| "baidu_ai_search" => Some(Self::Baidu),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
@@ -683,6 +689,7 @@ impl SearchProvider {
|
||||
Self::Tavily => "tavily",
|
||||
Self::Bocha => "bocha",
|
||||
Self::Metaso => "metaso",
|
||||
Self::Baidu => "baidu",
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -714,11 +721,12 @@ pub struct SearchProviderResolution {
|
||||
/// Web search provider configuration (`[search]` table in config.toml).
|
||||
#[derive(Debug, Clone, Deserialize, Default)]
|
||||
pub struct SearchConfig {
|
||||
/// Search provider: `bing` | `duckduckgo` | `tavily` | `bocha` | `metaso`. Default: `duckduckgo`.
|
||||
/// Search provider: `bing` | `duckduckgo` | `tavily` | `bocha` | `metaso` | `baidu`. Default: `duckduckgo`.
|
||||
#[serde(default)]
|
||||
pub provider: Option<SearchProvider>,
|
||||
/// API key for Tavily, Bocha, or Metaso. Not required for Bing or DuckDuckGo.
|
||||
/// API key for Tavily, Bocha, Metaso, or Baidu. Not required for Bing or DuckDuckGo.
|
||||
/// Metaso also falls back to `METASO_API_KEY` env var, then a built-in default.
|
||||
/// Baidu also falls back to `BAIDU_SEARCH_API_KEY` env var.
|
||||
#[serde(default)]
|
||||
pub api_key: Option<String>,
|
||||
}
|
||||
@@ -4281,6 +4289,35 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn explicit_baidu_search_provider_is_preserved() {
    // A `[search]` table naming Baidu must deserialize to the Baidu variant.
    let toml_source = r#"
[search]
provider = "baidu"
"#;
    let config: Config = toml::from_str(toml_source).expect("search config");

    let provider = config.search.and_then(|search| search.provider);
    assert_eq!(provider, Some(SearchProvider::Baidu));
}
|
||||
|
||||
#[test]
fn baidu_search_provider_aliases_parse() {
    // Covers every spelling that `SearchProvider::parse` maps to Baidu, so
    // dropping any single alias from the match arm fails this test.
    let aliases = [
        "baidu",
        "baidu-search",
        "baidu_search",
        "baidu-ai-search",
        "baidu_ai_search",
    ];
    for alias in aliases {
        assert_eq!(
            SearchProvider::parse(alias),
            Some(SearchProvider::Baidu),
            "alias `{alias}` should resolve to the Baidu provider"
        );
    }
}
|
||||
|
||||
#[test]
|
||||
fn search_provider_resolution_reports_default_source() {
|
||||
let _guard = lock_test_env();
|
||||
@@ -4334,6 +4371,26 @@ mod tests {
|
||||
assert_eq!(resolution.source, SearchProviderSource::EnvOverride);
|
||||
}
|
||||
|
||||
#[test]
fn search_provider_env_override_accepts_baidu() {
    // `_guard` is held until the end of the test; presumably it serializes
    // tests that touch process environment variables (confirm against
    // `lock_test_env`).
    let _guard = lock_test_env();
    let saved = env::var_os("DEEPSEEK_SEARCH_PROVIDER");
    unsafe { env::set_var("DEEPSEEK_SEARCH_PROVIDER", "baidu") };

    // The file asks for DuckDuckGo; the env var set above asks for Baidu.
    let toml_source = r#"
[search]
provider = "duckduckgo"
"#;
    let config: Config = toml::from_str(toml_source).expect("search config");
    let resolved = config.search_provider_resolution();

    // Put the variable back before asserting so a failed assertion cannot
    // leave the override in place.
    unsafe { EnvGuard::restore_var("DEEPSEEK_SEARCH_PROVIDER", saved) };

    assert_eq!(resolved.provider, SearchProvider::Baidu);
    assert_eq!(resolved.source, SearchProviderSource::EnvOverride);
}
|
||||
|
||||
#[test]
|
||||
fn search_provider_resolution_ignores_invalid_env_override() {
|
||||
let _guard = lock_test_env();
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
//! Web search tool backed by multiple providers: Bing HTML scrape, DuckDuckGo
//! (HTML scrape with Bing fallback), Tavily API, Bocha (博查) API,
//! Metaso API (<https://metaso.cn>), and Baidu AI Search.
|
||||
//!
|
||||
//! This is the primary web search surface for agents. For browsing workflows
|
||||
//! (page open, click, screenshot) use a direct URL approach instead.
|
||||
@@ -27,6 +27,7 @@ const BING_HOST: &str = "www.bing.com";
|
||||
const TAVILY_ENDPOINT: &str = "https://api.tavily.com/search";
|
||||
const BOCHA_ENDPOINT: &str = "https://api.bochaai.com/v1/ai/search";
|
||||
const METASO_ENDPOINT: &str = "https://metaso.cn/api/v1";
|
||||
const BAIDU_ENDPOINT: &str = "https://qianfan.baidubce.com/v2/ai_search/web_search";
|
||||
/// Intentionally public default key provided by Metaso for open-source/community use.
|
||||
/// Last-resort fallback after config and env var. Rate-limited to ~100 searches/day.
|
||||
const METASO_DEFAULT_API_KEY: &str = "mk-E384C1DD5E8501BB7EFE27C949AFDE5B";
|
||||
@@ -57,6 +58,7 @@ static TAG_RE: OnceLock<Regex> = OnceLock::new();
|
||||
static BING_RESULT_RE: OnceLock<Regex> = OnceLock::new();
|
||||
static BING_TITLE_RE: OnceLock<Regex> = OnceLock::new();
|
||||
static BING_SNIPPET_RE: OnceLock<Regex> = OnceLock::new();
|
||||
static BEARER_TOKEN_RE: OnceLock<Regex> = OnceLock::new();
|
||||
|
||||
fn get_title_re() -> &'static Regex {
|
||||
TITLE_RE.get_or_init(|| {
|
||||
@@ -99,6 +101,13 @@ fn get_bing_snippet_re() -> &'static Regex {
|
||||
})
|
||||
}
|
||||
|
||||
fn get_bearer_token_re() -> &'static Regex {
|
||||
BEARER_TOKEN_RE.get_or_init(|| {
|
||||
Regex::new(r"(?i)\bBearer\s+[A-Za-z0-9._~+/=-]+")
|
||||
.expect("bearer token regex pattern is valid")
|
||||
})
|
||||
}
|
||||
|
||||
const DEFAULT_MAX_RESULTS: usize = 5;
|
||||
const MAX_RESULTS: usize = 10;
|
||||
const DEFAULT_TIMEOUT_MS: u64 = 15_000;
|
||||
@@ -129,7 +138,7 @@ impl ToolSpec for WebSearchTool {
|
||||
}
|
||||
|
||||
fn description(&self) -> &'static str {
|
||||
"Search the web and return ranked results with URLs and snippets. Default backend is DuckDuckGo with Bing fallback; set `[search] provider = \"bing\" | \"tavily\" | \"bocha\"` in config.toml to switch backends. Use this instead of scraping search engines with `curl` in `exec_shell`. For a known canonical URL, prefer `fetch_url` directly."
|
||||
"Search the web and return ranked results with URLs and snippets. Default backend is DuckDuckGo with Bing fallback; set `[search] provider = \"bing\" | \"tavily\" | \"bocha\" | \"metaso\" | \"baidu\"` in config.toml to switch backends. Use this instead of scraping search engines with `curl` in `exec_shell`. For a known canonical URL, prefer `fetch_url` directly."
|
||||
}
|
||||
|
||||
fn input_schema(&self) -> Value {
|
||||
@@ -210,6 +219,13 @@ impl ToolSpec for WebSearchTool {
|
||||
.run_metaso_search(&query, max_results, timeout_ms, context)
|
||||
.await;
|
||||
}
|
||||
SearchProvider::Baidu => {
|
||||
let decider = context.network_policy.as_ref();
|
||||
check_policy(decider, "qianfan.baidubce.com")?;
|
||||
return self
|
||||
.run_baidu_search(&query, max_results, timeout_ms, context)
|
||||
.await;
|
||||
}
|
||||
SearchProvider::Bing | SearchProvider::DuckDuckGo => {}
|
||||
}
|
||||
|
||||
@@ -645,6 +661,88 @@ impl WebSearchTool {
|
||||
|
||||
search_tool_result(query.to_string(), "metaso", results, None)
|
||||
}
|
||||
|
||||
/// Search via Baidu AI Search API (<https://qianfan.baidubce.com>).
|
||||
async fn run_baidu_search(
|
||||
&self,
|
||||
query: &str,
|
||||
max_results: usize,
|
||||
timeout_ms: u64,
|
||||
context: &ToolContext,
|
||||
) -> Result<ToolResult, ToolError> {
|
||||
let env_key = std::env::var("BAIDU_SEARCH_API_KEY").ok();
|
||||
let api_key = context
|
||||
.search_api_key
|
||||
.as_deref()
|
||||
.or(env_key.as_deref())
|
||||
.ok_or_else(|| {
|
||||
ToolError::execution_failed(
|
||||
"Baidu search requires an API key. Set `BAIDU_SEARCH_API_KEY` or `[search] api_key` in config.toml.",
|
||||
)
|
||||
})?;
|
||||
|
||||
let client = reqwest::Client::builder()
|
||||
.timeout(Duration::from_millis(timeout_ms))
|
||||
.build()
|
||||
.map_err(|e| {
|
||||
ToolError::execution_failed(format!("Failed to build HTTP client: {e}"))
|
||||
})?;
|
||||
|
||||
let payload = json!({
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": query,
|
||||
}
|
||||
],
|
||||
"search_source": "baidu_search",
|
||||
"resource_type_filter": [
|
||||
{
|
||||
"type": "web",
|
||||
"top_k": max_results,
|
||||
}
|
||||
],
|
||||
});
|
||||
|
||||
let resp = client
|
||||
.post(BAIDU_ENDPOINT)
|
||||
.header("Content-Type", "application/json")
|
||||
.header("Authorization", format!("Bearer {api_key}"))
|
||||
.json(&payload)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| {
|
||||
ToolError::execution_failed(format!("Baidu search request failed: {e}"))
|
||||
})?;
|
||||
|
||||
let status = resp.status();
|
||||
let body = resp.text().await.map_err(|e| {
|
||||
ToolError::execution_failed(format!("Failed to read Baidu response: {e}"))
|
||||
})?;
|
||||
|
||||
if !status.is_success() {
|
||||
let msg = match status.as_u16() {
|
||||
401 | 403 => "Baidu search API key rejected — check BAIDU_SEARCH_API_KEY or `[search] api_key` in config.toml".to_string(),
|
||||
429 => "Baidu search rate-limited — wait and retry, or check your Baidu AI Search quota".to_string(),
|
||||
_ => {
|
||||
let truncated = truncate_error_body(&body);
|
||||
format!("Baidu search failed: HTTP {} — {truncated}", status.as_u16())
|
||||
}
|
||||
};
|
||||
return Err(ToolError::execution_failed(msg));
|
||||
}
|
||||
|
||||
let parsed: serde_json::Value = serde_json::from_str(&body).map_err(|e| {
|
||||
ToolError::execution_failed(format!("Failed to parse Baidu response: {e}"))
|
||||
})?;
|
||||
|
||||
if let Some(error) = baidu_error_message(&parsed) {
|
||||
return Err(ToolError::execution_failed(error));
|
||||
}
|
||||
|
||||
let results = parse_baidu_results(&parsed, max_results);
|
||||
search_tool_result(query.to_string(), "baidu", results, None)
|
||||
}
|
||||
}
|
||||
|
||||
fn truncate_error_body(body: &str) -> String {
|
||||
@@ -662,12 +760,69 @@ fn truncate_error_body(body: &str) -> String {
|
||||
|
||||
fn sanitize_error_body(body: &str) -> String {
|
||||
let stripped = strip_html_tags(body);
|
||||
stripped
|
||||
let visible: String = stripped
|
||||
.chars()
|
||||
.filter(|c| !c.is_control() || c.is_ascii_whitespace())
|
||||
.collect();
|
||||
get_bearer_token_re()
|
||||
.replace_all(&visible, "Bearer [REDACTED]")
|
||||
.to_string()
|
||||
}
|
||||
|
||||
fn parse_baidu_results(parsed: &Value, max_results: usize) -> Vec<WebSearchEntry> {
|
||||
parsed
|
||||
.get("references")
|
||||
.and_then(|v| v.as_array())
|
||||
.into_iter()
|
||||
.flat_map(|arr| arr.iter())
|
||||
.filter_map(|item| {
|
||||
let title = item
|
||||
.get("title")
|
||||
.or_else(|| item.get("name"))
|
||||
.and_then(|s| s.as_str())?
|
||||
.trim();
|
||||
let url = item
|
||||
.get("url")
|
||||
.or_else(|| item.get("link"))
|
||||
.and_then(|s| s.as_str())?
|
||||
.trim();
|
||||
if title.is_empty() || url.is_empty() {
|
||||
return None;
|
||||
}
|
||||
let snippet = item
|
||||
.get("content")
|
||||
.or_else(|| item.get("snippet"))
|
||||
.or_else(|| item.get("summary"))
|
||||
.and_then(|s| s.as_str())
|
||||
.map(str::trim)
|
||||
.filter(|s| !s.is_empty())
|
||||
.map(ToString::to_string);
|
||||
Some(WebSearchEntry {
|
||||
title: title.to_string(),
|
||||
url: url.to_string(),
|
||||
snippet,
|
||||
})
|
||||
})
|
||||
.take(max_results)
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn baidu_error_message(parsed: &Value) -> Option<String> {
|
||||
let code = parsed
|
||||
.get("error_code")
|
||||
.or_else(|| parsed.get("code"))
|
||||
.and_then(|v| v.as_i64())?;
|
||||
if code == 0 {
|
||||
return None;
|
||||
}
|
||||
let message = parsed
|
||||
.get("error_msg")
|
||||
.or_else(|| parsed.get("message"))
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("unknown error");
|
||||
Some(format!("Baidu search API error (code {code}: {message})"))
|
||||
}
|
||||
|
||||
fn extract_search_query(input: &Value) -> Result<String, ToolError> {
|
||||
for key in ["query", "q"] {
|
||||
if let Some(value) = input.get(key) {
|
||||
@@ -1028,7 +1183,7 @@ mod tests {
|
||||
use super::{
|
||||
ERROR_BODY_PREVIEW_BYTES, WebSearchEntry, WebSearchTool, decode_html_entities,
|
||||
extract_search_query, is_likely_spam_results, optional_search_max_results, root_domain,
|
||||
sanitize_error_body, truncate_error_body,
|
||||
parse_baidu_results, sanitize_error_body, truncate_error_body,
|
||||
};
|
||||
use serde_json::json;
|
||||
|
||||
@@ -1295,6 +1450,69 @@ mod tests {
|
||||
assert_eq!(sanitized, "error");
|
||||
}
|
||||
|
||||
#[test]
fn sanitize_error_body_redacts_bearer_tokens() {
    // An error body that echoes the Authorization header must not leak the token.
    let secret = "bce-v3/ALTAK-example/secret";
    let raw = r#"{"error":"bad token","authorization":"Bearer bce-v3/ALTAK-example/secret"}"#;

    let cleaned = sanitize_error_body(raw);

    assert!(!cleaned.contains(secret));
    assert!(cleaned.contains("Bearer [REDACTED]"));
}
|
||||
|
||||
#[test]
fn parse_baidu_references_extracts_ranked_results() {
    // Two references: one uses `content` for its snippet, the other `snippet`.
    let response = json!({
        "references": [
            {
                "title": "Rust 官方文档",
                "url": "https://www.rust-lang.org/",
                "content": "Rust 是一门注重性能和可靠性的语言。"
            },
            {
                "title": "Cargo Book",
                "url": "https://doc.rust-lang.org/cargo/",
                "snippet": "Cargo is Rust's package manager."
            }
        ]
    });

    let results = parse_baidu_results(&response, 10);

    // Flatten to tuples so count, order and content are checked in one assertion.
    let flattened: Vec<(&str, &str, Option<&str>)> = results
        .iter()
        .map(|entry| {
            (
                entry.title.as_str(),
                entry.url.as_str(),
                entry.snippet.as_deref(),
            )
        })
        .collect();
    assert_eq!(
        flattened,
        vec![
            (
                "Rust 官方文档",
                "https://www.rust-lang.org/",
                Some("Rust 是一门注重性能和可靠性的语言。"),
            ),
            (
                "Cargo Book",
                "https://doc.rust-lang.org/cargo/",
                Some("Cargo is Rust's package manager."),
            ),
        ]
    );
}
|
||||
|
||||
#[test]
fn parse_baidu_references_skips_incomplete_entries() {
    // Entries missing a title or a URL are dropped; a missing snippet is allowed.
    let response = json!({
        "references": [
            {"title": "No URL", "content": "missing url"},
            {"url": "https://example.com/no-title", "content": "missing title"},
            {"title": "Valid", "url": "https://example.com/valid"}
        ]
    });

    let results = parse_baidu_results(&response, 10);

    assert_eq!(results.len(), 1);
    let survivor = &results[0];
    assert_eq!(survivor.title, "Valid");
    assert_eq!(survivor.url, "https://example.com/valid");
    assert!(survivor.snippet.is_none());
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn tavily_provider_without_api_key_surfaces_clear_error_not_silent_fallback() {
|
||||
// Trust-boundary pin: if a user has opted into Tavily but
|
||||
@@ -1341,6 +1559,35 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn baidu_provider_without_api_key_surfaces_clear_error_not_silent_fallback() {
|
||||
use crate::config::SearchProvider;
|
||||
use crate::tools::spec::{ToolContext, ToolSpec};
|
||||
|
||||
let prev = std::env::var_os("BAIDU_SEARCH_API_KEY");
|
||||
unsafe { std::env::remove_var("BAIDU_SEARCH_API_KEY") };
|
||||
|
||||
let tmp = tempfile::tempdir().expect("tempdir");
|
||||
let mut ctx = ToolContext::new(tmp.path().to_path_buf());
|
||||
ctx.search_provider = SearchProvider::Baidu;
|
||||
ctx.search_api_key = None;
|
||||
let err = WebSearchTool
|
||||
.execute(json!({"query": "anything"}), &ctx)
|
||||
.await
|
||||
.expect_err("missing api_key must surface as ToolError");
|
||||
|
||||
match prev {
|
||||
Some(value) => unsafe { std::env::set_var("BAIDU_SEARCH_API_KEY", value) },
|
||||
None => unsafe { std::env::remove_var("BAIDU_SEARCH_API_KEY") },
|
||||
}
|
||||
|
||||
let msg = err.to_string();
|
||||
assert!(
|
||||
msg.contains("Baidu") && msg.contains("API key"),
|
||||
"error must name the provider and missing key; got `{msg}`"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn metaso_provider_uses_built_in_key_when_no_config_key_set() {
|
||||
// Unlike Tavily/Bocha, Metaso falls back to a built-in default, so
|
||||
|
||||
Reference in New Issue
Block a user