Merge remote-tracking branch 'origin/main' into HEAD
This commit is contained in:
+16
-12
@@ -286,25 +286,29 @@ max_subagents = 10 # optional (1-20)
|
||||
# ─────────────────────────────────────────────────────────────────────────────────
|
||||
# Web Search Provider
|
||||
# ─────────────────────────────────────────────────────────────────────────────────
|
||||
# Choose which backend `web_search` uses. Default is Bing HTML scraping — no
|
||||
# API key needed. DuckDuckGo remains selectable and still falls back to Bing
|
||||
# when its HTML endpoint returns a bot challenge or no parseable results.
|
||||
# Switch to Tavily or Bocha for reliable search in mainland China.
|
||||
# Choose which backend `web_search` uses. Default is DuckDuckGo HTML scraping
|
||||
# with Bing fallback — no API key needed. Bing remains selectable for users who
|
||||
# explicitly prefer it. Switch to Tavily, Bocha, Metaso, or Baidu for
|
||||
# API-backed search.
|
||||
#
|
||||
# [search]
|
||||
# provider = "bing" # bing | duckduckgo | tavily | bocha | metaso
|
||||
# provider = "duckduckgo" # duckduckgo | bing | tavily | bocha | metaso | baidu
|
||||
# # duckduckgo: HTML scrape with Bing fallback
|
||||
# # tavily: https://tavily.com — AI search, needs api_key
|
||||
# # bocha: https://bochaai.com — 博查AI搜索,国内友好,需api_key
|
||||
# # metaso: https://metaso.cn — 秘塔AI搜索,每天 100 次免费
|
||||
# # 设置 METASO_API_KEY 或 [search] api_key 可提升额度
|
||||
# api_key = "tvly-YOUR_KEY"      # required for tavily and bocha; optional for metaso
|
||||
# # WARNING: treat config.toml like a secret file when
|
||||
# # storing API keys. Use env vars or `auth set` instead.
|
||||
# # bing: HTML scrape, no API key
|
||||
# # tavily: https://tavily.com — AI search, needs api_key
|
||||
# # bocha: https://bochaai.com — 博查AI搜索,国内友好,需api_key
|
||||
# # metaso: https://metaso.cn — 秘塔AI搜索,每天 100 次免费
|
||||
# # 设置 METASO_API_KEY 或 [search] api_key 可提升额度
|
||||
# # baidu: 百度 AI Search via qianfan.baidubce.com,需 api_key
|
||||
# api_key = "YOUR_SEARCH_KEY" # required for tavily, bocha, and baidu; optional for metaso
|
||||
# # WARNING: treat config.toml like a secret file when
|
||||
# # storing API keys. Prefer env vars for local smoke tests.
|
||||
#
|
||||
# Env-var overrides:
|
||||
# DEEPSEEK_SEARCH_PROVIDER → search.provider
|
||||
# DEEPSEEK_SEARCH_API_KEY → search.api_key
|
||||
# METASO_API_KEY → metaso key fallback
|
||||
# BAIDU_SEARCH_API_KEY → baidu key fallback
|
||||
|
||||
# ─────────────────────────────────────────────────────────────────────────────────
|
||||
# Network Policy (#135)
|
||||
|
||||
@@ -684,6 +684,14 @@ pub enum SearchProvider {
|
||||
/// or `METASO_API_KEY` env var; configurable via `[search] api_key`.
|
||||
#[serde(alias = "metaso")]
|
||||
Metaso,
|
||||
/// Baidu AI Search API (<https://qianfan.baidubce.com>). Requires api_key.
|
||||
#[serde(
|
||||
alias = "baidu-search",
|
||||
alias = "baidu_ai_search",
|
||||
alias = "baidu_search",
|
||||
alias = "baidu-ai-search"
|
||||
)]
|
||||
Baidu,
|
||||
}
|
||||
|
||||
impl SearchProvider {
|
||||
@@ -694,6 +702,10 @@ impl SearchProvider {
|
||||
"duckduckgo" | "duck-duck-go" | "duck_duck_go" | "ddg" => Some(Self::DuckDuckGo),
|
||||
"tavily" => Some(Self::Tavily),
|
||||
"bocha" => Some(Self::Bocha),
|
||||
"metaso" => Some(Self::Metaso),
|
||||
"baidu" | "baidu-search" | "baidu_search" | "baidu-ai-search" | "baidu_ai_search" => {
|
||||
Some(Self::Baidu)
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
@@ -706,6 +718,7 @@ impl SearchProvider {
|
||||
Self::Tavily => "tavily",
|
||||
Self::Bocha => "bocha",
|
||||
Self::Metaso => "metaso",
|
||||
Self::Baidu => "baidu",
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -737,11 +750,12 @@ pub struct SearchProviderResolution {
|
||||
/// Web search provider configuration (`[search]` table in config.toml).
|
||||
#[derive(Debug, Clone, Deserialize, Default)]
|
||||
pub struct SearchConfig {
|
||||
/// Search provider: `bing` | `duckduckgo` | `tavily` | `bocha` | `metaso`. Default: `duckduckgo`.
|
||||
/// Search provider: `bing` | `duckduckgo` | `tavily` | `bocha` | `metaso` | `baidu`. Default: `duckduckgo`.
|
||||
#[serde(default)]
|
||||
pub provider: Option<SearchProvider>,
|
||||
/// API key for Tavily, Bocha, or Metaso. Not required for Bing or DuckDuckGo.
|
||||
/// API key for Tavily, Bocha, Metaso, or Baidu. Not required for Bing or DuckDuckGo.
|
||||
/// Metaso also falls back to `METASO_API_KEY` env var, then a built-in default.
|
||||
/// Baidu also falls back to `BAIDU_SEARCH_API_KEY` env var.
|
||||
#[serde(default)]
|
||||
pub api_key: Option<String>,
|
||||
}
|
||||
@@ -2912,6 +2926,14 @@ fn apply_env_overrides(config: &mut Config) {
|
||||
if let Ok(value) = std::env::var("DEEPSEEK_MANAGED_CONFIG_PATH") {
|
||||
config.managed_config_path = Some(value);
|
||||
}
|
||||
if let Ok(value) = std::env::var("DEEPSEEK_SEARCH_API_KEY")
|
||||
&& !value.trim().is_empty()
|
||||
{
|
||||
config
|
||||
.search
|
||||
.get_or_insert_with(SearchConfig::default)
|
||||
.api_key = Some(value);
|
||||
}
|
||||
if let Ok(value) = std::env::var("DEEPSEEK_REQUIREMENTS_PATH") {
|
||||
config.requirements_path = Some(value);
|
||||
}
|
||||
@@ -4363,6 +4385,35 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
/// A `[search] provider = "baidu"` entry must deserialize to
/// `SearchProvider::Baidu`.
#[test]
fn explicit_baidu_search_provider_is_preserved() {
    let raw = r#"
[search]
provider = "baidu"
"#;
    let config: Config = toml::from_str(raw).expect("search config");

    let provider = config.search.and_then(|search| search.provider);
    assert_eq!(provider, Some(SearchProvider::Baidu));
}
|
||||
|
||||
/// Every spelling that `SearchProvider::parse` accepts for Baidu must resolve
/// to `SearchProvider::Baidu`.
#[test]
fn baidu_search_provider_aliases_parse() {
    // Mirrors the Baidu arm of `SearchProvider::parse`; extend both together.
    for alias in [
        "baidu",
        "baidu-search",
        "baidu_search",
        "baidu-ai-search",
        "baidu_ai_search",
    ] {
        assert_eq!(
            SearchProvider::parse(alias),
            Some(SearchProvider::Baidu),
            "alias `{alias}` should parse as Baidu"
        );
    }
}
|
||||
|
||||
#[test]
|
||||
fn search_provider_resolution_reports_default_source() {
|
||||
let _guard = lock_test_env();
|
||||
@@ -4416,6 +4467,42 @@ mod tests {
|
||||
assert_eq!(resolution.source, SearchProviderSource::EnvOverride);
|
||||
}
|
||||
|
||||
/// `DEEPSEEK_SEARCH_PROVIDER=baidu` must win over the provider named in the
/// config file and be reported as an env override.
#[test]
fn search_provider_env_override_accepts_baidu() {
    let _guard = lock_test_env();
    let saved = env::var_os("DEEPSEEK_SEARCH_PROVIDER");
    // SAFETY: presumably `lock_test_env` serializes env mutation across tests — confirm.
    unsafe { env::set_var("DEEPSEEK_SEARCH_PROVIDER", "baidu") };

    let raw = r#"
[search]
provider = "duckduckgo"
"#;
    let config: Config = toml::from_str(raw).expect("search config");
    let resolution = config.search_provider_resolution();

    // Put the variable back before asserting so a failed assertion cannot leak it.
    unsafe { EnvGuard::restore_var("DEEPSEEK_SEARCH_PROVIDER", saved) };

    assert_eq!(resolution.provider, SearchProvider::Baidu);
    assert_eq!(resolution.source, SearchProviderSource::EnvOverride);
}
|
||||
|
||||
/// `DEEPSEEK_SEARCH_API_KEY` must populate `search.api_key`, creating the
/// `[search]` section when the config has none.
#[test]
fn apply_env_overrides_sets_search_api_key() {
    let _guard = lock_test_env();
    let saved = env::var_os("DEEPSEEK_SEARCH_API_KEY");
    // SAFETY: presumably `lock_test_env` serializes env mutation across tests — confirm.
    unsafe { env::set_var("DEEPSEEK_SEARCH_API_KEY", "search-env-key") };

    let mut config = Config::default();
    apply_env_overrides(&mut config);

    // Restore first so a failed assertion cannot leak the variable.
    unsafe { EnvGuard::restore_var("DEEPSEEK_SEARCH_API_KEY", saved) };

    let api_key = config.search.and_then(|search| search.api_key);
    assert_eq!(api_key.as_deref(), Some("search-env-key"));
}
|
||||
|
||||
#[test]
|
||||
fn search_provider_resolution_ignores_invalid_env_override() {
|
||||
let _guard = lock_test_env();
|
||||
|
||||
@@ -177,8 +177,9 @@ pub struct EngineConfig {
|
||||
pub workshop: Option<crate::tools::large_output_router::WorkshopConfig>,
|
||||
/// Which search backend `web_search` should use. Default: DuckDuckGo.
|
||||
pub search_provider: crate::config::SearchProvider,
|
||||
/// API key for Tavily, Bocha, or Metaso. `None` for Bing or DuckDuckGo.
|
||||
/// API key for Tavily, Bocha, Metaso, or Baidu. `None` for Bing or DuckDuckGo.
|
||||
/// Metaso also falls back to `METASO_API_KEY` env var, then a built-in key.
|
||||
/// Baidu also falls back to `BAIDU_SEARCH_API_KEY`.
|
||||
pub search_api_key: Option<String>,
|
||||
/// Per-step DeepSeek API timeout for sub-agent `create_message` requests.
|
||||
/// Resolved from `[subagents] api_timeout_secs` (clamped to 1..=1800)
|
||||
|
||||
@@ -165,8 +165,9 @@ pub struct ToolContext {
|
||||
/// Which search backend `web_search` should use. Default: DuckDuckGo. Set via
|
||||
/// `[search] provider` in config.toml.
|
||||
pub search_provider: crate::config::SearchProvider,
|
||||
/// API key for Tavily, Bocha, or Metaso. `None` for Bing or DuckDuckGo.
|
||||
/// API key for Tavily, Bocha, Metaso, or Baidu. `None` for Bing or DuckDuckGo.
|
||||
/// Metaso also falls back to `METASO_API_KEY` env var, then a built-in key.
|
||||
/// Baidu also falls back to `BAIDU_SEARCH_API_KEY`.
|
||||
pub search_api_key: Option<String>,
|
||||
|
||||
/// Per-session workshop variable store (#548). Holds the raw content of
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
//! Web search tool backed by multiple providers: Bing HTML scrape, DuckDuckGo
|
||||
//! (HTML scrape with Bing fallback), Tavily API, Bocha (博查) API, and
|
||||
//! Metaso API (<https://metaso.cn>).
|
||||
//! (HTML scrape with Bing fallback), Tavily API, Bocha (博查) API,
|
||||
//! Metaso API (<https://metaso.cn>), and Baidu AI Search.
|
||||
//!
|
||||
//! This is the primary web search surface for agents. For browsing workflows
|
||||
//! (page open, click, screenshot) use a direct URL approach instead.
|
||||
//!
|
||||
//! Set `[search]` in config.toml to switch providers:
|
||||
//! provider = "duckduckgo" # or tavily/bocha/metaso
|
||||
//! provider = "duckduckgo" # or tavily/bocha/metaso/baidu
|
||||
//! api_key = "tvly-..."
|
||||
|
||||
use super::spec::{
|
||||
@@ -27,6 +27,7 @@ const BING_HOST: &str = "www.bing.com";
|
||||
const TAVILY_ENDPOINT: &str = "https://api.tavily.com/search";
|
||||
const BOCHA_ENDPOINT: &str = "https://api.bochaai.com/v1/ai/search";
|
||||
const METASO_ENDPOINT: &str = "https://metaso.cn/api/v1";
|
||||
const BAIDU_ENDPOINT: &str = "https://qianfan.baidubce.com/v2/ai_search/web_search";
|
||||
/// Intentionally public default key provided by Metaso for open-source/community use.
|
||||
/// Last-resort fallback after config and env var. Rate-limited to ~100 searches/day.
|
||||
const METASO_DEFAULT_API_KEY: &str = "mk-E384C1DD5E8501BB7EFE27C949AFDE5B";
|
||||
@@ -57,6 +58,7 @@ static TAG_RE: OnceLock<Regex> = OnceLock::new();
|
||||
static BING_RESULT_RE: OnceLock<Regex> = OnceLock::new();
|
||||
static BING_TITLE_RE: OnceLock<Regex> = OnceLock::new();
|
||||
static BING_SNIPPET_RE: OnceLock<Regex> = OnceLock::new();
|
||||
static BEARER_TOKEN_RE: OnceLock<Regex> = OnceLock::new();
|
||||
|
||||
fn get_title_re() -> &'static Regex {
|
||||
TITLE_RE.get_or_init(|| {
|
||||
@@ -99,6 +101,13 @@ fn get_bing_snippet_re() -> &'static Regex {
|
||||
})
|
||||
}
|
||||
|
||||
fn get_bearer_token_re() -> &'static Regex {
|
||||
BEARER_TOKEN_RE.get_or_init(|| {
|
||||
Regex::new(r"(?i)\bBearer\s+[A-Za-z0-9._~+/=-]+")
|
||||
.expect("bearer token regex pattern is valid")
|
||||
})
|
||||
}
|
||||
|
||||
const DEFAULT_MAX_RESULTS: usize = 5;
|
||||
const MAX_RESULTS: usize = 10;
|
||||
const DEFAULT_TIMEOUT_MS: u64 = 15_000;
|
||||
@@ -129,7 +138,7 @@ impl ToolSpec for WebSearchTool {
|
||||
}
|
||||
|
||||
fn description(&self) -> &'static str {
|
||||
"Search the web and return ranked results with URLs and snippets. Default backend is DuckDuckGo with Bing fallback; set `[search] provider = \"bing\" | \"tavily\" | \"bocha\"` in config.toml to switch backends. Use this instead of scraping search engines with `curl` in `exec_shell`. For a known canonical URL, prefer `fetch_url` directly."
|
||||
"Search the web and return ranked results with URLs and snippets. Default backend is DuckDuckGo with Bing fallback; set `[search] provider = \"bing\" | \"tavily\" | \"bocha\" | \"metaso\" | \"baidu\"` in config.toml to switch backends. Use this instead of scraping search engines with `curl` in `exec_shell`. For a known canonical URL, prefer `fetch_url` directly."
|
||||
}
|
||||
|
||||
fn input_schema(&self) -> Value {
|
||||
@@ -210,6 +219,13 @@ impl ToolSpec for WebSearchTool {
|
||||
.run_metaso_search(&query, max_results, timeout_ms, context)
|
||||
.await;
|
||||
}
|
||||
SearchProvider::Baidu => {
|
||||
let decider = context.network_policy.as_ref();
|
||||
check_policy(decider, "qianfan.baidubce.com")?;
|
||||
return self
|
||||
.run_baidu_search(&query, max_results, timeout_ms, context)
|
||||
.await;
|
||||
}
|
||||
SearchProvider::Bing | SearchProvider::DuckDuckGo => {}
|
||||
}
|
||||
|
||||
@@ -645,6 +661,73 @@ impl WebSearchTool {
|
||||
|
||||
search_tool_result(query.to_string(), "metaso", results, None)
|
||||
}
|
||||
|
||||
/// Search via Baidu AI Search API (<https://qianfan.baidubce.com>).
|
||||
async fn run_baidu_search(
|
||||
&self,
|
||||
query: &str,
|
||||
max_results: usize,
|
||||
timeout_ms: u64,
|
||||
context: &ToolContext,
|
||||
) -> Result<ToolResult, ToolError> {
|
||||
let env_key = std::env::var("BAIDU_SEARCH_API_KEY").ok();
|
||||
let api_key = context
|
||||
.search_api_key
|
||||
.as_deref()
|
||||
.or(env_key.as_deref())
|
||||
.ok_or_else(|| {
|
||||
ToolError::execution_failed(
|
||||
"Baidu search requires an API key. Set `BAIDU_SEARCH_API_KEY` or `[search] api_key` in config.toml.",
|
||||
)
|
||||
})?;
|
||||
|
||||
let client = reqwest::Client::builder()
|
||||
.timeout(Duration::from_millis(timeout_ms))
|
||||
.build()
|
||||
.map_err(|e| {
|
||||
ToolError::execution_failed(format!("Failed to build HTTP client: {e}"))
|
||||
})?;
|
||||
|
||||
let payload = baidu_search_payload(query, max_results);
|
||||
|
||||
let resp = client
|
||||
.post(BAIDU_ENDPOINT)
|
||||
.header("Authorization", format!("Bearer {api_key}"))
|
||||
.json(&payload)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| {
|
||||
ToolError::execution_failed(format!("Baidu search request failed: {e}"))
|
||||
})?;
|
||||
|
||||
let status = resp.status();
|
||||
let body = resp.text().await.map_err(|e| {
|
||||
ToolError::execution_failed(format!("Failed to read Baidu response: {e}"))
|
||||
})?;
|
||||
|
||||
if !status.is_success() {
|
||||
let msg = match status.as_u16() {
|
||||
401 | 403 => "Baidu search API key rejected — check BAIDU_SEARCH_API_KEY or `[search] api_key` in config.toml".to_string(),
|
||||
429 => "Baidu search rate-limited — wait and retry, or check your Baidu AI Search quota".to_string(),
|
||||
_ => {
|
||||
let truncated = truncate_error_body(&body);
|
||||
format!("Baidu search failed: HTTP {} — {truncated}", status.as_u16())
|
||||
}
|
||||
};
|
||||
return Err(ToolError::execution_failed(msg));
|
||||
}
|
||||
|
||||
let parsed: serde_json::Value = serde_json::from_str(&body).map_err(|e| {
|
||||
ToolError::execution_failed(format!("Failed to parse Baidu response: {e}"))
|
||||
})?;
|
||||
|
||||
if let Some(error) = baidu_error_message(&parsed) {
|
||||
return Err(ToolError::execution_failed(error));
|
||||
}
|
||||
|
||||
let results = parse_baidu_results(&parsed, max_results);
|
||||
search_tool_result(query.to_string(), "baidu", results, None)
|
||||
}
|
||||
}
|
||||
|
||||
fn truncate_error_body(body: &str) -> String {
|
||||
@@ -662,12 +745,87 @@ fn truncate_error_body(body: &str) -> String {
|
||||
|
||||
fn sanitize_error_body(body: &str) -> String {
|
||||
let stripped = strip_html_tags(body);
|
||||
stripped
|
||||
let visible: String = stripped
|
||||
.chars()
|
||||
.filter(|c| !c.is_control() || c.is_ascii_whitespace())
|
||||
.collect();
|
||||
get_bearer_token_re()
|
||||
.replace_all(&visible, "Bearer [REDACTED]")
|
||||
.to_string()
|
||||
}
|
||||
|
||||
fn parse_baidu_results(parsed: &Value, max_results: usize) -> Vec<WebSearchEntry> {
|
||||
parsed
|
||||
.get("references")
|
||||
.and_then(|v| v.as_array())
|
||||
.into_iter()
|
||||
.flat_map(|arr| arr.iter())
|
||||
.filter_map(|item| {
|
||||
let title = item
|
||||
.get("title")
|
||||
.or_else(|| item.get("name"))
|
||||
.and_then(|s| s.as_str())?
|
||||
.trim();
|
||||
let url = item
|
||||
.get("url")
|
||||
.or_else(|| item.get("link"))
|
||||
.and_then(|s| s.as_str())?
|
||||
.trim();
|
||||
if title.is_empty() || url.is_empty() {
|
||||
return None;
|
||||
}
|
||||
let snippet = item
|
||||
.get("content")
|
||||
.or_else(|| item.get("snippet"))
|
||||
.or_else(|| item.get("summary"))
|
||||
.and_then(|s| s.as_str())
|
||||
.map(str::trim)
|
||||
.filter(|s| !s.is_empty())
|
||||
.map(ToString::to_string);
|
||||
Some(WebSearchEntry {
|
||||
title: title.to_string(),
|
||||
url: url.to_string(),
|
||||
snippet,
|
||||
})
|
||||
})
|
||||
.take(max_results)
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn baidu_error_message(parsed: &Value) -> Option<String> {
|
||||
let code = parsed
|
||||
.get("error_code")
|
||||
.or_else(|| parsed.get("code"))
|
||||
.and_then(|v| v.as_i64())?;
|
||||
if code == 0 {
|
||||
return None;
|
||||
}
|
||||
let message = parsed
|
||||
.get("error_msg")
|
||||
.or_else(|| parsed.get("message"))
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("unknown error");
|
||||
Some(format!("Baidu search API error (code {code}: {message})"))
|
||||
}
|
||||
|
||||
fn baidu_search_payload(query: &str, max_results: usize) -> Value {
|
||||
json!({
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": query,
|
||||
}
|
||||
],
|
||||
"search_source": "baidu_search_v2",
|
||||
"resource_type_filter": [
|
||||
{
|
||||
"type": "web",
|
||||
"top_k": max_results,
|
||||
}
|
||||
],
|
||||
})
|
||||
}
|
||||
|
||||
fn extract_search_query(input: &Value) -> Result<String, ToolError> {
|
||||
for key in ["query", "q"] {
|
||||
if let Some(value) = input.get(key) {
|
||||
@@ -1034,9 +1192,10 @@ fn extract_query_param(url: &str, key: &str) -> Option<String> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{
|
||||
ERROR_BODY_PREVIEW_BYTES, WebSearchEntry, WebSearchTool, decode_html_entities,
|
||||
extract_search_query, is_likely_spam_results, normalize_bing_url,
|
||||
optional_search_max_results, root_domain, sanitize_error_body, truncate_error_body,
|
||||
ERROR_BODY_PREVIEW_BYTES, WebSearchEntry, WebSearchTool, baidu_search_payload,
|
||||
decode_html_entities, extract_search_query, is_likely_spam_results, normalize_bing_url,
|
||||
optional_search_max_results, parse_baidu_results, root_domain, sanitize_error_body,
|
||||
truncate_error_body,
|
||||
};
|
||||
use serde_json::json;
|
||||
|
||||
@@ -1314,6 +1473,96 @@ mod tests {
|
||||
assert_eq!(sanitized, "error");
|
||||
}
|
||||
|
||||
/// A bearer credential echoed in an error body must not survive sanitizing.
#[test]
fn sanitize_error_body_redacts_bearer_tokens() {
    let sanitized = sanitize_error_body(
        r#"{"error":"bad token","authorization":"Bearer test-token/with+chars="}"#,
    );

    let leaked = sanitized.contains("test-token/with+chars=");
    assert!(!leaked);
    assert!(sanitized.contains("Bearer [REDACTED]"));
}
|
||||
|
||||
/// References are returned in response order, with the snippet taken from
/// either `content` or `snippet`.
#[test]
fn parse_baidu_references_extracts_ranked_results() {
    let body = json!({
        "references": [
            {
                "title": "Rust 官方文档",
                "url": "https://www.rust-lang.org/",
                "content": "Rust 是一门注重性能和可靠性的语言。"
            },
            {
                "title": "Cargo Book",
                "url": "https://doc.rust-lang.org/cargo/",
                "snippet": "Cargo is Rust's package manager."
            }
        ]
    });
    let expected = [
        (
            "Rust 官方文档",
            "https://www.rust-lang.org/",
            "Rust 是一门注重性能和可靠性的语言。",
        ),
        (
            "Cargo Book",
            "https://doc.rust-lang.org/cargo/",
            "Cargo is Rust's package manager.",
        ),
    ];

    let results = parse_baidu_results(&body, 10);

    assert_eq!(results.len(), 2);
    for (entry, (title, url, snippet)) in results.iter().zip(expected) {
        assert_eq!(entry.title, title);
        assert_eq!(entry.url, url);
        assert_eq!(entry.snippet.as_deref(), Some(snippet));
    }
}
|
||||
|
||||
/// Entries lacking a title or a URL are dropped; a missing snippet is fine.
#[test]
fn parse_baidu_references_skips_incomplete_entries() {
    let body = json!({
        "references": [
            {"title": "No URL", "content": "missing url"},
            {"url": "https://example.com/no-title", "content": "missing title"},
            {"title": "Valid", "url": "https://example.com/valid"}
        ]
    });

    let results = parse_baidu_results(&body, 10);

    assert_eq!(results.len(), 1);
    let only = &results[0];
    assert_eq!(only.title, "Valid");
    assert_eq!(only.url, "https://example.com/valid");
    assert_eq!(only.snippet, None);
}
|
||||
|
||||
/// The request body must name the `baidu_search_v2` source, carry the query
/// as the first message's content, and pass the result cap as `top_k`.
#[test]
fn baidu_search_payload_uses_official_search_source() {
    let payload = baidu_search_payload("Rust cargo workspace", 3);

    let search_source = payload.pointer("/search_source").and_then(|v| v.as_str());
    let first_content = payload
        .pointer("/messages/0/content")
        .and_then(|v| v.as_str());
    let top_k = payload
        .pointer("/resource_type_filter/0/top_k")
        .and_then(|v| v.as_u64());

    assert_eq!(search_source, Some("baidu_search_v2"));
    assert_eq!(first_content, Some("Rust cargo workspace"));
    assert_eq!(top_k, Some(3));
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn tavily_provider_without_api_key_surfaces_clear_error_not_silent_fallback() {
|
||||
// Trust-boundary pin: if a user has opted into Tavily but
|
||||
@@ -1360,6 +1609,35 @@ mod tests {
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn baidu_provider_without_api_key_surfaces_clear_error_not_silent_fallback() {
|
||||
use crate::config::SearchProvider;
|
||||
use crate::tools::spec::{ToolContext, ToolSpec};
|
||||
|
||||
let prev = std::env::var_os("BAIDU_SEARCH_API_KEY");
|
||||
unsafe { std::env::remove_var("BAIDU_SEARCH_API_KEY") };
|
||||
|
||||
let tmp = tempfile::tempdir().expect("tempdir");
|
||||
let mut ctx = ToolContext::new(tmp.path().to_path_buf());
|
||||
ctx.search_provider = SearchProvider::Baidu;
|
||||
ctx.search_api_key = None;
|
||||
let err = WebSearchTool
|
||||
.execute(json!({"query": "anything"}), &ctx)
|
||||
.await
|
||||
.expect_err("missing api_key must surface as ToolError");
|
||||
|
||||
match prev {
|
||||
Some(value) => unsafe { std::env::set_var("BAIDU_SEARCH_API_KEY", value) },
|
||||
None => unsafe { std::env::remove_var("BAIDU_SEARCH_API_KEY") },
|
||||
}
|
||||
|
||||
let msg = err.to_string();
|
||||
assert!(
|
||||
msg.contains("Baidu") && msg.contains("API key"),
|
||||
"error must name the provider and missing key; got `{msg}`"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn metaso_provider_uses_built_in_key_when_no_config_key_set() {
|
||||
// Unlike Tavily/Bocha, Metaso falls back to a built-in default, so
|
||||
|
||||
+12
-5
@@ -686,14 +686,21 @@ Use `codewhale-tui features list` to inspect known flags and their effective sta
|
||||
`web_search` uses DuckDuckGo by default and does not require an API key. The
|
||||
DuckDuckGo path keeps a Bing fallback when DDG returns a bot challenge or no
|
||||
parseable results. Bing remains selectable for users who explicitly want it,
|
||||
and Tavily or Bocha can be selected when an API-backed provider is preferred.
|
||||
**Metaso** ([metaso.cn](https://metaso.cn))
|
||||
100 searches/day free quota — set `METASO_API_KEY` or `[search] api_key` for a higher quota.
|
||||
and Tavily, Bocha, Metaso, or Baidu can be selected when an API-backed provider
|
||||
is preferred.
|
||||
|
||||
**Metaso** ([metaso.cn](https://metaso.cn)) has a 100 searches/day free quota;
|
||||
set `METASO_API_KEY` or `[search] api_key` for a higher quota.
|
||||
|
||||
**Baidu** uses Baidu AI Search at
|
||||
`https://qianfan.baidubce.com/v2/ai_search/web_search`. Set
|
||||
`BAIDU_SEARCH_API_KEY` or `[search] api_key`. This is a search-tool backend
|
||||
only; it does not add a Baidu model provider.
|
||||
|
||||
```toml
|
||||
[search]
|
||||
provider = "duckduckgo" # duckduckgo | bing | tavily | bocha | metaso
|
||||
# api_key = "YOUR_KEY" # required for tavily and bocha; optional for metaso (100 searches/day free quota)
|
||||
provider = "baidu" # duckduckgo | bing | tavily | bocha | metaso | baidu
|
||||
# api_key = "YOUR_KEY" # required for tavily, bocha, and baidu; optional for metaso
|
||||
```
|
||||
|
||||
## Local Media Attachments
|
||||
|
||||
@@ -35,7 +35,7 @@ chosen over the available shell equivalent. Companion to `crates/tui/src/prompts
|
||||
|---|---|
|
||||
| `grep_files` | Regex search file contents within the workspace; structured matches + context lines. Pure-Rust (`regex` crate), no `rg`/`grep` shell-out. |
|
||||
| `file_search` | Fuzzy-match filenames (not contents). Use when you know roughly the name. |
|
||||
| `web_search` | DuckDuckGo by default with Bing fallback; Bing, Tavily, and Bocha are selectable in config. Ranked snippets + `ref_id` for citation. |
|
||||
| `web_search` | DuckDuckGo by default with Bing fallback; Bing, Tavily, Bocha, Metaso, and Baidu are selectable in config. Ranked snippets + `ref_id` for citation. |
|
||||
| `fetch_url` | Direct HTTP GET on a known URL. Faster than `web_search` when the link is already known. HTML stripped to text by default. |
|
||||
|
||||
### Shell
|
||||
|
||||
Reference in New Issue
Block a user