feat(web): add Metaso as a web search provider (metaso.cn) (#2059)
Adds Metaso AI Search as a new SearchProvider option alongside Bing, DuckDuckGo, Tavily, and Bocha. Co-authored-by: Zhao Xiaohong <zhaoxiaohong@metasota.ai>
This commit is contained in:
+4
-2
@@ -271,11 +271,13 @@ max_subagents = 10 # optional (1-20)
|
||||
# Switch to Tavily or Bocha for reliable search in mainland China.
|
||||
#
|
||||
# [search]
|
||||
# provider = "bing" # bing | duckduckgo | tavily | bocha
|
||||
# provider = "bing" # bing | duckduckgo | tavily | bocha | metaso
|
||||
# # duckduckgo: HTML scrape with Bing fallback
|
||||
# # tavily: https://tavily.com — AI search, needs api_key
|
||||
# # bocha: https://bochaai.com — 博查AI搜索,国内友好,需api_key
|
||||
# api_key = "tvly-YOUR_KEY" # required for tavily and bocha
|
||||
# # metaso: https://metaso.cn — 秘塔AI搜索,每天 100 次免费
|
||||
# # 设置 METASO_API_KEY 或 [search] api_key 可提升额度
|
||||
# api_key = "tvly-YOUR_KEY" # required for tavily and bocha; optional for metaso
|
||||
# # WARNING: treat config.toml like a secret file when
|
||||
# # storing API keys. Use env vars or `auth set` instead.
|
||||
#
|
||||
|
||||
@@ -657,6 +657,10 @@ pub enum SearchProvider {
|
||||
Tavily,
|
||||
/// Bocha AI Search API (<https://bochaai.com>). Requires api_key.
|
||||
Bocha,
|
||||
/// Metaso AI Search API (<https://metaso.cn>). Uses built-in default key
|
||||
/// or `METASO_API_KEY` env var; configurable via `[search] api_key`.
|
||||
#[serde(alias = "metaso")]
|
||||
Metaso,
|
||||
}
|
||||
|
||||
impl SearchProvider {
|
||||
@@ -678,6 +682,7 @@ impl SearchProvider {
|
||||
Self::DuckDuckGo => "duckduckgo",
|
||||
Self::Tavily => "tavily",
|
||||
Self::Bocha => "bocha",
|
||||
Self::Metaso => "metaso",
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -709,10 +714,11 @@ pub struct SearchProviderResolution {
|
||||
/// Web search provider configuration (`[search]` table in config.toml).
|
||||
#[derive(Debug, Clone, Deserialize, Default)]
|
||||
pub struct SearchConfig {
|
||||
/// Search provider: `bing` | `duckduckgo` | `tavily` | `bocha`. Default: `bing`.
|
||||
/// Search provider: `bing` | `duckduckgo` | `tavily` | `bocha` | `metaso`. Default: `bing`.
|
||||
#[serde(default)]
|
||||
pub provider: Option<SearchProvider>,
|
||||
/// API key for Tavily or Bocha. Not required for Bing or DuckDuckGo.
|
||||
/// API key for Tavily, Bocha, or Metaso. Not required for Bing or DuckDuckGo.
|
||||
/// Metaso also falls back to `METASO_API_KEY` env var, then a built-in default.
|
||||
#[serde(default)]
|
||||
pub api_key: Option<String>,
|
||||
}
|
||||
|
||||
@@ -164,7 +164,8 @@ pub struct EngineConfig {
|
||||
pub workshop: Option<crate::tools::large_output_router::WorkshopConfig>,
|
||||
/// Which search backend `web_search` should use. Default: Bing.
|
||||
pub search_provider: crate::config::SearchProvider,
|
||||
/// API key for Tavily or Bocha. `None` for Bing or DuckDuckGo.
|
||||
/// API key for Tavily, Bocha, or Metaso. `None` for Bing or DuckDuckGo.
|
||||
/// Metaso also falls back to `METASO_API_KEY` env var, then a built-in key.
|
||||
pub search_api_key: Option<String>,
|
||||
/// Per-step DeepSeek API timeout for sub-agent `create_message` requests.
|
||||
/// Resolved from `[subagents] api_timeout_secs` (clamped to 1..=1800)
|
||||
|
||||
@@ -165,7 +165,8 @@ pub struct ToolContext {
|
||||
/// Which search backend `web_search` should use. Default: Bing. Set via
|
||||
/// `[search] provider` in config.toml.
|
||||
pub search_provider: crate::config::SearchProvider,
|
||||
/// API key for Tavily or Bocha. `None` for Bing or DuckDuckGo.
|
||||
/// API key for Tavily, Bocha, or Metaso. `None` for Bing or DuckDuckGo.
|
||||
/// Metaso also falls back to `METASO_API_KEY` env var, then a built-in key.
|
||||
pub search_api_key: Option<String>,
|
||||
|
||||
/// Per-session workshop variable store (#548). Holds the raw content of
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
//! Web search tool backed by multiple providers: Bing HTML scrape, DuckDuckGo
|
||||
//! (HTML scrape with Bing fallback), Tavily API, and Bocha (博查) API.
|
||||
//! (HTML scrape with Bing fallback), Tavily API, Bocha (博查) API, and
|
||||
//! Metaso API (<https://metaso.cn>).
|
||||
//!
|
||||
//! This is the primary web search surface for agents. For browsing workflows
|
||||
//! (page open, click, screenshot) use a direct URL approach instead.
|
||||
//!
|
||||
//! Set `[search]` in config.toml to switch providers:
|
||||
//! provider = "duckduckgo" # or tavily/bocha
|
||||
//! provider = "duckduckgo" # or tavily/bocha/metaso
|
||||
//! api_key = "tvly-..."
|
||||
|
||||
use super::spec::{
|
||||
@@ -25,6 +26,10 @@ const DUCKDUCKGO_HOST: &str = "html.duckduckgo.com";
|
||||
const BING_HOST: &str = "www.bing.com";
|
||||
const TAVILY_ENDPOINT: &str = "https://api.tavily.com/search";
|
||||
const BOCHA_ENDPOINT: &str = "https://api.bochaai.com/v1/ai/search";
|
||||
const METASO_ENDPOINT: &str = "https://metaso.cn/api/v1";
|
||||
/// Intentionally public default key provided by Metaso for open-source/community use.
|
||||
/// Last-resort fallback after config and env var. Rate-limited to ~100 searches/day.
|
||||
const METASO_DEFAULT_API_KEY: &str = "mk-E384C1DD5E8501BB7EFE27C949AFDE5B";
|
||||
const ERROR_BODY_PREVIEW_BYTES: usize = 512;
|
||||
|
||||
/// Returns `Ok(())` if the policy allows the call, or a `ToolError` otherwise.
|
||||
@@ -198,6 +203,13 @@ impl ToolSpec for WebSearchTool {
|
||||
.run_bocha_search(&query, max_results, timeout_ms, context)
|
||||
.await;
|
||||
}
|
||||
SearchProvider::Metaso => {
|
||||
let decider = context.network_policy.as_ref();
|
||||
check_policy(decider, "metaso.cn")?;
|
||||
return self
|
||||
.run_metaso_search(&query, max_results, timeout_ms, context)
|
||||
.await;
|
||||
}
|
||||
SearchProvider::Bing | SearchProvider::DuckDuckGo => {}
|
||||
}
|
||||
|
||||
@@ -530,6 +542,109 @@ impl WebSearchTool {
|
||||
|
||||
ToolResult::json(&response).map_err(|e| ToolError::execution_failed(e.to_string()))
|
||||
}
|
||||
|
||||
/// Search via Metaso AI Search API (<https://metaso.cn>). Falls back to
|
||||
/// `METASO_API_KEY` env var then a built-in default key if no config key
|
||||
/// is set.
|
||||
async fn run_metaso_search(
|
||||
&self,
|
||||
query: &str,
|
||||
max_results: usize,
|
||||
timeout_ms: u64,
|
||||
context: &ToolContext,
|
||||
) -> Result<ToolResult, ToolError> {
|
||||
let env_key = std::env::var("METASO_API_KEY").ok();
|
||||
let api_key = context
|
||||
.search_api_key
|
||||
.as_deref()
|
||||
.or(env_key.as_deref())
|
||||
.unwrap_or(METASO_DEFAULT_API_KEY);
|
||||
|
||||
let client = reqwest::Client::builder()
|
||||
.timeout(Duration::from_millis(timeout_ms))
|
||||
.build()
|
||||
.map_err(|e| {
|
||||
ToolError::execution_failed(format!("Failed to build HTTP client: {e}"))
|
||||
})?;
|
||||
|
||||
let size = max_results.clamp(1, 100);
|
||||
let payload = json!({
|
||||
"q": query,
|
||||
"scope": "webpage",
|
||||
"size": size,
|
||||
});
|
||||
|
||||
let resp = client
|
||||
.post(format!("{METASO_ENDPOINT}/search"))
|
||||
.header("Content-Type", "application/json")
|
||||
.header("Authorization", format!("Bearer {api_key}"))
|
||||
.json(&payload)
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| {
|
||||
ToolError::execution_failed(format!("Metaso search request failed: {e}"))
|
||||
})?;
|
||||
|
||||
let status = resp.status();
|
||||
let body = resp.text().await.map_err(|e| {
|
||||
ToolError::execution_failed(format!("Failed to read Metaso response: {e}"))
|
||||
})?;
|
||||
|
||||
if !status.is_success() {
|
||||
let msg = match status.as_u16() {
|
||||
401 | 403 => "Metaso API key rejected — check METASO_API_KEY or set `[search] api_key` in config.toml, or get one at https://metaso.cn/search-api/playground".to_string(),
|
||||
429 => "Metaso rate-limited — wait and retry, or get your own API key at https://metaso.cn/search-api/playground".to_string(),
|
||||
_ => {
|
||||
let truncated = truncate_error_body(&body);
|
||||
format!("Metaso server error (HTTP {status}) — {truncated}")
|
||||
}
|
||||
};
|
||||
return Err(ToolError::execution_failed(msg));
|
||||
}
|
||||
|
||||
let parsed: serde_json::Value = serde_json::from_str(&body).map_err(|e| {
|
||||
ToolError::execution_failed(format!("Failed to parse Metaso response: {e}"))
|
||||
})?;
|
||||
|
||||
// Check business-logic error codes in the response body.
|
||||
if let Some(code) = parsed.get("code").and_then(|v| v.as_i64())
|
||||
&& code != 0
|
||||
{
|
||||
let msg = parsed
|
||||
.get("message")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("unknown error");
|
||||
return Err(ToolError::execution_failed(match code {
|
||||
3003 => "Metaso: daily search limit reached — set METASO_API_KEY or get one at https://metaso.cn/search-api/playground".to_string(),
|
||||
2005 => "Metaso API key rejected — check METASO_API_KEY or set `[search] api_key` in config.toml".to_string(),
|
||||
_ => format!("Metaso API error (code {code}: {msg})"),
|
||||
}));
|
||||
}
|
||||
|
||||
let results: Vec<WebSearchEntry> = parsed
|
||||
.get("webpages")
|
||||
.and_then(|v| v.as_array())
|
||||
.into_iter()
|
||||
.flat_map(|arr| arr.iter())
|
||||
.filter_map(|item| {
|
||||
let title = item.get("title")?.as_str()?.to_string();
|
||||
let url = item.get("link")?.as_str()?.to_string();
|
||||
let snippet = item
|
||||
.get("snippet")
|
||||
.or_else(|| item.get("summary"))
|
||||
.and_then(|s| s.as_str())
|
||||
.map(|s| s.to_string());
|
||||
Some(WebSearchEntry {
|
||||
title,
|
||||
url,
|
||||
snippet,
|
||||
})
|
||||
})
|
||||
.take(size)
|
||||
.collect();
|
||||
|
||||
search_tool_result(query.to_string(), "metaso", results, None)
|
||||
}
|
||||
}
|
||||
|
||||
fn truncate_error_body(body: &str) -> String {
|
||||
@@ -1225,4 +1340,30 @@ mod tests {
|
||||
"error must name the provider and missing key; got `{msg}`"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn metaso_provider_uses_built_in_key_when_no_config_key_set() {
|
||||
// Unlike Tavily/Bocha, Metaso falls back to a built-in default, so
|
||||
// the call should NOT return an API-key-related error — it should
|
||||
// either succeed or fail with a network-level error, but never a
|
||||
// missing-key error.
|
||||
use crate::config::SearchProvider;
|
||||
use crate::tools::spec::{ToolContext, ToolSpec};
|
||||
|
||||
let tmp = tempfile::tempdir().expect("tempdir");
|
||||
let mut ctx = ToolContext::new(tmp.path().to_path_buf());
|
||||
ctx.search_provider = SearchProvider::Metaso;
|
||||
ctx.search_api_key = None;
|
||||
let result = WebSearchTool
|
||||
.execute(json!({"query": "anything"}), &ctx)
|
||||
.await;
|
||||
let msg = match &result {
|
||||
Ok(res) => format!("{res:?}"),
|
||||
Err(e) => e.to_string(),
|
||||
};
|
||||
assert!(
|
||||
!msg.contains("API key"),
|
||||
"should not complain about missing API key (built-in default); got `{msg}`"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -656,12 +656,13 @@ Use `codewhale-tui features list` to inspect known flags and their effective sta
|
||||
|
||||
`web_search` uses Bing by default and does not require an API key. DuckDuckGo
|
||||
remains selectable for users who explicitly want it, and Tavily or Bocha can be
|
||||
selected when an API-backed provider is preferred.
|
||||
selected when an API-backed provider is preferred. **Metaso** ([metaso.cn](https://metaso.cn))
|
||||
offers a free quota of 100 searches/day — set `METASO_API_KEY` or `[search] api_key` for a higher quota.
|
||||
|
||||
```toml
|
||||
[search]
|
||||
provider = "bing" # bing | duckduckgo | tavily | bocha
|
||||
# api_key = "tvly-YOUR_KEY" # required for tavily and bocha
|
||||
provider = "bing" # bing | duckduckgo | tavily | bocha | metaso
|
||||
# api_key = "YOUR_KEY" # required for tavily and bocha; optional for metaso (100 searches/day free quota)
|
||||
```
|
||||
|
||||
## Local Media Attachments
|
||||
|
||||
Reference in New Issue
Block a user