diff --git a/config.example.toml b/config.example.toml index c8a7155b..4169be0f 100644 --- a/config.example.toml +++ b/config.example.toml @@ -265,25 +265,29 @@ max_subagents = 10 # optional (1-20) # ───────────────────────────────────────────────────────────────────────────────── # Web Search Provider # ───────────────────────────────────────────────────────────────────────────────── -# Choose which backend `web_search` uses. Default is Bing HTML scraping — no -# API key needed. DuckDuckGo remains selectable and still falls back to Bing -# when its HTML endpoint returns a bot challenge or no parseable results. -# Switch to Tavily or Bocha for reliable search in mainland China. +# Choose which backend `web_search` uses. Default is DuckDuckGo HTML scraping +# with Bing fallback — no API key needed. Bing remains selectable for users who +# explicitly prefer it. Switch to Tavily, Bocha, Metaso, or Baidu for +# API-backed search. # # [search] -# provider = "bing" # bing | duckduckgo | tavily | bocha | metaso +# provider = "duckduckgo" # duckduckgo | bing | tavily | bocha | metaso | baidu # # duckduckgo: HTML scrape with Bing fallback -# # tavily: https://tavily.com — AI search, needs api_key -# # bocha: https://bochaai.com — 博查AI搜索,国内友好,需api_key -# # metaso: https://metaso.cn — 秘塔AI搜索,每天 100 次免费 -# # 设置 METASO_API_KEY 或 [search] api_key 可提升额度 -# api_key = "tvly-YOUR_KEY" # required for tavily, bocha, and metaso (optional for metaso) -# # WARNING: treat config.toml like a secret file when -# # storing API keys. Use env vars or `auth set` instead. +# # bing: HTML scrape, no API key +# # tavily: https://tavily.com — AI search, needs api_key +# # bocha: https://bochaai.com — 博查AI搜索,国内友好,需api_key +# # metaso: https://metaso.cn — 秘塔AI搜索,每天 100 次免费 +# # 设置 METASO_API_KEY 或 [search] api_key 可提升额度 +# # baidu: 百度 AI Search via qianfan.baidubce.com,需 api_key +# api_key = "YOUR_SEARCH_KEY" # required for tavily, bocha, and baidu; optional for metaso +# # WARNING: treat config.toml like a secret file when +# # storing API keys. Prefer env vars for local smoke tests. # # Env-var overrides: # DEEPSEEK_SEARCH_PROVIDER → search.provider # DEEPSEEK_SEARCH_API_KEY → search.api_key +# METASO_API_KEY → metaso key fallback +# BAIDU_SEARCH_API_KEY → baidu key fallback # ───────────────────────────────────────────────────────────────────────────────── # Network Policy (#135) diff --git a/crates/tui/src/config.rs b/crates/tui/src/config.rs index c0beec79..03c19344 100644 --- a/crates/tui/src/config.rs +++ b/crates/tui/src/config.rs @@ -675,8 +675,9 @@ impl SearchProvider { "tavily" => Some(Self::Tavily), "bocha" => Some(Self::Bocha), "metaso" => Some(Self::Metaso), - "baidu" | "baidu-search" | "baidu_search" | "baidu-ai-search" - | "baidu_ai_search" => Some(Self::Baidu), + "baidu" | "baidu-search" | "baidu_search" | "baidu-ai-search" | "baidu_ai_search" => { + Some(Self::Baidu) + } _ => None, } } diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index 02737eb7..37877750 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -170,8 +170,9 @@ pub struct EngineConfig { pub workshop: Option, /// Which search backend `web_search` should use. Default: DuckDuckGo. pub search_provider: crate::config::SearchProvider, - /// API key for Tavily, Bocha, or Metaso. `None` for Bing or DuckDuckGo. + /// API key for Tavily, Bocha, Metaso, or Baidu. `None` for Bing or DuckDuckGo. /// Metaso also falls back to `METASO_API_KEY` env var, then a built-in key. + /// Baidu also falls back to `BAIDU_SEARCH_API_KEY`. pub search_api_key: Option, /// Per-step DeepSeek API timeout for sub-agent `create_message` requests. /// Resolved from `[subagents] api_timeout_secs` (clamped to 1..=1800) diff --git a/crates/tui/src/tools/spec.rs b/crates/tui/src/tools/spec.rs index 8f2e186e..6a66c37f 100644 --- a/crates/tui/src/tools/spec.rs +++ b/crates/tui/src/tools/spec.rs @@ -165,8 +165,9 @@ pub struct ToolContext { /// Which search backend `web_search` should use. Default: DuckDuckGo. Set via /// `[search] provider` in config.toml. pub search_provider: crate::config::SearchProvider, - /// API key for Tavily, Bocha, or Metaso. `None` for Bing or DuckDuckGo. + /// API key for Tavily, Bocha, Metaso, or Baidu. `None` for Bing or DuckDuckGo. /// Metaso also falls back to `METASO_API_KEY` env var, then a built-in key. + /// Baidu also falls back to `BAIDU_SEARCH_API_KEY`. pub search_api_key: Option, /// Per-session workshop variable store (#548). Holds the raw content of diff --git a/crates/tui/src/tools/web_search.rs b/crates/tui/src/tools/web_search.rs index c5f68ab7..a86b3c0b 100644 --- a/crates/tui/src/tools/web_search.rs +++ b/crates/tui/src/tools/web_search.rs @@ -1182,8 +1182,8 @@ fn extract_query_param(url: &str, key: &str) -> Option { mod tests { use super::{ ERROR_BODY_PREVIEW_BYTES, WebSearchEntry, WebSearchTool, decode_html_entities, - extract_search_query, is_likely_spam_results, optional_search_max_results, root_domain, - parse_baidu_results, sanitize_error_body, truncate_error_body, + extract_search_query, is_likely_spam_results, optional_search_max_results, + parse_baidu_results, root_domain, sanitize_error_body, truncate_error_body, }; use serde_json::json; @@ -1452,12 +1452,11 @@ mod tests { #[test] fn sanitize_error_body_redacts_bearer_tokens() { - let body = - r#"{"error":"bad token","authorization":"Bearer bce-v3/ALTAK-example/secret"}"#; + let body = r#"{"error":"bad token","authorization":"Bearer test-token/with+chars="}"#; let sanitized = sanitize_error_body(body); - assert!(!sanitized.contains("bce-v3/ALTAK-example/secret")); + assert!(!sanitized.contains("test-token/with+chars=")); assert!(sanitized.contains("Bearer [REDACTED]")); } diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index f0b75de0..3d4ce5d1 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -663,14 +663,21 @@ Use `codewhale-tui features list` to inspect known flags and their effective sta `web_search` uses DuckDuckGo by default and does not require an API key. The DuckDuckGo path keeps a Bing fallback when DDG returns a bot challenge or no parseable results. Bing remains selectable for users who explicitly want it, -and Tavily or Bocha can be selected when an API-backed provider is preferred. -**Metaso** ([metaso.cn](https://metaso.cn)) -100 searches/day free quota — set `METASO_API_KEY` or `[search] api_key` for a higher quota. +and Tavily, Bocha, Metaso, or Baidu can be selected when an API-backed provider +is preferred. + +**Metaso** ([metaso.cn](https://metaso.cn)) has a 100 searches/day free quota; +set `METASO_API_KEY` or `[search] api_key` for a higher quota. + +**Baidu** uses Baidu AI Search at +`https://qianfan.baidubce.com/v2/ai_search/web_search`. Set +`BAIDU_SEARCH_API_KEY` or `[search] api_key`. This is a search-tool backend +only; it does not add a Baidu model provider. ```toml [search] -provider = "duckduckgo" # duckduckgo | bing | tavily | bocha | metaso -# api_key = "YOUR_KEY" # required for tavily and bocha; optional for metaso (100 searches/day free quota) +provider = "baidu" # duckduckgo | bing | tavily | bocha | metaso | baidu +# api_key = "YOUR_KEY" # required for tavily, bocha, and baidu; optional for metaso ``` ## Local Media Attachments diff --git a/docs/TOOL_SURFACE.md b/docs/TOOL_SURFACE.md index aa31fb4e..36933b0d 100644 --- a/docs/TOOL_SURFACE.md +++ b/docs/TOOL_SURFACE.md @@ -35,7 +35,7 @@ chosen over the available shell equivalent. Companion to `crates/tui/src/prompts |---|---| | `grep_files` | Regex search file contents within the workspace; structured matches + context lines. Pure-Rust (`regex` crate), no `rg`/`grep` shell-out. | | `file_search` | Fuzzy-match filenames (not contents). Use when you know roughly the name. | -| `web_search` | DuckDuckGo by default with Bing fallback; Bing, Tavily, and Bocha are selectable in config. Ranked snippets + `ref_id` for citation. | +| `web_search` | DuckDuckGo by default with Bing fallback; Bing, Tavily, Bocha, Metaso, and Baidu are selectable in config. Ranked snippets + `ref_id` for citation. | | `fetch_url` | Direct HTTP GET on a known URL. Faster than `web_search` when the link is already known. HTML stripped to text by default. | ### Shell