From a3f88bf6cf674c2089954dc88c2db7e24076f344 Mon Sep 17 00:00:00 2001 From: Hunter Bown Date: Thu, 14 May 2026 03:31:15 -0500 Subject: [PATCH] fix(search): default web search to bing (#1619) Summary: - add Bing as explicit default web_search provider - keep explicit DuckDuckGo configuration supported - update docs/help/config examples Validation: CI green before merge. --- CHANGELOG.md | 7 +++ config.example.toml | 8 +-- crates/tui/CHANGELOG.md | 7 +++ crates/tui/src/config.rs | 36 ++++++++++--- crates/tui/src/core/engine.rs | 4 +- crates/tui/src/tools/spec.rs | 6 +-- crates/tui/src/tools/web_search.rs | 85 ++++++++++++++++++------------ docs/CONFIGURATION.md | 12 +++++ docs/TOOL_SURFACE.md | 2 +- 9 files changed, 117 insertions(+), 50 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c883e39a..5f3ff686 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Changed + +- **Bing is the default `web_search` backend.** DuckDuckGo remains selectable + with `[search] provider = "duckduckgo"` and keeps its Bing fallback path. + ## [0.8.36] - 2026-05-14 ### Added diff --git a/config.example.toml b/config.example.toml index c2ce1e55..6aceae34 100644 --- a/config.example.toml +++ b/config.example.toml @@ -220,12 +220,14 @@ max_subagents = 10 # optional (1-20) # ───────────────────────────────────────────────────────────────────────────────── # Web Search Provider # ───────────────────────────────────────────────────────────────────────────────── -# Choose which backend `web_search` uses. Default is DuckDuckGo HTML scraping -# with Bing fallback — no API key needed but may be blocked in some regions. +# Choose which backend `web_search` uses. Default is Bing HTML scraping — no +# API key needed. DuckDuckGo remains selectable and still falls back to Bing +# when its HTML endpoint returns a bot challenge or no parseable results. # Switch to Tavily or Bocha for reliable search in mainland China. # # [search] -# provider = "duckduckgo" # duckduckgo | tavily | bocha +# provider = "bing" # bing | duckduckgo | tavily | bocha +# # duckduckgo: HTML scrape with Bing fallback # # tavily: https://tavily.com — AI search, needs api_key # # bocha: https://bochaai.com — 博查AI搜索,国内友好,需api_key # api_key = "tvly-YOUR_KEY" # required for tavily and bocha diff --git a/crates/tui/CHANGELOG.md b/crates/tui/CHANGELOG.md index c883e39a..5f3ff686 100644 --- a/crates/tui/CHANGELOG.md +++ b/crates/tui/CHANGELOG.md @@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Changed + +- **Bing is the default `web_search` backend.** DuckDuckGo remains selectable + with `[search] provider = "duckduckgo"` and keeps its Bing fallback path. + ## [0.8.36] - 2026-05-14 ### Added diff --git a/crates/tui/src/config.rs b/crates/tui/src/config.rs index a651fb5b..3d7bab52 100644 --- a/crates/tui/src/config.rs +++ b/crates/tui/src/config.rs @@ -525,8 +525,10 @@ impl SnapshotsConfig { #[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Deserialize, Serialize)] #[serde(rename_all = "snake_case")] pub enum SearchProvider { - /// DuckDuckGo HTML scraping with Bing fallback. No API key needed. + /// Bing HTML scraping. No API key needed. #[default] + Bing, + /// DuckDuckGo HTML scraping with Bing fallback. No API key needed. #[serde(alias = "duckduckgo")] DuckDuckGo, /// Tavily AI Search API (). Requires api_key. @@ -539,6 +541,7 @@ impl SearchProvider { #[must_use] pub fn as_str(self) -> &'static str { match self { + Self::Bing => "bing", Self::DuckDuckGo => "duckduckgo", Self::Tavily => "tavily", Self::Bocha => "bocha", @@ -549,10 +552,10 @@ impl SearchProvider { /// Web search provider configuration (`[search]` table in config.toml). #[derive(Debug, Clone, Deserialize, Default)] pub struct SearchConfig { - /// Search provider: `duckduckgo` | `tavily` | `bocha`. Default: `duckduckgo`. + /// Search provider: `bing` | `duckduckgo` | `tavily` | `bocha`. Default: `bing`. #[serde(default)] pub provider: Option, - /// API key for Tavily or Bocha. Not required for DuckDuckGo. + /// API key for Tavily or Bocha. Not required for Bing or DuckDuckGo. #[serde(default)] pub api_key: Option, } @@ -905,9 +908,9 @@ pub struct Config { #[serde(default)] pub snapshots: Option, - /// Web search provider configuration. When absent, defaults to DuckDuckGo - /// with Bing fallback. Set `provider` to `tavily` or `bocha` and provide - /// an `api_key` to use those services instead. + /// Web search provider configuration. When absent, defaults to Bing. + /// Set `provider` to `duckduckgo`, `tavily`, or `bocha` to use those + /// services instead; Tavily and Bocha also require an `api_key`. #[serde(default)] pub search: Option, @@ -3468,6 +3471,27 @@ mod tests { assert!(runtime.trusts_proxy_fakeip_host("raw.githubusercontent.com")); } + #[test] + fn search_provider_defaults_to_bing() { + assert_eq!(SearchProvider::default(), SearchProvider::Bing); + } + + #[test] + fn explicit_duckduckgo_search_provider_is_preserved() { + let config: Config = toml::from_str( + r#" + [search] + provider = "duckduckgo" + "#, + ) + .expect("search config"); + + assert_eq!( + config.search.and_then(|search| search.provider), + Some(SearchProvider::DuckDuckGo) + ); + } + struct EnvGuard { home: Option, userprofile: Option, diff --git a/crates/tui/src/core/engine.rs b/crates/tui/src/core/engine.rs index 32c251aa..633f4825 100644 --- a/crates/tui/src/core/engine.rs +++ b/crates/tui/src/core/engine.rs @@ -162,9 +162,9 @@ pub struct EngineConfig { pub strict_tool_mode: bool, /// Workshop / large-tool-output routing (#548). `None` disables routing. pub workshop: Option, - /// Which search backend `web_search` should use. Default: DuckDuckGo. + /// Which search backend `web_search` should use. Default: Bing. pub search_provider: crate::config::SearchProvider, - /// API key for Tavily or Bocha. `None` for DuckDuckGo. + /// API key for Tavily or Bocha. `None` for Bing or DuckDuckGo. pub search_api_key: Option, } diff --git a/crates/tui/src/tools/spec.rs b/crates/tui/src/tools/spec.rs index 21c91fa5..2f7d0595 100644 --- a/crates/tui/src/tools/spec.rs +++ b/crates/tui/src/tools/spec.rs @@ -157,10 +157,10 @@ pub struct ToolContext { /// routing (e.g. in sub-agents and test contexts to avoid recursion). pub large_output_router: Option, - /// Which search backend `web_search` should use. Default: DuckDuckGo with - /// Bing fallback. Set via `[search] provider` in config.toml. + /// Which search backend `web_search` should use. Default: Bing. Set via + /// `[search] provider` in config.toml. pub search_provider: crate::config::SearchProvider, - /// API key for Tavily or Bocha. `None` for DuckDuckGo. + /// API key for Tavily or Bocha. `None` for Bing or DuckDuckGo. pub search_api_key: Option, /// Per-session workshop variable store (#548). Holds the raw content of diff --git a/crates/tui/src/tools/web_search.rs b/crates/tui/src/tools/web_search.rs index d4f3f13a..d46cac7e 100644 --- a/crates/tui/src/tools/web_search.rs +++ b/crates/tui/src/tools/web_search.rs @@ -1,11 +1,11 @@ -//! Web search tool backed by multiple providers: DuckDuckGo (HTML scrape -//! with Bing fallback), Tavily API, and Bocha (博查) API. +//! Web search tool backed by multiple providers: Bing HTML scrape, DuckDuckGo +//! (HTML scrape with Bing fallback), Tavily API, and Bocha (博查) API. //! //! This is the primary web search surface for agents. For browsing workflows //! (page open, click, screenshot) use a direct URL approach instead. //! //! Set `[search]` in config.toml to switch providers: -//! provider = "tavily" # requires api_key +//! provider = "duckduckgo" # or tavily/bocha //! api_key = "tvly-..." use super::spec::{ @@ -124,7 +124,7 @@ impl ToolSpec for WebSearchTool { } fn description(&self) -> &'static str { - "Search the web and return ranked results with URLs and snippets. Default backend is DuckDuckGo (with Bing fallback); set `[search] provider = \"tavily\" | \"bocha\"` in config.toml to switch backends. Use this instead of scraping search engines with `curl` in `exec_shell`. For a known canonical URL, prefer `fetch_url` directly." + "Search the web and return ranked results with URLs and snippets. Default backend is Bing; set `[search] provider = \"duckduckgo\" | \"tavily\" | \"bocha\"` in config.toml to switch backends. Use this instead of scraping search engines with `curl` in `exec_shell`. For a known canonical URL, prefer `fetch_url` directly." } fn input_schema(&self) -> Value { @@ -181,7 +181,8 @@ impl ToolSpec for WebSearchTool { let max_results = max_results.clamp(1, MAX_RESULTS); let timeout_ms = optional_u64(&input, "timeout_ms", DEFAULT_TIMEOUT_MS).min(60_000); - // Dispatch to the configured search provider. + // Dispatch to the configured API-backed search providers before + // building the HTML-scraping client used by Bing/DuckDuckGo. match context.search_provider { SearchProvider::Tavily => { let decider = context.network_policy.as_ref(); @@ -197,18 +198,10 @@ impl ToolSpec for WebSearchTool { .run_bocha_search(&query, max_results, timeout_ms, context) .await; } - SearchProvider::DuckDuckGo => { - // fall through to existing DuckDuckGo + Bing fallback logic - } + SearchProvider::Bing | SearchProvider::DuckDuckGo => {} } - // Per-domain network policy gate (#135). The "host" for web search is - // the upstream search engine domain — DuckDuckGo first, Bing on - // fallback. We gate DuckDuckGo here; Bing is gated separately inside - // `run_bing_search` so a deny on one engine doesn't block the other. let decider = context.network_policy.as_ref(); - check_policy(decider, DUCKDUCKGO_HOST)?; - let client = reqwest::Client::builder() .timeout(Duration::from_millis(timeout_ms)) .user_agent(USER_AGENT) @@ -217,6 +210,18 @@ impl ToolSpec for WebSearchTool { ToolError::execution_failed(format!("Failed to build HTTP client: {e}")) })?; + if matches!(context.search_provider, SearchProvider::Bing) { + check_policy(decider, BING_HOST)?; + let results = run_bing_search(&client, &query, max_results).await?; + return search_tool_result(query, "bing", results, None); + } + + // Per-domain network policy gate (#135). The "host" for web search is + // the upstream search engine domain — DuckDuckGo first, Bing on + // fallback. We gate DuckDuckGo here; Bing is gated separately inside + // the fallback path so a deny on one engine doesn't block the other. + check_policy(decider, DUCKDUCKGO_HOST)?; + let encoded = url_encode(&query); let url = format!("https://html.duckduckgo.com/html/?q={encoded}"); let resp = client @@ -244,7 +249,7 @@ impl ToolSpec for WebSearchTool { } let mut results = parse_duckduckgo_results(&body, max_results); - let mut source = "duckduckgo".to_string(); + let mut source = "duckduckgo"; let mut message_suffix = None; if results.is_empty() { let duckduckgo_blocked = is_duckduckgo_challenge(&body); @@ -254,7 +259,7 @@ impl ToolSpec for WebSearchTool { match run_bing_search(&client, &query, max_results).await { Ok(fallback_results) if !fallback_results.is_empty() => { results = fallback_results; - source = "bing".to_string(); + source = "bing"; message_suffix = Some(if duckduckgo_blocked { "DuckDuckGo returned a bot challenge; used Bing fallback" } else { @@ -274,26 +279,36 @@ impl ToolSpec for WebSearchTool { Ok(_) | Err(_) => {} } } - let message = if results.is_empty() { - "No results found".to_string() - } else if let Some(suffix) = message_suffix { - format!("Found {} result(s). {suffix}", results.len()) - } else { - format!("Found {} result(s)", results.len()) - }; - let response = WebSearchResponse { - query, - source, - count: results.len(), - message, - results, - }; - - ToolResult::json(&response).map_err(|e| ToolError::execution_failed(e.to_string())) + search_tool_result(query, source, results, message_suffix) } } +fn search_tool_result( + query: String, + source: &'static str, + results: Vec, + message_suffix: Option<&str>, +) -> Result { + let message = if results.is_empty() { + "No results found".to_string() + } else if let Some(suffix) = message_suffix { + format!("Found {} result(s). {suffix}", results.len()) + } else { + format!("Found {} result(s)", results.len()) + }; + + let response = WebSearchResponse { + query, + source: source.to_string(), + count: results.len(), + message, + results, + }; + + ToolResult::json(&response).map_err(|e| ToolError::execution_failed(e.to_string())) +} + impl WebSearchTool { /// Search via Tavily AI Search API (). async fn run_tavily_search( @@ -591,16 +606,16 @@ async fn run_bing_search( .header("Accept-Language", "en-US,en;q=0.9") .send() .await - .map_err(|e| ToolError::execution_failed(format!("Bing fallback request failed: {e}")))?; + .map_err(|e| ToolError::execution_failed(format!("Bing search request failed: {e}")))?; let status = resp.status(); let body = resp.text().await.map_err(|e| { - ToolError::execution_failed(format!("Failed to read Bing fallback response: {e}")) + ToolError::execution_failed(format!("Failed to read Bing search response: {e}")) })?; if !status.is_success() { return Err(ToolError::execution_failed(format!( - "Bing fallback failed: HTTP {}", + "Bing search failed: HTTP {}", status.as_u16() ))); } diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md index d68dc6c2..774c82c5 100644 --- a/docs/CONFIGURATION.md +++ b/docs/CONFIGURATION.md @@ -620,6 +620,18 @@ You can also override features for a single run: Use `deepseek-tui features list` to inspect known flags and their effective state. +## Web Search Provider + +`web_search` uses Bing by default and does not require an API key. DuckDuckGo +remains selectable for users who explicitly want it, and Tavily or Bocha can be +selected when an API-backed provider is preferred. + +```toml +[search] +provider = "bing" # bing | duckduckgo | tavily | bocha +# api_key = "tvly-YOUR_KEY" # required for tavily and bocha +``` + ## Local Media Attachments Use `@path/to/file` in the composer to add local text file or directory context diff --git a/docs/TOOL_SURFACE.md b/docs/TOOL_SURFACE.md index b1305c89..08301841 100644 --- a/docs/TOOL_SURFACE.md +++ b/docs/TOOL_SURFACE.md @@ -35,7 +35,7 @@ chosen over the available shell equivalent. Companion to `crates/tui/src/prompts |---|---| | `grep_files` | Regex search file contents within the workspace; structured matches + context lines. Pure-Rust (`regex` crate), no `rg`/`grep` shell-out. | | `file_search` | Fuzzy-match filenames (not contents). Use when you know roughly the name. | -| `web_search` | DuckDuckGo (with Bing fallback); ranked snippets + `ref_id` for citation. | +| `web_search` | Bing by default; DuckDuckGo, Tavily, and Bocha are selectable in config. Ranked snippets + `ref_id` for citation. | | `fetch_url` | Direct HTTP GET on a known URL. Faster than `web_search` when the link is already known. HTML stripped to text by default. | ### Shell