fix(search): default web search to bing (#1619)
Summary: add Bing as the explicit default `web_search` provider; keep explicit DuckDuckGo configuration supported; update docs/help/config examples. Validation: CI green before merge.
This commit is contained in:
@@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Changed
|
||||
|
||||
- **Bing is the default `web_search` backend.** DuckDuckGo remains selectable
|
||||
with `[search] provider = "duckduckgo"` and keeps its Bing fallback path.
|
||||
|
||||
## [0.8.36] - 2026-05-14
|
||||
|
||||
### Added
|
||||
|
||||
+5
-3
@@ -220,12 +220,14 @@ max_subagents = 10 # optional (1-20)
|
||||
# ─────────────────────────────────────────────────────────────────────────────────
|
||||
# Web Search Provider
|
||||
# ─────────────────────────────────────────────────────────────────────────────────
|
||||
# Choose which backend `web_search` uses. Default is DuckDuckGo HTML scraping
|
||||
# with Bing fallback — no API key needed but may be blocked in some regions.
|
||||
# Choose which backend `web_search` uses. Default is Bing HTML scraping — no
|
||||
# API key needed. DuckDuckGo remains selectable and still falls back to Bing
|
||||
# when its HTML endpoint returns a bot challenge or no parseable results.
|
||||
# Switch to Tavily or Bocha for reliable search in mainland China.
|
||||
#
|
||||
# [search]
|
||||
# provider = "duckduckgo" # duckduckgo | tavily | bocha
|
||||
# provider = "bing" # bing | duckduckgo | tavily | bocha
|
||||
# # duckduckgo: HTML scrape with Bing fallback
|
||||
# # tavily: https://tavily.com — AI search, needs api_key
|
||||
# # bocha: https://bochaai.com — 博查AI搜索,国内友好,需api_key
|
||||
# api_key = "tvly-YOUR_KEY" # required for tavily and bocha
|
||||
|
||||
@@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
### Changed
|
||||
|
||||
- **Bing is the default `web_search` backend.** DuckDuckGo remains selectable
|
||||
with `[search] provider = "duckduckgo"` and keeps its Bing fallback path.
|
||||
|
||||
## [0.8.36] - 2026-05-14
|
||||
|
||||
### Added
|
||||
|
||||
@@ -525,8 +525,10 @@ impl SnapshotsConfig {
|
||||
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Deserialize, Serialize)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum SearchProvider {
|
||||
/// DuckDuckGo HTML scraping with Bing fallback. No API key needed.
|
||||
/// Bing HTML scraping. No API key needed.
|
||||
#[default]
|
||||
Bing,
|
||||
/// DuckDuckGo HTML scraping with Bing fallback. No API key needed.
|
||||
#[serde(alias = "duckduckgo")]
|
||||
DuckDuckGo,
|
||||
/// Tavily AI Search API (<https://tavily.com>). Requires api_key.
|
||||
@@ -539,6 +541,7 @@ impl SearchProvider {
|
||||
#[must_use]
|
||||
pub fn as_str(self) -> &'static str {
|
||||
match self {
|
||||
Self::Bing => "bing",
|
||||
Self::DuckDuckGo => "duckduckgo",
|
||||
Self::Tavily => "tavily",
|
||||
Self::Bocha => "bocha",
|
||||
@@ -549,10 +552,10 @@ impl SearchProvider {
|
||||
/// Web search provider configuration (`[search]` table in config.toml).
|
||||
#[derive(Debug, Clone, Deserialize, Default)]
|
||||
pub struct SearchConfig {
|
||||
/// Search provider: `duckduckgo` | `tavily` | `bocha`. Default: `duckduckgo`.
|
||||
/// Search provider: `bing` | `duckduckgo` | `tavily` | `bocha`. Default: `bing`.
|
||||
#[serde(default)]
|
||||
pub provider: Option<SearchProvider>,
|
||||
/// API key for Tavily or Bocha. Not required for DuckDuckGo.
|
||||
/// API key for Tavily or Bocha. Not required for Bing or DuckDuckGo.
|
||||
#[serde(default)]
|
||||
pub api_key: Option<String>,
|
||||
}
|
||||
@@ -905,9 +908,9 @@ pub struct Config {
|
||||
#[serde(default)]
|
||||
pub snapshots: Option<SnapshotsConfig>,
|
||||
|
||||
/// Web search provider configuration. When absent, defaults to DuckDuckGo
|
||||
/// with Bing fallback. Set `provider` to `tavily` or `bocha` and provide
|
||||
/// an `api_key` to use those services instead.
|
||||
/// Web search provider configuration. When absent, defaults to Bing.
|
||||
/// Set `provider` to `duckduckgo`, `tavily`, or `bocha` to use those
|
||||
/// services instead; Tavily and Bocha also require an `api_key`.
|
||||
#[serde(default)]
|
||||
pub search: Option<SearchConfig>,
|
||||
|
||||
@@ -3468,6 +3471,27 @@ mod tests {
|
||||
assert!(runtime.trusts_proxy_fakeip_host("raw.githubusercontent.com"));
|
||||
}
|
||||
|
||||
// Pins the derived `Default` for `SearchProvider` to the `Bing` variant.
#[test]
|
||||
fn search_provider_defaults_to_bing() {
|
||||
assert_eq!(SearchProvider::default(), SearchProvider::Bing);
|
||||
}
|
||||
|
||||
// Parses a config whose `[search]` table names `duckduckgo` explicitly and
// checks that the provider deserializes to `SearchProvider::DuckDuckGo`.
#[test]
|
||||
fn explicit_duckduckgo_search_provider_is_preserved() {
|
||||
let config: Config = toml::from_str(
|
||||
r#"
|
||||
[search]
|
||||
provider = "duckduckgo"
|
||||
"#,
|
||||
)
|
||||
.expect("search config");
|
||||
|
||||
// `search` and `provider` are both `Option`s, so flatten them before comparing.
assert_eq!(
|
||||
config.search.and_then(|search| search.provider),
|
||||
Some(SearchProvider::DuckDuckGo)
|
||||
);
|
||||
}
|
||||
|
||||
struct EnvGuard {
|
||||
home: Option<OsString>,
|
||||
userprofile: Option<OsString>,
|
||||
|
||||
@@ -162,9 +162,9 @@ pub struct EngineConfig {
|
||||
pub strict_tool_mode: bool,
|
||||
/// Workshop / large-tool-output routing (#548). `None` disables routing.
|
||||
pub workshop: Option<crate::tools::large_output_router::WorkshopConfig>,
|
||||
/// Which search backend `web_search` should use. Default: DuckDuckGo.
|
||||
/// Which search backend `web_search` should use. Default: Bing.
|
||||
pub search_provider: crate::config::SearchProvider,
|
||||
/// API key for Tavily or Bocha. `None` for DuckDuckGo.
|
||||
/// API key for Tavily or Bocha. `None` for Bing or DuckDuckGo.
|
||||
pub search_api_key: Option<String>,
|
||||
}
|
||||
|
||||
|
||||
@@ -157,10 +157,10 @@ pub struct ToolContext {
|
||||
/// routing (e.g. in sub-agents and test contexts to avoid recursion).
|
||||
pub large_output_router: Option<crate::tools::large_output_router::LargeOutputRouter>,
|
||||
|
||||
/// Which search backend `web_search` should use. Default: DuckDuckGo with
|
||||
/// Bing fallback. Set via `[search] provider` in config.toml.
|
||||
/// Which search backend `web_search` should use. Default: Bing. Set via
|
||||
/// `[search] provider` in config.toml.
|
||||
pub search_provider: crate::config::SearchProvider,
|
||||
/// API key for Tavily or Bocha. `None` for DuckDuckGo.
|
||||
/// API key for Tavily or Bocha. `None` for Bing or DuckDuckGo.
|
||||
pub search_api_key: Option<String>,
|
||||
|
||||
/// Per-session workshop variable store (#548). Holds the raw content of
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
//! Web search tool backed by multiple providers: DuckDuckGo (HTML scrape
|
||||
//! with Bing fallback), Tavily API, and Bocha (博查) API.
|
||||
//! Web search tool backed by multiple providers: Bing HTML scrape, DuckDuckGo
|
||||
//! (HTML scrape with Bing fallback), Tavily API, and Bocha (博查) API.
|
||||
//!
|
||||
//! This is the primary web search surface for agents. For browsing workflows
|
||||
//! (page open, click, screenshot) use a direct URL approach instead.
|
||||
//!
|
||||
//! Set `[search]` in config.toml to switch providers:
|
||||
//! provider = "tavily" # requires api_key
|
||||
//! provider = "duckduckgo" # bing (default) | duckduckgo | tavily | bocha
|
||||
//! api_key = "tvly-..." # required only for tavily and bocha
|
||||
|
||||
use super::spec::{
|
||||
@@ -124,7 +124,7 @@ impl ToolSpec for WebSearchTool {
|
||||
}
|
||||
|
||||
fn description(&self) -> &'static str {
|
||||
"Search the web and return ranked results with URLs and snippets. Default backend is DuckDuckGo (with Bing fallback); set `[search] provider = \"tavily\" | \"bocha\"` in config.toml to switch backends. Use this instead of scraping search engines with `curl` in `exec_shell`. For a known canonical URL, prefer `fetch_url` directly."
|
||||
"Search the web and return ranked results with URLs and snippets. Default backend is Bing; set `[search] provider = \"duckduckgo\" | \"tavily\" | \"bocha\"` in config.toml to switch backends. Use this instead of scraping search engines with `curl` in `exec_shell`. For a known canonical URL, prefer `fetch_url` directly."
|
||||
}
|
||||
|
||||
fn input_schema(&self) -> Value {
|
||||
@@ -181,7 +181,8 @@ impl ToolSpec for WebSearchTool {
|
||||
let max_results = max_results.clamp(1, MAX_RESULTS);
|
||||
let timeout_ms = optional_u64(&input, "timeout_ms", DEFAULT_TIMEOUT_MS).min(60_000);
|
||||
|
||||
// Dispatch to the configured search provider.
|
||||
// Dispatch to the configured API-backed search providers before
|
||||
// building the HTML-scraping client used by Bing/DuckDuckGo.
|
||||
match context.search_provider {
|
||||
SearchProvider::Tavily => {
|
||||
let decider = context.network_policy.as_ref();
|
||||
@@ -197,18 +198,10 @@ impl ToolSpec for WebSearchTool {
|
||||
.run_bocha_search(&query, max_results, timeout_ms, context)
|
||||
.await;
|
||||
}
|
||||
SearchProvider::DuckDuckGo => {
|
||||
// fall through to existing DuckDuckGo + Bing fallback logic
|
||||
}
|
||||
SearchProvider::Bing | SearchProvider::DuckDuckGo => {}
|
||||
}
|
||||
|
||||
// Per-domain network policy gate (#135). The "host" for web search is
|
||||
// the upstream search engine domain — DuckDuckGo first, Bing on
|
||||
// fallback. We gate DuckDuckGo here; Bing is gated separately inside
|
||||
// `run_bing_search` so a deny on one engine doesn't block the other.
|
||||
let decider = context.network_policy.as_ref();
|
||||
check_policy(decider, DUCKDUCKGO_HOST)?;
|
||||
|
||||
let client = reqwest::Client::builder()
|
||||
.timeout(Duration::from_millis(timeout_ms))
|
||||
.user_agent(USER_AGENT)
|
||||
@@ -217,6 +210,18 @@ impl ToolSpec for WebSearchTool {
|
||||
ToolError::execution_failed(format!("Failed to build HTTP client: {e}"))
|
||||
})?;
|
||||
|
||||
if matches!(context.search_provider, SearchProvider::Bing) {
|
||||
check_policy(decider, BING_HOST)?;
|
||||
let results = run_bing_search(&client, &query, max_results).await?;
|
||||
return search_tool_result(query, "bing", results, None);
|
||||
}
|
||||
|
||||
// Per-domain network policy gate (#135). The "host" for web search is
|
||||
// the upstream search engine domain — DuckDuckGo first, Bing on
|
||||
// fallback. We gate DuckDuckGo here; Bing is gated separately inside
|
||||
// the fallback path so a deny on one engine doesn't block the other.
|
||||
check_policy(decider, DUCKDUCKGO_HOST)?;
|
||||
|
||||
let encoded = url_encode(&query);
|
||||
let url = format!("https://html.duckduckgo.com/html/?q={encoded}");
|
||||
let resp = client
|
||||
@@ -244,7 +249,7 @@ impl ToolSpec for WebSearchTool {
|
||||
}
|
||||
|
||||
let mut results = parse_duckduckgo_results(&body, max_results);
|
||||
let mut source = "duckduckgo".to_string();
|
||||
let mut source = "duckduckgo";
|
||||
let mut message_suffix = None;
|
||||
if results.is_empty() {
|
||||
let duckduckgo_blocked = is_duckduckgo_challenge(&body);
|
||||
@@ -254,7 +259,7 @@ impl ToolSpec for WebSearchTool {
|
||||
match run_bing_search(&client, &query, max_results).await {
|
||||
Ok(fallback_results) if !fallback_results.is_empty() => {
|
||||
results = fallback_results;
|
||||
source = "bing".to_string();
|
||||
source = "bing";
|
||||
message_suffix = Some(if duckduckgo_blocked {
|
||||
"DuckDuckGo returned a bot challenge; used Bing fallback"
|
||||
} else {
|
||||
@@ -274,26 +279,36 @@ impl ToolSpec for WebSearchTool {
|
||||
Ok(_) | Err(_) => {}
|
||||
}
|
||||
}
|
||||
let message = if results.is_empty() {
|
||||
"No results found".to_string()
|
||||
} else if let Some(suffix) = message_suffix {
|
||||
format!("Found {} result(s). {suffix}", results.len())
|
||||
} else {
|
||||
format!("Found {} result(s)", results.len())
|
||||
};
|
||||
|
||||
let response = WebSearchResponse {
|
||||
query,
|
||||
source,
|
||||
count: results.len(),
|
||||
message,
|
||||
results,
|
||||
};
|
||||
|
||||
ToolResult::json(&response).map_err(|e| ToolError::execution_failed(e.to_string()))
|
||||
search_tool_result(query, source, results, message_suffix)
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds the JSON `ToolResult` for a completed web search.
///
/// The message is "No results found" when `results` is empty (any
/// `message_suffix` is ignored in that case); otherwise it reports the result
/// count and appends `message_suffix` when one is supplied. `source` is copied
/// into the response as an owned `String`.
///
/// # Errors
/// Returns `ToolError::execution_failed` when `ToolResult::json` fails.
fn search_tool_result(
|
||||
query: String,
|
||||
source: &'static str,
|
||||
results: Vec<WebSearchEntry>,
|
||||
message_suffix: Option<&str>,
|
||||
) -> Result<ToolResult, ToolError> {
|
||||
// Compose the human-readable summary before `results` is moved into the response.
let message = if results.is_empty() {
|
||||
"No results found".to_string()
|
||||
} else if let Some(suffix) = message_suffix {
|
||||
format!("Found {} result(s). {suffix}", results.len())
|
||||
} else {
|
||||
format!("Found {} result(s)", results.len())
|
||||
};
|
||||
|
||||
let response = WebSearchResponse {
|
||||
query,
|
||||
source: source.to_string(),
|
||||
count: results.len(),
|
||||
message,
|
||||
results,
|
||||
};
|
||||
|
||||
ToolResult::json(&response).map_err(|e| ToolError::execution_failed(e.to_string()))
|
||||
}
|
||||
|
||||
impl WebSearchTool {
|
||||
/// Search via Tavily AI Search API (<https://tavily.com>).
|
||||
async fn run_tavily_search(
|
||||
@@ -591,16 +606,16 @@ async fn run_bing_search(
|
||||
.header("Accept-Language", "en-US,en;q=0.9")
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| ToolError::execution_failed(format!("Bing fallback request failed: {e}")))?;
|
||||
.map_err(|e| ToolError::execution_failed(format!("Bing search request failed: {e}")))?;
|
||||
|
||||
let status = resp.status();
|
||||
let body = resp.text().await.map_err(|e| {
|
||||
ToolError::execution_failed(format!("Failed to read Bing fallback response: {e}"))
|
||||
ToolError::execution_failed(format!("Failed to read Bing search response: {e}"))
|
||||
})?;
|
||||
|
||||
if !status.is_success() {
|
||||
return Err(ToolError::execution_failed(format!(
|
||||
"Bing fallback failed: HTTP {}",
|
||||
"Bing search failed: HTTP {}",
|
||||
status.as_u16()
|
||||
)));
|
||||
}
|
||||
|
||||
@@ -620,6 +620,18 @@ You can also override features for a single run:
|
||||
|
||||
Use `deepseek-tui features list` to inspect known flags and their effective state.
|
||||
|
||||
## Web Search Provider
|
||||
|
||||
`web_search` uses Bing by default and does not require an API key. DuckDuckGo
|
||||
remains selectable for users who explicitly want it, and Tavily or Bocha can be
|
||||
selected when an API-backed provider is preferred.
|
||||
|
||||
```toml
|
||||
[search]
|
||||
provider = "bing" # bing | duckduckgo | tavily | bocha
|
||||
# api_key = "tvly-YOUR_KEY" # required for tavily and bocha
|
||||
```
|
||||
|
||||
## Local Media Attachments
|
||||
|
||||
Use `@path/to/file` in the composer to add local text file or directory context
|
||||
|
||||
@@ -35,7 +35,7 @@ chosen over the available shell equivalent. Companion to `crates/tui/src/prompts
|
||||
|---|---|
|
||||
| `grep_files` | Regex search file contents within the workspace; structured matches + context lines. Pure-Rust (`regex` crate), no `rg`/`grep` shell-out. |
|
||||
| `file_search` | Fuzzy-match filenames (not contents). Use when you know roughly the name. |
|
||||
| `web_search` | DuckDuckGo (with Bing fallback); ranked snippets + `ref_id` for citation. |
|
||||
| `web_search` | Bing by default; DuckDuckGo, Tavily, and Bocha are selectable in config. Ranked snippets + `ref_id` for citation. |
|
||||
| `fetch_url` | Direct HTTP GET on a known URL. Faster than `web_search` when the link is already known. HTML stripped to text by default. |
|
||||
|
||||
### Shell
|
||||
|
||||
Reference in New Issue
Block a user