diff --git a/crates/tui/src/tools/web_search.rs b/crates/tui/src/tools/web_search.rs index 330b6ca9..bafc94d8 100644 --- a/crates/tui/src/tools/web_search.rs +++ b/crates/tui/src/tools/web_search.rs @@ -27,7 +27,7 @@ use std::time::Duration; const DUCKDUCKGO_ENDPOINT: &str = "https://html.duckduckgo.com/html/"; const BING_HOST: &str = "www.bing.com"; const TAVILY_ENDPOINT: &str = "https://api.tavily.com/search"; -const BOCHA_ENDPOINT: &str = "https://api.bochaai.com/v1/ai/search"; +const BOCHA_ENDPOINT: &str = "https://api.bochaai.com/v1/web-search"; const METASO_ENDPOINT: &str = "https://metaso.cn/api/v1"; const BAIDU_ENDPOINT: &str = "https://qianfan.baidubce.com/v2/ai_search/web_search"; const VOLCENGINE_RESPONSES_ENDPOINT: &str = "https://ark.cn-beijing.volces.com/api/v3/responses"; @@ -634,39 +634,11 @@ impl WebSearchTool { ToolError::execution_failed(format!("Failed to parse Bocha response: {e}")) })?; - // Bocha returns `{"code": 200, "data": {"pages": [...]}}` - let results: Vec = parsed - .get("data") - .and_then(|d| d.get("pages")) - .or_else(|| parsed.get("pages")) - .and_then(|v| v.as_array()) - .into_iter() - .flat_map(|arr| arr.iter()) - .filter_map(|item| { - let title = item - .get("name") - .or_else(|| item.get("title")) - .and_then(|s| s.as_str())? - .to_string(); - let url = item - .get("url") - .or_else(|| item.get("link")) - .and_then(|s| s.as_str())? - .to_string(); - let snippet = item - .get("summary") - .or_else(|| item.get("snippet")) - .or_else(|| item.get("description")) - .and_then(|s| s.as_str()) - .map(|s| s.to_string()); - Some(WebSearchEntry { - title, - url, - snippet, - }) - }) - .take(max_results) - .collect(); + if let Some(error) = bocha_error_message(&parsed) { + return Err(ToolError::execution_failed(error)); + } + + let results = parse_bocha_results(&parsed, max_results); let message = if results.is_empty() { "No results found".to_string() @@ -1004,6 +976,63 @@ fn sanitize_error_body(body: &str) -> String { .to_string() } +fn parse_bocha_results(parsed: &Value, max_results: usize) -> Vec { + parsed + .get("data") + .and_then(|d| { + d.get("webPages") + .and_then(|w| w.get("value")) + .or_else(|| d.get("pages")) + }) + .or_else(|| parsed.get("pages")) + .and_then(|v| v.as_array()) + .into_iter() + .flat_map(|arr| arr.iter()) + .filter_map(|item| { + let title = item + .get("name") + .or_else(|| item.get("title")) + .and_then(|s| s.as_str())? + .trim(); + let url = item + .get("url") + .or_else(|| item.get("link")) + .and_then(|s| s.as_str())? + .trim(); + if title.is_empty() || url.is_empty() { + return None; + } + let snippet = item + .get("summary") + .or_else(|| item.get("snippet")) + .or_else(|| item.get("description")) + .and_then(|s| s.as_str()) + .map(str::trim) + .filter(|s| !s.is_empty()) + .map(ToString::to_string); + Some(WebSearchEntry { + title: title.to_string(), + url: url.to_string(), + snippet, + }) + }) + .take(max_results) + .collect() +} + +fn bocha_error_message(parsed: &Value) -> Option { + let code = parsed.get("code").and_then(|v| v.as_i64())?; + if code == 0 || code == 200 { + return None; + } + let message = parsed + .get("msg") + .or_else(|| parsed.get("message")) + .and_then(|v| v.as_str()) + .unwrap_or("unknown error"); + Some(format!("Bocha search API error (code {code}: {message})")) +} + fn parse_baidu_results(parsed: &Value, max_results: usize) -> Vec { parsed .get("references") @@ -1608,9 +1637,10 @@ fn extract_query_param(url: &str, key: &str) -> Option { mod tests { use super::{ ERROR_BODY_PREVIEW_BYTES, WebSearchEntry, WebSearchTool, baidu_search_payload, - decode_html_entities, duckduckgo_search_url, extract_search_query, is_likely_spam_results, - normalize_bing_url, optional_search_max_results, parse_baidu_results, parse_sofya_results, - root_domain, sanitize_error_body, truncate_error_body, volcengine_extract_text, + bocha_error_message, decode_html_entities, duckduckgo_search_url, extract_search_query, + is_likely_spam_results, normalize_bing_url, optional_search_max_results, + parse_baidu_results, parse_bocha_results, parse_sofya_results, root_domain, + sanitize_error_body, truncate_error_body, volcengine_extract_text, }; use serde_json::json; @@ -1898,6 +1928,72 @@ mod tests { assert!(sanitized.contains("Bearer [REDACTED]")); } + #[test] + fn parse_bocha_web_pages_value_extracts_ranked_results() { + let body = json!({ + "code": 200, + "msg": null, + "data": { + "webPages": { + "value": [ + { + "name": "广州天气", + "url": "https://bocha.cn/share/weather", + "snippet": "广州今日雷阵雨转晴。" + }, + { + "name": "中央气象台", + "url": "https://www.weather.com.cn/", + "summary": "天气实况。" + } + ] + } + } + }); + + let results = parse_bocha_results(&body, 10); + + assert_eq!(results.len(), 2); + assert_eq!(results[0].title, "广州天气"); + assert_eq!(results[0].url, "https://bocha.cn/share/weather"); + assert_eq!(results[0].snippet.as_deref(), Some("广州今日雷阵雨转晴。")); + assert_eq!(results[1].title, "中央气象台"); + } + + #[test] + fn parse_bocha_keeps_legacy_pages_shape() { + let body = json!({ + "code": 200, + "data": { + "pages": [ + { + "title": "Legacy title", + "link": "https://example.com/legacy", + "description": "Legacy description" + } + ] + } + }); + + let results = parse_bocha_results(&body, 5); + + assert_eq!(results.len(), 1); + assert_eq!(results[0].title, "Legacy title"); + assert_eq!(results[0].url, "https://example.com/legacy"); + assert_eq!(results[0].snippet.as_deref(), Some("Legacy description")); + } + + #[test] + fn bocha_error_message_flags_non_success_business_code() { + let body = json!({"code": 401, "msg": "invalid api key"}); + + let error = bocha_error_message(&body).expect("non-success code should error"); + + assert!(error.contains("Bocha")); + assert!(error.contains("401")); + assert!(error.contains("invalid api key")); + } + #[test] fn parse_baidu_references_extracts_ranked_results() { let body = json!({