Merge PR #2946 from h3c-hexin: update Bocha web search response handling
Correct /v1/web-search endpoint, parse webPages.value with legacy pages fallback, surface business error codes; unit tests added.
This commit is contained in:
@@ -27,7 +27,7 @@ use std::time::Duration;
|
||||
const DUCKDUCKGO_ENDPOINT: &str = "https://html.duckduckgo.com/html/";
|
||||
const BING_HOST: &str = "www.bing.com";
|
||||
const TAVILY_ENDPOINT: &str = "https://api.tavily.com/search";
|
||||
const BOCHA_ENDPOINT: &str = "https://api.bochaai.com/v1/ai/search";
|
||||
const BOCHA_ENDPOINT: &str = "https://api.bochaai.com/v1/web-search";
|
||||
const METASO_ENDPOINT: &str = "https://metaso.cn/api/v1";
|
||||
const BAIDU_ENDPOINT: &str = "https://qianfan.baidubce.com/v2/ai_search/web_search";
|
||||
const VOLCENGINE_RESPONSES_ENDPOINT: &str = "https://ark.cn-beijing.volces.com/api/v3/responses";
|
||||
@@ -634,39 +634,11 @@ impl WebSearchTool {
|
||||
ToolError::execution_failed(format!("Failed to parse Bocha response: {e}"))
|
||||
})?;
|
||||
|
||||
// Bocha returns `{"code": 200, "data": {"pages": [...]}}`
|
||||
let results: Vec<WebSearchEntry> = parsed
|
||||
.get("data")
|
||||
.and_then(|d| d.get("pages"))
|
||||
.or_else(|| parsed.get("pages"))
|
||||
.and_then(|v| v.as_array())
|
||||
.into_iter()
|
||||
.flat_map(|arr| arr.iter())
|
||||
.filter_map(|item| {
|
||||
let title = item
|
||||
.get("name")
|
||||
.or_else(|| item.get("title"))
|
||||
.and_then(|s| s.as_str())?
|
||||
.to_string();
|
||||
let url = item
|
||||
.get("url")
|
||||
.or_else(|| item.get("link"))
|
||||
.and_then(|s| s.as_str())?
|
||||
.to_string();
|
||||
let snippet = item
|
||||
.get("summary")
|
||||
.or_else(|| item.get("snippet"))
|
||||
.or_else(|| item.get("description"))
|
||||
.and_then(|s| s.as_str())
|
||||
.map(|s| s.to_string());
|
||||
Some(WebSearchEntry {
|
||||
title,
|
||||
url,
|
||||
snippet,
|
||||
})
|
||||
})
|
||||
.take(max_results)
|
||||
.collect();
|
||||
if let Some(error) = bocha_error_message(&parsed) {
|
||||
return Err(ToolError::execution_failed(error));
|
||||
}
|
||||
|
||||
let results = parse_bocha_results(&parsed, max_results);
|
||||
|
||||
let message = if results.is_empty() {
|
||||
"No results found".to_string()
|
||||
@@ -1004,6 +976,63 @@ fn sanitize_error_body(body: &str) -> String {
|
||||
.to_string()
|
||||
}
|
||||
|
||||
fn parse_bocha_results(parsed: &Value, max_results: usize) -> Vec<WebSearchEntry> {
|
||||
parsed
|
||||
.get("data")
|
||||
.and_then(|d| {
|
||||
d.get("webPages")
|
||||
.and_then(|w| w.get("value"))
|
||||
.or_else(|| d.get("pages"))
|
||||
})
|
||||
.or_else(|| parsed.get("pages"))
|
||||
.and_then(|v| v.as_array())
|
||||
.into_iter()
|
||||
.flat_map(|arr| arr.iter())
|
||||
.filter_map(|item| {
|
||||
let title = item
|
||||
.get("name")
|
||||
.or_else(|| item.get("title"))
|
||||
.and_then(|s| s.as_str())?
|
||||
.trim();
|
||||
let url = item
|
||||
.get("url")
|
||||
.or_else(|| item.get("link"))
|
||||
.and_then(|s| s.as_str())?
|
||||
.trim();
|
||||
if title.is_empty() || url.is_empty() {
|
||||
return None;
|
||||
}
|
||||
let snippet = item
|
||||
.get("summary")
|
||||
.or_else(|| item.get("snippet"))
|
||||
.or_else(|| item.get("description"))
|
||||
.and_then(|s| s.as_str())
|
||||
.map(str::trim)
|
||||
.filter(|s| !s.is_empty())
|
||||
.map(ToString::to_string);
|
||||
Some(WebSearchEntry {
|
||||
title: title.to_string(),
|
||||
url: url.to_string(),
|
||||
snippet,
|
||||
})
|
||||
})
|
||||
.take(max_results)
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn bocha_error_message(parsed: &Value) -> Option<String> {
|
||||
let code = parsed.get("code").and_then(|v| v.as_i64())?;
|
||||
if code == 0 || code == 200 {
|
||||
return None;
|
||||
}
|
||||
let message = parsed
|
||||
.get("msg")
|
||||
.or_else(|| parsed.get("message"))
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("unknown error");
|
||||
Some(format!("Bocha search API error (code {code}: {message})"))
|
||||
}
|
||||
|
||||
fn parse_baidu_results(parsed: &Value, max_results: usize) -> Vec<WebSearchEntry> {
|
||||
parsed
|
||||
.get("references")
|
||||
@@ -1608,9 +1637,10 @@ fn extract_query_param(url: &str, key: &str) -> Option<String> {
|
||||
mod tests {
|
||||
use super::{
|
||||
ERROR_BODY_PREVIEW_BYTES, WebSearchEntry, WebSearchTool, baidu_search_payload,
|
||||
decode_html_entities, duckduckgo_search_url, extract_search_query, is_likely_spam_results,
|
||||
normalize_bing_url, optional_search_max_results, parse_baidu_results, parse_sofya_results,
|
||||
root_domain, sanitize_error_body, truncate_error_body, volcengine_extract_text,
|
||||
bocha_error_message, decode_html_entities, duckduckgo_search_url, extract_search_query,
|
||||
is_likely_spam_results, normalize_bing_url, optional_search_max_results,
|
||||
parse_baidu_results, parse_bocha_results, parse_sofya_results, root_domain,
|
||||
sanitize_error_body, truncate_error_body, volcengine_extract_text,
|
||||
};
|
||||
use serde_json::json;
|
||||
|
||||
@@ -1898,6 +1928,72 @@ mod tests {
|
||||
assert!(sanitized.contains("Bearer [REDACTED]"));
|
||||
}
|
||||
|
||||
// The current response shape nests hits under `data.webPages.value`; both
// hits must come back in order, with the first hit's `snippet` text preserved.
#[test]
fn parse_bocha_web_pages_value_extracts_ranked_results() {
    let response = json!({
        "code": 200,
        "msg": null,
        "data": {
            "webPages": {
                "value": [
                    {
                        "name": "广州天气",
                        "url": "https://bocha.cn/share/weather",
                        "snippet": "广州今日雷阵雨转晴。"
                    },
                    {
                        "name": "中央气象台",
                        "url": "https://www.weather.com.cn/",
                        "summary": "天气实况。"
                    }
                ]
            }
        }
    });

    let entries = parse_bocha_results(&response, 10);
    assert_eq!(entries.len(), 2);

    let (first, second) = (&entries[0], &entries[1]);
    assert_eq!(first.title, "广州天气");
    assert_eq!(first.url, "https://bocha.cn/share/weather");
    assert_eq!(first.snippet.as_deref(), Some("广州今日雷阵雨转晴。"));
    assert_eq!(second.title, "中央气象台");
}
|
||||
|
||||
// The older response shape (`data.pages` with `title` / `link` /
// `description` keys) must still be understood.
#[test]
fn parse_bocha_keeps_legacy_pages_shape() {
    let legacy_response = json!({
        "code": 200,
        "data": {
            "pages": [
                {
                    "title": "Legacy title",
                    "link": "https://example.com/legacy",
                    "description": "Legacy description"
                }
            ]
        }
    });

    let entries = parse_bocha_results(&legacy_response, 5);
    assert_eq!(entries.len(), 1);

    let only = &entries[0];
    assert_eq!(only.title, "Legacy title");
    assert_eq!(only.url, "https://example.com/legacy");
    assert_eq!(only.snippet.as_deref(), Some("Legacy description"));
}
|
||||
|
||||
// A non-success business code must produce an error string that names the
// provider, the code, and the server-supplied message.
#[test]
fn bocha_error_message_flags_non_success_business_code() {
    let rejected = json!({"code": 401, "msg": "invalid api key"});

    let description = bocha_error_message(&rejected).expect("non-success code should error");

    for needle in &["Bocha", "401", "invalid api key"] {
        assert!(description.contains(*needle));
    }
}
|
||||
|
||||
#[test]
|
||||
fn parse_baidu_references_extracts_ranked_results() {
|
||||
let body = json!({
|
||||
|
||||
Reference in New Issue
Block a user