Summarize Cargo failures in tool metadata (#1973)

* Summarize cargo failures in tool metadata

* fix: preserve shell summaries for cargo failures

---------

Co-authored-by: Codex <codex@local>
This commit is contained in:
kunpeng-ai-lab
2026-05-26 23:38:27 +08:00
committed by GitHub
parent 16728360f1
commit c97c3a7a04
5 changed files with 628 additions and 24 deletions
@@ -0,0 +1,469 @@
//! Compact summaries for Cargo failures.
//!
//! Cargo output can be large and noisy. This module extracts stable failure
//! signals for tool metadata so context compaction can preserve the actionable
//! lines without re-running `cargo test | tail`.
use serde::{Deserialize, Serialize};
use serde_json::{Value, json};
/// Upper bound on the number of entries kept in each collected list
/// (enforced by `push_unique_limited`).
const MAX_ITEMS: usize = 8;
/// Upper bound, in characters, on the rendered summary text
/// (enforced by `build_summary` via `truncate_chars`).
const MAX_SUMMARY_CHARS: usize = 1_200;
/// Coarse category assigned to a failed Cargo invocation.
///
/// Serialized with `snake_case` variant names (e.g. `test_failure`).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub(crate) enum CargoFailureKind {
/// Failing tests were collected, or the captured `test result:` line was
/// judged to indicate a failure.
TestFailure,
/// No test failure was detected, but at least one primary `error` line was collected.
CompileError,
/// Fallback used when neither of the other categories applies.
CargoFailure,
}
impl CargoFailureKind {
fn label(&self) -> &'static str {
match self {
Self::TestFailure => "test_failure",
Self::CompileError => "compile_error",
Self::CargoFailure => "cargo_failure",
}
}
}
/// Structured summary of a failed Cargo invocation, suitable for tool metadata.
///
/// Empty lists and absent options are omitted when serializing and default to
/// empty / `None` when deserializing.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub(crate) struct CargoFailureSummary {
/// Category chosen by `classify_failure`.
pub(crate) kind: CargoFailureKind,
/// Compact multi-line text produced by `build_summary`.
pub(crate) summary: String,
/// Test names taken from `test <name> ... FAILED` lines and
/// `---- <name> stdout ----` headers.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub(crate) failing_tests: Vec<String>,
/// Codes extracted from lines starting with `error[<code>]`.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub(crate) error_codes: Vec<String>,
/// Lines accepted by `is_primary_error_line`.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub(crate) primary_errors: Vec<String>,
/// Lines containing `panicked at `.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub(crate) panic_locations: Vec<String>,
/// A captured line starting with `test result:`, if any.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub(crate) test_result: Option<String>,
/// A captured `error: could not compile` / `error: aborting due to` /
/// `error: test failed` line, if any.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub(crate) final_error: Option<String>,
}
impl CargoFailureSummary {
/// Converts the summary into a `serde_json::Value` via the `json!` macro so
/// it can be stored under a tool-metadata key.
pub(crate) fn to_metadata_value(&self) -> Value {
json!(self)
}
}
/// Extracts a compact failure summary from the output of a failed Cargo command.
///
/// `stderr` is scanned before `stdout`; every non-empty line is trimmed and
/// matched against the known libtest / rustc / cargo failure patterns.
///
/// Returns `None` when `exit_code` is `Some(0)`, when `command` is not
/// recognised as a supported Cargo invocation, or when no actionable signal
/// was found in either stream.
pub(crate) fn summarize_cargo_failure(
    command: &str,
    stdout: &str,
    stderr: &str,
    exit_code: Option<i32>,
) -> Option<CargoFailureSummary> {
    /// True when a `test result:` line reports the `FAILED` status.
    fn reports_failed_status(line: &str) -> bool {
        line.strip_prefix("test result:")
            .is_some_and(|rest| rest.trim_start().starts_with("FAILED"))
    }

    if exit_code == Some(0) || !looks_like_cargo_command(command) {
        return None;
    }

    let mut failing_tests = Vec::new();
    let mut error_codes = Vec::new();
    let mut primary_errors = Vec::new();
    let mut panic_locations = Vec::new();
    let mut test_result: Option<String> = None;
    let mut final_error = None;

    let lines = stderr
        .lines()
        .chain(stdout.lines())
        .map(str::trim)
        .filter(|line| !line.is_empty());

    for trimmed in lines {
        if let Some(test) = parse_failed_test_line(trimmed) {
            push_unique_limited(&mut failing_tests, test);
        }
        if let Some(test) = parse_failure_header(trimmed) {
            push_unique_limited(&mut failing_tests, test);
        }
        if let Some(code) = parse_error_code(trimmed) {
            push_unique_limited(&mut error_codes, code);
        }
        if is_primary_error_line(trimmed) {
            push_unique_limited(&mut primary_errors, trimmed.to_string());
        }
        if trimmed.contains("panicked at ") {
            push_unique_limited(&mut panic_locations, trimmed.to_string());
        }
        if trimmed.starts_with("test result:") {
            // A run can print several result lines (one per test binary, e.g.
            // with `--no-fail-fast`). A later passing line must not hide an
            // earlier FAILED one, otherwise the summary of a failed run would
            // quote "test result: ok".
            let keep_recorded_failure = test_result
                .as_deref()
                .is_some_and(reports_failed_status)
                && !reports_failed_status(trimmed);
            if !keep_recorded_failure {
                test_result = Some(trimmed.to_string());
            }
        }
        if trimmed.starts_with("error: could not compile")
            || trimmed.starts_with("error: aborting due to")
            || trimmed.starts_with("error: test failed")
        {
            final_error = Some(trimmed.to_string());
        }
    }

    let kind = classify_failure(&failing_tests, &primary_errors, test_result.as_deref());

    // Without any recognisable signal the generic shell summary is more
    // useful than an empty Cargo one, so report nothing.
    if !has_actionable_signal(
        &failing_tests,
        &error_codes,
        &primary_errors,
        &panic_locations,
        test_result.as_deref(),
        final_error.as_deref(),
    ) {
        return None;
    }

    let summary = build_summary(
        &kind,
        &failing_tests,
        &error_codes,
        &primary_errors,
        &panic_locations,
        test_result.as_deref(),
        final_error.as_deref(),
    );

    Some(CargoFailureSummary {
        kind,
        summary,
        failing_tests,
        error_codes,
        primary_errors,
        panic_locations,
        test_result,
        final_error,
    })
}
/// Reports whether `command` contains a supported Cargo invocation in command position.
///
/// The command line is tokenized with `shlex::split`; input that cannot be
/// tokenized yields `false`. Tokens are walked left to right, and a token is
/// in "command position" at the start and after each shell separator, with
/// leading `NAME=value` environment assignments skipped. Every command
/// position is inspected, so `cargo fmt --check && cargo test` is recognised
/// through its second invocation even though `fmt` itself is unsupported.
fn looks_like_cargo_command(command: &str) -> bool {
    let Some(tokens) = shlex::split(command) else {
        return false;
    };

    let mut expect_command = true;
    for (idx, raw_token) in tokens.iter().enumerate() {
        let token = normalize_shell_token(raw_token);
        if token.is_empty() {
            continue;
        }
        if is_shell_separator(token) {
            expect_command = true;
            continue;
        }
        if !expect_command || looks_like_env_assignment(token) {
            continue;
        }
        if is_cargo_binary(token) && cargo_subcommand(&tokens[idx + 1..]).is_some() {
            return true;
        }
        // Either a non-cargo program or a cargo subcommand we do not
        // summarize: its arguments are not commands, so skip ahead to the
        // next separator instead of giving up on the whole command line.
        expect_command = false;
    }
    false
}
/// Extracts the test name from a libtest line of the form `test <name> ... FAILED`.
///
/// libtest appends a mode marker to the name on this line for some tests
/// (`- should panic`, `- compile fail`, `- compile`), while the
/// `---- <name> stdout ----` failure header uses the bare name. The marker is
/// stripped so both sources agree and the name stays usable as a test filter.
///
/// Returns `None` for lines that are not failed-test lines.
fn parse_failed_test_line(line: &str) -> Option<String> {
    const MODE_SUFFIXES: [&str; 3] = [" - should panic", " - compile fail", " - compile"];

    let rest = line.strip_prefix("test ")?;
    let (name, status) = rest.rsplit_once(" ... ")?;
    if status != "FAILED" {
        return None;
    }

    let name = name.trim();
    let bare_name = MODE_SUFFIXES
        .iter()
        .find_map(|&suffix| name.strip_suffix(suffix))
        .unwrap_or(name);
    Some(bare_name.trim_end().to_string())
}
/// Extracts the test name from a libtest failure header,
/// i.e. a line shaped like `---- <name> stdout ----`.
fn parse_failure_header(line: &str) -> Option<String> {
    line.strip_prefix("---- ")
        .and_then(|after_dashes| after_dashes.strip_suffix(" stdout ----"))
        .map(|test_name| test_name.trim().to_string())
}
/// Extracts the code between the brackets of a line starting with `error[<code>]`.
fn parse_error_code(line: &str) -> Option<String> {
    let after_bracket = line.strip_prefix("error[")?;
    let closing = after_bracket.find(']')?;
    Some(after_bracket[..closing].to_string())
}
/// Reports whether `line` is an error diagnostic worth recording: any
/// `error[...]` line, or any `error:` line other than `error: test failed`.
fn is_primary_error_line(line: &str) -> bool {
    if line.starts_with("error[") {
        return true;
    }
    let is_test_failed_footer = line.starts_with("error: test failed");
    line.starts_with("error:") && !is_test_failed_footer
}
/// Chooses the failure category from the collected signals.
///
/// * `TestFailure` when failing tests were collected or the `test result:`
///   line reports the `FAILED` status.
/// * `CompileError` when there is no test failure but primary error lines exist.
/// * `CargoFailure` otherwise.
///
/// Only the status word right after `test result:` is inspected. Every libtest
/// result line contains the text "N failed" (for example
/// `test result: ok. 3 passed; 0 failed`), so a plain substring search for
/// "failed" would misclassify a passing test run followed by a compile error.
fn classify_failure(
    failing_tests: &[String],
    primary_errors: &[String],
    test_result: Option<&str>,
) -> CargoFailureKind {
    const FAILED_STATUS: &str = "FAILED";

    let result_reports_failure = test_result.is_some_and(|line| {
        let status = line
            .strip_prefix("test result:")
            .unwrap_or(line)
            .trim_start();
        // `get` returns `None` for short input or a non-boundary cut, both of
        // which mean the status cannot be `FAILED`.
        status
            .get(..FAILED_STATUS.len())
            .is_some_and(|word| word.eq_ignore_ascii_case(FAILED_STATUS))
    });

    if !failing_tests.is_empty() || result_reports_failure {
        CargoFailureKind::TestFailure
    } else if !primary_errors.is_empty() {
        CargoFailureKind::CompileError
    } else {
        CargoFailureKind::CargoFailure
    }
}
/// Reports whether at least one failure signal was collected: any non-empty
/// list, a test result line, or a final error line.
fn has_actionable_signal(
    failing_tests: &[String],
    error_codes: &[String],
    primary_errors: &[String],
    panic_locations: &[String],
    test_result: Option<&str>,
    final_error: Option<&str>,
) -> bool {
    let any_list_populated = [failing_tests, error_codes, primary_errors, panic_locations]
        .iter()
        .any(|entries| !entries.is_empty());
    any_list_populated || test_result.or(final_error).is_some()
}
/// Renders the collected signals as newline-separated text.
///
/// The first line names the failure kind; it is followed, when present, by
/// the failing tests, the error codes, the first primary error, the first
/// panic line, the test result line and the final error line. The joined
/// text is cut to `MAX_SUMMARY_CHARS` characters.
fn build_summary(
    kind: &CargoFailureKind,
    failing_tests: &[String],
    error_codes: &[String],
    primary_errors: &[String],
    panic_locations: &[String],
    test_result: Option<&str>,
    final_error: Option<&str>,
) -> String {
    let mut parts = vec![format!("Cargo failure kind: {}.", kind.label())];

    if !failing_tests.is_empty() {
        parts.push(format!("Failing tests: {}.", failing_tests.join(", ")));
    }
    if !error_codes.is_empty() {
        parts.push(format!("Rust error codes: {}.", error_codes.join(", ")));
    }
    parts.extend(
        primary_errors
            .first()
            .map(|first_error| format!("Primary error: {first_error}")),
    );
    parts.extend(
        panic_locations
            .first()
            .map(|first_panic| format!("Panic: {first_panic}")),
    );
    parts.extend(test_result.map(str::to_string));
    parts.extend(final_error.map(str::to_string));

    truncate_chars(&parts.join("\n"), MAX_SUMMARY_CHARS)
}
/// Strips leading and trailing shell grouping characters (`(`, `)`, `{`, `}`) from a token.
fn normalize_shell_token(token: &str) -> &str {
    const GROUPING: [char; 4] = ['(', ')', '{', '}'];
    token.trim_matches(|ch: char| GROUPING.contains(&ch))
}
/// Reports whether `token` is one of the shell operators that start a new command.
fn is_shell_separator(token: &str) -> bool {
    const SEPARATORS: [&str; 4] = ["&&", "||", ";", "|"];
    SEPARATORS.contains(&token)
}
/// Reports whether `token` has the shape `NAME=value`, where `NAME` is a
/// non-empty run of ASCII alphanumerics and underscores that does not start
/// with a digit.
fn looks_like_env_assignment(token: &str) -> bool {
    let Some(equals_at) = token.find('=') else {
        return false;
    };
    let mut name_bytes = token[..equals_at].bytes();
    match name_bytes.next() {
        Some(first) if first == b'_' || first.is_ascii_alphabetic() => {
            name_bytes.all(|byte| byte == b'_' || byte.is_ascii_alphanumeric())
        }
        _ => false,
    }
}
/// Reports whether the final path component of `token` (split on `/` or `\`)
/// is `cargo` or `cargo.exe`, ignoring ASCII case.
fn is_cargo_binary(token: &str) -> bool {
    let file_name = match token.rfind(|ch: char| ch == '/' || ch == '\\') {
        // Both separators are single-byte, so `pos + 1` is a char boundary.
        Some(pos) => &token[pos + 1..],
        None => token,
    };
    ["cargo", "cargo.exe"]
        .iter()
        .any(|candidate| file_name.eq_ignore_ascii_case(candidate))
}
/// Finds the Cargo subcommand among the tokens that follow the `cargo` binary.
///
/// Toolchain selectors (`+nightly`) and flags are skipped, together with the
/// value of any flag listed in `cargo_global_flag_takes_value`. The first
/// remaining token is returned if it is a supported subcommand; a shell
/// separator, an unsupported subcommand, or running out of tokens yields `None`.
fn cargo_subcommand(tokens: &[String]) -> Option<&str> {
    let mut remaining = tokens.iter();
    while let Some(raw_token) = remaining.next() {
        let token = normalize_shell_token(raw_token);
        if token.is_empty() || token.starts_with('+') {
            continue;
        }
        if is_shell_separator(token) {
            return None;
        }
        if token.starts_with('-') {
            if cargo_global_flag_takes_value(token) {
                // The following token is this flag's value, not a subcommand.
                remaining.next();
            }
            continue;
        }
        return is_supported_cargo_subcommand(token).then_some(token);
    }
    None
}
/// Reports whether `token` is a flag whose value is passed as the next token.
///
/// A flag written as `--flag=value` carries its value inline and yields `false`.
fn cargo_global_flag_takes_value(token: &str) -> bool {
    const VALUE_FLAGS: &[&str] = &[
        "--color",
        "--config",
        "-C",
        "--jobs",
        "-j",
        "--lockfile-path",
        "--manifest-path",
        "--message-format",
        "--package",
        "-p",
        "--target",
        "--target-dir",
        "-Z",
    ];
    !token.contains('=') && VALUE_FLAGS.contains(&token)
}
/// Reports whether `token` is one of the Cargo subcommands (or their
/// single-letter aliases) whose failures this module summarizes.
fn is_supported_cargo_subcommand(token: &str) -> bool {
    const SUPPORTED: &[&str] = &[
        "test", "check", "build", "clippy", "run", "t", "c", "b", "r",
    ];
    SUPPORTED.contains(&token)
}
/// Appends `value` to `target` unless the list already holds `MAX_ITEMS`
/// entries or already contains an equal string.
fn push_unique_limited(target: &mut Vec<String>, value: String) {
    let has_room = target.len() < MAX_ITEMS;
    if has_room && !target.contains(&value) {
        target.push(value);
    }
}
/// Limits `text` to at most `max_chars` characters.
///
/// Text that already fits is returned unchanged. Otherwise the result is the
/// first `max_chars - 3` characters followed by `...`; when `max_chars` is
/// below 3 there is no room for the ellipsis and the text is simply cut.
fn truncate_chars(text: &str, max_chars: usize) -> String {
    // Byte offset of the n-th character, or `None` when the text is shorter.
    let byte_offset_of = |n: usize| text.char_indices().nth(n).map(|(offset, _)| offset);

    let Some(limit) = byte_offset_of(max_chars) else {
        return text.to_string();
    };
    if max_chars < 3 {
        return text[..limit].to_string();
    }

    let cut = byte_offset_of(max_chars - 3).unwrap_or(0);
    let mut shortened = String::with_capacity(cut + 3);
    shortened.push_str(&text[..cut]);
    shortened.push_str("...");
    shortened
}
// Unit tests for the Cargo failure summarizer and its helpers.
#[cfg(test)]
mod tests {
use super::*;
// A failing libtest run yields the failing test name, the test result line
// and a `TestFailure` classification.
#[test]
fn summarizes_failed_libtest_output() {
let stdout = r"
running 1 test
test tests::fails ... FAILED
failures:
---- tests::fails stdout ----
thread 'tests::fails' panicked at src/lib.rs:7:9:
assertion `left == right` failed
test result: FAILED. 0 passed; 1 failed; 0 ignored; finished in 0.00s
";
let summary =
summarize_cargo_failure("cargo test", stdout, "", Some(101)).expect("summary");
assert_eq!(summary.kind, CargoFailureKind::TestFailure);
assert_eq!(summary.failing_tests, vec!["tests::fails"]);
assert!(summary.summary.contains("Failing tests: tests::fails"));
assert!(summary.test_result.unwrap().contains("1 failed"));
}
// A rustc diagnostic on stderr yields the error code, the primary error line
// and the final `could not compile` line.
#[test]
fn summarizes_rustc_compile_error() {
let stderr = r#"
error[E0308]: mismatched types
--> src/lib.rs:2:5
|
2 | ""
| ^^ expected `i32`, found `&str`
error: could not compile `demo` (lib) due to 1 previous error
"#;
let summary =
summarize_cargo_failure("cargo check", "", stderr, Some(101)).expect("summary");
assert_eq!(summary.kind, CargoFailureKind::CompileError);
assert_eq!(summary.error_codes, vec!["E0308"]);
assert!(summary.primary_errors[0].contains("mismatched types"));
assert!(summary.final_error.unwrap().contains("could not compile"));
}
// The single-letter alias `c` is accepted, and an `error:` line without a
// bracketed code still counts as a primary error.
#[test]
fn recognizes_cargo_aliases_and_uncoded_errors() {
let stderr = "error: cannot find value `missing` in this scope\n";
let summary = summarize_cargo_failure("cargo c", "", stderr, Some(101)).expect("summary");
assert_eq!(summary.kind, CargoFailureKind::CompileError);
assert_eq!(
summary.primary_errors,
vec!["error: cannot find value `missing` in this scope"]
);
}
// Command detection works on shell tokens: toolchain selectors, flags with
// values and env assignments are skipped, while `cargo` appearing as an
// argument of another program (`echo cargo test`) is not an invocation.
#[test]
fn recognizes_tokenized_cargo_invocations() {
assert!(
summarize_cargo_failure(
"cargo +nightly --manifest-path demo/Cargo.toml test",
"test tests::fails ... FAILED\n",
"",
Some(101),
)
.is_some()
);
assert!(
summarize_cargo_failure(
"DEMO=1 cargo --locked run",
"",
"error: process didn't exit successfully\n",
Some(101),
)
.is_some()
);
assert!(
summarize_cargo_failure(
"echo cargo test && false",
"test tests::fails ... FAILED\n",
"",
Some(1),
)
.is_none()
);
}
// Output without any recognised failure pattern produces no summary.
#[test]
fn skips_generic_cargo_failure_without_actionable_signal() {
assert!(
summarize_cargo_failure("cargo test", "build failed", "command failed", Some(1))
.is_none()
);
}
// Limits below three characters cut the text without an ellipsis.
#[test]
fn truncate_chars_respects_tiny_limits() {
assert_eq!(truncate_chars("abcdef", 0), "");
assert_eq!(truncate_chars("abcdef", 1), "a");
assert_eq!(truncate_chars("abcdef", 2), "ab");
assert_eq!(truncate_chars("abcdef", 3), "...");
assert_eq!(truncate_chars("abcdef", 4), "a...");
}
// A zero exit code or a non-cargo command never produces a summary.
#[test]
fn ignores_successful_or_non_cargo_commands() {
assert!(summarize_cargo_failure("cargo test", "", "", Some(0)).is_none());
assert!(summarize_cargo_failure("npm test", "failed", "", Some(1)).is_none());
}
}
+1
View File
@@ -12,6 +12,7 @@ pub mod apply_patch;
pub mod approval_cache;
pub mod arg_repair;
pub mod automation;
pub mod cargo_failure_summary;
pub mod diagnostics;
pub mod diff_format;
pub mod file;
+41 -23
View File
@@ -1505,6 +1505,7 @@ pub fn new_shared_shell_manager(workspace: PathBuf) -> SharedShellManager {
use crate::command_safety::{SafetyLevel, analyze_command, extract_primary_command};
use crate::execpolicy::{ExecPolicyDecision, load_default_policy};
use crate::features::Feature;
use crate::tools::cargo_failure_summary::summarize_cargo_failure;
use crate::tools::spec::{
ApprovalRequirement, ToolCapability, ToolContext, ToolError, ToolResult, ToolSpec,
optional_bool, optional_u64, required_str,
@@ -1523,6 +1524,18 @@ shell sandbox). Workarounds: (1) run the Docker build from a regular terminal ou
TUI, or (2) disable BuildKit with DOCKER_BUILDKIT=0 (only works if your Dockerfiles do not \
use RUN --mount directives).";
/// Stores a Cargo failure summary under `metadata["cargo_failure_summary"]`
/// when `summarize_cargo_failure` produces one for this command and result;
/// otherwise `metadata` is left untouched.
fn attach_cargo_failure_summary(
    metadata: &mut serde_json::Value,
    command: &str,
    result: &ShellResult,
) {
    let Some(failure) =
        summarize_cargo_failure(command, &result.stdout, &result.stderr, result.exit_code)
    else {
        return;
    };
    metadata["cargo_failure_summary"] = failure.to_metadata_value();
}
pub(crate) fn looks_like_macos_provenance_failure(result: &ShellResult) -> bool {
if matches!(result.status, ShellStatus::Completed) && result.exit_code == Some(0) {
return false;
@@ -1965,7 +1978,7 @@ impl ToolSpec for ExecShellTool {
format!("{}\n\nSTDERR:\n{}", result.stdout, result.stderr)
};
let metadata = json!({
let mut metadata = json!({
"exit_code": result.exit_code,
"status": format!("{:?}", result.status),
"duration_ms": result.duration_ms,
@@ -1987,6 +2000,7 @@ impl ToolSpec for ExecShellTool {
"canceled": false,
"sandbox_backend": "opensandbox",
});
attach_cargo_failure_summary(&mut metadata, command, &result);
return Ok(ToolResult {
content: output,
@@ -2165,6 +2179,7 @@ impl ToolSpec for ExecShellTool {
if provenance_hint.is_some() {
metadata["macos_provenance_restricted"] = json!(true);
}
attach_cargo_failure_summary(&mut metadata, command, &result);
Ok(ToolResult {
content: output,
@@ -2239,31 +2254,34 @@ fn build_shell_delta_tool_result(delta: ShellDeltaResult, context: &ToolContext)
output = format!("{hint}\n\n{output}");
}
let mut metadata = json!({
"exit_code": result.exit_code,
"status": format!("{:?}", result.status),
"duration_ms": result.duration_ms,
"sandboxed": result.sandboxed,
"sandbox_type": result.sandbox_type,
"sandbox_denied": result.sandbox_denied,
"task_id": result.task_id,
"stdout_len": result.stdout_len,
"stderr_len": result.stderr_len,
"stdout_truncated": result.stdout_truncated,
"stderr_truncated": result.stderr_truncated,
"stdout_omitted": result.stdout_omitted,
"stderr_omitted": result.stderr_omitted,
"stdout_total_len": delta.stdout_total_len,
"stderr_total_len": delta.stderr_total_len,
"summary": summary,
"stdout_summary": stdout_summary,
"stderr_summary": stderr_summary,
"command": delta.command,
"stream_delta": true,
});
attach_cargo_failure_summary(&mut metadata, &delta.command, &result);
let mut tool_result = ToolResult {
content: output,
success: matches!(result.status, ShellStatus::Completed | ShellStatus::Running),
metadata: Some(json!({
"exit_code": result.exit_code,
"status": format!("{:?}", result.status),
"duration_ms": result.duration_ms,
"sandboxed": result.sandboxed,
"sandbox_type": result.sandbox_type,
"sandbox_denied": result.sandbox_denied,
"task_id": result.task_id,
"stdout_len": result.stdout_len,
"stderr_len": result.stderr_len,
"stdout_truncated": result.stdout_truncated,
"stderr_truncated": result.stderr_truncated,
"stdout_omitted": result.stdout_omitted,
"stderr_omitted": result.stderr_omitted,
"stdout_total_len": delta.stdout_total_len,
"stderr_total_len": delta.stderr_total_len,
"summary": summary,
"stdout_summary": stdout_summary,
"stderr_summary": stderr_summary,
"command": delta.command,
"stream_delta": true,
})),
metadata: Some(metadata),
};
if let Some(hint) = network_restricted_hint
&& let Some(metadata) = tool_result.metadata.as_mut()
+91
View File
@@ -366,6 +366,97 @@ fn shell_delta_result_surfaces_network_restricted_hint() {
);
}
/// A failed `cargo test` delta carries a structured `cargo_failure_summary`
/// while the regular shell `summary` entry is still present.
#[test]
fn shell_delta_result_includes_cargo_failure_summary() {
    let workspace = tempdir().expect("tempdir");
    let context = ToolContext::new(workspace.path());

    let delta = ShellDeltaResult {
        command: "cargo test".to_string(),
        result: ShellResult {
            task_id: None,
            status: ShellStatus::Failed,
            exit_code: Some(101),
            stdout: "running 1 test\ntest tests::fails ... FAILED\n\nfailures:\n\n---- tests::fails stdout ----\nthread 'tests::fails' panicked at src/lib.rs:7:9:\nboom\n\ntest result: FAILED. 0 passed; 1 failed; 0 ignored; finished in 0.00s\n".to_string(),
            stderr: "error: test failed, to rerun pass `--lib`".to_string(),
            duration_ms: 12,
            stdout_len: 0,
            stderr_len: 0,
            stdout_omitted: 0,
            stderr_omitted: 0,
            stdout_truncated: false,
            stderr_truncated: false,
            sandboxed: false,
            sandbox_type: None,
            sandbox_denied: false,
        },
        stdout_total_len: 0,
        stderr_total_len: 0,
    };

    let metadata = build_shell_delta_tool_result(delta, &context)
        .metadata
        .expect("metadata");

    let cargo_summary = &metadata["cargo_failure_summary"];
    assert_eq!(cargo_summary["kind"], json!("test_failure"));

    let cargo_text = cargo_summary["summary"].as_str().unwrap();
    assert!(cargo_text.contains("Failing tests: tests::fails"));

    let shell_text = metadata["summary"].as_str().unwrap();
    assert!(shell_text.contains("error: test failed"));
}
/// A failed cargo command whose output has no structured diagnostics gets no
/// `cargo_failure_summary`, and the regular shell `summary` is kept as is.
#[test]
fn shell_delta_result_keeps_existing_summary_for_generic_cargo_failure() {
    let workspace = tempdir().expect("tempdir");
    let context = ToolContext::new(workspace.path());

    let delta = ShellDeltaResult {
        command: "cargo test".to_string(),
        result: ShellResult {
            task_id: None,
            status: ShellStatus::Failed,
            exit_code: Some(1),
            stdout: "build failed".to_string(),
            stderr: "command failed without structured cargo diagnostics".to_string(),
            duration_ms: 12,
            stdout_len: 0,
            stderr_len: 0,
            stdout_omitted: 0,
            stderr_omitted: 0,
            stdout_truncated: false,
            stderr_truncated: false,
            sandboxed: false,
            sandbox_type: None,
            sandbox_denied: false,
        },
        stdout_total_len: 0,
        stderr_total_len: 0,
    };

    let metadata = build_shell_delta_tool_result(delta, &context)
        .metadata
        .expect("metadata");

    assert!(metadata.get("cargo_failure_summary").is_none());
    assert_eq!(
        metadata["summary"],
        json!("command failed without structured cargo diagnostics")
    );
}
#[test]
fn test_summarize_output_strips_truncation_note() {
let long_output = "x".repeat(60_000);
+26 -1
View File
@@ -10,6 +10,7 @@ use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use serde_json::{Value, json};
use super::cargo_failure_summary::summarize_cargo_failure;
use super::spec::{
ApprovalRequirement, ToolCapability, ToolContext, ToolError, ToolResult, ToolSpec,
optional_bool, optional_str,
@@ -100,7 +101,20 @@ impl ToolSpec for RunTestsTool {
command: command_str,
};
ToolResult::json(&result).map_err(|e| ToolError::execution_failed(e.to_string()))
let mut tool_result =
ToolResult::json(&result).map_err(|e| ToolError::execution_failed(e.to_string()))?;
if let Some(summary) = summarize_cargo_failure(
&result.command,
&result.stdout,
&result.stderr,
Some(result.exit_code),
) {
tool_result = tool_result.with_metadata(json!({
"summary": summary.summary,
"cargo_failure_summary": summary.to_metadata_value(),
}));
}
Ok(tool_result)
}
}
@@ -255,6 +269,17 @@ mod tests {
serde_json::from_str(&result.content).expect("tool result should be json");
assert!(!parsed.success);
assert_ne!(parsed.exit_code, 0);
let metadata = result.metadata.expect("metadata");
assert_eq!(
metadata["cargo_failure_summary"]["kind"],
json!("test_failure")
);
assert!(
metadata["cargo_failure_summary"]["summary"]
.as_str()
.unwrap()
.contains("Failing tests:")
);
}
#[test]