From 537afcf07e35b3c818cff0687520db09089f1c1b Mon Sep 17 00:00:00 2001 From: cyq <15000851237@163.com> Date: Tue, 2 Jun 2026 09:17:41 +0800 Subject: [PATCH] fix(subagent): cap truncated response retries --- crates/tui/src/tools/subagent/mod.rs | 70 +++++++++++++++++++------- crates/tui/src/tools/subagent/tests.rs | 33 ++++++++++++ 2 files changed, 86 insertions(+), 17 deletions(-) diff --git a/crates/tui/src/tools/subagent/mod.rs b/crates/tui/src/tools/subagent/mod.rs index a4debdf4..3ab494b5 100644 --- a/crates/tui/src/tools/subagent/mod.rs +++ b/crates/tui/src/tools/subagent/mod.rs @@ -73,6 +73,7 @@ const TOOL_TIMEOUT: Duration = Duration::from_secs(30); // arguments, especially write_file content. The API bills generated tokens, not // the requested ceiling. const SUBAGENT_RESPONSE_MAX_TOKENS: u32 = 16_384; +const MAX_CONSECUTIVE_TRUNCATED_SUBAGENT_RESPONSES: u32 = 5; /// Per-step LLM API call timeout. Each `create_message` request must complete /// within this window or the step is treated as timed out. Prevents a single /// stuck API call from blocking the sub-agent indefinitely. @@ -3669,6 +3670,28 @@ fn truncated_response_tool_results(tool_uses: &[(String, String, Value)]) -> Vec .collect() } +fn truncated_response_text_retry_message() -> Vec { + vec![ContentBlock::Text { + text: "Error: the model response was truncated by max_tokens. No complete tool call was available, so the partial response was not accepted as the sub-agent result. Retry with a shorter response or split the work into smaller steps.".to_string(), + cache_control: None, + }] +} + +fn record_truncated_subagent_response(consecutive: &mut u32) -> Result<()> { + *consecutive = consecutive.saturating_add(1); + if *consecutive > MAX_CONSECUTIVE_TRUNCATED_SUBAGENT_RESPONSES { + return Err(anyhow!( + "Sub-agent response was truncated by max_tokens {count} consecutive times; stopping to avoid an unbounded retry loop.", + count = *consecutive + )); + } + Ok(()) +} + +fn reset_truncated_subagent_responses(consecutive: &mut u32) { + *consecutive = 0; +} + #[allow(clippy::too_many_arguments)] async fn insert_subagent_full_transcript_handle( runtime: &SubAgentRuntime, @@ -3753,6 +3776,7 @@ async fn run_subagent( let mut steps = 0; let mut final_result: Option = None; let mut pending_inputs: VecDeque = VecDeque::new(); + let mut consecutive_truncated_responses = 0; for _step in 0..max_steps { // Cooperative cancellation: bail if this session's token was cancelled @@ -3932,6 +3956,35 @@ async fn run_subagent( content: response.content.clone(), }); + if response_was_truncated(&response) { + final_result = None; + record_truncated_subagent_response(&mut consecutive_truncated_responses)?; + let progress = if tool_uses.is_empty() { + "response truncated, returning retry instruction".to_string() + } else { + format!( + "response truncated, returning {} tool error(s)", + tool_uses.len() + ) + }; + emit_agent_progress( + runtime.event_tx.as_ref(), + runtime.mailbox.as_ref(), + &agent_id, + format!("step {steps}/{max_steps}: {progress}"), + ); + messages.push(Message { + role: "user".to_string(), + content: if tool_uses.is_empty() { + truncated_response_text_retry_message() + } else { + truncated_response_tool_results(&tool_uses) + }, + }); + continue; + } + reset_truncated_subagent_responses(&mut consecutive_truncated_responses); + if tool_uses.is_empty() { while let Ok(input) = input_rx.try_recv() { if input.interrupt { @@ -3951,23 +4004,6 @@ async fn run_subagent( continue; } - if response_was_truncated(&response) { - emit_agent_progress( - runtime.event_tx.as_ref(), - runtime.mailbox.as_ref(), - &agent_id, - format!( - "step {steps}/{max_steps}: response truncated, returning {} tool error(s)", - tool_uses.len() - ), - ); - messages.push(Message { - role: "user".to_string(), - content: truncated_response_tool_results(&tool_uses), - }); - continue; - } - emit_agent_progress( runtime.event_tx.as_ref(), runtime.mailbox.as_ref(), diff --git a/crates/tui/src/tools/subagent/tests.rs b/crates/tui/src/tools/subagent/tests.rs index 82616630..a2039a46 100644 --- a/crates/tui/src/tools/subagent/tests.rs +++ b/crates/tui/src/tools/subagent/tests.rs @@ -1536,6 +1536,39 @@ fn truncated_subagent_tool_calls_return_model_visible_errors() { } } +#[test] +fn truncated_subagent_text_response_returns_model_visible_error() { + let results = truncated_response_text_retry_message(); + + assert_eq!(results.len(), 1); + match &results[0] { + ContentBlock::Text { text, .. } => { + assert!(text.contains("truncated by max_tokens")); + assert!(text.contains("No complete tool call was available")); + assert!(text.contains("Retry with a shorter response")); + } + other => panic!("expected text retry message, got {other:?}"), + } +} + +#[test] +fn consecutive_truncated_subagent_responses_are_capped() { + let mut consecutive = 0; + + for _ in 0..MAX_CONSECUTIVE_TRUNCATED_SUBAGENT_RESPONSES { + record_truncated_subagent_response(&mut consecutive).expect("within truncation cap"); + } + + let err = record_truncated_subagent_response(&mut consecutive) + .expect_err("one more truncation should stop the sub-agent"); + assert!(err.to_string().contains("truncated by max_tokens")); + assert!(err.to_string().contains("consecutive")); + + reset_truncated_subagent_responses(&mut consecutive); + record_truncated_subagent_response(&mut consecutive).expect("reset should allow recovery"); + assert_eq!(consecutive, 1); +} + #[test] fn child_cancellation_cascades_from_parent() { let parent = stub_runtime();