fix(subagent): cap truncated response retries
This commit is contained in:
@@ -73,6 +73,7 @@ const TOOL_TIMEOUT: Duration = Duration::from_secs(30);
|
||||
// arguments, especially write_file content. The API bills generated tokens, not
|
||||
// the requested ceiling.
|
||||
const SUBAGENT_RESPONSE_MAX_TOKENS: u32 = 16_384;
|
||||
const MAX_CONSECUTIVE_TRUNCATED_SUBAGENT_RESPONSES: u32 = 5;
|
||||
/// Per-step LLM API call timeout. Each `create_message` request must complete
|
||||
/// within this window or the step is treated as timed out. Prevents a single
|
||||
/// stuck API call from blocking the sub-agent indefinitely.
|
||||
@@ -3669,6 +3670,28 @@ fn truncated_response_tool_results(tool_uses: &[(String, String, Value)]) -> Vec
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn truncated_response_text_retry_message() -> Vec<ContentBlock> {
|
||||
vec![ContentBlock::Text {
|
||||
text: "Error: the model response was truncated by max_tokens. No complete tool call was available, so the partial response was not accepted as the sub-agent result. Retry with a shorter response or split the work into smaller steps.".to_string(),
|
||||
cache_control: None,
|
||||
}]
|
||||
}
|
||||
|
||||
fn record_truncated_subagent_response(consecutive: &mut u32) -> Result<()> {
|
||||
*consecutive = consecutive.saturating_add(1);
|
||||
if *consecutive > MAX_CONSECUTIVE_TRUNCATED_SUBAGENT_RESPONSES {
|
||||
return Err(anyhow!(
|
||||
"Sub-agent response was truncated by max_tokens {count} consecutive times; stopping to avoid an unbounded retry loop.",
|
||||
count = *consecutive
|
||||
));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn reset_truncated_subagent_responses(consecutive: &mut u32) {
|
||||
*consecutive = 0;
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
async fn insert_subagent_full_transcript_handle(
|
||||
runtime: &SubAgentRuntime,
|
||||
@@ -3753,6 +3776,7 @@ async fn run_subagent(
|
||||
let mut steps = 0;
|
||||
let mut final_result: Option<String> = None;
|
||||
let mut pending_inputs: VecDeque<SubAgentInput> = VecDeque::new();
|
||||
let mut consecutive_truncated_responses = 0;
|
||||
|
||||
for _step in 0..max_steps {
|
||||
// Cooperative cancellation: bail if this session's token was cancelled
|
||||
@@ -3932,6 +3956,35 @@ async fn run_subagent(
|
||||
content: response.content.clone(),
|
||||
});
|
||||
|
||||
if response_was_truncated(&response) {
|
||||
final_result = None;
|
||||
record_truncated_subagent_response(&mut consecutive_truncated_responses)?;
|
||||
let progress = if tool_uses.is_empty() {
|
||||
"response truncated, returning retry instruction".to_string()
|
||||
} else {
|
||||
format!(
|
||||
"response truncated, returning {} tool error(s)",
|
||||
tool_uses.len()
|
||||
)
|
||||
};
|
||||
emit_agent_progress(
|
||||
runtime.event_tx.as_ref(),
|
||||
runtime.mailbox.as_ref(),
|
||||
&agent_id,
|
||||
format!("step {steps}/{max_steps}: {progress}"),
|
||||
);
|
||||
messages.push(Message {
|
||||
role: "user".to_string(),
|
||||
content: if tool_uses.is_empty() {
|
||||
truncated_response_text_retry_message()
|
||||
} else {
|
||||
truncated_response_tool_results(&tool_uses)
|
||||
},
|
||||
});
|
||||
continue;
|
||||
}
|
||||
reset_truncated_subagent_responses(&mut consecutive_truncated_responses);
|
||||
|
||||
if tool_uses.is_empty() {
|
||||
while let Ok(input) = input_rx.try_recv() {
|
||||
if input.interrupt {
|
||||
@@ -3951,23 +4004,6 @@ async fn run_subagent(
|
||||
continue;
|
||||
}
|
||||
|
||||
if response_was_truncated(&response) {
|
||||
emit_agent_progress(
|
||||
runtime.event_tx.as_ref(),
|
||||
runtime.mailbox.as_ref(),
|
||||
&agent_id,
|
||||
format!(
|
||||
"step {steps}/{max_steps}: response truncated, returning {} tool error(s)",
|
||||
tool_uses.len()
|
||||
),
|
||||
);
|
||||
messages.push(Message {
|
||||
role: "user".to_string(),
|
||||
content: truncated_response_tool_results(&tool_uses),
|
||||
});
|
||||
continue;
|
||||
}
|
||||
|
||||
emit_agent_progress(
|
||||
runtime.event_tx.as_ref(),
|
||||
runtime.mailbox.as_ref(),
|
||||
|
||||
@@ -1536,6 +1536,39 @@ fn truncated_subagent_tool_calls_return_model_visible_errors() {
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncated_subagent_text_response_returns_model_visible_error() {
|
||||
let results = truncated_response_text_retry_message();
|
||||
|
||||
assert_eq!(results.len(), 1);
|
||||
match &results[0] {
|
||||
ContentBlock::Text { text, .. } => {
|
||||
assert!(text.contains("truncated by max_tokens"));
|
||||
assert!(text.contains("No complete tool call was available"));
|
||||
assert!(text.contains("Retry with a shorter response"));
|
||||
}
|
||||
other => panic!("expected text retry message, got {other:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn consecutive_truncated_subagent_responses_are_capped() {
|
||||
let mut consecutive = 0;
|
||||
|
||||
for _ in 0..MAX_CONSECUTIVE_TRUNCATED_SUBAGENT_RESPONSES {
|
||||
record_truncated_subagent_response(&mut consecutive).expect("within truncation cap");
|
||||
}
|
||||
|
||||
let err = record_truncated_subagent_response(&mut consecutive)
|
||||
.expect_err("one more truncation should stop the sub-agent");
|
||||
assert!(err.to_string().contains("truncated by max_tokens"));
|
||||
assert!(err.to_string().contains("consecutive"));
|
||||
|
||||
reset_truncated_subagent_responses(&mut consecutive);
|
||||
record_truncated_subagent_response(&mut consecutive).expect("reset should allow recovery");
|
||||
assert_eq!(consecutive, 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn child_cancellation_cascades_from_parent() {
|
||||
let parent = stub_runtime();
|
||||
|
||||
Reference in New Issue
Block a user