fix: retry quota errors returned as HTTP 400

This commit is contained in:
dst1213
2026-05-08 20:46:48 +08:00
committed by Hunter Bown
parent 39fd5379fd
commit 682e915857
2 changed files with 18 additions and 5 deletions
-4
View File
@@ -654,10 +654,6 @@ impl DeepSeekClient {
if status.is_success() {
return Ok(response);
}
let retryable = status.as_u16() == 429 || status.is_server_error();
if !retryable {
return Ok(response);
}
let retry_after = extract_retry_after(response.headers());
let body = bounded_error_text(response, ERROR_BODY_MAX_BYTES).await;
Err(LlmError::from_http_response_with_retry_after(
+18 -1
View File
@@ -202,7 +202,16 @@ impl LlmError {
400 => {
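// Classify 400 errors by examining the response body.
// Order matters: quota markers are checked before the context-length markers,
// so a quota message that also mentions e.g. "token" or "maximum" is not
// misclassified as a context-length error.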
let body_lower = body.to_lowercase();
if body_lower.contains("context_length")
if body_lower.contains("insufficientquota")
|| body_lower.contains("insufficient_quota")
|| body_lower.contains("exceeded your current quota")
|| body_lower.contains("quota exceeded")
{
LlmError::RateLimited {
message: body.to_string(),
retry_after: None,
}
} else if body_lower.contains("context_length")
|| body_lower.contains("token")
|| body_lower.contains("too long")
|| body_lower.contains("maximum")
@@ -846,6 +855,14 @@ mod tests {
let err = LlmError::from_http_response(400, "context_length_exceeded");
assert!(matches!(err, LlmError::ContextLengthError(_)));
// Some OpenAI-compatible gateways return quota/rate-limit errors as HTTP 400;
// these must be classified as RateLimited and remain retryable.
let err = LlmError::from_http_response(
400,
r#"{"error":{"code":"insufficientquota","message":"You exceeded your current quota"}}"#,
);
assert!(matches!(err, LlmError::RateLimited { .. }));
assert!(err.is_retryable());
// Content policy
let err = LlmError::from_http_response(400, "content_policy_violation");
assert!(matches!(err, LlmError::ContentPolicyError(_)));