fix: retry quota errors returned as HTTP 400
This commit is contained in:
@@ -654,10 +654,6 @@ impl DeepSeekClient {
|
||||
if status.is_success() {
|
||||
return Ok(response);
|
||||
}
|
||||
let retryable = status.as_u16() == 429 || status.is_server_error();
|
||||
if !retryable {
|
||||
return Ok(response);
|
||||
}
|
||||
let retry_after = extract_retry_after(response.headers());
|
||||
let body = bounded_error_text(response, ERROR_BODY_MAX_BYTES).await;
|
||||
Err(LlmError::from_http_response_with_retry_after(
|
||||
|
||||
@@ -202,7 +202,16 @@ impl LlmError {
|
||||
400 => {
|
||||
// Classify 400 errors by examining the response body
|
||||
let body_lower = body.to_lowercase();
|
||||
if body_lower.contains("context_length")
|
||||
if body_lower.contains("insufficientquota")
|
||||
|| body_lower.contains("insufficient_quota")
|
||||
|| body_lower.contains("exceeded your current quota")
|
||||
|| body_lower.contains("quota exceeded")
|
||||
{
|
||||
LlmError::RateLimited {
|
||||
message: body.to_string(),
|
||||
retry_after: None,
|
||||
}
|
||||
} else if body_lower.contains("context_length")
|
||||
|| body_lower.contains("token")
|
||||
|| body_lower.contains("too long")
|
||||
|| body_lower.contains("maximum")
|
||||
@@ -846,6 +855,14 @@ mod tests {
|
||||
let err = LlmError::from_http_response(400, "context_length_exceeded");
|
||||
assert!(matches!(err, LlmError::ContextLengthError(_)));
|
||||
|
||||
// Some OpenAI-compatible gateways return quota/rate-limit errors as HTTP 400.
|
||||
let err = LlmError::from_http_response(
|
||||
400,
|
||||
r#"{"error":{"code":"insufficientquota","message":"You exceeded your current quota"}}"#,
|
||||
);
|
||||
assert!(matches!(err, LlmError::RateLimited { .. }));
|
||||
assert!(err.is_retryable());
|
||||
|
||||
// Content policy
|
||||
let err = LlmError::from_http_response(400, "content_policy_violation");
|
||||
assert!(matches!(err, LlmError::ContentPolicyError(_)));
|
||||
|
||||
Reference in New Issue
Block a user