fix: retry quota errors returned as HTTP 400

2026-05-08 20:46:48 +08:00
parent 39fd5379fd
commit 682e915857
2 changed files with 18 additions and 5 deletions
@@ -654,10 +654,6 @@ impl DeepSeekClient {
                    if status.is_success() {
                        return Ok(response);
                    }
-                    let retryable = status.as_u16() == 429 || status.is_server_error();
-                    if !retryable {
-                        return Ok(response);
-                    }
                    let retry_after = extract_retry_after(response.headers());
                    let body = bounded_error_text(response, ERROR_BODY_MAX_BYTES).await;
                    Err(LlmError::from_http_response_with_retry_after(
@@ -202,7 +202,16 @@ impl LlmError {
            400 => {
                // Classify 400 errors by examining the response body
                let body_lower = body.to_lowercase();
-                if body_lower.contains("context_length")
+                if body_lower.contains("insufficientquota")
+                    || body_lower.contains("insufficient_quota")
+                    || body_lower.contains("exceeded your current quota")
+                    || body_lower.contains("quota exceeded")
+                {
+                    LlmError::RateLimited {
+                        message: body.to_string(),
+                        retry_after: None,
+                    }
+                } else if body_lower.contains("context_length")
                    || body_lower.contains("token")
                    || body_lower.contains("too long")
                    || body_lower.contains("maximum")
@@ -846,6 +855,14 @@ mod tests {
        let err = LlmError::from_http_response(400, "context_length_exceeded");
        assert!(matches!(err, LlmError::ContextLengthError(_)));

+        // Some OpenAI-compatible gateways return quota/rate-limit errors as HTTP 400.
+        let err = LlmError::from_http_response(
+            400,
+            r#"{"error":{"code":"insufficientquota","message":"You exceeded your current quota"}}"#,
+        );
+        assert!(matches!(err, LlmError::RateLimited { .. }));
+        assert!(err.is_retryable());
+
        // Content policy
        let err = LlmError::from_http_response(400, "content_policy_violation");
        assert!(matches!(err, LlmError::ContentPolicyError(_)));