AI APIs fail. Plan for it.
AI API calls fail for three reasons: rate limits (429), server errors (500/503), and timeouts. If your app doesn't handle these, it crashes. Here's how to build resilience.
Pattern 1: Exponential backoff with jitter
Python
import time
import random
from openai import OpenAI, RateLimitError, APIConnectionError, APITimeoutError
# Shared SDK client used by every pattern below; requests are sent to the
# base_url given here.
client = OpenAI(
    api_key="izzi-YOUR_KEY_HERE",
    base_url="https://api.izziapi.com/v1",
)
def call_with_backoff(messages, model="claude-sonnet-4-20250514", max_retries=5):
    """Call the chat completions endpoint, retrying transient failures.

    Rate limits are retried with exponential backoff plus random jitter
    (capped at 60s), connection errors with plain exponential backoff
    (capped at 30s), and timeouts are retried immediately.

    Args:
        messages: Chat messages, passed through to the API unchanged.
        model: Model identifier to request.
        max_retries: Maximum number of attempts before giving up.

    Returns:
        Whatever ``client.chat.completions.create`` returns on success.

    Raises:
        Exception: "All retries exhausted" once every attempt has failed.
            The last API error (if any) is attached as ``__cause__``.
    """
    last_error = None
    for attempt in range(max_retries):
        # No point announcing a retry or sleeping when no attempt follows.
        has_retry_left = attempt < max_retries - 1
        try:
            return client.chat.completions.create(
                model=model,
                messages=messages,
                max_tokens=2000,
                timeout=30,
            )
        except RateLimitError as exc:
            last_error = exc
            if has_retry_left:
                wait = min(2 ** attempt + random.uniform(0, 1), 60)
                print(f"Rate limited. Retry {attempt+1}/{max_retries} in {wait:.1f}s")
                time.sleep(wait)
        except APITimeoutError as exc:
            # APITimeoutError is a subclass of APIConnectionError, so this
            # handler must come first or it can never run.
            last_error = exc
            if has_retry_left:
                print(f"Timeout. Retry {attempt+1}/{max_retries}")
        except APIConnectionError as exc:
            last_error = exc
            if has_retry_left:
                wait = min(2 ** attempt, 30)
                print(f"Connection error. Retry in {wait}s")
                time.sleep(wait)
    raise Exception("All retries exhausted") from last_error

# Pattern 2: Model fallback chain
Python
# Models to try, in order of preference.
FALLBACK_CHAIN = [
    "claude-sonnet-4-20250514",  # Primary (paid)
    "gpt-5.4",                   # Fallback 1 (paid)
    "deepseek-r1-0528",          # Fallback 2 (free)
    "qwen3-235b-a22b",           # Fallback 3 (free)
]


def call_with_fallback(messages):
    """Try each model in ``FALLBACK_CHAIN`` until one returns a response.

    Args:
        messages: Chat messages, passed through to the API unchanged.

    Returns:
        The response from the first model that succeeds.

    Raises:
        openai.AuthenticationError, openai.BadRequestError: Re-raised
            immediately; the same key and the same input are sent to every
            model, so moving down the chain is not expected to help.
        Exception: "All models in fallback chain failed" when every model
            raised; the last underlying error is attached as ``__cause__``.
    """
    # Local import: these names are not part of the file-level openai import.
    from openai import AuthenticationError, BadRequestError

    last_error = None
    for model in FALLBACK_CHAIN:
        try:
            return client.chat.completions.create(
                model=model,
                messages=messages,
                max_tokens=2000,
                timeout=30,
            )
        except (AuthenticationError, BadRequestError):
            # Non-retryable: fix the key or the input instead of burning
            # through the remaining models.
            raise
        except Exception as e:
            print(f"{model} failed: {e}")
            last_error = e
    raise Exception("All models in fallback chain failed") from last_error

# Pattern 3: Circuit breaker
Python
from datetime import datetime, timedelta
class CircuitBreaker:
    """Block calls to a dependency after repeated consecutive failures.

    States:
        closed: calls are allowed.
        open: calls are rejected until ``reset_timeout`` seconds have passed
            since the last failure.
        half-open: one trial call has been let through; its recorded outcome
            closes the breaker (success) or re-opens it (failure).

    Args:
        failure_threshold: Consecutive failures that open the breaker.
        reset_timeout: Seconds to wait before letting a trial call through.
    """

    def __init__(self, failure_threshold=5, reset_timeout=60):
        self.failures = 0
        self.threshold = failure_threshold
        self.reset_timeout = reset_timeout
        # Wall-clock time of the latest failure; informational only.
        self.last_failure = None
        self.state = "closed"  # closed | open | half-open
        # Monotonic timestamp of the latest failure or granted trial call.
        # Elapsed time is measured against this so DST or system clock
        # adjustments cannot stretch or shorten the open period.
        self._retry_clock = None

    def can_proceed(self) -> bool:
        """Return True if the caller may attempt the protected call now.

        While open or half-open, at most one trial call is granted per
        ``reset_timeout`` window. Granting it moves the breaker to
        half-open; a trial whose outcome is never recorded is superseded by
        a new one after another ``reset_timeout`` seconds.
        """
        if self.state == "closed":
            return True
        now = time.monotonic()
        if now - self._retry_clock >= self.reset_timeout:
            self.state = "half-open"
            self._retry_clock = now
            return True
        return False

    def record_success(self):
        """Reset the failure count and close the breaker."""
        self.failures = 0
        self.state = "closed"

    def record_failure(self):
        """Count a failure; open the breaker at the threshold or on a failed trial."""
        self.failures += 1
        self.last_failure = datetime.now()
        self._retry_clock = time.monotonic()
        if self.state == "half-open" or self.failures >= self.threshold:
            self.state = "open"
# Usage
PRIMARY_MODEL = "claude-sonnet-4-20250514"
breaker = CircuitBreaker(failure_threshold=3, reset_timeout=30)


def _call_fallbacks_only(messages):
    """Try every model in ``FALLBACK_CHAIN`` except the primary, in order."""
    last_error = None
    for model in FALLBACK_CHAIN:
        if model == PRIMARY_MODEL:
            # The breaker is open or the primary just failed; calling it
            # again here would defeat the purpose of the breaker.
            continue
        try:
            return client.chat.completions.create(
                model=model,
                messages=messages,
                max_tokens=2000,
                timeout=30,
            )
        except Exception as exc:
            print(f"{model} failed: {exc}")
            last_error = exc
    raise Exception("All models in fallback chain failed") from last_error


def safe_call(messages):
    """Call the primary model behind the circuit breaker, falling back on failure.

    When the breaker rejects the call, or the primary call raises, the
    remaining models in ``FALLBACK_CHAIN`` are tried instead. The primary
    model is skipped on that path so a tripped breaker really does stop
    traffic to it.

    Args:
        messages: Chat messages, passed through to the API unchanged.

    Returns:
        The response from the primary model, or from the first fallback
        model that succeeds.

    Raises:
        Exception: "All models in fallback chain failed" when the primary is
            unavailable and every fallback model raised.
    """
    if not breaker.can_proceed():
        return _call_fallbacks_only(messages)  # Skip to fallback
    try:
        result = client.chat.completions.create(
            model=PRIMARY_MODEL,
            messages=messages,
            timeout=15,
        )
    except Exception:
        breaker.record_failure()
        return _call_fallbacks_only(messages)
    breaker.record_success()
    return result

# Error types and responses
| Error | HTTP Code | Action | Wait time |
|---|---|---|---|
| Rate limit | 429 | Retry with backoff | 2^n + jitter |
| Server error | 500 | Retry 2x then fallback | 1-5 seconds |
| Overloaded | 503 | Fallback to another model | Immediate |
| Timeout | — | Retry with shorter timeout | Immediate |
| Auth error | 401 | Don't retry — fix key | — |
| Bad request | 400 | Don't retry — fix input | — |
