Sequential is slow. Concurrent is 50x faster.
Processing 100 AI requests one at a time takes 5+ minutes. With 50 requests in flight at once, the same batch completes in about 6 seconds. Here's how to do it safely.
Pattern 1: asyncio with AsyncOpenAI
import asyncio
from openai import AsyncOpenAI
# Shared async client used by every snippet below.
client = AsyncOpenAI(
    base_url="https://api.izziapi.com/v1",
    api_key="izzi-YOUR_KEY_HERE",  # placeholder; substitute your own key
)
async def process_single(prompt: str) -> str:
    """Send one prompt as a single user message and return the reply text.

    The request is issued through the module-level ``client`` with the
    ``claude-sonnet-4-20250514`` model and ``max_tokens=500``. Only the
    message content of the first choice is returned.
    """
    user_message = {"role": "user", "content": prompt}
    completion = await client.chat.completions.create(
        model="claude-sonnet-4-20250514",
        messages=[user_message],
        max_tokens=500,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
async def process_batch(prompts: list[str], max_concurrent: int = 20) -> list[str]:
    """Process prompts concurrently with a concurrency limit.

    Args:
        prompts: Prompts to send; each one is passed to ``process_single``.
        max_concurrent: Maximum number of requests in flight at once.
            Must be at least 1.

    Returns:
        The responses, in the same order as ``prompts``.

    Raises:
        ValueError: If ``max_concurrent`` is less than 1 and there is
            at least one prompt to process.
    """
    if not prompts:
        return []
    if max_concurrent < 1:
        # Semaphore(0) never admits a task, so the gather below would
        # wait forever instead of failing.
        raise ValueError(f"max_concurrent must be >= 1, got {max_concurrent}")

    semaphore = asyncio.Semaphore(max_concurrent)

    async def limited(prompt: str) -> str:
        # Only max_concurrent coroutines get past this point at a time.
        async with semaphore:
            return await process_single(prompt)

    return await asyncio.gather(*(limited(p) for p in prompts))
# Process 100 prompts, 20 at a time
prompts = [f"Summarize concept #{i}" for i in range(100)]
results = asyncio.run(process_batch(prompts, max_concurrent=20))

# Pattern 2: With rate limiting
import time
class RateLimiter:
def __init__(self, requests_per_second: int = 10):
self.rps = requests_per_second
self.tokens = requests_per_second
self.last_refill = time.monotonic()
self.lock = asyncio.Lock()
async def acquire(self):
async with self.lock:
now = time.monotonic()
elapsed = now - self.last_refill
self.tokens = min(self.rps, self.tokens + elapsed * self.rps)
self.last_refill = now
if self.tokens < 1:
wait = (1 - self.tokens) / self.rps
await asyncio.sleep(wait)
self.tokens = 0
else:
self.tokens -= 1
# One shared limiter so every call draws from the same budget.
limiter = RateLimiter(requests_per_second=10)


async def rate_limited_call(prompt: str) -> str:
    """Wait for the shared limiter, then send ``prompt`` via ``process_single``."""
    await limiter.acquire()
    return await process_single(prompt)


# Pattern 3: With progress tracking
from tqdm.asyncio import tqdm_asyncio
async def process_with_progress(
    prompts: list[str], max_concurrent: int = 20
) -> list[str]:
    """Process prompts concurrently while showing a tqdm progress bar.

    Args:
        prompts: Prompts to send; each one is passed to ``process_single``.
        max_concurrent: Maximum number of requests in flight at once.
            Defaults to 20. Must be at least 1.

    Returns:
        The values returned by ``tqdm_asyncio.gather`` for the prompt tasks.

    Raises:
        ValueError: If ``max_concurrent`` is less than 1.
    """
    if max_concurrent < 1:
        # Semaphore(0) never admits a task, so the bar would sit at 0 forever.
        raise ValueError(f"max_concurrent must be >= 1, got {max_concurrent}")

    semaphore = asyncio.Semaphore(max_concurrent)

    async def task(prompt: str) -> str:
        async with semaphore:
            return await process_single(prompt)

    tasks = [task(p) for p in prompts]
    return await tqdm_asyncio.gather(*tasks, desc="Processing")


# Performance benchmarks
| Approach | 100 requests | 1000 requests |
|---|---|---|
| Sequential | 5 min | 50 min |
| Concurrent (10) | 30 sec | 5 min |
| Concurrent (20) | 15 sec | 2.5 min |
| Concurrent (50) | 6 sec | 1 min |
Using Claude Sonnet 4 on Izzi API (figures assume an average of about 3 s per request, i.e. 5 min for 100 sequential requests)
