AI at the edge = sub-50ms cold starts
Cloudflare Workers run in 300+ data centers worldwide. Combined with Izzi API, you get AI inference with minimal latency and zero infrastructure. No servers, no scaling, no DevOps.
Step 1: Create the Worker
Bash
npx wrangler init ai-edge --type javascript
cd ai-edge
Step 2: Build the AI endpoint
TypeScript
// src/index.ts

/** Model used when the request body does not name one. */
const DEFAULT_MODEL = "claude-sonnet-4-20250514";

/** Lifetime of a cached reply in KV, in seconds (1 hour). */
const CACHE_TTL_SECONDS = 3600;

/** Chat-completions endpoint of Izzi API. */
const IZZI_CHAT_URL = "https://api.izziapi.com/v1/chat/completions";

/** The only part of the chat-completions payload this Worker reads. */
interface ChatCompletionPayload {
  choices?: Array<{ message?: { content?: unknown } }>;
}

/** Builds a JSON error response of the form `{ error }`. */
function errorResponse(error: string, status: number): Response {
  return Response.json({ error }, { status });
}

/**
 * Derives the KV key for a (model, message) pair.
 *
 * The pair is hashed with SHA-256 instead of being base64-encoded and
 * truncated: `btoa` throws on characters outside Latin-1, and a truncated
 * encoding gives every message with the same leading bytes the same key.
 * Hashing also keeps the key length fixed no matter how long the input is.
 */
async function chatCacheKey(model: string, message: string): Promise<string> {
  // JSON-encode the pair so ("a", "b c") and ("a b", "c") cannot collide.
  const input = new TextEncoder().encode(JSON.stringify([model, message]));
  const digest = await crypto.subtle.digest("SHA-256", input);
  const hex = Array.from(new Uint8Array(digest), (byte) =>
    byte.toString(16).padStart(2, "0"),
  ).join("");
  return `chat:${hex}`;
}

/** Pulls the assistant text out of a chat-completions payload, if present. */
function extractReply(data: unknown): string | undefined {
  const content = (data as ChatCompletionPayload | null)?.choices?.[0]?.message
    ?.content;
  return typeof content === "string" ? content : undefined;
}

/**
 * Worker entry point: `POST` a JSON body `{ message, model? }` and receive
 * `{ reply, cached, model? }`.
 *
 * Responses:
 * - 405 for any method other than POST.
 * - 400 when the body is not JSON or `message` / `model` are not non-empty strings.
 * - 502 when Izzi API is unreachable, answers with a non-2xx status, or
 *   returns a payload without a text reply.
 * - 200 with `cached: true` when the reply came from KV, `cached: false`
 *   when it was freshly generated (and then stored for one hour).
 */
const worker = {
  async fetch(request: Request, env: Env): Promise<Response> {
    if (request.method !== "POST") {
      return new Response("POST /chat with {message}", {
        status: 405,
        headers: { Allow: "POST" },
      });
    }

    let payload: unknown;
    try {
      payload = await request.json();
    } catch {
      return errorResponse("Request body must be valid JSON", 400);
    }
    if (typeof payload !== "object" || payload === null) {
      return errorResponse("Request body must be a JSON object", 400);
    }

    const { message, model = DEFAULT_MODEL } = payload as {
      message?: unknown;
      model?: unknown;
    };
    if (typeof message !== "string" || message.length === 0) {
      return errorResponse('"message" must be a non-empty string', 400);
    }
    if (typeof model !== "string" || model.length === 0) {
      return errorResponse('"model" must be a non-empty string', 400);
    }

    // Check KV cache first
    const cacheKey = await chatCacheKey(model, message);
    const cached = await env.AI_CACHE.get(cacheKey);
    if (cached !== null) {
      return Response.json({ reply: cached, cached: true });
    }

    // Call Izzi API
    let data: unknown;
    try {
      const upstream = await fetch(IZZI_CHAT_URL, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "Authorization": `Bearer ${env.IZZI_API_KEY}`,
        },
        body: JSON.stringify({
          model,
          messages: [{ role: "user", content: message }],
          max_tokens: 1000,
        }),
      });
      if (!upstream.ok) {
        return errorResponse(
          `Izzi API responded with status ${upstream.status}`,
          502,
        );
      }
      data = await upstream.json();
    } catch (err: unknown) {
      console.error("Izzi API request failed", err);
      return errorResponse("Could not get a response from Izzi API", 502);
    }

    const reply = extractReply(data);
    if (reply === undefined) {
      // Never cache or return a malformed upstream payload.
      return errorResponse("Izzi API returned no reply text", 502);
    }

    // Cache for 1 hour. Caching is best-effort: a KV write failure must not
    // turn a successful inference into an error for the caller.
    try {
      await env.AI_CACHE.put(cacheKey, reply, {
        expirationTtl: CACHE_TTL_SECONDS,
      });
    } catch (err: unknown) {
      console.warn("AI_CACHE.put failed; reply served without caching", err);
    }

    return Response.json({ reply, cached: false, model });
  },
};

export default worker;
/** Bindings the Worker runtime passes to `fetch` as `env`. */
interface Env {
  /** Izzi API key, stored as a Worker secret and sent upstream as a Bearer token. */
  IZZI_API_KEY: string;
  /** KV namespace that holds cached chat replies. */
  AI_CACHE: KVNamespace;
}
Step 3: Configure and deploy
Bash
# Add your API key as a secret
npx wrangler secret put IZZI_API_KEY
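# Create KV namespace for caching
npx wrangler kv:namespace create AI_CACHE
# Then bind it in wrangler.toml so the Worker can read it as env.AI_CACHE:
#   [[kv_namespaces]]
#   binding = "AI_CACHE"
#   id = "<id printed by the command above>"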
# Deploy
npx wrangler deploy
Step 4: Add streaming
TypeScript
/**
 * Streams a chat completion from Izzi API back to the caller.
 *
 * Reads `{ message }` from the JSON request body, asks Izzi API for a
 * streamed completion, and pipes the upstream body through unchanged as
 * `text/event-stream`.
 *
 * @param request Incoming request whose JSON body carries a non-empty `message` string.
 * @param env Worker bindings; only `IZZI_API_KEY` is read.
 * @returns The event stream on success; a JSON `{ error }` response with
 *   status 400 for a malformed body, or 502 when Izzi API does not answer
 *   with a successful, non-empty response.
 */
async function streamResponse(request: Request, env: Env): Promise<Response> {
  // Error paths answer with JSON so clients never mistake them for a stream.
  const jsonError = (error: string, status: number): Response =>
    new Response(JSON.stringify({ error }), {
      status,
      headers: { "Content-Type": "application/json" },
    });

  let payload: unknown;
  try {
    payload = await request.json();
  } catch {
    return jsonError("Request body must be valid JSON", 400);
  }
  const message = (payload as { message?: unknown } | null)?.message;
  if (typeof message !== "string" || message.length === 0) {
    return jsonError('"message" must be a non-empty string', 400);
  }

  const upstream = await fetch("https://api.izziapi.com/v1/chat/completions", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      "Authorization": `Bearer ${env.IZZI_API_KEY}`,
    },
    body: JSON.stringify({
      model: "claude-sonnet-4-20250514",
      messages: [{ role: "user", content: message }],
      stream: true,
    }),
  });

  // Without this check an upstream error body would be relayed as a
  // 200 event stream, hiding the failure from the client.
  if (!upstream.ok || upstream.body === null) {
    return jsonError(`Izzi API responded with status ${upstream.status}`, 502);
  }

  return new Response(upstream.body, {
    headers: {
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache",
    },
  });
}
Performance
| Metric | Traditional server | Cloudflare Worker |
|---|---|---|
| Cold start | 500ms - 5s | <50ms |
| Global latency | 100-300ms (single region) | <50ms (nearest edge) |
| Scaling | Manual / auto-scale lag | Instant, unlimited |
| Cost (1M requests) | $50-200 | $5 (Workers free tier: 100K/day) |
