diff --git a/packages/llm/src/index.ts b/packages/llm/src/index.ts
index 0fc8451..8c5a874 100644
--- a/packages/llm/src/index.ts
+++ b/packages/llm/src/index.ts
@@ -87,39 +87,44 @@ export interface ModelChoice {
  * fit. Hobby uses GLM as the cost lever; paid tiers escalate to Claude — the
  * visible quality/speed jump *is* the upgrade pitch.
  *
- * Measured token rates: glm-4-plus ~58 tok/s (3500 tok ≈ 60s) ·
- * Claude Haiku 4.5 ~200 tok/s (8192 tok ≈ 41s) · Claude Sonnet 4.6 ~80 tok/s.
+ * Measured token rates: glm-4-plus ~58 tok/s · Claude Haiku 4.5 ~200 tok/s ·
+ * Claude Sonnet 4.6 ~80 tok/s. A spec is small (<= ~10 tools with short
+ * descriptions, ~1.5–2.5k output tokens in practice), so the Claude tiers cap
+ * maxTokens at 4096 (GLM stays at 3500): well under the model's hard ceiling,
+ * with worst-case generation of ≈51s for Sonnet (4096 / 80) and ≈60s for GLM
+ * (3500 / 58). The 90s timeouts below buy headroom for cold starts / slow API
+ * responses while staying clear of Cloudflare's 100s edge cap.
  */
 const PREVIEW_MODELS: Record<Plan, ModelChoice> = {
   hobby: {
     provider: 'glm',
     model: 'glm-4-plus',
     maxTokens: 3500,
-    timeoutMs: 65_000,
+    timeoutMs: 90_000,
     displayName: 'Open-tier AI',
     displayBadge: 'open-tier',
   },
   pro: {
     provider: 'anthropic',
     model: 'claude-haiku-4-5-20251001',
-    maxTokens: 8192,
-    timeoutMs: 60_000,
+    maxTokens: 4096,
+    timeoutMs: 90_000,
     displayName: 'Claude Haiku 4.5',
     displayBadge: 'claude-haiku',
   },
   team: {
     provider: 'anthropic',
     model: 'claude-sonnet-4-6',
-    maxTokens: 8192,
-    timeoutMs: 60_000,
+    maxTokens: 4096,
+    timeoutMs: 90_000,
     displayName: 'Claude Sonnet 4.6',
     displayBadge: 'claude-sonnet',
   },
   enterprise: {
     provider: 'anthropic',
     model: 'claude-sonnet-4-6',
-    maxTokens: 8192,
-    timeoutMs: 60_000,
+    maxTokens: 4096,
+    timeoutMs: 90_000,
     displayName: 'Claude Sonnet 4.6',
     displayBadge: 'claude-sonnet',
   },