From 5a8e7361135f37e8b6040213d2b10969b8e57904 Mon Sep 17 00:00:00 2001 From: Marco Sadjadi Date: Thu, 28 May 2026 18:51:51 +0200 Subject: [PATCH] =?UTF-8?q?fix(llm):=20preview=20timeout=2060s=E2=86=9290s?= =?UTF-8?q?=20+=20maxTokens=208192=E2=86=924096?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enterprise plan was hitting SpecTimeoutError exactly at 60s because the Sonnet 4.6 preview was budgeted for 8192 tokens at ~80 tok/s (≈102s worst case) inside a 60s window. The frontend then rolled back to step 1 with no spec. A real spec is small (<= ~10 tools, ~1.5–2.5k output tokens in practice) so 4096 is plenty and lets even Sonnet finish in ~51s worst case. The 90s timeout buys headroom for cold starts while staying under Cloudflare's 100s edge cap. Hobby/GLM bumped to 90s too — same headroom argument. --- packages/llm/src/index.ts | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/packages/llm/src/index.ts b/packages/llm/src/index.ts index 0fc8451..8c5a874 100644 --- a/packages/llm/src/index.ts +++ b/packages/llm/src/index.ts @@ -87,39 +87,44 @@ export interface ModelChoice { * fit. Hobby uses GLM as the cost lever; paid tiers escalate to Claude — the * visible quality/speed jump *is* the upgrade pitch. * - * Measured token rates: glm-4-plus ~58 tok/s (3500 tok ≈ 60s) · - * Claude Haiku 4.5 ~200 tok/s (8192 tok ≈ 41s) · Claude Sonnet 4.6 ~80 tok/s. + * Measured token rates: glm-4-plus ~58 tok/s · Claude Haiku 4.5 ~200 tok/s · + * Claude Sonnet 4.6 ~80 tok/s. A spec is small (<= ~10 tools with short + * descriptions, ~1.5–2.5k output tokens in practice) so we cap maxTokens at + * 4096 — well under the model's hard ceiling and tight enough that even + * Sonnet finishes inside 60s in the worst case (4096 / 80 ≈ 51s). The + * 90s timeouts buy headroom for cold starts / slow API responses + * while staying clear of Cloudflare's 100s edge cap. 
*/ const PREVIEW_MODELS: Record = { hobby: { provider: 'glm', model: 'glm-4-plus', maxTokens: 3500, - timeoutMs: 65_000, + timeoutMs: 90_000, displayName: 'Open-tier AI', displayBadge: 'open-tier', }, pro: { provider: 'anthropic', model: 'claude-haiku-4-5-20251001', - maxTokens: 8192, - timeoutMs: 60_000, + maxTokens: 4096, + timeoutMs: 90_000, displayName: 'Claude Haiku 4.5', displayBadge: 'claude-haiku', }, team: { provider: 'anthropic', model: 'claude-sonnet-4-6', - maxTokens: 8192, - timeoutMs: 60_000, + maxTokens: 4096, + timeoutMs: 90_000, displayName: 'Claude Sonnet 4.6', displayBadge: 'claude-sonnet', }, enterprise: { provider: 'anthropic', model: 'claude-sonnet-4-6', - maxTokens: 8192, - timeoutMs: 60_000, + maxTokens: 4096, + timeoutMs: 90_000, displayName: 'Claude Sonnet 4.6', displayBadge: 'claude-sonnet', },