From b930a454e8e0f72e7c3c063ced0688a971085d2e Mon Sep 17 00:00:00 2001
From: Marco Sadjadi
Date: Thu, 28 May 2026 21:01:50 +0200
Subject: [PATCH] fix(llm): tighter system prompt + 12288 max_tokens for paid tiers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Sonnet 4.6 was still hitting max_tokens on ambitious prompts like
"WorldWeather MCP for any location" because the implementation bodies
ballooned with defensive scaffolding. Two changes:

1. SYSTEM_PROMPT now imposes hard limits the model can self-enforce:
   - at most 6 tools (combine related capabilities with a mode param)
   - implementation body <= 40 lines, no comments, no overengineering
   - descriptions <= 100 chars
   These keep a typical preview under ~7k output tokens.

2. team/enterprise maxTokens 8192 -> 12288. At ~130 tok/s that fits in
   ~94s, still under Cloudflare's 100s edge cap.

Hobby (GLM) and pro (Haiku) keep their existing limits — they were not
hitting the ceiling. SpecTruncatedError still fires + surfaces 422
spec_too_large when even 12288 isn't enough, so the user gets actionable
feedback instead of an opaque zod error.
--- packages/llm/src/index.ts | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/packages/llm/src/index.ts b/packages/llm/src/index.ts index 9872e3d..743b1f4 100644 --- a/packages/llm/src/index.ts +++ b/packages/llm/src/index.ts @@ -6,16 +6,16 @@ export const SYSTEM_PROMPT = `You generate production-grade MCP server specifica Output ONE JSON object (no markdown, no prose, no code fences) with this exact shape: { - "name": "human-readable server name (max 128 chars)", - "description": "1-2 sentence purpose", + "name": "human-readable server name (max 80 chars)", + "description": "one sentence", "tools": [ { "name": "snake_case_tool_name", - "description": "what the AI client sees — single sentence, clear", + "description": "single sentence, max 100 chars", "inputSchema": { - "param_name": { "type": "string|number|boolean|array|object", "description": "...", "required": true } + "param_name": { "type": "string|number|boolean|array|object", "description": "short", "required": true } }, - "implementation": "ASYNC TypeScript body. Receives {args} pre-validated. Must return MCP content blocks: { content: [{ type: 'text', text: '...' }] }. Use process.env.SECRET_NAME for secrets. NEVER use eval/Function/child_process. Use globalThis.fetch for HTTP. Wrap external calls in try/catch and return { content: [{ type: 'text', text: 'Error: ...' }], isError: true } on failure." + "implementation": "async TS body, return { content: [{ type:'text', text:'...' }] }; secrets via process.env; HTTP via globalThis.fetch with AbortSignal.timeout(10000); try/catch -> { content:[{type:'text',text:'Error: ...'}], isError:true }; no eval/Function/child_process; no imports." } ], "resources": [], @@ -25,16 +25,13 @@ Output ONE JSON object (no markdown, no prose, no code fences) with this exact s "dependencies": {} } -Rules: -- Tools are idempotent unless the description explicitly says destructive. -- Validate all string inputs before use. 
-- For databases: parameterized queries only (use the 'pg' library with $1 placeholders). -- For HTTP APIs: globalThis.fetch with explicit timeout via AbortSignal.timeout(10000). -- Never hardcode credentials; declare them under requiredSecrets and read via process.env. -- Keep tool implementations under 5000 characters. -- Do not include "import" statements in implementations — the runtime injects fetch, pg, etc. +Hard limits (the output gets truncated past these — write tight): +- At most 6 tools. Combine related capabilities into one tool with a "mode" param rather than splitting. +- Each implementation body: at most 40 lines of code, no defensive overengineering, no comments. +- Each description / inputSchema description: one short clause, no examples. +- Parameterised SQL only (pg with $1 placeholders). No prose, no JSON examples in code. -Return JSON only. No explanation.`; +Return JSON only. No preamble, no closing remark.`; // Regex blacklist — explicitly NOT a security boundary, just an early-warning // for obviously-dangerous LLM output. The real defence is the Docker @@ -122,7 +119,7 @@ const PREVIEW_MODELS: Record = { team: { provider: 'anthropic', model: 'claude-sonnet-4-6', - maxTokens: 8192, + maxTokens: 12288, timeoutMs: 95_000, displayName: 'Claude Sonnet 4.6', displayBadge: 'claude-sonnet', @@ -130,7 +127,7 @@ const PREVIEW_MODELS: Record = { enterprise: { provider: 'anthropic', model: 'claude-sonnet-4-6', - maxTokens: 8192, + maxTokens: 12288, timeoutMs: 95_000, displayName: 'Claude Sonnet 4.6', displayBadge: 'claude-sonnet',