From b930a454e8e0f72e7c3c063ced0688a971085d2e Mon Sep 17 00:00:00 2001
From: Marco Sadjadi <marco@buildmymcpserver.local>
Date: Thu, 28 May 2026 21:01:50 +0200
Subject: [PATCH] fix(llm): tighter system prompt + 12288 max_tokens for
 team/enterprise
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Sonnet 4.6 was still hitting max_tokens on ambitious prompts like
"WorldWeather MCP for any location" because the implementation bodies
ballooned with defensive scaffolding. Two changes:

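1. SYSTEM_PROMPT now imposes hard limits the model can self-enforce:
   - at most 6 tools (combine related capabilities with a mode param)
   - implementation body <= 40 lines, no comments, no overengineering
   - tool descriptions <= 100 chars; server name <= 80 chars (was 128)
   These keep a typical preview under ~7k output tokens.
   The old "Rules" list is replaced: the fetch-timeout and no-imports
   rules move into the "implementation" field text, the 5000-character
   cap gives way to the 40-line limit, and the idempotency,
   input-validation and requiredSecrets rules are dropped.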

2. team/enterprise maxTokens 8192 -> 12288. At ~130 tok/s a full-length
   response takes ~94.5s, which is under Cloudflare's 100s edge cap but
   leaves almost no headroom under the unchanged 95s timeoutMs. Hobby
   (GLM) and pro (Haiku) keep their existing limits — they were not
   hitting the ceiling.

SpecTruncatedError still fires and surfaces a 422 spec_too_large when
even 12288 tokens isn't enough, so the user gets actionable feedback
instead of an opaque zod error.
---
 packages/llm/src/index.ts | 29 +++++++++++++----------------
 1 file changed, 13 insertions(+), 16 deletions(-)

diff --git a/packages/llm/src/index.ts b/packages/llm/src/index.ts
index 9872e3d..743b1f4 100644
--- a/packages/llm/src/index.ts
+++ b/packages/llm/src/index.ts
@@ -6,16 +6,16 @@ export const SYSTEM_PROMPT = `You generate production-grade MCP server specifica
 Output ONE JSON object (no markdown, no prose, no code fences) with this exact shape:
 
 {
-  "name": "human-readable server name (max 128 chars)",
-  "description": "1-2 sentence purpose",
+  "name": "human-readable server name (max 80 chars)",
+  "description": "one sentence",
   "tools": [
     {
       "name": "snake_case_tool_name",
-      "description": "what the AI client sees — single sentence, clear",
+      "description": "single sentence, max 100 chars",
       "inputSchema": {
-        "param_name": { "type": "string|number|boolean|array|object", "description": "...", "required": true }
+        "param_name": { "type": "string|number|boolean|array|object", "description": "short", "required": true }
       },
-      "implementation": "ASYNC TypeScript body. Receives {args} pre-validated. Must return MCP content blocks: { content: [{ type: 'text', text: '...' }] }. Use process.env.SECRET_NAME for secrets. NEVER use eval/Function/child_process. Use globalThis.fetch for HTTP. Wrap external calls in try/catch and return { content: [{ type: 'text', text: 'Error: ...' }], isError: true } on failure."
+      "implementation": "async TS body, return { content: [{ type:'text', text:'...' }] }; secrets via process.env; HTTP via globalThis.fetch with AbortSignal.timeout(10000); try/catch -> { content:[{type:'text',text:'Error: ...'}], isError:true }; no eval/Function/child_process; no imports."
     }
   ],
   "resources": [],
@@ -25,16 +25,13 @@ Output ONE JSON object (no markdown, no prose, no code fences) with this exact s
   "dependencies": {}
 }
 
-Rules:
-- Tools are idempotent unless the description explicitly says destructive.
-- Validate all string inputs before use.
-- For databases: parameterized queries only (use the 'pg' library with $1 placeholders).
-- For HTTP APIs: globalThis.fetch with explicit timeout via AbortSignal.timeout(10000).
-- Never hardcode credentials; declare them under requiredSecrets and read via process.env.
-- Keep tool implementations under 5000 characters.
-- Do not include "import" statements in implementations — the runtime injects fetch, pg, etc.
+Hard limits (the output gets truncated past these — write tight):
+- At most 6 tools. Combine related capabilities into one tool with a "mode" param rather than splitting.
+- Each implementation body: at most 40 lines of code, no defensive overengineering, no comments.
+- Each description / inputSchema description: one short clause, no examples.
+- Parameterised SQL only (pg with $1 placeholders). No prose, no JSON examples in code.
 
-Return JSON only. No explanation.`;
+Return JSON only. No preamble, no closing remark.`;
 
 // Regex blacklist — explicitly NOT a security boundary, just an early-warning
 // for obviously-dangerous LLM output. The real defence is the Docker
@@ -122,7 +119,7 @@ const PREVIEW_MODELS: Record<Plan, ModelChoice> = {
   team: {
     provider: 'anthropic',
     model: 'claude-sonnet-4-6',
-    maxTokens: 8192,
+    maxTokens: 12288,
     timeoutMs: 95_000,
     displayName: 'Claude Sonnet 4.6',
     displayBadge: 'claude-sonnet',
@@ -130,7 +127,7 @@ const PREVIEW_MODELS: Record<Plan, ModelChoice> = {
   enterprise: {
     provider: 'anthropic',
     model: 'claude-sonnet-4-6',
-    maxTokens: 8192,
+    maxTokens: 12288,
     timeoutMs: 95_000,
     displayName: 'Claude Sonnet 4.6',
     displayBadge: 'claude-sonnet',