import Anthropic from '@anthropic-ai/sdk';
import { GeneratorSpec, type GeneratorSpec as GeneratorSpecT } from '@bmm/types';

/**
 * System prompt sent verbatim to both providers.
 *
 * The literal is wrapped with backslash-newline line continuations, which
 * contribute no characters to the string, so the prompt text is a single
 * space-separated line exactly as before.
 */
export const SYSTEM_PROMPT = `You generate production-grade MCP server specifications as STRICT JSON. \
Output ONE JSON object (no markdown, no prose, no code fences) with this exact shape: \
{ \
"name": "human-readable server name (max 128 chars)", \
"description": "1-2 sentence purpose", \
"tools": [ \
{ \
"name": "snake_case_tool_name", \
"description": "what the AI client sees — single sentence, clear", \
"inputSchema": { \
"param_name": { "type": "string|number|boolean|array|object", "description": "...", "required": true } \
}, \
"implementation": "ASYNC TypeScript body. Receives {args} pre-validated. \
Must return MCP content blocks: { content: [{ type: 'text', text: '...' }] }. \
Use process.env.SECRET_NAME for secrets. \
NEVER use eval/Function/child_process. \
Use globalThis.fetch for HTTP. \
Wrap external calls in try/catch and return \
{ content: [{ type: 'text', text: 'Error: ...' }], isError: true } on failure." \
} \
], \
"resources": [], \
"prompts": [], \
"requiredSecrets": ["UPPER_SNAKE_CASE"], \
"scopes": ["mcp:read"], \
"dependencies": {} \
} \
Rules: \
- Tools are idempotent unless the description explicitly says destructive. \
- Validate all string inputs before use. \
- For databases: parameterized queries only (use the 'pg' library with $1 placeholders). \
- For HTTP APIs: globalThis.fetch with explicit timeout via AbortSignal.timeout(10000). \
- Never hardcode credentials; declare them under requiredSecrets and read via process.env. \
- Keep tool implementations under 5000 characters. \
- Do not include "import" statements in implementations — the runtime injects fetch, pg, etc. \
Return JSON only. No explanation.`;

// Regex blacklist — explicitly NOT a security boundary, just an early-warning
// for obviously-dangerous LLM output. The real defence is the Docker
// hardening in apps/generator/src/lib/deploy.ts (--cap-drop=ALL etc.). A
// determined attacker can bypass any of these with string concatenation
// (`'chi'+'ld_process'`) or alternate APIs — that's why container isolation
// has to hold even when this fails.
//
// None of the patterns carries the `g`/`y` flag, so `RegExp.prototype.test`
// keeps no `lastIndex` state between calls and the list is safe to share.
const BANNED_PATTERNS: readonly RegExp[] = [
  /\beval\s*\(/,
  /\bnew\s+Function\s*\(/,
  /\bFunction\s*\(\s*['"`]/, // Function('...') without `new`
  /\brequire\s*\(\s*['"]child_process['"]/,
  /\bchild_process\b/,
  /\bprocess\.binding\b/,
  /\bprocess\.dlopen\b/,
  /\.constructor\s*\.\s*constructor\b/, // [].constructor.constructor('...')
  /\b_load\s*\(/,
  /\bvm\.runIn(This|New)Context\b/,
  /globalThis\s*\[\s*['"`]/, // globalThis['Fun'+'ction']
  /ignore\s+previous\s+instructions/i,
  /disregard\s+(the\s+)?(above|previous)/i,
  /system\s+prompt\s+override/i,
];

// ──────────────────────────────────────────────────────────────────────────
// Plan-aware model selection
// ──────────────────────────────────────────────────────────────────────────

export type Plan = 'hobby' | 'pro' | 'team' | 'enterprise';
export type Purpose = 'preview' | 'build';
export type Provider = 'anthropic' | 'glm';
export type DisplayBadge = 'open-tier' | 'claude-haiku' | 'claude-sonnet' | 'claude-opus';

/** Everything needed to run one generation for a given plan/purpose. */
export interface ModelChoice {
  provider: Provider;
  model: string;
  maxTokens: number;
  timeoutMs: number;
  /** User-facing model name shown in the wizard + previews. */
  displayName: string;
  displayBadge: DisplayBadge;
}

/**
 * Preview runs synchronously inside an HTTP request behind Cloudflare's
 * ~100s edge cap. Each tier's (model + max_tokens + timeout) is bounded to
 * fit. Hobby uses GLM as the cost lever; paid tiers escalate to Claude — the
 * visible quality/speed jump *is* the upgrade pitch.
 *
 * Measured token rates: glm-4-plus ~58 tok/s (3500 tok ≈ 60s) ·
 * Claude Haiku 4.5 ~200 tok/s (8192 tok ≈ 41s) · Claude Sonnet 4.6 ~80 tok/s.
 */
const PREVIEW_MODELS: Record<Plan, ModelChoice> = {
  hobby: {
    provider: 'glm',
    model: 'glm-4-plus',
    maxTokens: 3500,
    timeoutMs: 65_000,
    displayName: 'Open-tier AI',
    displayBadge: 'open-tier',
  },
  pro: {
    provider: 'anthropic',
    model: 'claude-haiku-4-5-20251001',
    maxTokens: 8192,
    timeoutMs: 60_000,
    displayName: 'Claude Haiku 4.5',
    displayBadge: 'claude-haiku',
  },
  team: {
    provider: 'anthropic',
    model: 'claude-sonnet-4-6',
    maxTokens: 8192,
    timeoutMs: 60_000,
    displayName: 'Claude Sonnet 4.6',
    displayBadge: 'claude-sonnet',
  },
  enterprise: {
    provider: 'anthropic',
    model: 'claude-sonnet-4-6',
    maxTokens: 8192,
    timeoutMs: 60_000,
    displayName: 'Claude Sonnet 4.6',
    displayBadge: 'claude-sonnet',
  },
};

/**
 * Build worker runs async via BullMQ — no proxy timeout. With the 24h preview
 * cache TTL cache-misses are rare, so GLM as the default keeps that rare path
 * cheap; Enterprise gets Opus as a premium-quality promise.
 */
const BUILD_MODELS: Record<Plan, ModelChoice> = {
  hobby: {
    provider: 'glm',
    model: 'glm-4.5',
    maxTokens: 8192,
    timeoutMs: 180_000,
    displayName: 'Open-tier AI',
    displayBadge: 'open-tier',
  },
  pro: {
    provider: 'glm',
    model: 'glm-4.5',
    maxTokens: 8192,
    timeoutMs: 180_000,
    displayName: 'Open-tier AI',
    displayBadge: 'open-tier',
  },
  team: {
    provider: 'glm',
    model: 'glm-4.5',
    maxTokens: 8192,
    timeoutMs: 180_000,
    displayName: 'Open-tier AI',
    displayBadge: 'open-tier',
  },
  enterprise: {
    provider: 'anthropic',
    model: 'claude-opus-4-7',
    maxTokens: 8192,
    timeoutMs: 600_000,
    displayName: 'Claude Opus 4.7',
    displayBadge: 'claude-opus',
  },
};

/** Returns the preview-time model configuration for `plan`. */
export function pickPreviewModel(plan: Plan): ModelChoice {
  return PREVIEW_MODELS[plan];
}

/** Returns the build-time model configuration for `plan`. */
export function pickBuildModel(plan: Plan): ModelChoice {
  return BUILD_MODELS[plan];
}

// ──────────────────────────────────────────────────────────────────────────
// Generation API
// ──────────────────────────────────────────────────────────────────────────

/** A validated spec plus which backend produced it. */
export interface GenerationResult {
  spec: GeneratorSpecT;
  source: 'claude' | 'glm' | 'mock';
}

export interface GenerateOptions {
  /** 'anthropic' (default) or 'glm'. */
  provider?: Provider;
  /** Anthropic API key — required if provider === 'anthropic'. */
  apiKey?: string;
  /** Zhipu (GLM) API key — required if provider === 'glm'. */
  glmApiKey?: string;
  model?: string;
  maxTokens?: number;
  /** Per-attempt request timeout in ms. */
  timeoutMs?: number;
  /** SDK retry count. Anthropic only. */
  maxRetries?: number;
}

/**
 * Generates an MCP server spec from a free-text prompt.
 *
 * Falls back to {@link mockSpec} (source `'mock'`) when the API key for the
 * selected provider is missing, so callers without credentials still get a
 * usable spec.
 *
 * @param prompt - User description of the server to generate.
 * @param opts - Provider selection, credentials and per-request limits.
 * @returns The validated spec and the backend that produced it.
 * @throws SpecTimeoutError when the provider call exceeds `timeoutMs`.
 * @throws SpecValidationError when the output is empty, not JSON, or fails the schema.
 * @throws BannedPatternError when the spec matches a banned pattern.
 */
export async function generateSpec(
  prompt: string,
  opts: GenerateOptions = {},
): Promise<GenerationResult> {
  const provider = opts.provider ?? 'anthropic';

  if (provider === 'glm') {
    if (!opts.glmApiKey) return { spec: mockSpec(prompt), source: 'mock' };
    return generateWithGlm(prompt, {
      apiKey: opts.glmApiKey,
      model: opts.model ?? 'glm-4-plus',
      maxTokens: opts.maxTokens ?? 4096,
      timeoutMs: opts.timeoutMs,
    });
  }

  if (!opts.apiKey) {
    return { spec: mockSpec(prompt), source: 'mock' };
  }
  return generateWithAnthropic(prompt, {
    apiKey: opts.apiKey,
    model: opts.model ?? 'claude-opus-4-7',
    maxTokens: opts.maxTokens ?? 8192,
    timeoutMs: opts.timeoutMs,
    maxRetries: opts.maxRetries,
  });
}

/**
 * Shared tail of both providers: extract JSON from the raw model text,
 * validate it against the `GeneratorSpec` schema, then run the banned-pattern
 * scan.
 *
 * @throws SpecValidationError when the text is empty, not JSON, or fails the schema.
 * @throws BannedPatternError when a scanned string matches a banned pattern.
 */
function finalizeSpec(
  rawText: string,
  source: Exclude<GenerationResult['source'], 'mock'>,
): GenerationResult {
  const parsed = GeneratorSpec.safeParse(extractJson(rawText));
  if (!parsed.success) throw new SpecValidationError(parsed.error.message);
  scanForInjection(parsed.data);
  return { spec: parsed.data, source };
}

/** Calls the Anthropic Messages API and validates the returned spec. */
async function generateWithAnthropic(
  prompt: string,
  opts: {
    apiKey: string;
    model: string;
    maxTokens: number;
    timeoutMs?: number;
    maxRetries?: number;
  },
): Promise<GenerationResult> {
  const client = new Anthropic({ apiKey: opts.apiKey });

  // Only forward overrides the caller actually set; everything else is left
  // to the SDK.
  const requestOptions: { timeout?: number; maxRetries?: number } = {};
  if (opts.timeoutMs !== undefined) requestOptions.timeout = opts.timeoutMs;
  if (opts.maxRetries !== undefined) requestOptions.maxRetries = opts.maxRetries;

  let text: string;
  try {
    const response = await client.messages.create(
      {
        model: opts.model,
        max_tokens: opts.maxTokens,
        system: SYSTEM_PROMPT,
        messages: [{ role: 'user', content: prompt }],
      },
      requestOptions,
    );
    // Concatenate the text blocks only; narrowing on the `type` discriminant
    // avoids a hand-written type predicate.
    text = response.content
      .flatMap((block) => (block.type === 'text' ? [block.text] : []))
      .join('');
  } catch (err: unknown) {
    if (err instanceof Anthropic.APIConnectionTimeoutError) {
      throw new SpecTimeoutError('spec generation exceeded the time budget');
    }
    throw err;
  }

  return finalizeSpec(text, 'claude');
}

const GLM_ENDPOINT = 'https://open.bigmodel.cn/api/paas/v4/chat/completions';

/** True when `err` is the rejection produced by aborting a fetch. */
function isAbortError(err: unknown): boolean {
  return (
    typeof err === 'object' &&
    err !== null &&
    (err as { name?: unknown }).name === 'AbortError'
  );
}

/**
 * Calls the GLM chat-completions endpoint and validates the returned spec.
 *
 * The abort timer stays armed until the response body has been read, so
 * `timeoutMs` bounds the whole exchange rather than just the wait for
 * response headers.
 */
async function generateWithGlm(
  prompt: string,
  opts: { apiKey: string; model: string; maxTokens: number; timeoutMs?: number },
): Promise<GenerationResult> {
  const controller = new AbortController();
  const timer = opts.timeoutMs ? setTimeout(() => controller.abort(), opts.timeoutMs) : null;

  let content: string | undefined;
  try {
    const res = await fetch(GLM_ENDPOINT, {
      method: 'POST',
      headers: {
        Authorization: `Bearer ${opts.apiKey}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model: opts.model,
        max_tokens: opts.maxTokens,
        messages: [
          { role: 'system', content: SYSTEM_PROMPT },
          { role: 'user', content: prompt },
        ],
      }),
      signal: controller.signal,
    });

    if (!res.ok) {
      // Best-effort: an unreadable error body must not mask the status code.
      const body = await res.text().catch(() => '');
      throw new Error(`glm_api_${res.status}: ${body.slice(0, 200)}`);
    }

    const data = (await res.json()) as {
      choices?: Array<{ message?: { content?: string }; finish_reason?: string }>;
    };
    content = data.choices?.[0]?.message?.content;
  } catch (err: unknown) {
    if (isAbortError(err)) {
      throw new SpecTimeoutError('glm spec generation exceeded the time budget');
    }
    throw err;
  } finally {
    if (timer) clearTimeout(timer);
  }

  if (!content) throw new SpecValidationError('glm_empty_response');
  return finalizeSpec(content, 'glm');
}

/** Model output was empty, not JSON, or did not match the spec schema. */
export class SpecValidationError extends Error {
  override readonly name = 'SpecValidationError';
}

/** A scanned string in the spec matched one of the banned patterns. */
export class BannedPatternError extends Error {
  override readonly name = 'BannedPatternError';
}

/** The provider call exceeded its time budget. */
export class SpecTimeoutError extends Error {
  override readonly name = 'SpecTimeoutError';
}

/**
 * Parses the model's reply as JSON, tolerating a Markdown code fence.
 *
 * The raw text is tried first. Falling back to fence extraction only when
 * that fails keeps a valid, unfenced JSON reply intact even when one of its
 * string values contains triple backticks.
 *
 * @throws SpecValidationError `empty_generation_output` when there is nothing
 *   to parse, `generation_not_json: …` when parsing fails.
 */
function extractJson(text: string): unknown {
  const trimmed = text.trim();
  if (!trimmed) throw new SpecValidationError('empty_generation_output');

  let rawFailure: unknown;
  try {
    return JSON.parse(trimmed);
  } catch (e: unknown) {
    rawFailure = e;
  }

  const fenced = trimmed.match(/```(?:json)?\s*([\s\S]*?)```/);
  if (!fenced) {
    throw new SpecValidationError(`generation_not_json: ${(rawFailure as Error).message}`);
  }

  const body = fenced[1];
  if (!body) throw new SpecValidationError('empty_generation_output');
  try {
    return JSON.parse(body);
  } catch (e: unknown) {
    throw new SpecValidationError(`generation_not_json: ${(e as Error).message}`);
  }
}

/**
 * Public so other layers (the spec-edit merge in apps/api) can re-scan a
 * user-edited spec without duplicating the pattern list — single source of
 * truth for what counts as obviously-dangerous LLM output.
 *
 * For every tool, scans its name, description, implementation, and each
 * input parameter's string `description`.
 *
 * @throws BannedPatternError on the first match; the message carries the
 *   source of the pattern that matched.
 */
export function scanForInjection(spec: GeneratorSpecT): void {
  for (const tool of spec.tools) {
    // Collect every string the LLM could have planted a payload in. Downstream
    // AI clients (Claude Desktop, Cursor) read tool.name + every inputSchema
    // description verbatim, so an injection there can pivot the user's AI
    // session — not only the runtime code.
    const surfaces: string[] = [tool.name, tool.description, tool.implementation];
    for (const param of Object.values(tool.inputSchema)) {
      if (param && typeof param === 'object' && 'description' in param) {
        const d = (param as { description?: unknown }).description;
        if (typeof d === 'string') surfaces.push(d);
      }
    }

    for (const text of surfaces) {
      for (const pattern of BANNED_PATTERNS) {
        if (pattern.test(text)) {
          throw new BannedPatternError(`banned_pattern_detected: ${pattern.source}`);
        }
      }
    }
  }
}

/**
 * Static two-tool spec (`echo`, `now`) returned when no LLM key is
 * configured. The first 200 characters of `prompt` are embedded in the
 * description.
 */
export function mockSpec(prompt: string): GeneratorSpecT {
  return {
    name: 'Echo MCP',
    description: `Mock server (no LLM key). Prompt was: ${prompt.slice(0, 200)}`,
    tools: [
      {
        name: 'echo',
        description: 'Echoes the input string back to the caller.',
        inputSchema: {
          message: { type: 'string', description: 'Message to echo back', required: true },
        },
        implementation: `const msg = String(args.message ?? '');\nreturn { content: [{ type: 'text', text: \`echo: \${msg}\` }] };`,
      },
      {
        name: 'now',
        description: 'Returns the current server UTC timestamp.',
        inputSchema: {},
        implementation: `return { content: [{ type: 'text', text: new Date().toISOString() }] };`,
      },
    ],
    resources: [],
    prompts: [],
    requiredSecrets: [],
    scopes: ['mcp:read'],
    dependencies: {},
  };
}