buildmymcpserver/packages/llm/src/index.ts

import Anthropic from '@anthropic-ai/sdk';
import { GeneratorSpec, type GeneratorSpec as GeneratorSpecT } from '@bmm/types';

/**
 * System prompt sent with every generation request: it is passed as the
 * Anthropic `system` field and as the `system`-role message for GLM.
 *
 * It pins the JSON shape the model must emit (the shape that is subsequently
 * validated with `GeneratorSpec.safeParse`) and the coding rules for tool
 * implementations. The text is runtime input to the model, so treat any
 * wording change as a behaviour change, not a cosmetic one.
 */
export const SYSTEM_PROMPT = `You generate production-grade MCP server specifications as STRICT JSON.

Output ONE JSON object (no markdown, no prose, no code fences) with this exact shape:

{
  "name": "human-readable server name (max 128 chars)",
  "description": "1-2 sentence purpose",
  "tools": [
    {
      "name": "snake_case_tool_name",
      "description": "what the AI client sees — single sentence, clear",
      "inputSchema": {
        "param_name": { "type": "string|number|boolean|array|object", "description": "...", "required": true }
      },
      "implementation": "ASYNC TypeScript body. Receives {args} pre-validated. Must return MCP content blocks: { content: [{ type: 'text', text: '...' }] }. Use process.env.SECRET_NAME for secrets. NEVER use eval/Function/child_process. Use globalThis.fetch for HTTP. Wrap external calls in try/catch and return { content: [{ type: 'text', text: 'Error: ...' }], isError: true } on failure."
    }
  ],
  "resources": [],
  "prompts": [],
  "requiredSecrets": ["UPPER_SNAKE_CASE"],
  "scopes": ["mcp:read"],
  "dependencies": {}
}

Rules:
- Tools are idempotent unless the description explicitly says destructive.
- Validate all string inputs before use.
- For databases: parameterized queries only (use the 'pg' library with $1 placeholders).
- For HTTP APIs: globalThis.fetch with explicit timeout via AbortSignal.timeout(10000).
- Never hardcode credentials; declare them under requiredSecrets and read via process.env.
- Keep tool implementations under 5000 characters.
- Do not include "import" statements in implementations — the runtime injects fetch, pg, etc.

Return JSON only. No explanation.`;

// Regex blacklist — explicitly NOT a security boundary, just an early-warning
// for obviously-dangerous LLM output. The real defence is the Docker
// hardening in apps/generator/src/lib/deploy.ts (--cap-drop=ALL etc.). A
// determined attacker can bypass any of these with string concatenation
// (`'chi'+'ld_process'`) or alternate APIs — that's why container isolation
// has to hold even when this fails.
//
// Consumed by scanForInjection, which tests every pattern against each tool's
// name, description, implementation and parameter descriptions. None of the
// patterns carries the `g` or `y` flag, so RegExp.prototype.test keeps no
// lastIndex state between calls and the shared instances can be reused.
const BANNED_PATTERNS = [
  // Dynamic code evaluation.
  /\beval\s*\(/,
  /\bnew\s+Function\s*\(/,
  /\bFunction\s*\(\s*['"`]/, // Function('...') without `new`
  // Process spawning (the second pattern also covers the first; both are kept).
  /\brequire\s*\(\s*['"]child_process['"]/,
  /\bchild_process\b/,
  // Node internals / native module loading.
  /\bprocess\.binding\b/,
  /\bprocess\.dlopen\b/,
  /\.constructor\s*\.\s*constructor\b/, // [].constructor.constructor('...')
  // NOTE(review): presumably aimed at Module._load — confirm the intended target.
  /\b_load\s*\(/,
  /\bvm\.runIn(This|New)Context\b/,
  /globalThis\s*\[\s*['"`]/, // globalThis['Fun'+'ction']
  // Prompt-injection phrasing (case-insensitive) aimed at downstream AI clients.
  /ignore\s+previous\s+instructions/i,
  /disregard\s+(the\s+)?(above|previous)/i,
  /system\s+prompt\s+override/i,
];

// ──────────────────────────────────────────────────────────────────────────
// Plan-aware model selection
// ──────────────────────────────────────────────────────────────────────────

/** Subscription tier; the key type of the PREVIEW_MODELS and BUILD_MODELS tables. */
export type Plan = 'hobby' | 'pro' | 'team' | 'enterprise';
/**
 * Generation phase.
 * NOTE(review): not referenced elsewhere in this file; presumably callers use
 * it to choose between the preview and build model tables — confirm.
 */
export type Purpose = 'preview' | 'build';
/** LLM backend that generateSpec can dispatch to. */
export type Provider = 'anthropic' | 'glm';
/** Badge identifier stored in ModelChoice.displayBadge. */
export type DisplayBadge = 'open-tier' | 'claude-haiku' | 'claude-sonnet' | 'claude-opus';

/**
 * One row of a plan → model table: which backend and model to call, the
 * token/time budget for the call, and how the model is presented to the user.
 */
export interface ModelChoice {
  /** Backend that serves `model`. */
  provider: Provider;
  /** Provider-specific model identifier (e.g. 'glm-4-plus'). */
  model: string;
  /** Output token cap for the request. */
  maxTokens: number;
  /** Time budget for the request, in milliseconds. */
  timeoutMs: number;
  /** User-facing model name shown in the wizard + previews. */
  displayName: string;
  /** Badge identifier paired with `displayName`. */
  displayBadge: DisplayBadge;
}

// Sonnet settings shared by the team and enterprise preview rows. Spread into
// a fresh object per plan so the rows never alias one mutable entry.
const SONNET_PREVIEW_CHOICE = {
  provider: 'anthropic',
  model: 'claude-sonnet-4-6',
  maxTokens: 8192,
  timeoutMs: 60_000,
  displayName: 'Claude Sonnet 4.6',
  displayBadge: 'claude-sonnet',
} as const;

/**
 * Model table for the synchronous preview path, keyed by plan.
 *
 * Preview runs inside an HTTP request behind Cloudflare's ~100s edge cap, so
 * each row pairs a model with a max_tokens / timeout budget meant to fit under
 * that cap. Hobby is served by GLM as the cost lever; paid tiers escalate to
 * Claude — the visible quality/speed jump *is* the upgrade pitch.
 *
 * Measured token rates: glm-4-plus ~58 tok/s (3500 tok ≈ 60s) ·
 * Claude Haiku 4.5 ~200 tok/s (8192 tok ≈ 41s) · Claude Sonnet 4.6 ~80 tok/s.
 *
 * NOTE(review): at the quoted ~80 tok/s a response that uses all 8192 tokens
 * would need ≈102s, which exceeds the 60s timeout on the Sonnet rows — confirm
 * that typical preview output stays well below the token cap.
 */
const PREVIEW_MODELS: Record<Plan, ModelChoice> = {
  hobby: {
    provider: 'glm',
    model: 'glm-4-plus',
    maxTokens: 3500,
    timeoutMs: 65_000,
    displayName: 'Open-tier AI',
    displayBadge: 'open-tier',
  },
  pro: {
    provider: 'anthropic',
    model: 'claude-haiku-4-5-20251001',
    maxTokens: 8192,
    timeoutMs: 60_000,
    displayName: 'Claude Haiku 4.5',
    displayBadge: 'claude-haiku',
  },
  team: { ...SONNET_PREVIEW_CHOICE },
  enterprise: { ...SONNET_PREVIEW_CHOICE },
};

// GLM settings shared by every plan that builds on the open tier. Spread into
// a fresh object per plan so the rows never alias one mutable entry.
const GLM_BUILD_CHOICE = {
  provider: 'glm',
  model: 'glm-4.5',
  maxTokens: 8192,
  timeoutMs: 180_000,
  displayName: 'Open-tier AI',
  displayBadge: 'open-tier',
} as const;

/**
 * Model table for the build worker, keyed by plan.
 *
 * The build worker runs async via BullMQ, so there is no proxy timeout to fit
 * under. With the 24h preview cache TTL, cache misses are rare; GLM as the
 * default keeps that rare path cheap, while Enterprise gets Opus as a
 * premium-quality promise.
 */
const BUILD_MODELS: Record<Plan, ModelChoice> = {
  hobby: { ...GLM_BUILD_CHOICE },
  pro: { ...GLM_BUILD_CHOICE },
  team: { ...GLM_BUILD_CHOICE },
  enterprise: {
    provider: 'anthropic',
    model: 'claude-opus-4-7',
    maxTokens: 8192,
    timeoutMs: 600_000,
    displayName: 'Claude Opus 4.7',
    displayBadge: 'claude-opus',
  },
};

/**
 * Looks up the preview-path model configuration for a plan.
 *
 * @param plan - Subscription tier to look up.
 * @returns The PREVIEW_MODELS row for `plan` (the table's own object, not a copy).
 */
export function pickPreviewModel(plan: Plan): ModelChoice {
  const { [plan]: choice } = PREVIEW_MODELS;
  return choice;
}

/**
 * Looks up the build-worker model configuration for a plan.
 *
 * @param plan - Subscription tier to look up.
 * @returns The BUILD_MODELS row for `plan` (the table's own object, not a copy).
 */
export function pickBuildModel(plan: Plan): ModelChoice {
  const { [plan]: choice } = BUILD_MODELS;
  return choice;
}

// ──────────────────────────────────────────────────────────────────────────
// Generation API
// ──────────────────────────────────────────────────────────────────────────

/** Outcome of generateSpec: the validated spec plus which path produced it. */
export interface GenerationResult {
  /** Spec that passed GeneratorSpec validation (or the canned mock spec). */
  spec: GeneratorSpecT;
  /**
   * 'claude' for the Anthropic path, 'glm' for the GLM path, 'mock' when the
   * selected provider had no API key and the canned spec was returned instead.
   */
  source: 'claude' | 'glm' | 'mock';
}

/**
 * Options for generateSpec. Every field is optional; when the API key for the
 * selected provider is missing, generateSpec returns the mock spec instead of
 * calling an LLM.
 */
export interface GenerateOptions {
  /** 'anthropic' (default) or 'glm'. */
  provider?: Provider;
  /** Anthropic API key — required if provider === 'anthropic'. */
  apiKey?: string;
  /** Zhipu (GLM) API key — required if provider === 'glm'. */
  glmApiKey?: string;
  /** Model identifier. Defaults to 'glm-4-plus' for GLM and 'claude-opus-4-7' for Anthropic. */
  model?: string;
  /** Output token cap. Defaults to 4096 for GLM and 8192 for Anthropic. */
  maxTokens?: number;
  /** Per-attempt request timeout in ms. */
  timeoutMs?: number;
  /** SDK retry count. Anthropic only. */
  maxRetries?: number;
}

/**
 * Generates an MCP server spec from a natural-language prompt.
 *
 * Dispatches to GLM when `opts.provider` is 'glm' and to Anthropic otherwise.
 * If the API key for the selected backend is missing, no request is made and
 * the canned mock spec is returned with `source: 'mock'`.
 *
 * @param prompt - User description of the server to generate.
 * @param opts - Provider selection, credentials and request budget.
 * @returns The validated spec and the path that produced it.
 */
export async function generateSpec(
  prompt: string,
  opts: GenerateOptions = {},
): Promise<GenerationResult> {
  const wantsGlm = (opts.provider ?? 'anthropic') === 'glm';
  const credential = wantsGlm ? opts.glmApiKey : opts.apiKey;

  // No credential for the selected backend: fall back to the canned spec.
  if (!credential) {
    return { spec: mockSpec(prompt), source: 'mock' };
  }

  if (wantsGlm) {
    return generateWithGlm(prompt, {
      apiKey: credential,
      model: opts.model ?? 'glm-4-plus',
      maxTokens: opts.maxTokens ?? 4096,
      timeoutMs: opts.timeoutMs,
    });
  }

  return generateWithAnthropic(prompt, {
    apiKey: credential,
    model: opts.model ?? 'claude-opus-4-7',
    maxTokens: opts.maxTokens ?? 8192,
    timeoutMs: opts.timeoutMs,
    maxRetries: opts.maxRetries,
  });
}

/**
 * Requests a spec from the Anthropic Messages API and validates the reply.
 *
 * @param prompt - User description, sent as the single user message.
 * @param opts - API key, model, token cap, and optional per-request timeout /
 *   retry overrides (forwarded to the SDK only when defined).
 * @returns The validated spec with `source: 'claude'`.
 * @throws SpecTimeoutError when the SDK reports a connection timeout.
 * @throws SpecValidationError when the reply is not JSON or fails the schema.
 * @throws BannedPatternError when scanForInjection rejects the spec.
 */
async function generateWithAnthropic(
  prompt: string,
  opts: {
    apiKey: string;
    model: string;
    maxTokens: number;
    timeoutMs?: number;
    maxRetries?: number;
  },
): Promise<GenerationResult> {
  const { apiKey, model, maxTokens, timeoutMs, maxRetries } = opts;
  const client = new Anthropic({ apiKey });

  // Only forward overrides the caller actually set, so SDK defaults apply otherwise.
  const requestOptions: { timeout?: number; maxRetries?: number } = {
    ...(timeoutMs !== undefined ? { timeout: timeoutMs } : {}),
    ...(maxRetries !== undefined ? { maxRetries } : {}),
  };

  let response;
  try {
    response = await client.messages.create(
      {
        model,
        max_tokens: maxTokens,
        system: SYSTEM_PROMPT,
        messages: [{ role: 'user', content: prompt }],
      },
      requestOptions,
    );
  } catch (err: unknown) {
    // Translate the SDK's timeout into the module's own error type; pass
    // everything else through untouched.
    if (err instanceof Anthropic.APIConnectionTimeoutError) {
      throw new SpecTimeoutError('spec generation exceeded the time budget');
    }
    throw err;
  }

  // Concatenate the text blocks in order; non-text blocks are skipped.
  let text = '';
  for (const block of response.content) {
    if (block.type === 'text') text += block.text;
  }

  const validation = GeneratorSpec.safeParse(extractJson(text));
  if (!validation.success) throw new SpecValidationError(validation.error.message);
  scanForInjection(validation.data);
  return { spec: validation.data, source: 'claude' };
}

/** Zhipu (GLM) chat-completions endpoint used by generateWithGlm. */
const GLM_ENDPOINT = 'https://open.bigmodel.cn/api/paas/v4/chat/completions';

/**
 * Requests a spec from the GLM chat-completions API and validates the reply.
 *
 * The timeout covers the whole exchange — connecting, receiving headers AND
 * reading the response body. `fetch()` resolves as soon as headers arrive, so
 * the abort timer must stay armed until the body has been consumed; otherwise
 * a stalled body would hang the call with no deadline.
 *
 * @param prompt - User description, sent as the user message.
 * @param opts - API key, model, token cap and optional time budget in ms
 *   (a missing or zero `timeoutMs` disables the deadline).
 * @returns The validated spec with `source: 'glm'`.
 * @throws SpecTimeoutError when the time budget elapses before the body is read.
 * @throws Error (`glm_api_<status>: …`) on a non-2xx response.
 * @throws SpecValidationError on an empty reply, non-JSON output or a schema failure.
 * @throws BannedPatternError when scanForInjection rejects the spec.
 */
async function generateWithGlm(
  prompt: string,
  opts: { apiKey: string; model: string; maxTokens: number; timeoutMs?: number },
): Promise<GenerationResult> {
  const controller = new AbortController();
  const timer = opts.timeoutMs ? setTimeout(() => controller.abort(), opts.timeoutMs) : null;
  let content: string | undefined;
  try {
    const res = await fetch(GLM_ENDPOINT, {
      method: 'POST',
      headers: {
        Authorization: `Bearer ${opts.apiKey}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model: opts.model,
        max_tokens: opts.maxTokens,
        messages: [
          { role: 'system', content: SYSTEM_PROMPT },
          { role: 'user', content: prompt },
        ],
      }),
      signal: controller.signal,
    });
    if (!res.ok) {
      // Best-effort body excerpt for diagnostics; a failed read must not mask the status.
      const body = await res.text().catch(() => '');
      throw new Error(`glm_api_${res.status}: ${body.slice(0, 200)}`);
    }
    const data = (await res.json()) as {
      choices?: Array<{ message?: { content?: string }; finish_reason?: string }>;
    };
    content = data.choices?.[0]?.message?.content;
  } catch (err) {
    // The controller is only ever aborted by our own timer, so an aborted
    // signal (or an AbortError surfacing from fetch / the body read) means
    // the time budget ran out.
    const name = (err as { name?: string } | null | undefined)?.name;
    if (controller.signal.aborted || name === 'AbortError') {
      throw new SpecTimeoutError('glm spec generation exceeded the time budget');
    }
    throw err;
  } finally {
    // Disarm only after the body has been fully consumed (or the call failed).
    if (timer) clearTimeout(timer);
  }
  if (!content) throw new SpecValidationError('glm_empty_response');
  const json = extractJson(content);
  const parsed = GeneratorSpec.safeParse(json);
  if (!parsed.success) throw new SpecValidationError(parsed.error.message);
  scanForInjection(parsed.data);
  return { spec: parsed.data, source: 'glm' };
}

/**
 * Thrown when model output cannot be turned into a valid spec: the output is
 * empty, is not parseable JSON, or fails GeneratorSpec schema validation.
 */
export class SpecValidationError extends Error {
  override readonly name = 'SpecValidationError';
}

/**
 * Thrown by scanForInjection when a scanned string matches one of the
 * BANNED_PATTERNS; the message carries the source of the matching regex.
 */
export class BannedPatternError extends Error {
  override readonly name = 'BannedPatternError';
}

/**
 * Thrown when a generation request runs out of its time budget: the Anthropic
 * SDK reported a connection timeout, or the GLM request was aborted.
 */
export class SpecTimeoutError extends Error {
  override readonly name = 'SpecTimeoutError';
}

/**
 * Parses the JSON payload out of raw model output.
 *
 * Candidates are tried in order, and the first that parses wins:
 *   1. the trimmed text as-is — so valid JSON whose string values happen to
 *      contain ``` (e.g. markdown inside a tool description) is not mistaken
 *      for a fenced reply;
 *   2. the contents of a code fence that wraps the entire reply — so a fenced
 *      reply whose JSON itself contains ``` is not cut short;
 *   3. the contents of the first code fence found anywhere in the text.
 *
 * @param text - Raw text returned by the model.
 * @returns The parsed JSON value (not yet schema-validated).
 * @throws SpecValidationError `empty_generation_output` when there is nothing
 *   to parse, or `generation_not_json: …` when no candidate is valid JSON.
 */
function extractJson(text: string): unknown {
  const trimmed = text.trim();

  // Anchored at both ends, so the lazy group has to extend to the final fence.
  const outerFence = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/);
  for (const candidate of [trimmed, outerFence?.[1]]) {
    if (!candidate) continue;
    try {
      return JSON.parse(candidate);
    } catch {
      // Not JSON in this form — fall through to the next candidate.
    }
  }

  // Last resort: the first fenced block anywhere in the text. Errors are
  // reported against this candidate so the messages stay stable for callers.
  const firstFence = trimmed.match(/```(?:json)?\s*([\s\S]*?)```/);
  const body = firstFence ? firstFence[1] : trimmed;
  if (!body) throw new SpecValidationError('empty_generation_output');
  try {
    return JSON.parse(body);
  } catch (e) {
    const reason = e instanceof Error ? e.message : String(e);
    throw new SpecValidationError(`generation_not_json: ${reason}`);
  }
}

// Returns a parameter's `description` when the parameter is a non-null object
// carrying a string description; otherwise undefined.
function inputParamDescription(param: unknown): string | undefined {
  if (param === null || typeof param !== 'object') return undefined;
  if (!('description' in param)) return undefined;
  const candidate = (param as { description?: unknown }).description;
  return typeof candidate === 'string' ? candidate : undefined;
}

/**
 * Rejects a spec whose tools contain obviously-dangerous text.
 *
 * Exported so other layers (the spec-edit merge in apps/api) can re-scan a
 * user-edited spec without duplicating the pattern list — this stays the
 * single source of truth for what counts as obviously-dangerous LLM output.
 *
 * For every tool, the name, description, implementation and each input
 * parameter's description are checked against BANNED_PATTERNS. Downstream AI
 * clients (Claude Desktop, Cursor) read tool.name and the inputSchema
 * descriptions verbatim, so an injection there can pivot the user's AI
 * session — not only the runtime code.
 *
 * @param spec - Spec to scan; only `spec.tools` is inspected.
 * @throws BannedPatternError on the first match, naming the pattern's source.
 */
export function scanForInjection(spec: GeneratorSpecT): void {
  for (const tool of spec.tools) {
    const surfaces: string[] = [tool.name, tool.description, tool.implementation];
    for (const param of Object.values(tool.inputSchema)) {
      const described = inputParamDescription(param);
      if (described !== undefined) surfaces.push(described);
    }

    for (const surface of surfaces) {
      const offending = BANNED_PATTERNS.find((candidate) => candidate.test(surface));
      if (offending) {
        throw new BannedPatternError(`banned_pattern_detected: ${offending.source}`);
      }
    }
  }
}

/**
 * Builds the canned spec returned when no LLM API key is available.
 *
 * The spec exposes two trivial tools — `echo` and `now` — and records the
 * first 200 characters of the prompt in its description.
 *
 * @param prompt - Original user prompt; only an excerpt is embedded.
 * @returns A fresh spec object on every call.
 */
export function mockSpec(prompt: string): GeneratorSpecT {
  const promptExcerpt = prompt.slice(0, 200);
  const echoImplementation = `const msg = String(args.message ?? '');\nreturn { content: [{ type: 'text', text: \`echo: \${msg}\` }] };`;
  const nowImplementation = `return { content: [{ type: 'text', text: new Date().toISOString() }] };`;

  return {
    name: 'Echo MCP',
    description: `Mock server (no LLM key). Prompt was: ${promptExcerpt}`,
    tools: [
      {
        name: 'echo',
        description: 'Echoes the input string back to the caller.',
        inputSchema: {
          message: { type: 'string', description: 'Message to echo back', required: true },
        },
        implementation: echoImplementation,
      },
      {
        name: 'now',
        description: 'Returns the current server UTC timestamp.',
        inputSchema: {},
        implementation: nowImplementation,
      },
    ],
    resources: [],
    prompts: [],
    requiredSecrets: [],
    scopes: ['mcp:read'],
    dependencies: {},
  };
}