From bc174c1302affc9a2d0768dd9d0425bca5edabda Mon Sep 17 00:00:00 2001
From: Marco Sadjadi <marco@buildmymcpserver.local>
Date: Sat, 23 May 2026 23:50:00 +0200
Subject: [PATCH] feat: tiered LLM (GLM free / Claude paid) + rate limits +
 quota enforcement
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The free tier was hemorrhaging Anthropic cost with no abuse cap (no rate
limit on /preview, Opus default in the build worker, 5-min cache TTL that
made cache-miss the common case). This switches free users to GLM, paid
users to Claude tiers, and tightens every leak found in the audit.

Backend:
- @bmm/llm: GLM provider via Zhipu's OpenAI-compatible endpoint, pickPreviewModel
  + pickBuildModel helpers, plan-aware ModelChoice
- preview-cache TTL 5min -> 24h (kills the cache-miss path)
- /v1/servers/preview: picks model from caller's plan, returns model name to UI
- /v1/servers POST: enforces SERVER_LIMITS per plan (402), rate-limits builds
- daily rate-limit on preview (5/40/150/1000) and build (3/20/100/500)
- /v1/auth/me returns plan so the wizard can show the right model name
- generator worker: GLM default, Anthropic Sonnet fallback if GLM errors

Frontend:
- Wizard fetches plan, shows "<model> is drafting the tool spec" pre-emptively,
  upgrade hint for hobby users, friendly errors for 402 / 429
- Pricing page: AI-model line per tier (Open-tier / Haiku / Sonnet / Opus),
  Team €149 -> €199, Enterprise €499 -> €999, daily-preview limit per tier
- Privacy + Security: explicit subprocessor disclosure for Anthropic (US) /
  Zhipu (CN) and which tier uses which

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 apps/api/src/config.ts                        |   2 +
 apps/api/src/lib/plan.ts                      |  23 ++
 apps/api/src/lib/preview-cache.ts             |   7 +-
 apps/api/src/lib/rate-limit.ts                |  51 ++++
 apps/api/src/routes/auth.ts                   |   6 +-
 apps/api/src/routes/servers.ts                |  76 +++++-
 apps/generator/src/config.ts                  |   3 +-
 apps/generator/src/lib/claude.ts              |  32 ++-
 apps/generator/src/worker.ts                  |  65 ++++--
 apps/web/app/(dashboard)/servers/new/page.tsx |  61 ++++-
 apps/web/app/(marketing)/pricing/page.tsx     |  22 +-
 apps/web/app/(marketing)/privacy/page.tsx     |  20 +-
 apps/web/app/(marketing)/security/page.tsx    |   6 +-
 packages/llm/src/index.ts                     | 221 +++++++++++++++++-
 14 files changed, 537 insertions(+), 58 deletions(-)
 create mode 100644 apps/api/src/lib/plan.ts
 create mode 100644 apps/api/src/lib/rate-limit.ts
diff --git a/apps/api/src/config.ts b/apps/api/src/config.ts
index b5e2a4f..03cec2b 100644
--- a/apps/api/src/config.ts
+++ b/apps/api/src/config.ts
@@ -8,6 +8,7 @@ const Env = z.object({
   NEXT_PUBLIC_APP_URL: z.string().default('http://localhost:3001'),
   OAUTH_KEY_DIR: z.string().default('./keys'),
   ANTHROPIC_API_KEY: z.string().optional(),
+  GLM_API_KEY: z.string().optional(),
   SECRETS_ENCRYPTION_KEY: z
     .string()
     .min(64, '32 bytes hex required')
@@ -33,6 +34,7 @@ export const config = Env.parse({
   NEXT_PUBLIC_APP_URL: process.env.NEXT_PUBLIC_APP_URL,
   OAUTH_KEY_DIR: process.env.OAUTH_KEY_DIR,
   ANTHROPIC_API_KEY: process.env.ANTHROPIC_API_KEY,
+  GLM_API_KEY: process.env.GLM_API_KEY,
   SECRETS_ENCRYPTION_KEY: process.env.SECRETS_ENCRYPTION_KEY,
   CONTROL_PLANE_PUBLIC_URL: process.env.CONTROL_PLANE_PUBLIC_URL,
   ADMIN_EMAIL: process.env.ADMIN_EMAIL,
diff --git a/apps/api/src/lib/plan.ts b/apps/api/src/lib/plan.ts
new file mode 100644
index 0000000..02c3c7a
--- /dev/null
+++ b/apps/api/src/lib/plan.ts
@@ -0,0 +1,23 @@
+import { createDb, eq, organizations } from '@bmm/db';
+import type { Plan } from '@bmm/llm';
+
+const db = createDb();
+
+/** Look up an org's current plan. Falls back to 'hobby' when the org row is
+ *  missing or holds an unrecognised value — fail-closed to the cheapest tier. */
+export async function getOrgPlan(orgId: string): Promise<Plan> {
+  const [row] = await db
+    .select({ plan: organizations.plan })
+    .from(organizations)
+    .where(eq(organizations.id, orgId))
+    .limit(1);
+  return Object.keys(SERVER_LIMITS).includes(row?.plan ?? '') ? (row?.plan as Plan) : 'hobby';
+}
+
+/** Max MCP servers per org by plan. Enforced at POST /v1/servers. */
+export const SERVER_LIMITS: Record<Plan, number> = {
+  hobby: 1,
+  pro: 5,
+  team: 25,
+  enterprise: Number.MAX_SAFE_INTEGER,
+};
diff --git a/apps/api/src/lib/preview-cache.ts b/apps/api/src/lib/preview-cache.ts
index 1a68bcb..269cd9d 100644
--- a/apps/api/src/lib/preview-cache.ts
+++ b/apps/api/src/lib/preview-cache.ts
@@ -1,8 +1,11 @@
 import crypto from 'node:crypto';
-import { getRedis } from './redis.js';
 import type { GeneratorSpec } from '@bmm/types';
+import { getRedis } from './redis.js';
 
-const TTL_SECONDS = 5 * 60;
+// 24h: previews are LLM-priced; a long TTL eliminates the cache-miss path on
+// the build worker (each miss = another LLM call). Specs are tiny JSON (~5KB),
+// Redis-memory impact is negligible.
+const TTL_SECONDS = 24 * 60 * 60;
 
 function key(previewId: string): string {
   return `preview:${previewId}`;
diff --git a/apps/api/src/lib/rate-limit.ts b/apps/api/src/lib/rate-limit.ts
new file mode 100644
index 0000000..0fcf3df
--- /dev/null
+++ b/apps/api/src/lib/rate-limit.ts
@@ -0,0 +1,51 @@
+import type { Plan } from '@bmm/llm';
+import { getRedis } from './redis.js';
+
+const DAY_SEC = 24 * 60 * 60;
+
+function todayKey(): string {
+  return new Date().toISOString().slice(0, 10);
+}
+
+export interface RateLimitResult {
+  ok: boolean;
+  remaining: number;
+  resetIn: number;
+}
+
+/**
+ * Daily counter via Redis INCR. Atomic — no race window between read & write.
+ * Quota resets when the UTC date in the key rolls over; the TTL only cleans up.
+ */
+export async function checkDailyLimit(
+  scope: string,
+  userId: string,
+  max: number,
+): Promise<RateLimitResult> {
+  const key = `ratelimit:${scope}:${userId}:${todayKey()}`;
+  const redis = getRedis();
+  const count = await redis.incr(key);
+  if (count === 1) await redis.expire(key, DAY_SEC);
+  return {
+    ok: count <= max,
+    remaining: Math.max(0, max - count),
+    // Seconds until the next UTC midnight — not the key TTL, which outlives it.
+    resetIn: DAY_SEC - (Math.floor(Date.now() / 1000) % DAY_SEC),
+  };
+}
+
+// Per-tier daily limits on the two LLM-priced actions.
+// Preview = ~€0.002-0.015/call · Build = ~€0.005-0.22/call.
+export const PREVIEW_DAILY_LIMIT: Record<Plan, number> = {
+  hobby: 5,
+  pro: 40,
+  team: 150,
+  enterprise: 1000,
+};
+
+export const BUILD_DAILY_LIMIT: Record<Plan, number> = {
+  hobby: 3,
+  pro: 20,
+  team: 100,
+  enterprise: 500,
+};
diff --git a/apps/api/src/routes/auth.ts b/apps/api/src/routes/auth.ts
index d3f2a8e..7b202ed 100644
--- a/apps/api/src/routes/auth.ts
+++ b/apps/api/src/routes/auth.ts
@@ -13,6 +13,7 @@ import type { FastifyInstance } from 'fastify';
 import { z } from 'zod';
 import { config } from '../config.js';
 import { audit } from '../lib/audit.js';
+import { getOrgPlan } from '../lib/plan.js';
 import { sendSms, smsConfigured } from '../lib/sms.js';
 
 const SESSION_COOKIE = 'bmm_session';
@@ -128,7 +129,10 @@ export async function authRoutes(app: FastifyInstance): Promise<void> {
     const token = req.cookies[SESSION_COOKIE];
     const session = await getSession(token);
     if (!session) return reply.code(401).send({ error: 'unauthorized' });
-    return reply.send({ user: session });
+    // Plan is on the org, not the session — look it up fresh so a Stripe
+    // upgrade is reflected without forcing a re-login.
+    const plan = await getOrgPlan(session.orgId);
+    return reply.send({ user: { ...session, plan } });
   });
 
   app.post('/v1/auth/admin/login', async (req, reply) => {
diff --git a/apps/api/src/routes/servers.ts b/apps/api/src/routes/servers.ts
index 355e6ca..c54b780 100644
--- a/apps/api/src/routes/servers.ts
+++ b/apps/api/src/routes/servers.ts
@@ -11,7 +11,13 @@ import {
   sql,
   templates,
 } from '@bmm/db';
-import { BannedPatternError, SpecTimeoutError, SpecValidationError, generateSpec } from '@bmm/llm';
+import {
+  BannedPatternError,
+  SpecTimeoutError,
+  SpecValidationError,
+  generateSpec,
+  pickPreviewModel,
+} from '@bmm/llm';
 import {
   BuildEvent,
   CreateServerInput,
@@ -26,8 +32,10 @@ import { config } from '../config.js';
 import { audit } from '../lib/audit.js';
 import { encryptSecret } from '../lib/crypto.js';
 import { stopContainer } from '../lib/docker.js';
+import { SERVER_LIMITS, getOrgPlan } from '../lib/plan.js';
 import { cacheSpec, loadSpec, overwriteSpec } from '../lib/preview-cache.js';
 import { getBuildQueue } from '../lib/queue.js';
+import { BUILD_DAILY_LIMIT, PREVIEW_DAILY_LIMIT, checkDailyLimit } from '../lib/rate-limit.js';
 import { buildChannel, getSubscriber } from '../lib/redis.js';
 import { requireAuth } from '../plugins/session.js';
 import { getForkRefTemplate } from './templates.js';
@@ -46,26 +54,47 @@ export async function serverRoutes(app: FastifyInstance): Promise<void> {
   });
 
   app.post('/v1/servers/preview', { preHandler: requireAuth }, async (req, reply) => {
+    const user = req.user!;
     const parsed = PreviewInput.safeParse(req.body);
     if (!parsed.success) {
       return reply.code(400).send({ error: 'invalid_input', issues: parsed.error.flatten() });
     }
+
+    const plan = await getOrgPlan(user.orgId);
+
+    // Daily preview rate-limit per user. Free is tight (5/day) because every
+    // preview is a paid LLM call; paid tiers have headroom for real iteration.
+    const rl = await checkDailyLimit('preview', user.userId, PREVIEW_DAILY_LIMIT[plan]);
+    if (!rl.ok) {
+      return reply.code(429).send({
+        error: 'rate_limited',
+        detail: `Daily preview limit reached for plan "${plan}" (${PREVIEW_DAILY_LIMIT[plan]}/day). Resets in ${Math.ceil(rl.resetIn / 3600)}h.`,
+        plan,
+        limit: PREVIEW_DAILY_LIMIT[plan],
+        resetIn: rl.resetIn,
+      });
+    }
+
+    const choice = pickPreviewModel(plan);
+
     try {
       const { spec, source } = await generateSpec(parsed.data.prompt, {
+        provider: choice.provider,
         apiKey: config.ANTHROPIC_API_KEY,
-        // Preview generates the spec synchronously inside an HTTP request that
-        // sits behind Cloudflare's edge timeout. Haiku 4.5 (~200 tok/s — a full
-        // 8k-token spec in ~40s) is the only model fast enough; Sonnet and Opus
-        // overran the proxy cap, which reached the browser as a CORS error. The
-        // hard 60s timeout guarantees a clean 504 before the proxy gives up.
-        model: 'claude-haiku-4-5-20251001',
-        timeoutMs: 60_000,
+        glmApiKey: config.GLM_API_KEY,
+        model: choice.model,
+        maxTokens: choice.maxTokens,
+        timeoutMs: choice.timeoutMs,
         maxRetries: 0,
       });
       const previewId = await cacheSpec(spec);
       return reply.send({
         previewId,
         source,
+        plan,
+        modelDisplayName: choice.displayName,
+        modelBadge: choice.displayBadge,
+        upgradeHint: plan === 'hobby',
         spec: {
           name: spec.name,
           description: spec.description,
@@ -112,6 +141,37 @@ export async function serverRoutes(app: FastifyInstance): Promise<void> {
       templateId,
     } = parsed.data;
 
+    // ---- Plan enforcement (must happen before any DB write) ----
+    const plan = await getOrgPlan(user.orgId);
+
+    // Daily build rate-limit.
+    const rl = await checkDailyLimit('build', user.userId, BUILD_DAILY_LIMIT[plan]);
+    if (!rl.ok) {
+      return reply.code(429).send({
+        error: 'rate_limited',
+        detail: `Daily build limit reached for plan "${plan}" (${BUILD_DAILY_LIMIT[plan]}/day). Resets in ${Math.ceil(rl.resetIn / 3600)}h.`,
+        plan,
+        limit: BUILD_DAILY_LIMIT[plan],
+        resetIn: rl.resetIn,
+      });
+    }
+
+    // Server-count quota. Counted via SQL (not cached) so race risk is tiny.
+    const [serverCountRow] = await db
+      .select({ count: sql<number>`count(*)::int` })
+      .from(mcpServers)
+      .where(eq(mcpServers.orgId, user.orgId));
+    const existingCount = serverCountRow?.count ?? 0;
+    if (existingCount >= SERVER_LIMITS[plan]) {
+      return reply.code(402).send({
+        error: 'plan_limit_reached',
+        detail: `Plan "${plan}" allows ${SERVER_LIMITS[plan]} server(s); you have ${existingCount}. Upgrade to add more.`,
+        plan,
+        limit: SERVER_LIMITS[plan],
+        current: existingCount,
+      });
+    }
+
     // ---- Template-fork validation ----
     // templateId is user-controlled. To prevent fork_count manipulation + garbage
     // template_id rows, the user MUST have hit POST /v1/templates/:slug/fork,
diff --git a/apps/generator/src/config.ts b/apps/generator/src/config.ts
index c8d25ec..9fafb0d 100644
--- a/apps/generator/src/config.ts
+++ b/apps/generator/src/config.ts
@@ -4,13 +4,14 @@ const Env = z.object({
   DATABASE_URL: z.string(),
   REDIS_URL: z.string().default('redis://localhost:6379'),
   ANTHROPIC_API_KEY: z.string().optional(),
+  GLM_API_KEY: z.string().optional(),
   RUNNER_HOST: z.string().default('localhost'),
   RUNNER_PORT_RANGE_START: z.coerce.number().default(4100),
   RUNNER_PORT_RANGE_END: z.coerce.number().default(4999),
   CONTROL_PLANE_URL: z.string().default('http://host.docker.internal:4000'),
   CONTROL_PLANE_PUBLIC_URL: z.string().default('http://localhost:4000'),
   OAUTH_ISSUER: z.string().optional(),
-  MODEL_GENERATE: z.string().default('claude-opus-4-7'),
+  MODEL_GENERATE: z.string().default('glm-4.5'),
   MODEL_FIX: z.string().default('claude-haiku-4-5-20251001'),
 });
 
diff --git a/apps/generator/src/lib/claude.ts b/apps/generator/src/lib/claude.ts
index 5299421..2928465 100644
--- a/apps/generator/src/lib/claude.ts
+++ b/apps/generator/src/lib/claude.ts
@@ -1,12 +1,40 @@
-import { generateSpec as sharedGenerate, type GenerationResult } from '@bmm/llm';
+import { type GenerationResult, generateSpec as sharedGenerate } from '@bmm/llm';
 import { config } from '../config.js';
 
 export type { GenerationResult };
 
+/**
+ * Build-worker spec generation (cache-miss path, no proxy timeout). Tries GLM
+ * first to stay cheap, then Anthropic Sonnet if GLM fails and a key is set.
+ * NOTE(review): GLM runs for every plan here, yet the privacy page limits
+ * Zhipu processing to the Hobby tier — confirm paid builds never hit this.
+ */
 export async function generateSpec(prompt: string): Promise<GenerationResult> {
+  if (config.GLM_API_KEY) {
+    try {
+      return await sharedGenerate(prompt, {
+        provider: 'glm',
+        glmApiKey: config.GLM_API_KEY,
+        model: config.MODEL_GENERATE,
+        maxTokens: 8192,
+        timeoutMs: 180_000,
+      });
+    } catch (err) {
+      // No Anthropic key → no real fallback; fail instead of shipping mockSpec.
+      if (!config.ANTHROPIC_API_KEY) throw err;
+      const reason = err instanceof Error ? err.message : String(err);
+      console.warn('[generator] GLM failed, falling back to Anthropic Sonnet:', reason);
+    }
+  }
+  if (!config.ANTHROPIC_API_KEY) {
+    // No keys at all → @bmm/llm returns mockSpec, which keeps builds working
+    // in dev without any provider configured.
+    return sharedGenerate(prompt, { provider: 'anthropic' });
+  }
   return sharedGenerate(prompt, {
+    provider: 'anthropic',
     apiKey: config.ANTHROPIC_API_KEY,
-    model: config.MODEL_GENERATE,
+    model: 'claude-sonnet-4-6',
     maxTokens: 8192,
   });
 }
diff --git a/apps/generator/src/worker.ts b/apps/generator/src/worker.ts
index f09904f..fd30f6f 100644
--- a/apps/generator/src/worker.ts
+++ b/apps/generator/src/worker.ts
@@ -1,13 +1,13 @@
+import { builds, createDb, eq, mcpServers } from '@bmm/db';
+import { GeneratorSpec } from '@bmm/types';
 import { Worker } from 'bullmq';
 import { Redis } from 'ioredis';
-import { GeneratorSpec } from '@bmm/types';
-import { builds, createDb, eq, mcpServers } from '@bmm/db';
 import { config } from './config.js';
-import { generateSpec } from './lib/claude.js';
-import { renderServerCode } from './lib/render.js';
 import { dockerBuild, prepareBuildContext, staticCheck } from './lib/build.js';
+import { generateSpec } from './lib/claude.js';
 import { allocatePort, deployContainer, dockerAvailable, stopContainer } from './lib/deploy.js';
 import { emitDone, emitError, emitLog, emitStatus } from './lib/emit.js';
+import { renderServerCode } from './lib/render.js';
 
 const db = createDb();
 const connection = new Redis(config.REDIS_URL, { maxRetriesPerRequest: null });
@@ -57,12 +57,18 @@ export const worker = new Worker<JobData>(
     const oldContainerId = priorState?.containerId ?? null;
 
     try {
-      await db.update(builds).set({ status: 'generating', startedAt: new Date() }).where(eq(builds.id, buildId));
-      await db.update(mcpServers).set({ status: 'generating', updatedAt: new Date() }).where(eq(mcpServers.id, serverId));
+      await db
+        .update(builds)
+        .set({ status: 'generating', startedAt: new Date() })
+        .where(eq(builds.id, buildId));
+      await db
+        .update(mcpServers)
+        .set({ status: 'generating', updatedAt: new Date() })
+        .where(eq(mcpServers.id, serverId));
       await emitStatus(buildId, 'generating');
 
       let spec: GeneratorSpec | null = null;
-      let source: 'claude' | 'mock' | 'cached' = 'mock';
+      let source: 'claude' | 'glm' | 'mock' | 'cached' = 'mock';
 
       if (previewId) {
         spec = await loadCachedSpec(previewId);
@@ -87,7 +93,10 @@ export const worker = new Worker<JobData>(
       let generatedCode: string;
       const prebuilt = previewId ? await loadPrebuiltCode(previewId) : null;
       if (prebuilt) {
-        await log('info', `Using pre-rendered template code (${prebuilt.length} chars) — skipping render`);
+        await log(
+          'info',
+          `Using pre-rendered template code (${prebuilt.length} chars) — skipping render`,
+        );
         generatedCode = prebuilt;
       } else {
         generatedCode = renderServerCode(spec);
@@ -98,11 +107,20 @@ export const worker = new Worker<JobData>(
         .where(eq(builds.id, buildId));
 
       await db.update(builds).set({ status: 'building' }).where(eq(builds.id, buildId));
-      await db.update(mcpServers).set({ status: 'building', toolsSchema: spec.tools, updatedAt: new Date() }).where(eq(mcpServers.id, serverId));
+      await db
+        .update(mcpServers)
+        .set({ status: 'building', toolsSchema: spec.tools, updatedAt: new Date() })
+        .where(eq(mcpServers.id, serverId));
       await emitStatus(buildId, 'building');
       await log('info', 'Preparing build context...');
 
-      const { contextDir, imageTag } = await prepareBuildContext(serverId, version, slug, generatedCode, spec);
+      const { contextDir, imageTag } = await prepareBuildContext(
+        serverId,
+        version,
+        slug,
+        generatedCode,
+        spec,
+      );
       await log('info', `Build context at ${contextDir}`);
 
       await log('info', 'Running static checks...');
@@ -112,8 +130,14 @@ export const worker = new Worker<JobData>(
       const hasDocker = await dockerAvailable();
       if (!hasDocker) {
         await log('warn', 'Docker not available — skipping build/deploy. Server marked draft.');
-        await db.update(builds).set({ status: 'failed', errorMessage: 'docker_unavailable', finishedAt: new Date() }).where(eq(builds.id, buildId));
-        await db.update(mcpServers).set({ status: 'failed', updatedAt: new Date() }).where(eq(mcpServers.id, serverId));
+        await db
+          .update(builds)
+          .set({ status: 'failed', errorMessage: 'docker_unavailable', finishedAt: new Date() })
+          .where(eq(builds.id, buildId));
+        await db
+          .update(mcpServers)
+          .set({ status: 'failed', updatedAt: new Date() })
+          .where(eq(mcpServers.id, serverId));
         await emitDone(buildId, 'failed', serverId, null);
         return;
       }
@@ -125,7 +149,10 @@ export const worker = new Worker<JobData>(
       await log('info', 'Image built.');
 
       await db.update(builds).set({ status: 'deploying' }).where(eq(builds.id, buildId));
-      await db.update(mcpServers).set({ status: 'deploying', updatedAt: new Date() }).where(eq(mcpServers.id, serverId));
+      await db
+        .update(mcpServers)
+        .set({ status: 'deploying', updatedAt: new Date() })
+        .where(eq(mcpServers.id, serverId));
       await emitStatus(buildId, 'deploying');
 
       const port = await allocatePort();
@@ -140,7 +167,10 @@ export const worker = new Worker<JobData>(
       };
 
       const handle = await deployContainer({ serverId, slug, hostPort: port, imageTag, envVars });
-      await log('info', `Container ${handle.containerId.slice(0, 12)} running at ${handle.publicUrl}`);
+      await log(
+        'info',
+        `Container ${handle.containerId.slice(0, 12)} running at ${handle.publicUrl}`,
+      );
 
       await db
         .update(builds)
@@ -148,7 +178,12 @@ export const worker = new Worker<JobData>(
         .where(eq(builds.id, buildId));
       await db
         .update(mcpServers)
-        .set({ status: 'live', currentVersion: version, publicUrl: handle.publicUrl, updatedAt: new Date() })
+        .set({
+          status: 'live',
+          currentVersion: version,
+          publicUrl: handle.publicUrl,
+          updatedAt: new Date(),
+        })
         .where(eq(mcpServers.id, serverId));
 
       // Rolling deploy: the new container is live — now retire the previous one.
diff --git a/apps/web/app/(dashboard)/servers/new/page.tsx b/apps/web/app/(dashboard)/servers/new/page.tsx
index bac0745..786dbec 100644
--- a/apps/web/app/(dashboard)/servers/new/page.tsx
+++ b/apps/web/app/(dashboard)/servers/new/page.tsx
@@ -7,6 +7,7 @@ import { StreamingLogs } from '@/components/streaming-logs';
 import { Button } from '@/components/ui/button';
 import { apiFetch } from '@/lib/api';
 import { Loader2, RotateCcw, X } from 'lucide-react';
+import Link from 'next/link';
 import { useRouter, useSearchParams } from 'next/navigation';
 import { Suspense, useEffect, useState } from 'react';
 
@@ -41,9 +42,15 @@ interface PreviewTool {
   inputSchema: Record<string, unknown>;
 }
 
+type Plan = 'hobby' | 'pro' | 'team' | 'enterprise';
+
 interface PreviewResponse {
   previewId: string;
-  source: 'claude' | 'mock';
+  source: 'claude' | 'glm' | 'mock';
+  plan?: Plan;
+  modelDisplayName?: string;
+  modelBadge?: 'open-tier' | 'claude-haiku' | 'claude-sonnet' | 'claude-opus';
+  upgradeHint?: boolean;
   spec: {
     name: string;
     description?: string;
@@ -53,6 +60,13 @@ interface PreviewResponse {
   };
 }
 
+const PREVIEW_MODEL_BY_PLAN: Record<Plan, { name: string; estimate: string }> = {
+  hobby: { name: 'Open-tier AI', estimate: '30–60 seconds' },
+  pro: { name: 'Claude Haiku 4.5', estimate: '10–20 seconds' },
+  team: { name: 'Claude Sonnet 4.6', estimate: '15–40 seconds' },
+  enterprise: { name: 'Claude Sonnet 4.6', estimate: '15–40 seconds' },
+};
+
 interface EditableTool {
   name: string;
   description: string;
@@ -86,6 +100,7 @@ function NewServerPageInner() {
   const router = useRouter();
   const [step, setStep] = useState<Step>('prompt');
   const [elapsedSec, setElapsedSec] = useState(0);
+  const [userPlan, setUserPlan] = useState<Plan | null>(null);
 
   const [prompt, setPrompt] = useState('');
   const [name, setName] = useState('');
@@ -207,6 +222,14 @@ function NewServerPageInner() {
     return () => clearInterval(id);
   }, [step]);
 
+  // Plan determines which model the preview will use — we display its name
+  // *before* the request so the user knows what they're waiting for.
+  useEffect(() => {
+    apiFetch<{ user: { plan?: Plan } }>('/v1/auth/me')
+      .then((r) => setUserPlan(r.user.plan ?? 'hobby'))
+      .catch(() => setUserPlan('hobby'));
+  }, []);
+
   async function analyze() {
     setError(null);
     if (prompt.trim().length < 10) {
@@ -358,13 +381,23 @@ function NewServerPageInner() {
       setServerId(res.server.id);
       setStep('building');
     } catch (e) {
-      const detail = (e as { detail?: { error?: string; detail?: unknown } }).detail;
+      const detail = (e as { detail?: { error?: string; detail?: string } }).detail;
       const code = detail?.error;
-      setError(
-        code === 'slug_taken'
-          ? `The slug "${slug}" is already used by one of your servers — change the Slug field above.`
-          : (code ?? (e as Error).message),
-      );
+      if (code === 'slug_taken') {
+        setError(
+          `The slug "${slug}" is already used by one of your servers — change the Slug field above.`,
+        );
+        return;
+      }
+      if (code === 'plan_limit_reached') {
+        setError(`${detail?.detail ?? 'Plan limit reached.'} See /pricing to upgrade.`);
+        return;
+      }
+      if (code === 'rate_limited') {
+        setError(detail?.detail ?? 'Daily build limit reached — try again tomorrow or upgrade.');
+        return;
+      }
+      setError(detail?.detail ?? code ?? (e as Error).message);
     }
   }
 
@@ -457,8 +490,18 @@ function NewServerPageInner() {
           <Loader2 className="mx-auto animate-spin text-[--color-accent]" size={22} />
           <p className="mt-4 text-[13px]">Analyzing your prompt…</p>
           <p className="mt-1 text-[12px] text-[--color-fg-subtle]">
-            Claude is drafting the tool spec. Usually 15–40 seconds.
+            {(userPlan ? PREVIEW_MODEL_BY_PLAN[userPlan] : PREVIEW_MODEL_BY_PLAN.hobby).name} is
+            drafting the tool spec. Usually{' '}
+            {(userPlan ? PREVIEW_MODEL_BY_PLAN[userPlan] : PREVIEW_MODEL_BY_PLAN.hobby).estimate}.
           </p>
+          {userPlan === 'hobby' && (
+            <p className="mt-2 text-[11px] text-[--color-fg-muted]">
+              <Link href="/pricing" className="text-[--color-accent] hover:underline">
+                Upgrade to Pro
+              </Link>{' '}
+              for ~3× faster analysis with Claude Haiku.
+            </p>
+          )}
           <p className="mono mt-3 text-[11px] tabular-nums text-[--color-fg-muted]">
             {elapsedSec}s elapsed
           </p>
@@ -524,7 +567,7 @@ function NewServerPageInner() {
                   </button>
                 )}
                 <span className="mono text-[10.5px] text-[--color-fg-subtle]">
-                  spec via {preview.source}
+                  drafted with {preview.modelDisplayName ?? preview.source}
                 </span>
               </div>
             </div>
diff --git a/apps/web/app/(marketing)/pricing/page.tsx b/apps/web/app/(marketing)/pricing/page.tsx
index 798bf24..52dde61 100644
--- a/apps/web/app/(marketing)/pricing/page.tsx
+++ b/apps/web/app/(marketing)/pricing/page.tsx
@@ -14,9 +14,12 @@ const TIERS = [
     price: '€0',
     tag: 'Forever free',
     description: 'For trying things out and shipping single-user tools.',
+    model: 'Open-tier AI',
+    modelDetail: 'Free-tier model · ~30-60s analyze',
     features: [
       '1 MCP server',
       '100,000 tool calls / month',
+      '5 prompt analyses / day',
       'BuildMyMCP subdomain',
       'Community support',
     ],
@@ -28,9 +31,12 @@ const TIERS = [
     price: '€49',
     tag: '/ month',
     description: 'For solo founders and small teams shipping production tools.',
+    model: 'Claude Haiku 4.5',
+    modelDetail: 'Anthropic · ~10-20s analyze',
     features: [
       '5 MCP servers',
       '1M tool calls / month',
+      '40 prompt analyses / day',
       'Custom domain',
       'Priority build queue',
       'Email support, 1 business-day SLA',
@@ -41,12 +47,15 @@ const TIERS = [
   },
   {
     name: 'Team',
-    price: '€149',
+    price: '€199',
     tag: '/ month',
     description: 'For teams with RBAC, audit, and 99.9% SLA needs.',
+    model: 'Claude Sonnet 4.6',
+    modelDetail: "Anthropic's flagship",
     features: [
       '25 MCP servers',
       '10M tool calls / month',
+      '150 prompt analyses / day',
       'RBAC + extended audit log',
       '99.9% uptime SLA',
       'Shared Slack channel support',
@@ -56,9 +65,11 @@ const TIERS = [
   },
   {
     name: 'Enterprise',
-    price: '€499+',
+    price: '€999+',
     tag: '/ month',
     description: 'For organizations bringing their own cloud, SSO and dedicated infra.',
+    model: 'Sonnet + Opus on build',
+    modelDetail: 'EU data-residency option',
     features: [
       'Unlimited servers',
       'BYOC (AWS, GCP, Azure, Hetzner)',
@@ -122,6 +133,13 @@ export default function Pricing() {
             <p className="mt-2 text-[12px] leading-relaxed text-[--color-fg-muted]">
               {t.description}
             </p>
+            <div className="mt-3 rounded-md border border-[--color-border] bg-[--color-bg-subtle] px-2.5 py-1.5">
+              <div className="text-[10.5px] uppercase tracking-wider text-[--color-fg-subtle]">
+                AI model
+              </div>
+              <div className="mt-0.5 text-[12.5px] font-medium text-[--color-fg]">{t.model}</div>
+              <div className="text-[10.5px] text-[--color-fg-subtle]">{t.modelDetail}</div>
+            </div>
             <ul className="mt-4 space-y-1.5 text-[12.5px] text-[--color-fg-muted]">
               {t.features.map((f) => (
                 <li key={f}>— {f}</li>
diff --git a/apps/web/app/(marketing)/privacy/page.tsx b/apps/web/app/(marketing)/privacy/page.tsx
index 16a0be3..5d088bd 100644
--- a/apps/web/app/(marketing)/privacy/page.tsx
+++ b/apps/web/app/(marketing)/privacy/page.tsx
@@ -36,11 +36,21 @@ const SECTIONS = [
   {
     h: 'Subprocessors',
     p: [
-      "Anthropic (generation) — only the prompt text you send. Anthropic's data-retention policy applies.",
-      'Hetzner (compute).',
-      'Backblaze (encrypted backups).',
-      'Stripe (billing).',
-      'Cloudflare (DNS + DDoS).',
+      "Anthropic, USA (Claude AI — used for prompt analysis and code generation on Pro / Team / Enterprise tiers). Only the prompt text and resulting spec are sent. Anthropic's data-retention policy applies.",
+      'Zhipu AI, China (GLM model — used for prompt analysis on the free Hobby tier only). Only the prompt text and resulting spec are sent. Upgrade to a paid tier to keep all AI processing within Anthropic (US).',
+      'Hetzner, Germany (compute).',
+      'Backblaze, EU (encrypted backups).',
+      'Stripe, Ireland (billing).',
+      'Cloudflare (DNS + DDoS protection).',
+    ],
+  },
+  {
+    h: 'AI processing per tier',
+    p: [
+      'Hobby (free): prompts are sent to Zhipu AI (GLM, China) for analysis. Choose a paid tier if your prompts contain data that must not leave the EU/US.',
+      'Pro: prompts are sent to Anthropic (Claude Haiku 4.5, USA).',
+      'Team: prompts are sent to Anthropic (Claude Sonnet 4.6, USA).',
+      'Enterprise: Anthropic (Claude Sonnet + Opus, USA) with EU-data-residency opt-in available on request.',
     ],
   },
   {
diff --git a/apps/web/app/(marketing)/security/page.tsx b/apps/web/app/(marketing)/security/page.tsx
index 51c76d1..cf70159 100644
--- a/apps/web/app/(marketing)/security/page.tsx
+++ b/apps/web/app/(marketing)/security/page.tsx
@@ -40,7 +40,11 @@ const PILLARS = [
   },
   {
     title: 'Rate limiting',
-    body: 'Default 100 requests/min/IP per tool, enforced at the Traefik layer before traffic ever reaches your container.',
+    body: 'Default 100 requests/min/IP per tool, enforced at the Traefik layer before traffic ever reaches your container. Daily preview + build caps per tier protect against runaway LLM spend.',
+  },
+  {
+    title: 'AI provider by tier — transparent',
+    body: "Hobby (free) tier uses Zhipu's GLM model (servers in China) for prompt analysis — chosen for cost so we can offer a real free tier. Pro, Team and Enterprise use Anthropic Claude (US). Enterprise can request EU-only data residency. The provider is shown live in the wizard so you always know where your prompt is going.",
   },
 ];
 
diff --git a/packages/llm/src/index.ts b/packages/llm/src/index.ts
index c02d593..29046d8 100644
--- a/packages/llm/src/index.ts
+++ b/packages/llm/src/index.ts
@@ -45,18 +45,138 @@ const BANNED_PATTERNS = [
   /disregard\s+(the\s+)?(above|previous)/i,
 ];
 
+// ──────────────────────────────────────────────────────────────────────────
+// Plan-aware model selection
+// ──────────────────────────────────────────────────────────────────────────
+
+export type Plan = 'hobby' | 'pro' | 'team' | 'enterprise'; // subscription tiers; key type of the per-plan model tables
+export type Purpose = 'preview' | 'build'; // the two generation paths, each with its own model table
+export type Provider = 'anthropic' | 'glm'; // LLM backend; generateSpec branches on this value
+export type DisplayBadge = 'open-tier' | 'claude-haiku' | 'claude-sonnet' | 'claude-opus'; // badge id carried by ModelChoice.displayBadge
+
+export interface ModelChoice { // one resolved (provider, model, limits, label) bundle for a plan
+  provider: Provider; // backend that serves the request
+  model: string; // provider-specific model id, e.g. 'glm-4-plus'
+  maxTokens: number; // output-token cap for the request
+  timeoutMs: number; // request time budget in milliseconds
+  /** User-facing model name shown in the wizard + previews. */
+  displayName: string;
+  displayBadge: DisplayBadge; // badge identifier that accompanies displayName
+}
+
+/**
+ * Per-plan preview model. Preview runs synchronously inside an HTTP request
+ * behind Cloudflare's ~100s edge cap, so each tier's (model + max_tokens +
+ * timeout) is meant to fit. Hobby uses GLM as the cost lever; paid tiers
+ * escalate to Claude — the visible quality/speed jump *is* the upgrade pitch.
+ * Measured token rates: glm-4-plus ~58 tok/s (3500 tok ≈ 60s) · Claude Haiku 4.5
+ * ~200 tok/s (8192 tok ≈ 41s) · Claude Sonnet 4.6 ~80 tok/s. NOTE(review): 8192 tok
+ * at ~80 tok/s ≈ 102s, so Sonnet's 60s timeout (not max_tokens) is the bound — confirm.
+ */
+const PREVIEW_MODELS: Record<Plan, ModelChoice> = {
+  hobby: {
+    provider: 'glm',
+    model: 'glm-4-plus',
+    maxTokens: 3500,
+    timeoutMs: 65_000,
+    displayName: 'Open-tier AI',
+    displayBadge: 'open-tier',
+  },
+  pro: {
+    provider: 'anthropic',
+    model: 'claude-haiku-4-5-20251001',
+    maxTokens: 8192,
+    timeoutMs: 60_000,
+    displayName: 'Claude Haiku 4.5',
+    displayBadge: 'claude-haiku',
+  },
+  team: {
+    provider: 'anthropic',
+    model: 'claude-sonnet-4-6',
+    maxTokens: 8192,
+    timeoutMs: 60_000,
+    displayName: 'Claude Sonnet 4.6',
+    displayBadge: 'claude-sonnet',
+  },
+  enterprise: {
+    provider: 'anthropic',
+    model: 'claude-sonnet-4-6',
+    maxTokens: 8192,
+    timeoutMs: 60_000,
+    displayName: 'Claude Sonnet 4.6',
+    displayBadge: 'claude-sonnet',
+  },
+};
+
+/**
+ * Build worker runs async via BullMQ — no proxy timeout. Hobby builds use GLM;
+ * Pro/Team use the same Claude model as their preview tier so paid-tier prompts
+ * stay with Anthropic, as the privacy/security pages state; Enterprise gets Opus.
+ */
+const BUILD_MODELS: Record<Plan, ModelChoice> = {
+  hobby: {
+    provider: 'glm',
+    model: 'glm-4.5',
+    maxTokens: 8192,
+    timeoutMs: 180_000,
+    displayName: 'Open-tier AI',
+    displayBadge: 'open-tier',
+  },
+  pro: {
+    provider: 'anthropic',
+    model: 'claude-haiku-4-5-20251001',
+    maxTokens: 8192,
+    timeoutMs: 180_000,
+    displayName: 'Claude Haiku 4.5',
+    displayBadge: 'claude-haiku',
+  },
+  team: {
+    provider: 'anthropic',
+    model: 'claude-sonnet-4-6',
+    maxTokens: 8192,
+    timeoutMs: 180_000,
+    displayName: 'Claude Sonnet 4.6',
+    displayBadge: 'claude-sonnet',
+  },
+  enterprise: {
+    provider: 'anthropic',
+    model: 'claude-opus-4-7',
+    maxTokens: 8192,
+    timeoutMs: 600_000,
+    displayName: 'Claude Opus 4.7',
+    displayBadge: 'claude-opus',
+  },
+};
+
+export function pickPreviewModel(plan: Plan): ModelChoice {
+  return PREVIEW_MODELS[plan]; // plain table lookup; Record<Plan, …> guarantees an entry per plan at the type level
+}
+
+export function pickBuildModel(plan: Plan): ModelChoice {
+  return BUILD_MODELS[plan]; // plain table lookup; Record<Plan, …> guarantees an entry per plan at the type level
+}
+
+// ──────────────────────────────────────────────────────────────────────────
+// Generation API
+// ──────────────────────────────────────────────────────────────────────────
+
 export interface GenerationResult {
   spec: GeneratorSpecT;
-  source: 'claude' | 'mock';
+  source: 'claude' | 'glm' | 'mock'; // backend that produced the spec; 'mock' when the needed API key was not supplied
 }
 
 export interface GenerateOptions {
+  /** Backend to call; generateSpec treats an omitted value as 'anthropic'. */
+  provider?: Provider;
+  /** Anthropic API key. If missing on the Anthropic path, a mock spec is returned. */
   apiKey?: string;
+  /** Zhipu (GLM) API key. If missing while provider === 'glm', a mock spec is returned. */
+  glmApiKey?: string;
   model?: string;
   maxTokens?: number;
-  /** Per-attempt request timeout in ms. Omit to use the SDK default. */
+  /** Request timeout in ms: per-attempt for the Anthropic SDK, an abort timer for GLM. */
   timeoutMs?: number;
-  /** SDK retry count. Omit to use the SDK default. */
+  /** SDK retry count. Anthropic only. */
   maxRetries?: number;
 }
 
@@ -64,9 +184,40 @@ export async function generateSpec(
   prompt: string,
   opts: GenerateOptions = {},
 ): Promise<GenerationResult> {
+  const provider = opts.provider ?? 'anthropic';
+
+  if (provider === 'glm') {
+    if (!opts.glmApiKey) return { spec: mockSpec(prompt), source: 'mock' };
+    return generateWithGlm(prompt, {
+      apiKey: opts.glmApiKey,
+      model: opts.model ?? 'glm-4-plus',
+      maxTokens: opts.maxTokens ?? 4096,
+      timeoutMs: opts.timeoutMs,
+    });
+  }
+
   if (!opts.apiKey) {
     return { spec: mockSpec(prompt), source: 'mock' };
   }
+  return generateWithAnthropic(prompt, {
+    apiKey: opts.apiKey,
+    model: opts.model ?? 'claude-opus-4-7',
+    maxTokens: opts.maxTokens ?? 8192,
+    timeoutMs: opts.timeoutMs,
+    maxRetries: opts.maxRetries,
+  });
+}
+
+async function generateWithAnthropic(
+  prompt: string,
+  opts: {
+    apiKey: string;
+    model: string;
+    maxTokens: number;
+    timeoutMs?: number;
+    maxRetries?: number;
+  },
+): Promise<GenerationResult> {
   const client = new Anthropic({ apiKey: opts.apiKey });
   const requestOptions: { timeout?: number; maxRetries?: number } = {};
   if (opts.timeoutMs !== undefined) requestOptions.timeout = opts.timeoutMs;
@@ -75,35 +226,81 @@ export async function generateSpec(
   const response = await client.messages
     .create(
       {
-        model: opts.model ?? 'claude-opus-4-7',
-        max_tokens: opts.maxTokens ?? 8192,
+        model: opts.model,
+        max_tokens: opts.maxTokens,
         system: SYSTEM_PROMPT,
         messages: [{ role: 'user', content: prompt }],
       },
       requestOptions,
     )
     .catch((err: unknown) => {
-      // A per-attempt timeout surfaces as APIConnectionTimeoutError once the
-      // SDK exhausts retries. Map it to a typed error so the API layer returns
-      // a clean 504 instead of letting the edge proxy time out headerless.
       if (err instanceof Anthropic.APIConnectionTimeoutError) {
         throw new SpecTimeoutError('spec generation exceeded the time budget');
       }
       throw err;
     });
+
   const text = response.content
     .filter((b): b is { type: 'text'; text: string } => b.type === 'text')
     .map((b) => b.text)
     .join('');
   const json = extractJson(text);
   const parsed = GeneratorSpec.safeParse(json);
-  if (!parsed.success) {
-    throw new SpecValidationError(parsed.error.message);
-  }
+  if (!parsed.success) throw new SpecValidationError(parsed.error.message);
   scanForInjection(parsed.data);
   return { spec: parsed.data, source: 'claude' };
 }
 
+const GLM_ENDPOINT = 'https://open.bigmodel.cn/api/paas/v4/chat/completions';
+
+/**
+ * Calls Zhipu's OpenAI-compatible chat endpoint and validates the reply as a GeneratorSpec.
+ * opts.timeoutMs bounds the whole exchange — headers and body — not just the fetch() call.
+ * @throws SpecTimeoutError on abort; SpecValidationError on an empty or schema-invalid reply.
+ */
+async function generateWithGlm(
+  prompt: string,
+  opts: { apiKey: string; model: string; maxTokens: number; timeoutMs?: number },
+): Promise<GenerationResult> {
+  const controller = new AbortController();
+  const timer = opts.timeoutMs ? setTimeout(() => controller.abort(), opts.timeoutMs) : null;
+  let content: string | undefined;
+  try {
+    const res = await fetch(GLM_ENDPOINT, {
+      method: 'POST',
+      headers: { Authorization: `Bearer ${opts.apiKey}`, 'Content-Type': 'application/json' },
+      body: JSON.stringify({
+        model: opts.model,
+        max_tokens: opts.maxTokens,
+        messages: [
+          { role: 'system', content: SYSTEM_PROMPT },
+          { role: 'user', content: prompt },
+        ],
+      }),
+      signal: controller.signal,
+    });
+    if (!res.ok) {
+      const body = await res.text().catch(() => '');
+      throw new Error(`glm_api_${res.status}: ${body.slice(0, 200)}`);
+    }
+    // fetch() resolves on headers; read the body inside the try so the abort timer covers it too.
+    const data = (await res.json()) as { choices?: Array<{ message?: { content?: string } }> };
+    content = data.choices?.[0]?.message?.content;
+  } catch (err) {
+    if (controller.signal.aborted || (err as { name?: string }).name === 'AbortError') {
+      throw new SpecTimeoutError('glm spec generation exceeded the time budget');
+    }
+    throw err;
+  } finally {
+    if (timer) clearTimeout(timer);
+  }
+  if (!content) throw new SpecValidationError('glm_empty_response');
+  const parsed = GeneratorSpec.safeParse(extractJson(content));
+  if (!parsed.success) throw new SpecValidationError(parsed.error.message);
+  scanForInjection(parsed.data);
+  return { spec: parsed.data, source: 'glm' };
+}
+
 export class SpecValidationError extends Error {
   override readonly name = 'SpecValidationError';
 }
@@ -141,7 +338,7 @@ function scanForInjection(spec: GeneratorSpecT): void {
 export function mockSpec(prompt: string): GeneratorSpecT {
   return {
     name: 'Echo MCP',
-    description: `Mock server (no ANTHROPIC_API_KEY). Prompt was: ${prompt.slice(0, 200)}`,
+    description: `Mock server (no LLM key). Prompt was: ${prompt.slice(0, 200)}`,
     tools: [
       {
         name: 'echo',