From bc174c1302affc9a2d0768dd9d0425bca5edabda Mon Sep 17 00:00:00 2001 From: Marco Sadjadi Date: Sat, 23 May 2026 23:50:00 +0200 Subject: [PATCH] feat: tiered LLM (GLM free / Claude paid) + rate limits + quota enforcement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The free tier was hemorrhaging Anthropic cost with no abuse cap (no rate limit on /preview, Opus default in the build worker, 5-min cache TTL that made cache-miss the common case). This switches free users to GLM, paid users to Claude tiers, and tightens every leak found in the audit. Backend: - @bmm/llm: GLM provider via Zhipu's OpenAI-compatible endpoint, pickPreviewModel + pickBuildModel helpers, plan-aware ModelChoice - preview-cache TTL 5min -> 24h (kills the cache-miss path) - /v1/servers/preview: picks model from caller's plan, returns model name to UI - /v1/servers POST: enforces SERVER_LIMITS per plan (402), rate-limits builds - daily rate-limit on preview (5/40/150/1000) and build (3/20/100/500) - /v1/auth/me returns plan so the wizard can show the right model name - generator worker: GLM default, Anthropic Sonnet fallback if GLM errors Frontend: - Wizard fetches plan, shows "<model> is drafting the tool spec" pre-emptively, upgrade hint for hobby users, friendly errors for 402 / 429 - Pricing page: AI-model line per tier (Open-tier / Haiku / Sonnet / Opus), Team €149 -> €199, Enterprise €499 -> €999, daily-preview limit per tier - Privacy + Security: explicit subprocessor disclosure for Anthropic (US) / Zhipu (CN) and which tier uses which Co-Authored-By: Claude Opus 4.7 (1M context) --- apps/api/src/config.ts | 2 + apps/api/src/lib/plan.ts | 23 ++ apps/api/src/lib/preview-cache.ts | 7 +- apps/api/src/lib/rate-limit.ts | 51 ++++ apps/api/src/routes/auth.ts | 6 +- apps/api/src/routes/servers.ts | 76 +++++- apps/generator/src/config.ts | 3 +- apps/generator/src/lib/claude.ts | 32 ++- apps/generator/src/worker.ts | 65 ++++-- 
apps/web/app/(dashboard)/servers/new/page.tsx | 61 ++++- apps/web/app/(marketing)/pricing/page.tsx | 22 +- apps/web/app/(marketing)/privacy/page.tsx | 20 +- apps/web/app/(marketing)/security/page.tsx | 6 +- packages/llm/src/index.ts | 221 +++++++++++++++++- 14 files changed, 537 insertions(+), 58 deletions(-) create mode 100644 apps/api/src/lib/plan.ts create mode 100644 apps/api/src/lib/rate-limit.ts diff --git a/apps/api/src/config.ts b/apps/api/src/config.ts index b5e2a4f..03cec2b 100644 --- a/apps/api/src/config.ts +++ b/apps/api/src/config.ts @@ -8,6 +8,7 @@ const Env = z.object({ NEXT_PUBLIC_APP_URL: z.string().default('http://localhost:3001'), OAUTH_KEY_DIR: z.string().default('./keys'), ANTHROPIC_API_KEY: z.string().optional(), + GLM_API_KEY: z.string().optional(), SECRETS_ENCRYPTION_KEY: z .string() .min(64, '32 bytes hex required') @@ -33,6 +34,7 @@ export const config = Env.parse({ NEXT_PUBLIC_APP_URL: process.env.NEXT_PUBLIC_APP_URL, OAUTH_KEY_DIR: process.env.OAUTH_KEY_DIR, ANTHROPIC_API_KEY: process.env.ANTHROPIC_API_KEY, + GLM_API_KEY: process.env.GLM_API_KEY, SECRETS_ENCRYPTION_KEY: process.env.SECRETS_ENCRYPTION_KEY, CONTROL_PLANE_PUBLIC_URL: process.env.CONTROL_PLANE_PUBLIC_URL, ADMIN_EMAIL: process.env.ADMIN_EMAIL, diff --git a/apps/api/src/lib/plan.ts b/apps/api/src/lib/plan.ts new file mode 100644 index 0000000..02c3c7a --- /dev/null +++ b/apps/api/src/lib/plan.ts @@ -0,0 +1,23 @@ +import { createDb, eq, organizations } from '@bmm/db'; +import type { Plan } from '@bmm/llm'; + +const db = createDb(); + +/** Look up an org's current plan. Defaults to 'hobby' if the org row is gone + * for any reason — fail-closed to the least expensive tier. */ +export async function getOrgPlan(orgId: string): Promise { + const [row] = await db + .select({ plan: organizations.plan }) + .from(organizations) + .where(eq(organizations.id, orgId)) + .limit(1); + return (row?.plan ?? 'hobby') as Plan; +} + +/** Max MCP servers per org by plan. 
Enforced at POST /v1/servers. */ +export const SERVER_LIMITS: Record = { + hobby: 1, + pro: 5, + team: 25, + enterprise: Number.MAX_SAFE_INTEGER, +}; diff --git a/apps/api/src/lib/preview-cache.ts b/apps/api/src/lib/preview-cache.ts index 1a68bcb..269cd9d 100644 --- a/apps/api/src/lib/preview-cache.ts +++ b/apps/api/src/lib/preview-cache.ts @@ -1,8 +1,11 @@ import crypto from 'node:crypto'; -import { getRedis } from './redis.js'; import type { GeneratorSpec } from '@bmm/types'; +import { getRedis } from './redis.js'; -const TTL_SECONDS = 5 * 60; +// 24h: previews are LLM-priced; a long TTL eliminates the cache-miss path on +// the build worker (each miss = another LLM call). Specs are tiny JSON (~5KB), +// Redis-memory impact is negligible. +const TTL_SECONDS = 24 * 60 * 60; function key(previewId: string): string { return `preview:${previewId}`; diff --git a/apps/api/src/lib/rate-limit.ts b/apps/api/src/lib/rate-limit.ts new file mode 100644 index 0000000..0fcf3df --- /dev/null +++ b/apps/api/src/lib/rate-limit.ts @@ -0,0 +1,51 @@ +import type { Plan } from '@bmm/llm'; +import { getRedis } from './redis.js'; + +const DAY_SEC = 24 * 60 * 60; + +function todayKey(): string { + return new Date().toISOString().slice(0, 10); +} + +export interface RateLimitResult { + ok: boolean; + remaining: number; + resetIn: number; +} + +/** + * Daily counter via Redis INCR. Atomic — no race window between read & write. + * First INCR (count === 1) sets the TTL so the key auto-rolls at midnight UTC. + */ +export async function checkDailyLimit( + scope: string, + userId: string, + max: number, +): Promise { + const key = `ratelimit:${scope}:${userId}:${todayKey()}`; + const redis = getRedis(); + const count = await redis.incr(key); + if (count === 1) await redis.expire(key, DAY_SEC); + const ttl = await redis.ttl(key); + return { + ok: count <= max, + remaining: Math.max(0, max - count), + resetIn: ttl > 0 ? 
ttl : DAY_SEC, + }; +} + +// Per-tier daily limits on the two LLM-priced actions. +// Preview = ~€0.002-0.015/call · Build = ~€0.005-0.22/call. +export const PREVIEW_DAILY_LIMIT: Record = { + hobby: 5, + pro: 40, + team: 150, + enterprise: 1000, +}; + +export const BUILD_DAILY_LIMIT: Record = { + hobby: 3, + pro: 20, + team: 100, + enterprise: 500, +}; diff --git a/apps/api/src/routes/auth.ts b/apps/api/src/routes/auth.ts index d3f2a8e..7b202ed 100644 --- a/apps/api/src/routes/auth.ts +++ b/apps/api/src/routes/auth.ts @@ -13,6 +13,7 @@ import type { FastifyInstance } from 'fastify'; import { z } from 'zod'; import { config } from '../config.js'; import { audit } from '../lib/audit.js'; +import { getOrgPlan } from '../lib/plan.js'; import { sendSms, smsConfigured } from '../lib/sms.js'; const SESSION_COOKIE = 'bmm_session'; @@ -128,7 +129,10 @@ export async function authRoutes(app: FastifyInstance): Promise { const token = req.cookies[SESSION_COOKIE]; const session = await getSession(token); if (!session) return reply.code(401).send({ error: 'unauthorized' }); - return reply.send({ user: session }); + // Plan is on the org, not the session — look it up fresh so a Stripe + // upgrade is reflected without forcing a re-login. 
+ const plan = await getOrgPlan(session.orgId); + return reply.send({ user: { ...session, plan } }); }); app.post('/v1/auth/admin/login', async (req, reply) => { diff --git a/apps/api/src/routes/servers.ts b/apps/api/src/routes/servers.ts index 355e6ca..c54b780 100644 --- a/apps/api/src/routes/servers.ts +++ b/apps/api/src/routes/servers.ts @@ -11,7 +11,13 @@ import { sql, templates, } from '@bmm/db'; -import { BannedPatternError, SpecTimeoutError, SpecValidationError, generateSpec } from '@bmm/llm'; +import { + BannedPatternError, + SpecTimeoutError, + SpecValidationError, + generateSpec, + pickPreviewModel, +} from '@bmm/llm'; import { BuildEvent, CreateServerInput, @@ -26,8 +32,10 @@ import { config } from '../config.js'; import { audit } from '../lib/audit.js'; import { encryptSecret } from '../lib/crypto.js'; import { stopContainer } from '../lib/docker.js'; +import { SERVER_LIMITS, getOrgPlan } from '../lib/plan.js'; import { cacheSpec, loadSpec, overwriteSpec } from '../lib/preview-cache.js'; import { getBuildQueue } from '../lib/queue.js'; +import { BUILD_DAILY_LIMIT, PREVIEW_DAILY_LIMIT, checkDailyLimit } from '../lib/rate-limit.js'; import { buildChannel, getSubscriber } from '../lib/redis.js'; import { requireAuth } from '../plugins/session.js'; import { getForkRefTemplate } from './templates.js'; @@ -46,26 +54,47 @@ export async function serverRoutes(app: FastifyInstance): Promise { }); app.post('/v1/servers/preview', { preHandler: requireAuth }, async (req, reply) => { + const user = req.user!; const parsed = PreviewInput.safeParse(req.body); if (!parsed.success) { return reply.code(400).send({ error: 'invalid_input', issues: parsed.error.flatten() }); } + + const plan = await getOrgPlan(user.orgId); + + // Daily preview rate-limit per user. Free is tight (5/day) because every + // preview is a paid LLM call; paid tiers have headroom for real iteration. 
+ const rl = await checkDailyLimit('preview', user.userId, PREVIEW_DAILY_LIMIT[plan]); + if (!rl.ok) { + return reply.code(429).send({ + error: 'rate_limited', + detail: `Daily preview limit reached for plan "${plan}" (${PREVIEW_DAILY_LIMIT[plan]}/day). Resets in ${Math.ceil(rl.resetIn / 3600)}h.`, + plan, + limit: PREVIEW_DAILY_LIMIT[plan], + resetIn: rl.resetIn, + }); + } + + const choice = pickPreviewModel(plan); + try { const { spec, source } = await generateSpec(parsed.data.prompt, { + provider: choice.provider, apiKey: config.ANTHROPIC_API_KEY, - // Preview generates the spec synchronously inside an HTTP request that - // sits behind Cloudflare's edge timeout. Haiku 4.5 (~200 tok/s — a full - // 8k-token spec in ~40s) is the only model fast enough; Sonnet and Opus - // overran the proxy cap, which reached the browser as a CORS error. The - // hard 60s timeout guarantees a clean 504 before the proxy gives up. - model: 'claude-haiku-4-5-20251001', - timeoutMs: 60_000, + glmApiKey: config.GLM_API_KEY, + model: choice.model, + maxTokens: choice.maxTokens, + timeoutMs: choice.timeoutMs, maxRetries: 0, }); const previewId = await cacheSpec(spec); return reply.send({ previewId, source, + plan, + modelDisplayName: choice.displayName, + modelBadge: choice.displayBadge, + upgradeHint: plan === 'hobby', spec: { name: spec.name, description: spec.description, @@ -112,6 +141,37 @@ export async function serverRoutes(app: FastifyInstance): Promise { templateId, } = parsed.data; + // ---- Plan enforcement (must happen before any DB write) ---- + const plan = await getOrgPlan(user.orgId); + + // Daily build rate-limit. + const rl = await checkDailyLimit('build', user.userId, BUILD_DAILY_LIMIT[plan]); + if (!rl.ok) { + return reply.code(429).send({ + error: 'rate_limited', + detail: `Daily build limit reached for plan "${plan}" (${BUILD_DAILY_LIMIT[plan]}/day). 
Resets in ${Math.ceil(rl.resetIn / 3600)}h.`, + plan, + limit: BUILD_DAILY_LIMIT[plan], + resetIn: rl.resetIn, + }); + } + + // Server-count quota. Counted via SQL (not cached) so race risk is tiny. + const [serverCountRow] = await db + .select({ count: sql`count(*)::int` }) + .from(mcpServers) + .where(eq(mcpServers.orgId, user.orgId)); + const existingCount = serverCountRow?.count ?? 0; + if (existingCount >= SERVER_LIMITS[plan]) { + return reply.code(402).send({ + error: 'plan_limit_reached', + detail: `Plan "${plan}" allows ${SERVER_LIMITS[plan]} server(s); you have ${existingCount}. Upgrade to add more.`, + plan, + limit: SERVER_LIMITS[plan], + current: existingCount, + }); + } + // ---- Template-fork validation ---- // templateId is user-controlled. To prevent fork_count manipulation + garbage // template_id rows, the user MUST have hit POST /v1/templates/:slug/fork, diff --git a/apps/generator/src/config.ts b/apps/generator/src/config.ts index c8d25ec..9fafb0d 100644 --- a/apps/generator/src/config.ts +++ b/apps/generator/src/config.ts @@ -4,13 +4,14 @@ const Env = z.object({ DATABASE_URL: z.string(), REDIS_URL: z.string().default('redis://localhost:6379'), ANTHROPIC_API_KEY: z.string().optional(), + GLM_API_KEY: z.string().optional(), RUNNER_HOST: z.string().default('localhost'), RUNNER_PORT_RANGE_START: z.coerce.number().default(4100), RUNNER_PORT_RANGE_END: z.coerce.number().default(4999), CONTROL_PLANE_URL: z.string().default('http://host.docker.internal:4000'), CONTROL_PLANE_PUBLIC_URL: z.string().default('http://localhost:4000'), OAUTH_ISSUER: z.string().optional(), - MODEL_GENERATE: z.string().default('claude-opus-4-7'), + MODEL_GENERATE: z.string().default('glm-4.5'), MODEL_FIX: z.string().default('claude-haiku-4-5-20251001'), }); diff --git a/apps/generator/src/lib/claude.ts b/apps/generator/src/lib/claude.ts index 5299421..2928465 100644 --- a/apps/generator/src/lib/claude.ts +++ b/apps/generator/src/lib/claude.ts @@ -1,12 +1,40 @@ -import { 
generateSpec as sharedGenerate, type GenerationResult } from '@bmm/llm'; +import { type GenerationResult, generateSpec as sharedGenerate } from '@bmm/llm'; import { config } from '../config.js'; export type { GenerationResult }; +/** + * Build-worker spec generation (cache-miss path). Runs async in a BullMQ + * worker — no proxy timeout. Defaults to GLM to keep this rare path cheap; + * falls back to Anthropic Sonnet on GLM failure so a temporary outage at one + * provider doesn't break builds. + */ export async function generateSpec(prompt: string): Promise { + if (config.GLM_API_KEY) { + try { + return await sharedGenerate(prompt, { + provider: 'glm', + glmApiKey: config.GLM_API_KEY, + model: config.MODEL_GENERATE, + maxTokens: 8192, + timeoutMs: 180_000, + }); + } catch (err) { + console.warn( + '[generator] GLM failed, falling back to Anthropic Sonnet:', + (err as Error).message, + ); + } + } + if (!config.ANTHROPIC_API_KEY) { + // No keys at all → @bmm/llm returns mockSpec, which keeps builds working + // in dev without any provider configured. 
+ return sharedGenerate(prompt, { provider: 'anthropic' }); + } return sharedGenerate(prompt, { + provider: 'anthropic', apiKey: config.ANTHROPIC_API_KEY, - model: config.MODEL_GENERATE, + model: 'claude-sonnet-4-6', maxTokens: 8192, }); } diff --git a/apps/generator/src/worker.ts b/apps/generator/src/worker.ts index f09904f..fd30f6f 100644 --- a/apps/generator/src/worker.ts +++ b/apps/generator/src/worker.ts @@ -1,13 +1,13 @@ +import { builds, createDb, eq, mcpServers } from '@bmm/db'; +import { GeneratorSpec } from '@bmm/types'; import { Worker } from 'bullmq'; import { Redis } from 'ioredis'; -import { GeneratorSpec } from '@bmm/types'; -import { builds, createDb, eq, mcpServers } from '@bmm/db'; import { config } from './config.js'; -import { generateSpec } from './lib/claude.js'; -import { renderServerCode } from './lib/render.js'; import { dockerBuild, prepareBuildContext, staticCheck } from './lib/build.js'; +import { generateSpec } from './lib/claude.js'; import { allocatePort, deployContainer, dockerAvailable, stopContainer } from './lib/deploy.js'; import { emitDone, emitError, emitLog, emitStatus } from './lib/emit.js'; +import { renderServerCode } from './lib/render.js'; const db = createDb(); const connection = new Redis(config.REDIS_URL, { maxRetriesPerRequest: null }); @@ -57,12 +57,18 @@ export const worker = new Worker( const oldContainerId = priorState?.containerId ?? 
null; try { - await db.update(builds).set({ status: 'generating', startedAt: new Date() }).where(eq(builds.id, buildId)); - await db.update(mcpServers).set({ status: 'generating', updatedAt: new Date() }).where(eq(mcpServers.id, serverId)); + await db + .update(builds) + .set({ status: 'generating', startedAt: new Date() }) + .where(eq(builds.id, buildId)); + await db + .update(mcpServers) + .set({ status: 'generating', updatedAt: new Date() }) + .where(eq(mcpServers.id, serverId)); await emitStatus(buildId, 'generating'); let spec: GeneratorSpec | null = null; - let source: 'claude' | 'mock' | 'cached' = 'mock'; + let source: 'claude' | 'glm' | 'mock' | 'cached' = 'mock'; if (previewId) { spec = await loadCachedSpec(previewId); @@ -87,7 +93,10 @@ export const worker = new Worker( let generatedCode: string; const prebuilt = previewId ? await loadPrebuiltCode(previewId) : null; if (prebuilt) { - await log('info', `Using pre-rendered template code (${prebuilt.length} chars) — skipping render`); + await log( + 'info', + `Using pre-rendered template code (${prebuilt.length} chars) — skipping render`, + ); generatedCode = prebuilt; } else { generatedCode = renderServerCode(spec); @@ -98,11 +107,20 @@ export const worker = new Worker( .where(eq(builds.id, buildId)); await db.update(builds).set({ status: 'building' }).where(eq(builds.id, buildId)); - await db.update(mcpServers).set({ status: 'building', toolsSchema: spec.tools, updatedAt: new Date() }).where(eq(mcpServers.id, serverId)); + await db + .update(mcpServers) + .set({ status: 'building', toolsSchema: spec.tools, updatedAt: new Date() }) + .where(eq(mcpServers.id, serverId)); await emitStatus(buildId, 'building'); await log('info', 'Preparing build context...'); - const { contextDir, imageTag } = await prepareBuildContext(serverId, version, slug, generatedCode, spec); + const { contextDir, imageTag } = await prepareBuildContext( + serverId, + version, + slug, + generatedCode, + spec, + ); await log('info', 
`Build context at ${contextDir}`); await log('info', 'Running static checks...'); @@ -112,8 +130,14 @@ export const worker = new Worker( const hasDocker = await dockerAvailable(); if (!hasDocker) { await log('warn', 'Docker not available — skipping build/deploy. Server marked draft.'); - await db.update(builds).set({ status: 'failed', errorMessage: 'docker_unavailable', finishedAt: new Date() }).where(eq(builds.id, buildId)); - await db.update(mcpServers).set({ status: 'failed', updatedAt: new Date() }).where(eq(mcpServers.id, serverId)); + await db + .update(builds) + .set({ status: 'failed', errorMessage: 'docker_unavailable', finishedAt: new Date() }) + .where(eq(builds.id, buildId)); + await db + .update(mcpServers) + .set({ status: 'failed', updatedAt: new Date() }) + .where(eq(mcpServers.id, serverId)); await emitDone(buildId, 'failed', serverId, null); return; } @@ -125,7 +149,10 @@ export const worker = new Worker( await log('info', 'Image built.'); await db.update(builds).set({ status: 'deploying' }).where(eq(builds.id, buildId)); - await db.update(mcpServers).set({ status: 'deploying', updatedAt: new Date() }).where(eq(mcpServers.id, serverId)); + await db + .update(mcpServers) + .set({ status: 'deploying', updatedAt: new Date() }) + .where(eq(mcpServers.id, serverId)); await emitStatus(buildId, 'deploying'); const port = await allocatePort(); @@ -140,7 +167,10 @@ export const worker = new Worker( }; const handle = await deployContainer({ serverId, slug, hostPort: port, imageTag, envVars }); - await log('info', `Container ${handle.containerId.slice(0, 12)} running at ${handle.publicUrl}`); + await log( + 'info', + `Container ${handle.containerId.slice(0, 12)} running at ${handle.publicUrl}`, + ); await db .update(builds) @@ -148,7 +178,12 @@ export const worker = new Worker( .where(eq(builds.id, buildId)); await db .update(mcpServers) - .set({ status: 'live', currentVersion: version, publicUrl: handle.publicUrl, updatedAt: new Date() }) + .set({ + status: 
'live', + currentVersion: version, + publicUrl: handle.publicUrl, + updatedAt: new Date(), + }) .where(eq(mcpServers.id, serverId)); // Rolling deploy: the new container is live — now retire the previous one. diff --git a/apps/web/app/(dashboard)/servers/new/page.tsx b/apps/web/app/(dashboard)/servers/new/page.tsx index bac0745..786dbec 100644 --- a/apps/web/app/(dashboard)/servers/new/page.tsx +++ b/apps/web/app/(dashboard)/servers/new/page.tsx @@ -7,6 +7,7 @@ import { StreamingLogs } from '@/components/streaming-logs'; import { Button } from '@/components/ui/button'; import { apiFetch } from '@/lib/api'; import { Loader2, RotateCcw, X } from 'lucide-react'; +import Link from 'next/link'; import { useRouter, useSearchParams } from 'next/navigation'; import { Suspense, useEffect, useState } from 'react'; @@ -41,9 +42,15 @@ interface PreviewTool { inputSchema: Record; } +type Plan = 'hobby' | 'pro' | 'team' | 'enterprise'; + interface PreviewResponse { previewId: string; - source: 'claude' | 'mock'; + source: 'claude' | 'glm' | 'mock'; + plan?: Plan; + modelDisplayName?: string; + modelBadge?: 'open-tier' | 'claude-haiku' | 'claude-sonnet' | 'claude-opus'; + upgradeHint?: boolean; spec: { name: string; description?: string; @@ -53,6 +60,13 @@ interface PreviewResponse { }; } +const PREVIEW_MODEL_BY_PLAN: Record = { + hobby: { name: 'Open-tier AI', estimate: '30–60 seconds' }, + pro: { name: 'Claude Haiku 4.5', estimate: '10–20 seconds' }, + team: { name: 'Claude Sonnet 4.6', estimate: '15–40 seconds' }, + enterprise: { name: 'Claude Sonnet 4.6', estimate: '15–40 seconds' }, +}; + interface EditableTool { name: string; description: string; @@ -86,6 +100,7 @@ function NewServerPageInner() { const router = useRouter(); const [step, setStep] = useState('prompt'); const [elapsedSec, setElapsedSec] = useState(0); + const [userPlan, setUserPlan] = useState(null); const [prompt, setPrompt] = useState(''); const [name, setName] = useState(''); @@ -207,6 +222,14 @@ function 
NewServerPageInner() { return () => clearInterval(id); }, [step]); + // Plan determines which model the preview will use — we display its name + // *before* the request so the user knows what they're waiting for. + useEffect(() => { + apiFetch<{ user: { plan?: Plan } }>('/v1/auth/me') + .then((r) => setUserPlan(r.user.plan ?? 'hobby')) + .catch(() => setUserPlan('hobby')); + }, []); + async function analyze() { setError(null); if (prompt.trim().length < 10) { @@ -358,13 +381,23 @@ function NewServerPageInner() { setServerId(res.server.id); setStep('building'); } catch (e) { - const detail = (e as { detail?: { error?: string; detail?: unknown } }).detail; + const detail = (e as { detail?: { error?: string; detail?: string } }).detail; const code = detail?.error; - setError( - code === 'slug_taken' - ? `The slug "${slug}" is already used by one of your servers — change the Slug field above.` - : (code ?? (e as Error).message), - ); + if (code === 'slug_taken') { + setError( + `The slug "${slug}" is already used by one of your servers — change the Slug field above.`, + ); + return; + } + if (code === 'plan_limit_reached') { + setError(`${detail?.detail ?? 'Plan limit reached.'} See /pricing to upgrade.`); + return; + } + if (code === 'rate_limited') { + setError(detail?.detail ?? 'Daily build limit reached — try again tomorrow or upgrade.'); + return; + } + setError(detail?.detail ?? code ?? (e as Error).message); } } @@ -457,8 +490,18 @@ function NewServerPageInner() {

Analyzing your prompt…

- Claude is drafting the tool spec. Usually 15–40 seconds. + {(userPlan ? PREVIEW_MODEL_BY_PLAN[userPlan] : PREVIEW_MODEL_BY_PLAN.hobby).name} is + drafting the tool spec. Usually{' '} + {(userPlan ? PREVIEW_MODEL_BY_PLAN[userPlan] : PREVIEW_MODEL_BY_PLAN.hobby).estimate}.

+ {userPlan === 'hobby' && ( +

+ + Upgrade to Pro + {' '} + for ~3× faster analysis with Claude Haiku. +

+ )}

{elapsedSec}s elapsed

@@ -524,7 +567,7 @@ function NewServerPageInner() { )} - spec via {preview.source} + drafted with {preview.modelDisplayName ?? preview.source} diff --git a/apps/web/app/(marketing)/pricing/page.tsx b/apps/web/app/(marketing)/pricing/page.tsx index 798bf24..52dde61 100644 --- a/apps/web/app/(marketing)/pricing/page.tsx +++ b/apps/web/app/(marketing)/pricing/page.tsx @@ -14,9 +14,12 @@ const TIERS = [ price: '€0', tag: 'Forever free', description: 'For trying things out and shipping single-user tools.', + model: 'Open-tier AI', + modelDetail: 'Free-tier model · ~30-60s analyze', features: [ '1 MCP server', '100,000 tool calls / month', + '5 prompt analyses / day', 'BuildMyMCP subdomain', 'Community support', ], @@ -28,9 +31,12 @@ const TIERS = [ price: '€49', tag: '/ month', description: 'For solo founders and small teams shipping production tools.', + model: 'Claude Haiku 4.5', + modelDetail: 'Anthropic · ~10-20s analyze', features: [ '5 MCP servers', '1M tool calls / month', + '40 prompt analyses / day', 'Custom domain', 'Priority build queue', 'Email support, 1 business-day SLA', @@ -41,12 +47,15 @@ const TIERS = [ }, { name: 'Team', - price: '€149', + price: '€199', tag: '/ month', description: 'For teams with RBAC, audit, and 99.9% SLA needs.', + model: 'Claude Sonnet 4.6', + modelDetail: "Anthropic's flagship", features: [ '25 MCP servers', '10M tool calls / month', + '150 prompt analyses / day', 'RBAC + extended audit log', '99.9% uptime SLA', 'Shared Slack channel support', @@ -56,9 +65,11 @@ const TIERS = [ }, { name: 'Enterprise', - price: '€499+', + price: '€999+', tag: '/ month', description: 'For organizations bringing their own cloud, SSO and dedicated infra.', + model: 'Sonnet + Opus on build', + modelDetail: 'EU data-residency option', features: [ 'Unlimited servers', 'BYOC (AWS, GCP, Azure, Hetzner)', @@ -122,6 +133,13 @@ export default function Pricing() {

{t.description}

+
+
+ AI model +
+
{t.model}
+
{t.modelDetail}
+
    {t.features.map((f) => (
  • — {f}
  • diff --git a/apps/web/app/(marketing)/privacy/page.tsx b/apps/web/app/(marketing)/privacy/page.tsx index 16a0be3..5d088bd 100644 --- a/apps/web/app/(marketing)/privacy/page.tsx +++ b/apps/web/app/(marketing)/privacy/page.tsx @@ -36,11 +36,21 @@ const SECTIONS = [ { h: 'Subprocessors', p: [ - "Anthropic (generation) — only the prompt text you send. Anthropic's data-retention policy applies.", - 'Hetzner (compute).', - 'Backblaze (encrypted backups).', - 'Stripe (billing).', - 'Cloudflare (DNS + DDoS).', + "Anthropic, USA (Claude AI — used for prompt analysis and code generation on Pro / Team / Enterprise tiers). Only the prompt text and resulting spec are sent. Anthropic's data-retention policy applies.", + 'Zhipu AI, China (GLM model — used for prompt analysis on the free Hobby tier only). Only the prompt text and resulting spec are sent. Upgrade to a paid tier to keep all AI processing within Anthropic (US).', + 'Hetzner, Germany (compute).', + 'Backblaze, EU (encrypted backups).', + 'Stripe, Ireland (billing).', + 'Cloudflare (DNS + DDoS protection).', + ], + }, + { + h: 'AI processing per tier', + p: [ + 'Hobby (free): prompts are sent to Zhipu AI (GLM, China) for analysis. 
Choose a paid tier if your prompts contain data that must not leave the EU/US.', + 'Pro: prompts are sent to Anthropic (Claude Haiku 4.5, USA).', + 'Team: prompts are sent to Anthropic (Claude Sonnet 4.6, USA).', + 'Enterprise: Anthropic (Claude Sonnet + Opus, USA) with EU-data-residency opt-in available on request.', ], }, { diff --git a/apps/web/app/(marketing)/security/page.tsx b/apps/web/app/(marketing)/security/page.tsx index 51c76d1..cf70159 100644 --- a/apps/web/app/(marketing)/security/page.tsx +++ b/apps/web/app/(marketing)/security/page.tsx @@ -40,7 +40,11 @@ const PILLARS = [ }, { title: 'Rate limiting', - body: 'Default 100 requests/min/IP per tool, enforced at the Traefik layer before traffic ever reaches your container.', + body: 'Default 100 requests/min/IP per tool, enforced at the Traefik layer before traffic ever reaches your container. Daily preview + build caps per tier protect against runaway LLM spend.', + }, + { + title: 'AI provider by tier — transparent', + body: "Hobby (free) tier uses Zhipu's GLM model (servers in China) for prompt analysis — chosen for cost so we can offer a real free tier. Pro, Team and Enterprise use Anthropic Claude (US). Enterprise can request EU-only data residency. 
The provider is shown live in the wizard so you always know where your prompt is going.", }, ]; diff --git a/packages/llm/src/index.ts b/packages/llm/src/index.ts index c02d593..29046d8 100644 --- a/packages/llm/src/index.ts +++ b/packages/llm/src/index.ts @@ -45,18 +45,138 @@ const BANNED_PATTERNS = [ /disregard\s+(the\s+)?(above|previous)/i, ]; +// ────────────────────────────────────────────────────────────────────────── +// Plan-aware model selection +// ────────────────────────────────────────────────────────────────────────── + +export type Plan = 'hobby' | 'pro' | 'team' | 'enterprise'; +export type Purpose = 'preview' | 'build'; +export type Provider = 'anthropic' | 'glm'; +export type DisplayBadge = 'open-tier' | 'claude-haiku' | 'claude-sonnet' | 'claude-opus'; + +export interface ModelChoice { + provider: Provider; + model: string; + maxTokens: number; + timeoutMs: number; + /** User-facing model name shown in the wizard + previews. */ + displayName: string; + displayBadge: DisplayBadge; +} + +/** + * Preview runs synchronously inside an HTTP request behind Cloudflare's + * ~100s edge cap. Each tier's (model + max_tokens + timeout) is bounded to + * fit. Hobby uses GLM as the cost lever; paid tiers escalate to Claude — the + * visible quality/speed jump *is* the upgrade pitch. + * + * Measured token rates: glm-4-plus ~58 tok/s (3500 tok ≈ 60s) · + * Claude Haiku 4.5 ~200 tok/s (8192 tok ≈ 41s) · Claude Sonnet 4.6 ~80 tok/s. 
+ */
+const PREVIEW_MODELS: Record<Plan, ModelChoice> = {
+  hobby: {
+    provider: 'glm',
+    model: 'glm-4-plus',
+    maxTokens: 3500,
+    timeoutMs: 65_000,
+    displayName: 'Open-tier AI',
+    displayBadge: 'open-tier',
+  },
+  pro: {
+    provider: 'anthropic',
+    model: 'claude-haiku-4-5-20251001',
+    maxTokens: 8192,
+    timeoutMs: 60_000,
+    displayName: 'Claude Haiku 4.5',
+    displayBadge: 'claude-haiku',
+  },
+  team: {
+    provider: 'anthropic',
+    model: 'claude-sonnet-4-6',
+    maxTokens: 8192,
+    timeoutMs: 60_000,
+    displayName: 'Claude Sonnet 4.6',
+    displayBadge: 'claude-sonnet',
+  },
+  enterprise: {
+    provider: 'anthropic',
+    model: 'claude-sonnet-4-6',
+    maxTokens: 8192,
+    timeoutMs: 60_000,
+    displayName: 'Claude Sonnet 4.6',
+    displayBadge: 'claude-sonnet',
+  },
+};
+
+/**
+ * Build worker runs async via BullMQ — no proxy timeout. With the 24h preview
+ * cache TTL cache-misses are rare, so GLM as the default keeps that rare path
+ * cheap; Enterprise gets Opus as a premium-quality promise.
+ */
+const BUILD_MODELS: Record<Plan, ModelChoice> = {
+  hobby: {
+    provider: 'glm',
+    model: 'glm-4.5',
+    maxTokens: 8192,
+    timeoutMs: 180_000,
+    displayName: 'Open-tier AI',
+    displayBadge: 'open-tier',
+  },
+  pro: {
+    provider: 'glm',
+    model: 'glm-4.5',
+    maxTokens: 8192,
+    timeoutMs: 180_000,
+    displayName: 'Open-tier AI',
+    displayBadge: 'open-tier',
+  },
+  team: {
+    provider: 'glm',
+    model: 'glm-4.5',
+    maxTokens: 8192,
+    timeoutMs: 180_000,
+    displayName: 'Open-tier AI',
+    displayBadge: 'open-tier',
+  },
+  enterprise: {
+    provider: 'anthropic',
+    model: 'claude-opus-4-7',
+    maxTokens: 8192,
+    timeoutMs: 600_000,
+    displayName: 'Claude Opus 4.7',
+    displayBadge: 'claude-opus',
+  },
+};
+
+export function pickPreviewModel(plan: Plan): ModelChoice {
+  return PREVIEW_MODELS[plan];
+}
+
+export function pickBuildModel(plan: Plan): ModelChoice {
+  return BUILD_MODELS[plan];
+}
+
+// ──────────────────────────────────────────────────────────────────────────
+// Generation API
+// ──────────────────────────────────────────────────────────────────────────
+
 export interface GenerationResult {
   spec: GeneratorSpecT;
-  source: 'claude' | 'mock';
+  source: 'claude' | 'glm' | 'mock';
 }
 
 export interface GenerateOptions {
+  /** 'anthropic' (default) or 'glm'. */
+  provider?: Provider;
+  /** Anthropic API key — required if provider === 'anthropic'. */
   apiKey?: string;
+  /** Zhipu (GLM) API key — required if provider === 'glm'. */
+  glmApiKey?: string;
   model?: string;
   maxTokens?: number;
-  /** Per-attempt request timeout in ms. Omit to use the SDK default. */
+  /** Per-attempt request timeout in ms. */
   timeoutMs?: number;
-  /** SDK retry count. Omit to use the SDK default. */
+  /** SDK retry count. Anthropic only. */
   maxRetries?: number;
 }
 
@@ -64,9 +184,40 @@ export async function generateSpec(
   prompt: string,
   opts: GenerateOptions = {},
 ): Promise<GenerationResult> {
+  const provider = opts.provider ?? 'anthropic';
+
+  if (provider === 'glm') {
+    if (!opts.glmApiKey) return { spec: mockSpec(prompt), source: 'mock' };
+    return generateWithGlm(prompt, {
+      apiKey: opts.glmApiKey,
+      model: opts.model ?? 'glm-4-plus',
+      maxTokens: opts.maxTokens ?? 4096,
+      timeoutMs: opts.timeoutMs,
+    });
+  }
+
   if (!opts.apiKey) {
     return { spec: mockSpec(prompt), source: 'mock' };
   }
+  return generateWithAnthropic(prompt, {
+    apiKey: opts.apiKey,
+    model: opts.model ?? 'claude-opus-4-7',
+    maxTokens: opts.maxTokens ?? 8192,
+    timeoutMs: opts.timeoutMs,
+    maxRetries: opts.maxRetries,
+  });
+}
+
+async function generateWithAnthropic(
+  prompt: string,
+  opts: {
+    apiKey: string;
+    model: string;
+    maxTokens: number;
+    timeoutMs?: number;
+    maxRetries?: number;
+  },
+): Promise<GenerationResult> {
   const client = new Anthropic({ apiKey: opts.apiKey });
   const requestOptions: { timeout?: number; maxRetries?: number } = {};
   if (opts.timeoutMs !== undefined) requestOptions.timeout = opts.timeoutMs;
@@ -75,35 +226,81 @@ export async function generateSpec(
   const response = await client.messages
     .create(
       {
-        model: opts.model ?? 'claude-opus-4-7',
-        max_tokens: opts.maxTokens ?? 8192,
+        model: opts.model,
+        max_tokens: opts.maxTokens,
         system: SYSTEM_PROMPT,
         messages: [{ role: 'user', content: prompt }],
       },
       requestOptions,
     )
     .catch((err: unknown) => {
-      // A per-attempt timeout surfaces as APIConnectionTimeoutError once the
-      // SDK exhausts retries. Map it to a typed error so the API layer returns
-      // a clean 504 instead of letting the edge proxy time out headerless.
       if (err instanceof Anthropic.APIConnectionTimeoutError) {
         throw new SpecTimeoutError('spec generation exceeded the time budget');
       }
       throw err;
     });
+
   const text = response.content
     .filter((b): b is { type: 'text'; text: string } => b.type === 'text')
     .map((b) => b.text)
     .join('');
   const json = extractJson(text);
   const parsed = GeneratorSpec.safeParse(json);
-  if (!parsed.success) {
-    throw new SpecValidationError(parsed.error.message);
-  }
+  if (!parsed.success) throw new SpecValidationError(parsed.error.message);
   scanForInjection(parsed.data);
   return { spec: parsed.data, source: 'claude' };
 }
 
+const GLM_ENDPOINT = 'https://open.bigmodel.cn/api/paas/v4/chat/completions';
+
+async function generateWithGlm(
+  prompt: string,
+  opts: { apiKey: string; model: string; maxTokens: number; timeoutMs?: number },
+): Promise<GenerationResult> {
+  const controller = new AbortController();
+  const timer = opts.timeoutMs ? setTimeout(() => controller.abort(), opts.timeoutMs) : null;
+  let res: Response;
+  try {
+    res = await fetch(GLM_ENDPOINT, {
+      method: 'POST',
+      headers: {
+        Authorization: `Bearer ${opts.apiKey}`,
+        'Content-Type': 'application/json',
+      },
+      body: JSON.stringify({
+        model: opts.model,
+        max_tokens: opts.maxTokens,
+        messages: [
+          { role: 'system', content: SYSTEM_PROMPT },
+          { role: 'user', content: prompt },
+        ],
+      }),
+      signal: controller.signal,
+    });
+  } catch (err) {
+    if ((err as { name?: string }).name === 'AbortError') {
+      throw new SpecTimeoutError('glm spec generation exceeded the time budget');
+    }
+    throw err;
+  } finally {
+    if (timer) clearTimeout(timer);
+  }
+  if (!res.ok) {
+    const body = await res.text().catch(() => '');
+    throw new Error(`glm_api_${res.status}: ${body.slice(0, 200)}`);
+  }
+  const data = (await res.json()) as {
+    choices?: Array<{ message?: { content?: string }; finish_reason?: string }>;
+  };
+  const content = data.choices?.[0]?.message?.content;
+  if (!content) throw new SpecValidationError('glm_empty_response');
+  const json = extractJson(content);
+  const parsed = GeneratorSpec.safeParse(json);
+  if (!parsed.success) throw new SpecValidationError(parsed.error.message);
+  scanForInjection(parsed.data);
+  return { spec: parsed.data, source: 'glm' };
+}
+
 export class SpecValidationError extends Error {
   override readonly name = 'SpecValidationError';
 }
@@ -141,7 +338,7 @@ function scanForInjection(spec: GeneratorSpecT): void {
 export function mockSpec(prompt: string): GeneratorSpecT {
   return {
     name: 'Echo MCP',
-    description: `Mock server (no ANTHROPIC_API_KEY). Prompt was: ${prompt.slice(0, 200)}`,
+    description: `Mock server (no LLM key). Prompt was: ${prompt.slice(0, 200)}`,
     tools: [
       {
         name: 'echo',