feat: tiered LLM (GLM free / Claude paid) + rate limits + quota enforcement
All checks were successful
Deploy to Production / deploy (push) Successful in 53s

The free tier was hemorrhaging Anthropic cost with no abuse cap (no rate
limit on /preview, Opus default in the build worker, 5-min cache TTL that
made cache-miss the common case). This switches free users to GLM, paid
users to Claude tiers, and tightens every leak found in the audit.

Backend:
- @bmm/llm: GLM provider via Zhipu's OpenAI-compatible endpoint, pickPreviewModel
  + pickBuildModel helpers, plan-aware ModelChoice
- preview-cache TTL 5min -> 24h (kills the cache-miss path)
- /v1/servers/preview: picks model from caller's plan, returns model name to UI
- /v1/servers POST: enforces SERVER_LIMITS per plan (402), rate-limits builds
- daily rate-limit on preview (5/40/150/1000) and build (3/20/100/500)
- /v1/auth/me returns plan so the wizard can show the right model name
- generator worker: GLM default, Anthropic Sonnet fallback if GLM errors

Frontend:
- Wizard fetches plan, shows "<model> is drafting the tool spec" pre-emptively,
  upgrade hint for hobby users, friendly errors for 402 / 429
- Pricing page: AI-model line per tier (Open-tier / Haiku / Sonnet / Opus),
  Team €149 -> €199, Enterprise €499 -> €999, daily-preview limit per tier
- Privacy + Security: explicit subprocessor disclosure for Anthropic (US) /
  Zhipu (CN) and which tier uses which

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Marco Sadjadi 2026-05-23 23:50:00 +02:00
parent 66128c73d8
commit bc174c1302
14 changed files with 537 additions and 58 deletions

View File

@ -8,6 +8,7 @@ const Env = z.object({
NEXT_PUBLIC_APP_URL: z.string().default('http://localhost:3001'),
OAUTH_KEY_DIR: z.string().default('./keys'),
ANTHROPIC_API_KEY: z.string().optional(),
GLM_API_KEY: z.string().optional(),
SECRETS_ENCRYPTION_KEY: z
.string()
.min(64, '32 bytes hex required')
@ -33,6 +34,7 @@ export const config = Env.parse({
NEXT_PUBLIC_APP_URL: process.env.NEXT_PUBLIC_APP_URL,
OAUTH_KEY_DIR: process.env.OAUTH_KEY_DIR,
ANTHROPIC_API_KEY: process.env.ANTHROPIC_API_KEY,
GLM_API_KEY: process.env.GLM_API_KEY,
SECRETS_ENCRYPTION_KEY: process.env.SECRETS_ENCRYPTION_KEY,
CONTROL_PLANE_PUBLIC_URL: process.env.CONTROL_PLANE_PUBLIC_URL,
ADMIN_EMAIL: process.env.ADMIN_EMAIL,

23
apps/api/src/lib/plan.ts Normal file
View File

@ -0,0 +1,23 @@
import { createDb, eq, organizations } from '@bmm/db';
import type { Plan } from '@bmm/llm';
// Database handle created once, when this module is first imported.
const db = createDb();
/**
 * Look up an org's current plan.
 *
 * Fails closed to 'hobby' (the least expensive tier) when the org row is
 * missing OR when the stored value is not one of the known plans. An unknown
 * string must never be returned: callers index per-plan limit tables with the
 * result, and an `undefined` limit makes comparisons such as
 * `count >= limit` evaluate to false, which would silently disable the quota.
 *
 * @param orgId - Primary key of the organization row.
 * @returns The org's plan, or 'hobby' when it cannot be determined.
 */
export async function getOrgPlan(orgId: string): Promise<Plan> {
  const knownPlans: readonly string[] = ['hobby', 'pro', 'team', 'enterprise'];
  const [row] = await db
    .select({ plan: organizations.plan })
    .from(organizations)
    .where(eq(organizations.id, orgId))
    .limit(1);
  const stored: unknown = row?.plan;
  if (typeof stored !== 'string' || !knownPlans.includes(stored)) {
    return 'hobby';
  }
  // Safe: membership in knownPlans was checked at runtime just above.
  return stored as Plan;
}
/**
 * Max MCP servers per org by plan. Enforced at POST /v1/servers.
 * Enterprise uses Number.MAX_SAFE_INTEGER as an effectively-unlimited value so
 * the same numeric comparison works for every plan.
 */
export const SERVER_LIMITS: Record<Plan, number> = {
hobby: 1,
pro: 5,
team: 25,
enterprise: Number.MAX_SAFE_INTEGER,
};

View File

@ -1,8 +1,11 @@
import crypto from 'node:crypto';
import { getRedis } from './redis.js';
import type { GeneratorSpec } from '@bmm/types';
import { getRedis } from './redis.js';
const TTL_SECONDS = 5 * 60;
// 24h: previews are LLM-priced; a long TTL eliminates the cache-miss path on
// the build worker (each miss = another LLM call). Specs are tiny JSON (~5KB),
// Redis-memory impact is negligible.
const TTL_SECONDS = 24 * 60 * 60;
function key(previewId: string): string {
return `preview:${previewId}`;

View File

@ -0,0 +1,51 @@
import type { Plan } from '@bmm/llm';
import { getRedis } from './redis.js';
// Number of seconds in 24 hours; used as the Redis TTL for daily counters.
const DAY_SEC = 24 * 60 * 60;
/** Current UTC calendar date as `YYYY-MM-DD`, used to scope daily counters. */
function todayKey(): string {
  const isoNow = new Date().toISOString();
  // The first 10 characters of an ISO-8601 timestamp are the date part.
  return isoNow.substring(0, 10);
}
/** Outcome of a single daily rate-limit check. */
export interface RateLimitResult {
/** True while the post-increment count is still within the limit. */
ok: boolean;
/** Requests left for the current day; never negative. */
remaining: number;
/** Seconds the caller is told to wait before the limit resets. */
resetIn: number;
}
/** Seconds from `now` until the next 00:00:00 UTC; always between 1 and 86400. */
function secondsUntilUtcMidnight(now: Date): number {
  // Date.UTC normalises day overflow, so `date + 1` also handles month/year ends.
  const nextMidnightMs = Date.UTC(
    now.getUTCFullYear(),
    now.getUTCMonth(),
    now.getUTCDate() + 1,
  );
  return Math.max(1, Math.ceil((nextMidnightMs - now.getTime()) / 1000));
}

/**
 * Daily counter via Redis INCR. The increment itself is atomic, so there is
 * no race window between reading and writing the count.
 *
 * The counter rolls over at midnight UTC because the key embeds the UTC date;
 * the 24h TTL exists only to garbage-collect old keys. `resetIn` is therefore
 * computed from the clock (time until the next UTC midnight), not from the
 * key's TTL, which would run 24h from the first request of the day.
 *
 * Every call counts as one attempt, including calls that end up rejected.
 *
 * @param scope - Logical action being limited (e.g. 'preview', 'build').
 * @param userId - Identifier the counter is tracked for.
 * @param max - Maximum number of allowed calls per UTC day.
 * @returns Whether this call is allowed, how many calls remain, and the
 *   number of seconds until the counter resets.
 */
export async function checkDailyLimit(
  scope: string,
  userId: string,
  max: number,
): Promise<RateLimitResult> {
  const key = `ratelimit:${scope}:${userId}:${todayKey()}`;
  const redis = getRedis();
  const count = await redis.incr(key);
  // First hit sets the TTL. TTL === -1 means the key exists without an expiry
  // (an earlier EXPIRE was lost after INCR), so repair it instead of leaking
  // the key forever.
  if (count === 1 || (await redis.ttl(key)) === -1) {
    await redis.expire(key, DAY_SEC);
  }
  return {
    ok: count <= max,
    remaining: Math.max(0, max - count),
    resetIn: secondsUntilUtcMidnight(new Date()),
  };
}
// Per-tier daily limits on the two LLM-priced actions.
// Preview = ~€0.002-0.015/call · Build = ~€0.005-0.22/call.
/** Max preview requests per UTC day, keyed by plan. */
export const PREVIEW_DAILY_LIMIT: Record<Plan, number> = {
hobby: 5,
pro: 40,
team: 150,
enterprise: 1000,
};
/** Max build requests per UTC day, keyed by plan. */
export const BUILD_DAILY_LIMIT: Record<Plan, number> = {
hobby: 3,
pro: 20,
team: 100,
enterprise: 500,
};

View File

@ -13,6 +13,7 @@ import type { FastifyInstance } from 'fastify';
import { z } from 'zod';
import { config } from '../config.js';
import { audit } from '../lib/audit.js';
import { getOrgPlan } from '../lib/plan.js';
import { sendSms, smsConfigured } from '../lib/sms.js';
const SESSION_COOKIE = 'bmm_session';
@ -128,7 +129,10 @@ export async function authRoutes(app: FastifyInstance): Promise<void> {
const token = req.cookies[SESSION_COOKIE];
const session = await getSession(token);
if (!session) return reply.code(401).send({ error: 'unauthorized' });
return reply.send({ user: session });
// Plan is on the org, not the session — look it up fresh so a Stripe
// upgrade is reflected without forcing a re-login.
const plan = await getOrgPlan(session.orgId);
return reply.send({ user: { ...session, plan } });
});
app.post('/v1/auth/admin/login', async (req, reply) => {

View File

@ -11,7 +11,13 @@ import {
sql,
templates,
} from '@bmm/db';
import { BannedPatternError, SpecTimeoutError, SpecValidationError, generateSpec } from '@bmm/llm';
import {
BannedPatternError,
SpecTimeoutError,
SpecValidationError,
generateSpec,
pickPreviewModel,
} from '@bmm/llm';
import {
BuildEvent,
CreateServerInput,
@ -26,8 +32,10 @@ import { config } from '../config.js';
import { audit } from '../lib/audit.js';
import { encryptSecret } from '../lib/crypto.js';
import { stopContainer } from '../lib/docker.js';
import { SERVER_LIMITS, getOrgPlan } from '../lib/plan.js';
import { cacheSpec, loadSpec, overwriteSpec } from '../lib/preview-cache.js';
import { getBuildQueue } from '../lib/queue.js';
import { BUILD_DAILY_LIMIT, PREVIEW_DAILY_LIMIT, checkDailyLimit } from '../lib/rate-limit.js';
import { buildChannel, getSubscriber } from '../lib/redis.js';
import { requireAuth } from '../plugins/session.js';
import { getForkRefTemplate } from './templates.js';
@ -46,26 +54,47 @@ export async function serverRoutes(app: FastifyInstance): Promise<void> {
});
app.post('/v1/servers/preview', { preHandler: requireAuth }, async (req, reply) => {
const user = req.user!;
const parsed = PreviewInput.safeParse(req.body);
if (!parsed.success) {
return reply.code(400).send({ error: 'invalid_input', issues: parsed.error.flatten() });
}
const plan = await getOrgPlan(user.orgId);
// Daily preview rate-limit per user. Free is tight (5/day) because every
// preview is a paid LLM call; paid tiers have headroom for real iteration.
const rl = await checkDailyLimit('preview', user.userId, PREVIEW_DAILY_LIMIT[plan]);
if (!rl.ok) {
return reply.code(429).send({
error: 'rate_limited',
detail: `Daily preview limit reached for plan "${plan}" (${PREVIEW_DAILY_LIMIT[plan]}/day). Resets in ${Math.ceil(rl.resetIn / 3600)}h.`,
plan,
limit: PREVIEW_DAILY_LIMIT[plan],
resetIn: rl.resetIn,
});
}
const choice = pickPreviewModel(plan);
try {
const { spec, source } = await generateSpec(parsed.data.prompt, {
provider: choice.provider,
apiKey: config.ANTHROPIC_API_KEY,
// Preview generates the spec synchronously inside an HTTP request that
// sits behind Cloudflare's edge timeout. Haiku 4.5 (~200 tok/s — a full
// 8k-token spec in ~40s) is the only model fast enough; Sonnet and Opus
// overran the proxy cap, which reached the browser as a CORS error. The
// hard 60s timeout guarantees a clean 504 before the proxy gives up.
model: 'claude-haiku-4-5-20251001',
timeoutMs: 60_000,
glmApiKey: config.GLM_API_KEY,
model: choice.model,
maxTokens: choice.maxTokens,
timeoutMs: choice.timeoutMs,
maxRetries: 0,
});
const previewId = await cacheSpec(spec);
return reply.send({
previewId,
source,
plan,
modelDisplayName: choice.displayName,
modelBadge: choice.displayBadge,
upgradeHint: plan === 'hobby',
spec: {
name: spec.name,
description: spec.description,
@ -112,6 +141,37 @@ export async function serverRoutes(app: FastifyInstance): Promise<void> {
templateId,
} = parsed.data;
// ---- Plan enforcement (must happen before any DB write) ----
const plan = await getOrgPlan(user.orgId);
// Daily build rate-limit.
const rl = await checkDailyLimit('build', user.userId, BUILD_DAILY_LIMIT[plan]);
if (!rl.ok) {
return reply.code(429).send({
error: 'rate_limited',
detail: `Daily build limit reached for plan "${plan}" (${BUILD_DAILY_LIMIT[plan]}/day). Resets in ${Math.ceil(rl.resetIn / 3600)}h.`,
plan,
limit: BUILD_DAILY_LIMIT[plan],
resetIn: rl.resetIn,
});
}
// Server-count quota. Counted via SQL (not cached) so race risk is tiny.
const [serverCountRow] = await db
.select({ count: sql<number>`count(*)::int` })
.from(mcpServers)
.where(eq(mcpServers.orgId, user.orgId));
const existingCount = serverCountRow?.count ?? 0;
if (existingCount >= SERVER_LIMITS[plan]) {
return reply.code(402).send({
error: 'plan_limit_reached',
detail: `Plan "${plan}" allows ${SERVER_LIMITS[plan]} server(s); you have ${existingCount}. Upgrade to add more.`,
plan,
limit: SERVER_LIMITS[plan],
current: existingCount,
});
}
// ---- Template-fork validation ----
// templateId is user-controlled. To prevent fork_count manipulation + garbage
// template_id rows, the user MUST have hit POST /v1/templates/:slug/fork,

View File

@ -4,13 +4,14 @@ const Env = z.object({
DATABASE_URL: z.string(),
REDIS_URL: z.string().default('redis://localhost:6379'),
ANTHROPIC_API_KEY: z.string().optional(),
GLM_API_KEY: z.string().optional(),
RUNNER_HOST: z.string().default('localhost'),
RUNNER_PORT_RANGE_START: z.coerce.number().default(4100),
RUNNER_PORT_RANGE_END: z.coerce.number().default(4999),
CONTROL_PLANE_URL: z.string().default('http://host.docker.internal:4000'),
CONTROL_PLANE_PUBLIC_URL: z.string().default('http://localhost:4000'),
OAUTH_ISSUER: z.string().optional(),
MODEL_GENERATE: z.string().default('claude-opus-4-7'),
MODEL_GENERATE: z.string().default('glm-4.5'),
MODEL_FIX: z.string().default('claude-haiku-4-5-20251001'),
});

View File

@ -1,12 +1,40 @@
import { generateSpec as sharedGenerate, type GenerationResult } from '@bmm/llm';
import { type GenerationResult, generateSpec as sharedGenerate } from '@bmm/llm';
import { config } from '../config.js';
export type { GenerationResult };
/**
* Build-worker spec generation (cache-miss path). Runs async in a BullMQ
* worker, so no proxy timeout applies. Defaults to GLM to keep this rare path cheap;
* falls back to Anthropic Sonnet on GLM failure so a temporary outage at one
* provider doesn't break builds.
*/
export async function generateSpec(prompt: string): Promise<GenerationResult> {
return sharedGenerate(prompt, {
apiKey: config.ANTHROPIC_API_KEY,
if (config.GLM_API_KEY) {
try {
return await sharedGenerate(prompt, {
provider: 'glm',
glmApiKey: config.GLM_API_KEY,
model: config.MODEL_GENERATE,
maxTokens: 8192,
timeoutMs: 180_000,
});
} catch (err) {
console.warn(
'[generator] GLM failed, falling back to Anthropic Sonnet:',
(err as Error).message,
);
}
}
if (!config.ANTHROPIC_API_KEY) {
// No keys at all → @bmm/llm returns mockSpec, which keeps builds working
// in dev without any provider configured.
return sharedGenerate(prompt, { provider: 'anthropic' });
}
return sharedGenerate(prompt, {
provider: 'anthropic',
apiKey: config.ANTHROPIC_API_KEY,
model: 'claude-sonnet-4-6',
maxTokens: 8192,
});
}

View File

@ -1,13 +1,13 @@
import { builds, createDb, eq, mcpServers } from '@bmm/db';
import { GeneratorSpec } from '@bmm/types';
import { Worker } from 'bullmq';
import { Redis } from 'ioredis';
import { GeneratorSpec } from '@bmm/types';
import { builds, createDb, eq, mcpServers } from '@bmm/db';
import { config } from './config.js';
import { generateSpec } from './lib/claude.js';
import { renderServerCode } from './lib/render.js';
import { dockerBuild, prepareBuildContext, staticCheck } from './lib/build.js';
import { generateSpec } from './lib/claude.js';
import { allocatePort, deployContainer, dockerAvailable, stopContainer } from './lib/deploy.js';
import { emitDone, emitError, emitLog, emitStatus } from './lib/emit.js';
import { renderServerCode } from './lib/render.js';
const db = createDb();
const connection = new Redis(config.REDIS_URL, { maxRetriesPerRequest: null });
@ -57,12 +57,18 @@ export const worker = new Worker<JobData>(
const oldContainerId = priorState?.containerId ?? null;
try {
await db.update(builds).set({ status: 'generating', startedAt: new Date() }).where(eq(builds.id, buildId));
await db.update(mcpServers).set({ status: 'generating', updatedAt: new Date() }).where(eq(mcpServers.id, serverId));
await db
.update(builds)
.set({ status: 'generating', startedAt: new Date() })
.where(eq(builds.id, buildId));
await db
.update(mcpServers)
.set({ status: 'generating', updatedAt: new Date() })
.where(eq(mcpServers.id, serverId));
await emitStatus(buildId, 'generating');
let spec: GeneratorSpec | null = null;
let source: 'claude' | 'mock' | 'cached' = 'mock';
let source: 'claude' | 'glm' | 'mock' | 'cached' = 'mock';
if (previewId) {
spec = await loadCachedSpec(previewId);
@ -87,7 +93,10 @@ export const worker = new Worker<JobData>(
let generatedCode: string;
const prebuilt = previewId ? await loadPrebuiltCode(previewId) : null;
if (prebuilt) {
await log('info', `Using pre-rendered template code (${prebuilt.length} chars) — skipping render`);
await log(
'info',
`Using pre-rendered template code (${prebuilt.length} chars) — skipping render`,
);
generatedCode = prebuilt;
} else {
generatedCode = renderServerCode(spec);
@ -98,11 +107,20 @@ export const worker = new Worker<JobData>(
.where(eq(builds.id, buildId));
await db.update(builds).set({ status: 'building' }).where(eq(builds.id, buildId));
await db.update(mcpServers).set({ status: 'building', toolsSchema: spec.tools, updatedAt: new Date() }).where(eq(mcpServers.id, serverId));
await db
.update(mcpServers)
.set({ status: 'building', toolsSchema: spec.tools, updatedAt: new Date() })
.where(eq(mcpServers.id, serverId));
await emitStatus(buildId, 'building');
await log('info', 'Preparing build context...');
const { contextDir, imageTag } = await prepareBuildContext(serverId, version, slug, generatedCode, spec);
const { contextDir, imageTag } = await prepareBuildContext(
serverId,
version,
slug,
generatedCode,
spec,
);
await log('info', `Build context at ${contextDir}`);
await log('info', 'Running static checks...');
@ -112,8 +130,14 @@ export const worker = new Worker<JobData>(
const hasDocker = await dockerAvailable();
if (!hasDocker) {
await log('warn', 'Docker not available — skipping build/deploy. Server marked draft.');
await db.update(builds).set({ status: 'failed', errorMessage: 'docker_unavailable', finishedAt: new Date() }).where(eq(builds.id, buildId));
await db.update(mcpServers).set({ status: 'failed', updatedAt: new Date() }).where(eq(mcpServers.id, serverId));
await db
.update(builds)
.set({ status: 'failed', errorMessage: 'docker_unavailable', finishedAt: new Date() })
.where(eq(builds.id, buildId));
await db
.update(mcpServers)
.set({ status: 'failed', updatedAt: new Date() })
.where(eq(mcpServers.id, serverId));
await emitDone(buildId, 'failed', serverId, null);
return;
}
@ -125,7 +149,10 @@ export const worker = new Worker<JobData>(
await log('info', 'Image built.');
await db.update(builds).set({ status: 'deploying' }).where(eq(builds.id, buildId));
await db.update(mcpServers).set({ status: 'deploying', updatedAt: new Date() }).where(eq(mcpServers.id, serverId));
await db
.update(mcpServers)
.set({ status: 'deploying', updatedAt: new Date() })
.where(eq(mcpServers.id, serverId));
await emitStatus(buildId, 'deploying');
const port = await allocatePort();
@ -140,7 +167,10 @@ export const worker = new Worker<JobData>(
};
const handle = await deployContainer({ serverId, slug, hostPort: port, imageTag, envVars });
await log('info', `Container ${handle.containerId.slice(0, 12)} running at ${handle.publicUrl}`);
await log(
'info',
`Container ${handle.containerId.slice(0, 12)} running at ${handle.publicUrl}`,
);
await db
.update(builds)
@ -148,7 +178,12 @@ export const worker = new Worker<JobData>(
.where(eq(builds.id, buildId));
await db
.update(mcpServers)
.set({ status: 'live', currentVersion: version, publicUrl: handle.publicUrl, updatedAt: new Date() })
.set({
status: 'live',
currentVersion: version,
publicUrl: handle.publicUrl,
updatedAt: new Date(),
})
.where(eq(mcpServers.id, serverId));
// Rolling deploy: the new container is live — now retire the previous one.

View File

@ -7,6 +7,7 @@ import { StreamingLogs } from '@/components/streaming-logs';
import { Button } from '@/components/ui/button';
import { apiFetch } from '@/lib/api';
import { Loader2, RotateCcw, X } from 'lucide-react';
import Link from 'next/link';
import { useRouter, useSearchParams } from 'next/navigation';
import { Suspense, useEffect, useState } from 'react';
@ -41,9 +42,15 @@ interface PreviewTool {
inputSchema: Record<string, unknown>;
}
// Client-side copy of the plan tiers; keep in sync with the `Plan` union exported by @bmm/llm.
type Plan = 'hobby' | 'pro' | 'team' | 'enterprise';
interface PreviewResponse {
previewId: string;
source: 'claude' | 'mock';
source: 'claude' | 'glm' | 'mock';
plan?: Plan;
modelDisplayName?: string;
modelBadge?: 'open-tier' | 'claude-haiku' | 'claude-sonnet' | 'claude-opus';
upgradeHint?: boolean;
spec: {
name: string;
description?: string;
@ -53,6 +60,13 @@ interface PreviewResponse {
};
}
/**
 * Model name and typical wait time shown while a preview is being generated,
 * keyed by the caller's plan. The estimates are ranges in seconds and should
 * agree with the per-tier figures on the pricing page.
 */
const PREVIEW_MODEL_BY_PLAN: Record<Plan, { name: string; estimate: string }> = {
  hobby: { name: 'Open-tier AI', estimate: '30-60 seconds' },
  pro: { name: 'Claude Haiku 4.5', estimate: '10-20 seconds' },
  team: { name: 'Claude Sonnet 4.6', estimate: '15-40 seconds' },
  enterprise: { name: 'Claude Sonnet 4.6', estimate: '15-40 seconds' },
};
interface EditableTool {
name: string;
description: string;
@ -86,6 +100,7 @@ function NewServerPageInner() {
const router = useRouter();
const [step, setStep] = useState<Step>('prompt');
const [elapsedSec, setElapsedSec] = useState(0);
const [userPlan, setUserPlan] = useState<Plan | null>(null);
const [prompt, setPrompt] = useState('');
const [name, setName] = useState('');
@ -207,6 +222,14 @@ function NewServerPageInner() {
return () => clearInterval(id);
}, [step]);
// Plan determines which model the preview will use — we display its name
// *before* the request so the user knows what they're waiting for.
useEffect(() => {
apiFetch<{ user: { plan?: Plan } }>('/v1/auth/me')
.then((r) => setUserPlan(r.user.plan ?? 'hobby'))
.catch(() => setUserPlan('hobby'));
}, []);
async function analyze() {
setError(null);
if (prompt.trim().length < 10) {
@ -358,13 +381,23 @@ function NewServerPageInner() {
setServerId(res.server.id);
setStep('building');
} catch (e) {
const detail = (e as { detail?: { error?: string; detail?: unknown } }).detail;
const detail = (e as { detail?: { error?: string; detail?: string } }).detail;
const code = detail?.error;
if (code === 'slug_taken') {
setError(
code === 'slug_taken'
? `The slug "${slug}" is already used by one of your servers — change the Slug field above.`
: (code ?? (e as Error).message),
`The slug "${slug}" is already used by one of your servers — change the Slug field above.`,
);
return;
}
if (code === 'plan_limit_reached') {
setError(`${detail?.detail ?? 'Plan limit reached.'} See /pricing to upgrade.`);
return;
}
if (code === 'rate_limited') {
setError(detail?.detail ?? 'Daily build limit reached — try again tomorrow or upgrade.');
return;
}
setError(detail?.detail ?? code ?? (e as Error).message);
}
}
@ -457,8 +490,18 @@ function NewServerPageInner() {
<Loader2 className="mx-auto animate-spin text-[--color-accent]" size={22} />
<p className="mt-4 text-[13px]">Analyzing your prompt</p>
<p className="mt-1 text-[12px] text-[--color-fg-subtle]">
Claude is drafting the tool spec. Usually 1540 seconds.
{(userPlan ? PREVIEW_MODEL_BY_PLAN[userPlan] : PREVIEW_MODEL_BY_PLAN.hobby).name} is
drafting the tool spec. Usually{' '}
{(userPlan ? PREVIEW_MODEL_BY_PLAN[userPlan] : PREVIEW_MODEL_BY_PLAN.hobby).estimate}.
</p>
{userPlan === 'hobby' && (
<p className="mt-2 text-[11px] text-[--color-fg-muted]">
<Link href="/pricing" className="text-[--color-accent] hover:underline">
Upgrade to Pro
</Link>{' '}
for ~3× faster analysis with Claude Haiku.
</p>
)}
<p className="mono mt-3 text-[11px] tabular-nums text-[--color-fg-muted]">
{elapsedSec}s elapsed
</p>
@ -524,7 +567,7 @@ function NewServerPageInner() {
</button>
)}
<span className="mono text-[10.5px] text-[--color-fg-subtle]">
spec via {preview.source}
drafted with {preview.modelDisplayName ?? preview.source}
</span>
</div>
</div>

View File

@ -14,9 +14,12 @@ const TIERS = [
price: '€0',
tag: 'Forever free',
description: 'For trying things out and shipping single-user tools.',
model: 'Open-tier AI',
modelDetail: 'Free-tier model · ~30-60s analyze',
features: [
'1 MCP server',
'100,000 tool calls / month',
'5 prompt analyses / day',
'BuildMyMCP subdomain',
'Community support',
],
@ -28,9 +31,12 @@ const TIERS = [
price: '€49',
tag: '/ month',
description: 'For solo founders and small teams shipping production tools.',
model: 'Claude Haiku 4.5',
modelDetail: 'Anthropic · ~10-20s analyze',
features: [
'5 MCP servers',
'1M tool calls / month',
'40 prompt analyses / day',
'Custom domain',
'Priority build queue',
'Email support, 1 business-day SLA',
@ -41,12 +47,15 @@ const TIERS = [
},
{
name: 'Team',
price: '€149',
price: '€199',
tag: '/ month',
description: 'For teams with RBAC, audit, and 99.9% SLA needs.',
model: 'Claude Sonnet 4.6',
modelDetail: "Anthropic's flagship",
features: [
'25 MCP servers',
'10M tool calls / month',
'150 prompt analyses / day',
'RBAC + extended audit log',
'99.9% uptime SLA',
'Shared Slack channel support',
@ -56,9 +65,11 @@ const TIERS = [
},
{
name: 'Enterprise',
price: '€499+',
price: '€999+',
tag: '/ month',
description: 'For organizations bringing their own cloud, SSO and dedicated infra.',
model: 'Sonnet + Opus on build',
modelDetail: 'EU data-residency option',
features: [
'Unlimited servers',
'BYOC (AWS, GCP, Azure, Hetzner)',
@ -122,6 +133,13 @@ export default function Pricing() {
<p className="mt-2 text-[12px] leading-relaxed text-[--color-fg-muted]">
{t.description}
</p>
<div className="mt-3 rounded-md border border-[--color-border] bg-[--color-bg-subtle] px-2.5 py-1.5">
<div className="text-[10.5px] uppercase tracking-wider text-[--color-fg-subtle]">
AI model
</div>
<div className="mt-0.5 text-[12.5px] font-medium text-[--color-fg]">{t.model}</div>
<div className="text-[10.5px] text-[--color-fg-subtle]">{t.modelDetail}</div>
</div>
<ul className="mt-4 space-y-1.5 text-[12.5px] text-[--color-fg-muted]">
{t.features.map((f) => (
<li key={f}> {f}</li>

View File

@ -36,11 +36,21 @@ const SECTIONS = [
{
h: 'Subprocessors',
p: [
"Anthropic (generation) — only the prompt text you send. Anthropic's data-retention policy applies.",
'Hetzner (compute).',
'Backblaze (encrypted backups).',
'Stripe (billing).',
'Cloudflare (DNS + DDoS).',
"Anthropic, USA (Claude AI — used for prompt analysis and code generation on Pro / Team / Enterprise tiers). Only the prompt text and resulting spec are sent. Anthropic's data-retention policy applies.",
'Zhipu AI, China (GLM model — used for prompt analysis on the free Hobby tier only). Only the prompt text and resulting spec are sent. Upgrade to a paid tier to keep all AI processing within Anthropic (US).',
'Hetzner, Germany (compute).',
'Backblaze, EU (encrypted backups).',
'Stripe, Ireland (billing).',
'Cloudflare (DNS + DDoS protection).',
],
},
{
h: 'AI processing per tier',
p: [
'Hobby (free): prompts are sent to Zhipu AI (GLM, China) for analysis. Choose a paid tier if your prompts contain data that must not leave the EU/US.',
'Pro: prompts are sent to Anthropic (Claude Haiku 4.5, USA).',
'Team: prompts are sent to Anthropic (Claude Sonnet 4.6, USA).',
'Enterprise: Anthropic (Claude Sonnet + Opus, USA) with EU-data-residency opt-in available on request.',
],
},
{

View File

@ -40,7 +40,11 @@ const PILLARS = [
},
{
title: 'Rate limiting',
body: 'Default 100 requests/min/IP per tool, enforced at the Traefik layer before traffic ever reaches your container.',
body: 'Default 100 requests/min/IP per tool, enforced at the Traefik layer before traffic ever reaches your container. Daily preview + build caps per tier protect against runaway LLM spend.',
},
{
title: 'AI provider by tier — transparent',
body: "Hobby (free) tier uses Zhipu's GLM model (servers in China) for prompt analysis — chosen for cost so we can offer a real free tier. Pro, Team and Enterprise use Anthropic Claude (US). Enterprise can request EU-only data residency. The provider is shown live in the wizard so you always know where your prompt is going.",
},
];

View File

@ -45,18 +45,138 @@ const BANNED_PATTERNS = [
/disregard\s+(the\s+)?(above|previous)/i,
];
// ──────────────────────────────────────────────────────────────────────────
// Plan-aware model selection
// ──────────────────────────────────────────────────────────────────────────
/** Subscription tier of an organization. */
export type Plan = 'hobby' | 'pro' | 'team' | 'enterprise';
/** Which LLM-priced action a model is being chosen for. */
export type Purpose = 'preview' | 'build';
/** LLM backend that serves a generation request. */
export type Provider = 'anthropic' | 'glm';
/** Machine-readable identifier of the model family shown to the user. */
export type DisplayBadge = 'open-tier' | 'claude-haiku' | 'claude-sonnet' | 'claude-opus';
/** Everything needed to run one generation with a specific model, plus its UI labels. */
export interface ModelChoice {
/** Backend the request is sent to. */
provider: Provider;
/** Provider-specific model identifier. */
model: string;
/** Upper bound on the number of generated tokens. */
maxTokens: number;
/** Request timeout in milliseconds. */
timeoutMs: number;
/** User-facing model name shown in the wizard + previews. */
displayName: string;
/** Badge identifier paired with `displayName`. */
displayBadge: DisplayBadge;
}
/**
 * Preview runs synchronously inside an HTTP request behind Cloudflare's
 * ~100s edge cap. Each tier's (model + max_tokens + timeout) is bounded to
 * fit. Hobby uses GLM as the cost lever; paid tiers escalate to Claude, and
 * the visible quality/speed jump *is* the upgrade pitch.
 *
 * Measured token rates: glm-4-plus ~58 tok/s (3500 tok in ~60s) ·
 * Claude Haiku 4.5 ~200 tok/s (8192 tok in ~41s) · Claude Sonnet 4.6 ~80 tok/s.
 *
 * NOTE(review): at ~80 tok/s a full 8192-token Sonnet response would take
 * roughly 100s, which exceeds the 60s timeout configured for team/enterprise.
 * Confirm that typical specs are short enough, or lower maxTokens for Sonnet.
 */
const PREVIEW_MODELS: Record<Plan, ModelChoice> = {
hobby: {
provider: 'glm',
model: 'glm-4-plus',
maxTokens: 3500,
timeoutMs: 65_000,
displayName: 'Open-tier AI',
displayBadge: 'open-tier',
},
pro: {
provider: 'anthropic',
model: 'claude-haiku-4-5-20251001',
maxTokens: 8192,
timeoutMs: 60_000,
displayName: 'Claude Haiku 4.5',
displayBadge: 'claude-haiku',
},
team: {
provider: 'anthropic',
model: 'claude-sonnet-4-6',
maxTokens: 8192,
timeoutMs: 60_000,
displayName: 'Claude Sonnet 4.6',
displayBadge: 'claude-sonnet',
},
enterprise: {
provider: 'anthropic',
model: 'claude-sonnet-4-6',
maxTokens: 8192,
timeoutMs: 60_000,
displayName: 'Claude Sonnet 4.6',
displayBadge: 'claude-sonnet',
},
};
/**
 * Build worker runs async via BullMQ, so no proxy timeout applies. With the
 * 24h preview cache TTL, cache misses are rare, so GLM as the default keeps
 * that rare path cheap; Enterprise gets Opus as a premium-quality promise.
 *
 * NOTE(review): pro and team map to the GLM provider here. Confirm this is
 * consistent with the published subprocessor disclosure for paid tiers.
 * NOTE(review): verify that the build worker actually selects its model via
 * pickBuildModel; otherwise this table has no effect on builds.
 */
const BUILD_MODELS: Record<Plan, ModelChoice> = {
hobby: {
provider: 'glm',
model: 'glm-4.5',
maxTokens: 8192,
timeoutMs: 180_000,
displayName: 'Open-tier AI',
displayBadge: 'open-tier',
},
pro: {
provider: 'glm',
model: 'glm-4.5',
maxTokens: 8192,
timeoutMs: 180_000,
displayName: 'Open-tier AI',
displayBadge: 'open-tier',
},
team: {
provider: 'glm',
model: 'glm-4.5',
maxTokens: 8192,
timeoutMs: 180_000,
displayName: 'Open-tier AI',
displayBadge: 'open-tier',
},
enterprise: {
provider: 'anthropic',
model: 'claude-opus-4-7',
maxTokens: 8192,
timeoutMs: 600_000,
displayName: 'Claude Opus 4.7',
displayBadge: 'claude-opus',
},
};
/** Returns the model configuration used for synchronous previews on `plan`. */
export function pickPreviewModel(plan: Plan): ModelChoice {
  const choice = PREVIEW_MODELS[plan];
  return choice;
}
/** Returns the model configuration used for build-time generation on `plan`. */
export function pickBuildModel(plan: Plan): ModelChoice {
  const choice = BUILD_MODELS[plan];
  return choice;
}
// ──────────────────────────────────────────────────────────────────────────
// Generation API
// ──────────────────────────────────────────────────────────────────────────
export interface GenerationResult {
spec: GeneratorSpecT;
source: 'claude' | 'mock';
source: 'claude' | 'glm' | 'mock';
}
export interface GenerateOptions {
/** 'anthropic' (default) or 'glm'. */
provider?: Provider;
/** Anthropic API key — required if provider === 'anthropic'. */
apiKey?: string;
/** Zhipu (GLM) API key — required if provider === 'glm'. */
glmApiKey?: string;
model?: string;
maxTokens?: number;
/** Per-attempt request timeout in ms. Omit to use the SDK default. */
/** Per-attempt request timeout in ms. */
timeoutMs?: number;
/** SDK retry count. Omit to use the SDK default. */
/** SDK retry count. Anthropic only. */
maxRetries?: number;
}
@ -64,9 +184,40 @@ export async function generateSpec(
prompt: string,
opts: GenerateOptions = {},
): Promise<GenerationResult> {
const provider = opts.provider ?? 'anthropic';
if (provider === 'glm') {
if (!opts.glmApiKey) return { spec: mockSpec(prompt), source: 'mock' };
return generateWithGlm(prompt, {
apiKey: opts.glmApiKey,
model: opts.model ?? 'glm-4-plus',
maxTokens: opts.maxTokens ?? 4096,
timeoutMs: opts.timeoutMs,
});
}
if (!opts.apiKey) {
return { spec: mockSpec(prompt), source: 'mock' };
}
return generateWithAnthropic(prompt, {
apiKey: opts.apiKey,
model: opts.model ?? 'claude-opus-4-7',
maxTokens: opts.maxTokens ?? 8192,
timeoutMs: opts.timeoutMs,
maxRetries: opts.maxRetries,
});
}
async function generateWithAnthropic(
prompt: string,
opts: {
apiKey: string;
model: string;
maxTokens: number;
timeoutMs?: number;
maxRetries?: number;
},
): Promise<GenerationResult> {
const client = new Anthropic({ apiKey: opts.apiKey });
const requestOptions: { timeout?: number; maxRetries?: number } = {};
if (opts.timeoutMs !== undefined) requestOptions.timeout = opts.timeoutMs;
@ -75,35 +226,81 @@ export async function generateSpec(
const response = await client.messages
.create(
{
model: opts.model ?? 'claude-opus-4-7',
max_tokens: opts.maxTokens ?? 8192,
model: opts.model,
max_tokens: opts.maxTokens,
system: SYSTEM_PROMPT,
messages: [{ role: 'user', content: prompt }],
},
requestOptions,
)
.catch((err: unknown) => {
// A per-attempt timeout surfaces as APIConnectionTimeoutError once the
// SDK exhausts retries. Map it to a typed error so the API layer returns
// a clean 504 instead of letting the edge proxy time out headerless.
if (err instanceof Anthropic.APIConnectionTimeoutError) {
throw new SpecTimeoutError('spec generation exceeded the time budget');
}
throw err;
});
const text = response.content
.filter((b): b is { type: 'text'; text: string } => b.type === 'text')
.map((b) => b.text)
.join('');
const json = extractJson(text);
const parsed = GeneratorSpec.safeParse(json);
if (!parsed.success) {
throw new SpecValidationError(parsed.error.message);
}
if (!parsed.success) throw new SpecValidationError(parsed.error.message);
scanForInjection(parsed.data);
return { spec: parsed.data, source: 'claude' };
}
/** Zhipu's OpenAI-compatible chat-completions endpoint. */
const GLM_ENDPOINT = 'https://open.bigmodel.cn/api/paas/v4/chat/completions';

/**
 * Generate a spec with a GLM model via a single non-streaming chat-completion
 * request, then validate it exactly like the Anthropic path.
 *
 * The time budget covers the WHOLE exchange: sending the request, waiting for
 * the response headers and reading the response body. The abort timer is only
 * cleared once the body has been consumed, so a stalled body cannot hang the
 * caller past `timeoutMs`.
 *
 * @param prompt - User prompt, sent as the `user` message.
 * @param opts - API key, model id, token cap and optional timeout in ms
 *   (no timeout is applied when `timeoutMs` is omitted or 0).
 * @returns The validated spec with `source: 'glm'`.
 * @throws SpecTimeoutError when the request is aborted by the time budget.
 * @throws SpecValidationError when the response is empty or fails schema validation.
 * @throws Error `glm_api_<status>: <body excerpt>` on a non-2xx response.
 */
async function generateWithGlm(
  prompt: string,
  opts: { apiKey: string; model: string; maxTokens: number; timeoutMs?: number },
): Promise<GenerationResult> {
  const controller = new AbortController();
  const timer = opts.timeoutMs ? setTimeout(() => controller.abort(), opts.timeoutMs) : null;

  let content: string | undefined;
  try {
    const res = await fetch(GLM_ENDPOINT, {
      method: 'POST',
      headers: {
        Authorization: `Bearer ${opts.apiKey}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model: opts.model,
        max_tokens: opts.maxTokens,
        messages: [
          { role: 'system', content: SYSTEM_PROMPT },
          { role: 'user', content: prompt },
        ],
      }),
      signal: controller.signal,
    });

    if (!res.ok) {
      // Best effort: include a short excerpt of the error body for diagnosis.
      const body = await res.text().catch(() => '');
      throw new Error(`glm_api_${res.status}: ${body.slice(0, 200)}`);
    }

    // Body is read while the abort timer is still armed.
    const data = (await res.json()) as {
      choices?: Array<{ message?: { content?: string }; finish_reason?: string }>;
    };
    content = data.choices?.[0]?.message?.content;
  } catch (err) {
    // An abort, whether before headers or mid-body, means the budget was exceeded.
    if ((err as { name?: string }).name === 'AbortError') {
      throw new SpecTimeoutError('glm spec generation exceeded the time budget');
    }
    throw err;
  } finally {
    if (timer) clearTimeout(timer);
  }

  if (!content) throw new SpecValidationError('glm_empty_response');

  const json = extractJson(content);
  const parsed = GeneratorSpec.safeParse(json);
  if (!parsed.success) throw new SpecValidationError(parsed.error.message);

  scanForInjection(parsed.data);
  return { spec: parsed.data, source: 'glm' };
}
/** Thrown when a model response is empty or does not match the GeneratorSpec schema. */
export class SpecValidationError extends Error {
override readonly name = 'SpecValidationError';
}
@ -141,7 +338,7 @@ function scanForInjection(spec: GeneratorSpecT): void {
export function mockSpec(prompt: string): GeneratorSpecT {
return {
name: 'Echo MCP',
description: `Mock server (no ANTHROPIC_API_KEY). Prompt was: ${prompt.slice(0, 200)}`,
description: `Mock server (no LLM key). Prompt was: ${prompt.slice(0, 200)}`,
tools: [
{
name: 'echo',