feat: tiered LLM (GLM free / Claude paid) + rate limits + quota enforcement
All checks were successful
Deploy to Production / deploy (push) Successful in 53s
All checks were successful
Deploy to Production / deploy (push) Successful in 53s
The free tier was hemorrhaging Anthropic cost with no abuse cap (no rate limit on /preview, Opus default in the build worker, 5-min cache TTL that made cache-miss the common case). This switches free users to GLM, paid users to Claude tiers, and tightens every leak found in the audit.

Backend:
- @bmm/llm: GLM provider via Zhipu's OpenAI-compatible endpoint, pickPreviewModel + pickBuildModel helpers, plan-aware ModelChoice
- preview-cache TTL 5min -> 24h (kills the cache-miss path)
- /v1/servers/preview: picks model from caller's plan, returns model name to UI
- /v1/servers POST: enforces SERVER_LIMITS per plan (402), rate-limits builds
- daily rate-limit on preview (5/40/150/1000) and build (3/20/100/500)
- /v1/auth/me returns plan so the wizard can show the right model name
- generator worker: GLM default, Anthropic Sonnet fallback if GLM errors

Frontend:
- Wizard fetches plan, shows "<model> is drafting the tool spec" pre-emptively, upgrade hint for hobby users, friendly errors for 402 / 429
- Pricing page: AI-model line per tier (Open-tier / Haiku / Sonnet / Opus), Team €149 -> €199, Enterprise €499 -> €999, daily-preview limit per tier
- Privacy + Security: explicit subprocessor disclosure for Anthropic (US) / Zhipu (CN) and which tier uses which

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
66128c73d8
commit
bc174c1302
@ -8,6 +8,7 @@ const Env = z.object({
|
||||
NEXT_PUBLIC_APP_URL: z.string().default('http://localhost:3001'),
|
||||
OAUTH_KEY_DIR: z.string().default('./keys'),
|
||||
ANTHROPIC_API_KEY: z.string().optional(),
|
||||
GLM_API_KEY: z.string().optional(),
|
||||
SECRETS_ENCRYPTION_KEY: z
|
||||
.string()
|
||||
.min(64, '32 bytes hex required')
|
||||
@ -33,6 +34,7 @@ export const config = Env.parse({
|
||||
NEXT_PUBLIC_APP_URL: process.env.NEXT_PUBLIC_APP_URL,
|
||||
OAUTH_KEY_DIR: process.env.OAUTH_KEY_DIR,
|
||||
ANTHROPIC_API_KEY: process.env.ANTHROPIC_API_KEY,
|
||||
GLM_API_KEY: process.env.GLM_API_KEY,
|
||||
SECRETS_ENCRYPTION_KEY: process.env.SECRETS_ENCRYPTION_KEY,
|
||||
CONTROL_PLANE_PUBLIC_URL: process.env.CONTROL_PLANE_PUBLIC_URL,
|
||||
ADMIN_EMAIL: process.env.ADMIN_EMAIL,
|
||||
|
||||
23
apps/api/src/lib/plan.ts
Normal file
23
apps/api/src/lib/plan.ts
Normal file
@ -0,0 +1,23 @@
|
||||
import { createDb, eq, organizations } from '@bmm/db';
|
||||
import type { Plan } from '@bmm/llm';
|
||||
|
||||
const db = createDb();
|
||||
|
||||
/** Every plan name the API recognises; any other stored value is treated as unknown. */
const KNOWN_PLANS: readonly Plan[] = ['hobby', 'pro', 'team', 'enterprise'];

/** Runtime guard: true only when `value` is one of the recognised plan names. */
function isKnownPlan(value: unknown): value is Plan {
  return typeof value === 'string' && (KNOWN_PLANS as readonly string[]).includes(value);
}

/**
 * Look up an org's current plan.
 *
 * Fails closed to 'hobby' (the least expensive tier) both when the org row is
 * gone and when the stored value is not a recognised plan name. The second
 * case matters because callers index per-plan lookup tables with the result;
 * an unrecognised string would silently produce `undefined` there.
 *
 * @param orgId - Primary key of the organization row.
 * @returns The org's plan, or 'hobby' when it cannot be determined.
 */
export async function getOrgPlan(orgId: string): Promise<Plan> {
  const [row] = await db
    .select({ plan: organizations.plan })
    .from(organizations)
    .where(eq(organizations.id, orgId))
    .limit(1);

  // Validate instead of casting: the database is an external input.
  const stored: unknown = row?.plan;
  return isKnownPlan(stored) ? stored : 'hobby';
}
|
||||
|
||||
/**
 * Max MCP servers per org by plan. Enforced at POST /v1/servers.
 *
 * Enterprise uses the largest safe integer as an "effectively unlimited"
 * value, so the same `count >= limit` comparison works for every plan.
 */
export const SERVER_LIMITS: Record<Plan, number> = {
  hobby: 1,
  pro: 5,
  team: 25,
  enterprise: Number.MAX_SAFE_INTEGER,
};
|
||||
@ -1,8 +1,11 @@
|
||||
import crypto from 'node:crypto';
|
||||
import { getRedis } from './redis.js';
|
||||
import type { GeneratorSpec } from '@bmm/types';
|
||||
import { getRedis } from './redis.js';
|
||||
|
||||
const TTL_SECONDS = 5 * 60;
|
||||
// 24h: previews are LLM-priced; a long TTL eliminates the cache-miss path on
|
||||
// the build worker (each miss = another LLM call). Specs are tiny JSON (~5KB),
|
||||
// Redis-memory impact is negligible.
|
||||
const TTL_SECONDS = 24 * 60 * 60;
|
||||
|
||||
function key(previewId: string): string {
|
||||
return `preview:${previewId}`;
|
||||
|
||||
51
apps/api/src/lib/rate-limit.ts
Normal file
51
apps/api/src/lib/rate-limit.ts
Normal file
@ -0,0 +1,51 @@
|
||||
import type { Plan } from '@bmm/llm';
|
||||
import { getRedis } from './redis.js';
|
||||
|
||||
const DAY_SEC = 24 * 60 * 60;
|
||||
|
||||
/**
 * Calendar date of `now` in UTC, formatted `YYYY-MM-DD`.
 *
 * Used as the date component of daily rate-limit keys, so a new key (and a
 * fresh counter) starts at 00:00 UTC.
 *
 * @param now - Instant to format; defaults to the current time.
 */
function todayKey(now: Date = new Date()): string {
  // toISOString() is always UTC; the first 10 characters are the date part.
  return now.toISOString().slice(0, 10);
}
|
||||
|
||||
/** Outcome of a single daily rate-limit check. */
export interface RateLimitResult {
  /** True when the call is within the allowed daily count. */
  ok: boolean;
  /** Calls still available today after this one; never negative. */
  remaining: number;
  /** Seconds until the limit resets, as reported by the limiter. */
  resetIn: number;
}
|
||||
|
||||
/** Whole seconds from `now` until the next 00:00:00 UTC (always at least 1). */
function secondsUntilUtcMidnight(now: Date): number {
  // Date.UTC normalises an out-of-range day (e.g. the 32nd) into the next month.
  const nextMidnightMs = Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), now.getUTCDate() + 1);
  return Math.max(1, Math.ceil((nextMidnightMs - now.getTime()) / 1000));
}

/**
 * Daily counter via Redis INCR. The increment itself is atomic, so there is
 * no race window between reading and writing the count.
 *
 * The key embeds the current UTC date, so the quota resets at midnight UTC
 * when the next day's key starts from zero. The 24h TTL only garbage-collects
 * old keys; it does not decide when the quota resets. `resetIn` is therefore
 * the time until the next UTC midnight, not the key's remaining TTL.
 *
 * Every call consumes one unit, including calls that come back `ok: false`.
 *
 * @param scope - Logical action being limited (e.g. 'preview', 'build').
 * @param userId - Identity the counter is kept for.
 * @param max - Allowed calls per UTC day.
 * @returns Whether this call is allowed, calls left, and seconds until reset.
 */
export async function checkDailyLimit(
  scope: string,
  userId: string,
  max: number,
): Promise<RateLimitResult> {
  const key = `ratelimit:${scope}:${userId}:${todayKey()}`;
  const redis = getRedis();

  const count = await redis.incr(key);

  // Normally only the first INCR sets the TTL. If that EXPIRE was ever lost
  // (crash or connection drop between the two commands) the key would have no
  // TTL and live forever, so re-arm it whenever it is missing.
  if (count === 1 || (await redis.ttl(key)) < 0) {
    await redis.expire(key, DAY_SEC);
  }

  return {
    ok: count <= max,
    remaining: Math.max(0, max - count),
    resetIn: secondsUntilUtcMidnight(new Date()),
  };
}
|
||||
|
||||
// Per-tier daily limits on the two LLM-priced actions.
// Preview = ~€0.002-0.015/call · Build = ~€0.005-0.22/call.

/** Previews (prompt analyses) allowed per user per UTC day, by plan. */
export const PREVIEW_DAILY_LIMIT: Record<Plan, number> = {
  hobby: 5,
  pro: 40,
  team: 150,
  enterprise: 1000,
};
|
||||
|
||||
/** Builds allowed per user per UTC day, by plan (build = ~€0.005-0.22/call). */
export const BUILD_DAILY_LIMIT: Record<Plan, number> = {
  hobby: 3,
  pro: 20,
  team: 100,
  enterprise: 500,
};
|
||||
@ -13,6 +13,7 @@ import type { FastifyInstance } from 'fastify';
|
||||
import { z } from 'zod';
|
||||
import { config } from '../config.js';
|
||||
import { audit } from '../lib/audit.js';
|
||||
import { getOrgPlan } from '../lib/plan.js';
|
||||
import { sendSms, smsConfigured } from '../lib/sms.js';
|
||||
|
||||
const SESSION_COOKIE = 'bmm_session';
|
||||
@ -128,7 +129,10 @@ export async function authRoutes(app: FastifyInstance): Promise<void> {
|
||||
const token = req.cookies[SESSION_COOKIE];
|
||||
const session = await getSession(token);
|
||||
if (!session) return reply.code(401).send({ error: 'unauthorized' });
|
||||
return reply.send({ user: session });
|
||||
// Plan is on the org, not the session — look it up fresh so a Stripe
|
||||
// upgrade is reflected without forcing a re-login.
|
||||
const plan = await getOrgPlan(session.orgId);
|
||||
return reply.send({ user: { ...session, plan } });
|
||||
});
|
||||
|
||||
app.post('/v1/auth/admin/login', async (req, reply) => {
|
||||
|
||||
@ -11,7 +11,13 @@ import {
|
||||
sql,
|
||||
templates,
|
||||
} from '@bmm/db';
|
||||
import { BannedPatternError, SpecTimeoutError, SpecValidationError, generateSpec } from '@bmm/llm';
|
||||
import {
|
||||
BannedPatternError,
|
||||
SpecTimeoutError,
|
||||
SpecValidationError,
|
||||
generateSpec,
|
||||
pickPreviewModel,
|
||||
} from '@bmm/llm';
|
||||
import {
|
||||
BuildEvent,
|
||||
CreateServerInput,
|
||||
@ -26,8 +32,10 @@ import { config } from '../config.js';
|
||||
import { audit } from '../lib/audit.js';
|
||||
import { encryptSecret } from '../lib/crypto.js';
|
||||
import { stopContainer } from '../lib/docker.js';
|
||||
import { SERVER_LIMITS, getOrgPlan } from '../lib/plan.js';
|
||||
import { cacheSpec, loadSpec, overwriteSpec } from '../lib/preview-cache.js';
|
||||
import { getBuildQueue } from '../lib/queue.js';
|
||||
import { BUILD_DAILY_LIMIT, PREVIEW_DAILY_LIMIT, checkDailyLimit } from '../lib/rate-limit.js';
|
||||
import { buildChannel, getSubscriber } from '../lib/redis.js';
|
||||
import { requireAuth } from '../plugins/session.js';
|
||||
import { getForkRefTemplate } from './templates.js';
|
||||
@ -46,26 +54,47 @@ export async function serverRoutes(app: FastifyInstance): Promise<void> {
|
||||
});
|
||||
|
||||
app.post('/v1/servers/preview', { preHandler: requireAuth }, async (req, reply) => {
|
||||
const user = req.user!;
|
||||
const parsed = PreviewInput.safeParse(req.body);
|
||||
if (!parsed.success) {
|
||||
return reply.code(400).send({ error: 'invalid_input', issues: parsed.error.flatten() });
|
||||
}
|
||||
|
||||
const plan = await getOrgPlan(user.orgId);
|
||||
|
||||
// Daily preview rate-limit per user. Free is tight (5/day) because every
|
||||
// preview is a paid LLM call; paid tiers have headroom for real iteration.
|
||||
const rl = await checkDailyLimit('preview', user.userId, PREVIEW_DAILY_LIMIT[plan]);
|
||||
if (!rl.ok) {
|
||||
return reply.code(429).send({
|
||||
error: 'rate_limited',
|
||||
detail: `Daily preview limit reached for plan "${plan}" (${PREVIEW_DAILY_LIMIT[plan]}/day). Resets in ${Math.ceil(rl.resetIn / 3600)}h.`,
|
||||
plan,
|
||||
limit: PREVIEW_DAILY_LIMIT[plan],
|
||||
resetIn: rl.resetIn,
|
||||
});
|
||||
}
|
||||
|
||||
const choice = pickPreviewModel(plan);
|
||||
|
||||
try {
|
||||
const { spec, source } = await generateSpec(parsed.data.prompt, {
|
||||
provider: choice.provider,
|
||||
apiKey: config.ANTHROPIC_API_KEY,
|
||||
// Preview generates the spec synchronously inside an HTTP request that
|
||||
// sits behind Cloudflare's edge timeout. Haiku 4.5 (~200 tok/s — a full
|
||||
// 8k-token spec in ~40s) is the only model fast enough; Sonnet and Opus
|
||||
// overran the proxy cap, which reached the browser as a CORS error. The
|
||||
// hard 60s timeout guarantees a clean 504 before the proxy gives up.
|
||||
model: 'claude-haiku-4-5-20251001',
|
||||
timeoutMs: 60_000,
|
||||
glmApiKey: config.GLM_API_KEY,
|
||||
model: choice.model,
|
||||
maxTokens: choice.maxTokens,
|
||||
timeoutMs: choice.timeoutMs,
|
||||
maxRetries: 0,
|
||||
});
|
||||
const previewId = await cacheSpec(spec);
|
||||
return reply.send({
|
||||
previewId,
|
||||
source,
|
||||
plan,
|
||||
modelDisplayName: choice.displayName,
|
||||
modelBadge: choice.displayBadge,
|
||||
upgradeHint: plan === 'hobby',
|
||||
spec: {
|
||||
name: spec.name,
|
||||
description: spec.description,
|
||||
@ -112,6 +141,37 @@ export async function serverRoutes(app: FastifyInstance): Promise<void> {
|
||||
templateId,
|
||||
} = parsed.data;
|
||||
|
||||
// ---- Plan enforcement (must happen before any DB write) ----
|
||||
const plan = await getOrgPlan(user.orgId);
|
||||
|
||||
// Daily build rate-limit.
|
||||
const rl = await checkDailyLimit('build', user.userId, BUILD_DAILY_LIMIT[plan]);
|
||||
if (!rl.ok) {
|
||||
return reply.code(429).send({
|
||||
error: 'rate_limited',
|
||||
detail: `Daily build limit reached for plan "${plan}" (${BUILD_DAILY_LIMIT[plan]}/day). Resets in ${Math.ceil(rl.resetIn / 3600)}h.`,
|
||||
plan,
|
||||
limit: BUILD_DAILY_LIMIT[plan],
|
||||
resetIn: rl.resetIn,
|
||||
});
|
||||
}
|
||||
|
||||
// Server-count quota. Counted via SQL (not cached) so race risk is tiny.
|
||||
const [serverCountRow] = await db
|
||||
.select({ count: sql<number>`count(*)::int` })
|
||||
.from(mcpServers)
|
||||
.where(eq(mcpServers.orgId, user.orgId));
|
||||
const existingCount = serverCountRow?.count ?? 0;
|
||||
if (existingCount >= SERVER_LIMITS[plan]) {
|
||||
return reply.code(402).send({
|
||||
error: 'plan_limit_reached',
|
||||
detail: `Plan "${plan}" allows ${SERVER_LIMITS[plan]} server(s); you have ${existingCount}. Upgrade to add more.`,
|
||||
plan,
|
||||
limit: SERVER_LIMITS[plan],
|
||||
current: existingCount,
|
||||
});
|
||||
}
|
||||
|
||||
// ---- Template-fork validation ----
|
||||
// templateId is user-controlled. To prevent fork_count manipulation + garbage
|
||||
// template_id rows, the user MUST have hit POST /v1/templates/:slug/fork,
|
||||
|
||||
@ -4,13 +4,14 @@ const Env = z.object({
|
||||
DATABASE_URL: z.string(),
|
||||
REDIS_URL: z.string().default('redis://localhost:6379'),
|
||||
ANTHROPIC_API_KEY: z.string().optional(),
|
||||
GLM_API_KEY: z.string().optional(),
|
||||
RUNNER_HOST: z.string().default('localhost'),
|
||||
RUNNER_PORT_RANGE_START: z.coerce.number().default(4100),
|
||||
RUNNER_PORT_RANGE_END: z.coerce.number().default(4999),
|
||||
CONTROL_PLANE_URL: z.string().default('http://host.docker.internal:4000'),
|
||||
CONTROL_PLANE_PUBLIC_URL: z.string().default('http://localhost:4000'),
|
||||
OAUTH_ISSUER: z.string().optional(),
|
||||
MODEL_GENERATE: z.string().default('claude-opus-4-7'),
|
||||
MODEL_GENERATE: z.string().default('glm-4.5'),
|
||||
MODEL_FIX: z.string().default('claude-haiku-4-5-20251001'),
|
||||
});
|
||||
|
||||
|
||||
@ -1,12 +1,40 @@
|
||||
import { generateSpec as sharedGenerate, type GenerationResult } from '@bmm/llm';
|
||||
import { type GenerationResult, generateSpec as sharedGenerate } from '@bmm/llm';
|
||||
import { config } from '../config.js';
|
||||
|
||||
export type { GenerationResult };
|
||||
|
||||
/**
 * Build-worker spec generation (cache-miss path). Runs async in a BullMQ
 * worker — no proxy timeout. Defaults to GLM to keep this rare path cheap;
 * falls back to Anthropic Sonnet on GLM failure so a temporary outage at one
 * provider doesn't break builds.
 *
 * Provider order:
 *  1. GLM, when GLM_API_KEY is set.
 *  2. Anthropic Sonnet, when GLM is unconfigured or failed and
 *     ANTHROPIC_API_KEY is set.
 *  3. No keys at all: delegate to @bmm/llm without credentials (per the
 *     original note this yields the mock spec, keeping dev builds working).
 *
 * If GLM was configured but failed and there is no Anthropic key, the GLM
 * error is rethrown rather than silently building from a mock spec.
 *
 * NOTE(review): this path is not plan-aware. Every build, paid tiers included,
 * tries GLM first. Confirm that matches the per-tier provider disclosure.
 *
 * @param prompt - The user's natural-language server description.
 * @throws Whatever the last attempted provider call throws.
 */
export async function generateSpec(prompt: string): Promise<GenerationResult> {
  let glmFailed = false;
  let glmError: unknown;

  if (config.GLM_API_KEY) {
    try {
      return await sharedGenerate(prompt, {
        provider: 'glm',
        glmApiKey: config.GLM_API_KEY,
        model: config.MODEL_GENERATE,
        maxTokens: 8192,
        timeoutMs: 180_000,
      });
    } catch (err: unknown) {
      glmFailed = true;
      glmError = err;
      const reason = err instanceof Error ? err.message : String(err);
      console.warn('[generator] GLM failed, falling back to Anthropic Sonnet:', reason);
    }
  }

  if (!config.ANTHROPIC_API_KEY) {
    // A configured provider failed and there is nothing to fall back to:
    // surface the real error so the build is marked failed, not mocked.
    if (glmFailed) throw glmError;
    // No keys at all → @bmm/llm returns mockSpec, which keeps builds working
    // in dev without any provider configured.
    return sharedGenerate(prompt, { provider: 'anthropic' });
  }

  // MODEL_GENERATE now names a GLM model, so the Anthropic fallback pins its
  // own model instead of reusing that setting.
  return sharedGenerate(prompt, {
    provider: 'anthropic',
    apiKey: config.ANTHROPIC_API_KEY,
    model: 'claude-sonnet-4-6',
    maxTokens: 8192,
  });
}
|
||||
|
||||
@ -1,13 +1,13 @@
|
||||
import { builds, createDb, eq, mcpServers } from '@bmm/db';
|
||||
import { GeneratorSpec } from '@bmm/types';
|
||||
import { Worker } from 'bullmq';
|
||||
import { Redis } from 'ioredis';
|
||||
import { GeneratorSpec } from '@bmm/types';
|
||||
import { builds, createDb, eq, mcpServers } from '@bmm/db';
|
||||
import { config } from './config.js';
|
||||
import { generateSpec } from './lib/claude.js';
|
||||
import { renderServerCode } from './lib/render.js';
|
||||
import { dockerBuild, prepareBuildContext, staticCheck } from './lib/build.js';
|
||||
import { generateSpec } from './lib/claude.js';
|
||||
import { allocatePort, deployContainer, dockerAvailable, stopContainer } from './lib/deploy.js';
|
||||
import { emitDone, emitError, emitLog, emitStatus } from './lib/emit.js';
|
||||
import { renderServerCode } from './lib/render.js';
|
||||
|
||||
const db = createDb();
|
||||
const connection = new Redis(config.REDIS_URL, { maxRetriesPerRequest: null });
|
||||
@ -57,12 +57,18 @@ export const worker = new Worker<JobData>(
|
||||
const oldContainerId = priorState?.containerId ?? null;
|
||||
|
||||
try {
|
||||
await db.update(builds).set({ status: 'generating', startedAt: new Date() }).where(eq(builds.id, buildId));
|
||||
await db.update(mcpServers).set({ status: 'generating', updatedAt: new Date() }).where(eq(mcpServers.id, serverId));
|
||||
await db
|
||||
.update(builds)
|
||||
.set({ status: 'generating', startedAt: new Date() })
|
||||
.where(eq(builds.id, buildId));
|
||||
await db
|
||||
.update(mcpServers)
|
||||
.set({ status: 'generating', updatedAt: new Date() })
|
||||
.where(eq(mcpServers.id, serverId));
|
||||
await emitStatus(buildId, 'generating');
|
||||
|
||||
let spec: GeneratorSpec | null = null;
|
||||
let source: 'claude' | 'mock' | 'cached' = 'mock';
|
||||
let source: 'claude' | 'glm' | 'mock' | 'cached' = 'mock';
|
||||
|
||||
if (previewId) {
|
||||
spec = await loadCachedSpec(previewId);
|
||||
@ -87,7 +93,10 @@ export const worker = new Worker<JobData>(
|
||||
let generatedCode: string;
|
||||
const prebuilt = previewId ? await loadPrebuiltCode(previewId) : null;
|
||||
if (prebuilt) {
|
||||
await log('info', `Using pre-rendered template code (${prebuilt.length} chars) — skipping render`);
|
||||
await log(
|
||||
'info',
|
||||
`Using pre-rendered template code (${prebuilt.length} chars) — skipping render`,
|
||||
);
|
||||
generatedCode = prebuilt;
|
||||
} else {
|
||||
generatedCode = renderServerCode(spec);
|
||||
@ -98,11 +107,20 @@ export const worker = new Worker<JobData>(
|
||||
.where(eq(builds.id, buildId));
|
||||
|
||||
await db.update(builds).set({ status: 'building' }).where(eq(builds.id, buildId));
|
||||
await db.update(mcpServers).set({ status: 'building', toolsSchema: spec.tools, updatedAt: new Date() }).where(eq(mcpServers.id, serverId));
|
||||
await db
|
||||
.update(mcpServers)
|
||||
.set({ status: 'building', toolsSchema: spec.tools, updatedAt: new Date() })
|
||||
.where(eq(mcpServers.id, serverId));
|
||||
await emitStatus(buildId, 'building');
|
||||
await log('info', 'Preparing build context...');
|
||||
|
||||
const { contextDir, imageTag } = await prepareBuildContext(serverId, version, slug, generatedCode, spec);
|
||||
const { contextDir, imageTag } = await prepareBuildContext(
|
||||
serverId,
|
||||
version,
|
||||
slug,
|
||||
generatedCode,
|
||||
spec,
|
||||
);
|
||||
await log('info', `Build context at ${contextDir}`);
|
||||
|
||||
await log('info', 'Running static checks...');
|
||||
@ -112,8 +130,14 @@ export const worker = new Worker<JobData>(
|
||||
const hasDocker = await dockerAvailable();
|
||||
if (!hasDocker) {
|
||||
await log('warn', 'Docker not available — skipping build/deploy. Server marked draft.');
|
||||
await db.update(builds).set({ status: 'failed', errorMessage: 'docker_unavailable', finishedAt: new Date() }).where(eq(builds.id, buildId));
|
||||
await db.update(mcpServers).set({ status: 'failed', updatedAt: new Date() }).where(eq(mcpServers.id, serverId));
|
||||
await db
|
||||
.update(builds)
|
||||
.set({ status: 'failed', errorMessage: 'docker_unavailable', finishedAt: new Date() })
|
||||
.where(eq(builds.id, buildId));
|
||||
await db
|
||||
.update(mcpServers)
|
||||
.set({ status: 'failed', updatedAt: new Date() })
|
||||
.where(eq(mcpServers.id, serverId));
|
||||
await emitDone(buildId, 'failed', serverId, null);
|
||||
return;
|
||||
}
|
||||
@ -125,7 +149,10 @@ export const worker = new Worker<JobData>(
|
||||
await log('info', 'Image built.');
|
||||
|
||||
await db.update(builds).set({ status: 'deploying' }).where(eq(builds.id, buildId));
|
||||
await db.update(mcpServers).set({ status: 'deploying', updatedAt: new Date() }).where(eq(mcpServers.id, serverId));
|
||||
await db
|
||||
.update(mcpServers)
|
||||
.set({ status: 'deploying', updatedAt: new Date() })
|
||||
.where(eq(mcpServers.id, serverId));
|
||||
await emitStatus(buildId, 'deploying');
|
||||
|
||||
const port = await allocatePort();
|
||||
@ -140,7 +167,10 @@ export const worker = new Worker<JobData>(
|
||||
};
|
||||
|
||||
const handle = await deployContainer({ serverId, slug, hostPort: port, imageTag, envVars });
|
||||
await log('info', `Container ${handle.containerId.slice(0, 12)} running at ${handle.publicUrl}`);
|
||||
await log(
|
||||
'info',
|
||||
`Container ${handle.containerId.slice(0, 12)} running at ${handle.publicUrl}`,
|
||||
);
|
||||
|
||||
await db
|
||||
.update(builds)
|
||||
@ -148,7 +178,12 @@ export const worker = new Worker<JobData>(
|
||||
.where(eq(builds.id, buildId));
|
||||
await db
|
||||
.update(mcpServers)
|
||||
.set({ status: 'live', currentVersion: version, publicUrl: handle.publicUrl, updatedAt: new Date() })
|
||||
.set({
|
||||
status: 'live',
|
||||
currentVersion: version,
|
||||
publicUrl: handle.publicUrl,
|
||||
updatedAt: new Date(),
|
||||
})
|
||||
.where(eq(mcpServers.id, serverId));
|
||||
|
||||
// Rolling deploy: the new container is live — now retire the previous one.
|
||||
|
||||
@ -7,6 +7,7 @@ import { StreamingLogs } from '@/components/streaming-logs';
|
||||
import { Button } from '@/components/ui/button';
|
||||
import { apiFetch } from '@/lib/api';
|
||||
import { Loader2, RotateCcw, X } from 'lucide-react';
|
||||
import Link from 'next/link';
|
||||
import { useRouter, useSearchParams } from 'next/navigation';
|
||||
import { Suspense, useEffect, useState } from 'react';
|
||||
|
||||
@ -41,9 +42,15 @@ interface PreviewTool {
|
||||
inputSchema: Record<string, unknown>;
|
||||
}
|
||||
|
||||
type Plan = 'hobby' | 'pro' | 'team' | 'enterprise';
|
||||
|
||||
interface PreviewResponse {
|
||||
previewId: string;
|
||||
source: 'claude' | 'mock';
|
||||
source: 'claude' | 'glm' | 'mock';
|
||||
plan?: Plan;
|
||||
modelDisplayName?: string;
|
||||
modelBadge?: 'open-tier' | 'claude-haiku' | 'claude-sonnet' | 'claude-opus';
|
||||
upgradeHint?: boolean;
|
||||
spec: {
|
||||
name: string;
|
||||
description?: string;
|
||||
@ -53,6 +60,13 @@ interface PreviewResponse {
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Model name and typical wait shown on the "analyzing" screen, keyed by plan.
 * This is display copy chosen client-side before the preview response
 * arrives; the model actually used is picked by the API.
 */
const PREVIEW_MODEL_BY_PLAN: Record<Plan, { name: string; estimate: string }> = {
  hobby: { name: 'Open-tier AI', estimate: '30–60 seconds' },
  pro: { name: 'Claude Haiku 4.5', estimate: '10–20 seconds' },
  team: { name: 'Claude Sonnet 4.6', estimate: '15–40 seconds' },
  enterprise: { name: 'Claude Sonnet 4.6', estimate: '15–40 seconds' },
};
|
||||
|
||||
interface EditableTool {
|
||||
name: string;
|
||||
description: string;
|
||||
@ -86,6 +100,7 @@ function NewServerPageInner() {
|
||||
const router = useRouter();
|
||||
const [step, setStep] = useState<Step>('prompt');
|
||||
const [elapsedSec, setElapsedSec] = useState(0);
|
||||
const [userPlan, setUserPlan] = useState<Plan | null>(null);
|
||||
|
||||
const [prompt, setPrompt] = useState('');
|
||||
const [name, setName] = useState('');
|
||||
@ -207,6 +222,14 @@ function NewServerPageInner() {
|
||||
return () => clearInterval(id);
|
||||
}, [step]);
|
||||
|
||||
// Plan determines which model the preview will use — we display its name
|
||||
// *before* the request so the user knows what they're waiting for.
|
||||
useEffect(() => {
|
||||
apiFetch<{ user: { plan?: Plan } }>('/v1/auth/me')
|
||||
.then((r) => setUserPlan(r.user.plan ?? 'hobby'))
|
||||
.catch(() => setUserPlan('hobby'));
|
||||
}, []);
|
||||
|
||||
async function analyze() {
|
||||
setError(null);
|
||||
if (prompt.trim().length < 10) {
|
||||
@ -358,13 +381,23 @@ function NewServerPageInner() {
|
||||
setServerId(res.server.id);
|
||||
setStep('building');
|
||||
} catch (e) {
|
||||
const detail = (e as { detail?: { error?: string; detail?: unknown } }).detail;
|
||||
const detail = (e as { detail?: { error?: string; detail?: string } }).detail;
|
||||
const code = detail?.error;
|
||||
setError(
|
||||
code === 'slug_taken'
|
||||
? `The slug "${slug}" is already used by one of your servers — change the Slug field above.`
|
||||
: (code ?? (e as Error).message),
|
||||
);
|
||||
if (code === 'slug_taken') {
|
||||
setError(
|
||||
`The slug "${slug}" is already used by one of your servers — change the Slug field above.`,
|
||||
);
|
||||
return;
|
||||
}
|
||||
if (code === 'plan_limit_reached') {
|
||||
setError(`${detail?.detail ?? 'Plan limit reached.'} See /pricing to upgrade.`);
|
||||
return;
|
||||
}
|
||||
if (code === 'rate_limited') {
|
||||
setError(detail?.detail ?? 'Daily build limit reached — try again tomorrow or upgrade.');
|
||||
return;
|
||||
}
|
||||
setError(detail?.detail ?? code ?? (e as Error).message);
|
||||
}
|
||||
}
|
||||
|
||||
@ -457,8 +490,18 @@ function NewServerPageInner() {
|
||||
<Loader2 className="mx-auto animate-spin text-[--color-accent]" size={22} />
|
||||
<p className="mt-4 text-[13px]">Analyzing your prompt…</p>
|
||||
<p className="mt-1 text-[12px] text-[--color-fg-subtle]">
|
||||
Claude is drafting the tool spec. Usually 15–40 seconds.
|
||||
{(userPlan ? PREVIEW_MODEL_BY_PLAN[userPlan] : PREVIEW_MODEL_BY_PLAN.hobby).name} is
|
||||
drafting the tool spec. Usually{' '}
|
||||
{(userPlan ? PREVIEW_MODEL_BY_PLAN[userPlan] : PREVIEW_MODEL_BY_PLAN.hobby).estimate}.
|
||||
</p>
|
||||
{userPlan === 'hobby' && (
|
||||
<p className="mt-2 text-[11px] text-[--color-fg-muted]">
|
||||
<Link href="/pricing" className="text-[--color-accent] hover:underline">
|
||||
Upgrade to Pro
|
||||
</Link>{' '}
|
||||
for ~3× faster analysis with Claude Haiku.
|
||||
</p>
|
||||
)}
|
||||
<p className="mono mt-3 text-[11px] tabular-nums text-[--color-fg-muted]">
|
||||
{elapsedSec}s elapsed
|
||||
</p>
|
||||
@ -524,7 +567,7 @@ function NewServerPageInner() {
|
||||
</button>
|
||||
)}
|
||||
<span className="mono text-[10.5px] text-[--color-fg-subtle]">
|
||||
spec via {preview.source}
|
||||
drafted with {preview.modelDisplayName ?? preview.source}
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@ -14,9 +14,12 @@ const TIERS = [
|
||||
price: '€0',
|
||||
tag: 'Forever free',
|
||||
description: 'For trying things out and shipping single-user tools.',
|
||||
model: 'Open-tier AI',
|
||||
modelDetail: 'Free-tier model · ~30-60s analyze',
|
||||
features: [
|
||||
'1 MCP server',
|
||||
'100,000 tool calls / month',
|
||||
'5 prompt analyses / day',
|
||||
'BuildMyMCP subdomain',
|
||||
'Community support',
|
||||
],
|
||||
@ -28,9 +31,12 @@ const TIERS = [
|
||||
price: '€49',
|
||||
tag: '/ month',
|
||||
description: 'For solo founders and small teams shipping production tools.',
|
||||
model: 'Claude Haiku 4.5',
|
||||
modelDetail: 'Anthropic · ~10-20s analyze',
|
||||
features: [
|
||||
'5 MCP servers',
|
||||
'1M tool calls / month',
|
||||
'40 prompt analyses / day',
|
||||
'Custom domain',
|
||||
'Priority build queue',
|
||||
'Email support, 1 business-day SLA',
|
||||
@ -41,12 +47,15 @@ const TIERS = [
|
||||
},
|
||||
{
|
||||
name: 'Team',
|
||||
price: '€149',
|
||||
price: '€199',
|
||||
tag: '/ month',
|
||||
description: 'For teams with RBAC, audit, and 99.9% SLA needs.',
|
||||
model: 'Claude Sonnet 4.6',
|
||||
modelDetail: "Anthropic's flagship",
|
||||
features: [
|
||||
'25 MCP servers',
|
||||
'10M tool calls / month',
|
||||
'150 prompt analyses / day',
|
||||
'RBAC + extended audit log',
|
||||
'99.9% uptime SLA',
|
||||
'Shared Slack channel support',
|
||||
@ -56,9 +65,11 @@ const TIERS = [
|
||||
},
|
||||
{
|
||||
name: 'Enterprise',
|
||||
price: '€499+',
|
||||
price: '€999+',
|
||||
tag: '/ month',
|
||||
description: 'For organizations bringing their own cloud, SSO and dedicated infra.',
|
||||
model: 'Sonnet + Opus on build',
|
||||
modelDetail: 'EU data-residency option',
|
||||
features: [
|
||||
'Unlimited servers',
|
||||
'BYOC (AWS, GCP, Azure, Hetzner)',
|
||||
@ -122,6 +133,13 @@ export default function Pricing() {
|
||||
<p className="mt-2 text-[12px] leading-relaxed text-[--color-fg-muted]">
|
||||
{t.description}
|
||||
</p>
|
||||
<div className="mt-3 rounded-md border border-[--color-border] bg-[--color-bg-subtle] px-2.5 py-1.5">
|
||||
<div className="text-[10.5px] uppercase tracking-wider text-[--color-fg-subtle]">
|
||||
AI model
|
||||
</div>
|
||||
<div className="mt-0.5 text-[12.5px] font-medium text-[--color-fg]">{t.model}</div>
|
||||
<div className="text-[10.5px] text-[--color-fg-subtle]">{t.modelDetail}</div>
|
||||
</div>
|
||||
<ul className="mt-4 space-y-1.5 text-[12.5px] text-[--color-fg-muted]">
|
||||
{t.features.map((f) => (
|
||||
<li key={f}>— {f}</li>
|
||||
|
||||
@ -36,11 +36,21 @@ const SECTIONS = [
|
||||
{
|
||||
h: 'Subprocessors',
|
||||
p: [
|
||||
"Anthropic (generation) — only the prompt text you send. Anthropic's data-retention policy applies.",
|
||||
'Hetzner (compute).',
|
||||
'Backblaze (encrypted backups).',
|
||||
'Stripe (billing).',
|
||||
'Cloudflare (DNS + DDoS).',
|
||||
"Anthropic, USA (Claude AI — used for prompt analysis and code generation on Pro / Team / Enterprise tiers). Only the prompt text and resulting spec are sent. Anthropic's data-retention policy applies.",
|
||||
'Zhipu AI, China (GLM model — used for prompt analysis on the free Hobby tier only). Only the prompt text and resulting spec are sent. Upgrade to a paid tier to keep all AI processing within Anthropic (US).',
|
||||
'Hetzner, Germany (compute).',
|
||||
'Backblaze, EU (encrypted backups).',
|
||||
'Stripe, Ireland (billing).',
|
||||
'Cloudflare (DNS + DDoS protection).',
|
||||
],
|
||||
},
|
||||
{
|
||||
h: 'AI processing per tier',
|
||||
p: [
|
||||
'Hobby (free): prompts are sent to Zhipu AI (GLM, China) for analysis. Choose a paid tier if your prompts contain data that must not leave the EU/US.',
|
||||
'Pro: prompts are sent to Anthropic (Claude Haiku 4.5, USA).',
|
||||
'Team: prompts are sent to Anthropic (Claude Sonnet 4.6, USA).',
|
||||
'Enterprise: Anthropic (Claude Sonnet + Opus, USA) with EU-data-residency opt-in available on request.',
|
||||
],
|
||||
},
|
||||
{
|
||||
|
||||
@ -40,7 +40,11 @@ const PILLARS = [
|
||||
},
|
||||
{
|
||||
title: 'Rate limiting',
|
||||
body: 'Default 100 requests/min/IP per tool, enforced at the Traefik layer before traffic ever reaches your container.',
|
||||
body: 'Default 100 requests/min/IP per tool, enforced at the Traefik layer before traffic ever reaches your container. Daily preview + build caps per tier protect against runaway LLM spend.',
|
||||
},
|
||||
{
|
||||
title: 'AI provider by tier — transparent',
|
||||
body: "Hobby (free) tier uses Zhipu's GLM model (servers in China) for prompt analysis — chosen for cost so we can offer a real free tier. Pro, Team and Enterprise use Anthropic Claude (US). Enterprise can request EU-only data residency. The provider is shown live in the wizard so you always know where your prompt is going.",
|
||||
},
|
||||
];
|
||||
|
||||
|
||||
@ -45,18 +45,138 @@ const BANNED_PATTERNS = [
|
||||
/disregard\s+(the\s+)?(above|previous)/i,
|
||||
];
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
// Plan-aware model selection
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/** Billing plan of an organization; drives model choice and quotas. */
export type Plan = 'hobby' | 'pro' | 'team' | 'enterprise';
/** Which LLM-priced action a model is being chosen for. */
export type Purpose = 'preview' | 'build';
/** Upstream LLM vendor. */
export type Provider = 'anthropic' | 'glm';
/** Stable identifier the UI can key a model badge on. */
export type DisplayBadge = 'open-tier' | 'claude-haiku' | 'claude-sonnet' | 'claude-opus';
|
||||
|
||||
/** Everything needed to run, and to label, one LLM call for a given plan. */
export interface ModelChoice {
  /** Vendor the request is sent to. */
  provider: Provider;
  /** Vendor-specific model identifier. */
  model: string;
  /** Upper bound on generated tokens for the call. */
  maxTokens: number;
  /** Request timeout in milliseconds. */
  timeoutMs: number;
  /** User-facing model name shown in the wizard + previews. */
  displayName: string;
  /** Machine-friendly badge identifier returned alongside the display name. */
  displayBadge: DisplayBadge;
}
|
||||
|
||||
/**
 * Preview runs synchronously inside an HTTP request behind Cloudflare's
 * ~100s edge cap. Each tier's (model + max_tokens + timeout) is bounded to
 * fit. Hobby uses GLM as the cost lever; paid tiers escalate to Claude — the
 * visible quality/speed jump *is* the upgrade pitch.
 *
 * Measured token rates: glm-4-plus ~58 tok/s (3500 tok ≈ 60s) ·
 * Claude Haiku 4.5 ~200 tok/s (8192 tok ≈ 41s) · Claude Sonnet 4.6 ~80 tok/s.
 *
 * Invariant: maxTokens / tokens-per-second must not exceed timeoutMs, or a
 * full-length response can never complete. At ~80 tok/s Sonnet would need
 * ~102s for 8192 tokens, so the Sonnet tiers are capped at 4500 tokens (≈ 56s)
 * to fit their 60s timeout.
 */
const PREVIEW_MODELS: Record<Plan, ModelChoice> = {
  hobby: {
    provider: 'glm',
    model: 'glm-4-plus',
    maxTokens: 3500,
    timeoutMs: 65_000,
    displayName: 'Open-tier AI',
    displayBadge: 'open-tier',
  },
  pro: {
    provider: 'anthropic',
    model: 'claude-haiku-4-5-20251001',
    maxTokens: 8192,
    timeoutMs: 60_000,
    displayName: 'Claude Haiku 4.5',
    displayBadge: 'claude-haiku',
  },
  team: {
    provider: 'anthropic',
    model: 'claude-sonnet-4-6',
    maxTokens: 4500,
    timeoutMs: 60_000,
    displayName: 'Claude Sonnet 4.6',
    displayBadge: 'claude-sonnet',
  },
  enterprise: {
    provider: 'anthropic',
    model: 'claude-sonnet-4-6',
    maxTokens: 4500,
    timeoutMs: 60_000,
    displayName: 'Claude Sonnet 4.6',
    displayBadge: 'claude-sonnet',
  },
};
|
||||
|
||||
/** Shared GLM settings for the build path; copied per plan, never aliased. */
const GLM_BUILD_CHOICE: ModelChoice = {
  provider: 'glm',
  model: 'glm-4.5',
  maxTokens: 8192,
  timeoutMs: 180_000,
  displayName: 'Open-tier AI',
  displayBadge: 'open-tier',
};

/**
 * Build worker runs async via BullMQ — no proxy timeout. With the 24h preview
 * cache TTL cache-misses are rare, so GLM as the default keeps that rare path
 * cheap; Enterprise gets Opus as a premium-quality promise.
 *
 * NOTE(review): pro and team route build-time generation to GLM. The
 * user-facing privacy copy says paid tiers keep AI processing with Anthropic.
 * Confirm which is intended and align the table or the copy.
 */
const BUILD_MODELS: Record<Plan, ModelChoice> = {
  hobby: { ...GLM_BUILD_CHOICE },
  pro: { ...GLM_BUILD_CHOICE },
  team: { ...GLM_BUILD_CHOICE },
  enterprise: {
    provider: 'anthropic',
    model: 'claude-opus-4-7',
    maxTokens: 8192,
    timeoutMs: 600_000,
    displayName: 'Claude Opus 4.7',
    displayBadge: 'claude-opus',
  },
};
|
||||
|
||||
/**
 * Looks up the preview model configuration for a plan.
 *
 * @param plan - The caller's plan.
 * @returns The matching `PREVIEW_MODELS` entry (the table object itself, not a copy).
 */
export function pickPreviewModel(plan: Plan): ModelChoice {
  const { [plan]: choice } = PREVIEW_MODELS;
  return choice;
}
|
||||
|
||||
/**
 * Looks up the build-worker model configuration for a plan.
 *
 * @param plan - The caller's plan.
 * @returns The matching `BUILD_MODELS` entry (the table object itself, not a copy).
 */
export function pickBuildModel(plan: Plan): ModelChoice {
  const { [plan]: choice } = BUILD_MODELS;
  return choice;
}
|
||||
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
// Generation API
|
||||
// ──────────────────────────────────────────────────────────────────────────
|
||||
|
||||
/** Outcome of a spec generation: the validated spec plus the backend that produced it. */
export interface GenerationResult {
  /** The generated spec after schema validation (or the mock spec). */
  spec: GeneratorSpecT;
  /**
   * Which path produced `spec`: 'claude' for Anthropic, 'glm' for Zhipu GLM,
   * 'mock' when no API key was available for the selected provider.
   */
  source: 'claude' | 'glm' | 'mock';
}
|
||||
|
||||
/** Options accepted by `generateSpec`. Every field is optional. */
export interface GenerateOptions {
  /** 'anthropic' (default) or 'glm'. */
  provider?: Provider;
  /**
   * Anthropic API key — required if provider === 'anthropic'.
   * Without it `generateSpec` returns the mock spec.
   */
  apiKey?: string;
  /**
   * Zhipu (GLM) API key — required if provider === 'glm'.
   * Without it `generateSpec` returns the mock spec.
   */
  glmApiKey?: string;
  /** Model identifier. Defaults to 'claude-opus-4-7' (Anthropic) or 'glm-4-plus' (GLM). */
  model?: string;
  /** Response token cap. Defaults to 8192 (Anthropic) or 4096 (GLM). */
  maxTokens?: number;
  /**
   * Per-attempt request timeout in ms. Omit to use the SDK default (Anthropic);
   * the GLM path applies no timeout when this is omitted.
   */
  timeoutMs?: number;
  /** SDK retry count. Anthropic only; omit to use the SDK default. */
  maxRetries?: number;
}
|
||||
|
||||
/**
 * Generates a validated spec for `prompt` using the selected provider.
 *
 * When the key for the selected provider is missing (or empty) no network
 * call is made and the mock spec is returned with `source: 'mock'`.
 *
 * @param prompt - The user's description of what to generate.
 * @param opts - Provider, credentials and request limits; see `GenerateOptions`.
 * @returns The provider's result, or the mock result when no key is available.
 */
export async function generateSpec(
  prompt: string,
  opts: GenerateOptions = {},
): Promise<GenerationResult> {
  const useGlm = (opts.provider ?? 'anthropic') === 'glm';

  // Each provider reads its own key; the other provider's key is never used as a substitute.
  const key = useGlm ? opts.glmApiKey : opts.apiKey;
  if (!key) {
    return { spec: mockSpec(prompt), source: 'mock' };
  }

  if (useGlm) {
    return generateWithGlm(prompt, {
      apiKey: key,
      model: opts.model ?? 'glm-4-plus',
      maxTokens: opts.maxTokens ?? 4096,
      timeoutMs: opts.timeoutMs,
    });
  }

  return generateWithAnthropic(prompt, {
    apiKey: key,
    model: opts.model ?? 'claude-opus-4-7',
    maxTokens: opts.maxTokens ?? 8192,
    timeoutMs: opts.timeoutMs,
    maxRetries: opts.maxRetries,
  });
}
|
||||
|
||||
async function generateWithAnthropic(
|
||||
prompt: string,
|
||||
opts: {
|
||||
apiKey: string;
|
||||
model: string;
|
||||
maxTokens: number;
|
||||
timeoutMs?: number;
|
||||
maxRetries?: number;
|
||||
},
|
||||
): Promise<GenerationResult> {
|
||||
const client = new Anthropic({ apiKey: opts.apiKey });
|
||||
const requestOptions: { timeout?: number; maxRetries?: number } = {};
|
||||
if (opts.timeoutMs !== undefined) requestOptions.timeout = opts.timeoutMs;
|
||||
@ -75,35 +226,81 @@ export async function generateSpec(
|
||||
const response = await client.messages
|
||||
.create(
|
||||
{
|
||||
model: opts.model ?? 'claude-opus-4-7',
|
||||
max_tokens: opts.maxTokens ?? 8192,
|
||||
model: opts.model,
|
||||
max_tokens: opts.maxTokens,
|
||||
system: SYSTEM_PROMPT,
|
||||
messages: [{ role: 'user', content: prompt }],
|
||||
},
|
||||
requestOptions,
|
||||
)
|
||||
.catch((err: unknown) => {
|
||||
// A per-attempt timeout surfaces as APIConnectionTimeoutError once the
|
||||
// SDK exhausts retries. Map it to a typed error so the API layer returns
|
||||
// a clean 504 instead of letting the edge proxy time out headerless.
|
||||
if (err instanceof Anthropic.APIConnectionTimeoutError) {
|
||||
throw new SpecTimeoutError('spec generation exceeded the time budget');
|
||||
}
|
||||
throw err;
|
||||
});
|
||||
|
||||
const text = response.content
|
||||
.filter((b): b is { type: 'text'; text: string } => b.type === 'text')
|
||||
.map((b) => b.text)
|
||||
.join('');
|
||||
const json = extractJson(text);
|
||||
const parsed = GeneratorSpec.safeParse(json);
|
||||
if (!parsed.success) {
|
||||
throw new SpecValidationError(parsed.error.message);
|
||||
}
|
||||
if (!parsed.success) throw new SpecValidationError(parsed.error.message);
|
||||
scanForInjection(parsed.data);
|
||||
return { spec: parsed.data, source: 'claude' };
|
||||
}
|
||||
|
||||
/** Zhipu chat-completions URL that `generateWithGlm` POSTs to. */
const GLM_ENDPOINT = 'https://open.bigmodel.cn/api/paas/v4/chat/completions';
|
||||
|
||||
/**
 * Requests a spec from the GLM chat-completions endpoint and validates it.
 *
 * The time budget (`timeoutMs`) covers the whole exchange — connecting,
 * waiting for headers AND reading the response body. When `timeoutMs` is
 * omitted (or 0) no timeout is applied.
 *
 * @param prompt - User prompt, sent as the `user` message after `SYSTEM_PROMPT`.
 * @param opts - API key, model id, `max_tokens` cap and optional timeout in ms.
 * @returns The validated spec with `source: 'glm'`.
 * @throws SpecTimeoutError when the request is aborted by the time budget.
 * @throws Error with message `glm_api_<status>: <body prefix>` on a non-2xx response.
 * @throws SpecValidationError when the reply has no content or fails schema validation.
 */
async function generateWithGlm(
  prompt: string,
  opts: { apiKey: string; model: string; maxTokens: number; timeoutMs?: number },
): Promise<GenerationResult> {
  const controller = new AbortController();
  const timer = opts.timeoutMs ? setTimeout(() => controller.abort(), opts.timeoutMs) : null;

  let content: string | undefined;
  try {
    const res = await fetch(GLM_ENDPOINT, {
      method: 'POST',
      headers: {
        Authorization: `Bearer ${opts.apiKey}`,
        'Content-Type': 'application/json',
      },
      body: JSON.stringify({
        model: opts.model,
        max_tokens: opts.maxTokens,
        messages: [
          { role: 'system', content: SYSTEM_PROMPT },
          { role: 'user', content: prompt },
        ],
      }),
      signal: controller.signal,
    });

    // fetch() resolves once headers arrive; the body reads below are still
    // network I/O, so they stay inside the try block while the timer is armed.
    if (!res.ok) {
      const body = await res.text().catch(() => '');
      throw new Error(`glm_api_${res.status}: ${body.slice(0, 200)}`);
    }
    const data = (await res.json()) as {
      choices?: Array<{ message?: { content?: string }; finish_reason?: string }>;
    };
    content = data.choices?.[0]?.message?.content;
  } catch (err) {
    // Only our own controller can abort this request, so an AbortError at any
    // stage (connect, headers, body) means the time budget ran out.
    if ((err as { name?: unknown } | null | undefined)?.name === 'AbortError') {
      throw new SpecTimeoutError('glm spec generation exceeded the time budget');
    }
    throw err;
  } finally {
    if (timer) clearTimeout(timer);
  }

  if (!content) throw new SpecValidationError('glm_empty_response');

  const json = extractJson(content);
  const parsed = GeneratorSpec.safeParse(json);
  if (!parsed.success) throw new SpecValidationError(parsed.error.message);
  scanForInjection(parsed.data);
  return { spec: parsed.data, source: 'glm' };
}
|
||||
|
||||
/**
 * Raised when a model reply cannot be accepted as a spec: it is thrown for an
 * empty GLM reply and whenever `GeneratorSpec.safeParse` rejects the extracted JSON.
 */
export class SpecValidationError extends Error {
  override readonly name = 'SpecValidationError' as const;
}
|
||||
@ -141,7 +338,7 @@ function scanForInjection(spec: GeneratorSpecT): void {
|
||||
export function mockSpec(prompt: string): GeneratorSpecT {
|
||||
return {
|
||||
name: 'Echo MCP',
|
||||
description: `Mock server (no ANTHROPIC_API_KEY). Prompt was: ${prompt.slice(0, 200)}`,
|
||||
description: `Mock server (no LLM key). Prompt was: ${prompt.slice(0, 200)}`,
|
||||
tools: [
|
||||
{
|
||||
name: 'echo',
|
||||
|
||||
Loading…
Reference in New Issue
Block a user