buildmymcpserver/apps/api/src/lib/rate-limit.ts

import type { Plan } from '@bmm/llm';
import { getRedis } from './redis.js';

/** Number of seconds in one day (60 s × 60 min × 24 h). */
const DAY_SEC = 60 * 60 * 24;

/**
 * Returns the current date in UTC as `YYYY-MM-DD`, i.e. the first ten
 * characters of the ISO-8601 timestamp for "now".
 */
function todayKey(): string {
  const isoNow = new Date().toISOString();
  return isoNow.substring(0, 10);
}

/** Outcome of a single rate-limit check. */
export interface RateLimitResult {
  /** True while the caller is still within the allowed number of calls. */
  ok: boolean;
  /** Calls left in the current window; never negative. */
  remaining: number;
  /** Seconds until the current window resets. */
  resetIn: number;
}

/**
 * Increments and checks a per-user daily counter stored in Redis.
 *
 * The key embeds the current UTC date
 * (`ratelimit:<scope>:<userId>:<YYYY-MM-DD>`), so a fresh counter starts at
 * every UTC midnight. INCR is atomic, so there is no race window between
 * reading and writing the count. The call is counted even when the result is
 * `ok: false`.
 *
 * @param scope  Namespace of the limited action; becomes part of the key.
 * @param userId Identifier of the caller being limited; becomes part of the key.
 * @param max    Maximum number of calls allowed per UTC day.
 * @returns Whether this call is within the limit, how many calls remain, and
 *          the number of seconds until the counter rolls over at UTC midnight.
 */
export async function checkDailyLimit(
  scope: string,
  userId: string,
  max: number,
): Promise<RateLimitResult> {
  const day = todayKey();
  const key = `ratelimit:${scope}:${userId}:${day}`;
  const redis = getRedis();
  const count = await redis.incr(key);

  // The TTL only garbage-collects keys of past days. Testing for a missing
  // TTL (instead of `count === 1`) also repairs a key whose EXPIRE was lost,
  // e.g. because the process died between INCR and EXPIRE.
  const ttl = await redis.ttl(key);
  if (ttl < 0) await redis.expire(key, DAY_SEC);

  // The window ends when the date embedded in the key changes, not when the
  // key's TTL runs out, so the reset time is derived from that date.
  const windowEndMs = Date.parse(`${day}T00:00:00.000Z`) + DAY_SEC * 1000;
  const secondsLeft = Math.ceil((windowEndMs - Date.now()) / 1000);
  const resetIn = Number.isFinite(secondsLeft)
    ? Math.min(DAY_SEC, Math.max(1, secondsLeft))
    : DAY_SEC;

  return {
    ok: count <= max,
    remaining: Math.max(0, max - count),
    resetIn,
  };
}

// Per-tier daily limits on the two LLM-priced actions.
// Preview = ~€0.002-0.115/call (model-dependent) · Build = ~€0.005-0.22/call.
//
// Caps are set so that even a max-usage power-user stays profitable at the
// tier's price point. Critical for Team/Enterprise where Sonnet/Opus tokens
// add up fast — a runaway bot with a Team subscription could otherwise
// out-cost the €199 monthly revenue. Math (max-case):
//   Pro:        40 prev × €0.020 × 30 = €24/mo  → margin €25 (~50%)
//   Team:       50 prev × €0.058 × 30 = €87/mo  → margin €112 (~56%)
//   Enterprise: 200 prev × €0.060 × 30 = €360/mo → margin €639 (~64%)
// Build caps are looser because the 24h cache TTL means most builds are
// cache-HITS (no LLM call) — the cap is mostly about runner-port / hosting
// budget, not token cost.
/**
 * Daily preview cap for each plan.
 * NOTE(review): presumably passed as `max` to `checkDailyLimit` — confirm at
 * the call site.
 */
export const PREVIEW_DAILY_LIMIT: Record<Plan, number> = {
  hobby: 5,
  pro: 40,
  team: 50,
  enterprise: 200,
};

/**
 * Daily build cap for each plan.
 * NOTE(review): presumably passed as `max` to `checkDailyLimit` — confirm at
 * the call site.
 */
export const BUILD_DAILY_LIMIT: Record<Plan, number> = {
  hobby: 3,
  pro: 20,
  team: 30,
  enterprise: 100,
};
-												feat: tiered LLM (GLM free / Claude paid) + rate limits + quota enforcement

The free tier was hemorrhaging Anthropic cost with no abuse cap (no rate
limit on /preview, Opus default in the build worker, 5-min cache TTL that
made cache-miss the common case). This switches free users to GLM, paid
users to Claude tiers, and tightens every leak found in the audit.

Backend:
- @bmm/llm: GLM provider via Zhipu's OpenAI-compatible endpoint, pickPreviewModel
  + pickBuildModel helpers, plan-aware ModelChoice
- preview-cache TTL 5min -> 24h (kills the cache-miss path)
- /v1/servers/preview: picks model from caller's plan, returns model name to UI
- /v1/servers POST: enforces SERVER_LIMITS per plan (402), rate-limits builds
- daily rate-limit on preview (5/40/150/1000) and build (3/20/100/500)
- /v1/auth/me returns plan so the wizard can show the right model name
- generator worker: GLM default, Anthropic Sonnet fallback if GLM errors

Frontend:
- Wizard fetches plan, shows "<model> is drafting the tool spec" pre-emptively,
  upgrade hint for hobby users, friendly errors for 402 / 429
- Pricing page: AI-model line per tier (Open-tier / Haiku / Sonnet / Opus),
  Team €149 -> €199, Enterprise €499 -> €999, daily-preview limit per tier
- Privacy + Security: explicit subprocessor disclosure for Anthropic (US) /
  Zhipu (CN) and which tier uses which

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-23 23:50:00 +02:00
+								import type { Plan } from '@bmm/llm';
 								import { getRedis } from './redis.js';
 								const DAY_SEC = 24 * 60 * 60;
 								function todayKey(): string {
 								  return new Date().toISOString().slice(0, 10);
 								}
 								export interface RateLimitResult {
 								  ok: boolean;
 								  remaining: number;
 								  resetIn: number;
 								}
 								/**
 								 * Daily counter via Redis INCR. Atomic — no race window between read & write.
 								 * First INCR (count === 1) sets the TTL so the key auto-rolls at midnight UTC.
 								 */
 								export async function checkDailyLimit(
 								  scope: string,
 								  userId: string,
 								  max: number,
 								): Promise<RateLimitResult> {
 								  const key = `ratelimit:${scope}:${userId}:${todayKey()}`;
 								  const redis = getRedis();
 								  const count = await redis.incr(key);
 								  if (count === 1) await redis.expire(key, DAY_SEC);
 								  const ttl = await redis.ttl(key);
 								  return {
 								    ok: count <= max,
 								    remaining: Math.max(0, max - count),
 								    resetIn: ttl > 0 ? ttl : DAY_SEC,
 								  };
 								}
 								// Per-tier daily limits on the two LLM-priced actions.
-												fix(quotas): tighten Team/Enterprise daily preview caps to stay profitable

The earlier caps (Team 150/day, Enterprise 1000/day) used Sonnet/Opus pricing
that put max-usage above the tier's monthly revenue — a Bot with a Team
subscription could out-cost €199 in Anthropic spend. Drop to 50/day Team
and 200/day Enterprise; both now keep ~55-65% margin even when maxed.

Pricing page Team feature line updated to match (150 -> 50). Build caps
loosened slightly less since the 24h cache TTL makes most builds cache-hits.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-24 00:14:07 +02:00
+								// Preview = ~€0.002-0.115/call (model-dependent) · Build = ~€0.005-0.22/call.
 								//
 								// Caps are set so that even a max-usage power-user stays profitable at the
 								// tier's price point. Critical for Team/Enterprise where Sonnet/Opus tokens
 								// add up fast — a runaway Bot with a Team subscription could otherwise
 								// out-cost the €199 monthly revenue. Math (max-case):
 								//   Pro:        40 prev × €0.020 × 30 = €24/mo  → margin €25 (~50%)
 								//   Team:       50 prev × €0.058 × 30 = €87/mo  → margin €112 (~56%)
 								//   Enterprise: 200 prev × €0.060 × 30 = €360/mo → margin €639 (~64%)
 								// Build caps are looser because the 24h cache TTL means most builds are
 								// cache-HITS (no LLM call) — the cap is mostly about runner-port / hosting
 								// budget, not token cost.
-												feat: tiered LLM (GLM free / Claude paid) + rate limits + quota enforcement

The free tier was hemorrhaging Anthropic cost with no abuse cap (no rate
limit on /preview, Opus default in the build worker, 5-min cache TTL that
made cache-miss the common case). This switches free users to GLM, paid
users to Claude tiers, and tightens every leak found in the audit.

Backend:
- @bmm/llm: GLM provider via Zhipu's OpenAI-compatible endpoint, pickPreviewModel
  + pickBuildModel helpers, plan-aware ModelChoice
- preview-cache TTL 5min -> 24h (kills the cache-miss path)
- /v1/servers/preview: picks model from caller's plan, returns model name to UI
- /v1/servers POST: enforces SERVER_LIMITS per plan (402), rate-limits builds
- daily rate-limit on preview (5/40/150/1000) and build (3/20/100/500)
- /v1/auth/me returns plan so the wizard can show the right model name
- generator worker: GLM default, Anthropic Sonnet fallback if GLM errors

Frontend:
- Wizard fetches plan, shows "<model> is drafting the tool spec" pre-emptively,
  upgrade hint for hobby users, friendly errors for 402 / 429
- Pricing page: AI-model line per tier (Open-tier / Haiku / Sonnet / Opus),
  Team €149 -> €199, Enterprise €499 -> €999, daily-preview limit per tier
- Privacy + Security: explicit subprocessor disclosure for Anthropic (US) /
  Zhipu (CN) and which tier uses which

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-23 23:50:00 +02:00
+								export const PREVIEW_DAILY_LIMIT: Record<Plan, number> = {
 								  hobby: 5,
 								  pro: 40,
-												fix(quotas): tighten Team/Enterprise daily preview caps to stay profitable

The earlier caps (Team 150/day, Enterprise 1000/day) used Sonnet/Opus pricing
that put max-usage above the tier's monthly revenue — a Bot with a Team
subscription could out-cost €199 in Anthropic spend. Drop to 50/day Team
and 200/day Enterprise; both now keep ~55-65% margin even when maxed.

Pricing page Team feature line updated to match (150 -> 50). Build caps
loosened slightly less since the 24h cache TTL makes most builds cache-hits.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-24 00:14:07 +02:00
+								  team: 50,
 								  enterprise: 200,
-												feat: tiered LLM (GLM free / Claude paid) + rate limits + quota enforcement

The free tier was hemorrhaging Anthropic cost with no abuse cap (no rate
limit on /preview, Opus default in the build worker, 5-min cache TTL that
made cache-miss the common case). This switches free users to GLM, paid
users to Claude tiers, and tightens every leak found in the audit.

Backend:
- @bmm/llm: GLM provider via Zhipu's OpenAI-compatible endpoint, pickPreviewModel
  + pickBuildModel helpers, plan-aware ModelChoice
- preview-cache TTL 5min -> 24h (kills the cache-miss path)
- /v1/servers/preview: picks model from caller's plan, returns model name to UI
- /v1/servers POST: enforces SERVER_LIMITS per plan (402), rate-limits builds
- daily rate-limit on preview (5/40/150/1000) and build (3/20/100/500)
- /v1/auth/me returns plan so the wizard can show the right model name
- generator worker: GLM default, Anthropic Sonnet fallback if GLM errors

Frontend:
- Wizard fetches plan, shows "<model> is drafting the tool spec" pre-emptively,
  upgrade hint for hobby users, friendly errors for 402 / 429
- Pricing page: AI-model line per tier (Open-tier / Haiku / Sonnet / Opus),
  Team €149 -> €199, Enterprise €499 -> €999, daily-preview limit per tier
- Privacy + Security: explicit subprocessor disclosure for Anthropic (US) /
  Zhipu (CN) and which tier uses which

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-23 23:50:00 +02:00
+								};
 								export const BUILD_DAILY_LIMIT: Record<Plan, number> = {
 								  hobby: 3,
 								  pro: 20,
-												fix(quotas): tighten Team/Enterprise daily preview caps to stay profitable

The earlier caps (Team 150/day, Enterprise 1000/day) used Sonnet/Opus pricing
that put max-usage above the tier's monthly revenue — a Bot with a Team
subscription could out-cost €199 in Anthropic spend. Drop to 50/day Team
and 200/day Enterprise; both now keep ~55-65% margin even when maxed.

Pricing page Team feature line updated to match (150 -> 50). Build caps
loosened slightly less since the 24h cache TTL makes most builds cache-hits.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-24 00:14:07 +02:00
+								  team: 30,
 								  enterprise: 100,
-												feat: tiered LLM (GLM free / Claude paid) + rate limits + quota enforcement

The free tier was hemorrhaging Anthropic cost with no abuse cap (no rate
limit on /preview, Opus default in the build worker, 5-min cache TTL that
made cache-miss the common case). This switches free users to GLM, paid
users to Claude tiers, and tightens every leak found in the audit.

Backend:
- @bmm/llm: GLM provider via Zhipu's OpenAI-compatible endpoint, pickPreviewModel
  + pickBuildModel helpers, plan-aware ModelChoice
- preview-cache TTL 5min -> 24h (kills the cache-miss path)
- /v1/servers/preview: picks model from caller's plan, returns model name to UI
- /v1/servers POST: enforces SERVER_LIMITS per plan (402), rate-limits builds
- daily rate-limit on preview (5/40/150/1000) and build (3/20/100/500)
- /v1/auth/me returns plan so the wizard can show the right model name
- generator worker: GLM default, Anthropic Sonnet fallback if GLM errors

Frontend:
- Wizard fetches plan, shows "<model> is drafting the tool spec" pre-emptively,
  upgrade hint for hobby users, friendly errors for 402 / 429
- Pricing page: AI-model line per tier (Open-tier / Haiku / Sonnet / Opus),
  Team €149 -> €199, Enterprise €499 -> €999, daily-preview limit per tier
- Privacy + Security: explicit subprocessor disclosure for Anthropic (US) /
  Zhipu (CN) and which tier uses which

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

											
										
										
											2026-05-23 23:50:00 +02:00
+								};