From f8af3fc0fd84f72ae3a22dd19aaa2717bd082fe5 Mon Sep 17 00:00:00 2001
From: Marco Sadjadi <marco@buildmymcpserver.local>
Date: Mon, 25 May 2026 18:02:59 +0200
Subject: [PATCH] =?UTF-8?q?security:=20sovereign-audit=20Phase=202=20fixes?=
 =?UTF-8?q?=20=E2=80=94=20trustProxy,=20Docker=20hardening,=20banned-patte?=
 =?UTF-8?q?rn=20overhaul?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Five confirmed findings from the sovereign-audit pass, ordered by severity:

Z3-001 CRITICAL — Fastify now trustProxy:true so req.ip resolves to the
real visitor IP via X-Forwarded-For instead of always being the nginx /
docker-bridge peer. Every per-IP rate-limit in the codebase was silently
collapsed into one global counter; this restores them.

Z1-001 CRITICAL — runner container hardening flags (--read-only,
--cap-drop=ALL, --security-opt=no-new-privileges:true, --pids-limit=100,
--memory=512m, --cpus=0.5, tmpfs /tmp) were sitting commented-out as a
TODO despite /security promising them. Now applied unconditionally on
production/staging; opt-out flag RUNNER_DISABLE_HARDENING=1 for Win-dev.

Z2-001 + Z2-002 CRITICAL / MEDIUM — banned-pattern blacklist tightened
(Function(...) without `new`, process.binding, process.dlopen,
.constructor.constructor, _load, vm.runIn*Context, globalThis['..'],
"system prompt override"). scanForInjection now also walks tool.name and
every inputSchema property description, not only implementation +
description — closes the prompt-injection-into-AI-client surface that
downstream clients (Claude Desktop, Cursor) read verbatim. The duplicate
BANNED_PATTERNS in apps/api/src/routes/servers.ts deleted in favour of
the single shared scanForInjection export from @bmm/llm.

Z4-001 HIGH — /v1/auth/magic-link gained the two-axis daily rate-limit
the SMS endpoint already had: 10/IP/day + 5/email/day. Combined with the
trustProxy fix above these are now real per-visitor limits.

Z4-002 MEDIUM — magic-link callback URL no longer printed to stdout in
production. In dev it still prints (so devs can click the link); in
production we log only "issued, URL withheld" and a loud error if no
email sender is wired (Resend integration is the actual launch
blocker — left as a TODO).

Z6-001 MEDIUM — /v1/builds/:id/stream WebSocket now refuses cross-origin
upgrades. SameSite=Lax already mitigates in modern browsers; this is the
defense-in-depth against browser bugs and non-browser clients.

FALSE POSITIVES dismissed: slug path-traversal (schema regex
^[a-z][a-z0-9-]*$ in @bmm/types catches it); session-after-promote
(getSession re-fetches isAdmin from DB on every request).

DEFERRED (not blockers, tracked):
- Z1-002 generated-server HTTPS — needs nginx wildcard subdomain TLS
- Z1-003 docker image cleanup cron
- Z2-001 v2 — real sandbox runtime (multi-week refactor)
- Z3-002 rawBody-per-request memory — branch on webhook path only
- Z5-001 multi-user org RBAC for billing — gated on Team feature
- Email sender integration (Resend) — launch blocker

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 apps/api/src/index.ts            |  5 ++++
 apps/api/src/routes/auth.ts      | 38 ++++++++++++++++++++++++---
 apps/api/src/routes/servers.ts   | 32 +++++++++++++----------
 apps/generator/src/lib/deploy.ts | 45 +++++++++++++++++++++++++-------
 packages/llm/src/index.ts        | 40 +++++++++++++++++++++++++---
 5 files changed, 130 insertions(+), 30 deletions(-)

diff --git a/apps/api/src/index.ts b/apps/api/src/index.ts
index b427da0..799bdb6 100644
--- a/apps/api/src/index.ts
+++ b/apps/api/src/index.ts
@@ -29,6 +29,11 @@ const app = Fastify({
   logger: {
     level: config.NODE_ENV === 'production' ? 'info' : 'debug',
   },
+  // We run behind nginx + Cloudflare — both prepend the real client IP into
+  // X-Forwarded-For. Without trustProxy=true Fastify reports the nginx peer
+  // (always 127.0.0.1 / docker-bridge) for req.ip, which silently collapses
+  // every per-IP rate-limit into a single global counter. (See Z3-001.)
+  trustProxy: true,
 });
 
 // Replace the default JSON parser with one that keeps the raw buffer for the
diff --git a/apps/api/src/routes/auth.ts b/apps/api/src/routes/auth.ts
index 7b202ed..b88e418 100644
--- a/apps/api/src/routes/auth.ts
+++ b/apps/api/src/routes/auth.ts
@@ -14,6 +14,7 @@ import { z } from 'zod';
 import { config } from '../config.js';
 import { audit } from '../lib/audit.js';
 import { getOrgPlan } from '../lib/plan.js';
+import { checkDailyLimit } from '../lib/rate-limit.js';
 import { sendSms, smsConfigured } from '../lib/sms.js';
 
 const SESSION_COOKIE = 'bmm_session';
@@ -78,12 +79,43 @@ export async function authRoutes(app: FastifyInstance): Promise<void> {
     const Body = z.object({ email: z.string().email() });
     const parsed = Body.safeParse(req.body);
     if (!parsed.success) return reply.code(400).send({ error: 'invalid_email' });
+
+    // Two-axis rate-limit: per-IP (prevents IP-flooding the endpoint) and
+    // per-email (prevents inbox-flooding a specific target). Both required
+    // because the IP cap protects us, the email cap protects the recipient.
+    const ipOk = await checkDailyLimit('magic_ip', req.ip, 10);
+    if (!ipOk.ok) {
+      return reply.code(429).send({
+        error: 'rate_limited',
+        detail: 'Too many magic-link requests from this IP. Try again tomorrow.',
+      });
+    }
+    const emailOk = await checkDailyLimit('magic_email', parsed.data.email.toLowerCase(), 5);
+    if (!emailOk.ok) {
+      return reply.code(429).send({
+        error: 'rate_limited',
+        detail: 'Too many magic-link requests for this email. Try again tomorrow.',
+      });
+    }
+
     try {
       const { token, expiresAt } = await issueMagicLink(parsed.data.email);
       const callbackUrl = `${config.NEXT_PUBLIC_APP_URL}/login/callback?token=${token}`;
-      // Dev transport: print to stdout. Production: send via Resend / SES.
-      app.log.info({ to: parsed.data.email, expiresAt }, `[magic-link] -> ${callbackUrl}`);
-      console.log(`\n[magic-link] ${parsed.data.email} ->\n  ${callbackUrl}\n`);
+      // In dev we print the link to stdout so the developer can click it.
+      // In production we must NEVER log the full token — anyone with
+      // `docker logs` access would silently impersonate any user.
+      if (config.NODE_ENV !== 'production') {
+        app.log.info({ to: parsed.data.email, expiresAt }, `[magic-link] -> ${callbackUrl}`);
+        console.log(`\n[magic-link] ${parsed.data.email} ->\n  ${callbackUrl}\n`);
+      } else {
+        app.log.info(
+          { to: parsed.data.email, expiresAt },
+          '[magic-link] issued (URL withheld from logs)',
+        );
+        // TODO(launch): hook up Resend / SES here. Until then, production
+        // magic-link is effectively dead — fail loud rather than silent.
+        app.log.error('magic-link email sender not configured — link cannot reach user');
+      }
       return reply.send({ ok: true });
     } catch (e) {
       app.log.error(e);
diff --git a/apps/api/src/routes/servers.ts b/apps/api/src/routes/servers.ts
index 261b99f..334b900 100644
--- a/apps/api/src/routes/servers.ts
+++ b/apps/api/src/routes/servers.ts
@@ -17,6 +17,7 @@ import {
   SpecValidationError,
   generateSpec,
   pickPreviewModel,
+  scanForInjection,
 } from '@bmm/llm';
 import {
   BuildEvent,
@@ -428,6 +429,14 @@ export async function serverRoutes(app: FastifyInstance): Promise<void> {
       socket.close();
     };
 
+    // Defense-in-depth Origin check. SameSite=Lax cookies already block
+    // cross-origin WS from JS in spec-compliant browsers, but enforcing the
+    // Origin server-side closes any browser-bug or non-browser-client path.
+    const origin = req.headers.origin;
+    if (origin && origin !== config.NEXT_PUBLIC_APP_URL) {
+      return fail('cross_origin_ws_rejected');
+    }
+
     const Params = z.object({ id: z.string().uuid() });
     const parsed = Params.safeParse(req.params);
     if (!parsed.success) return fail('invalid_id');
@@ -537,22 +546,17 @@ export async function serverRoutes(app: FastifyInstance): Promise<void> {
 
 // ---- Spec-edit merge helpers ----
 
-const BANNED_PATTERNS = [
-  /\beval\s*\(/,
-  /\bnew\s+Function\s*\(/,
-  /\brequire\s*\(\s*['"]child_process['"]/,
-  /\bchild_process\b/,
-  /ignore\s+previous\s+instructions/i,
-  /disregard\s+(the\s+)?(above|previous)/i,
-];
-
+/** Delegates to @bmm/llm's scanForInjection so the banned-pattern set is
+ *  defined exactly once. Wraps the thrown BannedPatternError into a plain
+ *  Error for the existing catch path in /v1/servers POST. */
 function rescanInjection(spec: GeneratorSpec): void {
-  for (const tool of spec.tools) {
-    for (const pattern of BANNED_PATTERNS) {
-      if (pattern.test(tool.implementation) || pattern.test(tool.description)) {
-        throw new Error(`banned_pattern_detected: ${pattern.source}`);
-      }
+  try {
+    scanForInjection(spec);
+  } catch (err) {
+    if (err instanceof BannedPatternError) {
+      throw new Error(err.message);
     }
+    throw err;
   }
 }
 
diff --git a/apps/generator/src/lib/deploy.ts b/apps/generator/src/lib/deploy.ts
index 74d66ee..3485aff 100644
--- a/apps/generator/src/lib/deploy.ts
+++ b/apps/generator/src/lib/deploy.ts
@@ -2,6 +2,37 @@ import net from 'node:net';
 import { createDb, eq, isNotNull, mcpServers } from '@bmm/db';
 import { config } from '../config.js';
 
+/**
+ * Container hardening flags applied on every runner deployment on Linux
+ * production hosts. Skipped only when explicitly disabled (dev/Windows
+ * Docker Desktop, which doesn't fully honour --read-only on bind mounts).
+ *
+ * Without these, a tenant container runs as root with full capabilities on
+ * the shared host — combined with the LLM static-check being a regex
+ * blacklist (Z2-001), this would let a malicious tenant execute arbitrary
+ * code on the host. With them, the blast radius collapses to "within the
+ * container", which holds only that tenant's own decrypted secrets.
+ */
+const HARDENING_FLAGS = [
+  '--read-only',
+  '--cap-drop=ALL',
+  '--security-opt=no-new-privileges:true',
+  '--pids-limit=100',
+  '--memory=512m',
+  '--memory-swap=512m',
+  '--cpus=0.5',
+  // /tmp needs writable space — runner-template uses it for build/cache.
+  '--tmpfs=/tmp:rw,nosuid,nodev,size=64m',
+];
+
+function shouldHarden(): boolean {
+  // Explicit opt-out for local dev on Windows where --read-only conflicts
+  // with how Docker Desktop binds volumes. Production must always harden.
+  if (process.env.RUNNER_DISABLE_HARDENING === '1') return false;
+  const env = process.env.NODE_ENV;
+  return env === 'production' || env === 'staging';
+}
+
 const db = createDb();
 
 async function portFree(port: number, host = '127.0.0.1'): Promise<boolean> {
@@ -46,16 +77,9 @@ export interface DeployInput {
   envVars: Record<string, string>;
 }
 
-// Production-only flags documented but unused in dev for Windows Docker Desktop compat:
-// '--read-only',
-// '--cap-drop=ALL',
-// '--security-opt=no-new-privileges',
-// '--cpus=0.5',
-// '--memory=512m',
-
 export async function deployContainer(input: DeployInput): Promise<DeployHandle> {
-  // In a future iteration this calls docker engine API directly via UNIX socket / named pipe.
-  // For Sprint 1-3 we shell out via the bound docker CLI which is portable on win/mac/linux.
+  // Docker CLI is portable across linux/mac/win — sufficient for now; future
+  // iteration will switch to the engine API via UNIX socket.
   const { spawn } = await import('node:child_process');
   const containerName = `bmm-mcp-${input.slug}-${Date.now().toString(36)}`;
   const args = [
@@ -66,6 +90,9 @@ export async function deployContainer(input: DeployInput): Promise<DeployHandle>
     '-p',
     `${input.hostPort}:3000`,
   ];
+  if (shouldHarden()) {
+    args.push(...HARDENING_FLAGS);
+  }
   for (const [k, v] of Object.entries(input.envVars)) {
     args.push('-e', `${k}=${v}`);
   }
diff --git a/packages/llm/src/index.ts b/packages/llm/src/index.ts
index 29046d8..21accb3 100644
--- a/packages/llm/src/index.ts
+++ b/packages/llm/src/index.ts
@@ -36,13 +36,27 @@ Rules:
 
 Return JSON only. No explanation.`;
 
+// Regex blacklist — explicitly NOT a security boundary, just an early-warning
+// for obviously-dangerous LLM output. The real defence is the Docker
+// hardening in apps/generator/src/lib/deploy.ts (--cap-drop=ALL etc.). A
+// determined attacker can bypass any of these with string concatenation
+// (`'chi'+'ld_process'`) or alternate APIs — that's why container isolation
+// has to hold even when this fails.
 const BANNED_PATTERNS = [
   /\beval\s*\(/,
   /\bnew\s+Function\s*\(/,
+  /\bFunction\s*\(\s*['"`]/, // Function('...') without `new`
   /\brequire\s*\(\s*['"]child_process['"]/,
   /\bchild_process\b/,
+  /\bprocess\.binding\b/,
+  /\bprocess\.dlopen\b/,
+  /\.constructor\s*\.\s*constructor\b/, // [].constructor.constructor('...')
+  /\b_load\s*\(/,
+  /\bvm\.runIn(This|New)Context\b/,
+  /globalThis\s*\[\s*['"`]/, // globalThis['Fun'+'ction']
   /ignore\s+previous\s+instructions/i,
   /disregard\s+(the\s+)?(above|previous)/i,
+  /system\s+prompt\s+override/i,
 ];
 
 // ──────────────────────────────────────────────────────────────────────────
@@ -325,11 +339,29 @@ function extractJson(text: string): unknown {
   }
 }
 
-function scanForInjection(spec: GeneratorSpecT): void {
+/**
+ * Public so other layers (the spec-edit merge in apps/api) can re-scan a
+ * user-edited spec without duplicating the pattern list — single source of
+ * truth for what counts as obviously-dangerous LLM output.
+ */
+export function scanForInjection(spec: GeneratorSpecT): void {
   for (const tool of spec.tools) {
-    for (const pattern of BANNED_PATTERNS) {
-      if (pattern.test(tool.implementation) || pattern.test(tool.description)) {
-        throw new BannedPatternError(`banned_pattern_detected: ${pattern.source}`);
+    // Collect every string the LLM could have planted a payload in. Downstream
+    // AI clients (Claude Desktop, Cursor) read tool.name + every inputSchema
+    // description verbatim, so an injection there can pivot the user's AI
+    // session — not only the runtime code.
+    const surfaces: string[] = [tool.name, tool.description, tool.implementation];
+    for (const param of Object.values(tool.inputSchema)) {
+      if (param && typeof param === 'object' && 'description' in param) {
+        const d = (param as { description?: unknown }).description;
+        if (typeof d === 'string') surfaces.push(d);
+      }
+    }
+    for (const text of surfaces) {
+      for (const pattern of BANNED_PATTERNS) {
+        if (pattern.test(text)) {
+          throw new BannedPatternError(`banned_pattern_detected: ${pattern.source}`);
+        }
       }
     }
   }