@

fix(security): sovereign-audit hardening pass — RCE, multi-tenant, reliability Reasoning-based audit fixes (all verified by typecheck, attack paths re-traced): - build-time RCE: validate spec.dependencies to npm-registry semver only (no git/url/file specifiers) + --ignore-scripts in runner Dockerfile. - container hardening fail-CLOSED: harden unless RUNNER_DISABLE_HARDENING=1, no longer gated on a fragile NODE_ENV string compare. - secret env keys validated (UPPER_SNAKE, reject NODE_*/PATH/LD_*). - cross-org image-tag collision: qualify tag with serverId. - /iterate now enforces suspension + daily-build limits like /servers. - preview SSE: clear keepalive in finally + on client close (timer/FD leak). - SMS OTP: atomic attempt counter (lt(attempts,MAX) in UPDATE) — brute-force race. - getSession orders membership by createdAt (deterministic primary org). - template scopes aggregated from real tool scopes (was hardcoded mcp:read). - template category filter pushed into WHERE (was applied after LIMIT). - support admin reply/status: 404 on unknown ticket; status change now audited. - build worker: queue defaultJobOptions, docker build/run/stop timeouts, old-container teardown in finally (no orphan on post-deploy DB failure). - nginx: HSTS, X-Frame-Options DENY, nosniff, Referrer-Policy. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> @
2026-05-29 20:56:30 +02:00 · 2026-05-29 20:56:30 +02:00 · 9d5386ccba
commit 9d5386ccba
parent 092290bb38
12 changed files with 338 additions and 136 deletions
--- a/.env.example
+++ b/.env.example
@ -1,5 +1,8 @@
 # ---- Core ----
 NODE_ENV=development
+# Local dev only: skip runner container hardening (--read-only etc. break on
+# Windows Docker Desktop). NEVER set this in .env.production. (GEN-002)
+RUNNER_DISABLE_HARDENING=1

 # ---- Database ----
 DATABASE_URL=postgresql://bmm:bmm@localhost:5440/bmm
--- a/apps/api/src/lib/queue.ts
+++ b/apps/api/src/lib/queue.ts
@ -17,7 +17,14 @@ let queue: Queue<BuildJobData> | null = null;

 export function getBuildQueue(): Queue<BuildJobData> {
  if (!queue) {
-    queue = new Queue<BuildJobData>('build', { connection: getRedis() });
+    queue = new Queue<BuildJobData>('build', {
+      connection: getRedis(),
+      // Explicit job lifecycle. attempts:1 because a build is non-idempotent
+      // (allocates a host port, runs a container, spends an LLM call) — a blind
+      // BullMQ retry would double-spend; users re-run via /iterate instead.
+      // removeOnComplete/Fail caps Redis growth. (GEN-007)
+      defaultJobOptions: { attempts: 1, removeOnComplete: 100, removeOnFail: 500 },
+    });
  }
  return queue;
 }
--- a/apps/api/src/routes/servers.ts
+++ b/apps/api/src/routes/servers.ts
@ -219,7 +219,8 @@ export async function serverRoutes(app: FastifyInstance): Promise<void> {
    if (choice.provider !== 'anthropic' || !config.ANTHROPIC_API_KEY) {
      return reply.code(409).send({
        error: 'streaming_unavailable',
-        detail: 'Streaming preview is only available for Anthropic-backed tiers. Use POST /v1/servers/preview instead.',
+        detail:
+          'Streaming preview is only available for Anthropic-backed tiers. Use POST /v1/servers/preview instead.',
      });
    }

@ -254,7 +255,10 @@ export async function serverRoutes(app: FastifyInstance): Promise<void> {
    // open as long as bytes flow; comments are SSE-noop but count as bytes.
    const keepalive = setInterval(() => reply.raw.write(`: ping\n\n`), 15_000);
    const abort = new AbortController();
-    req.raw.on('close', () => abort.abort());
+    req.raw.on('close', () => {
+      abort.abort();
+      clearInterval(keepalive);
+    });

    // `resolved` is set inside the awaited handlers below — by the time
    // streamSpecFromAnthropic returns, exactly one of onSpec/onError will
@ -263,95 +267,105 @@ export async function serverRoutes(app: FastifyInstance): Promise<void> {
    // ended without either handler running (which would be a programming
    // bug, not a runtime path).
    let resolved = false;
-    await streamSpecFromAnthropic(
-      parsed.data.prompt,
-      {
-        apiKey: config.ANTHROPIC_API_KEY,
-        model: choice.model,
-        maxTokens: choice.maxTokens,
-        signal: abort.signal,
-      },
-      {
-        onText: (delta) => send('text', delta),
-        onSpec: async ({ spec, source }) => {
-          const previewId = await cacheSpec(spec);
-          send('spec', {
-            previewId,
-            source,
-            plan,
-            modelDisplayName: choice.displayName,
-            modelBadge: choice.displayBadge,
-            upgradeHint: plan === 'hobby',
-            spec: {
-              name: spec.name,
-              description: spec.description,
-              tools: spec.tools.map((t) => ({
-                name: t.name,
-                description: t.description,
-                inputSchema: t.inputSchema,
-              })),
-              requiredSecrets: spec.requiredSecrets,
-              scopes: spec.scopes,
-            },
-          });
-          app.log.info(
-            {
+    try {
+      await streamSpecFromAnthropic(
+        parsed.data.prompt,
+        {
+          apiKey: config.ANTHROPIC_API_KEY,
+          model: choice.model,
+          maxTokens: choice.maxTokens,
+          signal: abort.signal,
+        },
+        {
+          onText: (delta) => send('text', delta),
+          onSpec: async ({ spec, source }) => {
+            const previewId = await cacheSpec(spec);
+            send('spec', {
              previewId,
-              tools: spec.tools.length,
-              prompt: parsed.data.prompt.slice(0, 200),
-              model: choice.displayName,
-            },
-            'preview_spec_ready',
-          );
-          resolved = true;
-        },
-        onError: (err) => {
-          if (err instanceof SpecTruncatedError) {
-            app.log.warn(
+              source,
+              plan,
+              modelDisplayName: choice.displayName,
+              modelBadge: choice.displayBadge,
+              upgradeHint: plan === 'hobby',
+              spec: {
+                name: spec.name,
+                description: spec.description,
+                tools: spec.tools.map((t) => ({
+                  name: t.name,
+                  description: t.description,
+                  inputSchema: t.inputSchema,
+                })),
+                requiredSecrets: spec.requiredSecrets,
+                scopes: spec.scopes,
+              },
+            });
+            app.log.info(
              {
-                reason: err.message,
+                previewId,
+                tools: spec.tools.length,
                prompt: parsed.data.prompt.slice(0, 200),
                model: choice.displayName,
              },
-              'preview_spec_truncated',
+              'preview_spec_ready',
            );
-            send('error', {
-              error: 'spec_too_large',
-              detail:
-                'The spec for this prompt exceeded the maximum response size. Split it into fewer tools or describe one capability per prompt.',
-            });
-          } else if (err instanceof SpecValidationError) {
-            app.log.warn(
-              {
-                zod_message: err.message,
-                prompt: parsed.data.prompt.slice(0, 200),
-                model: choice.displayName,
-              },
-              'preview_spec_invalid',
-            );
-            send('error', { error: 'spec_invalid', detail: err.message });
-          } else if (err instanceof BannedPatternError) {
-            send('error', { error: 'banned_pattern', detail: err.message });
-          } else if (err instanceof SpecTimeoutError) {
-            send('error', {
-              error: 'preview_timeout',
-              detail: 'Spec generation took too long. Try a shorter, more specific prompt.',
-            });
-          } else {
-            app.log.error(err);
-            send('error', { error: 'preview_failed', detail: err.message });
-          }
-          resolved = true;
+            resolved = true;
+          },
+          onError: (err) => {
+            if (err instanceof SpecTruncatedError) {
+              app.log.warn(
+                {
+                  reason: err.message,
+                  prompt: parsed.data.prompt.slice(0, 200),
+                  model: choice.displayName,
+                },
+                'preview_spec_truncated',
+              );
+              send('error', {
+                error: 'spec_too_large',
+                detail:
+                  'The spec for this prompt exceeded the maximum response size. Split it into fewer tools or describe one capability per prompt.',
+              });
+            } else if (err instanceof SpecValidationError) {
+              app.log.warn(
+                {
+                  zod_message: err.message,
+                  prompt: parsed.data.prompt.slice(0, 200),
+                  model: choice.displayName,
+                },
+                'preview_spec_invalid',
+              );
+              send('error', { error: 'spec_invalid', detail: err.message });
+            } else if (err instanceof BannedPatternError) {
+              send('error', { error: 'banned_pattern', detail: err.message });
+            } else if (err instanceof SpecTimeoutError) {
+              send('error', {
+                error: 'preview_timeout',
+                detail: 'Spec generation took too long. Try a shorter, more specific prompt.',
+              });
+            } else {
+              app.log.error(err);
+              send('error', { error: 'preview_failed', detail: err.message });
+            }
+            resolved = true;
+          },
        },
-      },
-    );
+      );

-    if (!resolved) {
-      app.log.error({ prompt: parsed.data.prompt.slice(0, 200) }, 'preview_stream_unresolved');
-      send('error', { error: 'preview_failed', detail: 'stream ended without a final event' });
+      if (!resolved) {
+        app.log.error({ prompt: parsed.data.prompt.slice(0, 200) }, 'preview_stream_unresolved');
+        send('error', { error: 'preview_failed', detail: 'stream ended without a final event' });
+      }
+    } catch (err) {
+      // If the stream itself rejects (e.g. cacheSpec/Redis throws inside onSpec,
+      // or a network error before either handler runs) we must still tear down
+      // the keepalive timer and close the socket — otherwise the interval keeps
+      // writing to a dead connection forever, leaking a timer + FD per failure. (SRV-004)
+      app.log.error({ err, prompt: parsed.data.prompt.slice(0, 200) }, 'preview_stream_threw');
+      if (!resolved) send('error', { error: 'preview_failed', detail: 'spec generation failed' });
+    } finally {
+      clearInterval(keepalive);
+      reply.raw.end();
    }
-    clearInterval(keepalive);
-    reply.raw.end();
  });

  app.post('/v1/servers', { preHandler: requireAuth }, async (req, reply) => {
@ -574,6 +588,32 @@ export async function serverRoutes(app: FastifyInstance): Promise<void> {
      .limit(1);
    if (!server) return reply.code(404).send({ error: 'not_found' });

+    // iterate queues a full paid LLM build exactly like POST /v1/servers, so it
+    // must enforce the same suspension + daily-build gates. Without these a
+    // suspended (non-paying) or rate-capped org could generate unlimited builds
+    // by hitting iterate instead of create. (SRV-003)
+    const billing = await getOrgBilling(user.orgId);
+    if (billing.suspended) {
+      return reply.code(402).send({
+        error: 'subscription_suspended',
+        detail:
+          billing.suspendedReason === 'payment_failed'
+            ? 'Your subscription is paused due to a payment issue. Update your payment method in /settings/billing.'
+            : 'Your subscription is paused. Visit /settings/billing for details.',
+        suspendedReason: billing.suspendedReason,
+      });
+    }
+    const iterateRl = await checkDailyLimit('build', user.userId, BUILD_DAILY_LIMIT[billing.plan]);
+    if (!iterateRl.ok) {
+      return reply.code(429).send({
+        error: 'rate_limited',
+        detail: `Daily build limit reached for plan "${billing.plan}" (${BUILD_DAILY_LIMIT[billing.plan]}/day). Resets in ${Math.ceil(iterateRl.resetIn / 3600)}h.`,
+        plan: billing.plan,
+        limit: BUILD_DAILY_LIMIT[billing.plan],
+        resetIn: iterateRl.resetIn,
+      });
+    }
+
    const nextVersion = server.currentVersion + 1;
    const [build] = await db
      .insert(builds)
--- a/apps/api/src/routes/support.ts
+++ b/apps/api/src/routes/support.ts
@ -250,6 +250,15 @@ export async function supportRoutes(app: FastifyInstance): Promise<void> {
      const body = NewMessageBody.safeParse(req.body);
      if (!body.success) return reply.code(400).send({ error: 'invalid_input' });

+      // Confirm the ticket exists first — otherwise the insert below hits a raw
+      // FK violation (500) instead of a clean 404. (SUP-002)
+      const [ticket] = await db
+        .select({ id: supportTickets.id })
+        .from(supportTickets)
+        .where(eq(supportTickets.id, parsed.data.id))
+        .limit(1);
+      if (!ticket) return reply.code(404).send({ error: 'not_found' });
+
      await db.insert(supportMessages).values({
        ticketId: parsed.data.id,
        authorUserId: user.userId,
@ -282,12 +291,22 @@ export async function supportRoutes(app: FastifyInstance): Promise<void> {
    '/v1/admin/support/tickets/:id/status',
    { preHandler: requireAdmin },
    async (req, reply) => {
+      const user = req.user!;
      const Params = z.object({ id: z.string().uuid() });
      const parsed = Params.safeParse(req.params);
      if (!parsed.success) return reply.code(400).send({ error: 'invalid_id' });
      const body = StatusBody.safeParse(req.body);
      if (!body.success) return reply.code(400).send({ error: 'invalid_input' });

+      // 404 on unknown ticket instead of a silent no-op `UPDATE ... WHERE id=?`
+      // that returns ok:true and masks the bad id. (SUP-002)
+      const [ticket] = await db
+        .select({ id: supportTickets.id })
+        .from(supportTickets)
+        .where(eq(supportTickets.id, parsed.data.id))
+        .limit(1);
+      if (!ticket) return reply.code(404).send({ error: 'not_found' });
+
      await db
        .update(supportTickets)
        .set({
@ -297,6 +316,17 @@ export async function supportRoutes(app: FastifyInstance): Promise<void> {
        })
        .where(eq(supportTickets.id, parsed.data.id));

+      // Status changes were previously unaudited, unlike admin replies — close
+      // the compliance-trail gap. (SUP-002)
+      await audit({
+        orgId: user.orgId,
+        userId: user.userId,
+        action: 'support.status_changed',
+        resourceType: 'support_ticket',
+        resourceId: parsed.data.id,
+        metadata: { status: body.data.status },
+      });
+
      return reply.send({ ok: true });
    },
  );
--- a/apps/api/src/routes/templates.ts
+++ b/apps/api/src/routes/templates.ts
@ -194,10 +194,15 @@ export async function templateRoutes(app: FastifyInstance): Promise<void> {
        toolsSchema: server.toolsSchema,
        generatedCode: build.generatedCode,
        requiredSecrets: parsed.data.secretHints,
-        scopes: (server.toolsSchema as Array<{ scopes?: string[] }>).reduce<string[]>(
-          () => ['mcp:read'],
-          [],
-        ),
+        // Aggregate the distinct scopes actually declared by the server's tools
+        // (deduped), falling back to read-only. The previous reduce ignored its
+        // input and hardcoded ['mcp:read'] for every template regardless of what
+        // its tools did. (TPL-003)
+        scopes: (() => {
+          const tools = (server.toolsSchema as Array<{ scopes?: string[] }> | null) ?? [];
+          const all = [...new Set(tools.flatMap((t) => t.scopes ?? []))];
+          return all.length > 0 ? all : ['mcp:read'];
+        })(),
        allowedDomains: parsed.data.allowedDomains ?? null,
      })
      .returning();
@ -310,13 +315,19 @@ export async function templateRoutes(app: FastifyInstance): Promise<void> {
      .from(templates)
      .leftJoin(users, eq(users.id, templates.ownerUserId))
      .leftJoin(organizations, eq(organizations.id, templates.ownerOrgId))
-      .where(eq(templates.status, 'public'))
+      // Category filter belongs in the WHERE, BEFORE limit — filtering in JS
+      // after `.limit(50)` meant `?category=x` searched only the 50 newest
+      // public templates (any category), returning far fewer than `limit`. (TPL-008)
+      .where(
+        and(
+          eq(templates.status, 'public'),
+          parsed.data.category ? eq(templates.category, parsed.data.category) : undefined,
+        ),
+      )
      .orderBy(desc(templates.createdAt))
      .limit(parsed.data.limit);

-    const filtered = parsed.data.category
-      ? rows.filter((r) => r.template.category === parsed.data.category)
-      : rows;
+    const filtered = rows;

    // Single grouped query — was N+1 (one COUNT per template). On a 100-row
    // listing that's 101 round-trips → p95 latency cliff once the marketplace
--- a/apps/generator/src/lib/build.ts
+++ b/apps/generator/src/lib/build.ts
@ -39,7 +39,10 @@ export async function prepareBuildContext(
  pkg.dependencies = { ...pkg.dependencies, ...spec.dependencies };
  await fs.writeFile(pkgPath, `${JSON.stringify(pkg, null, 2)}\n`, 'utf8');

-  const imageTag = `bmm-mcp-${slug}:v${version}`;
+  // Include serverId in the tag: `slug` is unique only per-org, so two orgs
+  // sharing a slug at the same version would otherwise collide on one global
+  // image tag and run each other's code. Matches the contextDir scheme. (GEN-009)
+  const imageTag = `bmm-mcp-${serverId.slice(0, 8)}-${slug}:v${version}`;
  return { contextDir, imageTag };
 }

@ -70,12 +73,21 @@ export async function staticCheck(contextDir: string): Promise<void> {
  }
 }

+// A hung `docker build` (stalled npm install, wedged daemon) must not pin a
+// worker slot forever — concurrency is 2, so two stuck builds = zero throughput
+// with no alarm. Kill and fail the build past this ceiling. (GEN-008)
+const BUILD_TIMEOUT_MS = 10 * 60 * 1000;
+
 export async function dockerBuild(contextDir: string, imageTag: string, onLog: (msg: string) => void): Promise<void> {
  await new Promise<void>((resolve, reject) => {
    const child = spawn('docker', ['build', '-t', imageTag, '.'], {
      cwd: contextDir,
      stdio: ['ignore', 'pipe', 'pipe'],
    });
+    const timer = setTimeout(() => {
+      child.kill('SIGKILL');
+      reject(new Error(`docker_build_timeout (exceeded ${BUILD_TIMEOUT_MS / 1000}s)`));
+    }, BUILD_TIMEOUT_MS);
    child.stdout.on('data', (d) => {
      for (const line of d.toString().split(/\r?\n/)) {
        if (line.trim()) onLog(line.trim());
@ -86,8 +98,12 @@ export async function dockerBuild(contextDir: string, imageTag: string, onLog: (
        if (line.trim()) onLog(line.trim());
      }
    });
-    child.on('error', (e) => reject(e));
+    child.on('error', (e) => {
+      clearTimeout(timer);
+      reject(e);
+    });
    child.on('close', (code) => {
+      clearTimeout(timer);
      if (code === 0) resolve();
      else reject(new Error(`docker_build_failed (exit ${code})`));
    });
--- a/apps/generator/src/lib/deploy.ts
+++ b/apps/generator/src/lib/deploy.ts
@ -81,13 +81,26 @@ const HARDENING_FLAGS = [
 ];

 function shouldHarden(): boolean {
-  // Explicit opt-out for local dev on Windows where --read-only conflicts
-  // with how Docker Desktop binds volumes. Production must always harden.
-  if (process.env.RUNNER_DISABLE_HARDENING === '1') return false;
-  const env = process.env.NODE_ENV;
-  return env === 'production' || env === 'staging';
+  // Fail-CLOSED: harden by default everywhere. The only opt-out is the explicit
+  // RUNNER_DISABLE_HARDENING=1 flag (local Windows Docker Desktop, where
+  // --read-only conflicts with how volumes bind). The previous NODE_ENV gate was
+  // fail-OPEN — a missing/typo'd NODE_ENV silently ran tenant containers as root
+  // with full caps on the shared host, which is the one defense the LLM
+  // static-check explicitly is NOT. (GEN-002)
+  if (process.env.RUNNER_DISABLE_HARDENING === '1') {
+    console.warn(
+      '[deploy] container hardening DISABLED via RUNNER_DISABLE_HARDENING=1 — never set this in production',
+    );
+    return false;
+  }
+  return true;
 }

+// docker run / rm should return in seconds; cap them so a wedged daemon can't
+// hang a worker slot indefinitely. (GEN-008)
+const DOCKER_RUN_TIMEOUT_MS = 60 * 1000;
+const DOCKER_STOP_TIMEOUT_MS = 60 * 1000;
+
 const db = createDb();

 async function portFree(port: number, host = '127.0.0.1'): Promise<boolean> {
@ -157,14 +170,24 @@ export async function deployContainer(input: DeployInput): Promise<DeployHandle>
    const child = spawn('docker', args, { stdio: ['ignore', 'pipe', 'pipe'] });
    let out = '';
    let err = '';
+    // `docker run -d` returns promptly; if it hangs (wedged daemon) don't pin a
+    // worker slot forever. (GEN-008)
+    const timer = setTimeout(() => {
+      child.kill('SIGKILL');
+      reject(new Error('docker_run_timeout'));
+    }, DOCKER_RUN_TIMEOUT_MS);
    child.stdout.on('data', (d) => {
      out += d.toString();
    });
    child.stderr.on('data', (d) => {
      err += d.toString();
    });
-    child.on('error', (e) => reject(e));
+    child.on('error', (e) => {
+      clearTimeout(timer);
+      reject(e);
+    });
    child.on('close', async (code) => {
+      clearTimeout(timer);
      if (code !== 0) {
        reject(new Error(`docker_run_failed (exit ${code}): ${err.trim() || out.trim()}`));
        return;
@ -207,13 +230,21 @@ export async function stopContainer(
      stdio: ['ignore', 'pipe', 'pipe'],
    });
    let err = '';
+    const timer = setTimeout(() => {
+      child.kill('SIGKILL');
+      resolve({ ok: false, detail: 'stop_timeout' });
+    }, DOCKER_STOP_TIMEOUT_MS);
    child.stderr?.on('data', (d: Buffer) => {
      err += d.toString();
    });
-    child.on('error', () => resolve({ ok: false, detail: 'spawn_failed' }));
-    child.on('close', (code) =>
-      resolve(code === 0 ? { ok: true, detail: '' } : { ok: false, detail: err.trim() || `exit ${code}` }),
-    );
+    child.on('error', () => {
+      clearTimeout(timer);
+      resolve({ ok: false, detail: 'spawn_failed' });
+    });
+    child.on('close', (code) => {
+      clearTimeout(timer);
+      resolve(code === 0 ? { ok: true, detail: '' } : { ok: false, detail: err.trim() || `exit ${code}` });
+    });
  });
 }

--- a/apps/generator/src/worker.ts
+++ b/apps/generator/src/worker.ts
@ -184,30 +184,34 @@ export const worker = new Worker<JobData>(
        `Container ${handle.containerId.slice(0, 12)} running at ${handle.publicUrl}`,
      );

-      await db
-        .update(builds)
-        .set({ status: 'success', finishedAt: new Date() })
-        .where(eq(builds.id, buildId));
-      await db
-        .update(mcpServers)
-        .set({
-          status: 'live',
-          currentVersion: version,
-          publicUrl: handle.publicUrl,
-          updatedAt: new Date(),
-        })
-        .where(eq(mcpServers.id, serverId));
-
-      // Rolling deploy: the new container is live — now retire the previous one.
-      // Without this every iterate would leave an orphan holding a host port.
-      if (oldContainerId && oldContainerId !== handle.containerId) {
-        const stopped = await stopContainer(oldContainerId);
-        await log(
-          stopped.ok ? 'info' : 'warn',
-          stopped.ok
-            ? `Retired previous container ${oldContainerId.slice(0, 12)}`
-            : `Could not stop previous container ${oldContainerId.slice(0, 12)}: ${stopped.detail}`,
-        );
+      try {
+        await db
+          .update(builds)
+          .set({ status: 'success', finishedAt: new Date() })
+          .where(eq(builds.id, buildId));
+        await db
+          .update(mcpServers)
+          .set({
+            status: 'live',
+            currentVersion: version,
+            publicUrl: handle.publicUrl,
+            updatedAt: new Date(),
+          })
+          .where(eq(mcpServers.id, serverId));
+      } finally {
+        // Rolling deploy: retire the previous container even if the success DB
+        // writes above threw — otherwise a DB hiccup after a healthy deploy
+        // leaves the old container orphaned, holding its host port. The new
+        // container is already live and its id is persisted in deployContainer. (GEN-007)
+        if (oldContainerId && oldContainerId !== handle.containerId) {
+          const stopped = await stopContainer(oldContainerId);
+          await log(
+            stopped.ok ? 'info' : 'warn',
+            stopped.ok
+              ? `Retired previous container ${oldContainerId.slice(0, 12)}`
+              : `Could not stop previous container ${oldContainerId.slice(0, 12)}: ${stopped.detail}`,
+          );
+        }
      }

      await emitStatus(buildId, 'success');
--- a/apps/runner-template/Dockerfile
+++ b/apps/runner-template/Dockerfile
@ -1,7 +1,11 @@
 FROM node:20-alpine AS deps
 WORKDIR /app
 COPY package.json ./
-RUN npm install --omit=dev --no-audit --no-fund && npm install --no-save tsx@4.19.2 typescript@5.7.2
+# --ignore-scripts: generated package.json carries LLM/user-chosen dependencies.
+# Without this, a malicious dependency's postinstall lifecycle script would run
+# at `docker build` time on the shared host. Specifiers are also validated to
+# registry semver ranges at the API boundary (DependencyMap). (GEN-001)
+RUN npm install --omit=dev --ignore-scripts --no-audit --no-fund && npm install --no-save --ignore-scripts tsx@4.19.2 typescript@5.7.2

 FROM node:20-alpine AS runtime
 WORKDIR /app
--- a/infra/nginx/buildmymcpserver.conf
+++ b/infra/nginx/buildmymcpserver.conf
@ -21,6 +21,14 @@ server {

    client_max_body_size 12M;

+    # Security headers (INF-002). Cloudflare sits in front — if it also injects
+    # HSTS, drop that line here to avoid duplication. CSP intentionally omitted
+    # for now (a wrong policy breaks Next/Tailwind inline) — track separately.
+    add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;
+    add_header X-Frame-Options "DENY" always;
+    add_header X-Content-Type-Options "nosniff" always;
+    add_header Referrer-Policy "strict-origin-when-cross-origin" always;
+
    location / {
        proxy_pass http://127.0.0.1:4001;
        proxy_http_version 1.1;
@ -48,6 +56,13 @@ server {

    client_max_body_size 12M;

+    # Security headers (INF-002). nosniff matters for the JSON API; XFO/HSTS are
+    # belt-and-suspenders for any HTML the API might ever serve.
+    add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;
+    add_header X-Frame-Options "DENY" always;
+    add_header X-Content-Type-Options "nosniff" always;
+    add_header Referrer-Policy "strict-origin-when-cross-origin" always;
+
    # Build-log WebSocket stream (/v1/builds/:id/stream) — needs the upgrade
    # headers and a long read timeout; buffering off so frames are not held.
    location /v1/builds/ {
--- a/packages/auth/src/index.ts
+++ b/packages/auth/src/index.ts
@ -7,6 +7,8 @@ import {
  eq,
  gt,
  isNull,
+  lt,
+  sql,
  magicLinks,
  memberships,
  organizations,
@ -272,12 +274,18 @@ export async function consumeSmsCode(
    .orderBy(desc(smsCodes.createdAt))
    .limit(1);
  if (!row || row.consumedAt) throw new Error('invalid_or_expired_code');
-  if (row.attempts >= SMS_MAX_ATTEMPTS) throw new Error('too_many_attempts');
+  // Atomically claim one guess attempt. The increment is gated on
+  // `attempts < MAX` inside the same UPDATE, so the DB row-lock serialises
+  // concurrent verifies and at most MAX increments ever succeed for a code.
+  // The previous read-then-write (`row.attempts + 1`) let N parallel requests
+  // all pass a stale `attempts` read and brute-force the 6-digit code. (AUTH-001)
+  const slot = await db
+    .update(smsCodes)
+    .set({ attempts: sql`${smsCodes.attempts} + 1` })
+    .where(and(eq(smsCodes.id, row.id), lt(smsCodes.attempts, SMS_MAX_ATTEMPTS)))
+    .returning({ attempts: smsCodes.attempts });
+  if (slot.length === 0) throw new Error('too_many_attempts');
  if (sha256(`${phone}:${code}`) !== row.codeHash) {
-    await db
-      .update(smsCodes)
-      .set({ attempts: row.attempts + 1 })
-      .where(eq(smsCodes.id, row.id));
    throw new Error('invalid_code');
  }
  await db.update(smsCodes).set({ consumedAt: new Date() }).where(eq(smsCodes.id, row.id));
@ -344,10 +352,16 @@ export async function getSession(
    .where(eq(sessions.tokenHash, hash))
    .limit(1);
  if (!row || row.expiresAt < new Date()) return null;
+  // Deterministic primary-org selection — must match the login flows
+  // (consumeMagicLink / loginWithPassword order by oldest membership). Without
+  // this orderBy, a user with >1 membership (once org-invites land) would get a
+  // nondeterministic org per request, silently scoping their reads/writes to a
+  // different tenant than they logged in as. (SRV-001)
  const [membership] = await db
    .select({ orgId: memberships.orgId, role: memberships.role })
    .from(memberships)
    .where(eq(memberships.userId, row.userId))
+    .orderBy(memberships.createdAt)
    .limit(1);
  if (!membership) return null;
  return {
--- a/packages/types/src/index.ts
+++ b/packages/types/src/index.ts
@ -68,6 +68,33 @@ export const PromptSpec = z.object({
 });
 export type PromptSpec = z.infer<typeof PromptSpec>;

+// Dependency specifiers come from untrusted LLM output and are merged into the
+// build's package.json, then `npm install`-ed inside `docker build` on the
+// SHARED host. Restrict to npm-registry semver ranges only: no `git+`, `http(s):`,
+// `file:` or tarball-URL specifiers (which fetch/checkout+run arbitrary code at
+// install time), and valid (optionally-scoped) npm package names. Combined with
+// `--ignore-scripts` in the runner Dockerfile this closes the build-time RCE. (GEN-001)
+const DepName = z
+  .string()
+  .max(214)
+  .regex(/^(@[a-z0-9][\w.-]*\/)?[a-z0-9][\w.-]*$/i, 'invalid npm package name');
+const DepRange = z
+  .string()
+  .max(64)
+  .regex(/^([\^~]?\d+(\.\d+){0,2}(-[\w.]+)?|\*|latest)$/, 'must be a plain semver range');
+export const DependencyMap = z.record(DepName, DepRange);
+
+// Secret KEYS become `-e KEY=VALUE` docker run args and the runtime env of the
+// tenant container. Constrain to UPPER_SNAKE_CASE (matches requiredSecrets) and
+// reject names that could hijack the Node runtime/loader if the container
+// hardening ever regressed. Values stay free-form. (GEN-003)
+const RESERVED_ENV = new Set(['PATH', 'LD_PRELOAD', 'LD_LIBRARY_PATH', 'NODE_ENV']);
+const SecretKey = z
+  .string()
+  .regex(/^[A-Z][A-Z0-9_]*$/, 'UPPER_SNAKE_CASE env var name required')
+  .refine((k) => !RESERVED_ENV.has(k) && !k.startsWith('NODE_'), 'reserved env var name');
+export const SecretMap = z.record(SecretKey, z.string());
+
 export const GeneratorSpec = z.object({
  name: z.string().min(1).max(128),
  description: z.string().max(2000).optional(),
@ -76,7 +103,7 @@ export const GeneratorSpec = z.object({
  prompts: z.array(PromptSpec).max(50).default([]),
  requiredSecrets: z.array(z.string().regex(/^[A-Z][A-Z0-9_]*$/)).max(30).default([]),
  scopes: z.array(z.string()).max(50).default([]),
-  dependencies: z.record(z.string(), z.string()).default({}),
+  dependencies: DependencyMap.default({}),
 });
 export type GeneratorSpec = z.infer<typeof GeneratorSpec>;

@ -136,7 +163,7 @@ export const CreateServerInput = z.object({
    .max(64)
    .regex(/^[a-z][a-z0-9-]*$/, 'lowercase, hyphenated'),
  prompt: z.string().min(10).max(8000),
-  secrets: z.record(z.string(), z.string()).default({}),
+  secrets: SecretMap.default({}),
  previewId: z.string().min(1).max(64).optional(),
  specEdit: SpecEdit.optional(),
  templateId: z.string().uuid().optional(),
@ -169,7 +196,7 @@ export type PreviewResult = z.infer<typeof PreviewResult>;

 export const IterateServerInput = z.object({
  prompt: z.string().min(10).max(8000),
-  secrets: z.record(z.string(), z.string()).default({}),
+  secrets: SecretMap.default({}),
 });
 export type IterateServerInput = z.infer<typeof IterateServerInput>;