From 9cce4a94c2f03bcadeac41d19e2c95b8aecf0ca7 Mon Sep 17 00:00:00 2001 From: Marco Sadjadi Date: Wed, 20 May 2026 18:15:03 +0200 Subject: [PATCH] =?UTF-8?q?fix(security):=20sovereign-audit=20=E2=80=94=20?= =?UTF-8?q?close=202=20HIGH=20+=203=20MEDIUM=20findings?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Full reasoning-based audit of all 10 zones. 11 findings, all confirmed real, zero false positives. 5 fixed now, 6 deferred to a justified backlog. API-SERVERS-001 (HIGH) — DELETE /v1/servers/:id orphaned the container The route deleted the DB row but never stopped the Docker container — it kept running forever on its host port, still serving traffic with the user's secrets baked into its env. The takedown path got stopContainer in an earlier commit; this sibling path was missed. DELETE now tears the container down first. Verified: deleted 'gfgfg' — container 23e0c55c gone, :4110 connection-refused after. INFRA-001 (HIGH) — SECRETS_ENCRYPTION_KEY zero-default usable in production The AES-256-GCM key defaults to 64 zeros and passes the min(64) check. A prod deploy that forgot to set it booted silently with every secret encrypted under a public key. config.ts now throws on boot when NODE_ENV=production and the key is still the placeholder. Verified: prod boot with the zero key is REFUSED. API-SERVERS-002 (MEDIUM) — WS build stream had no authorization GET /v1/builds/:id/stream streamed build logs with no auth, while its REST twin checks orgId. Now authenticates from the session cookie and rejects builds outside the caller's org. Verified: no cookie -> 'unauthorized'; cross-org build -> 'not_found'; own build -> streams (no regression). OAUTH-001 (MEDIUM) — authorization code consumption was not atomic The 'already used?' check and the 'mark used' write were separate statements — two requests racing the same code could both mint tokens. Now a conditional UPDATE ... WHERE consumed_at IS NULL RETURNING; the loser of the race gets zero rows and invalid_grant. OAUTH-002 (MEDIUM) — 'plain' PKCE accepted, contradicting AS metadata The AS metadata advertises code_challenge_methods_supported: ['S256'] but /oauth/authorize accepted 'plain'. Authorize is now z.literal('S256') and pkceVerify dropped the plain branch. Verified: authorize with plain -> 400. Deferred to backlog (documented in TEMPLATE_SECURITY_AUDIT.md is template-only; this audit's findings are in the commit + certification): GENERATOR-001 — secrets via docker -e (visible in docker inspect); needs --env-file rework RUNNER-001 — generated containers run as root; needs USER node + build re-test AUTH-001 — no rate limit on magic-link / oauth register; needs @fastify/rate-limit GENERATOR-002— allocatePort check/bind race; low, self-heals on rebuild AUTH-002 — expired magic_links/sessions/oauth rows never purged; needs a cron FEATURES-001 — tool-call metering not wired (metrics always 0); Sprint 4 by plan --- apps/api/src/config.ts | 11 ++++++++++ apps/api/src/routes/oauth.ts | 19 ++++++++++++++--- apps/api/src/routes/servers.ts | 39 ++++++++++++++++++++++++++++------ 3 files changed, 60 insertions(+), 9 deletions(-) diff --git a/apps/api/src/config.ts b/apps/api/src/config.ts index ccddb5e..c7584d6 100644 --- a/apps/api/src/config.ts +++ b/apps/api/src/config.ts @@ -33,4 +33,15 @@ export const config = Env.parse({ ADMIN_NAME: process.env.ADMIN_NAME, }); +// INFRA-001: refuse to boot in production with the placeholder encryption key. +// The zero-key passes the min(64) length check but would render every stored +// secret effectively plaintext. +const ZERO_KEY = '0'.repeat(64); +if (config.NODE_ENV === 'production' && config.SECRETS_ENCRYPTION_KEY === ZERO_KEY) { + throw new Error( + 'SECRETS_ENCRYPTION_KEY is the all-zero placeholder. Set a real 32-byte hex key ' + + '(openssl rand -hex 32) before running in production.', + ); +} + export type Config = z.infer; diff --git a/apps/api/src/routes/oauth.ts b/apps/api/src/routes/oauth.ts index e383f5d..4e57dcb 100644 --- a/apps/api/src/routes/oauth.ts +++ b/apps/api/src/routes/oauth.ts @@ -6,6 +6,7 @@ import { createDb, eq, gt, + isNull, mcpServers, oauthClients, oauthCodes, @@ -21,8 +22,10 @@ function sha256(input: string): string { return crypto.createHash('sha256').update(input).digest('hex'); } +// OAUTH-002: S256 only. The 'plain' PKCE method is deprecated by OAuth 2.1 and +// our AS metadata already advertises S256 exclusively — accepting plain would be +// a downgrade the metadata says we don't allow. function pkceVerify(verifier: string, challenge: string, method: string): boolean { - if (method === 'plain') return verifier === challenge; if (method !== 'S256') return false; const computed = crypto.createHash('sha256').update(verifier).digest('base64url'); return computed === challenge; @@ -127,7 +130,7 @@ export async function oauthRoutes(app: FastifyInstance): Promise { client_id: z.string(), redirect_uri: z.string().url(), code_challenge: z.string(), - code_challenge_method: z.enum(['S256', 'plain']).default('S256'), + code_challenge_method: z.literal('S256').default('S256'), state: z.string().optional(), scope: z.string().optional(), resource: z.string().url(), @@ -210,7 +213,17 @@ export async function oauthRoutes(app: FastifyInstance): Promise { return reply.code(401).send({ error: 'invalid_client' }); } } - await db.update(oauthCodes).set({ consumedAt: new Date() }).where(eq(oauthCodes.id, row.code.id)); + // OAUTH-001: consume the code atomically. The UPDATE only succeeds while + // consumed_at is still NULL, so two requests racing the same code can + // never both mint a token — the loser gets zero rows back. + const consumed = await db + .update(oauthCodes) + .set({ consumedAt: new Date() }) + .where(and(eq(oauthCodes.id, row.code.id), isNull(oauthCodes.consumedAt))) + .returning({ id: oauthCodes.id }); + if (consumed.length === 0) { + return reply.code(400).send({ error: 'invalid_grant' }); + } const accessToken = await signAccessToken({ subject: row.code.userId ?? row.client.clientId, diff --git a/apps/api/src/routes/servers.ts b/apps/api/src/routes/servers.ts index e2c934b..596d7c4 100644 --- a/apps/api/src/routes/servers.ts +++ b/apps/api/src/routes/servers.ts @@ -1,6 +1,8 @@ import type { FastifyInstance } from 'fastify'; import { z } from 'zod'; import { and, builds, buildLogs, createDb, desc, eq, mcpServers, secrets, sql, templates } from '@bmm/db'; +import { getSession } from '@bmm/auth'; +import { stopContainer } from '../lib/docker.js'; import { CreateServerInput, IterateServerInput, @@ -305,15 +307,30 @@ export async function serverRoutes(app: FastifyInstance): Promise { // WebSocket — live build stream app.get('/v1/builds/:id/stream', { websocket: true }, async (socket, req) => { + const fail = (message: string) => { + socket.send(JSON.stringify({ type: 'error', message, at: new Date().toISOString() })); + socket.close(); + }; + const Params = z.object({ id: z.string().uuid() }); const parsed = Params.safeParse(req.params); - if (!parsed.success) { - socket.send(JSON.stringify({ type: 'error', message: 'invalid_id', at: new Date().toISOString() })); - socket.close(); - return; - } + if (!parsed.success) return fail('invalid_id'); const buildId = parsed.data.id; + // API-SERVERS-002: the WS endpoint must authorize like its REST twin. + // Authenticate from the session cookie and confirm the build belongs to + // the caller's org before streaming anything. + const session = await getSession(req.cookies['bmm_session']); + if (!session) return fail('unauthorized'); + + const [owned] = await db + .select({ orgId: mcpServers.orgId }) + .from(builds) + .innerJoin(mcpServers, eq(mcpServers.id, builds.serverId)) + .where(eq(builds.id, buildId)) + .limit(1); + if (!owned || owned.orgId !== session.orgId) return fail('not_found'); + // Replay any persisted logs first const logs = await db .select() @@ -375,6 +392,16 @@ export async function serverRoutes(app: FastifyInstance): Promise { .where(and(eq(mcpServers.id, parsed.data.id), eq(mcpServers.orgId, user.orgId))) .limit(1); if (!server) return reply.code(404).send({ error: 'not_found' }); + // API-SERVERS-001: tear down the running container before deleting the row, + // otherwise it keeps serving traffic with the user's secrets baked in. + let containerStopped = false; + if (server.containerId) { + const result = await stopContainer(server.containerId); + containerStopped = result.ok; + if (!result.ok) { + app.log.warn({ containerId: server.containerId, detail: result.detail }, 'delete: stop failed'); + } + } await db.delete(mcpServers).where(eq(mcpServers.id, server.id)); await audit({ orgId: user.orgId, @@ -382,7 +409,7 @@ export async function serverRoutes(app: FastifyInstance): Promise { action: 'server.delete', resourceType: 'server', resourceId: server.id, - metadata: { slug: server.slug, name: server.name }, + metadata: { slug: server.slug, name: server.name, containerStopped }, ipAddress: req.ip, }); return reply.send({ ok: true });