/**
 * Per-runner nginx map fragment cleanup (API side).
 *
 * When MCP_DOMAIN is set, a host-side inotify watcher combines the fragments
 * in RUNNER_MAP_DIR and reloads nginx on every change. Deleting a runner's
 * fragment here makes its public hostname stop routing as soon as the
 * container is removed, instead of answering 502 from the proxy.
 */

/** Domain runners are published under; empty means legacy http://host:port URLs. */
const MCP_DOMAIN = process.env.MCP_DOMAIN ?? '';
/** Directory holding one `<slug>.conf` map fragment per live runner. */
const RUNNER_MAP_DIR = process.env.RUNNER_MAP_DIR ?? '/var/runner-map';

/**
 * Resolve the map-fragment path for a slug.
 *
 * @param slug - Server slug as stored on the server row.
 * @returns Path of `<slug>.conf` inside RUNNER_MAP_DIR, or `null` when the
 *   slug is empty or contains a path separator.
 */
function runnerMapFragmentPath(slug: string): string | null {
  if (!slug) return null;
  const fileName = `${slug}.conf`;
  // path.join() would happily resolve "../x" or "a/b" to a location outside
  // the map directory; a fragment name must be a single path segment.
  if (path.basename(fileName) !== fileName) return null;
  return path.join(RUNNER_MAP_DIR, fileName);
}

/**
 * Delete the nginx map fragment for `slug`. Best-effort: never throws.
 *
 * No-op when MCP_DOMAIN is not configured or the slug is unusable. A missing
 * file is not an error (`force: true`), so anything reaching the catch block
 * is a real failure (permissions, read-only mount, ...) and is logged rather
 * than swallowed: a fragment left behind keeps the hostname mapped to a port
 * that is about to be freed.
 *
 * @param slug - Slug whose fragment should be removed.
 */
async function removeRunnerMapEntry(slug: string): Promise<void> {
  if (!MCP_DOMAIN) return;
  const fragmentPath = runnerMapFragmentPath(slug);
  if (fragmentPath === null) return;
  try {
    await fs.rm(fragmentPath, { force: true });
  } catch (err: unknown) {
    console.warn(`[runner-tls] could not remove map entry for ${slug}:`, err);
  }
}
*/ -export async function stopContainer(containerId: string): Promise<{ ok: boolean; detail: string }> { +export async function stopContainer( + containerId: string, + slug?: string, +): Promise<{ ok: boolean; detail: string }> { if (!containerId || containerId.length < 4) { return { ok: false, detail: 'invalid_container_id' }; } + if (slug) await removeRunnerMapEntry(slug); return await new Promise<{ ok: boolean; detail: string }>((resolve) => { const child = spawn('docker', ['rm', '-f', containerId], { stdio: ['ignore', 'pipe', 'pipe'], diff --git a/apps/api/src/routes/oauth.ts b/apps/api/src/routes/oauth.ts index 366e54c..034e3f4 100644 --- a/apps/api/src/routes/oauth.ts +++ b/apps/api/src/routes/oauth.ts @@ -19,6 +19,12 @@ import { config } from '../config.js'; const db = createDb(); +// Access-token lifetime is short so revocation propagates within the hour. +// Refresh-token lifetime is long so legitimate clients don't have to +// re-authorize daily; rotation on every refresh limits exposure if one leaks. +const ACCESS_TOKEN_TTL_S = 3600; // 1 hour +const REFRESH_TOKEN_TTL_MS = 30 * 24 * 3600 * 1000; // 30 days + function sha256(input: string): string { return crypto.createHash('sha256').update(input).digest('hex'); } @@ -236,12 +242,13 @@ export async function oauthRoutes(app: FastifyInstance): Promise { return reply.code(400).send({ error: 'invalid_grant' }); } + const subject = row.code.userId ?? row.client.clientId; const accessToken = await signAccessToken({ - subject: row.code.userId ?? row.client.clientId, + subject, audience: resource, issuer: `${config.CONTROL_PLANE_PUBLIC_URL}/oauth`, scope: row.code.scope ?? '', - ttlSeconds: 3600, + ttlSeconds: ACCESS_TOKEN_TTL_S, }); const refreshToken = crypto.randomBytes(32).toString('base64url'); await db.insert(oauthTokens).values({ @@ -250,18 +257,98 @@ export async function oauthRoutes(app: FastifyInstance): Promise { refreshTokenHash: sha256(refreshToken), scope: row.code.scope ?? 
null, resource, - expiresAt: new Date(Date.now() + 3600 * 1000), + subject, + // expiresAt is the REFRESH-token lifetime — 30 days. Access-token + // expiry lives inside the JWT's `exp` claim (1h, set above). + expiresAt: new Date(Date.now() + REFRESH_TOKEN_TTL_MS), }); return reply.send({ access_token: accessToken, token_type: 'Bearer', - expires_in: 3600, + expires_in: ACCESS_TOKEN_TTL_S, refresh_token: refreshToken, scope: row.code.scope ?? '', }); } + // ─── grant_type: refresh_token ───────────────────────────────────── + // OAuth 2.1 with rotation: every successful refresh issues a NEW refresh + // token and atomically invalidates the old one. If a stolen refresh token + // gets used after the legitimate client refreshed, the second use sees + // invalid_grant — that's how rotation surfaces token theft. + if (parsed.data.grant_type === 'refresh_token') { + const { refresh_token, client_id, client_secret, resource: requestedResource } = + parsed.data; + if (!refresh_token || !client_id) { + return reply.code(400).send({ error: 'invalid_request' }); + } + + const refreshHash = sha256(refresh_token); + const [row] = await db + .select({ token: oauthTokens, client: oauthClients }) + .from(oauthTokens) + .innerJoin(oauthClients, eq(oauthClients.id, oauthTokens.clientDbId)) + .where( + and( + eq(oauthTokens.refreshTokenHash, refreshHash), + gt(oauthTokens.expiresAt, new Date()), + ), + ) + .limit(1); + if (!row) return reply.code(400).send({ error: 'invalid_grant' }); + if (row.client.clientId !== client_id) { + return reply.code(401).send({ error: 'invalid_client' }); + } + if (row.client.clientSecretHash) { + if (!client_secret || sha256(client_secret) !== row.client.clientSecretHash) { + return reply.code(401).send({ error: 'invalid_client' }); + } + } + // RFC 8707: requested resource must equal the stored one — refreshes + // don't allow audience changes (would be a downgrade/escalation vector). 
+ if (requestedResource && requestedResource !== row.token.resource) { + return reply.code(400).send({ error: 'invalid_resource' }); + } + + const subject = row.token.subject ?? row.client.clientId; + const newAccessToken = await signAccessToken({ + subject, + audience: row.token.resource ?? '', + issuer: `${config.CONTROL_PLANE_PUBLIC_URL}/oauth`, + scope: row.token.scope ?? '', + ttlSeconds: ACCESS_TOKEN_TTL_S, + }); + const newRefreshToken = crypto.randomBytes(32).toString('base64url'); + const newRefreshHash = sha256(newRefreshToken); + + // Atomic rotation: UPDATE only succeeds if the row still has the OLD + // refresh-hash. Two parallel refreshes with the same token can't both + // win — the loser sees zero rows and gets invalid_grant. + const rotated = await db + .update(oauthTokens) + .set({ + accessTokenHash: sha256(newAccessToken), + refreshTokenHash: newRefreshHash, + expiresAt: new Date(Date.now() + REFRESH_TOKEN_TTL_MS), + }) + .where( + and(eq(oauthTokens.id, row.token.id), eq(oauthTokens.refreshTokenHash, refreshHash)), + ) + .returning({ id: oauthTokens.id }); + if (rotated.length === 0) { + return reply.code(400).send({ error: 'invalid_grant' }); + } + + return reply.send({ + access_token: newAccessToken, + token_type: 'Bearer', + expires_in: ACCESS_TOKEN_TTL_S, + refresh_token: newRefreshToken, + scope: row.token.scope ?? '', + }); + } + return reply.code(400).send({ error: 'unsupported_grant_type' }); }); diff --git a/apps/api/src/routes/servers.ts b/apps/api/src/routes/servers.ts index 334b900..f6dedf2 100644 --- a/apps/api/src/routes/servers.ts +++ b/apps/api/src/routes/servers.ts @@ -521,7 +521,7 @@ export async function serverRoutes(app: FastifyInstance): Promise { // otherwise it keeps serving traffic with the user's secrets baked in. 
let containerStopped = false; if (server.containerId) { - const result = await stopContainer(server.containerId); + const result = await stopContainer(server.containerId, server.slug); containerStopped = result.ok; if (!result.ok) { app.log.warn( diff --git a/apps/api/src/routes/templates.ts b/apps/api/src/routes/templates.ts index 175f6e4..54a2c91 100644 --- a/apps/api/src/routes/templates.ts +++ b/apps/api/src/routes/templates.ts @@ -559,12 +559,12 @@ export async function templateRoutes(app: FastifyInstance): Promise { let stoppedContainers = 0; if (b.data.status === 'takedown') { const forkedServers = await db - .select({ id: mcpServers.id, containerId: mcpServers.containerId }) + .select({ id: mcpServers.id, containerId: mcpServers.containerId, slug: mcpServers.slug }) .from(mcpServers) .where(eq(mcpServers.templateId, p.data.id)); for (const fork of forkedServers) { if (fork.containerId) { - const result = await stopContainer(fork.containerId); + const result = await stopContainer(fork.containerId, fork.slug); if (result.ok) stoppedContainers++; else app.log.warn( diff --git a/apps/generator/src/config.ts b/apps/generator/src/config.ts index 9fafb0d..96d3ad4 100644 --- a/apps/generator/src/config.ts +++ b/apps/generator/src/config.ts @@ -13,6 +13,15 @@ const Env = z.object({ OAUTH_ISSUER: z.string().optional(), MODEL_GENERATE: z.string().default('glm-4.5'), MODEL_FIX: z.string().default('claude-haiku-4-5-20251001'), + // When set (e.g. "mcp.buildmymcpserver.com"), each deployed runner gets a + // public URL of the form https://. instead of the legacy + // http://: form. Requires host-side nginx + DNS setup + // (see scripts/setup-runner-tls.sh). When unset, falls back to plain HTTP. + MCP_DOMAIN: z.string().optional(), + // Directory the generator drops per-runner map fragments into. A host-side + // inotify service combines them and reloads nginx. Mounted as a volume by + // docker-compose (see setup-runner-tls.sh). 
/**
 * Per-runner subdomain TLS support. When MCP_DOMAIN is set, the generator
 * publishes each container under https://<slug>.<MCP_DOMAIN> via a host-side
 * nginx that reads a slug→port map. The generator writes one small fragment
 * per server; a host-side inotify watcher combines them and reloads nginx.
 * See scripts/setup-runner-tls.sh for the one-time host setup.
 *
 * If MCP_DOMAIN is unset, the URL formatter falls back to the legacy
 * http://host:port form and the map writer/remover are no-ops.
 */

/**
 * Slugs accepted for map fragments. The slug is written verbatim into an
 * nginx `map` block and used as a DNS label, so whitespace, `;`, newlines or
 * path separators must never get through: one malformed fragment makes
 * `nginx -t` fail and blocks reloads for every runner. Mirrors the
 * `server_name` pattern in scripts/setup-runner-tls.sh.
 */
const RUNNER_SLUG_PATTERN = /^[a-z0-9][a-z0-9-]*$/i;

/** Path of the map fragment for `slug` inside `config.RUNNER_MAP_DIR`. */
function runnerMapPath(slug: string): string {
  return path.join(config.RUNNER_MAP_DIR, `${slug}.conf`);
}

/**
 * Publish `slug` → `port` to the host nginx by writing its map fragment.
 * Best-effort: never throws, so a missing map volume cannot fail a deploy.
 *
 * @param slug - Server slug; must match RUNNER_SLUG_PATTERN or nothing is written.
 * @param port - Host port the runner container listens on.
 */
async function writeRunnerMapEntry(slug: string, port: number): Promise<void> {
  if (!config.MCP_DOMAIN) return;
  if (!RUNNER_SLUG_PATTERN.test(slug)) {
    console.warn(
      `[runner-tls] refusing to write map entry for unsafe slug ${JSON.stringify(slug)}`,
    );
    return;
  }
  const line = `${slug}.${config.MCP_DOMAIN} ${port};\n`;
  try {
    await fs.mkdir(config.RUNNER_MAP_DIR, { recursive: true });
    await fs.writeFile(runnerMapPath(slug), line, 'utf8');
  } catch (err: unknown) {
    // Don't fail the deploy if the map dir isn't mounted yet — the runner
    // still serves on http://host:port and can be proxied manually.
    console.warn(`[runner-tls] could not write map entry for ${slug}:`, err);
  }
}

/**
 * Remove the map fragment for `slug`. Best-effort and idempotent.
 *
 * `force: true` already makes a missing file a non-error, so whatever reaches
 * the catch block is a real failure and is logged: a fragment left behind
 * keeps the hostname mapped to a port that is about to be freed.
 *
 * @param slug - Server slug whose fragment should be deleted.
 */
async function removeRunnerMapEntry(slug: string): Promise<void> {
  if (!config.MCP_DOMAIN) return;
  const fileName = `${slug}.conf`;
  // Only a single path segment may be deleted; "../x" would escape the map dir.
  if (!slug || path.basename(fileName) !== fileName) {
    console.warn(
      `[runner-tls] refusing to remove map entry for unsafe slug ${JSON.stringify(slug)}`,
    );
    return;
  }
  try {
    await fs.rm(runnerMapPath(slug), { force: true });
  } catch (err: unknown) {
    console.warn(`[runner-tls] could not remove map entry for ${slug}:`, err);
  }
}

/**
 * URL handed to the user for a deployed runner.
 *
 * @param slug - Server slug, used as the subdomain when MCP_DOMAIN is set.
 * @param port - Host port, used only for the legacy http://host:port form.
 */
function computePublicUrl(slug: string, port: number): string {
  if (config.MCP_DOMAIN) return `https://${slug}.${config.MCP_DOMAIN}`;
  return `http://${config.RUNNER_HOST}:${port}`;
}
#!/usr/bin/env bash
# setup-runner-tls.sh
#
# One-time host setup for per-runner subdomain TLS. Run as root on the BMM
# host AFTER you've done the prereqs below. Idempotent — safe to re-run.
#
# What it does:
#   1. Creates /opt/buildmymcpserver/runner-map/ (volume-mounted into bmm-api
#      and bmm-generator — they drop one <slug>.conf fragment per live runner)
#   2. Installs an nginx vhost that catches *.mcp.buildmymcpserver.com,
#      reads slug→port from a combined map file, and reverse-proxies to the
#      runner on localhost
#   3. Installs a watcher (helper script + systemd service) that
#      inotify-watches the map dir, combines all fragments into a single map
#      file, and reloads nginx on any change
#
# After this script:
#   - In docker-compose.prod.yml, add a volume mount to BOTH api and generator:
#       volumes:
#         - /opt/buildmymcpserver/runner-map:/var/runner-map
#   - In .env.production, add:
#       MCP_DOMAIN=mcp.buildmymcpserver.com
#   - docker compose up -d --force-recreate api generator
#   - From now on every deployed runner gets https://<slug>.mcp.buildmymcpserver.com
#
# ─── PREREQS (do these in Cloudflare dashboard before running) ───────────
#   A. DNS: Add an A-record '*.mcp.buildmymcpserver.com' → 213.239.213.217
#      Proxy status: Proxied (orange cloud)
#   B. SSL: Cloudflare → SSL/TLS → Origin Server → Create Certificate
#      Hostnames: *.mcp.buildmymcpserver.com, mcp.buildmymcpserver.com
#      Save the .crt and .key to:
#        /etc/ssl/buildmymcpserver/mcp-runners.crt   (mode 644)
#        /etc/ssl/buildmymcpserver/mcp-runners.key   (mode 600, root:root)
#   C. SSL mode: Cloudflare → SSL/TLS → Overview → set to "Full (strict)"
#      (you've likely already set this for api.* — same setting)
#
# Run: sudo bash scripts/setup-runner-tls.sh
set -euo pipefail

if [[ "${EUID}" -ne 0 ]]; then
  echo "Run as root (sudo bash $0)."
  exit 1
fi

MAP_DIR="/opt/buildmymcpserver/runner-map"
COMBINED="/opt/buildmymcpserver/runner-map.combined"
VHOST_DST="/etc/nginx/sites-available/bmm-mcp-runners"
VHOST_LNK="/etc/nginx/sites-enabled/bmm-mcp-runners"
WATCHER_BIN="/usr/local/sbin/bmm-runner-map-watch"
CERT="/etc/ssl/buildmymcpserver/mcp-runners.crt"
KEY="/etc/ssl/buildmymcpserver/mcp-runners.key"

echo "─── checking prereqs ───────────────────────────────────────"
for f in "$CERT" "$KEY"; do
  if [[ ! -f "$f" ]]; then
    echo "MISSING: $f — see PREREQS at the top of this script."
    exit 1
  fi
done

if ! command -v inotifywait >/dev/null; then
  echo "Installing inotify-tools…"
  apt-get update -qq
  apt-get install -y -qq inotify-tools
fi

echo "─── creating map dir + initial combined file ──────────────"
mkdir -p "$MAP_DIR"
chmod 755 "$MAP_DIR"
touch "$COMBINED"
chmod 644 "$COMBINED"

echo "─── writing nginx vhost ──────────────────────────────────"
# Quoted heredoc: nginx variables ($host, $slug, …) must reach the file verbatim.
cat > "$VHOST_DST" <<'NGINX'
# BMM per-runner subdomain proxy. Map fragments (slug→port) are written by
# the bmm-api and bmm-generator containers; a systemd inotify watcher
# combines them into the included file and runs `nginx -s reload`.

# Keyed on $host (lower-cased, port stripped) rather than $http_host, so a
# Host header carrying an explicit port still finds its runner.
map $host $bmm_runner_port {
    default 0;
    include /opt/buildmymcpserver/runner-map.combined;
}

server {
    listen 80;
    listen [::]:80;
    listen 443 ssl http2;
    listen [::]:443 ssl http2;
    server_name ~^(?<slug>[a-z0-9][a-z0-9-]*)\.mcp\.buildmymcpserver\.com$;

    ssl_certificate     /etc/ssl/buildmymcpserver/mcp-runners.crt;
    ssl_certificate_key /etc/ssl/buildmymcpserver/mcp-runners.key;

    client_max_body_size 4M;

    # Unknown slugs land here — return 404 instead of a confusing default vhost.
    if ($bmm_runner_port = 0) {
        return 404;
    }

    location / {
        proxy_pass http://127.0.0.1:$bmm_runner_port;
        proxy_http_version 1.1;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto https;
        # MCP uses Streamable HTTP — disable buffering so response chunks flow.
        proxy_buffering off;
        proxy_cache off;
        proxy_read_timeout 600s;
    }
}
NGINX
ln -sf "$VHOST_DST" "$VHOST_LNK"

echo "─── writing watcher helper ───────────────────────────────"
# The watcher lives in its own script so the systemd unit stays free of nested
# shell quoting. Quoted heredoc: nothing in it is expanded at install time.
cat > "$WATCHER_BIN" <<'WATCHER'
#!/usr/bin/env bash
# bmm-runner-map-watch <map-dir> <combined-file>
#
# Combines every <map-dir>/*.conf fragment into <combined-file> and reloads
# nginx, once at startup and again after every change to the directory.
set -uo pipefail

MAP_DIR="${1:?usage: bmm-runner-map-watch <map-dir> <combined-file>}"
COMBINED="${2:?usage: bmm-runner-map-watch <map-dir> <combined-file>}"

rebuild() {
  local tmp
  # Build next to the target, then rename: nginx never sees a truncated or
  # half-written map. The temp file is outside MAP_DIR, so it triggers no events.
  tmp="$(mktemp "${COMBINED}.XXXXXX")" || return 1
  # find + xargs -r: an empty map dir produces an empty file, not an error.
  find "$MAP_DIR" -maxdepth 1 -type f -name '*.conf' -print0 \
    | sort -z | xargs -0 -r cat -- > "$tmp"
  chmod 644 "$tmp"
  mv -f "$tmp" "$COMBINED"
  /usr/sbin/nginx -t -q && /usr/sbin/nginx -s reload
}

# Monitor mode (-m) keeps ONE inotify watch alive for the service lifetime;
# events that arrive while a rebuild/reload is running queue up in the pipe.
# Re-running a one-shot inotifywait per iteration would drop every change made
# during the rebuild, leaving a new runner unrouted until the next change.
inotifywait -m -q -e create,modify,delete,moved_to,moved_from "$MAP_DIR" |
{
  rebuild || echo "bmm-runner-map: initial rebuild failed" >&2
  while read -r _event; do
    # Coalesce a burst (create + modify of one fragment) into a single reload.
    while read -r -t 0.5 _event; do :; done
    rebuild || echo "bmm-runner-map: rebuild/reload failed" >&2
  done
}
WATCHER
chmod 755 "$WATCHER_BIN"

echo "─── writing systemd watcher service ──────────────────────"
# Unquoted heredoc on purpose: the paths are expanded at install time.
cat > /etc/systemd/system/bmm-runner-map.service <<EOF
[Unit]
Description=BMM runner map watcher (combine nginx map fragments, reload nginx)
After=nginx.service

[Service]
Type=simple
ExecStart=${WATCHER_BIN} ${MAP_DIR} ${COMBINED}
Restart=always
RestartSec=2

[Install]
WantedBy=multi-user.target
EOF

systemctl daemon-reload
systemctl enable bmm-runner-map
# restart (not just "enable --now") so a re-run picks up an updated unit/helper.
systemctl restart bmm-runner-map

echo "─── verifying nginx config + reload ──────────────────────"
nginx -t
nginx -s reload || true

echo ""
echo "─── DONE ─────────────────────────────────────────────────"
echo ""
echo "Next steps (one-time):"
echo ""
echo "1) Edit /opt/buildmymcpserver/docker-compose.prod.yml — add to BOTH"
echo "   the 'api' and 'generator' services:"
echo ""
echo "     volumes:"
echo "       - /opt/buildmymcpserver/runner-map:/var/runner-map"
echo ""
echo "2) Edit /opt/buildmymcpserver/.env.production — add:"
echo ""
echo "     MCP_DOMAIN=mcp.buildmymcpserver.com"
echo ""
echo "3) Restart api + generator so they pick up the env + volume:"
echo ""
echo "     cd /opt/buildmymcpserver"
echo "     docker compose --env-file .env.production -f docker-compose.prod.yml \\"
echo "       up -d --force-recreate api generator"
echo ""
echo "Test (after at least one runner has been deployed):"
echo "  curl -I https://<slug>.mcp.buildmymcpserver.com/health"
echo ""
echo "If you ever need to verify the map state:"
echo "  cat ${COMBINED}"
echo "  systemctl status bmm-runner-map"