// Provenance (captured from the repository web view):
//   CI: all checks were successful — Deploy to Production / deploy (push),
//   successful in 1m38s.
//   File: 228 lines, 8.0 KiB, TypeScript.
//
// Commit message:
//
// Two overlapping bugs were killing OAuth discovery for every external MCP
// client (Claude Desktop, Cursor, etc.):
//
// 1. worker.ts injected PUBLIC_URL=http://<RUNNER_HOST>:<port> into the runner
//    container even when MCP_DOMAIN was set. Result: the runner's
//    /.well-known/oauth-protected-resource advertised an unreachable URL and
//    the WWW-Authenticate header pointed at a non-HTTPS loopback address.
//    Claude Desktop refused to follow the discovery chain. worker.ts now
//    derives PUBLIC_URL from the same computePublicUrl() helper that builds
//    the user-visible URL stored in mcp_servers.public_url, so the container's
//    self-reported resource matches its actual route.
//
// 2. docker-compose.prod.yml never mounted /opt/buildmymcpserver/runner-map
//    into the api / generator containers. The .conf snippet written by the
//    generator landed in an ephemeral container path; the host inotify watcher
//    saw an empty directory and produced an empty runner-map.combined. Result:
//    nginx 404'd every /<slug>/* request, the runner was unreachable from the
//    public domain, and OAuth discovery couldn't even begin. The mount has
//    been added to both services.
//
// The existing weather server has the wrong PUBLIC_URL baked in and must be
// recreated after deploy. No customers yet.
//
// This commit also exports computePublicUrl from deploy.ts so worker.ts can
// call it.

import fs from 'node:fs/promises';
import net from 'node:net';
import path from 'node:path';

import { createDb, eq, isNotNull, mcpServers } from '@bmm/db';

import { config } from '../config.js';

/**
 * Per-runner TLS via path-routing on mcp.buildmymcpserver.com. When
 * MCP_DOMAIN is set, the generator publishes each container at
 *   https://<MCP_DOMAIN>/<slug>
 * and writes a one-line nginx snippet per server into RUNNER_MAP_DIR.
 * A host-side systemd inotify watcher combines the snippets into a single
 * file that the nginx vhost includes inside its location block, mapping
 * the captured slug to its local runner port.
 *
 * Path-routing (instead of per-subdomain) is the bootstrap-friendly choice:
 * mcp.buildmymcpserver.com is covered by Cloudflare's free Universal SSL,
 * whereas *.mcp.buildmymcpserver.com would need CF Advanced Cert Manager
 * ($10/mo) or a custom Let's-Encrypt wildcard via DNS-01 (free but more
 * ops). See scripts/setup-runner-tls.sh for the one-time host setup.
 *
 * If MCP_DOMAIN is unset, both the URL formatter and the map writer no-op
 * and we fall back to the legacy http://host:port URL — zero behaviour
 * change without the host-side infra in place.
 */
/**
 * Builds the filesystem path of the nginx map fragment for one runner.
 *
 * @param slug - Runner slug; used verbatim as the fragment's base name.
 * @returns `config.RUNNER_MAP_DIR` joined with `<slug>.conf`.
 */
function runnerMapPath(slug: string): string {
  return path.join(config.RUNNER_MAP_DIR, `${slug}.conf`);
}

/**
 * Writes the nginx map fragment that routes `slug` to `port`.
 *
 * No-op when `config.MCP_DOMAIN` is unset. Failures are logged and swallowed
 * on purpose: a missing or unwritable map directory must not fail the deploy.
 *
 * @param slug - Runner slug compared against `$bmm_slug` in the fragment.
 * @param port - Host port assigned to `$bmm_port` when the slug matches.
 */
async function writeRunnerMapEntry(slug: string, port: number): Promise<void> {
  if (!config.MCP_DOMAIN) return;
  // nginx snippet — included inside a `location ~` block that captures
  // $bmm_slug. Each runner contributes one line; the systemd watcher
  // concatenates them into /opt/buildmymcpserver/runner-map.combined.
  const line = `if ($bmm_slug = "${slug}") { set $bmm_port ${port}; }\n`;
  try {
    await fs.mkdir(config.RUNNER_MAP_DIR, { recursive: true });
    await fs.writeFile(runnerMapPath(slug), line, 'utf8');
  } catch (err) {
    // Don't fail the deploy if the map dir isn't mounted yet — runner still
    // serves on http://host:port and the user can manually proxy.
    console.warn(`[runner-tls] could not write map entry for ${slug}:`, err);
  }
}

/**
 * Deletes the nginx map fragment for `slug`, if any.
 *
 * No-op when `config.MCP_DOMAIN` is unset. Never throws: removal is
 * best-effort, but unexpected failures are logged rather than silently
 * dropped so a stale fragment can be diagnosed.
 *
 * @param slug - Runner slug whose fragment should be removed.
 */
async function removeRunnerMapEntry(slug: string): Promise<void> {
  if (!config.MCP_DOMAIN) return;
  try {
    // `force: true` already turns a missing file into a no-op, which is what
    // makes repeated calls idempotent.
    await fs.rm(runnerMapPath(slug), { force: true });
  } catch (err) {
    // Anything that still lands here is a real failure (e.g. permissions),
    // not a missing file. Keep going, but leave a trace.
    console.warn(`[runner-tls] could not remove map entry for ${slug}:`, err);
  }
}

/**
 * Computes the user-visible URL of a runner.
 *
 * @param slug - Runner slug; becomes the path segment when path-routing is on.
 * @param port - Host port; only used by the legacy host:port form.
 * @returns `https://<MCP_DOMAIN>/<slug>` when `config.MCP_DOMAIN` is set,
 *   otherwise `http://<RUNNER_HOST>:<port>`.
 */
export function computePublicUrl(slug: string, port: number): string {
  if (config.MCP_DOMAIN) return `https://${config.MCP_DOMAIN}/${slug}`;
  return `http://${config.RUNNER_HOST}:${port}`;
}

/**
 * Container hardening flags applied on every runner deployment on Linux
 * production hosts. Skipped only when explicitly disabled (dev/Windows
 * Docker Desktop, which doesn't fully honour --read-only on bind mounts).
 *
 * Without these, a tenant container runs as root with full capabilities on
 * the shared host — combined with the LLM static-check being a regex
 * blacklist (Z2-001), this would let a malicious tenant execute arbitrary
 * code on the host. With them, the blast radius collapses to "within the
 * container", which holds only that tenant's own decrypted secrets.
 */
const HARDENING_FLAGS = [
  // Root filesystem is mounted read-only.
  '--read-only',
  // Drop every Linux capability.
  '--cap-drop=ALL',
  // Processes cannot gain privileges they did not start with.
  '--security-opt=no-new-privileges:true',
  // Cap the number of processes/threads in the container.
  '--pids-limit=100',
  // Memory limit; memory-swap equal to memory leaves no swap headroom.
  '--memory=512m',
  '--memory-swap=512m',
  // At most half a CPU.
  '--cpus=0.5',
  // /tmp needs writable space — runner-template uses it for build/cache.
  '--tmpfs=/tmp:rw,nosuid,nodev,size=64m',
];

/**
 * Decides whether the hardening flags are added to `docker run`.
 *
 * @returns `false` when `RUNNER_DISABLE_HARDENING` is exactly `'1'`;
 *   otherwise `true` only when `NODE_ENV` is `'production'` or `'staging'`.
 */
function shouldHarden(): boolean {
  // Explicit opt-out for local dev on Windows where --read-only conflicts
  // with how Docker Desktop binds volumes. Production must always harden.
  // NOTE(review): the opt-out is checked first, so it is honoured in
  // production too, and an unset NODE_ENV means no hardening — confirm that
  // both are intended.
  if (process.env.RUNNER_DISABLE_HARDENING === '1') return false;
  const env = process.env.NODE_ENV;
  return env === 'production' || env === 'staging';
}

// Module-wide database handle used by the port allocator and the deploy step.
const db = createDb();

/**
 * Probes whether a TCP port can be bound locally.
 *
 * Starts a throwaway server on `host:port`; if it reaches `listening` the
 * server is closed again and the port is reported free. Any server error
 * (typically the address being in use) reports the port as taken.
 *
 * @param port - Port to probe.
 * @param host - Interface to bind; defaults to `127.0.0.1`.
 * @returns `true` if the bind succeeded, `false` otherwise. Never rejects.
 */
async function portFree(port: number, host = '127.0.0.1'): Promise<boolean> {
  return new Promise((resolve) => {
    const tester = net
      .createServer()
      .once('error', () => resolve(false))
      // Resolve only after close() completes so the port is released first.
      .once('listening', () => tester.close(() => resolve(true)))
      .listen(port, host);
  });
}

/**
 * Picks a host port for a new runner.
 *
 * Walks `config.RUNNER_PORT_RANGE_START` through `config.RUNNER_PORT_RANGE_END`
 * (inclusive), skipping every port already recorded in `mcpServers.hostPort`
 * and every port that fails the local bind probe.
 *
 * NOTE(review): nothing visible here reserves the returned port, so two
 * concurrent callers could be handed the same one — confirm callers serialise.
 *
 * @returns The first port in the range that is neither recorded nor bound.
 * @throws Error with message `no_free_port` when the range is exhausted.
 */
export async function allocatePort(): Promise<number> {
  const used = new Set(
    (
      await db
        .select({ port: mcpServers.hostPort })
        .from(mcpServers)
        .where(isNotNull(mcpServers.hostPort))
    )
      .map((r) => r.port)
      .filter((p): p is number => typeof p === 'number'),
  );
  for (let port = config.RUNNER_PORT_RANGE_START; port <= config.RUNNER_PORT_RANGE_END; port++) {
    if (used.has(port)) continue;
    if (await portFree(port)) return port;
  }
  throw new Error('no_free_port');
}

/** Result of a successful container deployment. */
export interface DeployHandle {
  /** Container ID printed by `docker run -d`, trimmed to at most 64 characters. */
  containerId: string;
  /** URL produced by `computePublicUrl` for this runner. */
  publicUrl: string;
  /** Host port published to the container's port 3000. */
  hostPort: number;
}

/** Everything `deployContainer` needs to start one runner. */
export interface DeployInput {
  /** Primary key of the `mcpServers` row updated after the container starts. */
  serverId: string;
  /** Runner slug; used in the container name, the public URL and the map entry. */
  slug: string;
  /** Host port to publish to the container's port 3000. */
  hostPort: number;
  /** Image reference passed to `docker run`. */
  imageTag: string;
  /** Environment variables, each forwarded as `-e KEY=VALUE`. */
  envVars: Record<string, string>;
}

/**
 * Starts a runner container with `docker run -d`, publishes its nginx map
 * entry and marks the server row as live.
 *
 * @param input - Target server row, slug, host port, image and env vars.
 * @returns The container ID, public URL and host port of the new runner.
 * @throws Error `docker_run_failed (exit N): …` when `docker run` exits
 *   non-zero; the spawn error when the docker CLI cannot be started; or the
 *   database error when the row update fails. In the last case the container
 *   has already been started and is not removed here.
 */
export async function deployContainer(input: DeployInput): Promise<DeployHandle> {
  // Docker CLI is portable across linux/mac/win — sufficient for now; future
  // iteration will switch to the engine API via UNIX socket.
  const { spawn } = await import('node:child_process');
  const containerName = `bmm-mcp-${input.slug}-${Date.now().toString(36)}`;
  const args = [
    'run',
    '-d',
    '--name',
    containerName,
    '-p',
    `${input.hostPort}:3000`,
  ];
  if (shouldHarden()) {
    args.push(...HARDENING_FLAGS);
  }
  for (const [k, v] of Object.entries(input.envVars)) {
    args.push('-e', `${k}=${v}`);
  }
  args.push('--restart=unless-stopped', input.imageTag);

  // Only the callback-style process handling lives inside the Promise
  // executor. The follow-up awaits run after it so that a failure there
  // rejects this function instead of being lost inside an event handler
  // (which would leave the caller waiting forever).
  const stdout = await new Promise<string>((resolve, reject) => {
    const child = spawn('docker', args, { stdio: ['ignore', 'pipe', 'pipe'] });
    let out = '';
    let err = '';
    child.stdout.on('data', (d) => {
      out += d.toString();
    });
    child.stderr.on('data', (d) => {
      err += d.toString();
    });
    child.on('error', (e) => reject(e));
    child.on('close', (code) => {
      if (code !== 0) {
        reject(new Error(`docker_run_failed (exit ${code}): ${err.trim() || out.trim()}`));
        return;
      }
      resolve(out);
    });
  });

  const containerId = stdout.trim().slice(0, 64);
  const publicUrl = computePublicUrl(input.slug, input.hostPort);
  // Drop the nginx map fragment BEFORE persisting publicUrl so the
  // user-visible URL is reachable by the time the wizard polls "live".
  await writeRunnerMapEntry(input.slug, input.hostPort);
  await db
    .update(mcpServers)
    .set({
      containerId,
      hostPort: input.hostPort,
      publicUrl,
      status: 'live',
      updatedAt: new Date(),
    })
    .where(eq(mcpServers.id, input.serverId));
  return { containerId, publicUrl, hostPort: input.hostPort };
}

/**
 * Force-removes a runner container with `docker rm -f` and, when a slug is
 * given, removes its nginx map entry first.
 *
 * @param containerId - Container to remove; rejected as invalid when empty
 *   or shorter than 4 characters.
 * @param slug - Optional runner slug whose map entry should be removed.
 * @returns `{ ok: true, detail: '' }` on exit code 0. Otherwise `ok: false`
 *   with `detail` set to `invalid_container_id`, `spawn_failed`, the trimmed
 *   stderr of docker, or `exit N` when stderr is empty. Never rejects for
 *   docker failures.
 */
export async function stopContainer(
  containerId: string,
  slug?: string,
): Promise<{ ok: boolean; detail: string }> {
  if (!containerId || containerId.length < 4) {
    return { ok: false, detail: 'invalid_container_id' };
  }
  // Remove the nginx map fragment first so the slug stops serving 502 from
  // the proxy as soon as the container goes down. Idempotent — called
  // multiple times with the same slug is fine.
  if (slug) await removeRunnerMapEntry(slug);

  const { spawn } = await import('node:child_process');
  return await new Promise<{ ok: boolean; detail: string }>((resolve) => {
    const child = spawn('docker', ['rm', '-f', containerId], {
      stdio: ['ignore', 'pipe', 'pipe'],
    });
    let err = '';
    child.stderr?.on('data', (d: Buffer) => {
      err += d.toString();
    });
    child.on('error', () => resolve({ ok: false, detail: 'spawn_failed' }));
    child.on('close', (code) =>
      resolve(code === 0 ? { ok: true, detail: '' } : { ok: false, detail: err.trim() || `exit ${code}` }),
    );
  });
}

/**
 * Checks whether the docker CLI can be run successfully.
 *
 * @returns `true` when `docker version` exits with code 0; `false` when it
 *   exits non-zero or the process cannot be spawned. Never rejects.
 */
export async function dockerAvailable(): Promise<boolean> {
  const { spawn } = await import('node:child_process');
  return await new Promise<boolean>((resolve) => {
    const child = spawn('docker', ['version'], { stdio: 'ignore' });
    child.on('error', () => resolve(false));
    child.on('close', (code) => resolve(code === 0));
  });
}