buildmymcpserver/ops/bmm/backup.sh

#!/usr/bin/env bash
# Daily Postgres backup for BMM — replaces the inline cron one-liner with:
# - integrity check (gunzip -t + pg_dump header sanity)
# - failure alert via notify.sh
# - structured logging
# - optional offsite push via rclone (if rclone configured + BMM_BACKUP_REMOTE set)
# - 14-day local retention
#
# Cron: 15 3 * * * root /opt/bmm-ops/backup.sh
#
# Restore:
#   docker exec -i bmm-postgres psql -U bmm -d bmm_restore_test \
#     < <(gunzip -c /var/backups/bmm/bmm-YYYYMMDD.sql.gz)

# Treat unset variables as errors and let a pipeline fail when any stage
# fails. errexit (-e) is not enabled, so a failing command does not stop
# the script by itself.
set -u
set -o pipefail

# Filesystem locations
BACKUP_DIR=/var/backups/bmm
LOG_FILE=/var/log/bmm-backup.log
NOTIFY=/opt/bmm-ops/notify.sh

# Postgres target: container name, database and role passed to pg_dump
CONTAINER=bmm-postgres
PG_DB=bmm
PG_USER=bmm

# Age threshold (days) handed to the retention `find` at the end of the run
RETENTION_DAYS=14

mkdir -p "$BACKUP_DIR"

# Both stamps are UTC. DATE names the dump file; TIMESTAMP is captured once
# here and reused as the prefix of every log line written during this run.
DATE="$(date -u '+%Y%m%d')"
TIMESTAMP="$(date -u '+%Y-%m-%dT%H:%M:%SZ')"
OUT="$BACKUP_DIR/bmm-$DATE.sql.gz"

# Append one line to $LOG_FILE in the form "[<TIMESTAMP>] <message>".
# All arguments are joined into a single message. $TIMESTAMP is the value
# captured at script start, so every line of a run carries the same stamp.
log() {
  printf '[%s] %s\n' "$TIMESTAMP" "$*" >> "$LOG_FILE"
}
# Abort the run: write "FAIL: <message>" to the log, send a "backup-failed"
# alert with the same message through $NOTIFY, then exit with status 1.
# All arguments are joined into a single message.
fail() {
  local reason="$*"
  log "FAIL: ${reason}"
  "$NOTIFY" "backup-failed" "$reason"
  exit 1
}

log "starting backup"

# Stream pg_dump from the container straight into gzip, writing to a .tmp
# file next to the final path. pg_dump's stderr is appended to the log.
# With pipefail on, the `||` branch runs when either stage fails (including
# a dump that dies mid-stream); the partial file is dropped before aborting.
docker exec "$CONTAINER" pg_dump -U "$PG_USER" "$PG_DB" 2>>"$LOG_FILE" \
  | gzip > "$OUT.tmp" \
  || {
    rm -f "$OUT.tmp"
    fail "pg_dump pipeline failed"
  }

# Integrity check 1 — the compressed stream has to decode cleanly from start
# to finish; gunzip's diagnostics are appended to the log.
gunzip -t "$OUT.tmp" 2>>"$LOG_FILE" || {
  rm -f "$OUT.tmp"
  fail "gzip integrity check failed for $OUT.tmp"
}

# Integrity check 2 — the decompressed dump has to start with pg_dump's
# banner. Line 1 of a dump is a bare "--" and the "-- PostgreSQL database
# dump" comment follows on line 2, so the first 5 lines are searched instead
# of only the first one.
HEADER_BLOCK=$(gunzip -c "$OUT.tmp" | head -n 5)
if ! echo "$HEADER_BLOCK" | grep -q "^-- PostgreSQL database dump"; then
  # Flatten what was actually found into a single line (newlines become "|",
  # capped at 120 characters) so it fits in the alert text.
  header_excerpt=$(echo "$HEADER_BLOCK" | tr '\n' '|' | cut -c1-120)
  rm -f "$OUT.tmp"
  fail "pg_dump output missing expected header (first 5 lines: ${header_excerpt})"
fi

# Size sanity — backups have grown to ~8KB. A sub-1KB dump means schema-only
# or empty. Likely-broken: alert but keep file for inspection.
# A stat failure aborts the run; otherwise SIZE would be empty and the
# numeric comparison below would error out instead of checking anything.
if ! SIZE=$(stat -c%s "$OUT.tmp" 2>>"$LOG_FILE"); then
  fail "could not determine size of $OUT.tmp"
fi
if [ "$SIZE" -lt 1024 ]; then
  log "WARN: backup unusually small (${SIZE} bytes)"
  # The alert names the final path: the .tmp file is renamed right below, so
  # by the time anyone reads the alert the dump lives at $OUT.
  "$NOTIFY" "backup-suspicious" "backup is only ${SIZE} bytes — investigate $OUT"
fi

# Atomic move — only swap into place once all checks passed. A failed rename
# must not be reported as a written backup, so it aborts with an alert and
# leaves the .tmp file where it is.
if ! mv "$OUT.tmp" "$OUT" 2>>"$LOG_FILE"; then
  fail "could not move $OUT.tmp into place as $OUT"
fi
log "backup written: $OUT (${SIZE} bytes)"

# Optional offsite push — set BMM_BACKUP_REMOTE=<rclone-remote>:<path> in
# /opt/buildmymcpserver/.env.production once rclone is configured. We
# grep-parse rather than sourcing the env file because the env file is
# managed for Docker compose (KEY=value, sometimes unquoted text values
# like names) and `source` evaluates unquoted RHS as shell, which breaks
# on any value containing whitespace.
ENV_FILE="/opt/buildmymcpserver/.env.production"

# Print the value of KEY ($1) from the KEY=value file ($2).
# - the first matching line wins
# - everything after the first "=" is the value (it may itself contain "=")
# - carriage returns are dropped, so a CRLF-edited file does not leave a
#   stray CR on the value
# - one pair of surrounding double or single quotes is stripped
# Prints nothing when the key is absent or the file cannot be read.
_env_value() {
  local key=$1 file=$2
  grep -E "^${key}=" "$file" 2>/dev/null \
    | head -1 \
    | cut -d= -f2- \
    | tr -d '\r' \
    | sed 's/^"\(.*\)"$/\1/; s/^'"'"'\(.*\)'"'"'$/\1/'
}

BMM_BACKUP_REMOTE="$(_env_value BMM_BACKUP_REMOTE "$ENV_FILE")"
if [ -n "${BMM_BACKUP_REMOTE:-}" ]; then
  if ! command -v rclone >/dev/null 2>&1; then
    # A remote is configured but rclone is not on PATH: leave a trace in the
    # log rather than skipping silently. The local backup is already in place.
    log "WARN: BMM_BACKUP_REMOTE is set but rclone was not found in PATH; offsite copy skipped"
  elif rclone copy "$OUT" "$BMM_BACKUP_REMOTE" --quiet 2>>"$LOG_FILE"; then
    log "offsite copy ok: $BMM_BACKUP_REMOTE"
  else
    # Offsite failure is non-fatal: log it, alert, and continue to retention.
    log "WARN: offsite copy failed: $BMM_BACKUP_REMOTE"
    "$NOTIFY" "backup-offsite-failed" "rclone copy to $BMM_BACKUP_REMOTE failed"
  fi
fi

# Retention — prune old dumps sitting directly in $BACKUP_DIR (no recursion).
# Only files named bmm-*.sql.gz are considered. `-mtime +N` matches files
# whose age in whole 24-hour periods is greater than N, so a dump is removed
# once it is more than RETENTION_DAYS full days old. find's errors are
# appended to the log.
find "$BACKUP_DIR" \
  -maxdepth 1 \
  -name 'bmm-*.sql.gz' \
  -mtime "+${RETENTION_DAYS}" \
  -delete \
  2>>"$LOG_FILE"

log "done"
exit 0