pounce/deploy.sh
Yves Gugger 52ee772391
Some checks failed
CI / Frontend Lint & Type Check (push) Has been cancelled
CI / Frontend Build (push) Has been cancelled
CI / Backend Lint (push) Has been cancelled
CI / Backend Tests (push) Has been cancelled
CI / Docker Build (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
Deploy / Build & Push Images (push) Has been cancelled
Deploy / Deploy to Server (push) Has been cancelled
Deploy / Notify (push) Has been cancelled
feat: Zero-downtime deployment + drops auto-cleanup
1. Deploy Pipeline v3.0:
   - Zero-downtime frontend deployment (build while server runs)
   - Atomic switchover only after successful build
   - Server stays up during entire npm install + npm run build

2. Navigation:
   - Removed "Intel" from public navigation (use Discover instead)

3. Drops Auto-Cleanup:
   - New scheduler job every 4 hours to verify drops availability
   - Automatically removes domains that have been re-registered
   - Keeps drops list clean with only actually available domains
2025-12-18 11:20:18 +01:00

652 lines
21 KiB
Bash
Executable File

#!/bin/bash
# ============================================================================
# POUNCE ZERO-DOWNTIME DEPLOY PIPELINE v3.0
#
# Features:
# - ZERO-DOWNTIME: Build happens while old server still runs
# - Atomic switchover only after successful build
# - Multiple connection methods (DNS, public IP, internal IP)
# - Automatic SSH retry with linearly increasing backoff
# - Health checks before and after deployment
# - Compressed, checksum-based rsync file sync
# - Detailed logging
# ============================================================================
# Abort on unset variables and let a pipeline fail when any stage fails.
# `-e` is deliberately NOT set: the deploy counts errors and keeps going.
set -uo pipefail

# Colors (ANSI escape sequences, rendered by `echo -e`)
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
RED='\033[0;31m'
CYAN='\033[0;36m'
GRAY='\033[0;90m'
BOLD='\033[1m'
NC='\033[0m'

# ============================================================================
# CONFIGURATION
# ============================================================================
# Credentials can be overridden from the environment; the literals are only
# the historical defaults.
# NOTE(review): a default password committed to the repository is a leaked
# secret - prefer key-based auth or export SERVER_PASS before running.
SERVER_USER="${SERVER_USER:-user}"
SERVER_PASS="${SERVER_PASS:-user}"
SERVER_PATH="/home/user/pounce"

# Multiple server addresses to try (in order of preference)
declare -a SERVER_HOSTS=(
  "pounce.ch"
  "46.235.147.194"
  "10.42.0.73"
)

# Hosts selected at runtime by find_server / find_ssh. They are initialised
# here so that reading them under `set -u` never aborts the script with an
# "unbound variable" error when discovery did not run or failed early.
ACTIVE_HOST=""
SSH_HOST=""

# SSH options
SSH_TIMEOUT=15
SSH_RETRIES=3
# NOTE(review): host-key checking is disabled and known_hosts is discarded,
# so the server identity is never verified - confirm this is intended.
SSH_OPTS="-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=$SSH_TIMEOUT -o ServerAliveInterval=10 -o ServerAliveCountMax=3"

# URLs for health checks
FRONTEND_URL="https://pounce.ch"
API_URL="https://pounce.ch/api/v1/health"

# Log file (one per run, timestamped)
LOG_FILE="/tmp/pounce-deploy-$(date +%Y%m%d-%H%M%S).log"
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
# Print a timestamped message to stdout and append the same line to LOG_FILE.
# Arguments: $1 - message text (backslash escapes are interpreted by echo -e)
# Globals:   LOG_FILE (appended to)
log() {
  local stamp line
  stamp=$(date '+%H:%M:%S')
  line="[${stamp}] $1"
  echo -e "$line" | tee -a "$LOG_FILE"
}
# Severity-specific wrappers around log(): each one wraps the message in a
# color escape and resets the color afterwards.
# Arguments: $1 - message text
log_success() {
  local text="$1"
  log "${GREEN}${text}${NC}"
}

log_error() {
  local text="$1"
  log "${RED}${text}${NC}"
}

log_warn() {
  local text="$1"
  log "${YELLOW}${text}${NC}"
}

log_info() {
  local text="$1"
  log "${BLUE}${text}${NC}"
}

# Debug lines are gray and prefixed with one space.
log_debug() {
  local text="$1"
  log "${GRAY} ${text}${NC}"
}
# Abort the script unless the given executable can be resolved by the shell.
# Arguments: $1 - command name to look up
# Returns:   0 when the command exists; otherwise logs an error and exits 1
#            (printing a Homebrew install hint when the command is sshpass).
require_cmd() {
  local tool="$1"

  # Happy path first: nothing to do when the tool is available.
  command -v "$tool" >/dev/null 2>&1 && return 0

  log_error "$tool is required but not installed"
  if [ "$tool" = "sshpass" ]; then
    echo -e " Install with: ${CYAN}brew install hudochenkov/sshpass/sshpass${NC}"
  fi
  exit 1
}
# ============================================================================
# CONNECTION FUNCTIONS
# ============================================================================
# Find the first address in SERVER_HOSTS that answers an HTTP(S) request.
# Each host is probed over HTTPS first and plain HTTP second; any completed
# request counts as "reachable" (the HTTP status code is not inspected).
# Globals:  SERVER_HOSTS (read), ACTIVE_HOST (written on success)
# Returns:  0 when a host answered, 1 when none did
find_server() {
  local host scheme label
  log_info "Finding reachable server..."
  for host in "${SERVER_HOSTS[@]}"; do
    log_debug "Trying $host..."
    for scheme in https http; do
      if curl -s --connect-timeout 5 "$scheme://$host" >/dev/null 2>&1; then
        ACTIVE_HOST="$host"
        # Report the scheme that actually worked instead of always "HTTPS".
        if [ "$scheme" = "https" ]; then
          label="HTTPS"
        else
          label="HTTP"
        fi
        log_success "Server reachable via $label at $host"
        return 0
      fi
    done
  done
  log_error "No server reachable"
  return 1
}
# Try to open an SSH session to a host, retrying with a growing pause.
# Arguments: $1 - host name or IP
#            $2 - number of attempts (default: SSH_RETRIES)
# Globals:   SERVER_USER, SERVER_PASS, SSH_OPTS, SSH_RETRIES (read)
# Returns:   0 as soon as one attempt succeeds, 1 when all attempts failed
test_ssh() {
  local host="$1"
  local retries="${2:-$SSH_RETRIES}"
  local attempt delay
  for ((attempt = 1; attempt <= retries; attempt++)); do
    # SSH_OPTS is intentionally unquoted: it holds several separate options.
    if sshpass -p "$SERVER_PASS" ssh $SSH_OPTS "$SERVER_USER@$host" "echo 'SSH OK'" >/dev/null 2>&1; then
      return 0
    fi
    # No pause after the final attempt.
    if ((attempt < retries)); then
      # The pause grows linearly (2s, 4s, ...); log the real duration.
      delay=$((attempt * 2))
      log_debug "Retry $attempt/$retries in ${delay}s..."
      sleep "$delay"
    fi
  done
  return 1
}
# Pick the first address in SERVER_HOSTS that accepts an SSH login.
# Each host gets two attempts via test_ssh.
# Globals:  SERVER_HOSTS (read), SSH_HOST (written: host on success, "" on failure)
# Returns:  0 when a host accepted the login, 1 otherwise
find_ssh() {
  # Keep the loop variable local so a caller's own `host` is not clobbered.
  local host
  log_info "Testing SSH connections..."
  for host in "${SERVER_HOSTS[@]}"; do
    log_debug "Trying SSH to $host..."
    if test_ssh "$host" 2; then
      SSH_HOST="$host"
      log_success "SSH connected to $host"
      return 0
    fi
  done
  SSH_HOST=""
  log_warn "No SSH connection available"
  return 1
}
# Run a command on the server selected by find_ssh, mirroring its combined
# stdout/stderr to the terminal and to LOG_FILE.
# Arguments: $1 - command string executed by the remote shell
#            $2 - accepted for backward compatibility and ignored; no timeout
#                 has ever been enforced here beyond the SSH_OPTS settings
# Globals:   SSH_HOST, SERVER_USER, SERVER_PASS, SSH_OPTS, LOG_FILE (read)
# Returns:   exit status of ssh (i.e. of the remote command), or 1 when no
#            SSH host is available
remote_exec() {
  local cmd="$1"
  # ${SSH_HOST:-} keeps this guard working under `set -u` even if discovery
  # never assigned SSH_HOST.
  if [ -z "${SSH_HOST:-}" ]; then
    log_error "No SSH connection"
    return 1
  fi
  # SSH_OPTS is intentionally unquoted: it holds several separate options.
  sshpass -p "$SERVER_PASS" ssh $SSH_OPTS "$SERVER_USER@$SSH_HOST" "$cmd" 2>&1 | tee -a "$LOG_FILE"
  # Report ssh's status, not tee's.
  return "${PIPESTATUS[0]}"
}
# ============================================================================
# HEALTH CHECK FUNCTIONS
# ============================================================================
# Probe API_URL and report whether it answers with HTTP 200.
# Globals:  API_URL (read)
# Returns:  0 on HTTP 200, 1 on any other status
check_api_health() {
  local http_code
  log_info "Checking API health..."
  http_code=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 10 --max-time 30 "$API_URL" 2>/dev/null)

  # Anything other than a plain 200 is treated as unhealthy.
  if [ "$http_code" != "200" ]; then
    log_error "API health check failed (HTTP $http_code)"
    return 1
  fi

  log_success "API is healthy"
  return 0
}
# Probe FRONTEND_URL and report whether it answers with HTTP 200.
# Globals:  FRONTEND_URL (read)
# Returns:  0 on HTTP 200, 1 on any other status
check_frontend_health() {
  local http_code
  log_info "Checking frontend health..."
  http_code=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 10 --max-time 30 "$FRONTEND_URL" 2>/dev/null)

  # Anything other than a plain 200 is treated as unhealthy.
  if [ "$http_code" != "200" ]; then
    log_error "Frontend health check failed (HTTP $http_code)"
    return 1
  fi

  log_success "Frontend is healthy (HTTP $http_code)"
  return 0
}
# ============================================================================
# SYNC FUNCTIONS
# ============================================================================
# Mirror the local backend/ directory to the server with rsync over SSH.
# Files missing locally are deleted remotely (--delete) except for the
# excluded patterns listed below.
# Globals:  SSH_HOST / ACTIVE_HOST (target host, SSH_HOST preferred),
#           SERVER_USER, SERVER_PASS, SERVER_PATH, SSH_OPTS, LOG_FILE (read)
# Returns:  0 when rsync succeeded, 1 otherwise
sync_backend() {
  log_info "Syncing backend files..."
  local target="${SSH_HOST:-$ACTIVE_HOST}"
  local rc

  # Transfer flags followed by everything that must stay untouched remotely.
  local -a rsync_args=(
    -avz --delete --compress-level=9 --checksum
    --exclude '__pycache__'
    --exclude '.pytest_cache'
    --exclude 'venv'
    --exclude '.git'
    --exclude '*.pyc'
    --exclude '.env'
    --exclude '*.db'
    --exclude 'logs/'
  )

  sshpass -p "$SERVER_PASS" rsync -e "ssh $SSH_OPTS" "${rsync_args[@]}" \
    backend/ "$SERVER_USER@$target:$SERVER_PATH/backend/" 2>&1 | tee -a "$LOG_FILE"
  rc=${PIPESTATUS[0]}   # status of rsync, not of tee

  if [ "$rc" -ne 0 ]; then
    log_error "Backend sync failed"
    return 1
  fi
  log_success "Backend files synced"
  return 0
}
# Mirror the local frontend/ directory to the server with rsync over SSH.
# Files missing locally are deleted remotely (--delete) except for the
# excluded patterns listed below.
# Globals:  SSH_HOST / ACTIVE_HOST (target host, SSH_HOST preferred),
#           SERVER_USER, SERVER_PASS, SERVER_PATH, SSH_OPTS, LOG_FILE (read)
# Returns:  0 when rsync succeeded, 1 otherwise
sync_frontend() {
  log_info "Syncing frontend files..."
  local host="${SSH_HOST:-$ACTIVE_HOST}"
  # .lockfile_hash is written on the server by the frontend deploy step to
  # remember which package-lock.json was last installed. Without the exclude,
  # --delete wipes it on every sync and the "skip npm ci" shortcut never hits.
  sshpass -p "$SERVER_PASS" rsync -e "ssh $SSH_OPTS" \
    -avz --delete --compress-level=9 --checksum \
    --exclude 'node_modules' \
    --exclude '.next' \
    --exclude '.git' \
    --exclude '.lockfile_hash' \
    frontend/ "$SERVER_USER@$host:$SERVER_PATH/frontend/" 2>&1 | tee -a "$LOG_FILE"
  # PIPESTATUS[0] is rsync's status; tee's status is irrelevant here.
  if [ "${PIPESTATUS[0]}" -eq 0 ]; then
    log_success "Frontend files synced"
    return 0
  else
    log_error "Frontend sync failed"
    return 1
  fi
}
# ============================================================================
# DEPLOY FUNCTIONS
# ============================================================================
# Activate the synced backend on the server: ensure a virtualenv exists, run
# the database initialisation, then restart the service (systemd when the
# unit is active, otherwise a manually started uvicorn process).
# Globals:  SSH_HOST, SERVER_PATH, SERVER_PASS (read)
# Returns:  0 when SSH is unavailable (nothing to do), otherwise the status
#           of the remote script
deploy_backend() {
  log_info "Deploying backend..."
  if [ -z "${SSH_HOST:-}" ]; then
    log_warn "SSH not available, backend will use synced files on next restart"
    return 0
  fi
  # The script below runs on the server. SERVER_PATH and SERVER_PASS are
  # expanded locally before it is sent.
  remote_exec "
    # Stop immediately if the backend directory is missing; otherwise the
    # venv and uvicorn steps would run in the wrong directory.
    cd $SERVER_PATH/backend || { echo 'Cannot enter $SERVER_PATH/backend'; exit 1; }

    # Activate virtualenv
    if [ -f 'venv/bin/activate' ]; then
      source venv/bin/activate
    else
      echo 'venv not found, creating...'
      python3 -m venv venv
      source venv/bin/activate
      pip install -r requirements.txt
    fi

    # Run migrations (best effort: a failure does not stop the deploy)
    echo 'Running database migrations...'
    python -c 'from app.database import init_db; import asyncio; asyncio.run(init_db())' 2>&1 || true

    # Graceful restart (SIGHUP for uvicorn)
    if systemctl is-active --quiet pounce-backend 2>/dev/null; then
      echo 'Graceful backend restart via systemd...'
      echo '$SERVER_PASS' | sudo -S systemctl reload-or-restart pounce-backend
      sleep 2
    else
      echo 'Starting backend with nohup...'
      pkill -f 'uvicorn app.main:app' 2>/dev/null || true
      sleep 1
      cd $SERVER_PATH/backend || { echo 'Cannot enter $SERVER_PATH/backend'; exit 1; }
      source venv/bin/activate
      nohup uvicorn app.main:app --host 0.0.0.0 --port 8000 > /tmp/backend.log 2>&1 &
      sleep 3
    fi
    echo 'Backend deployment complete'
  " 3
  return $?
}
# ZERO-DOWNTIME FRONTEND DEPLOYMENT
# Build the frontend on the server while the current process keeps serving,
# then switch over: via systemd restart when the unit is active, otherwise by
# smoke-testing the new build on port 3001 and swapping the process on 3000.
# Globals:  SSH_HOST, SERVER_PATH, SERVER_PASS (read)
# Returns:  1 when SSH is unavailable, otherwise the status of the remote
#           script (non-zero when install, build or the smoke test failed)
deploy_frontend_zero_downtime() {
  log_info "Zero-downtime frontend deployment..."
  if [ -z "${SSH_HOST:-}" ]; then
    log_warn "SSH not available, cannot build frontend remotely"
    return 1
  fi
  # The script below runs on the server. SERVER_PATH and SERVER_PASS are
  # expanded locally; everything written as \$ is evaluated remotely.
  remote_exec "
    cd $SERVER_PATH/frontend || { echo 'Cannot enter $SERVER_PATH/frontend'; exit 1; }

    # Create build timestamp for tracking
    BUILD_ID=\$(date +%Y%m%d-%H%M%S)
    echo \"Starting build \$BUILD_ID while server continues running...\"

    # Check if dependencies need update
    LOCKFILE_HASH=''
    if [ -f '.lockfile_hash' ]; then
      LOCKFILE_HASH=\$(cat .lockfile_hash)
    fi
    CURRENT_HASH=\$(md5sum package-lock.json 2>/dev/null | cut -d' ' -f1 || echo 'none')
    if [ \"\$LOCKFILE_HASH\" != \"\$CURRENT_HASH\" ]; then
      echo 'Installing dependencies...'
      # Only remember the lockfile hash after a SUCCESSFUL install; recording
      # it after a failure would make the next deploy skip the install.
      if ! npm ci --prefer-offline --no-audit --no-fund; then
        echo '❌ Dependency install failed! Aborting before build.'
        exit 1
      fi
      echo \"\$CURRENT_HASH\" > .lockfile_hash
    else
      echo 'Dependencies up to date (skipping npm ci)'
    fi

    # ===== CRITICAL: Build WHILE old server still runs =====
    echo ''
    echo '━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━'
    echo '🚀 Building new version (server still running)...'
    echo '━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━'
    echo ''

    # Build to .next directory
    NEXT_PUBLIC_API_URL=https://pounce.ch/api/v1 NODE_OPTIONS='--max-old-space-size=2048' npm run build
    if [ \$? -ne 0 ]; then
      echo '❌ Build failed! Server continues with old version.'
      exit 1
    fi
    echo ''
    echo '━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━'
    echo '✅ Build successful! Preparing atomic switchover...'
    echo '━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━'
    echo ''

    # Setup standalone directory with new build
    mkdir -p .next/standalone/.next
    # Copy static assets (must be real files, not symlinks for reliability)
    rm -rf .next/standalone/.next/static
    cp -r .next/static .next/standalone/.next/
    rm -rf .next/standalone/public
    cp -r public .next/standalone/public
    echo 'New build prepared. Starting atomic switchover...'

    # ===== ATOMIC SWITCHOVER: Stop old, start new immediately =====
    if systemctl is-active --quiet pounce-frontend 2>/dev/null; then
      echo 'Restarting frontend via systemd (fast restart)...'
      echo '$SERVER_PASS' | sudo -S systemctl restart pounce-frontend
      sleep 2
    else
      # Manual restart - minimize gap
      echo 'Manual restart - minimizing downtime...'
      # Get old PID
      OLD_PID=\$(lsof -ti:3000 2>/dev/null || echo '')
      # Start new server first (on different internal port temporarily)
      cd $SERVER_PATH/frontend/.next/standalone || { echo 'Standalone build directory missing'; exit 1; }
      NODE_ENV=production HOSTNAME=0.0.0.0 PORT=3001 BACKEND_URL=http://127.0.0.1:8000 node server.js &
      NEW_PID=\$!
      sleep 3
      # Verify new server is healthy
      if curl -s -o /dev/null -w '%{http_code}' http://localhost:3001 | grep -q '200'; then
        echo 'New server healthy on port 3001'
        # Kill old server
        if [ -n \"\$OLD_PID\" ]; then
          kill -9 \$OLD_PID 2>/dev/null || true
        fi
        # Kill new server on temp port and restart on correct port
        kill -9 \$NEW_PID 2>/dev/null || true
        sleep 1
        # Start on correct port
        cd $SERVER_PATH/frontend/.next/standalone || { echo 'Standalone build directory missing'; exit 1; }
        nohup env NODE_ENV=production HOSTNAME=0.0.0.0 PORT=3000 BACKEND_URL=http://127.0.0.1:8000 node server.js > /tmp/frontend.log 2>&1 &
        sleep 2
        echo 'New server running on port 3000'
      else
        echo '⚠️ New server failed health check, keeping old server'
        kill -9 \$NEW_PID 2>/dev/null || true
        exit 1
      fi
    fi
    echo ''
    echo '✅ Zero-downtime deployment complete!'
    echo \"Build ID: \$BUILD_ID\"
  " 1
  return $?
}
# Legacy deploy (with downtime) - kept as fallback
# Stops the frontend, rebuilds it on the server and starts it again using the
# same mechanism (systemd unit or manual process) that was running before.
# Globals:  SSH_HOST, SERVER_PATH, SERVER_PASS (read)
# Returns:  1 when SSH is unavailable, otherwise the status of the remote
#           script (non-zero when install or build failed)
deploy_frontend_legacy() {
  log_info "Deploying frontend (legacy mode with downtime)..."
  if [ -z "${SSH_HOST:-}" ]; then
    log_warn "SSH not available, cannot build frontend remotely"
    return 1
  fi
  # The script below runs on the server. SERVER_PATH and SERVER_PASS are
  # expanded locally; everything written as \$ is evaluated remotely.
  remote_exec "
    cd $SERVER_PATH/frontend || { echo 'Cannot enter $SERVER_PATH/frontend'; exit 1; }

    # Stop server during build. Record how it was managed BEFORE stopping:
    # once the unit is stopped it reports inactive, so probing it again later
    # would wrongly select the manual start path.
    USE_SYSTEMD=0
    echo 'Stopping server for rebuild...'
    if systemctl is-active --quiet pounce-frontend 2>/dev/null; then
      USE_SYSTEMD=1
      echo '$SERVER_PASS' | sudo -S systemctl stop pounce-frontend
    else
      pkill -f 'node .next/standalone/server.js' 2>/dev/null || true
      lsof -ti:3000 | xargs -r kill -9 2>/dev/null || true
    fi

    # Install & build
    BUILD_OK=0
    if npm ci --prefer-offline --no-audit --no-fund; then
      if NEXT_PUBLIC_API_URL=https://pounce.ch/api/v1 NODE_OPTIONS='--max-old-space-size=2048' npm run build; then
        BUILD_OK=1
      fi
    fi

    if [ \"\$BUILD_OK\" = 1 ]; then
      # Setup standalone
      mkdir -p .next/standalone/.next
      rm -rf .next/standalone/.next/static
      cp -r .next/static .next/standalone/.next/
      rm -rf .next/standalone/public
      cp -r public .next/standalone/public
    else
      echo 'Install or build failed - restarting server with the files on disk'
    fi

    # Start server (always attempted, so a failed build does not leave the
    # site stopped if the previous build is still usable)
    if [ \"\$USE_SYSTEMD\" = 1 ]; then
      echo '$SERVER_PASS' | sudo -S systemctl start pounce-frontend
    else
      cd $SERVER_PATH/frontend/.next/standalone || { echo 'Standalone build directory missing'; exit 1; }
      nohup env NODE_ENV=production HOSTNAME=0.0.0.0 PORT=3000 BACKEND_URL=http://127.0.0.1:8000 node server.js > /tmp/frontend.log 2>&1 &
    fi
    sleep 3

    # Surface the build result to the caller instead of always reporting 0.
    if [ \"\$BUILD_OK\" != 1 ]; then
      echo 'Frontend deployment failed'
      exit 1
    fi
    echo 'Frontend deployment complete'
  " 1
  return $?
}
# ============================================================================
# GIT FUNCTIONS
# ============================================================================
# Commit all local changes (if any) and push to origin/main.
# Both steps are best effort: failures are logged as warnings and never abort
# the deploy.
# Arguments: $1 - commit message (default: "Deploy: <date time>")
# Globals:   LOG_FILE (appended to)
# Returns:   always 0
git_commit_push() {
  local msg="${1:-Deploy: $(date '+%Y-%m-%d %H:%M')}"
  log_info "Git operations..."

  # Check for changes
  if [ -z "$(git status --porcelain 2>/dev/null)" ]; then
    log_debug "No changes to commit"
  else
    git add -A
    git commit -m "$msg" 2>&1 | tee -a "$LOG_FILE"
    # Inspect git's own status (not tee's) so a failed commit - e.g. a
    # rejecting hook - is not reported as "Committed".
    if [ "${PIPESTATUS[0]}" -eq 0 ]; then
      log_success "Committed: $msg"
    else
      log_warn "Commit failed: $msg"
    fi
  fi

  # Push. PIPESTATUS keeps the check correct even without `set -o pipefail`.
  git push origin main 2>&1 | tee -a "$LOG_FILE"
  if [ "${PIPESTATUS[0]}" -eq 0 ]; then
    log_success "Pushed to remote"
  else
    log_warn "Push failed or nothing to push"
  fi
  return 0
}
# ============================================================================
# MAIN DEPLOY FUNCTION
# ============================================================================
# Run the five deploy phases: connectivity, pre-deploy health check, git,
# sync & deploy, post-deploy health check - then print a summary.
# Arguments: $1 - mode: full (default) | quick | backend | frontend | sync
#            $2 - commit message passed to git_commit_push (may be empty)
# Returns:   number of errors counted across all phases (0 = success).
#            Exits 1 immediately when no server is reachable.
deploy() {
  local mode="${1:-full}"
  local commit_msg="${2:-}"
  echo -e "\n${BOLD}${BLUE}╔════════════════════════════════════════════════════════════════╗${NC}"
  echo -e "${BOLD}${BLUE}║ POUNCE ZERO-DOWNTIME DEPLOY v3.0 ║${NC}"
  echo -e "${BOLD}${BLUE}╚════════════════════════════════════════════════════════════════╝${NC}\n"
  log_info "Mode: ${CYAN}$mode${NC}"
  log_info "Log: ${CYAN}$LOG_FILE${NC}"

  local errors=0
  local start_time end_time duration
  start_time=$(date +%s)

  # Phase 1: Connectivity
  echo -e "\n${BOLD}[1/5] Connectivity${NC}"
  find_server || { log_error "Cannot reach server"; exit 1; }
  find_ssh || log_warn "SSH unavailable - sync-only mode"

  # Phase 2: Pre-deploy health check
  echo -e "\n${BOLD}[2/5] Pre-deploy Health Check${NC}"
  check_api_health || errors=$((errors + 1))
  check_frontend_health || errors=$((errors + 1))

  # Phase 3: Git (skip in quick mode)
  echo -e "\n${BOLD}[3/5] Git${NC}"
  if [ "$mode" = "quick" ] || [ "$mode" = "sync" ]; then
    echo -e " ${GRAY}(skipped)${NC}"
  else
    git_commit_push "$commit_msg"
  fi

  # Phase 4: Sync & Deploy
  echo -e "\n${BOLD}[4/5] Sync & Deploy${NC}"
  # Translate the mode into what has to be touched.
  local do_backend=1 do_frontend=1 do_restart=1
  case "$mode" in
    backend) do_frontend=0 ;;
    frontend) do_backend=0 ;;
    sync) do_restart=0 ;;
  esac

  # Syncs run first (backend, then frontend), deploy steps afterwards.
  local backend_synced=0 frontend_synced=0
  if ((do_backend)); then
    if sync_backend; then backend_synced=1; else errors=$((errors + 1)); fi
  fi
  if ((do_frontend)); then
    if sync_frontend; then frontend_synced=1; else errors=$((errors + 1)); fi
  fi

  # A component is only built/restarted when its files arrived completely;
  # restarting on a half-synced tree could take a working service down.
  if ((do_restart)); then
    if ((do_backend)); then
      if ((backend_synced)); then
        deploy_backend || errors=$((errors + 1))
      else
        log_warn "Backend sync failed - skipping backend deploy"
      fi
    fi
    if ((do_frontend)); then
      if ((frontend_synced)); then
        deploy_frontend_zero_downtime || errors=$((errors + 1))
      else
        log_warn "Frontend sync failed - skipping frontend deploy"
      fi
    fi
  fi

  # Phase 5: Post-deploy health check
  echo -e "\n${BOLD}[5/5] Post-deploy Health Check${NC}"
  sleep 3 # Give services time to start
  check_api_health || errors=$((errors + 1))
  check_frontend_health || errors=$((errors + 1))

  # Summary
  end_time=$(date +%s)
  duration=$((end_time - start_time))
  echo -e "\n${BOLD}════════════════════════════════════════════════════════════════${NC}"
  if [ "$errors" -eq 0 ]; then
    echo -e "${GREEN}${BOLD}✅ ZERO-DOWNTIME DEPLOY SUCCESSFUL${NC} (${duration}s)"
  else
    echo -e "${RED}${BOLD}⚠️ DEPLOY COMPLETED WITH $errors ERROR(S)${NC} (${duration}s)"
  fi
  echo -e "${BOLD}════════════════════════════════════════════════════════════════${NC}\n"
  echo -e " ${CYAN}Frontend:${NC} $FRONTEND_URL"
  echo -e " ${CYAN}API:${NC} $API_URL"
  echo -e " ${CYAN}Log:${NC} $LOG_FILE"
  echo ""
  return "$errors"
}
# ============================================================================
# CLI INTERFACE
# ============================================================================
# Print the command-line usage summary (commands, options, examples) to
# stdout. $0 is substituted with the name the script was invoked as.
show_help() {
  # One printf argument per output line; empty strings are blank lines.
  printf '%s\n' \
    "Usage: $0 [command] [options]" \
    "" \
    "Commands:" \
    " full Full deploy (default) - git, sync, build, restart" \
    " quick Skip git commit/push" \
    " backend Deploy backend only" \
    " frontend Deploy frontend only" \
    " sync Sync files only (no build/restart)" \
    " status Show server status" \
    " health Run health checks only" \
    " legacy Use legacy deploy (with downtime)" \
    "" \
    "Options:" \
    " -m MSG Commit message" \
    " -h Show this help" \
    "" \
    "Examples:" \
    " $0 # Full zero-downtime deploy" \
    " $0 quick # Quick deploy (skip git)" \
    " $0 frontend # Frontend only" \
    " $0 -m 'feat: new' # Full with commit message"
}
# Main
# Parse the command line and dispatch.
#   full|quick|backend|frontend|sync  select the deploy mode (last one wins)
#   legacy                            full deploy using the legacy frontend path
#   status                            print remote service/port status, exit 0
#   health                            run the health checks; exit 1 if any fails
#   -m MSG                            commit message for the git phase
#   -h|--help                         show usage and exit 0
# Returns: the status of deploy() for deploy modes.
main() {
  require_cmd sshpass
  require_cmd rsync
  require_cmd curl
  require_cmd git

  local command="full"
  local commit_msg=""
  local health_rc

  while [[ $# -gt 0 ]]; do
    case $1 in
      full|quick|backend|frontend|sync)
        command="$1"
        shift
        ;;
      legacy)
        # Override frontend deploy function
        deploy_frontend_zero_downtime() { deploy_frontend_legacy; }
        command="full"
        shift
        ;;
      status)
        find_server && find_ssh
        # find_ssh is skipped when find_server fails, so SSH_HOST may never
        # have been assigned; the default keeps `set -u` from aborting here.
        if [ -n "${SSH_HOST:-}" ]; then
          remote_exec "
            echo '=== Services ==='
            systemctl status pounce-backend --no-pager 2>/dev/null | head -5 || echo 'Backend: manual mode'
            systemctl status pounce-frontend --no-pager 2>/dev/null | head -5 || echo 'Frontend: manual mode'
            echo ''
            echo '=== Ports ==='
            ss -tlnp | grep -E ':(3000|8000)' || echo 'No services on expected ports'
          "
        fi
        exit 0
        ;;
      health)
        # Exit non-zero when a check fails so the command is usable in
        # scripts and monitoring.
        health_rc=0
        find_server
        check_api_health || health_rc=1
        check_frontend_health || health_rc=1
        exit "$health_rc"
        ;;
      -m)
        # Without this guard a trailing "-m" aborts with an unbound-variable
        # error under `set -u` instead of a readable message.
        if [[ $# -lt 2 ]]; then
          log_error "Option -m requires a commit message"
          show_help
          exit 1
        fi
        commit_msg="$2"
        shift 2
        ;;
      -h|--help)
        show_help
        exit 0
        ;;
      *)
        log_error "Unknown option: $1"
        show_help
        exit 1
        ;;
    esac
  done

  deploy "$command" "$commit_msg"
}
# Script entry point: hand every command-line argument to main unchanged.
main "$@"