pounce/backend/app/api/deploy.py
Yves Gugger a70439c51a
Some checks failed
CI / Frontend Lint & Type Check (push) Has been cancelled
CI / Frontend Build (push) Has been cancelled
CI / Backend Lint (push) Has been cancelled
CI / Backend Tests (push) Has been cancelled
CI / Docker Build (push) Has been cancelled
CI / Security Scan (push) Has been cancelled
Deploy / Build & Push Images (push) Has been cancelled
Deploy / Deploy to Server (push) Has been cancelled
Deploy / Notify (push) Has been cancelled
feat: Robust deploy pipeline v2.0
- Multiple server addresses with automatic fallback
- SSH retry logic with exponential backoff
- Health checks before and after deployment
- HTTP-based deployment endpoint as backup
- Better error handling and logging
- Support for partial deployments (backend/frontend only)
2025-12-18 10:43:50 +01:00

232 lines
7.4 KiB
Python

"""
Remote Deploy Endpoint
This provides a secure way to trigger deployments remotely when SSH is not available.
Protected by an internal API key that should be kept secret.
"""
import asyncio
import logging
import os
import subprocess
from datetime import datetime
from typing import Optional
from fastapi import APIRouter, HTTPException, Header, BackgroundTasks
from pydantic import BaseModel
from app.config import get_settings
router = APIRouter(prefix="/deploy", tags=["deploy"])
logger = logging.getLogger(__name__)
settings = get_settings()
class DeployStatus(BaseModel):
"""Response model for deploy status."""
status: str
message: str
timestamp: str
details: Optional[dict] = None
class DeployRequest(BaseModel):
"""Request model for deploy trigger."""
component: str = "all" # all, backend, frontend
git_pull: bool = True
def run_command(cmd: str, cwd: str = None, timeout: int = 300) -> tuple[int, str, str]:
"""Run a shell command and return exit code, stdout, stderr."""
try:
result = subprocess.run(
cmd,
shell=True,
cwd=cwd,
capture_output=True,
text=True,
timeout=timeout,
)
return result.returncode, result.stdout, result.stderr
except subprocess.TimeoutExpired:
return -1, "", f"Command timed out after {timeout}s"
except Exception as e:
return -1, "", str(e)
async def run_deploy(component: str, git_pull: bool) -> dict:
"""
Execute deployment steps.
This runs in the background to not block the HTTP response.
"""
results = {
"started_at": datetime.utcnow().isoformat(),
"steps": [],
}
base_path = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
try:
# Step 1: Git pull (if requested)
if git_pull:
logger.info("Deploy: Running git pull...")
code, stdout, stderr = run_command("git pull origin main", cwd=base_path, timeout=60)
results["steps"].append({
"step": "git_pull",
"success": code == 0,
"output": stdout or stderr,
})
if code != 0:
logger.error(f"Git pull failed: {stderr}")
# Step 2: Backend deployment
if component in ("all", "backend"):
logger.info("Deploy: Restarting backend...")
# Try systemctl first
code, stdout, stderr = run_command("sudo systemctl restart pounce-backend", timeout=30)
if code == 0:
results["steps"].append({
"step": "backend_restart",
"method": "systemctl",
"success": True,
})
else:
# Fallback: Send SIGHUP to reload
code, stdout, stderr = run_command("pkill -HUP -f 'uvicorn app.main:app'", timeout=10)
results["steps"].append({
"step": "backend_restart",
"method": "sighup",
"success": code == 0,
"output": stderr if code != 0 else None,
})
# Step 3: Frontend deployment (more complex)
if component in ("all", "frontend"):
logger.info("Deploy: Rebuilding frontend...")
frontend_path = os.path.join(os.path.dirname(base_path), "frontend")
# Build frontend
build_cmd = "npm run build"
code, stdout, stderr = run_command(
f"cd {frontend_path} && {build_cmd}",
timeout=300, # 5 min for build
)
results["steps"].append({
"step": "frontend_build",
"success": code == 0,
"output": stderr[-500:] if code != 0 else "Build successful",
})
if code == 0:
# Copy public files for standalone
run_command(
f"cp -r {frontend_path}/public {frontend_path}/.next/standalone/",
timeout=30,
)
# Restart frontend
code, stdout, stderr = run_command("sudo systemctl restart pounce-frontend", timeout=30)
if code != 0:
# Fallback
run_command("pkill -f 'node .next/standalone/server.js'", timeout=10)
run_command(
f"cd {frontend_path}/.next/standalone && nohup node server.js > /tmp/frontend.log 2>&1 &",
timeout=10,
)
results["steps"].append({
"step": "frontend_restart",
"success": True,
})
results["completed_at"] = datetime.utcnow().isoformat()
results["success"] = all(s.get("success", False) for s in results["steps"])
except Exception as e:
logger.exception(f"Deploy failed: {e}")
results["error"] = str(e)
results["success"] = False
logger.info(f"Deploy completed: {results}")
return results
# Store last deploy result
_last_deploy_result: Optional[dict] = None
@router.post("/trigger", response_model=DeployStatus)
async def trigger_deploy(
request: DeployRequest,
background_tasks: BackgroundTasks,
x_deploy_key: str = Header(..., alias="X-Deploy-Key"),
):
"""
Trigger a deployment remotely.
Requires X-Deploy-Key header matching the internal_api_key setting.
This starts the deployment in the background and returns immediately.
Check /deploy/status for results.
"""
global _last_deploy_result
# Verify deploy key
expected_key = settings.internal_api_key
if not expected_key or x_deploy_key != expected_key:
raise HTTPException(status_code=403, detail="Invalid deploy key")
# Start deployment in background
async def do_deploy():
global _last_deploy_result
_last_deploy_result = await run_deploy(request.component, request.git_pull)
background_tasks.add_task(do_deploy)
return DeployStatus(
status="started",
message=f"Deployment started for component: {request.component}",
timestamp=datetime.utcnow().isoformat(),
)
@router.get("/status", response_model=DeployStatus)
async def get_deploy_status(
x_deploy_key: str = Header(..., alias="X-Deploy-Key"),
):
"""
Get the status of the last deployment.
Requires X-Deploy-Key header.
"""
expected_key = settings.internal_api_key
if not expected_key or x_deploy_key != expected_key:
raise HTTPException(status_code=403, detail="Invalid deploy key")
if _last_deploy_result is None:
return DeployStatus(
status="none",
message="No deployments have been triggered",
timestamp=datetime.utcnow().isoformat(),
)
return DeployStatus(
status="completed" if _last_deploy_result.get("success") else "failed",
message="Last deployment result",
timestamp=_last_deploy_result.get("completed_at", "unknown"),
details=_last_deploy_result,
)
@router.get("/health")
async def deploy_health():
"""Simple health check for deploy endpoint."""
return {"status": "ok", "message": "Deploy endpoint available"}