diff --git a/backend/app/api/admin.py b/backend/app/api/admin.py
index d4b8639..814fa3a 100644
--- a/backend/app/api/admin.py
+++ b/backend/app/api/admin.py
@@ -11,11 +11,12 @@ Provides admin-only access to:
 from datetime import datetime, timedelta
 from pathlib import Path
 from typing import Optional
-from fastapi import APIRouter, HTTPException, status, BackgroundTasks, Depends
+from fastapi import APIRouter, HTTPException, status, Depends
 from pydantic import BaseModel, EmailStr
 from sqlalchemy import select, func, desc

 from app.api.deps import Database, get_current_user
+from app.config import get_settings
 from app.models.user import User
 from app.models.subscription import Subscription, SubscriptionTier, SubscriptionStatus, TIER_CONFIG
 from app.models.domain import Domain
@@ -26,6 +27,7 @@ from app.models.auction import DomainAuction
 from app.models.price_alert import PriceAlert

 router = APIRouter()
+settings = get_settings()

 # ============== Admin Authentication ==============
 async def require_admin(
@@ -606,10 +608,18 @@ async def trigger_tld_scrape(
     admin: User = Depends(require_admin),
 ):
     """Manually trigger a TLD price scrape."""
+    # Prefer job queue in production (non-blocking)
+    if settings.enable_job_queue and settings.redis_url:
+        from app.jobs.client import enqueue_job
+
+        job_id = await enqueue_job("scrape_tld_prices")
+        return {"message": "TLD price scrape enqueued", "job_id": job_id}
+
+    # Fallback: run inline
     from app.services.tld_scraper.aggregator import tld_aggregator
-
+
     result = await tld_aggregator.run_scrape(db)
-
+
     return {
         "message": "TLD price scrape completed",
         "status": result.status,
@@ -1099,7 +1109,6 @@ async def test_external_apis(

 @router.post("/trigger-scrape")
 async def trigger_auction_scrape(
-    background_tasks: BackgroundTasks,
     db: Database,
     admin: User = Depends(require_admin),
 ):
@@ -1110,20 +1119,27 @@ async def trigger_auction_scrape(
     1. Try Tier 1 APIs (DropCatch, Sedo) first
     2. Fall back to web scraping for others
     """
+    # Prefer job queue in production (non-blocking)
+    if settings.enable_job_queue and settings.redis_url:
+        from app.jobs.client import enqueue_job
+
+        job_id = await enqueue_job("scrape_auctions")
+        return {
+            "message": "Auction scraping enqueued",
+            "job_id": job_id,
+            "note": "Check /admin/scrape-status for results",
+        }
+
+    # Fallback: run inline
     from app.services.auction_scraper import AuctionScraperService
-
+
     scraper = AuctionScraperService()
-
-    # Run scraping in background
-    async def run_scrape():
-        async with db.begin():
-            return await scraper.scrape_all_platforms(db)
-
-    background_tasks.add_task(run_scrape)
-
+    result = await scraper.scrape_all_platforms(db)
+
     return {
-        "message": "Auction scraping started in background",
-        "note": "Check /admin/scrape-status for results"
+        "message": "Auction scraping completed",
+        "result": result,
+        "note": "Check /admin/scrape-status for results",
     }
diff --git a/backend/app/config.py b/backend/app/config.py
index 174ce29..49fa96d 100644
--- a/backend/app/config.py
+++ b/backend/app/config.py
@@ -33,6 +33,23 @@ class Settings(BaseSettings):
     check_minute: int = 0
     scheduler_check_interval_hours: int = 24
     enable_scheduler: bool = False  # Run APScheduler jobs in this process (recommend: separate scheduler process)
+
+    # Job Queue / Redis (Phase 2)
+    redis_url: str = ""  # e.g. redis://redis:6379/0
+    enable_job_queue: bool = False
+
+    # Observability (Phase 2)
+    enable_metrics: bool = True
+    metrics_path: str = "/metrics"
+    enable_db_query_metrics: bool = False
+
+    # Rate limiting storage (SlowAPI / limits). Use Redis in production.
+    rate_limit_storage_uri: str = "memory://"
+
+    # Database pooling (PostgreSQL)
+    db_pool_size: int = 5
+    db_max_overflow: int = 10
+    db_pool_timeout: int = 30

     # =================================
     # External API Credentials
diff --git a/backend/app/database.py b/backend/app/database.py
index 612f417..e775c16 100644
--- a/backend/app/database.py
+++ b/backend/app/database.py
@@ -7,11 +7,22 @@ from app.config import get_settings
 settings = get_settings()

 # Create async engine
-engine = create_async_engine(
-    settings.database_url,
-    echo=settings.debug,
-    future=True,
-)
+engine_kwargs = {
+    "echo": settings.debug,
+    "future": True,
+}
+# Production hardening: enable connection pooling for Postgres
+if settings.database_url.startswith("postgresql"):
+    engine_kwargs.update(
+        {
+            "pool_size": settings.db_pool_size,
+            "max_overflow": settings.db_max_overflow,
+            "pool_timeout": settings.db_pool_timeout,
+            "pool_pre_ping": True,
+        }
+    )
+
+engine = create_async_engine(settings.database_url, **engine_kwargs)

 # Create async session factory
 AsyncSessionLocal = async_sessionmaker(
diff --git a/backend/app/jobs/__init__.py b/backend/app/jobs/__init__.py
new file mode 100644
index 0000000..913d4c2
--- /dev/null
+++ b/backend/app/jobs/__init__.py
@@ -0,0 +1,3 @@
+"""Async job queue (ARQ / Redis)."""
+
+
diff --git a/backend/app/jobs/client.py b/backend/app/jobs/client.py
new file mode 100644
index 0000000..7733a41
--- /dev/null
+++ b/backend/app/jobs/client.py
@@ -0,0 +1,38 @@
+"""ARQ client helper to enqueue jobs."""
+
+from __future__ import annotations
+
+from typing import Any
+
+from arq.connections import RedisSettings, create_pool
+
+from app.config import get_settings
+
+_pool = None
+
+
+async def _get_pool():
+    global _pool
+    if _pool is not None:
+        return _pool
+
+    settings = get_settings()
+    if not settings.redis_url:
+        raise RuntimeError("redis_url is not configured (set REDIS_URL)")
+
+    _pool = await create_pool(RedisSettings.from_dsn(settings.redis_url))
+    return _pool
+
+
+async def enqueue_job(name: str, *args: Any, **kwargs: Any) -> str:
+    """
+    Enqueue a job by name. Returns the job id.
+    """
+    pool = await _get_pool()
+    job = await pool.enqueue_job(name, *args, **kwargs)
+    # job may be None if enqueue failed
+    if job is None:
+        raise RuntimeError(f"Failed to enqueue job: {name}")
+    return job.job_id
+
+
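With the client helper in place, any part of the backend can hand work to the worker by name. A minimal usage sketch (the `request_score_backfill` wrapper is illustrative and not part of this diff; only `enqueue_job` and the `backfill_auction_scores` task name come from this change set):

```python
# Hypothetical wrapper around the new client helper; only enqueue_job and the
# task name "backfill_auction_scores" exist in this diff.
from app.jobs.client import enqueue_job


async def request_score_backfill(limit: int = 5000) -> str:
    """Enqueue a pounce-score backfill and return the ARQ job id."""
    return await enqueue_job("backfill_auction_scores", limit=limit)
```

The returned job id is what the admin endpoints below surface to the caller.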
diff --git a/backend/app/jobs/tasks.py b/backend/app/jobs/tasks.py
new file mode 100644
index 0000000..dfa17f0
--- /dev/null
+++ b/backend/app/jobs/tasks.py
@@ -0,0 +1,72 @@
+"""Job functions executed by the ARQ worker."""
+
+from __future__ import annotations
+
+from datetime import datetime
+
+from sqlalchemy import select
+
+from app.database import AsyncSessionLocal, init_db
+from app.models.auction import DomainAuction
+from app.services.auction_scraper import auction_scraper
+from app.services.pounce_score import calculate_pounce_score_v2
+from app.services.tld_scraper.aggregator import tld_aggregator
+
+
+async def scrape_auctions(ctx) -> dict:  # arq passes ctx
+    """Scrape auctions from all platforms and store results."""
+    await init_db()
+    async with AsyncSessionLocal() as db:
+        result = await auction_scraper.scrape_all_platforms(db)
+        await db.commit()
+    return {"status": "ok", "result": result, "timestamp": datetime.utcnow().isoformat()}
+
+
+async def scrape_tld_prices(ctx) -> dict:
+    """Scrape TLD prices from all sources and store results."""
+    await init_db()
+    async with AsyncSessionLocal() as db:
+        result = await tld_aggregator.run_scrape(db)
+        await db.commit()
+    return {
+        "status": "ok",
+        "tlds_scraped": result.tlds_scraped,
+        "prices_saved": result.prices_saved,
+        "sources_succeeded": result.sources_succeeded,
+        "sources_attempted": result.sources_attempted,
+        "timestamp": datetime.utcnow().isoformat(),
+    }
+
+
+async def backfill_auction_scores(ctx, *, limit: int = 5000) -> dict:
+    """
+    Backfill DomainAuction.pounce_score for legacy rows.
+
+    Safe to run multiple times; only fills NULL scores.
+    """
+    await init_db()
+    updated = 0
+    async with AsyncSessionLocal() as db:
+        rows = (
+            await db.execute(
+                select(DomainAuction)
+                .where(DomainAuction.pounce_score == None)  # noqa: E711
+                .limit(limit)
+            )
+        ).scalars().all()
+
+        for auction in rows:
+            auction.pounce_score = calculate_pounce_score_v2(
+                auction.domain,
+                auction.tld,
+                num_bids=auction.num_bids or 0,
+                age_years=auction.age_years or 0,
+                is_pounce=False,
+            )
+            updated += 1
+
+        await db.commit()
+
+    return {"status": "ok", "updated": updated, "timestamp": datetime.utcnow().isoformat()}
+
+
diff --git a/backend/app/jobs/worker.py b/backend/app/jobs/worker.py
new file mode 100644
index 0000000..7f18078
--- /dev/null
+++ b/backend/app/jobs/worker.py
@@ -0,0 +1,26 @@
+"""ARQ worker configuration."""
+
+from __future__ import annotations
+
+from arq.connections import RedisSettings
+
+from app.config import get_settings
+from app.jobs import tasks
+
+
+class WorkerSettings:
+    """
+    Run with:
+        arq app.jobs.worker.WorkerSettings
+    """
+
+    settings = get_settings()
+
+    redis_settings = RedisSettings.from_dsn(settings.redis_url or "redis://localhost:6379/0")
+    functions = [
+        tasks.scrape_auctions,
+        tasks.scrape_tld_prices,
+        tasks.backfill_auction_scores,
+    ]
+
+
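The worker only executes jobs that are explicitly enqueued. If the separate APScheduler process is ever retired, arq's built-in cron support could drive the same tasks on a schedule. A sketch under that assumption (the class name and schedule times are arbitrary examples, not part of this diff):

```python
# Sketch only: arq cron scheduling as an alternative to the APScheduler process.
# The schedule values are placeholders.
from arq import cron
from arq.connections import RedisSettings

from app.config import get_settings
from app.jobs import tasks


class WorkerSettingsWithCron:
    redis_settings = RedisSettings.from_dsn(get_settings().redis_url or "redis://localhost:6379/0")
    functions = [
        tasks.scrape_auctions,
        tasks.scrape_tld_prices,
        tasks.backfill_auction_scores,
    ]
    cron_jobs = [
        cron(tasks.scrape_auctions, hour=3, minute=0),
        cron(tasks.scrape_tld_prices, hour=4, minute=30),
    ]
```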
diff --git a/backend/app/main.py b/backend/app/main.py
index 6d81725..5e2049d 100644
--- a/backend/app/main.py
+++ b/backend/app/main.py
@@ -18,6 +18,7 @@ from app.api import api_router
 from app.config import get_settings
 from app.database import init_db
 from app.scheduler import start_scheduler, stop_scheduler
+from app.observability.metrics import instrument_app

 # Configure logging
 logging.basicConfig(
@@ -32,7 +33,7 @@ settings = get_settings()
 limiter = Limiter(
     key_func=get_remote_address,
     default_limits=["200/minute"],  # Global default
-    storage_uri="memory://",  # In-memory storage (use Redis in production)
+    storage_uri=settings.rate_limit_storage_uri,  # Use Redis in production
 )

@@ -78,8 +79,8 @@ Domain availability monitoring and portfolio management service.

 ## Authentication

-Most endpoints require authentication via Bearer token.
-Get a token via POST /api/v1/auth/login
+Most endpoints require authentication via an HttpOnly session cookie (recommended).
+Login: POST /api/v1/auth/login

 ## Rate Limits

@@ -98,6 +99,10 @@ For API issues, contact support@pounce.ch
     redoc_url="/redoc",
 )

+# Observability (Prometheus metrics)
+if settings.enable_metrics:
+    instrument_app(app, metrics_path=settings.metrics_path, enable_db_metrics=settings.enable_db_query_metrics)
+
 # Add rate limiter to app state
 app.state.limiter = limiter
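Because `storage_uri` now comes from settings, pointing it at Redis makes rate-limit counters shared across backend replicas instead of per-process. Route-level limits are still declared the usual SlowAPI way; a purely illustrative example (the endpoint path and "10/minute" limit are not part of this diff, and real code would import the limiter from a shared module to avoid a circular import with `app.main`):

```python
# Illustrative SlowAPI usage; the endpoint and "10/minute" limit are examples.
# SlowAPI requires the `request: Request` parameter on decorated endpoints.
from fastapi import APIRouter, Request

from app.main import limiter

router = APIRouter()


@router.get("/expensive-report")
@limiter.limit("10/minute")
async def expensive_report(request: Request):
    return {"status": "ok"}
```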
diff --git a/backend/app/observability/__init__.py b/backend/app/observability/__init__.py
new file mode 100644
index 0000000..e461a04
--- /dev/null
+++ b/backend/app/observability/__init__.py
@@ -0,0 +1,3 @@
+"""Observability helpers (metrics, tracing)."""
+
+
diff --git a/backend/app/observability/metrics.py b/backend/app/observability/metrics.py
new file mode 100644
index 0000000..b756af3
--- /dev/null
+++ b/backend/app/observability/metrics.py
@@ -0,0 +1,122 @@
+"""Prometheus metrics for FastAPI + optional DB query metrics."""
+
+from __future__ import annotations
+
+import time
+from typing import Optional
+
+from fastapi import FastAPI, Request, Response
+
+try:
+    from prometheus_client import Counter, Histogram, generate_latest, CONTENT_TYPE_LATEST
+except Exception:  # pragma: no cover
+    Counter = None  # type: ignore
+    Histogram = None  # type: ignore
+    generate_latest = None  # type: ignore
+    CONTENT_TYPE_LATEST = "text/plain; version=0.0.4"  # type: ignore
+
+
+_instrumented = False
+_db_instrumented = False
+
+
+def _get_route_template(request: Request) -> str:
+    route = request.scope.get("route")
+    if route is not None and hasattr(route, "path"):
+        return str(route.path)
+    return request.url.path
+
+
+def instrument_app(app: FastAPI, *, metrics_path: str = "/metrics", enable_db_metrics: bool = False) -> None:
+    """
+    Add Prometheus request metrics and a `/metrics` endpoint.
+
+    - Low-cardinality path labels by using FastAPI route templates.
+    - Optional SQLAlchemy query timing metrics (off by default).
+    """
+    global _instrumented
+    if _instrumented:
+        return
+    _instrumented = True
+
+    if Counter is None or Histogram is None:
+        # Dependency not installed; keep app working without metrics.
+        return
+
+    http_requests_total = Counter(
+        "http_requests_total",
+        "Total HTTP requests",
+        ["method", "path", "status"],
+    )
+    http_request_duration_seconds = Histogram(
+        "http_request_duration_seconds",
+        "HTTP request duration (seconds)",
+        ["method", "path"],
+        buckets=(0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10),
+    )
+
+    @app.middleware("http")
+    async def _metrics_middleware(request: Request, call_next):
+        start = time.perf_counter()
+        response: Optional[Response] = None
+        try:
+            response = await call_next(request)
+            return response
+        finally:
+            duration = time.perf_counter() - start
+            path = _get_route_template(request)
+            method = request.method
+            status = str(getattr(response, "status_code", 500))
+            http_requests_total.labels(method=method, path=path, status=status).inc()
+            http_request_duration_seconds.labels(method=method, path=path).observe(duration)
+
+    @app.get(metrics_path, include_in_schema=False)
+    async def _metrics_endpoint():
+        return Response(generate_latest(), media_type=CONTENT_TYPE_LATEST)
+
+    if enable_db_metrics:
+        _instrument_db_metrics()
+
+
+def _instrument_db_metrics() -> None:
+    """Attach SQLAlchemy event listeners to track query latencies."""
+    global _db_instrumented
+    if _db_instrumented:
+        return
+    _db_instrumented = True
+
+    if Counter is None or Histogram is None:
+        return
+
+    from sqlalchemy import event
+    from app.database import engine
+
+    db_queries_total = Counter(
+        "db_queries_total",
+        "Total DB queries executed",
+        ["dialect"],
+    )
+    db_query_duration_seconds = Histogram(
+        "db_query_duration_seconds",
+        "DB query duration (seconds)",
+        ["dialect"],
+        buckets=(0.001, 0.0025, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5),
+    )
+
+    dialect = engine.sync_engine.dialect.name
+
+    @event.listens_for(engine.sync_engine, "before_cursor_execute")
+    def _before_cursor_execute(conn, cursor, statement, parameters, context, executemany):  # type: ignore[no-untyped-def]
+        conn.info.setdefault("_query_start_time", []).append(time.perf_counter())
+
+    @event.listens_for(engine.sync_engine, "after_cursor_execute")
+    def _after_cursor_execute(conn, cursor, statement, parameters, context, executemany):  # type: ignore[no-untyped-def]
+        start_list = conn.info.get("_query_start_time") or []
+        if not start_list:
+            return
+        start = start_list.pop()
+        duration = time.perf_counter() - start
+        db_queries_total.labels(dialect=dialect).inc()
+        db_query_duration_seconds.labels(dialect=dialect).observe(duration)
+
+
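A cheap way to guard this wiring against regressions is an API test that exercises one route and then reads the metrics endpoint. A sketch (test name and file are hypothetical; `/health`, `/metrics`, and the `http_requests_total` metric come from this diff):

```python
# Hypothetical regression test for the Prometheus wiring.
from fastapi.testclient import TestClient

from app.main import app


def test_metrics_endpoint_exposes_request_counter():
    client = TestClient(app)
    client.get("/health")  # record at least one request
    resp = client.get("/metrics")
    assert resp.status_code == 200
    assert "http_requests_total" in resp.text
```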
diff --git a/backend/requirements.txt b/backend/requirements.txt
index 2734d29..abd8a8e 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -45,5 +45,12 @@ stripe>=7.0.0
 # Rate Limiting
 slowapi>=0.1.9

+# Observability (Prometheus)
+prometheus-client>=0.20.0
+
+# Job Queue (Redis)
+arq>=0.26.0
+redis>=5.0.0
+
 # Production Database (optional)
 # asyncpg>=0.30.0  # Already included above
diff --git a/docker-compose.yml b/docker-compose.yml
index 85c0b33..a57c78b 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -18,6 +18,20 @@ services:
       timeout: 5s
       retries: 5

+  # Redis (job queue + rate limiting storage)
+  redis:
+    image: redis:7-alpine
+    container_name: pounce-redis
+    restart: unless-stopped
+    command: ["redis-server", "--appendonly", "yes"]
+    volumes:
+      - redis_data:/data
+    healthcheck:
+      test: ["CMD", "redis-cli", "ping"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+
   # FastAPI Backend
   backend:
     build:
@@ -32,9 +46,15 @@ services:
       SECRET_KEY: ${SECRET_KEY:-change-this-in-production}
       CORS_ORIGINS: ${CORS_ORIGINS:-http://localhost:3000}
       ENABLE_SCHEDULER: "false"
+      ENABLE_JOB_QUEUE: "true"
+      REDIS_URL: redis://redis:6379/0
+      RATE_LIMIT_STORAGE_URI: redis://redis:6379/0
+      ENABLE_METRICS: "true"
     depends_on:
       db:
         condition: service_healthy
+      redis:
+        condition: service_healthy
     healthcheck:
       test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
       interval: 30s
@@ -57,6 +77,26 @@ services:
         condition: service_healthy
     command: ["python", "run_scheduler.py"]

+  # Worker (ARQ / Redis job queue)
+  worker:
+    build:
+      context: ./backend
+      dockerfile: Dockerfile
+    container_name: pounce-worker
+    restart: unless-stopped
+    environment:
+      DATABASE_URL: postgresql+asyncpg://pounce:${DB_PASSWORD:-changeme}@db:5432/pounce
+      SECRET_KEY: ${SECRET_KEY:-change-this-in-production}
+      ENABLE_SCHEDULER: "false"
+      ENABLE_JOB_QUEUE: "true"
+      REDIS_URL: redis://redis:6379/0
+    depends_on:
+      db:
+        condition: service_healthy
+      redis:
+        condition: service_healthy
+    command: ["arq", "app.jobs.worker.WorkerSettings"]
+
   # Next.js Frontend
   frontend:
     build:
@@ -73,4 +113,5 @@ services:

 volumes:
   postgres_data:
+  redis_data:
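After `docker compose up`, the quickest end-to-end check of the redis + worker pair is to enqueue a task and watch it complete. A throwaway script along these lines works from inside the backend container (the script path is hypothetical, and it reuses the private `_get_pool` helper purely for brevity; only `enqueue_job`, `_get_pool`, and the task name come from this diff):

```python
# scripts/queue_smoke.py (hypothetical): enqueue a scrape and poll its status.
import asyncio

from arq.jobs import Job, JobStatus

from app.jobs.client import _get_pool, enqueue_job


async def main() -> None:
    job_id = await enqueue_job("scrape_tld_prices")
    job = Job(job_id, redis=await _get_pool())
    while True:
        status = await job.status()
        print(f"{job_id}: {status}")
        if status in (JobStatus.complete, JobStatus.not_found):
            break
        await asyncio.sleep(2)


if __name__ == "__main__":
    asyncio.run(main())
```

Run it with something like `docker compose exec backend python -m scripts.queue_smoke` and follow `docker compose logs -f worker` to see the task execute.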
diff --git a/frontend/next.config.js b/frontend/next.config.js
index 2702ffe..3ebd517 100644
--- a/frontend/next.config.js
+++ b/frontend/next.config.js
@@ -1,7 +1,7 @@
 /** @type {import('next').NextConfig} */
 const nextConfig = {
   reactStrictMode: true,
-  // output: 'standalone', // Only needed for Docker deployment
+  output: 'standalone',

   // Performance & SEO optimizations
   poweredByHeader: false, // Remove X-Powered-By header for security
diff --git a/loadtest/README.md b/loadtest/README.md
new file mode 100644
index 0000000..bb0f9d7
--- /dev/null
+++ b/loadtest/README.md
@@ -0,0 +1,24 @@
+## Load testing
+
+This folder contains lightweight load-test scaffolding for catching API performance regressions.
+
+### k6 (recommended)
+
+1. Install k6 (macOS):
+
+```bash
+brew install k6
+```
+
+2. Run the smoke test against a running stack:
+
+```bash
+BASE_URL=http://localhost:8000 k6 run loadtest/k6/api-smoke.js
+```
+
+### Notes
+
+- The scripts assume the FastAPI backend is reachable at `BASE_URL` and the API prefix is `/api/v1`.
+- For authenticated endpoints, extend the script to log in first and replay the session cookie.
+
+
diff --git a/loadtest/k6/api-smoke.js b/loadtest/k6/api-smoke.js
new file mode 100644
index 0000000..8aec740
--- /dev/null
+++ b/loadtest/k6/api-smoke.js
@@ -0,0 +1,37 @@
+import http from 'k6/http'
+import { check, sleep } from 'k6'
+
+export const options = {
+  vus: 5,
+  duration: '30s',
+  thresholds: {
+    http_req_failed: ['rate<0.01'],
+    http_req_duration: ['p(95)<800'], // p95 < 800ms for these lightweight endpoints
+  },
+}
+
+const BASE_URL = __ENV.BASE_URL || 'http://localhost:8000'
+
+export default function () {
+  // Health
+  const health = http.get(`${BASE_URL}/health`)
+  check(health, {
+    'health status 200': (r) => r.status === 200,
+  })
+
+  // Public: auctions feed (anonymous)
+  const feed = http.get(`${BASE_URL}/api/v1/auctions/feed?source=external&limit=20&offset=0&sort_by=score`)
+  check(feed, {
+    'feed status 200': (r) => r.status === 200,
+  })
+
+  // Public: trending TLDs
+  const trending = http.get(`${BASE_URL}/api/v1/tld-prices/trending`)
+  check(trending, {
+    'trending status 200': (r) => r.status === 200,
+  })
+
+  sleep(1)
+}
+
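The README's note about authenticated endpoints translates to: log in once, then let the client replay the session cookie. A Python sketch of that flow using httpx (the same steps a k6 variant would follow); the credential payload fields and the `/api/v1/auth/me` path are assumptions about the auth API, not taken from this diff:

```python
# Sketch of the login-then-replay-cookie flow; payload fields and /auth/me are assumed.
import httpx

BASE_URL = "http://localhost:8000"

with httpx.Client(base_url=BASE_URL) as client:
    login = client.post(
        "/api/v1/auth/login",
        json={"email": "loadtest@example.com", "password": "change-me"},
    )
    login.raise_for_status()
    # The HttpOnly session cookie is stored on the client and sent automatically.
    me = client.get("/api/v1/auth/me")
    print(me.status_code, me.json())
```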