From f0cc69ac9517ad847d2c743dd033aaa7f3837b57 Mon Sep 17 00:00:00 2001 From: "yves.gugger" Date: Mon, 8 Dec 2025 09:12:44 +0100 Subject: [PATCH] feat: TLD price scraper, .ch domain fix, DB integration Major changes: - Add TLD price scraper with Porkbun API (886+ TLDs, no API key needed) - Fix .ch domain checker using rdap.nic.ch custom RDAP - Integrate database for TLD price history tracking - Add admin endpoints for manual scrape and stats - Extend scheduler with daily TLD price scrape job (03:00 UTC) - Update API to use DB data with static fallback - Update README with complete documentation New files: - backend/app/services/tld_scraper/ (scraper package) - TLD_TRACKING_PLAN.md (implementation plan) API changes: - POST /admin/scrape-tld-prices - trigger manual scrape - GET /admin/tld-prices/stats - database statistics - GET /tld-prices/overview now uses DB data --- README.md | 585 +++++++----- TLD_TRACKING_PLAN.md | 896 ++++++++++++++++++ backend/app/api/admin.py | 74 +- backend/app/api/check.py | 2 +- backend/app/api/domains.py | 4 +- backend/app/api/subscription.py | 2 +- backend/app/api/tld_prices.py | 208 +++- backend/app/config.py | 5 + backend/app/main.py | 1 + backend/app/scheduler.py | 44 +- backend/app/services/domain_checker.py | 182 +++- backend/app/services/tld_scraper/__init__.py | 14 + .../app/services/tld_scraper/aggregator.py | 288 ++++++ backend/app/services/tld_scraper/base.py | 220 +++++ backend/app/services/tld_scraper/porkbun.py | 134 +++ backend/app/services/tld_scraper/tld_list.py | 40 + backend/requirements.txt | 4 + frontend/src/app/pricing/page.tsx | 22 +- frontend/src/lib/api.ts | 22 +- 19 files changed, 2425 insertions(+), 322 deletions(-) create mode 100644 TLD_TRACKING_PLAN.md create mode 100644 backend/app/services/tld_scraper/__init__.py create mode 100644 backend/app/services/tld_scraper/aggregator.py create mode 100644 backend/app/services/tld_scraper/base.py create mode 100644 backend/app/services/tld_scraper/porkbun.py create mode 100644 backend/app/services/tld_scraper/tld_list.py diff --git a/README.md b/README.md index 44ffd16..d938d32 100644 --- a/README.md +++ b/README.md @@ -1,23 +1,52 @@ # pounce — Domain Availability Monitoring -A full-stack application for monitoring domain name availability with TLD price tracking. +A professional full-stack application for monitoring domain name availability with TLD price tracking and intelligence. 
+ +## Features + +### Core Functionality +- **Domain Availability Monitoring** — Track any domain and get notified when it becomes available +- **TLD Price Intelligence** — Compare prices across 886+ TLDs from Porkbun API +- **Automated Price Scraping** — Daily cronjob scrapes real TLD prices from public APIs +- **Expiration Tracking** — Monitor domain expiration dates and plan ahead +- **Real-time Checks** — RDAP, WHOIS, and DNS-based availability verification +- **Swiss Domain Support** — Special RDAP integration for .ch/.li domains via nic.ch +- **Historical Data** — Track price trends and availability history over time (12 months) + +### User Experience +- **Modern UI** — Clean, minimalist dark-mode design with smooth animations +- **Responsive** — Fully optimized for desktop, tablet, and mobile +- **Authentication** — Secure JWT-based auth with subscription tiers +- **Dashboard** — Personal watchlist with status indicators and actions + +--- ## Tech Stack ### Backend -- **Python 3.12+** -- **FastAPI** — Modern async web framework -- **SQLAlchemy 2.0** — Async ORM -- **SQLite** (dev) / **PostgreSQL** (production) -- **APScheduler** — Background job scheduling -- **python-whois & whodap** — Domain availability checking (WHOIS + RDAP) +| Technology | Purpose | +|------------|---------| +| **Python 3.12+** | Runtime | +| **FastAPI** | Async web framework | +| **SQLAlchemy 2.0** | Async ORM | +| **SQLite/PostgreSQL** | Database | +| **APScheduler** | Background job scheduling | +| **python-whois** | WHOIS domain lookups | +| **whodap** | RDAP domain lookups | +| **httpx** | Async HTTP client (for custom RDAP) | +| **BeautifulSoup4** | Web scraping (backup) | +| **Pydantic** | Data validation | +| **JWT (python-jose)** | Authentication | ### Frontend -- **Next.js 14** — React framework with App Router -- **TypeScript** -- **Tailwind CSS** — Styling -- **Zustand** — State management -- **Lucide React** — Icons +| Technology | Purpose | +|------------|---------| +| **Next.js 14** | React framework (App Router) | +| **TypeScript** | Type safety | +| **Tailwind CSS** | Styling | +| **Zustand** | State management | +| **Lucide React** | Icons (outlined style) | +| **clsx** | Conditional classes | --- @@ -27,40 +56,92 @@ A full-stack application for monitoring domain name availability with TLD price pounce/ ├── backend/ │ ├── app/ -│ │ ├── api/ # API endpoints -│ │ ├── models/ # SQLAlchemy models -│ │ ├── schemas/ # Pydantic schemas -│ │ ├── services/ # Business logic -│ │ ├── config.py # Settings -│ │ ├── database.py # DB connection -│ │ ├── main.py # FastAPI app -│ │ └── scheduler.py # Background jobs +│ │ ├── api/ # API endpoints +│ │ │ ├── auth.py # Authentication (register, login, me) +│ │ │ ├── check.py # Domain availability check +│ │ │ ├── domains.py # Domain watchlist CRUD +│ │ │ ├── subscription.py # User subscription management +│ │ │ ├── tld_prices.py # TLD pricing data +│ │ │ └── admin.py # Admin endpoints +│ │ ├── models/ # SQLAlchemy models +│ │ │ ├── user.py # User model +│ │ │ ├── domain.py # Domain watchlist model +│ │ │ ├── subscription.py # Subscription tiers +│ │ │ └── tld_price.py # TLD pricing model +│ │ ├── schemas/ # Pydantic schemas +│ │ ├── services/ # Business logic +│ │ │ ├── auth.py # Auth service (JWT, hashing) +│ │ │ ├── domain_checker.py # RDAP/WHOIS/DNS checks +│ │ │ └── tld_scraper/ # TLD price scraping +│ │ │ ├── __init__.py +│ │ │ ├── base.py # Base scraper class +│ │ │ ├── porkbun.py # Porkbun API scraper +│ │ │ ├── tld_list.py # TLD-List scraper 
(placeholder) +│ │ │ └── aggregator.py # Combines sources, saves to DB +│ │ ├── config.py # Environment settings +│ │ ├── database.py # Database connection +│ │ ├── main.py # FastAPI app entry +│ │ └── scheduler.py # Background jobs │ ├── requirements.txt -│ └── run.py +│ ├── Dockerfile +│ └── env.example ├── frontend/ │ ├── src/ -│ │ ├── app/ # Next.js pages -│ │ ├── components/ # React components -│ │ └── lib/ # Utilities & API client +│ │ ├── app/ # Next.js App Router pages +│ │ │ ├── page.tsx # Homepage with domain checker +│ │ │ ├── layout.tsx # Root layout +│ │ │ ├── dashboard/ # User dashboard +│ │ │ ├── login/ # Login page +│ │ │ ├── register/ # Registration page +│ │ │ ├── pricing/ # Pricing plans +│ │ │ └── tld-pricing/ # TLD price intelligence +│ │ │ ├── page.tsx # TLD overview +│ │ │ └── [tld]/ # TLD detail page +│ │ ├── components/ # React components +│ │ │ ├── Header.tsx # Navigation header +│ │ │ ├── Footer.tsx # Site footer +│ │ │ └── DomainChecker.tsx # Domain search component +│ │ └── lib/ # Utilities +│ │ ├── api.ts # API client +│ │ └── store.ts # Zustand store +│ ├── tailwind.config.ts # Tailwind configuration │ ├── package.json -│ └── tailwind.config.ts +│ ├── Dockerfile +│ └── env.example +├── docker-compose.yml # Docker deployment +├── DEPLOYMENT.md # Deployment guide └── README.md ``` --- -## Installation +## Pages Overview + +| Route | Description | Auth Required | +|-------|-------------|---------------| +| `/` | Homepage with domain checker, features, pricing preview | No | +| `/login` | User login | No | +| `/register` | User registration | No | +| `/dashboard` | Personal domain watchlist | Yes | +| `/pricing` | Subscription plans with FAQ | No | +| `/tld-pricing` | TLD price overview with trends | No* | +| `/tld-pricing/[tld]` | TLD detail with registrar comparison | Yes | + +*Unauthenticated users see limited data with shimmer effects + +--- + +## Quick Start ### Prerequisites +- Python 3.12+ +- Node.js 18+ +- npm or yarn -- **Python 3.12+** -- **Node.js 18+** -- **npm** or **yarn** - -### 1. Clone the Repository +### 1. Clone Repository ```bash -git clone +git clone https://git.6bit.ch/yvg/pounce.git cd pounce ``` @@ -71,46 +152,29 @@ cd backend # Create virtual environment python3 -m venv venv - -# Activate virtual environment source venv/bin/activate # Linux/macOS -# or -.\venv\Scripts\activate # Windows +# .\venv\Scripts\activate # Windows # Install dependencies pip install -r requirements.txt # Create environment file -cp env.example.txt .env +cp env.example .env + +# Generate secret key and add to .env +python -c "import secrets; print(f'SECRET_KEY={secrets.token_hex(32)}')" ``` -Edit `.env` with your settings: - +Edit `.env`: ```env -# Database DATABASE_URL=sqlite+aiosqlite:///./domainwatch.db - -# Security - CHANGE THIS IN PRODUCTION! -SECRET_KEY=your-super-secret-key-change-in-production-min-32-chars - -# CORS +SECRET_KEY=your-generated-secret-key CORS_ORIGINS=http://localhost:3000,http://127.0.0.1:3000 - -# Optional: Email notifications -# SMTP_HOST=smtp.example.com -# SMTP_PORT=587 -# SMTP_USER=your-email@example.com -# SMTP_PASSWORD=your-password ``` -Start the backend: - +Start backend: ```bash -# Development uvicorn app.main:app --reload --host 0.0.0.0 --port 8000 - -# Or use the run script -python run.py ``` ### 3. 
Frontend Setup @@ -123,152 +187,78 @@ npm install # Create environment file echo "NEXT_PUBLIC_API_URL=http://localhost:8000" > .env.local -``` -Start the frontend: - -```bash -# Development +# Start development server npm run dev - -# Production build -npm run build -npm start ``` +### 4. Access Application + +- **Frontend:** http://localhost:3000 +- **Backend API:** http://localhost:8000 +- **API Docs:** http://localhost:8000/docs + --- -## Production Deployment +## API Endpoints -### Backend (Python/FastAPI) +### Authentication +| Method | Endpoint | Description | +|--------|----------|-------------| +| POST | `/api/v1/auth/register` | Register new user | +| POST | `/api/v1/auth/login` | Login (returns JWT) | +| GET | `/api/v1/auth/me` | Get current user | -#### Option A: Systemd Service (Linux) +### Domain Check +| Method | Endpoint | Description | +|--------|----------|-------------| +| POST | `/api/v1/check` | Check domain availability | -1. Create service file `/etc/systemd/system/pounce-backend.service`: +### Domain Watchlist +| Method | Endpoint | Description | +|--------|----------|-------------| +| GET | `/api/v1/domains` | List watched domains | +| POST | `/api/v1/domains` | Add domain to watchlist | +| DELETE | `/api/v1/domains/{id}` | Remove domain | +| POST | `/api/v1/domains/{id}/refresh` | Refresh domain status | +| GET | `/api/v1/domains/{id}/history` | Get check history | -```ini -[Unit] -Description=pounce Backend API -After=network.target +### TLD Prices +| Method | Endpoint | Description | +|--------|----------|-------------| +| GET | `/api/v1/tld-prices/overview` | Get all TLDs overview | +| GET | `/api/v1/tld-prices/trending` | Get trending TLDs | +| GET | `/api/v1/tld-prices/{tld}` | Get TLD details | +| GET | `/api/v1/tld-prices/{tld}/history` | Get price history | +| GET | `/api/v1/tld-prices/{tld}/compare` | Compare registrar prices | -[Service] -User=www-data -Group=www-data -WorkingDirectory=/var/www/pounce/backend -Environment="PATH=/var/www/pounce/backend/venv/bin" -ExecStart=/var/www/pounce/backend/venv/bin/uvicorn app.main:app --host 0.0.0.0 --port 8000 -Restart=always +### Subscription +| Method | Endpoint | Description | +|--------|----------|-------------| +| GET | `/api/v1/subscription` | Get current subscription | +| POST | `/api/v1/subscription/upgrade` | Upgrade plan | -[Install] -WantedBy=multi-user.target -``` +### Admin +| Method | Endpoint | Description | +|--------|----------|-------------| +| GET | `/api/v1/admin/users` | List all users | +| POST | `/api/v1/admin/upgrade-user` | Upgrade user subscription | +| POST | `/api/v1/admin/scrape-tld-prices` | Manually trigger TLD price scrape | +| GET | `/api/v1/admin/tld-prices/stats` | Get TLD price database stats | -2. Enable and start: +--- -```bash -sudo systemctl enable pounce-backend -sudo systemctl start pounce-backend -``` +## Subscription Tiers -#### Option B: Docker - -```dockerfile -FROM python:3.12-slim - -WORKDIR /app - -COPY requirements.txt . -RUN pip install --no-cache-dir -r requirements.txt - -COPY . . - -EXPOSE 8000 - -CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] -``` - -### Frontend (Next.js) - -#### Option A: PM2 (Node.js) - -```bash -cd frontend -npm run build - -# Install PM2 globally -npm install -g pm2 - -# Start with PM2 -pm2 start npm --name "pounce-frontend" -- start - -# Save PM2 config -pm2 save -pm2 startup -``` - -#### Option B: Docker - -```dockerfile -FROM node:18-alpine AS builder - -WORKDIR /app -COPY package*.json ./ -RUN npm ci -COPY . . 
-RUN npm run build - -FROM node:18-alpine AS runner -WORKDIR /app - -ENV NODE_ENV production - -COPY --from=builder /app/.next/standalone ./ -COPY --from=builder /app/.next/static ./.next/static -COPY --from=builder /app/public ./public - -EXPOSE 3000 - -CMD ["node", "server.js"] -``` - -### Nginx Reverse Proxy - -```nginx -# /etc/nginx/sites-available/pounce - -server { - listen 80; - server_name yourdomain.com; - - # Frontend - location / { - proxy_pass http://localhost:3000; - proxy_http_version 1.1; - proxy_set_header Upgrade $http_upgrade; - proxy_set_header Connection 'upgrade'; - proxy_set_header Host $host; - proxy_cache_bypass $http_upgrade; - } - - # Backend API - location /api { - proxy_pass http://localhost:8000; - proxy_http_version 1.1; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - proxy_set_header X-Forwarded-Proto $scheme; - } -} -``` - -Enable with SSL (Let's Encrypt): - -```bash -sudo ln -s /etc/nginx/sites-available/pounce /etc/nginx/sites-enabled/ -sudo certbot --nginx -d yourdomain.com -sudo systemctl reload nginx -``` +| Feature | Starter (Free) | Professional ($4.99/mo) | Enterprise ($9.99/mo) | +|---------|----------------|------------------------|----------------------| +| Domains | 3 | 25 | 100 | +| Check Frequency | Daily | Daily | Hourly | +| Notifications | Email | Priority Email | Priority Email | +| WHOIS Data | Basic | Full | Full | +| Check History | — | 30 days | Unlimited | +| Expiration Tracking | — | ✓ | ✓ | +| API Access | — | — | ✓ | --- @@ -281,6 +271,8 @@ sudo systemctl reload nginx | `DATABASE_URL` | Database connection string | `sqlite+aiosqlite:///./domainwatch.db` | | `SECRET_KEY` | JWT signing key (min 32 chars) | **Required** | | `CORS_ORIGINS` | Allowed origins (comma-separated) | `http://localhost:3000` | +| `ACCESS_TOKEN_EXPIRE_MINUTES` | JWT token lifetime | `10080` (7 days) | +| `SCHEDULER_CHECK_INTERVAL_HOURS` | Domain check interval | `24` | | `SMTP_HOST` | Email server host | Optional | | `SMTP_PORT` | Email server port | `587` | | `SMTP_USER` | Email username | Optional | @@ -294,80 +286,162 @@ sudo systemctl reload nginx --- -## Database +## Production Deployment -### Development (SQLite) - -SQLite is used by default. The database file `domainwatch.db` is created automatically. - -### Production (PostgreSQL) - -1. Install PostgreSQL: +### Docker (Recommended) ```bash -sudo apt install postgresql postgresql-contrib +# Clone repository +git clone https://git.6bit.ch/yvg/pounce.git +cd pounce + +# Set environment variables +export SECRET_KEY=$(python3 -c "import secrets; print(secrets.token_hex(32))") + +# Build and start +docker-compose up -d --build ``` -2. Create database: +### Manual Deployment -```bash -sudo -u postgres psql -CREATE DATABASE pounce; -CREATE USER pounce_user WITH PASSWORD 'your-password'; -GRANT ALL PRIVILEGES ON DATABASE pounce TO pounce_user; -\q -``` - -3. 
Update `.env`: - -```env -DATABASE_URL=postgresql+asyncpg://pounce_user:your-password@localhost:5432/pounce -``` +See [DEPLOYMENT.md](./DEPLOYMENT.md) for detailed instructions including: +- Systemd service setup +- PM2 for Node.js +- Nginx reverse proxy configuration +- SSL/HTTPS with Let's Encrypt +- PostgreSQL database setup --- -## API Endpoints +## Domain Checking Methods -### Authentication -- `POST /api/v1/auth/register` — Register new user -- `POST /api/v1/auth/login` — Login and get JWT token -- `GET /api/v1/auth/me` — Get current user +pounce uses multiple methods to verify domain availability: -### Domains -- `POST /api/v1/check` — Check domain availability -- `GET /api/v1/domains` — List monitored domains -- `POST /api/v1/domains` — Add domain to watchlist -- `DELETE /api/v1/domains/{id}` — Remove domain +1. **Custom RDAP** — For TLDs with own RDAP servers (e.g., `.ch`, `.li` via `rdap.nic.ch`) +2. **RDAP via whodap** — Modern protocol with detailed data, supported by major registries +3. **WHOIS (Fallback)** — Traditional protocol for domains without RDAP support +4. **DNS (Quick check)** — Fast initial availability check via DNS queries -### TLD Prices -- `GET /api/v1/tld-prices/overview` — Get TLD overview -- `GET /api/v1/tld-prices/trending` — Get trending TLDs -- `GET /api/v1/tld-prices/{tld}` — Get TLD details +The system automatically falls back to the next method if one fails. -### Admin -- `GET /api/v1/admin/users` — List all users -- `PUT /api/v1/admin/users/{id}` — Update user +### Supported TLDs with Custom RDAP + +| TLD | RDAP Server | Notes | +|-----|-------------|-------| +| `.ch` | `rdap.nic.ch` | Swiss domains - WHOIS blocked | +| `.li` | `rdap.nic.ch` | Liechtenstein - same registry | + +### Standard RDAP TLDs + +`.com`, `.net`, `.org`, `.info`, `.biz`, `.io`, `.co`, `.ai`, `.app`, `.dev`, `.xyz`, `.me`, and many more. --- -## Features +## TLD Price Scraping -### Subscription Tiers +pounce automatically scrapes TLD prices from public sources for price intelligence and trend tracking. -| Feature | Starter (Free) | Professional | Enterprise | -|---------|----------------|--------------|------------| -| Domains | 3 | 25 | 100 | -| Check Frequency | Daily | Daily | Hourly | -| Alerts | Email | Priority | Priority | -| WHOIS Data | Basic | Full | Full | -| History | — | 30 days | Unlimited | -| API Access | — | — | ✓ | +### Data Source -### Domain Checking +**Primary:** Porkbun Public API (no API key required!) 
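+
+As a rough standalone sketch — this is not the bundled `porkbun.py` scraper itself, and the response field names follow Porkbun's published format, so treat them as assumptions — the full price list can be fetched with one unauthenticated POST to the endpoint shown below:
+
+```python
+import asyncio
+
+import httpx
+
+PORKBUN_PRICING_URL = "https://api.porkbun.com/api/json/v3/pricing/get"
+
+
+async def fetch_porkbun_pricing() -> dict[str, dict]:
+    """Fetch the public TLD price list from Porkbun (no API key required)."""
+    async with httpx.AsyncClient(timeout=30) as client:
+        # An empty JSON body is sufficient for this public endpoint.
+        response = await client.post(PORKBUN_PRICING_URL, json={})
+        response.raise_for_status()
+        data = response.json()
+    if data.get("status") != "SUCCESS":
+        raise RuntimeError(f"Porkbun pricing request failed: {data}")
+    # "pricing" maps each TLD to registration/renewal/transfer prices (as strings).
+    return data.get("pricing", {})
+
+
+if __name__ == "__main__":
+    pricing = asyncio.run(fetch_porkbun_pricing())
+    com = pricing.get("com", {})
+    print(f".com registration: ${com.get('registration')}, renewal: ${com.get('renewal')}")
+```
+
+The bundled scraper normalizes values like these into `TLDPrice` records, which the aggregator then writes to the database (see the Price Data Model further down).
+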
-- **RDAP** (primary) — Modern protocol with detailed data -- **WHOIS** (fallback) — Traditional protocol -- **DNS** (quick check) — Fast availability check +``` +POST https://api.porkbun.com/api/json/v3/pricing/get +``` + +- ✅ **886+ TLDs** with pricing +- ✅ Registration, Renewal, Transfer prices +- ✅ No authentication needed +- ✅ Updated daily via scheduler + +### Scheduler Jobs + +| Job | Schedule | Description | +|-----|----------|-------------| +| **Domain Check** | Configurable (default: daily) | Checks all watched domains | +| **TLD Price Scrape** | Daily at 03:00 UTC | Scrapes current TLD prices | + +### Manual Scrape + +Trigger a manual TLD price scrape: + +```bash +curl -X POST http://localhost:8000/api/v1/admin/scrape-tld-prices +``` + +Response: +```json +{ + "message": "TLD price scrape completed", + "status": "success", + "tlds_scraped": 886, + "prices_saved": 886, + "sources_succeeded": 1 +} +``` + +### Database Stats + +Get current TLD price data statistics: + +```bash +curl http://localhost:8000/api/v1/admin/tld-prices/stats +``` + +Response: +```json +{ + "total_records": 886, + "unique_tlds": 886, + "unique_registrars": 1, + "latest_record": "2025-12-08T08:04:54", + "data_range_days": 0 +} +``` + +### Price Data Model + +```python +TLDPrice: + - tld: str # e.g., "com", "io", "ai" + - registrar: str # e.g., "porkbun" + - registration_price: float + - renewal_price: float + - transfer_price: float + - currency: str # "USD" + - recorded_at: datetime # For historical tracking +``` + +### Future Data Sources + +The scraper architecture supports multiple sources: + +1. **Porkbun API** ✅ (active) +2. **TLD-List.com** (blocked - placeholder) +3. **Namecheap Affiliate** (requires signup) +4. **Cloudflare** (requires account) + +--- + +## Design System + +### Colors +- **Background:** Dark (`#0a0a0a`) +- **Foreground:** Light (`#fafafa`) +- **Accent:** Teal (`#00d4aa`) +- **Warning:** Orange (`#f97316`) +- **Success:** Green (same as accent) + +### Typography +- **Display:** Playfair Display (serif) +- **Body:** Inter (sans-serif) +- **Mono:** JetBrains Mono (code) + +### Components +- All icons use outlined style (Lucide React) +- Minimalist, award-winning aesthetic +- Smooth animations and transitions +- Responsive design (mobile-first) --- @@ -398,8 +472,11 @@ npm run dev # Lint npm run lint -# Build +# Build for production npm run build + +# Start production server +npm start ``` --- @@ -411,17 +488,36 @@ npm run build 1. Check Python version: `python3 --version` (needs 3.12+) 2. Ensure virtual environment is activated 3. Check `.env` file exists and has valid `SECRET_KEY` +4. Check for syntax errors in config files ### Frontend can't connect to backend 1. Ensure backend is running on port 8000 2. Check `NEXT_PUBLIC_API_URL` in `.env.local` 3. Verify CORS origins in backend `.env` +4. Check browser console for CORS errors + +### Domain checks failing + +1. Some TLDs may have rate limits +2. Certain ccTLDs require special handling (see below) +3. Check backend logs for specific errors + +**Swiss/Liechtenstein domains (.ch, .li):** +- WHOIS is blocked for automated requests +- pounce uses the official `rdap.nic.ch` API instead +- This is automatic - no configuration needed + +**Other ccTLDs:** +- Some country code TLDs don't support RDAP +- pounce falls back to DNS checking +- Results may be less detailed (no registrar info) ### Database errors 1. Delete `domainwatch.db` to reset (dev only) 2. Check database URL format in `.env` +3. 
Ensure write permissions on database directory --- @@ -433,5 +529,4 @@ MIT License ## Support -For issues and feature requests, please open a GitHub issue. - +For issues and feature requests, please open a GitHub issue or contact support@pounce.dev diff --git a/TLD_TRACKING_PLAN.md b/TLD_TRACKING_PLAN.md new file mode 100644 index 0000000..25123b1 --- /dev/null +++ b/TLD_TRACKING_PLAN.md @@ -0,0 +1,896 @@ +# 📊 TLD Price Tracking System - Implementierungsplan + +## Übersicht + +Dieses Dokument beschreibt den Plan zur Implementierung eines automatischen TLD-Preis-Tracking-Systems, das Preisdaten über 12 Monate sammelt und korrekt abbildet. + +**🎯 Fokus: 100% Kostenlos & Unabhängig** + +--- + +## 🔍 Wie funktioniert das Domain-Preis-Ökosystem? + +### Die Preiskette verstehen + +``` +┌─────────────────────────────────────────────────────────────────────────┐ +│ WIE DOMAIN-PREISE ENTSTEHEN │ +├─────────────────────────────────────────────────────────────────────────┤ +│ │ +│ 1️⃣ REGISTRY (z.B. Verisign für .com) │ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ • Verwaltet die TLD technisch │ │ +│ │ • Setzt den WHOLESALE-PREIS (von ICANN reguliert) │ │ +│ │ • .com = $9.59 Wholesale (2024) │ │ +│ │ • Diese Preise sind ÖFFENTLICH in ICANN-Verträgen! │ │ +│ └─────────────────────────────────────────────────────────┘ │ +│ ↓ │ +│ 2️⃣ REGISTRAR (Namecheap, GoDaddy, Cloudflare...) │ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ • Kauft Domains zum Wholesale-Preis │ │ +│ │ • Verkauft an Endkunden mit MARGE │ │ +│ │ • .com = $10-15 Retail (je nach Registrar) │ │ +│ │ • Preise auf ihren Websites ÖFFENTLICH sichtbar │ │ +│ └─────────────────────────────────────────────────────────┘ │ +│ ↓ │ +│ 3️⃣ AGGREGATOREN (TLD-List.com, DomComp...) │ +│ ┌─────────────────────────────────────────────────────────┐ │ +│ │ WIE SIE AN DIE DATEN KOMMEN: │ │ +│ │ │ │ +│ │ a) 💰 AFFILIATE-PROGRAMME (Hauptquelle!) │ │ +│ │ → Registrare geben Affiliates Zugang zu Preis-Feeds │ │ +│ │ → TLD-List verdient Provision pro Referral │ │ +│ │ → Kostenlos, aber erfordert Traffic/Registrierung │ │ +│ │ │ │ +│ │ b) 🔗 RESELLER-APIs │ │ +│ │ → Als Reseller bekommt man API-Zugang │ │ +│ │ → Erfordert Mindestguthaben (~$100-500) │ │ +│ │ │ │ +│ │ c) 🌐 WEB SCRAPING │ │ +│ │ → Öffentliche Preisseiten automatisch auslesen │ │ +│ │ → Technisch einfach, rechtlich grauzone │ │ +│ │ │ │ +│ │ d) 📋 ÖFFENTLICHE REGISTRY-DATEN │ │ +│ │ → ICANN veröffentlicht Wholesale-Preise │ │ +│ │ → Basis für Preisberechnungen │ │ +│ └─────────────────────────────────────────────────────────┘ │ +│ │ +└─────────────────────────────────────────────────────────────────────────┘ +``` + +### Warum können TLD-List & Co. alle Preise zeigen? + +| Methode | Wie es funktioniert | Für uns nutzbar? | +|---------|---------------------|------------------| +| **Affiliate-Programme** | Registrare geben Preis-Feeds an Partner, die Traffic bringen | ⭐ JA - Kostenlos, erfordert Anmeldung | +| **Reseller-Status** | Als Reseller bekommt man API-Zugang | ⚠️ Erfordert Mindesteinzahlung | +| **Web Scraping** | Öffentliche Seiten auslesen | ⭐ JA - Sofort möglich | +| **Direkte Partnerschaften** | Business-Deals mit Registraren | ❌ Nur für grosse Player | + +--- + +## 🎯 Ziele + +1. **Automatisierte Datensammlung** - Cronjob crawlt täglich/wöchentlich Preise von mehreren Registraren +2. **Historische Daten** - 12 Monate Preisverlauf pro TLD und Registrar +3. **Echte Marktdaten** - Keine generierten/simulierten Daten +4. 
**Lokal & Server** - Funktioniert identisch auf beiden Umgebungen +5. **🆓 Kostenlos** - Keine API-Keys, keine externen Abhängigkeiten + +--- + +## 🛠️ Unsere Optionen (von einfach bis komplex) + +### Option A: Web Scraping (Sofort umsetzbar) ⭐ EMPFOHLEN + +``` +Aufwand: 2-3 Tage | Kosten: $0 | Zuverlässigkeit: ⭐⭐⭐⭐ +``` + +**So funktioniert's:** +- Registrare zeigen ihre Preise öffentlich auf ihren Websites +- Wir lesen diese Seiten automatisch aus (BeautifulSoup) +- Aggregator-Seiten wie TLD-List.com haben bereits alles zusammengetragen + +**Quellen:** +| Quelle | Was wir bekommen | Schwierigkeit | +|--------|------------------|---------------| +| TLD-List.com | ~1500 TLDs, 50+ Registrare | ⭐ Sehr einfach | +| Porkbun.com | Direkte Preise | ⭐ Sehr einfach | +| Spaceship.com | Direkte Preise | ⭐ Einfach | + +--- + +### Option B: Affiliate-Programme (Beste Langzeit-Lösung) ⭐⭐ + +``` +Aufwand: 1 Woche | Kosten: $0 | Zuverlässigkeit: ⭐⭐⭐⭐⭐ +``` + +**So funktioniert's:** +- Registrare wie Namecheap, Porkbun, etc. haben Affiliate-Programme +- Als Affiliate bekommst du Zugang zu strukturierten Preis-Feeds +- Du verdienst sogar Provision wenn jemand über deinen Link kauft! + +**Verfügbare Affiliate-Programme:** +| Registrar | Affiliate-Programm | Preis-Feed? | +|-----------|-------------------|-------------| +| **Namecheap** | namecheap.com/support/affiliates | ✅ CSV/XML Feed | +| **Porkbun** | porkbun.com/affiliate | ✅ API Zugang | +| **Dynadot** | dynadot.com/community/affiliate | ✅ Preis-API | +| **NameSilo** | namesilo.com/affiliate | ✅ Bulk-Preise | + +--- + +### Option C: Reseller-API (Professionellste Lösung) + +``` +Aufwand: 1-2 Wochen | Kosten: $100-500 Einzahlung | Zuverlässigkeit: ⭐⭐⭐⭐⭐ +``` + +**So funktioniert's:** +- Werde Reseller bei einem Grosshändler (ResellerClub, eNom, OpenSRS) +- Du bekommst vollständigen API-Zugang zu allen TLD-Preisen +- Einmalige Mindesteinzahlung erforderlich + +**Reseller-Plattformen:** +| Plattform | Mindesteinzahlung | API-Qualität | +|-----------|-------------------|--------------| +| ResellerClub | ~$100 | ⭐⭐⭐⭐⭐ | +| eNom | ~$250 | ⭐⭐⭐⭐ | +| OpenSRS | ~$500 | ⭐⭐⭐⭐⭐ | + +--- + +### Option D: Offizielle Registry-Daten (Für Wholesale-Preise) + +``` +Aufwand: 1 Tag | Kosten: $0 | Was: Nur Wholesale-Preise +``` + +**So funktioniert's:** +- ICANN veröffentlicht Registry-Verträge mit Wholesale-Preisen +- Verisign (.com), PIR (.org), etc. - alle Preise sind öffentlich +- Retail-Preise = Wholesale + Registrar-Marge + +**Öffentliche Quellen:** +| Quelle | URL | Daten | +|--------|-----|-------| +| ICANN Contracts | icann.org/resources/agreements | Wholesale-Preise | +| IANA Root DB | iana.org/domains/root/db | TLD-Liste + Registry | + +--- + +## 📊 Datenquellen (100% Kostenlos) + +### ⭐ Tier 1: Aggregator-Seiten (BESTE OPTION - Eine Quelle für alles!) 
+ +| Quelle | URL | Vorteile | Scraping | +|--------|-----|----------|----------| +| **TLD-List.com** | https://tld-list.com/ | Alle TLDs, alle Registrare, Vergleichstabellen | ⭐ Sehr einfach | +| **DomComp** | https://www.domcomp.com/ | Preisvergleich, Historische Daten | Einfach | + +**Warum TLD-List.com die beste Wahl ist:** +- ✅ ~1500 TLDs abgedeckt +- ✅ 50+ Registrare verglichen +- ✅ Saubere HTML-Struktur (einfach zu parsen) +- ✅ Regelmässig aktualisiert +- ✅ Keine Login/Auth erforderlich +- ✅ Keine API-Rate-Limits + +### Tier 2: Direkte Registrar-Preisseiten (Backup/Validation) + +| Registrar | URL | Format | Schwierigkeit | +|-----------|-----|--------|---------------| +| **Porkbun** | https://porkbun.com/products/domains | HTML-Tabelle | ⭐ Sehr einfach | +| **Namecheap** | https://www.namecheap.com/domains/domain-pricing/ | JS-Rendered | Mittel | +| **Cloudflare** | https://www.cloudflare.com/products/registrar/ | "At-cost" Liste | Einfach | +| **Spaceship** | https://www.spaceship.com/pricing/ | HTML-Tabelle | Einfach | +| **NameSilo** | https://www.namesilo.com/pricing | HTML-Tabelle | Einfach | + +### Tier 3: Offizielle Quellen (Validation/Reference) + +| Quelle | URL | Daten | +|--------|-----|-------| +| **IANA** | https://www.iana.org/domains/root/db | Offizielle TLD-Liste | +| **ICANN** | https://www.icann.org/resources/pages/registries | Registry-Informationen | + +--- + +## 🏗️ Systemarchitektur (API-Frei) + +``` +┌────────────────────────────────────────────────────────────────────────┐ +│ TLD Price Tracking System (100% Free) │ +├────────────────────────────────────────────────────────────────────────┤ +│ │ +│ ┌──────────────────┐ ┌─────────────────────────────────────┐ │ +│ │ APScheduler │────▶│ Web Scraper Service │ │ +│ │ (Cronjob) │ │ │ │ +│ │ │ │ ┌─────────────────────────────┐ │ │ +│ │ - Daily: 03:00 │ │ │ TLD-List.com Scraper │ │ │ +│ │ - Weekly: Sun │ │ │ (Hauptquelle - alle TLDs) │ │ │ +│ └──────────────────┘ │ └─────────────────────────────┘ │ │ +│ │ │ │ +│ │ ┌─────────────────────────────┐ │ │ +│ │ │ Porkbun Scraper (Backup) │ │ │ +│ │ └─────────────────────────────┘ │ │ +│ │ │ │ +│ │ ┌─────────────────────────────┐ │ │ +│ │ │ Spaceship Scraper (Backup) │ │ │ +│ │ └─────────────────────────────┘ │ │ +│ └────────────────┬────────────────────┘ │ +│ │ │ +│ ┌─────────────────────────────────────────▼────────────────────────┐ │ +│ │ Data Aggregation & Validation │ │ +│ │ - Cross-check prices from multiple sources │ │ +│ │ - Detect outliers (>20% change = warning) │ │ +│ │ - Calculate confidence score │ │ +│ └─────────────────────────────────────────┬────────────────────────┘ │ +│ │ │ +│ ┌─────────────────────────────────────────▼────────────────────────┐ │ +│ │ SQLite/PostgreSQL │ │ +│ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────────────┐ │ │ +│ │ │ tld_prices │ │ tld_info │ │ scrape_logs │ │ │ +│ │ │ (history) │ │ (metadata) │ │ (audit trail) │ │ │ +│ │ └─────────────┘ └─────────────┘ └─────────────────────┘ │ │ +│ └──────────────────────────────────────────────────────────────────┘ │ +│ │ │ +│ ┌─────────────────────────────────────────▼────────────────────────┐ │ +│ │ FastAPI Backend │ │ +│ │ - GET /tld-prices/overview (cached + fresh) │ │ +│ │ - GET /tld-prices/{tld}/history (12 Monate echte Daten) │ │ +│ │ - GET /tld-prices/trending (echte Trends aus DB) │ │ +│ │ - POST /admin/scrape (manueller Trigger) │ │ +│ └──────────────────────────────────────────────────────────────────┘ │ +│ │ 
+└────────────────────────────────────────────────────────────────────────┘ +``` + +### Warum Web Scraping die beste Lösung ist: + +| Aspekt | APIs | Web Scraping | +|--------|------|--------------| +| **Kosten** | Teils kostenpflichtig | ✅ Kostenlos | +| **API Keys** | Erforderlich | ✅ Nicht nötig | +| **Rate Limits** | Streng | ✅ Selbst kontrolliert | +| **Abhängigkeit** | Von Anbieter | ✅ Unabhängig | +| **Stabilität** | API-Änderungen | HTML-Änderungen (selten) | +| **Abdeckung** | Nur ein Registrar | ✅ Alle via Aggregator | + +--- + +## 📁 Neue Dateien & Struktur + +``` +backend/ +├── app/ +│ ├── services/ +│ │ ├── tld_scraper/ # NEU: Scraper Package +│ │ │ ├── __init__.py +│ │ │ ├── base.py # Basis-Klasse für Scraper +│ │ │ ├── tld_list.py # TLD-List.com Scraper (Haupt) +│ │ │ ├── porkbun.py # Porkbun Scraper (Backup) +│ │ │ ├── spaceship.py # Spaceship Scraper (Backup) +│ │ │ ├── validator.py # Cross-Validation +│ │ │ └── aggregator.py # Kombiniert alle Quellen +│ │ │ +│ │ └── scheduler.py # NEU: APScheduler Service +│ │ +│ ├── models/ +│ │ └── tld_price.py # Bereits vorhanden ✓ +│ │ └── scrape_log.py # NEU: Audit Logs +│ │ +│ └── api/ +│ └── tld_prices.py # Anpassen für echte DB-Daten +│ +├── scripts/ +│ └── seed_initial_prices.py # NEU: Initial-Daten Seed +│ └── manual_scrape.py # NEU: Manueller Scrape +│ +└── .env # Nur Scraper-Settings (keine API Keys!) +``` + +--- + +## 🔧 Konfiguration (Keine API Keys nötig!) + +### `.env` Erweiterung + +```env +# ===== TLD Price Scraper (100% Kostenlos) ===== + +# Scraper Settings +SCRAPER_ENABLED=true +SCRAPER_SCHEDULE_DAILY_HOUR=3 # Uhrzeit (UTC) +SCRAPER_SCHEDULE_WEEKLY_DAY=sun # Vollständiger Scrape +SCRAPER_MAX_RETRIES=3 +SCRAPER_TIMEOUT_SECONDS=30 +SCRAPER_DELAY_BETWEEN_REQUESTS=2 # Sekunden zwischen Requests + +# User-Agent Rotation (Optional für Stealth) +SCRAPER_USER_AGENTS="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36,Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36" + +# Quellen aktivieren/deaktivieren +SCRAPER_SOURCE_TLDLIST=true # Hauptquelle +SCRAPER_SOURCE_PORKBUN=true # Backup +SCRAPER_SOURCE_SPACESHIP=true # Backup + +# Validation +SCRAPER_MAX_PRICE_CHANGE_PERCENT=20 # Warnung bei >20% Änderung +SCRAPER_MIN_PRICE_USD=0.50 # Minimum gültiger Preis +SCRAPER_MAX_PRICE_USD=500 # Maximum gültiger Preis +``` + +--- + +## 📅 Crawl-Strategie + +### Wann crawlen? + +| Frequenz | Zeit | Warum | +|----------|------|-------| +| **Täglich** | 03:00 UTC | Niedrige Server-Last, frische Daten für neuen Tag | +| **Wöchentlich** | Sonntag 03:00 | Vollständiger Crawl aller TLDs und Registrare | +| **Bei Bedarf** | Manual Trigger | Admin kann manuell crawlen | + +### Was crawlen? 
+ +| Priorität | TLDs | Frequenz | +|-----------|------|----------| +| **Hoch** | com, net, org, io, co, ai, app, dev | Täglich | +| **Mittel** | info, biz, xyz, me, cc, tv | 2x pro Woche | +| **Niedrig** | Alle anderen (~500 TLDs) | Wöchentlich | + +### Daten pro Crawl + +```python +{ + "tld": "com", + "registrar": "cloudflare", + "registration_price": 10.44, + "renewal_price": 10.44, + "transfer_price": 10.44, + "promo_price": null, + "currency": "USD", + "recorded_at": "2024-12-08T03:00:00Z", + "source": "api", # oder "scrape" + "confidence": 1.0 # 0.0-1.0 +} +``` + +--- + +## 🗃️ Datenbank-Schema Erweiterung + +### Neues Model: `CrawlLog` + +```python +class CrawlLog(Base): + """Audit trail für Crawler-Aktivitäten.""" + + __tablename__ = "crawl_logs" + + id: Mapped[int] = mapped_column(primary_key=True) + + # Crawl Info + started_at: Mapped[datetime] + completed_at: Mapped[datetime | None] + status: Mapped[str] # running, success, partial, failed + + # Statistics + tlds_crawled: Mapped[int] = mapped_column(default=0) + registrars_crawled: Mapped[int] = mapped_column(default=0) + prices_collected: Mapped[int] = mapped_column(default=0) + errors_count: Mapped[int] = mapped_column(default=0) + + # Details + error_details: Mapped[str | None] = mapped_column(Text, nullable=True) + source_breakdown: Mapped[str | None] = mapped_column(Text, nullable=True) # JSON +``` + +### Erweiterung `TLDPrice` + +```python +# Zusätzliche Felder +source: Mapped[str] = mapped_column(String(20), default="api") # api, scrape, manual +confidence: Mapped[float] = mapped_column(Float, default=1.0) # 0.0-1.0 +crawl_log_id: Mapped[int | None] = mapped_column(ForeignKey("crawl_logs.id"), nullable=True) +``` + +--- + +## 🔧 Implementierung (Web Scraping) + +### Phase 1: Basis-Infrastruktur (Tag 1) + +1. **Scheduler Service erstellen** + ```python + # backend/app/services/scheduler.py + from apscheduler.schedulers.asyncio import AsyncIOScheduler + from apscheduler.triggers.cron import CronTrigger + + scheduler = AsyncIOScheduler() + + def setup_scheduler(): + # Daily scrape at 03:00 UTC + scheduler.add_job( + scrape_tld_prices, + CronTrigger(hour=3, minute=0), + id="daily_tld_scrape", + replace_existing=True + ) + scheduler.start() + ``` + +2. 
**Scraper Base Class** + ```python + # backend/app/services/tld_scraper/base.py + from abc import ABC, abstractmethod + from dataclasses import dataclass + + @dataclass + class TLDPriceData: + tld: str + registrar: str + registration_price: float + renewal_price: float | None + transfer_price: float | None + currency: str = "USD" + source: str = "scrape" + + class BaseTLDScraper(ABC): + name: str + base_url: str + + @abstractmethod + async def scrape(self) -> list[TLDPriceData]: + """Scrape prices from the source.""" + pass + + async def health_check(self) -> bool: + """Check if source is accessible.""" + async with httpx.AsyncClient() as client: + response = await client.get(self.base_url, timeout=10) + return response.status_code == 200 + ``` + +### Phase 2: TLD-List.com Scraper (Tag 2) - HAUPTQUELLE + +**Warum TLD-List.com?** +- Aggregiert Preise von 50+ Registraren +- ~1500 TLDs abgedeckt +- Saubere HTML-Tabellen-Struktur +- Keine JavaScript-Rendering nötig + +```python +# backend/app/services/tld_scraper/tld_list.py +from bs4 import BeautifulSoup +import httpx +import asyncio + +class TLDListScraper(BaseTLDScraper): + """Scraper für tld-list.com - die beste kostenlose Quelle.""" + + name = "tld-list" + base_url = "https://tld-list.com" + + # URLs für verschiedene TLD-Kategorien + ENDPOINTS = { + "all": "/tlds-from-a-z/", + "new": "/new-tlds/", + "cheapest": "/cheapest-tlds/", + } + + async def scrape(self) -> list[TLDPriceData]: + results = [] + + async with httpx.AsyncClient() as client: + # Alle TLDs scrapen + response = await client.get( + f"{self.base_url}{self.ENDPOINTS['all']}", + headers={"User-Agent": self.get_user_agent()}, + timeout=30 + ) + + soup = BeautifulSoup(response.text, "lxml") + + # TLD-Tabelle finden + table = soup.find("table", {"class": "tld-table"}) + if not table: + raise ScraperError("TLD table not found") + + for row in table.find_all("tr")[1:]: # Skip header + cells = row.find_all("td") + if len(cells) >= 4: + tld = cells[0].text.strip().lstrip(".") + + # Preise von verschiedenen Registraren extrahieren + price_cell = cells[1] # Registration price + registrar_link = price_cell.find("a") + + if registrar_link: + price = self.parse_price(price_cell.text) + registrar = registrar_link.get("data-registrar", "unknown") + + results.append(TLDPriceData( + tld=tld, + registrar=registrar, + registration_price=price, + renewal_price=self.parse_price(cells[2].text), + transfer_price=self.parse_price(cells[3].text), + )) + + return results + + def parse_price(self, text: str) -> float | None: + """Parse price from text like '$9.99' or '€8.50'.""" + import re + match = re.search(r'[\$€£]?([\d,]+\.?\d*)', text.replace(",", "")) + return float(match.group(1)) if match else None + + def get_user_agent(self) -> str: + """Rotate user agents to avoid detection.""" + import random + agents = [ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36", + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36", + ] + return random.choice(agents) +``` + +### Phase 3: Backup Scraper (Tag 3) + +```python +# backend/app/services/tld_scraper/porkbun.py +class PorkbunScraper(BaseTLDScraper): + """Backup: Direkt von Porkbun scrapen.""" + + name = "porkbun" + base_url = "https://porkbun.com/products/domains" + + async def scrape(self) -> list[TLDPriceData]: + async with httpx.AsyncClient() as client: + response = await client.get( + self.base_url, + headers={"User-Agent": self.get_user_agent()}, + timeout=30 + ) 
+ + soup = BeautifulSoup(response.text, "lxml") + results = [] + + # Porkbun hat eine saubere Tabellen-Struktur + for tld_div in soup.find_all("div", {"class": "tldRow"}): + tld = tld_div.find("span", {"class": "tld"}).text.strip() + price = tld_div.find("span", {"class": "price"}).text.strip() + + results.append(TLDPriceData( + tld=tld.lstrip("."), + registrar="porkbun", + registration_price=self.parse_price(price), + renewal_price=None, # Separate Abfrage nötig + transfer_price=None, + )) + + return results +``` + +### Phase 4: Aggregator & Validation (Tag 4) + +```python +# backend/app/services/tld_scraper/aggregator.py +class TLDPriceAggregator: + """Kombiniert alle Scraper und validiert Daten.""" + + def __init__(self): + self.scrapers = [ + TLDListScraper(), # Hauptquelle + PorkbunScraper(), # Backup + SpaceshipScraper(), # Backup + ] + + async def run_full_scrape(self, db: AsyncSession) -> ScrapeLog: + log = ScrapeLog(started_at=datetime.utcnow(), status="running") + all_prices: dict[str, list[TLDPriceData]] = {} + + for scraper in self.scrapers: + try: + prices = await scraper.scrape() + + for price in prices: + key = f"{price.tld}_{price.registrar}" + if key not in all_prices: + all_prices[key] = [] + all_prices[key].append(price) + + log.sources_scraped += 1 + except Exception as e: + log.errors.append(f"{scraper.name}: {str(e)}") + + # Validierung: Cross-check zwischen Quellen + validated_prices = self.validate_prices(all_prices) + + # In DB speichern + await self.save_to_db(db, validated_prices) + + log.prices_collected = len(validated_prices) + log.completed_at = datetime.utcnow() + log.status = "success" if not log.errors else "partial" + + return log + + def validate_prices(self, prices: dict) -> list[TLDPriceData]: + """Cross-validate prices from multiple sources.""" + validated = [] + + for key, price_list in prices.items(): + if len(price_list) == 1: + # Nur eine Quelle - verwenden mit niedrigerem Confidence + price = price_list[0] + price.confidence = 0.7 + validated.append(price) + else: + # Mehrere Quellen - Durchschnitt bilden + avg_price = sum(p.registration_price for p in price_list) / len(price_list) + + # Prüfen ob Preise ähnlich sind (max 10% Abweichung) + is_consistent = all( + abs(p.registration_price - avg_price) / avg_price < 0.1 + for p in price_list + ) + + result = price_list[0] + result.registration_price = avg_price + result.confidence = 0.95 if is_consistent else 0.8 + validated.append(result) + + return validated +``` + +--- + +## 🖥️ Lokal vs Server + +### Lokal (Development) + +```bash +# .env.local +CRAWLER_ENABLED=true +CRAWLER_SCHEDULE_DAILY_HOUR=* # Jede Stunde zum Testen +DATABASE_URL=sqlite+aiosqlite:///./domainwatch.db +``` + +```bash +# Manueller Test +curl -X POST http://localhost:8000/api/v1/admin/crawl-prices +``` + +### Server (Production) + +```bash +# .env +CRAWLER_ENABLED=true +CRAWLER_SCHEDULE_DAILY_HOUR=3 +DATABASE_URL=postgresql+asyncpg://user:pass@db:5432/pounce + +# Docker Compose +services: + backend: + environment: + - CRAWLER_ENABLED=true + # APScheduler läuft im selben Container +``` + +### Systemd Service (ohne Docker) + +```ini +# /etc/systemd/system/pounce-crawler.service +[Unit] +Description=Pounce TLD Price Crawler +After=network.target + +[Service] +Type=simple +User=pounce +WorkingDirectory=/opt/pounce/backend +ExecStart=/opt/pounce/backend/venv/bin/python -m app.services.scheduler +Restart=always + +[Install] +WantedBy=multi-user.target +``` + +--- + +## 📈 API Endpoints (Angepasst) + +```python +# Echte historische 
Daten statt generierte + +@router.get("/{tld}/history") +async def get_tld_price_history(tld: str, db: Database, days: int = 365): + """Echte 12-Monats-Daten aus der Datenbank.""" + + result = await db.execute( + select(TLDPrice) + .where(TLDPrice.tld == tld) + .where(TLDPrice.recorded_at >= datetime.utcnow() - timedelta(days=days)) + .order_by(TLDPrice.recorded_at) + ) + prices = result.scalars().all() + + # Gruppiere nach Datum und berechne Durchschnitt + return aggregate_daily_prices(prices) +``` + +--- + +## 📦 Abhängigkeiten + +### Neue requirements.txt Einträge + +```txt +# Web Scraping (Hauptmethode - KOSTENLOS!) +beautifulsoup4>=4.12.0 +lxml>=5.0.0 + +# Optional: Für JavaScript-lastige Seiten (nur wenn nötig) +# playwright>=1.40.0 # Gross, nur bei Bedarf aktivieren + +# Rate Limiting (respektvoller Scraping) +aiolimiter>=1.1.0 + +# Bereits vorhanden: +# httpx>=0.28.0 ✓ +# apscheduler>=3.10.4 ✓ +``` + +**Gesamtgrösse der neuen Dependencies: ~2MB** (minimal!) + +--- + +## 🚀 Implementierungs-Zeitplan (Schneller ohne APIs!) + +| Phase | Dauer | Beschreibung | +|-------|-------|--------------| +| **1** | 1 Tag | Scheduler + DB Schema + Base Scraper | +| **2** | 1 Tag | TLD-List.com Scraper (Hauptquelle) | +| **3** | 0.5 Tag | Porkbun + Spaceship Backup Scraper | +| **4** | 0.5 Tag | Aggregator + Validation | +| **5** | 1 Tag | API Endpoints + Frontend Anpassung | +| **6** | 1 Tag | Testing + Initial Scrape | + +**Total: ~5 Tage** (schneller als mit APIs!) + +--- + +## ⚠️ Wichtige Hinweise + +### Respektvolles Scraping + +```python +from aiolimiter import AsyncLimiter +import asyncio + +# Max 30 Requests pro Minute (respektvoll) +scrape_limiter = AsyncLimiter(30, 60) + +async def scrape_with_limit(url: str): + async with scrape_limiter: + # Zufällige Verzögerung zwischen Requests + await asyncio.sleep(random.uniform(1, 3)) + return await make_request(url) +``` + +### Robots.txt Compliance + +```python +# Vor dem Scrapen prüfen +async def check_robots_txt(base_url: str) -> bool: + """Check if scraping is allowed.""" + robots_url = f"{base_url}/robots.txt" + # ... parse robots.txt + # TLD-List.com erlaubt Scraping (kein Disallow für relevante Pfade) +``` + +### Error Handling + +```python +class ScraperError(Exception): + """Basis-Exception für Scraper-Fehler.""" + pass + +class HTMLStructureChanged(ScraperError): + """Website-Struktur hat sich geändert - Scraper muss angepasst werden.""" + pass + +class RateLimitDetected(ScraperError): + """Zu viele Requests - warten und erneut versuchen.""" + retry_after: int = 300 # 5 Minuten +``` + +### Datenqualität + +- **Confidence Score**: + - 0.95 = Mehrere Quellen stimmen überein + - 0.80 = Nur eine Quelle oder kleine Abweichungen + - 0.70 = Unsicher (grosse Abweichungen) +- **Outlier Detection**: Warnung bei >20% Preisänderung in 24h +- **Validation**: Preis muss zwischen $0.50 und $500 liegen + +--- + +## 🔒 Sicherheit & Best Practices + +1. **Keine API Keys nötig** ✅ (Web Scraping ist 100% kostenlos) +2. **User-Agent Rotation**: Wechselnde Browser-Identitäten +3. **Rate Limiting**: Max 30 req/min, 2-3 Sekunden Delay +4. **Robots.txt**: Immer respektieren +5. **Backup**: Tägliches Datenbank-Backup +6. 
**Monitoring**: Alert bei Scraper-Fehlern (HTML-Änderungen) + +--- + +## 🧪 Robustheit-Strategien + +### HTML-Struktur-Änderungen erkennen + +```python +async def validate_page_structure(soup: BeautifulSoup) -> bool: + """Prüfen ob erwartete Elemente noch existieren.""" + expected_elements = [ + ("table", {"class": "tld-table"}), + ("th", {"text": "TLD"}), + ("th", {"text": "Price"}), + ] + + for tag, attrs in expected_elements: + if not soup.find(tag, attrs): + return False + return True +``` + +### Fallback-Chain + +``` +TLD-List.com (Haupt) + ↓ (falls Fehler) +Porkbun (Backup 1) + ↓ (falls Fehler) +Spaceship (Backup 2) + ↓ (falls Fehler) +Letzte bekannte Daten aus DB verwenden + Alert +``` + +--- + +## ✅ Nächste Schritte (Kein API-Setup nötig!) + +1. [ ] Datenbank-Migration für neue Felder +2. [ ] Scheduler Service implementieren +3. [ ] TLD-List.com Scraper entwickeln +4. [ ] Backup Scraper (Porkbun, Spaceship) +5. [ ] Aggregator + Validation +6. [ ] API Endpoints anpassen +7. [ ] Initialen Scrape durchführen +8. [ ] Frontend für echte historische Daten aktualisieren + +--- + +## 💰 Kostenübersicht + +| Posten | Kosten | +|--------|--------| +| API Keys | $0 ✅ | +| Externe Services | $0 ✅ | +| Server-Ressourcen | Minimal (1x täglich ~100 Requests) | +| Wartungsaufwand | ~1h/Monat (HTML-Änderungen prüfen) | + +**Total: $0/Monat** 🎉 + +--- + +**Soll ich mit der Implementierung beginnen?** + diff --git a/backend/app/api/admin.py b/backend/app/api/admin.py index 19a69ca..183879d 100644 --- a/backend/app/api/admin.py +++ b/backend/app/api/admin.py @@ -1,5 +1,5 @@ """Admin API endpoints - for internal use only.""" -from fastapi import APIRouter, HTTPException, status +from fastapi import APIRouter, HTTPException, status, BackgroundTasks from pydantic import BaseModel, EmailStr from sqlalchemy import select @@ -10,6 +10,78 @@ from app.models.subscription import Subscription, SubscriptionTier, Subscription router = APIRouter() +@router.post("/scrape-tld-prices") +async def trigger_tld_scrape(background_tasks: BackgroundTasks, db: Database): + """ + Manually trigger a TLD price scrape. + + This runs in the background and returns immediately. + Check logs for scrape results. + + NOTE: In production, this should require admin authentication! 
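+
+    Currently the scrape runs synchronously (the BackgroundTasks parameter is
+    not yet used), so the response already contains the result summary:
+    status, tlds_scraped, prices_saved, sources_succeeded/attempted, errors,
+    and start/completion timestamps.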
+ """ + from app.services.tld_scraper.aggregator import tld_aggregator + + async def run_scrape(): + result = await tld_aggregator.run_scrape(db) + return result + + # Run synchronously for immediate feedback + result = await tld_aggregator.run_scrape(db) + + return { + "message": "TLD price scrape completed", + "status": result.status, + "tlds_scraped": result.tlds_scraped, + "prices_saved": result.prices_saved, + "sources_succeeded": result.sources_succeeded, + "sources_attempted": result.sources_attempted, + "errors": result.errors, + "started_at": result.started_at.isoformat(), + "completed_at": result.completed_at.isoformat() if result.completed_at else None, + } + + +@router.get("/tld-prices/stats") +async def get_tld_price_stats(db: Database): + """Get statistics about stored TLD price data.""" + from sqlalchemy import func + from app.models.tld_price import TLDPrice + + # Total records + total_result = await db.execute(select(func.count(TLDPrice.id))) + total_records = total_result.scalar() + + # Unique TLDs + tlds_result = await db.execute(select(func.count(func.distinct(TLDPrice.tld)))) + unique_tlds = tlds_result.scalar() + + # Unique registrars + registrars_result = await db.execute(select(func.count(func.distinct(TLDPrice.registrar)))) + unique_registrars = registrars_result.scalar() + + # Latest record + latest_result = await db.execute( + select(TLDPrice.recorded_at).order_by(TLDPrice.recorded_at.desc()).limit(1) + ) + latest_record = latest_result.scalar() + + # Oldest record + oldest_result = await db.execute( + select(TLDPrice.recorded_at).order_by(TLDPrice.recorded_at.asc()).limit(1) + ) + oldest_record = oldest_result.scalar() + + return { + "total_records": total_records, + "unique_tlds": unique_tlds, + "unique_registrars": unique_registrars, + "latest_record": latest_record.isoformat() if latest_record else None, + "oldest_record": oldest_record.isoformat() if oldest_record else None, + "data_range_days": (latest_record - oldest_record).days if latest_record and oldest_record else 0, + } + + class UpgradeUserRequest(BaseModel): """Request schema for upgrading a user.""" email: EmailStr diff --git a/backend/app/api/check.py b/backend/app/api/check.py index 751aedb..a8ae090 100644 --- a/backend/app/api/check.py +++ b/backend/app/api/check.py @@ -9,7 +9,7 @@ from app.services.domain_checker import domain_checker router = APIRouter() -@router.post("/", response_model=DomainCheckResponse) +@router.post("", response_model=DomainCheckResponse) async def check_domain_availability(request: DomainCheckRequest): """ Check if a domain is available. 
diff --git a/backend/app/api/domains.py b/backend/app/api/domains.py index f71e393..0bf8911 100644 --- a/backend/app/api/domains.py +++ b/backend/app/api/domains.py @@ -14,7 +14,7 @@ from app.services.domain_checker import domain_checker router = APIRouter() -@router.get("/", response_model=DomainListResponse) +@router.get("", response_model=DomainListResponse) async def list_domains( current_user: CurrentUser, db: Database, @@ -47,7 +47,7 @@ async def list_domains( ) -@router.post("/", response_model=DomainResponse, status_code=status.HTTP_201_CREATED) +@router.post("", response_model=DomainResponse, status_code=status.HTTP_201_CREATED) async def add_domain( domain_data: DomainCreate, current_user: CurrentUser, diff --git a/backend/app/api/subscription.py b/backend/app/api/subscription.py index df5dc42..da1e717 100644 --- a/backend/app/api/subscription.py +++ b/backend/app/api/subscription.py @@ -10,7 +10,7 @@ from app.schemas.subscription import SubscriptionResponse, SubscriptionTierInfo router = APIRouter() -@router.get("/", response_model=SubscriptionResponse) +@router.get("", response_model=SubscriptionResponse) async def get_subscription( current_user: CurrentUser, db: Database, diff --git a/backend/app/api/tld_prices.py b/backend/app/api/tld_prices.py index 169a04c..db2d181 100644 --- a/backend/app/api/tld_prices.py +++ b/backend/app/api/tld_prices.py @@ -1,14 +1,69 @@ -"""TLD Price API endpoints with real market data.""" +"""TLD Price API endpoints with real market data from database + static fallback.""" from datetime import datetime, timedelta from typing import Optional, List from fastapi import APIRouter, Query, HTTPException from pydantic import BaseModel +from sqlalchemy import select, func, desc from app.api.deps import Database +from app.models.tld_price import TLDPrice, TLDInfo router = APIRouter() +async def get_db_prices(db, tld: str = None) -> dict: + """Get latest prices from database.""" + # Subquery to get latest record per TLD/registrar + subq = ( + select( + TLDPrice.tld, + TLDPrice.registrar, + func.max(TLDPrice.recorded_at).label("max_date") + ) + .group_by(TLDPrice.tld, TLDPrice.registrar) + .subquery() + ) + + query = ( + select(TLDPrice) + .join( + subq, + (TLDPrice.tld == subq.c.tld) & + (TLDPrice.registrar == subq.c.registrar) & + (TLDPrice.recorded_at == subq.c.max_date) + ) + ) + + if tld: + query = query.where(TLDPrice.tld == tld.lower().lstrip(".")) + + result = await db.execute(query) + prices = result.scalars().all() + + # Group by TLD + tld_prices = {} + for p in prices: + if p.tld not in tld_prices: + tld_prices[p.tld] = { + "registrars": {}, + "prices": [] + } + tld_prices[p.tld]["registrars"][p.registrar] = { + "register": p.registration_price, + "renew": p.renewal_price or p.registration_price, + "transfer": p.transfer_price or p.registration_price, + } + tld_prices[p.tld]["prices"].append(p.registration_price) + + return tld_prices + + +async def get_db_price_count(db) -> int: + """Get count of TLDs in database.""" + result = await db.execute(select(func.count(func.distinct(TLDPrice.tld)))) + return result.scalar() or 0 + + # Real TLD price data based on current market research (December 2024) # Prices in USD, sourced from major registrars: Namecheap, Cloudflare, Porkbun, Google Domains TLD_DATA = { @@ -276,21 +331,50 @@ async def get_tld_overview( db: Database, limit: int = Query(50, ge=1, le=100), sort_by: str = Query("popularity", enum=["popularity", "price_asc", "price_desc", "name"]), + source: str = Query("auto", enum=["auto", "db", 
"static"]), ): - """Get overview of TLDs with current pricing.""" - tld_list = [] + """Get overview of TLDs with current pricing. - for tld, data in TLD_DATA.items(): - tld_list.append({ - "tld": tld, - "type": data["type"], - "description": data["description"], - "avg_registration_price": get_avg_price(data), - "min_registration_price": get_min_price(data), - "max_registration_price": get_max_price(data), - "registrar_count": len(data["registrars"]), - "trend": data["trend"], - }) + Args: + source: Data source - "auto" (DB first, fallback to static), "db" (only DB), "static" (only static) + """ + tld_list = [] + data_source = "static" + + # Try database first if auto or db + if source in ["auto", "db"]: + db_count = await get_db_price_count(db) + if db_count > 0: + db_prices = await get_db_prices(db) + data_source = "database" + + for tld, data in db_prices.items(): + prices = data["prices"] + tld_list.append({ + "tld": tld, + "type": guess_tld_type(tld), + "description": TLD_DATA.get(tld, {}).get("description", f".{tld} domain"), + "avg_registration_price": round(sum(prices) / len(prices), 2), + "min_registration_price": min(prices), + "max_registration_price": max(prices), + "registrar_count": len(data["registrars"]), + "trend": TLD_DATA.get(tld, {}).get("trend", "stable"), + }) + + # Use static data as fallback or if requested + if not tld_list and source in ["auto", "static"]: + data_source = "static" + for tld, data in TLD_DATA.items(): + tld_list.append({ + "tld": tld, + "type": data["type"], + "description": data["description"], + "avg_registration_price": get_avg_price(data), + "min_registration_price": get_min_price(data), + "max_registration_price": get_max_price(data), + "registrar_count": len(data["registrars"]), + "trend": data["trend"], + }) # Sort if sort_by == "price_asc": @@ -300,7 +384,20 @@ async def get_tld_overview( elif sort_by == "name": tld_list.sort(key=lambda x: x["tld"]) - return {"tlds": tld_list[:limit], "total": len(tld_list)} + return { + "tlds": tld_list[:limit], + "total": len(tld_list), + "source": data_source, + } + + +def guess_tld_type(tld: str) -> str: + """Guess TLD type based on pattern.""" + if len(tld) == 2: + return "ccTLD" + if tld in {"com", "net", "org", "info", "biz"}: + return "generic" + return "gTLD" @router.get("/trending") @@ -340,9 +437,62 @@ async def get_tld_price_history( db: Database, days: int = Query(90, ge=30, le=365), ): - """Get price history for a specific TLD.""" + """Get price history for a specific TLD. + + Returns real historical data from database if available, + otherwise generates simulated data based on trends. 
+ """ tld_clean = tld.lower().lstrip(".") + # Try to get real historical data from database + cutoff = datetime.utcnow() - timedelta(days=days) + result = await db.execute( + select(TLDPrice) + .where(TLDPrice.tld == tld_clean) + .where(TLDPrice.recorded_at >= cutoff) + .order_by(TLDPrice.recorded_at) + ) + db_prices = result.scalars().all() + + # If we have database data, use it + if db_prices: + # Group by date and calculate daily average + daily_prices = {} + for p in db_prices: + date_key = p.recorded_at.strftime("%Y-%m-%d") + if date_key not in daily_prices: + daily_prices[date_key] = [] + daily_prices[date_key].append(p.registration_price) + + history = [ + {"date": date, "price": round(sum(prices) / len(prices), 2)} + for date, prices in sorted(daily_prices.items()) + ] + + current_price = history[-1]["price"] if history else 0 + price_7d_ago = history[-8]["price"] if len(history) >= 8 else current_price + price_30d_ago = history[-31]["price"] if len(history) >= 31 else (history[0]["price"] if history else current_price) + price_90d_ago = history[0]["price"] if history else current_price + + # Get static data for metadata if available + static_data = TLD_DATA.get(tld_clean, {}) + + return { + "tld": tld_clean, + "type": static_data.get("type", guess_tld_type(tld_clean)), + "description": static_data.get("description", f".{tld_clean} domain"), + "registry": static_data.get("registry", "Unknown"), + "current_price": current_price, + "price_change_7d": round((current_price - price_7d_ago) / price_7d_ago * 100, 2) if price_7d_ago else 0, + "price_change_30d": round((current_price - price_30d_ago) / price_30d_ago * 100, 2) if price_30d_ago else 0, + "price_change_90d": round((current_price - price_90d_ago) / price_90d_ago * 100, 2) if price_90d_ago else 0, + "trend": calculate_trend(history), + "trend_reason": "Based on real price data", + "history": history, + "source": "database", + } + + # Fallback to static data with generated history if tld_clean not in TLD_DATA: raise HTTPException(status_code=404, detail=f"TLD '.{tld_clean}' not found") @@ -362,14 +512,12 @@ async def get_tld_price_history( for i in range(days, -1, -7): # Weekly data points date = current_date - timedelta(days=i) - # Interpolate price from past to present - progress = 1 - (i / days) # 0 = start, 1 = now + progress = 1 - (i / days) if data["trend"] == "up": price = current_price * (trend_factor + (1 - trend_factor) * progress) elif data["trend"] == "down": price = current_price * (trend_factor - (trend_factor - 1) * progress) else: - # Stable with small fluctuations import math fluctuation = math.sin(i * 0.1) * 0.02 price = current_price * (1 + fluctuation) @@ -379,7 +527,6 @@ async def get_tld_price_history( "price": round(price, 2), }) - # Calculate percentage changes price_30d_ago = history[-5]["price"] if len(history) >= 5 else current_price price_90d_ago = history[0]["price"] if len(history) > 0 else current_price @@ -395,9 +542,30 @@ async def get_tld_price_history( "trend": data["trend"], "trend_reason": data["trend_reason"], "history": history, + "source": "static", } +def calculate_trend(history: list) -> str: + """Calculate trend from price history.""" + if len(history) < 2: + return "stable" + + first_price = history[0]["price"] + last_price = history[-1]["price"] + + if first_price == 0: + return "stable" + + change_percent = (last_price - first_price) / first_price * 100 + + if change_percent > 5: + return "up" + elif change_percent < -5: + return "down" + return "stable" + + 
@router.get("/{tld}/compare") async def compare_tld_prices( tld: str, diff --git a/backend/app/config.py b/backend/app/config.py index 39d2fc3..df49aad 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -25,13 +25,18 @@ class Settings(BaseSettings): smtp_password: str = "" email_from: str = "" + # CORS Settings + cors_origins: str = "http://localhost:3000,http://127.0.0.1:3000" + # Scheduler Settings check_hour: int = 6 check_minute: int = 0 + scheduler_check_interval_hours: int = 24 class Config: env_file = ".env" env_file_encoding = "utf-8" + extra = "ignore" # Ignore extra fields in .env @lru_cache() diff --git a/backend/app/main.py b/backend/app/main.py index d9876df..62ecd78 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -47,6 +47,7 @@ app = FastAPI( description="Domain availability monitoring service", version="1.0.0", lifespan=lifespan, + redirect_slashes=False, # Prevent 307 redirects for trailing slashes ) # Configure CORS diff --git a/backend/app/scheduler.py b/backend/app/scheduler.py index 52b9a2c..a9bc21c 100644 --- a/backend/app/scheduler.py +++ b/backend/app/scheduler.py @@ -1,4 +1,4 @@ -"""Background scheduler for daily domain checks.""" +"""Background scheduler for daily domain checks and TLD price scraping.""" import asyncio import logging from datetime import datetime @@ -19,6 +19,28 @@ settings = get_settings() scheduler = AsyncIOScheduler() +async def scrape_tld_prices(): + """Scheduled task to scrape TLD prices from public sources.""" + from app.services.tld_scraper.aggregator import tld_aggregator + + logger.info("Starting scheduled TLD price scrape...") + + try: + async with AsyncSessionLocal() as db: + result = await tld_aggregator.run_scrape(db) + + logger.info( + f"TLD scrape completed: {result.status}, " + f"{result.tlds_scraped} TLDs, {result.prices_saved} prices saved" + ) + + if result.errors: + logger.warning(f"Scrape errors: {result.errors}") + + except Exception as e: + logger.exception(f"TLD price scrape failed: {e}") + + async def check_all_domains(): """Check availability of all monitored domains.""" logger.info("Starting daily domain check...") @@ -89,7 +111,7 @@ async def check_all_domains(): def setup_scheduler(): """Configure and start the scheduler.""" - # Daily check at configured hour + # Daily domain check at configured hour scheduler.add_job( check_all_domains, CronTrigger(hour=settings.check_hour, minute=settings.check_minute), @@ -98,8 +120,19 @@ def setup_scheduler(): replace_existing=True, ) + # Daily TLD price scrape at 03:00 UTC + scheduler.add_job( + scrape_tld_prices, + CronTrigger(hour=3, minute=0), + id="daily_tld_scrape", + name="Daily TLD Price Scrape", + replace_existing=True, + ) + logger.info( - f"Scheduler configured. 
Daily check at {settings.check_hour:02d}:{settings.check_minute:02d}" + f"Scheduler configured:" + f"\n - Domain check at {settings.check_hour:02d}:{settings.check_minute:02d}" + f"\n - TLD price scrape at 03:00 UTC" ) @@ -122,3 +155,8 @@ async def run_manual_check(): """Run domain check manually (for testing or on-demand).""" await check_all_domains() + +async def run_manual_tld_scrape(): + """Run TLD price scrape manually (for testing or on-demand).""" + await scrape_tld_prices() + diff --git a/backend/app/services/domain_checker.py b/backend/app/services/domain_checker.py index 1b6f7ab..8c60ad0 100644 --- a/backend/app/services/domain_checker.py +++ b/backend/app/services/domain_checker.py @@ -3,8 +3,9 @@ Advanced Domain Availability Checker Uses multiple methods for maximum accuracy: 1. RDAP (Registration Data Access Protocol) - Modern, accurate, JSON format -2. DNS lookup - Fast availability check -3. WHOIS - Fallback for TLDs without RDAP +2. Custom RDAP endpoints (for TLDs like .ch, .li with own RDAP servers) +3. DNS lookup - Fast availability check +4. WHOIS - Fallback for TLDs without RDAP Performance optimized with caching and async operations. """ @@ -18,6 +19,7 @@ from functools import lru_cache import dns.resolver import whois import whodap +import httpx from app.models.domain import DomainStatus @@ -62,7 +64,7 @@ class DomainChecker: Priority: RDAP > DNS > WHOIS """ - # TLDs known to support RDAP + # TLDs known to support RDAP via whodap library RDAP_SUPPORTED_TLDS = { 'com', 'net', 'org', 'info', 'biz', 'mobi', 'name', 'pro', 'app', 'dev', 'page', 'new', 'day', 'eat', 'fly', 'how', @@ -71,9 +73,17 @@ class DomainChecker: 'de', 'uk', 'fr', 'nl', 'eu', 'be', 'at', 'us', } - # TLDs that only support WHOIS (no RDAP) + # TLDs with custom RDAP endpoints (not in whodap but have their own RDAP servers) + # These registries have their own RDAP APIs that we query directly + CUSTOM_RDAP_ENDPOINTS = { + 'ch': 'https://rdap.nic.ch/domain/', # Swiss .ch domains + 'li': 'https://rdap.nic.ch/domain/', # Liechtenstein .li (same registry) + } + + # TLDs that only support WHOIS (no RDAP at all) + # Note: .ch and .li removed - they have custom RDAP! WHOIS_ONLY_TLDS = { - 'ch', 'li', 'ru', 'su', 'ua', 'by', 'kz', + 'ru', 'su', 'ua', 'by', 'kz', } def __init__(self): @@ -137,6 +147,101 @@ class DomainChecker: return None + async def _check_custom_rdap(self, domain: str) -> Optional[DomainCheckResult]: + """ + Check domain using custom RDAP endpoints (e.g., nic.ch for .ch/.li domains). + + These are registries that have their own RDAP servers not covered by whodap. 
+ """ + tld = self._get_tld(domain) + + if tld not in self.CUSTOM_RDAP_ENDPOINTS: + return None + + endpoint = self.CUSTOM_RDAP_ENDPOINTS[tld] + url = f"{endpoint}{domain}" + + try: + async with httpx.AsyncClient(timeout=10.0) as client: + response = await client.get(url, follow_redirects=True) + + if response.status_code == 404: + # Domain not found = available + return DomainCheckResult( + domain=domain, + status=DomainStatus.AVAILABLE, + is_available=True, + check_method="rdap_custom", + ) + + if response.status_code == 200: + # Domain exists = taken + data = response.json() + + # Extract dates from events + expiration_date = None + creation_date = None + updated_date = None + registrar = None + name_servers = [] + + # Parse events + events = data.get('events', []) + for event in events: + action = event.get('eventAction', '').lower() + date_str = event.get('eventDate', '') + + if 'expiration' in action and not expiration_date: + expiration_date = self._parse_datetime(date_str) + elif 'registration' in action and not creation_date: + creation_date = self._parse_datetime(date_str) + elif 'changed' in action or 'update' in action: + updated_date = self._parse_datetime(date_str) + + # Parse nameservers + nameservers = data.get('nameservers', []) + for ns in nameservers: + if isinstance(ns, dict): + ns_name = ns.get('ldhName', '') + if ns_name: + name_servers.append(ns_name.lower()) + + # Parse registrar from entities + entities = data.get('entities', []) + for entity in entities: + roles = entity.get('roles', []) + if 'registrar' in roles: + vcard = entity.get('vcardArray', []) + if isinstance(vcard, list) and len(vcard) > 1: + for item in vcard[1]: + if isinstance(item, list) and len(item) > 3: + if item[0] in ('fn', 'org') and item[3]: + registrar = str(item[3]) + break + + return DomainCheckResult( + domain=domain, + status=DomainStatus.TAKEN, + is_available=False, + registrar=registrar, + expiration_date=expiration_date, + creation_date=creation_date, + updated_date=updated_date, + name_servers=name_servers if name_servers else None, + check_method="rdap_custom", + ) + + # Other status codes - try fallback + logger.warning(f"Custom RDAP returned {response.status_code} for {domain}") + return None + + except httpx.TimeoutException: + logger.warning(f"Custom RDAP timeout for {domain}") + return None + except Exception as e: + logger.warning(f"Custom RDAP error for {domain}: {e}") + return None + async def _check_rdap(self, domain: str) -> Optional[DomainCheckResult]: """ Check domain using RDAP (Registration Data Access Protocol). @@ -329,12 +434,12 @@ class DomainChecker: 'object does not exist', ] if any(phrase in error_str for phrase in not_found_phrases): - return DomainCheckResult( - domain=domain, - status=DomainStatus.AVAILABLE, - is_available=True, - check_method="whois", - ) + return DomainCheckResult( + domain=domain, + status=DomainStatus.AVAILABLE, + is_available=True, + check_method="whois", + ) # Otherwise it's a real error return DomainCheckResult( domain=domain, @@ -383,9 +488,10 @@ class DomainChecker: Check domain availability using the best available method. Priority: - 1. RDAP (most accurate, modern protocol) - 2. WHOIS (fallback for TLDs without RDAP) - 3. DNS (quick check only) + 1. Custom RDAP (for TLDs like .ch, .li with own RDAP servers) + 2. RDAP via whodap (most accurate, modern protocol) + 3. WHOIS (fallback for TLDs without RDAP) + 4. 
DNS (quick check only, or final validation) Args: domain: Domain name to check @@ -416,8 +522,22 @@ class DomainChecker: check_method="dns", ) - # Try RDAP first (best accuracy) - if tld not in self.WHOIS_ONLY_TLDS: + # Priority 1: Try custom RDAP endpoints (for .ch, .li, etc.) + if tld in self.CUSTOM_RDAP_ENDPOINTS: + custom_result = await self._check_custom_rdap(domain) + if custom_result: + # Validate with DNS if custom RDAP says available + if custom_result.is_available: + dns_available = await self._check_dns(domain) + if not dns_available: + custom_result.status = DomainStatus.TAKEN + custom_result.is_available = False + return custom_result + # If custom RDAP fails, fall through to DNS check + logger.info(f"Custom RDAP failed for {domain}, using DNS fallback") + + # Priority 2: Try standard RDAP via whodap + if tld not in self.WHOIS_ONLY_TLDS and tld not in self.CUSTOM_RDAP_ENDPOINTS: rdap_result = await self._check_rdap(domain) if rdap_result: # Validate with DNS if RDAP says available @@ -428,17 +548,27 @@ class DomainChecker: rdap_result.is_available = False return rdap_result - # Fall back to WHOIS - whois_result = await self._check_whois(domain) + # Priority 3: Fall back to WHOIS (skip for TLDs that block it like .ch) + if tld not in self.CUSTOM_RDAP_ENDPOINTS: + whois_result = await self._check_whois(domain) + + # Validate with DNS + if whois_result.is_available: + dns_available = await self._check_dns(domain) + if not dns_available: + whois_result.status = DomainStatus.TAKEN + whois_result.is_available = False + + return whois_result - # Validate with DNS - if whois_result.is_available: - dns_available = await self._check_dns(domain) - if not dns_available: - whois_result.status = DomainStatus.TAKEN - whois_result.is_available = False - - return whois_result + # Final fallback: DNS-only check (for TLDs where everything else failed) + dns_available = await self._check_dns(domain) + return DomainCheckResult( + domain=domain, + status=DomainStatus.AVAILABLE if dns_available else DomainStatus.TAKEN, + is_available=dns_available, + check_method="dns", + ) async def check_multiple(self, domains: list[str], quick: bool = False) -> list[DomainCheckResult]: """ diff --git a/backend/app/services/tld_scraper/__init__.py b/backend/app/services/tld_scraper/__init__.py new file mode 100644 index 0000000..a7fc46c --- /dev/null +++ b/backend/app/services/tld_scraper/__init__.py @@ -0,0 +1,14 @@ +"""TLD Price Scraper Package.""" +from app.services.tld_scraper.base import BaseTLDScraper, TLDPriceData +from app.services.tld_scraper.tld_list import TLDListScraper +from app.services.tld_scraper.porkbun import PorkbunScraper +from app.services.tld_scraper.aggregator import TLDPriceAggregator + +__all__ = [ + "BaseTLDScraper", + "TLDPriceData", + "TLDListScraper", + "PorkbunScraper", + "TLDPriceAggregator", +] + diff --git a/backend/app/services/tld_scraper/aggregator.py b/backend/app/services/tld_scraper/aggregator.py new file mode 100644 index 0000000..1f431d4 --- /dev/null +++ b/backend/app/services/tld_scraper/aggregator.py @@ -0,0 +1,288 @@ +"""TLD Price Aggregator - combines multiple scrapers and saves to database.""" +import logging +from datetime import datetime +from dataclasses import dataclass, field + +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from app.models.tld_price import TLDPrice, TLDInfo +from app.services.tld_scraper.base import TLDPriceData, ScraperError +from app.services.tld_scraper.porkbun import PorkbunScraper + +logger = 
logging.getLogger(__name__) + + +@dataclass +class ScrapeResult: + """Result of a scraping run.""" + started_at: datetime = field(default_factory=datetime.utcnow) + completed_at: datetime | None = None + status: str = "running" # running, success, partial, failed + sources_attempted: int = 0 + sources_succeeded: int = 0 + tlds_scraped: int = 0 + prices_saved: int = 0 + errors: list[str] = field(default_factory=list) + + def to_dict(self) -> dict: + return { + "started_at": self.started_at.isoformat(), + "completed_at": self.completed_at.isoformat() if self.completed_at else None, + "status": self.status, + "sources_attempted": self.sources_attempted, + "sources_succeeded": self.sources_succeeded, + "tlds_scraped": self.tlds_scraped, + "prices_saved": self.prices_saved, + "errors": self.errors, + } + + +class TLDPriceAggregator: + """ + Aggregates TLD prices from multiple sources and saves to database. + + Primary source: Porkbun API (most reliable, 896+ TLDs) + Future sources: Can add more scrapers as backup + """ + + def __init__(self): + """Initialize the aggregator with available scrapers.""" + self.scrapers = [ + PorkbunScraper(), + # Add more scrapers here as they become available + # TLDListScraper(), # Currently blocked + ] + + async def run_scrape(self, db: AsyncSession) -> ScrapeResult: + """ + Run a full scrape from all sources and save to database. + + Args: + db: Database session + + Returns: + ScrapeResult with statistics + """ + result = ScrapeResult() + all_prices: dict[str, TLDPriceData] = {} + + # Scrape from all sources + for scraper in self.scrapers: + result.sources_attempted += 1 + + try: + logger.info(f"Scraping from {scraper.name}...") + prices = await scraper.scrape() + + if prices: + result.sources_succeeded += 1 + + # Store prices (later sources can override earlier ones) + for price in prices: + key = f"{price.tld}_{price.registrar}" + all_prices[key] = price + + logger.info(f"Got {len(prices)} prices from {scraper.name}") + else: + result.errors.append(f"{scraper.name}: No data returned") + + except ScraperError as e: + error_msg = f"{scraper.name}: {str(e)}" + result.errors.append(error_msg) + logger.error(error_msg) + except Exception as e: + error_msg = f"{scraper.name}: Unexpected error - {str(e)}" + result.errors.append(error_msg) + logger.exception(error_msg) + + # Save to database + if all_prices: + result.tlds_scraped = len(set(p.tld for p in all_prices.values())) + result.prices_saved = await self._save_prices(db, list(all_prices.values())) + + # Finalize result + result.completed_at = datetime.utcnow() + if result.sources_succeeded == result.sources_attempted: + result.status = "success" + elif result.sources_succeeded > 0: + result.status = "partial" + else: + result.status = "failed" + + logger.info( + f"Scrape completed: {result.status}, " + f"{result.tlds_scraped} TLDs, " + f"{result.prices_saved} prices saved" + ) + + return result + + async def _save_prices(self, db: AsyncSession, prices: list[TLDPriceData]) -> int: + """ + Save scraped prices to database. 
+ + Args: + db: Database session + prices: List of TLDPriceData to save + + Returns: + Number of prices saved + """ + saved_count = 0 + + for price_data in prices: + try: + # Create new price record (for historical tracking) + price_record = TLDPrice( + tld=price_data.tld, + registrar=price_data.registrar, + registration_price=price_data.registration_price, + renewal_price=price_data.renewal_price, + transfer_price=price_data.transfer_price, + currency=price_data.currency, + promo_price=price_data.promo_price, + recorded_at=price_data.scraped_at, + ) + db.add(price_record) + saved_count += 1 + + # Also update/create TLDInfo if it doesn't exist + await self._ensure_tld_info(db, price_data.tld) + + except Exception as e: + logger.warning(f"Error saving price for {price_data.tld}: {e}") + continue + + await db.commit() + return saved_count + + async def _ensure_tld_info(self, db: AsyncSession, tld: str): + """Ensure TLDInfo record exists for this TLD.""" + result = await db.execute( + select(TLDInfo).where(TLDInfo.tld == tld) + ) + existing = result.scalar_one_or_none() + + if not existing: + # Create basic TLDInfo record + tld_type = self._guess_tld_type(tld) + info = TLDInfo( + tld=tld, + type=tld_type, + ) + db.add(info) + + def _guess_tld_type(self, tld: str) -> str: + """Guess TLD type based on length and pattern.""" + # Country codes are typically 2 characters + if len(tld) == 2: + return "ccTLD" + + # Common generic TLDs + generic = {"com", "net", "org", "info", "biz", "name", "pro"} + if tld in generic: + return "generic" + + # New gTLDs + return "gTLD" + + async def get_latest_prices(self, db: AsyncSession, tld: str | None = None) -> list[dict]: + """ + Get latest prices from database. + + Args: + db: Database session + tld: Optional TLD to filter by + + Returns: + List of price dictionaries + """ + from sqlalchemy import func, desc + + # Subquery to get latest record per TLD/registrar combination + subq = ( + select( + TLDPrice.tld, + TLDPrice.registrar, + func.max(TLDPrice.recorded_at).label("max_date") + ) + .group_by(TLDPrice.tld, TLDPrice.registrar) + .subquery() + ) + + query = ( + select(TLDPrice) + .join( + subq, + (TLDPrice.tld == subq.c.tld) & + (TLDPrice.registrar == subq.c.registrar) & + (TLDPrice.recorded_at == subq.c.max_date) + ) + ) + + if tld: + query = query.where(TLDPrice.tld == tld.lower().lstrip(".")) + + result = await db.execute(query.order_by(TLDPrice.tld)) + prices = result.scalars().all() + + return [ + { + "tld": p.tld, + "registrar": p.registrar, + "registration_price": p.registration_price, + "renewal_price": p.renewal_price, + "transfer_price": p.transfer_price, + "currency": p.currency, + "promo_price": p.promo_price, + "recorded_at": p.recorded_at.isoformat() if p.recorded_at else None, + } + for p in prices + ] + + async def get_price_history( + self, + db: AsyncSession, + tld: str, + days: int = 365 + ) -> list[dict]: + """ + Get price history for a TLD. 
+ + Args: + db: Database session + tld: TLD to get history for + days: Number of days of history + + Returns: + List of historical price records + """ + from datetime import timedelta + from sqlalchemy import desc + + cutoff = datetime.utcnow() - timedelta(days=days) + + result = await db.execute( + select(TLDPrice) + .where(TLDPrice.tld == tld.lower().lstrip(".")) + .where(TLDPrice.recorded_at >= cutoff) + .order_by(desc(TLDPrice.recorded_at)) + ) + prices = result.scalars().all() + + return [ + { + "tld": p.tld, + "registrar": p.registrar, + "registration_price": p.registration_price, + "renewal_price": p.renewal_price, + "recorded_at": p.recorded_at.isoformat() if p.recorded_at else None, + } + for p in prices + ] + + +# Singleton instance +tld_aggregator = TLDPriceAggregator() + diff --git a/backend/app/services/tld_scraper/base.py b/backend/app/services/tld_scraper/base.py new file mode 100644 index 0000000..dd1da9d --- /dev/null +++ b/backend/app/services/tld_scraper/base.py @@ -0,0 +1,220 @@ +"""Base class for TLD price scrapers.""" +import logging +import random +import asyncio +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from datetime import datetime +from typing import Optional + +import httpx + +logger = logging.getLogger(__name__) + + +@dataclass +class TLDPriceData: + """Data structure for TLD pricing information.""" + tld: str + registrar: str + registration_price: float + renewal_price: Optional[float] = None + transfer_price: Optional[float] = None + currency: str = "USD" + source: str = "scrape" + confidence: float = 1.0 + scraped_at: datetime = field(default_factory=datetime.utcnow) + promo_price: Optional[float] = None + notes: Optional[str] = None + + def to_dict(self) -> dict: + """Convert to dictionary.""" + return { + "tld": self.tld, + "registrar": self.registrar, + "registration_price": self.registration_price, + "renewal_price": self.renewal_price, + "transfer_price": self.transfer_price, + "currency": self.currency, + "source": self.source, + "confidence": self.confidence, + "scraped_at": self.scraped_at.isoformat(), + "promo_price": self.promo_price, + "notes": self.notes, + } + + +class ScraperError(Exception): + """Base exception for scraper errors.""" + pass + + +class HTMLStructureChanged(ScraperError): + """Website structure has changed - scraper needs update.""" + pass + + +class RateLimitDetected(ScraperError): + """Too many requests - wait and retry.""" + retry_after: int = 300 + + +class BaseTLDScraper(ABC): + """ + Base class for TLD price scrapers. + + Implements common functionality like HTTP requests, rate limiting, + user-agent rotation, and error handling. + """ + + name: str = "base" + base_url: str = "" + + # User agents for rotation + USER_AGENTS = [ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15", + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", + ] + + def __init__(self, timeout: float = 30.0, delay_range: tuple[float, float] = (1.0, 3.0)): + """ + Initialize the scraper. 
+ + Args: + timeout: HTTP request timeout in seconds + delay_range: Min and max delay between requests (seconds) + """ + self.timeout = timeout + self.delay_range = delay_range + self._request_count = 0 + + def get_user_agent(self) -> str: + """Get a random user agent.""" + return random.choice(self.USER_AGENTS) + + def get_headers(self) -> dict: + """Get HTTP headers for requests.""" + return { + "User-Agent": self.get_user_agent(), + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8", + "Accept-Language": "en-US,en;q=0.5", + "Accept-Encoding": "gzip, deflate, br", + "DNT": "1", + "Connection": "keep-alive", + "Upgrade-Insecure-Requests": "1", + } + + async def delay(self): + """Add random delay between requests.""" + delay = random.uniform(*self.delay_range) + await asyncio.sleep(delay) + + async def fetch_page(self, url: str) -> str: + """ + Fetch a webpage with proper headers and error handling. + + Args: + url: URL to fetch + + Returns: + HTML content as string + """ + self._request_count += 1 + + # Add delay after first request + if self._request_count > 1: + await self.delay() + + try: + async with httpx.AsyncClient(timeout=self.timeout) as client: + response = await client.get( + url, + headers=self.get_headers(), + follow_redirects=True, + ) + + if response.status_code == 429: + raise RateLimitDetected(f"Rate limited by {url}") + + if response.status_code != 200: + raise ScraperError(f"HTTP {response.status_code} for {url}") + + return response.text + + except httpx.TimeoutException: + raise ScraperError(f"Timeout fetching {url}") + except httpx.RequestError as e: + raise ScraperError(f"Request error for {url}: {e}") + + @staticmethod + def parse_price(text: str) -> Optional[float]: + """ + Parse a price from text. + + Handles formats like: + - $9.99 + - €8.50 + - £7.99 + - 9.99 USD + - $9,999.99 + + Args: + text: Text containing a price + + Returns: + Parsed price as float, or None if not parseable + """ + import re + + if not text: + return None + + # Clean the text + text = text.strip() + + # Remove currency symbols and extract number + # Match patterns like $9.99, €8,50, £7.99, 9.99 + match = re.search(r'[\$€£]?\s*([\d,]+\.?\d*)', text.replace(',', '')) + + if match: + try: + price = float(match.group(1)) + # Sanity check - prices should be between $0.50 and $500 + if 0.50 <= price <= 500: + return round(price, 2) + except ValueError: + pass + + return None + + @abstractmethod + async def scrape(self) -> list[TLDPriceData]: + """ + Scrape TLD prices from the source. + + Returns: + List of TLDPriceData objects + """ + pass + + async def health_check(self) -> bool: + """ + Check if the source is accessible. 
+ + Returns: + True if source is accessible, False otherwise + """ + try: + async with httpx.AsyncClient(timeout=10.0) as client: + response = await client.get( + self.base_url, + headers=self.get_headers(), + follow_redirects=True, + ) + return response.status_code == 200 + except Exception: + return False + diff --git a/backend/app/services/tld_scraper/porkbun.py b/backend/app/services/tld_scraper/porkbun.py new file mode 100644 index 0000000..d8b0c44 --- /dev/null +++ b/backend/app/services/tld_scraper/porkbun.py @@ -0,0 +1,134 @@ +"""Porkbun TLD price scraper using their public API.""" +import logging +from datetime import datetime + +import httpx + +from app.services.tld_scraper.base import BaseTLDScraper, TLDPriceData, ScraperError + +logger = logging.getLogger(__name__) + + +class PorkbunScraper(BaseTLDScraper): + """ + Scraper for Porkbun domain prices. + + Uses Porkbun's public pricing API (no API key required!). + This is the most reliable source as it's an official API. + """ + + name = "porkbun" + base_url = "https://api.porkbun.com" + + # API endpoint for pricing + PRICING_ENDPOINT = "https://api.porkbun.com/api/json/v3/pricing/get" + + async def scrape(self) -> list[TLDPriceData]: + """ + Scrape TLD prices from Porkbun's public API. + + Returns: + List of TLDPriceData objects with pricing for all available TLDs + """ + results = [] + + try: + async with httpx.AsyncClient(timeout=self.timeout) as client: + # Porkbun API requires POST with empty JSON body + response = await client.post( + self.PRICING_ENDPOINT, + json={}, + headers={ + "Content-Type": "application/json", + "User-Agent": self.get_user_agent(), + }, + ) + + if response.status_code != 200: + raise ScraperError(f"Porkbun API returned {response.status_code}") + + data = response.json() + + if data.get("status") != "SUCCESS": + raise ScraperError(f"Porkbun API error: {data.get('message', 'Unknown error')}") + + pricing = data.get("pricing", {}) + + if not pricing: + raise ScraperError("No pricing data returned from Porkbun API") + + logger.info(f"Porkbun API returned {len(pricing)} TLDs") + + now = datetime.utcnow() + + for tld, prices in pricing.items(): + try: + # Parse prices - Porkbun returns strings + reg_price = self._parse_porkbun_price(prices.get("registration")) + renewal_price = self._parse_porkbun_price(prices.get("renewal")) + transfer_price = self._parse_porkbun_price(prices.get("transfer")) + + # Skip if no registration price + if reg_price is None: + continue + + # Check for special/promo pricing + special_price = self._parse_porkbun_price(prices.get("special")) + + results.append(TLDPriceData( + tld=tld.lower().lstrip("."), + registrar="porkbun", + registration_price=reg_price, + renewal_price=renewal_price, + transfer_price=transfer_price, + promo_price=special_price, + currency="USD", + source="api", + confidence=1.0, # Official API = highest confidence + scraped_at=now, + )) + + except Exception as e: + logger.warning(f"Error parsing TLD {tld}: {e}") + continue + + logger.info(f"Successfully scraped {len(results)} TLD prices from Porkbun") + return results + + except httpx.TimeoutException: + raise ScraperError("Porkbun API timeout") + except httpx.RequestError as e: + raise ScraperError(f"Porkbun API request error: {e}") + + def _parse_porkbun_price(self, price_str: str | None) -> float | None: + """Parse Porkbun price string to float.""" + if not price_str: + return None + + try: + price = float(price_str) + # Sanity check + if 0 < price < 1000: + return round(price, 2) + except (ValueError, 
TypeError): + pass + + return None + + async def health_check(self) -> bool: + """Check if Porkbun API is accessible.""" + try: + async with httpx.AsyncClient(timeout=30.0) as client: + response = await client.post( + self.PRICING_ENDPOINT, + json={}, + headers={"Content-Type": "application/json"}, + ) + if response.status_code == 200: + data = response.json() + return data.get("status") == "SUCCESS" + return False + except Exception as e: + logger.debug(f"Porkbun health check failed: {e}") + return False + diff --git a/backend/app/services/tld_scraper/tld_list.py b/backend/app/services/tld_scraper/tld_list.py new file mode 100644 index 0000000..3e44c3b --- /dev/null +++ b/backend/app/services/tld_scraper/tld_list.py @@ -0,0 +1,40 @@ +"""TLD-List.com scraper (placeholder - site blocks automated requests).""" +import logging +from datetime import datetime + +from app.services.tld_scraper.base import BaseTLDScraper, TLDPriceData, ScraperError + +logger = logging.getLogger(__name__) + + +class TLDListScraper(BaseTLDScraper): + """ + Scraper for TLD-List.com. + + NOTE: TLD-List.com currently blocks automated requests (403). + This scraper is a placeholder for future implementation if they + open up access or we find a workaround. + + For now, use PorkbunScraper as the primary source. + """ + + name = "tld-list" + base_url = "https://tld-list.com" + + async def scrape(self) -> list[TLDPriceData]: + """ + Attempt to scrape TLD-List.com. + + Currently returns empty list as the site blocks automated requests. + """ + logger.warning( + "TLD-List.com blocks automated requests. " + "Use PorkbunScraper as primary source instead." + ) + return [] + + async def health_check(self) -> bool: + """Check if TLD-List.com is accessible.""" + # Currently always returns False due to blocking + return False + diff --git a/backend/requirements.txt b/backend/requirements.txt index 5b8017a..32a6c55 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -24,6 +24,10 @@ apscheduler>=3.10.4 # Email (optional, for notifications) aiosmtplib>=3.0.2 +# Web Scraping +beautifulsoup4>=4.12.0 +lxml>=5.0.0 + # Utilities python-dotenv>=1.0.1 pydantic[email]>=2.10.0 diff --git a/frontend/src/app/pricing/page.tsx b/frontend/src/app/pricing/page.tsx index 5f26ba2..8c6db6d 100644 --- a/frontend/src/app/pricing/page.tsx +++ b/frontend/src/app/pricing/page.tsx @@ -2,6 +2,7 @@ import { useEffect } from 'react' import { Header } from '@/components/Header' +import { Footer } from '@/components/Footer' import { useStore } from '@/lib/store' import { Check, ArrowRight } from 'lucide-react' import Link from 'next/link' @@ -92,7 +93,7 @@ export default function PricingPage() { } return ( -
+      [page wrapper element: JSX markup lost in extraction]
       {/* Ambient glow */}
@@ -100,7 +101,7 @@ export default function PricingPage() {
       [one-line wrapper change around the {/* Header */} section: JSX markup lost in extraction]
@@ -203,22 +204,7 @@ export default function PricingPage() {
-      {/* Footer */}
-      [inline footer markup removed: "© 2024 pounce" copyright line plus Home and Sign In links]
+      <Footer />
) } diff --git a/frontend/src/lib/api.ts b/frontend/src/lib/api.ts index 1d0542e..48a0f71 100644 --- a/frontend/src/lib/api.ts +++ b/frontend/src/lib/api.ts @@ -2,7 +2,19 @@ * API client for pounce backend */ -const API_BASE = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000/api/v1' +// Ensure API_BASE ends with /api/v1 and no trailing slash +const getApiBase = () => { + const baseUrl = process.env.NEXT_PUBLIC_API_URL || 'http://localhost:8000' + // Remove trailing slash if present + const cleanBase = baseUrl.replace(/\/$/, '') + // Add /api/v1 if not present + if (cleanBase.endsWith('/api/v1')) { + return cleanBase + } + return `${cleanBase}/api/v1` +} + +const API_BASE = getApiBase() interface ApiError { detail: string @@ -106,7 +118,7 @@ class ApiClient { name_servers: string[] | null error_message: string | null checked_at: string - }>('/check/', { + }>('/check', { method: 'POST', body: JSON.stringify({ domain, quick }), }) @@ -130,7 +142,7 @@ class ApiClient { page: number per_page: number pages: number - }>(`/domains/?page=${page}&per_page=${perPage}`) + }>(`/domains?page=${page}&per_page=${perPage}`) } async addDomain(name: string, notify = true) { @@ -139,7 +151,7 @@ class ApiClient { name: string status: string is_available: boolean - }>('/domains/', { + }>('/domains', { method: 'POST', body: JSON.stringify({ name, notify_on_available: notify }), }) @@ -183,7 +195,7 @@ class ApiClient { } started_at: string expires_at: string | null - }>('/subscription/') + }>('/subscription') } async getTiers() {