yves.gugger f0cc69ac95 feat: TLD price scraper, .ch domain fix, DB integration
Major changes:
- Add TLD price scraper with Porkbun API (886+ TLDs, no API key needed)
- Fix .ch domain checker using the custom RDAP endpoint at rdap.nic.ch
- Integrate database for TLD price history tracking
- Add admin endpoints for manual scrape and stats
- Extend scheduler with daily TLD price scrape job (03:00 UTC)
- Update API to use DB data with static fallback
- Update README with complete documentation

New files:
- backend/app/services/tld_scraper/ (scraper package)
- TLD_TRACKING_PLAN.md (implementation plan)

API changes:
- POST /admin/scrape-tld-prices - trigger manual scrape
- GET /admin/tld-prices/stats - database statistics
- GET /tld-prices/overview now uses DB data
2025-12-08 09:12:44 +01:00
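
For reference, the two new admin endpoints can be exercised with a small client script. This is a hypothetical sketch: the base URL and the absence of authentication on the /admin routes are assumptions, not part of this commit.

# Hypothetical sketch for calling the new admin endpoints.
# Assumptions: the service listens on localhost:8000 and /admin requires no auth.
import asyncio

import httpx


async def main() -> None:
    async with httpx.AsyncClient(base_url="http://localhost:8000") as client:
        scrape = await client.post("/admin/scrape-tld-prices")  # trigger manual scrape
        stats = await client.get("/admin/tld-prices/stats")     # database statistics
        print(scrape.status_code, stats.json())


asyncio.run(main())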

Python

"""Porkbun TLD price scraper using their public API."""
import logging
from datetime import datetime
import httpx
from app.services.tld_scraper.base import BaseTLDScraper, TLDPriceData, ScraperError
logger = logging.getLogger(__name__)
class PorkbunScraper(BaseTLDScraper):
"""
Scraper for Porkbun domain prices.
Uses Porkbun's public pricing API (no API key required!).
This is the most reliable source as it's an official API.
"""
name = "porkbun"
base_url = "https://api.porkbun.com"
# API endpoint for pricing
PRICING_ENDPOINT = "https://api.porkbun.com/api/json/v3/pricing/get"
async def scrape(self) -> list[TLDPriceData]:
"""
Scrape TLD prices from Porkbun's public API.
Returns:
List of TLDPriceData objects with pricing for all available TLDs
"""
results = []
try:
async with httpx.AsyncClient(timeout=self.timeout) as client:
# Porkbun API requires POST with empty JSON body
response = await client.post(
self.PRICING_ENDPOINT,
json={},
headers={
"Content-Type": "application/json",
"User-Agent": self.get_user_agent(),
},
)
if response.status_code != 200:
raise ScraperError(f"Porkbun API returned {response.status_code}")
data = response.json()
if data.get("status") != "SUCCESS":
raise ScraperError(f"Porkbun API error: {data.get('message', 'Unknown error')}")
pricing = data.get("pricing", {})
if not pricing:
raise ScraperError("No pricing data returned from Porkbun API")
logger.info(f"Porkbun API returned {len(pricing)} TLDs")
now = datetime.utcnow()
for tld, prices in pricing.items():
try:
# Parse prices - Porkbun returns strings
reg_price = self._parse_porkbun_price(prices.get("registration"))
renewal_price = self._parse_porkbun_price(prices.get("renewal"))
transfer_price = self._parse_porkbun_price(prices.get("transfer"))
# Skip if no registration price
if reg_price is None:
continue
# Check for special/promo pricing
special_price = self._parse_porkbun_price(prices.get("special"))
results.append(TLDPriceData(
tld=tld.lower().lstrip("."),
registrar="porkbun",
registration_price=reg_price,
renewal_price=renewal_price,
transfer_price=transfer_price,
promo_price=special_price,
currency="USD",
source="api",
confidence=1.0, # Official API = highest confidence
scraped_at=now,
))
except Exception as e:
logger.warning(f"Error parsing TLD {tld}: {e}")
continue
logger.info(f"Successfully scraped {len(results)} TLD prices from Porkbun")
return results
except httpx.TimeoutException:
raise ScraperError("Porkbun API timeout")
except httpx.RequestError as e:
raise ScraperError(f"Porkbun API request error: {e}")
def _parse_porkbun_price(self, price_str: str | None) -> float | None:
"""Parse Porkbun price string to float."""
if not price_str:
return None
try:
price = float(price_str)
# Sanity check
if 0 < price < 1000:
return round(price, 2)
except (ValueError, TypeError):
pass
return None
async def health_check(self) -> bool:
"""Check if Porkbun API is accessible."""
try:
async with httpx.AsyncClient(timeout=30.0) as client:
response = await client.post(
self.PRICING_ENDPOINT,
json={},
headers={"Content-Type": "application/json"},
)
if response.status_code == 200:
data = response.json()
return data.get("status") == "SUCCESS"
return False
except Exception as e:
logger.debug(f"Porkbun health check failed: {e}")
return False
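
For local testing, the scraper can also be run standalone. A minimal sketch, assuming PorkbunScraper() needs no constructor arguments (the real signature lives in BaseTLDScraper, which is not shown here) and that this module sits at app.services.tld_scraper.porkbun:

# Minimal standalone run; the constructor arguments and module path are assumptions.
import asyncio

from app.services.tld_scraper.porkbun import PorkbunScraper


async def main() -> None:
    scraper = PorkbunScraper()
    if not await scraper.health_check():
        print("Porkbun API not reachable")
        return
    prices = await scraper.scrape()
    print(f"Fetched {len(prices)} TLD prices; first: {prices[0]}")


asyncio.run(main())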