Major changes:
- Add TLD price scraper with Porkbun API (886+ TLDs, no API key needed)
- Fix .ch domain checker using rdap.nic.ch custom RDAP
- Integrate database for TLD price history tracking
- Add admin endpoints for manual scrape and stats
- Extend scheduler with daily TLD price scrape job (03:00 UTC)
- Update API to use DB data with static fallback
- Update README with complete documentation

New files:
- backend/app/services/tld_scraper/ (scraper package)
- TLD_TRACKING_PLAN.md (implementation plan)

API changes (a call sketch follows below):
- POST /admin/scrape-tld-prices - trigger manual scrape
- GET /admin/tld-prices/stats - database statistics
- GET /tld-prices/overview now uses DB data
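A minimal sketch of exercising the new endpoints with httpx. The base URL, port, and the absence of authentication are assumptions for illustration; they are not specified by this change.

import asyncio

import httpx

BASE_URL = "http://localhost:8000"  # assumed local dev server


async def main() -> None:
    async with httpx.AsyncClient(base_url=BASE_URL, timeout=60.0) as client:
        # Trigger a manual TLD price scrape
        resp = await client.post("/admin/scrape-tld-prices")
        print(resp.status_code, resp.json())

        # Inspect database statistics for scraped TLD prices
        stats = await client.get("/admin/tld-prices/stats")
        print(stats.json())

        # Overview is now served from DB data (static fallback if empty)
        overview = await client.get("/tld-prices/overview")
        print(overview.json())


if __name__ == "__main__":
    asyncio.run(main())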
"""Porkbun TLD price scraper using their public API."""
|
|
import logging
|
|
from datetime import datetime
|
|
|
|
import httpx
|
|
|
|
from app.services.tld_scraper.base import BaseTLDScraper, TLDPriceData, ScraperError
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class PorkbunScraper(BaseTLDScraper):
|
|
"""
|
|
Scraper for Porkbun domain prices.
|
|
|
|
Uses Porkbun's public pricing API (no API key required!).
|
|
This is the most reliable source as it's an official API.
|
|
"""
|
|
|
|
name = "porkbun"
|
|
base_url = "https://api.porkbun.com"
|
|
|
|
# API endpoint for pricing
|
|
PRICING_ENDPOINT = "https://api.porkbun.com/api/json/v3/pricing/get"
|
|
|
|
async def scrape(self) -> list[TLDPriceData]:
|
|
"""
|
|
Scrape TLD prices from Porkbun's public API.
|
|
|
|
Returns:
|
|
List of TLDPriceData objects with pricing for all available TLDs
|
|
"""
|
|
results = []
|
|
|
|
try:
|
|
async with httpx.AsyncClient(timeout=self.timeout) as client:
|
|
# Porkbun API requires POST with empty JSON body
|
|
response = await client.post(
|
|
self.PRICING_ENDPOINT,
|
|
json={},
|
|
headers={
|
|
"Content-Type": "application/json",
|
|
"User-Agent": self.get_user_agent(),
|
|
},
|
|
)
|
|
|
|
if response.status_code != 200:
|
|
raise ScraperError(f"Porkbun API returned {response.status_code}")
|
|
|
|
data = response.json()
|
|
|
|
if data.get("status") != "SUCCESS":
|
|
raise ScraperError(f"Porkbun API error: {data.get('message', 'Unknown error')}")
|
|
|
|
pricing = data.get("pricing", {})
|
|
|
|
if not pricing:
|
|
raise ScraperError("No pricing data returned from Porkbun API")
|
|
|
|
logger.info(f"Porkbun API returned {len(pricing)} TLDs")
|
|
|
|
now = datetime.utcnow()
|
|
|
|
for tld, prices in pricing.items():
|
|
try:
|
|
# Parse prices - Porkbun returns strings
|
|
reg_price = self._parse_porkbun_price(prices.get("registration"))
|
|
renewal_price = self._parse_porkbun_price(prices.get("renewal"))
|
|
transfer_price = self._parse_porkbun_price(prices.get("transfer"))
|
|
|
|
# Skip if no registration price
|
|
if reg_price is None:
|
|
continue
|
|
|
|
# Check for special/promo pricing
|
|
special_price = self._parse_porkbun_price(prices.get("special"))
|
|
|
|
results.append(TLDPriceData(
|
|
tld=tld.lower().lstrip("."),
|
|
registrar="porkbun",
|
|
registration_price=reg_price,
|
|
renewal_price=renewal_price,
|
|
transfer_price=transfer_price,
|
|
promo_price=special_price,
|
|
currency="USD",
|
|
source="api",
|
|
confidence=1.0, # Official API = highest confidence
|
|
scraped_at=now,
|
|
))
|
|
|
|
except Exception as e:
|
|
logger.warning(f"Error parsing TLD {tld}: {e}")
|
|
continue
|
|
|
|
logger.info(f"Successfully scraped {len(results)} TLD prices from Porkbun")
|
|
return results
|
|
|
|
except httpx.TimeoutException:
|
|
raise ScraperError("Porkbun API timeout")
|
|
except httpx.RequestError as e:
|
|
raise ScraperError(f"Porkbun API request error: {e}")
|
|
|
|
def _parse_porkbun_price(self, price_str: str | None) -> float | None:
|
|
"""Parse Porkbun price string to float."""
|
|
if not price_str:
|
|
return None
|
|
|
|
try:
|
|
price = float(price_str)
|
|
# Sanity check
|
|
if 0 < price < 1000:
|
|
return round(price, 2)
|
|
except (ValueError, TypeError):
|
|
pass
|
|
|
|
return None
|
|
|
|
async def health_check(self) -> bool:
|
|
"""Check if Porkbun API is accessible."""
|
|
try:
|
|
async with httpx.AsyncClient(timeout=30.0) as client:
|
|
response = await client.post(
|
|
self.PRICING_ENDPOINT,
|
|
json={},
|
|
headers={"Content-Type": "application/json"},
|
|
)
|
|
if response.status_code == 200:
|
|
data = response.json()
|
|
return data.get("status") == "SUCCESS"
|
|
return False
|
|
except Exception as e:
|
|
logger.debug(f"Porkbun health check failed: {e}")
|
|
return False
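
# --- Hypothetical manual-test entry point, not part of the original module. ---
# A minimal sketch assuming PorkbunScraper can be instantiated with no arguments
# and that BaseTLDScraper supplies `self.timeout` and `get_user_agent()`; adjust
# to the real base-class constructor before relying on it.
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        scraper = PorkbunScraper()
        if await scraper.health_check():
            prices = await scraper.scrape()
            print(f"Fetched {len(prices)} TLD prices")
        else:
            print("Porkbun API is not reachable")

    asyncio.run(_demo())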