yves.gugger f0cc69ac95 feat: TLD price scraper, .ch domain fix, DB integration
Major changes:
- Add TLD price scraper with Porkbun API (886+ TLDs, no API key needed)
- Fix .ch domain checker using the custom RDAP endpoint at rdap.nic.ch
- Integrate database for TLD price history tracking
- Add admin endpoints for manual scrape and stats
- Extend scheduler with daily TLD price scrape job (03:00 UTC)
- Update API to use DB data with static fallback
- Update README with complete documentation

New files:
- backend/app/services/tld_scraper/ (scraper package)
- TLD_TRACKING_PLAN.md (implementation plan)

API changes:
- POST /admin/scrape-tld-prices - trigger manual scrape
- GET /admin/tld-prices/stats - database statistics
- GET /tld-prices/overview now uses DB data
2025-12-08 09:12:44 +01:00
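
For reference, the two new admin endpoints can be exercised with a small client script. This is a hypothetical sketch: the base URL and the absence of authentication on the /admin routes are assumptions, not part of this commit.

# Hypothetical sketch for calling the new admin endpoints.
# Assumptions: the service listens on localhost:8000 and /admin requires no auth.
import asyncio

import httpx


async def main() -> None:
    async with httpx.AsyncClient(base_url="http://localhost:8000") as client:
        scrape = await client.post("/admin/scrape-tld-prices")  # trigger manual scrape
        stats = await client.get("/admin/tld-prices/stats")     # database statistics
        print(scrape.status_code, stats.json())


asyncio.run(main())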

Python

"""Porkbun TLD price scraper using their public API."""
import logging
from datetime import datetime
import httpx
from app.services.tld_scraper.base import BaseTLDScraper, TLDPriceData, ScraperError
logger = logging.getLogger(__name__)
class PorkbunScraper(BaseTLDScraper):
"""
Scraper for Porkbun domain prices.
Uses Porkbun's public pricing API (no API key required!).
This is the most reliable source as it's an official API.
"""
name = "porkbun"
base_url = "https://api.porkbun.com"
# API endpoint for pricing
PRICING_ENDPOINT = "https://api.porkbun.com/api/json/v3/pricing/get"
async def scrape(self) -> list[TLDPriceData]:
"""
Scrape TLD prices from Porkbun's public API.
Returns:
List of TLDPriceData objects with pricing for all available TLDs
"""
results = []
try:
async with httpx.AsyncClient(timeout=self.timeout) as client:
# Porkbun API requires POST with empty JSON body
response = await client.post(
self.PRICING_ENDPOINT,
json={},
headers={
"Content-Type": "application/json",
"User-Agent": self.get_user_agent(),
},
)
if response.status_code != 200:
raise ScraperError(f"Porkbun API returned {response.status_code}")
data = response.json()
if data.get("status") != "SUCCESS":
raise ScraperError(f"Porkbun API error: {data.get('message', 'Unknown error')}")
pricing = data.get("pricing", {})
if not pricing:
raise ScraperError("No pricing data returned from Porkbun API")
logger.info(f"Porkbun API returned {len(pricing)} TLDs")
now = datetime.utcnow()
for tld, prices in pricing.items():
try:
# Parse prices - Porkbun returns strings
reg_price = self._parse_porkbun_price(prices.get("registration"))
renewal_price = self._parse_porkbun_price(prices.get("renewal"))
transfer_price = self._parse_porkbun_price(prices.get("transfer"))
# Skip if no registration price
if reg_price is None:
continue
# Check for special/promo pricing
special_price = self._parse_porkbun_price(prices.get("special"))
results.append(TLDPriceData(
tld=tld.lower().lstrip("."),
registrar="porkbun",
registration_price=reg_price,
renewal_price=renewal_price,
transfer_price=transfer_price,
promo_price=special_price,
currency="USD",
source="api",
confidence=1.0, # Official API = highest confidence
scraped_at=now,
))
except Exception as e:
logger.warning(f"Error parsing TLD {tld}: {e}")
continue
logger.info(f"Successfully scraped {len(results)} TLD prices from Porkbun")
return results
except httpx.TimeoutException:
raise ScraperError("Porkbun API timeout")
except httpx.RequestError as e:
raise ScraperError(f"Porkbun API request error: {e}")
def _parse_porkbun_price(self, price_str: str | None) -> float | None:
"""Parse Porkbun price string to float."""
if not price_str:
return None
try:
price = float(price_str)
# Sanity check
if 0 < price < 1000:
return round(price, 2)
except (ValueError, TypeError):
pass
return None
async def health_check(self) -> bool:
"""Check if Porkbun API is accessible."""
try:
async with httpx.AsyncClient(timeout=30.0) as client:
response = await client.post(
self.PRICING_ENDPOINT,
json={},
headers={"Content-Type": "application/json"},
)
if response.status_code == 200:
data = response.json()
return data.get("status") == "SUCCESS"
return False
except Exception as e:
logger.debug(f"Porkbun health check failed: {e}")
return False
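
For local testing, the scraper can also be run standalone. A minimal sketch, assuming PorkbunScraper() needs no constructor arguments (the real signature lives in BaseTLDScraper, which is not shown here) and that this module sits at app.services.tld_scraper.porkbun:

# Minimal standalone run; the constructor arguments and module path are assumptions.
import asyncio

from app.services.tld_scraper.porkbun import PorkbunScraper


async def main() -> None:
    scraper = PorkbunScraper()
    if not await scraper.health_check():
        print("Porkbun API not reachable")
        return
    prices = await scraper.scrape()
    print(f"Fetched {len(prices)} TLD prices; first: {prices[0]}")


asyncio.run(main())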