pounce/backend/app/services/hidden_api_scrapers.py

"""
Hidden JSON API Scrapers for Domain Auction Platforms.
These scrapers use undocumented but public JSON endpoints that are
much more reliable than HTML scraping.
Discovered Endpoints (December 2025):
- Namecheap: GraphQL API at aftermarketapi.namecheap.com
- Dynadot: REST API at dynadot-vue-api
- Sav.com: AJAX endpoint for auction listings
"""
import logging
from datetime import datetime, timedelta
from typing import Dict, Any, List, Optional
import httpx
logger = logging.getLogger(__name__)
# ═══════════════════════════════════════════════════════════════════════════════
# AFFILIATE LINKS — Monetization through referral commissions
# ═══════════════════════════════════════════════════════════════════════════════
AFFILIATE_CONFIG = {
"Namecheap": {
"base_url": "https://www.namecheap.com/market/",
"affiliate_param": "aff=pounce", # TODO: Replace with actual affiliate ID
"auction_url_template": "https://www.namecheap.com/market/domain/{domain}?aff=pounce",
},
"Dynadot": {
"base_url": "https://www.dynadot.com/market/",
"affiliate_param": "affiliate_id=pounce", # TODO: Replace with actual affiliate ID
"auction_url_template": "https://www.dynadot.com/market/auction/{domain}?affiliate_id=pounce",
},
"Sav": {
"base_url": "https://www.sav.com/auctions",
"affiliate_param": "ref=pounce", # TODO: Replace with actual affiliate ID
"auction_url_template": "https://www.sav.com/domain/{domain}?ref=pounce",
},
"GoDaddy": {
"base_url": "https://auctions.godaddy.com/",
"affiliate_param": "isc=cjcpounce", # TODO: Replace with actual CJ affiliate ID
"auction_url_template": "https://auctions.godaddy.com/trpItemListing.aspx?domain={domain}&isc=cjcpounce",
},
"DropCatch": {
"base_url": "https://www.dropcatch.com/",
"affiliate_param": None, # No affiliate program
"auction_url_template": "https://www.dropcatch.com/domain/{domain}",
},
"Sedo": {
"base_url": "https://sedo.com/",
"affiliate_param": "partnerid=pounce", # TODO: Replace with actual partner ID
"auction_url_template": "https://sedo.com/search/details/?domain={domain}&partnerid=pounce",
},
"NameJet": {
"base_url": "https://www.namejet.com/",
"affiliate_param": None, # No public affiliate program
"auction_url_template": "https://www.namejet.com/pages/Auctions/ViewAuctions.aspx?domain={domain}",
},
"ExpiredDomains": {
"base_url": "https://www.expireddomains.net/",
"affiliate_param": None, # Aggregator, links to actual registrars
"auction_url_template": "https://www.expireddomains.net/domain-name-search/?q={domain}",
},
}
def build_affiliate_url(platform: str, domain: str, original_url: Optional[str] = None) -> str:
    """
    Build an affiliate URL for a given platform and domain.

    If the platform has a URL template configured, the returned URL carries the
    affiliate tracking parameter. Otherwise, falls back to the caller-supplied
    original URL, or to a Google search for the domain as a last resort.
    """
    config = AFFILIATE_CONFIG.get(platform, {})
    if config.get("auction_url_template"):
        return config["auction_url_template"].format(domain=domain)
    return original_url or f"https://www.google.com/search?q={domain}+auction"
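# Example (sketch): platforms with a configured template get a tracked URL;
# unknown platforms ("SomeOtherPlatform" below is hypothetical) fall back to
# the caller-supplied URL.
#
#     build_affiliate_url("Dynadot", "example.com")
#     -> "https://www.dynadot.com/market/auction/example.com?affiliate_id=pounce"
#     build_affiliate_url("SomeOtherPlatform", "example.com", "https://example.org/listing")
#     -> "https://example.org/listing"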
# ═══════════════════════════════════════════════════════════════════════════════
# NAMECHEAP SCRAPER — GraphQL API
# ═══════════════════════════════════════════════════════════════════════════════
class NamecheapApiScraper:
"""
Scraper for Namecheap Marketplace using their hidden GraphQL API.
Endpoint: https://aftermarketapi.namecheap.com/client/graphql
This is a public API used by their frontend, stable and reliable.
"""
GRAPHQL_ENDPOINT = "https://aftermarketapi.namecheap.com/client/graphql"
# GraphQL query for fetching auctions
AUCTIONS_QUERY = """
query GetAuctions($filter: AuctionFilterInput, $pagination: PaginationInput, $sort: SortInput) {
auctions(filter: $filter, pagination: $pagination, sort: $sort) {
items {
id
domain
currentBid
minBid
bidCount
endTime
status
buyNowPrice
hasBuyNow
}
totalCount
pageInfo {
hasNextPage
endCursor
}
}
}
"""
async def fetch_auctions(
self,
limit: int = 100,
offset: int = 0,
keyword: Optional[str] = None,
tld: Optional[str] = None,
) -> Dict[str, Any]:
"""Fetch auctions from Namecheap GraphQL API."""
try:
async with httpx.AsyncClient(timeout=30.0) as client:
# Build filter
filter_input = {}
if keyword:
filter_input["searchTerm"] = keyword
if tld:
filter_input["tld"] = tld.lstrip(".")
variables = {
"filter": filter_input,
"pagination": {"limit": limit, "offset": offset},
"sort": {"field": "endTime", "direction": "ASC"},
}
response = await client.post(
self.GRAPHQL_ENDPOINT,
json={
"query": self.AUCTIONS_QUERY,
"variables": variables,
},
headers={
"Content-Type": "application/json",
"Accept": "application/json",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
"Origin": "https://www.namecheap.com",
"Referer": "https://www.namecheap.com/market/",
},
)
if response.status_code != 200:
logger.error(f"Namecheap API error: {response.status_code}")
return {"items": [], "total": 0, "error": response.text}
data = response.json()
if "errors" in data:
logger.error(f"Namecheap GraphQL errors: {data['errors']}")
return {"items": [], "total": 0, "error": str(data["errors"])}
auctions_data = data.get("data", {}).get("auctions", {})
items = auctions_data.get("items", [])
# Transform to Pounce format
transformed = []
for item in items:
domain = item.get("domain", "")
tld_part = domain.rsplit(".", 1)[-1] if "." in domain else ""
transformed.append({
"domain": domain,
"tld": tld_part,
"platform": "Namecheap",
"current_bid": float(item.get("currentBid", 0)),
"min_bid": float(item.get("minBid", 0)),
"num_bids": int(item.get("bidCount", 0)),
"end_time": item.get("endTime"),
"buy_now_price": float(item.get("buyNowPrice")) if item.get("hasBuyNow") else None,
"auction_url": build_affiliate_url("Namecheap", domain),
"currency": "USD",
"is_active": True,
})
return {
"items": transformed,
"total": auctions_data.get("totalCount", 0),
"has_more": auctions_data.get("pageInfo", {}).get("hasNextPage", False),
}
except Exception as e:
logger.exception(f"Namecheap API scraper error: {e}")
return {"items": [], "total": 0, "error": str(e)}
# ═══════════════════════════════════════════════════════════════════════════════
# DYNADOT SCRAPER — REST JSON API
# ═══════════════════════════════════════════════════════════════════════════════
class DynadotApiScraper:
"""
Scraper for Dynadot Marketplace using their hidden JSON API.
Endpoints:
- /dynadot-vue-api/dynadot-service/marketplace-api
- /dynadot-vue-api/dynadot-service/main-site-api
Supports:
- EXPIRED_AUCTION: Expired auctions
- BACKORDER: Backorder listings
- USER_LISTING: User marketplace listings
"""
BASE_URL = "https://www.dynadot.com"
MARKETPLACE_API = "/dynadot-vue-api/dynadot-service/marketplace-api"
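    # Response shape (sketch, inferred from the parsing below; the API is
    # undocumented and field names vary between listing types):
    #
    #     {"code": 200, "msg": "success",
    #      "data": {"total_count": 100,
    #               "records": [{"domain": "example.com",
    #                            "end_time_stamp": 1765440000000,
    #                            "bid_price": "1,250", "bids": 12, "age": 8}]}}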
async def fetch_auctions(
self,
aftermarket_type: str = "EXPIRED_AUCTION",
page_size: int = 100,
page_index: int = 0,
keyword: Optional[str] = None,
) -> Dict[str, Any]:
"""Fetch auctions from Dynadot REST API."""
try:
async with httpx.AsyncClient(timeout=30.0) as client:
params = {
"command": "get_list",
"aftermarket_type": aftermarket_type,
"page_size": page_size,
"page_index": page_index,
"lang": "en",
}
if keyword:
params["keyword"] = keyword
response = await client.post(
f"{self.BASE_URL}{self.MARKETPLACE_API}",
params=params,
headers={
"Accept": "application/json",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
"Referer": "https://www.dynadot.com/market",
},
)
if response.status_code != 200:
logger.error(f"Dynadot API error: {response.status_code}")
return {"items": [], "total": 0, "error": response.text}
data = response.json()
                # Dynadot signals success with code 0/200 or msg == "success"
                if data.get("code") not in (0, 200) and data.get("msg") != "success":
logger.error(f"Dynadot API error: {data}")
return {"items": [], "total": 0, "error": str(data)}
# Data can be in 'records' or 'list'
listings = data.get("data", {}).get("records", []) or data.get("data", {}).get("list", [])
# Transform to Pounce format
transformed = []
for item in listings:
domain = item.get("domain", "") or item.get("name", "") or item.get("utf8_name", "")
tld_part = domain.rsplit(".", 1)[-1] if "." in domain else ""
                    # Parse end time (Dynadot may send a millisecond timestamp or a string)
                    end_time = None
                    end_time_stamp = item.get("end_time_stamp")
                    if end_time_stamp:
                        try:
                            end_time = datetime.fromtimestamp(end_time_stamp / 1000)
                        except (ValueError, TypeError, OSError, OverflowError):
                            pass
                    if not end_time:
                        end_time_str = item.get("end_time") or item.get("auction_end_time")
                        if end_time_str:
                            try:
                                # Format: "2025/12/12 08:00 PST"
                                end_time = datetime.strptime(end_time_str.split(" PST")[0], "%Y/%m/%d %H:%M")
                            except (ValueError, TypeError):
                                end_time = datetime.utcnow() + timedelta(days=1)
                    # Parse bid price (can be a string like "$2,550" or a number)
                    bid_price = item.get("bid_price") or item.get("current_bid") or item.get("price") or 0
                    if isinstance(bid_price, str):
                        bid_price = float(bid_price.replace(",", "").replace("$", "") or 0)
transformed.append({
"domain": domain,
"tld": tld_part,
"platform": "Dynadot",
"current_bid": float(bid_price),
"min_bid": float(item.get("start_price", 0) or 0),
"num_bids": int(item.get("bids", 0) or item.get("bid_count", 0) or 0),
"end_time": end_time or datetime.utcnow() + timedelta(days=1),
"buy_now_price": float(item.get("accepted_bid_price")) if item.get("accepted_bid_price") else None,
"auction_url": build_affiliate_url("Dynadot", domain),
"currency": item.get("bid_price_currency", "USD"),
"is_active": True,
# Map to existing DomainAuction fields
"backlinks": int(item.get("links", 0) or 0),
"age_years": int(item.get("age", 0) or 0),
})
return {
"items": transformed,
"total": data.get("data", {}).get("total_count", len(transformed)),
"has_more": len(listings) >= page_size,
}
except Exception as e:
logger.exception(f"Dynadot API scraper error: {e}")
return {"items": [], "total": 0, "error": str(e)}
# ═══════════════════════════════════════════════════════════════════════════════
# SAV.COM SCRAPER — AJAX JSON API
# ═══════════════════════════════════════════════════════════════════════════════
class SavApiScraper:
"""
Scraper for Sav.com Auctions using their hidden AJAX endpoint.
Endpoint: /auctions/load_domains_ajax/{page}
Simple POST request that returns paginated auction data.
"""
BASE_URL = "https://www.sav.com"
AJAX_ENDPOINT = "/auctions/load_domains_ajax"
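    # Example request (sketch; headers mirror fetch_auctions below):
    #
    #     curl -X POST "https://www.sav.com/auctions/load_domains_ajax/0" \
    #          -H "X-Requested-With: XMLHttpRequest" \
    #          -H "Referer: https://www.sav.com/domains/auctions"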
async def fetch_auctions(
self,
page: int = 0,
) -> Dict[str, Any]:
"""Fetch auctions from Sav.com AJAX API."""
try:
async with httpx.AsyncClient(timeout=30.0) as client:
response = await client.post(
f"{self.BASE_URL}{self.AJAX_ENDPOINT}/{page}",
headers={
"Accept": "application/json, text/html",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
"Referer": "https://www.sav.com/domains/auctions",
"X-Requested-With": "XMLHttpRequest",
},
)
if response.status_code != 200:
logger.error(f"Sav API error: {response.status_code}")
return {"items": [], "total": 0, "error": response.text}
                # Sav may answer with JSON or with an HTML fragment carrying
                # the same data, so check the content type before decoding
                content_type = response.headers.get("content-type", "")
                if "application/json" in content_type:
                    data = response.json()
                else:
                    # HTML response - fall back to BeautifulSoup parsing
                    logger.warning("Sav returned HTML instead of JSON, parsing...")
                    return await self._parse_html_response(response.text)
listings = data.get("domains", data.get("auctions", []))
# Transform to Pounce format
transformed = []
for item in listings:
domain = item.get("domain", "") or item.get("name", "")
tld_part = domain.rsplit(".", 1)[-1] if "." in domain else ""
                    # Parse end time (ISO 8601, possibly with a trailing "Z")
                    end_time_str = item.get("end_time") or item.get("ends_at")
                    end_time = None
                    if end_time_str:
                        try:
                            end_time = datetime.fromisoformat(end_time_str.replace("Z", "+00:00"))
                        except (ValueError, TypeError):
                            end_time = datetime.utcnow() + timedelta(days=1)
transformed.append({
"domain": domain,
"tld": tld_part,
"platform": "Sav",
"current_bid": float(item.get("current_bid", 0) or item.get("price", 0)),
"min_bid": float(item.get("min_bid", 0) or 0),
"num_bids": int(item.get("bids", 0) or 0),
"end_time": end_time,
"buy_now_price": float(item.get("buy_now")) if item.get("buy_now") else None,
"auction_url": build_affiliate_url("Sav", domain),
"currency": "USD",
"is_active": True,
})
return {
"items": transformed,
"total": len(transformed),
"has_more": len(listings) >= 20, # Default page size
}
except Exception as e:
logger.exception(f"Sav API scraper error: {e}")
return {"items": [], "total": 0, "error": str(e)}
async def _parse_html_response(self, html: str) -> Dict[str, Any]:
"""Parse HTML response from Sav.com when JSON is not available."""
try:
            # Local import: bs4 is only required for this HTML fallback path
            from bs4 import BeautifulSoup
            soup = BeautifulSoup(html, "html.parser")
# Find auction rows
rows = soup.select(".auction-row, .domain-row, tr[data-domain]")
transformed = []
for row in rows:
domain_el = row.select_one(".domain-name, .name, [data-domain]")
price_el = row.select_one(".price, .bid, .current-bid")
time_el = row.select_one(".time-left, .ends, .countdown")
bids_el = row.select_one(".bids, .bid-count")
if not domain_el:
continue
domain = domain_el.get_text(strip=True) or domain_el.get("data-domain", "")
tld_part = domain.rsplit(".", 1)[-1] if "." in domain else ""
price_text = price_el.get_text(strip=True) if price_el else "0"
price = float("".join(c for c in price_text if c.isdigit() or c == ".") or "0")
bids_text = bids_el.get_text(strip=True) if bids_el else "0"
bids = int("".join(c for c in bids_text if c.isdigit()) or "0")
transformed.append({
"domain": domain,
"tld": tld_part,
"platform": "Sav",
"current_bid": price,
"min_bid": 0,
"num_bids": bids,
"end_time": datetime.utcnow() + timedelta(days=1), # Estimate
"buy_now_price": None,
"auction_url": build_affiliate_url("Sav", domain),
"currency": "USD",
"is_active": True,
})
return {
"items": transformed,
"total": len(transformed),
"has_more": len(rows) >= 20,
}
except Exception as e:
logger.exception(f"Sav HTML parsing error: {e}")
return {"items": [], "total": 0, "error": str(e)}
# ═══════════════════════════════════════════════════════════════════════════════
# UNIFIED SCRAPER — Combines all hidden API scrapers
# ═══════════════════════════════════════════════════════════════════════════════
class HiddenApiScraperService:
"""
Unified service that combines all hidden API scrapers.
Priority order:
1. JSON APIs (most reliable)
2. GraphQL APIs (Namecheap)
3. AJAX endpoints (fallback)
All URLs include affiliate tracking for monetization.
"""
def __init__(self):
self.namecheap = NamecheapApiScraper()
self.dynadot = DynadotApiScraper()
self.sav = SavApiScraper()
async def scrape_all(self, limit_per_platform: int = 100) -> Dict[str, Any]:
"""
Scrape all platforms using hidden APIs.
Returns combined results with platform breakdown.
"""
results = {
"total_found": 0,
"platforms": {},
"errors": [],
"items": [],
}
# Scrape Namecheap
try:
namecheap_data = await self.namecheap.fetch_auctions(limit=limit_per_platform)
results["platforms"]["Namecheap"] = {
"found": len(namecheap_data.get("items", [])),
"total": namecheap_data.get("total", 0),
}
results["items"].extend(namecheap_data.get("items", []))
results["total_found"] += len(namecheap_data.get("items", []))
if namecheap_data.get("error"):
results["errors"].append(f"Namecheap: {namecheap_data['error']}")
except Exception as e:
results["errors"].append(f"Namecheap: {str(e)}")
# Scrape Dynadot
try:
dynadot_data = await self.dynadot.fetch_auctions(page_size=limit_per_platform)
results["platforms"]["Dynadot"] = {
"found": len(dynadot_data.get("items", [])),
"total": dynadot_data.get("total", 0),
}
results["items"].extend(dynadot_data.get("items", []))
results["total_found"] += len(dynadot_data.get("items", []))
if dynadot_data.get("error"):
results["errors"].append(f"Dynadot: {dynadot_data['error']}")
except Exception as e:
results["errors"].append(f"Dynadot: {str(e)}")
# Scrape Sav.com
try:
sav_data = await self.sav.fetch_auctions(page=0)
results["platforms"]["Sav"] = {
"found": len(sav_data.get("items", [])),
"total": sav_data.get("total", 0),
}
results["items"].extend(sav_data.get("items", []))
results["total_found"] += len(sav_data.get("items", []))
if sav_data.get("error"):
results["errors"].append(f"Sav: {sav_data['error']}")
except Exception as e:
results["errors"].append(f"Sav: {str(e)}")
return results
# Export instances
namecheap_scraper = NamecheapApiScraper()
dynadot_scraper = DynadotApiScraper()
sav_scraper = SavApiScraper()
hidden_api_scraper = HiddenApiScraperService()
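
# Minimal smoke test (sketch): run this module directly to exercise all three
# scrapers once. Assumes network access; the printed keys match scrape_all().
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        results = await hidden_api_scraper.scrape_all(limit_per_platform=10)
        print(f"Total found: {results['total_found']}")
        for platform, stats in results["platforms"].items():
            print(f"  {platform}: {stats['found']} found / {stats['total']} total")
        for err in results["errors"]:
            print(f"  ERROR: {err}")

    asyncio.run(_demo())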