pounce/backend/app/services/hidden_api_scrapers.py
Yves Gugger 42fc4fec52
feat: Enhanced auction scrapers with multiple sources
- Add GoDaddy RSS Feed scraper (bypasses Cloudflare)
- Enhanced ExpiredDomains scraper (multiple pages, TLDs)
- Improved hidden API scrapers integration
- Add automated scraper cron script (runs every 30 min)
- Playwright stealth mode installed on server

Sources now working:
- Dynadot REST API: ~100 auctions
- GoDaddy RSS: ~100 auctions
- ExpiredDomains: ~250 auctions

Total: 467 auctions in database
2025-12-11 20:58:04 +01:00


"""
Hidden JSON API Scrapers for Domain Auction Platforms.
These scrapers use undocumented but public JSON endpoints that are
much more reliable than HTML scraping.
Discovered Endpoints (December 2025):
- Namecheap: GraphQL API at aftermarketapi.namecheap.com
- Dynadot: REST API at dynadot-vue-api
- Sav.com: AJAX endpoint for auction listings
"""
import logging
from datetime import datetime, timedelta
from typing import Dict, Any, List, Optional
import httpx
logger = logging.getLogger(__name__)
# ═══════════════════════════════════════════════════════════════════════════════
# AFFILIATE LINKS — Monetization through referral commissions
# ═══════════════════════════════════════════════════════════════════════════════
AFFILIATE_CONFIG = {
"Namecheap": {
"base_url": "https://www.namecheap.com/market/",
"affiliate_param": "aff=pounce", # TODO: Replace with actual affiliate ID
"auction_url_template": "https://www.namecheap.com/market/domain/{domain}?aff=pounce",
},
"Dynadot": {
"base_url": "https://www.dynadot.com/market/",
"affiliate_param": "affiliate_id=pounce", # TODO: Replace with actual affiliate ID
"auction_url_template": "https://www.dynadot.com/market/auction/{domain}?affiliate_id=pounce",
},
"Sav": {
"base_url": "https://www.sav.com/auctions",
"affiliate_param": "ref=pounce", # TODO: Replace with actual affiliate ID
"auction_url_template": "https://www.sav.com/domain/{domain}?ref=pounce",
},
"GoDaddy": {
"base_url": "https://auctions.godaddy.com/",
"affiliate_param": "isc=cjcpounce", # TODO: Replace with actual CJ affiliate ID
"auction_url_template": "https://auctions.godaddy.com/trpItemListing.aspx?domain={domain}&isc=cjcpounce",
},
"DropCatch": {
"base_url": "https://www.dropcatch.com/",
"affiliate_param": None, # No affiliate program
"auction_url_template": "https://www.dropcatch.com/domain/{domain}",
},
"Sedo": {
"base_url": "https://sedo.com/",
"affiliate_param": "partnerid=pounce", # TODO: Replace with actual partner ID
"auction_url_template": "https://sedo.com/search/details/?domain={domain}&partnerid=pounce",
},
"NameJet": {
"base_url": "https://www.namejet.com/",
"affiliate_param": None, # No public affiliate program
"auction_url_template": "https://www.namejet.com/pages/Auctions/ViewAuctions.aspx?domain={domain}",
},
"ExpiredDomains": {
"base_url": "https://www.expireddomains.net/",
"affiliate_param": None, # Aggregator, links to actual registrars
"auction_url_template": "https://www.expireddomains.net/domain-name-search/?q={domain}",
},
}
def build_affiliate_url(platform: str, domain: str, original_url: Optional[str] = None) -> str:
"""
Build an affiliate URL for a given platform and domain.
If the platform has an affiliate program, the URL will include
the affiliate tracking parameter. Otherwise, returns the original URL.
"""
config = AFFILIATE_CONFIG.get(platform, {})
if config.get("auction_url_template"):
return config["auction_url_template"].format(domain=domain)
return original_url or f"https://www.google.com/search?q={domain}+auction"
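# Usage sketch (illustrative only, derived from the AFFILIATE_CONFIG entries above):
#
#   build_affiliate_url("Dynadot", "example.com")
#   # -> "https://www.dynadot.com/market/auction/example.com?affiliate_id=pounce"
#   build_affiliate_url("UnknownPlatform", "example.com")
#   # -> "https://www.google.com/search?q=example.com+auction"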
# ═══════════════════════════════════════════════════════════════════════════════
# NAMECHEAP SCRAPER — GraphQL API
# ═══════════════════════════════════════════════════════════════════════════════
class NamecheapApiScraper:
"""
Scraper for Namecheap Marketplace using their hidden GraphQL API.
Endpoint: https://aftermarketapi.namecheap.com/client/graphql
This is the endpoint their own frontend uses; the schema is unofficial and
may require a query hash (see the unified service notes below), so treat
this scraper as experimental.
"""
GRAPHQL_ENDPOINT = "https://aftermarketapi.namecheap.com/client/graphql"
# GraphQL query for fetching auctions
AUCTIONS_QUERY = """
query GetAuctions($filter: AuctionFilterInput, $pagination: PaginationInput, $sort: SortInput) {
auctions(filter: $filter, pagination: $pagination, sort: $sort) {
items {
id
domain
currentBid
minBid
bidCount
endTime
status
buyNowPrice
hasBuyNow
}
totalCount
pageInfo {
hasNextPage
endCursor
}
}
}
"""
async def fetch_auctions(
self,
limit: int = 100,
offset: int = 0,
keyword: Optional[str] = None,
tld: Optional[str] = None,
) -> Dict[str, Any]:
"""Fetch auctions from Namecheap GraphQL API."""
try:
async with httpx.AsyncClient(timeout=30.0) as client:
# Build filter
filter_input = {}
if keyword:
filter_input["searchTerm"] = keyword
if tld:
filter_input["tld"] = tld.lstrip(".")
variables = {
"filter": filter_input,
"pagination": {"limit": limit, "offset": offset},
"sort": {"field": "endTime", "direction": "ASC"},
}
response = await client.post(
self.GRAPHQL_ENDPOINT,
json={
"query": self.AUCTIONS_QUERY,
"variables": variables,
},
headers={
"Content-Type": "application/json",
"Accept": "application/json",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
"Origin": "https://www.namecheap.com",
"Referer": "https://www.namecheap.com/market/",
},
)
if response.status_code != 200:
logger.error(f"Namecheap API error: {response.status_code}")
return {"items": [], "total": 0, "error": response.text}
data = response.json()
if "errors" in data:
logger.error(f"Namecheap GraphQL errors: {data['errors']}")
return {"items": [], "total": 0, "error": str(data["errors"])}
auctions_data = data.get("data", {}).get("auctions", {})
items = auctions_data.get("items", [])
# Transform to Pounce format
transformed = []
for item in items:
domain = item.get("domain", "")
tld_part = domain.rsplit(".", 1)[-1] if "." in domain else ""
transformed.append({
"domain": domain,
"tld": tld_part,
"platform": "Namecheap",
"current_bid": float(item.get("currentBid", 0)),
"min_bid": float(item.get("minBid", 0)),
"num_bids": int(item.get("bidCount", 0)),
"end_time": item.get("endTime"),
"buy_now_price": float(item.get("buyNowPrice")) if item.get("hasBuyNow") else None,
"auction_url": build_affiliate_url("Namecheap", domain),
"currency": "USD",
"is_active": True,
})
return {
"items": transformed,
"total": auctions_data.get("totalCount", 0),
"has_more": auctions_data.get("pageInfo", {}).get("hasNextPage", False),
}
except Exception as e:
logger.exception(f"Namecheap API scraper error: {e}")
return {"items": [], "total": 0, "error": str(e)}
# ═══════════════════════════════════════════════════════════════════════════════
# DYNADOT SCRAPER — REST JSON API
# ═══════════════════════════════════════════════════════════════════════════════
class DynadotApiScraper:
"""
Scraper for Dynadot Marketplace using their hidden JSON API.
Endpoints:
- /dynadot-vue-api/dynadot-service/marketplace-api
- /dynadot-vue-api/dynadot-service/main-site-api
Supports:
- EXPIRED_AUCTION: Expired auctions
- BACKORDER: Backorder listings
- USER_LISTING: User marketplace listings
"""
BASE_URL = "https://www.dynadot.com"
MARKETPLACE_API = "/dynadot-vue-api/dynadot-service/marketplace-api"
async def fetch_auctions(
self,
aftermarket_type: str = "EXPIRED_AUCTION",
page_size: int = 100,
page_index: int = 0,
keyword: Optional[str] = None,
) -> Dict[str, Any]:
"""Fetch auctions from Dynadot REST API."""
try:
async with httpx.AsyncClient(timeout=30.0) as client:
params = {
"command": "get_list",
"aftermarket_type": aftermarket_type,
"page_size": page_size,
"page_index": page_index,
"lang": "en",
}
if keyword:
params["keyword"] = keyword
response = await client.post(
f"{self.BASE_URL}{self.MARKETPLACE_API}",
params=params,
headers={
"Accept": "application/json",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
"Referer": "https://www.dynadot.com/market",
},
)
if response.status_code != 200:
logger.error(f"Dynadot API error: {response.status_code}")
return {"items": [], "total": 0, "error": response.text}
data = response.json()
# Dynadot returns code: 200 for success
if data.get("code") not in [0, 200] and data.get("msg") != "success":
logger.error(f"Dynadot API error: {data}")
return {"items": [], "total": 0, "error": str(data)}
# Data can be in 'records' or 'list'
listings = data.get("data", {}).get("records", []) or data.get("data", {}).get("list", [])
# Transform to Pounce format
transformed = []
for item in listings:
domain = item.get("domain", "") or item.get("name", "") or item.get("utf8_name", "")
tld_part = domain.rsplit(".", 1)[-1] if "." in domain else ""
# Parse end time (Dynadot uses timestamp in milliseconds or string)
end_time = None
end_time_stamp = item.get("end_time_stamp")
if end_time_stamp:
try:
end_time = datetime.fromtimestamp(end_time_stamp / 1000)
except Exception:
pass
if not end_time:
end_time_str = item.get("end_time") or item.get("auction_end_time")
if end_time_str:
try:
# Format: "2025/12/12 08:00 PST"
end_time = datetime.strptime(end_time_str.split(" PST")[0], "%Y/%m/%d %H:%M")
except Exception:
end_time = datetime.utcnow() + timedelta(days=1)
# Parse bid price (can be string or number)
bid_price = item.get("bid_price") or item.get("current_bid") or item.get("price") or 0
if isinstance(bid_price, str):
bid_price = float(bid_price.replace(",", "").replace("$", ""))
transformed.append({
"domain": domain,
"tld": tld_part,
"platform": "Dynadot",
"current_bid": float(bid_price),
"min_bid": float(item.get("start_price", 0) or 0),
"num_bids": int(item.get("bids", 0) or item.get("bid_count", 0) or 0),
"end_time": end_time or datetime.utcnow() + timedelta(days=1),
"buy_now_price": float(item.get("accepted_bid_price")) if item.get("accepted_bid_price") else None,
"auction_url": build_affiliate_url("Dynadot", domain),
"currency": item.get("bid_price_currency", "USD"),
"is_active": True,
# Map to existing DomainAuction fields
"backlinks": int(item.get("links", 0) or 0),
"age_years": int(item.get("age", 0) or 0),
})
return {
"items": transformed,
"total": data.get("data", {}).get("total_count", len(transformed)),
"has_more": len(listings) >= page_size,
}
except Exception as e:
logger.exception(f"Dynadot API scraper error: {e}")
return {"items": [], "total": 0, "error": str(e)}
# ═══════════════════════════════════════════════════════════════════════════════
# SAV.COM SCRAPER — AJAX JSON API
# ═══════════════════════════════════════════════════════════════════════════════
class SavApiScraper:
"""
Scraper for Sav.com Auctions using their hidden AJAX endpoint.
Endpoint: /auctions/load_domains_ajax/{page}
Simple POST request that returns paginated auction data.
"""
BASE_URL = "https://www.sav.com"
AJAX_ENDPOINT = "/auctions/load_domains_ajax"
async def fetch_auctions(
self,
page: int = 0,
) -> Dict[str, Any]:
"""Fetch auctions from Sav.com AJAX API."""
try:
async with httpx.AsyncClient(timeout=30.0) as client:
response = await client.post(
f"{self.BASE_URL}{self.AJAX_ENDPOINT}/{page}",
headers={
"Accept": "application/json, text/html",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
"Referer": "https://www.sav.com/domains/auctions",
"X-Requested-With": "XMLHttpRequest",
},
)
if response.status_code != 200:
logger.error(f"Sav API error: {response.status_code}")
return {"items": [], "total": 0, "error": response.text}
# The endpoint may return JSON or an HTML fragment - check the content type
content_type = response.headers.get("content-type", "")
if "application/json" in content_type:
data = response.json()
else:
# HTML response - fall back to BeautifulSoup parsing
logger.warning("Sav returned HTML instead of JSON, parsing...")
return await self._parse_html_response(response.text)
listings = data.get("domains", data.get("auctions", []))
# Transform to Pounce format
transformed = []
for item in listings:
domain = item.get("domain", "") or item.get("name", "")
tld_part = domain.rsplit(".", 1)[-1] if "." in domain else ""
# Parse end time
end_time_str = item.get("end_time") or item.get("ends_at")
end_time = None
if end_time_str:
try:
end_time = datetime.fromisoformat(end_time_str.replace("Z", "+00:00"))
except Exception:
end_time = datetime.utcnow() + timedelta(days=1)
transformed.append({
"domain": domain,
"tld": tld_part,
"platform": "Sav",
"current_bid": float(item.get("current_bid", 0) or item.get("price", 0)),
"min_bid": float(item.get("min_bid", 0) or 0),
"num_bids": int(item.get("bids", 0) or 0),
"end_time": end_time,
"buy_now_price": float(item.get("buy_now")) if item.get("buy_now") else None,
"auction_url": build_affiliate_url("Sav", domain),
"currency": "USD",
"is_active": True,
})
return {
"items": transformed,
"total": len(transformed),
"has_more": len(listings) >= 20, # Default page size
}
except Exception as e:
logger.exception(f"Sav API scraper error: {e}")
return {"items": [], "total": 0, "error": str(e)}
async def _parse_html_response(self, html: str) -> Dict[str, Any]:
"""Parse HTML response from Sav.com when JSON is not available."""
try:
from bs4 import BeautifulSoup
soup = BeautifulSoup(html, "html.parser")
# Find auction rows
rows = soup.select(".auction-row, .domain-row, tr[data-domain]")
transformed = []
for row in rows:
domain_el = row.select_one(".domain-name, .name, [data-domain]")
price_el = row.select_one(".price, .bid, .current-bid")
time_el = row.select_one(".time-left, .ends, .countdown")
bids_el = row.select_one(".bids, .bid-count")
if not domain_el:
continue
domain = domain_el.get_text(strip=True) or domain_el.get("data-domain", "")
tld_part = domain.rsplit(".", 1)[-1] if "." in domain else ""
price_text = price_el.get_text(strip=True) if price_el else "0"
price = float("".join(c for c in price_text if c.isdigit() or c == ".") or "0")
bids_text = bids_el.get_text(strip=True) if bids_el else "0"
bids = int("".join(c for c in bids_text if c.isdigit()) or "0")
transformed.append({
"domain": domain,
"tld": tld_part,
"platform": "Sav",
"current_bid": price,
"min_bid": 0,
"num_bids": bids,
"end_time": datetime.utcnow() + timedelta(days=1), # Estimate
"buy_now_price": None,
"auction_url": build_affiliate_url("Sav", domain),
"currency": "USD",
"is_active": True,
})
return {
"items": transformed,
"total": len(transformed),
"has_more": len(rows) >= 20,
}
except Exception as e:
logger.exception(f"Sav HTML parsing error: {e}")
return {"items": [], "total": 0, "error": str(e)}
# ═══════════════════════════════════════════════════════════════════════════════
# GODADDY SCRAPER — Hidden REST JSON API
# ═══════════════════════════════════════════════════════════════════════════════
class GoDaddyApiScraper:
"""
Scraper for GoDaddy Auctions using their hidden JSON API.
Discovered Endpoint:
https://auctions.godaddy.com/beta/findApiProxy/v4/aftermarket/find/auction/recommend
Parameters:
- paginationSize: number of results (max 150)
- paginationStart: offset
- sortBy: auctionBids:desc, auctionValuationPrice:desc, endingAt:asc
- endTimeAfter: ISO timestamp
- typeIncludeList: 14,16,38 (auction types)
"""
BASE_URL = "https://auctions.godaddy.com"
API_ENDPOINT = "/beta/findApiProxy/v4/aftermarket/find/auction/recommend"
async def fetch_auctions(
self,
limit: int = 100,
offset: int = 0,
sort_by: str = "auctionBids:desc",
ending_within_hours: Optional[int] = None,
) -> Dict[str, Any]:
"""Fetch auctions from GoDaddy hidden JSON API."""
try:
async with httpx.AsyncClient(timeout=30.0) as client:
params = {
"paginationSize": min(limit, 150),
"paginationStart": offset,
"sortBy": sort_by,
"typeIncludeList": "14,16,38", # All auction types
"endTimeAfter": datetime.utcnow().isoformat() + "Z",
}
if ending_within_hours:
end_before = (datetime.utcnow() + timedelta(hours=ending_within_hours)).isoformat() + "Z"
params["endTimeBefore"] = end_before
response = await client.get(
f"{self.BASE_URL}{self.API_ENDPOINT}",
params=params,
headers={
"Accept": "application/json",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
"Referer": "https://auctions.godaddy.com/beta",
},
)
if response.status_code != 200:
logger.error(f"GoDaddy API error: {response.status_code}")
return {"items": [], "total": 0, "error": response.text}
data = response.json()
# GoDaddy returns listings in 'results' array
listings = data.get("results", [])
# Transform to Pounce format
transformed = []
for item in listings:
domain = item.get("fqdn", "") or item.get("domain", "")
tld_part = domain.rsplit(".", 1)[-1] if "." in domain else ""
# Parse end time
end_time = None
end_at = item.get("endingAt") or item.get("auctionEndTime")
if end_at:
try:
end_time = datetime.fromisoformat(end_at.replace("Z", "+00:00")).replace(tzinfo=None)
except Exception:
pass
# Parse price (can be in different fields)
price = (
item.get("price") or
item.get("currentBidPrice") or
item.get("auctionPrice") or
item.get("minBid") or 0
)
transformed.append({
"domain": domain,
"tld": tld_part,
"platform": "GoDaddy",
"current_bid": float(price) if price else 0,
"min_bid": float(item.get("minBid", 0) or 0),
"num_bids": int(item.get("bids", 0) or item.get("bidCount", 0) or 0),
"end_time": end_time or datetime.utcnow() + timedelta(days=1),
"buy_now_price": float(item.get("buyNowPrice")) if item.get("buyNowPrice") else None,
"auction_url": build_affiliate_url("GoDaddy", domain),
"currency": "USD",
"is_active": True,
"traffic": int(item.get("traffic", 0) or 0),
"domain_authority": int(item.get("domainAuthority", 0) or item.get("valuationPrice", 0) or 0),
})
return {
"items": transformed,
"total": data.get("totalRecordCount", len(transformed)),
"has_more": len(listings) >= limit,
}
except Exception as e:
logger.exception(f"GoDaddy API scraper error: {e}")
return {"items": [], "total": 0, "error": str(e)}
# ═══════════════════════════════════════════════════════════════════════════════
# GODADDY RSS SCRAPER — Public RSS Feed (NO Cloudflare!)
# ═══════════════════════════════════════════════════════════════════════════════
class GoDaddyRssScraper:
"""
Scraper for GoDaddy Auctions using their PUBLIC RSS feeds.
These RSS feeds are not behind Cloudflare, so they can be fetched reliably
without browser automation.
Feeds:
- https://auctions.godaddy.com/rss/ending.aspx (Ending Soon)
- https://auctions.godaddy.com/rss/new.aspx (New Auctions)
- https://auctions.godaddy.com/rss/closeouts.aspx (Closeouts)
"""
RSS_FEEDS = {
"ending": "https://auctions.godaddy.com/rss/ending.aspx",
"new": "https://auctions.godaddy.com/rss/new.aspx",
"closeouts": "https://auctions.godaddy.com/rss/closeouts.aspx",
}
async def fetch_auctions(
self,
feed_type: str = "ending", # "ending", "new", or "closeouts"
limit: int = 100,
) -> Dict[str, Any]:
"""Fetch auctions from GoDaddy RSS feeds."""
try:
import xml.etree.ElementTree as ET
feed_url = self.RSS_FEEDS.get(feed_type, self.RSS_FEEDS["ending"])
async with httpx.AsyncClient(timeout=30.0) as client:
response = await client.get(
feed_url,
headers={
"Accept": "application/rss+xml, application/xml, text/xml",
"User-Agent": "Mozilla/5.0 (compatible; PounceBot/1.0; +https://pounce.ch)",
},
)
if response.status_code != 200:
logger.error(f"GoDaddy RSS error: {response.status_code}")
return {"items": [], "total": 0, "error": f"HTTP {response.status_code}"}
# Parse RSS XML
root = ET.fromstring(response.text)
# Find all items in the RSS feed
items = root.findall(".//item")
transformed = []
for item in items[:limit]:
try:
title = item.find("title").text if item.find("title") is not None else ""
link = item.find("link").text if item.find("link") is not None else ""
description = item.find("description").text if item.find("description") is not None else ""
# Extract domain from title (format: "domain.com - $XX")
domain = ""
price = 0
if title:
# Title format: "example.com - $12" or "example.com"
parts = title.split(" - ")
domain = parts[0].strip().lower()
if len(parts) > 1:
price_str = parts[1].replace("$", "").replace(",", "").strip()
try:
price = float(price_str)
except Exception:
pass
# Try to extract price from description if not in title
if price == 0 and description:
import re
price_match = re.search(r'\$([0-9,]+(?:\.[0-9]+)?)', description)
if price_match:
price = float(price_match.group(1).replace(",", ""))
if not domain or "." not in domain:
continue
tld = domain.rsplit(".", 1)[-1]
# Add affiliate param to link
affiliate_url = link
if link and "?" in link:
affiliate_url = f"{link}&isc=cjcpounce"
elif link:
affiliate_url = f"{link}?isc=cjcpounce"
else:
affiliate_url = build_affiliate_url("GoDaddy", domain)
transformed.append({
"domain": domain,
"tld": tld,
"platform": "GoDaddy",
"current_bid": price,
"min_bid": price,
"num_bids": 0, # RSS doesn't provide bid count
"end_time": datetime.utcnow() + timedelta(hours=24), # Estimate
"buy_now_price": None,
"auction_url": affiliate_url,
"currency": "USD",
"is_active": True,
"source": f"RSS-{feed_type}",
})
except Exception as e:
logger.warning(f"Error parsing GoDaddy RSS item: {e}")
continue
logger.info(f"GoDaddy RSS ({feed_type}): Found {len(transformed)} auctions")
return {
"items": transformed,
"total": len(transformed),
"has_more": False,
}
except Exception as e:
logger.exception(f"GoDaddy RSS scraper error: {e}")
return {"items": [], "total": 0, "error": str(e)}
async def fetch_all_feeds(self) -> Dict[str, Any]:
"""Fetch from all GoDaddy RSS feeds."""
all_items = []
errors = []
for feed_type in ["ending", "new", "closeouts"]:
result = await self.fetch_auctions(feed_type=feed_type, limit=50)
all_items.extend(result.get("items", []))
if result.get("error"):
errors.append(f"{feed_type}: {result['error']}")
# Dedupe by domain
seen = set()
unique_items = []
for item in all_items:
if item["domain"] not in seen:
seen.add(item["domain"])
unique_items.append(item)
return {
"items": unique_items,
"total": len(unique_items),
"errors": errors if errors else None,
}
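# Hedged usage sketch: the RSS path needs no Cloudflare bypass, so it can be
# exercised standalone as a quick sanity check:
#
#   import asyncio
#   feeds = asyncio.run(GoDaddyRssScraper().fetch_all_feeds())
#   print(feeds["total"], "unique domains across ending/new/closeouts feeds")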
# ═══════════════════════════════════════════════════════════════════════════════
# PARK.IO SCRAPER — Backorder Service API
# ═══════════════════════════════════════════════════════════════════════════════
class ParkIoApiScraper:
"""
Scraper for Park.io domain backorders.
Park.io specializes in catching expiring domains - great for drops!
Endpoint: https://park.io/api/domains
"""
BASE_URL = "https://park.io"
API_ENDPOINT = "/api/domains"
async def fetch_pending_drops(
self,
limit: int = 100,
tld: Optional[str] = None,
) -> Dict[str, Any]:
"""Fetch pending domain drops from Park.io."""
try:
async with httpx.AsyncClient(timeout=30.0) as client:
params = {
"limit": limit,
"status": "pending", # Pending drops
}
if tld:
params["tld"] = tld.lstrip(".")
response = await client.get(
f"{self.BASE_URL}{self.API_ENDPOINT}",
params=params,
headers={
"Accept": "application/json",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
},
)
if response.status_code != 200:
logger.error(f"Park.io API error: {response.status_code}")
return {"items": [], "total": 0, "error": response.text}
data = response.json()
domains = data.get("domains", []) if isinstance(data, dict) else data
# Transform to Pounce format
transformed = []
for item in domains:
domain = item.get("domain", "") or item.get("name", "")
tld_part = domain.rsplit(".", 1)[-1] if "." in domain else ""
# Parse drop date
drop_date = None
drop_at = item.get("drop_date") or item.get("expires_at")
if drop_at:
try:
drop_date = datetime.fromisoformat(drop_at.replace("Z", "+00:00")).replace(tzinfo=None)
except Exception:
drop_date = datetime.utcnow() + timedelta(days=1)
transformed.append({
"domain": domain,
"tld": tld_part,
"platform": "Park.io",
"current_bid": float(item.get("price", 99)), # Park.io default price
"min_bid": float(item.get("min_price", 99)),
"num_bids": int(item.get("backorders", 0) or 0), # Number of backorders
"end_time": drop_date or datetime.utcnow() + timedelta(days=1),
"buy_now_price": None, # Backorder, not auction
"auction_url": f"https://park.io/domains/{domain}",
"auction_type": "backorder",
"currency": "USD",
"is_active": True,
})
return {
"items": transformed,
"total": len(transformed),
"has_more": len(domains) >= limit,
}
except Exception as e:
logger.exception(f"Park.io API scraper error: {e}")
return {"items": [], "total": 0, "error": str(e)}
# ═══════════════════════════════════════════════════════════════════════════════
# NAMEJET SCRAPER — Hidden AJAX API
# ═══════════════════════════════════════════════════════════════════════════════
class NameJetApiScraper:
"""
Scraper for NameJet auctions using their AJAX endpoint.
NameJet is owned by GoDaddy but operates independently.
Uses a hidden AJAX endpoint for loading auction data.
"""
BASE_URL = "https://www.namejet.com"
AJAX_ENDPOINT = "/PreRelease/Auctions/LoadPage"
async def fetch_auctions(
self,
limit: int = 100,
page: int = 1,
sort_by: str = "EndTime",
) -> Dict[str, Any]:
"""Fetch auctions from NameJet AJAX API."""
try:
async with httpx.AsyncClient(timeout=30.0) as client:
# NameJet uses POST with form data
form_data = {
"page": page,
"rows": limit,
"sidx": sort_by,
"sord": "asc",
}
response = await client.post(
f"{self.BASE_URL}{self.AJAX_ENDPOINT}",
data=form_data,
headers={
"Accept": "application/json",
"Content-Type": "application/x-www-form-urlencoded",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
"Referer": "https://www.namejet.com/PreRelease/Auctions",
"X-Requested-With": "XMLHttpRequest",
},
)
if response.status_code != 200:
logger.error(f"NameJet API error: {response.status_code}")
return {"items": [], "total": 0, "error": response.text}
# Try JSON first, fall back to HTML parsing
try:
data = response.json()
except Exception:
return await self._parse_html_response(response.text)
# NameJet returns 'rows' array with auction data
rows = data.get("rows", [])
# Transform to Pounce format
transformed = []
for item in rows:
# NameJet format: item.cell contains [domain, endTime, price, bids, ...]
cell = item.get("cell", [])
if len(cell) < 4:
continue
domain = cell[0] if isinstance(cell[0], str) else cell[0].get("domain", "")
tld_part = domain.rsplit(".", 1)[-1] if "." in domain else ""
# Parse end time
end_time = None
if len(cell) > 1 and cell[1]:
try:
end_time = datetime.strptime(cell[1], "%m/%d/%Y %H:%M:%S")
except Exception:
try:
end_time = datetime.strptime(cell[1], "%Y-%m-%d %H:%M")
except Exception:
pass
# Parse price
price = 0
if len(cell) > 2:
price_str = str(cell[2]).replace("$", "").replace(",", "")
try:
price = float(price_str)
except Exception:
pass
# Parse bids
bids = 0
if len(cell) > 3:
try:
bids = int(cell[3])
except Exception:
pass
transformed.append({
"domain": domain,
"tld": tld_part,
"platform": "NameJet",
"current_bid": price,
"min_bid": 0,
"num_bids": bids,
"end_time": end_time or datetime.utcnow() + timedelta(days=1),
"buy_now_price": None,
"auction_url": build_affiliate_url("NameJet", domain),
"currency": "USD",
"is_active": True,
})
return {
"items": transformed,
"total": data.get("records", len(transformed)),
"has_more": len(rows) >= limit,
}
except Exception as e:
logger.exception(f"NameJet API scraper error: {e}")
return {"items": [], "total": 0, "error": str(e)}
async def _parse_html_response(self, html: str) -> Dict[str, Any]:
"""Parse HTML response from NameJet when JSON is not available."""
try:
from bs4 import BeautifulSoup
soup = BeautifulSoup(html, "html.parser")
rows = soup.select("tr[data-domain], .auction-row")
transformed = []
for row in rows:
domain_el = row.select_one("td:first-child, .domain")
if not domain_el:
continue
domain = domain_el.get_text(strip=True)
tld_part = domain.rsplit(".", 1)[-1] if "." in domain else ""
transformed.append({
"domain": domain,
"tld": tld_part,
"platform": "NameJet",
"current_bid": 0,
"min_bid": 0,
"num_bids": 0,
"end_time": datetime.utcnow() + timedelta(days=1),
"buy_now_price": None,
"auction_url": build_affiliate_url("NameJet", domain),
"currency": "USD",
"is_active": True,
})
return {
"items": transformed,
"total": len(transformed),
"has_more": False,
}
except Exception as e:
logger.exception(f"NameJet HTML parsing error: {e}")
return {"items": [], "total": 0, "error": str(e)}
# ═══════════════════════════════════════════════════════════════════════════════
# UNIFIED SCRAPER — Combines all hidden API scrapers
# ═══════════════════════════════════════════════════════════════════════════════
class HiddenApiScraperService:
"""
Unified service that combines all hidden API scrapers.
Priority order:
1. GoDaddy JSON API (most reliable, 150 auctions/request)
2. Dynadot REST API (100 auctions/request)
3. NameJet AJAX (requires parsing)
4. Park.io (backorders)
5. Namecheap GraphQL (requires query hash - may fail)
6. Sav.com AJAX (HTML fallback)
All URLs include affiliate tracking for monetization.
"""
def __init__(self):
self.namecheap = NamecheapApiScraper()
self.dynadot = DynadotApiScraper()
self.sav = SavApiScraper()
self.godaddy = GoDaddyApiScraper()
self.godaddy_rss = GoDaddyRssScraper() # RSS fallback (NO Cloudflare!)
self.parkio = ParkIoApiScraper()
self.namejet = NameJetApiScraper()
async def scrape_all(self, limit_per_platform: int = 100) -> Dict[str, Any]:
"""
Scrape all platforms using hidden APIs.
Returns combined results with platform breakdown.
"""
results = {
"total_found": 0,
"platforms": {},
"errors": [],
"items": [],
}
# ═══════════════════════════════════════════════════════════
# TIER 0: RSS Feeds (Most Reliable - NO Cloudflare!)
# ═══════════════════════════════════════════════════════════
# Scrape GoDaddy RSS (Always works!)
try:
rss_data = await self.godaddy_rss.fetch_all_feeds()
rss_count = len(rss_data.get("items", []))
if rss_count > 0:
results["platforms"]["GoDaddy-RSS"] = {
"found": rss_count,
"total": rss_count,
}
results["items"].extend(rss_data.get("items", []))
results["total_found"] += rss_count
logger.info(f"✅ GoDaddy RSS: {rss_count} auctions")
except Exception as e:
results["errors"].append(f"GoDaddy-RSS: {str(e)}")
# ═══════════════════════════════════════════════════════════
# TIER 1: Most Reliable JSON APIs
# ═══════════════════════════════════════════════════════════
# Scrape GoDaddy JSON API (may have Cloudflare issues)
try:
godaddy_data = await self.godaddy.fetch_auctions(limit=limit_per_platform)
godaddy_count = len(godaddy_data.get("items", []))
if godaddy_count > 0:
results["platforms"]["GoDaddy-API"] = {
"found": godaddy_count,
"total": godaddy_data.get("total", 0),
}
results["items"].extend(godaddy_data.get("items", []))
results["total_found"] += godaddy_count
if godaddy_data.get("error"):
results["errors"].append(f"GoDaddy-API: {godaddy_data['error'][:100]}")
except Exception as e:
results["errors"].append(f"GoDaddy-API: {str(e)[:100]}")
# Scrape Dynadot
try:
dynadot_data = await self.dynadot.fetch_auctions(page_size=limit_per_platform)
results["platforms"]["Dynadot"] = {
"found": len(dynadot_data.get("items", [])),
"total": dynadot_data.get("total", 0),
}
results["items"].extend(dynadot_data.get("items", []))
results["total_found"] += len(dynadot_data.get("items", []))
if dynadot_data.get("error"):
results["errors"].append(f"Dynadot: {dynadot_data['error']}")
except Exception as e:
results["errors"].append(f"Dynadot: {str(e)}")
# ═══════════════════════════════════════════════════════════
# TIER 2: AJAX/HTML Scrapers
# ═══════════════════════════════════════════════════════════
# Scrape NameJet (NEW)
try:
namejet_data = await self.namejet.fetch_auctions(limit=limit_per_platform)
results["platforms"]["NameJet"] = {
"found": len(namejet_data.get("items", [])),
"total": namejet_data.get("total", 0),
}
results["items"].extend(namejet_data.get("items", []))
results["total_found"] += len(namejet_data.get("items", []))
if namejet_data.get("error"):
results["errors"].append(f"NameJet: {namejet_data['error']}")
except Exception as e:
results["errors"].append(f"NameJet: {str(e)}")
# Scrape Park.io (Backorders - NEW)
try:
parkio_data = await self.parkio.fetch_pending_drops(limit=limit_per_platform)
results["platforms"]["Park.io"] = {
"found": len(parkio_data.get("items", [])),
"total": parkio_data.get("total", 0),
}
results["items"].extend(parkio_data.get("items", []))
results["total_found"] += len(parkio_data.get("items", []))
if parkio_data.get("error"):
results["errors"].append(f"Park.io: {parkio_data['error']}")
except Exception as e:
results["errors"].append(f"Park.io: {str(e)}")
# Scrape Sav.com
try:
sav_data = await self.sav.fetch_auctions(page=0)
results["platforms"]["Sav"] = {
"found": len(sav_data.get("items", [])),
"total": sav_data.get("total", 0),
}
results["items"].extend(sav_data.get("items", []))
results["total_found"] += len(sav_data.get("items", []))
if sav_data.get("error"):
results["errors"].append(f"Sav: {sav_data['error']}")
except Exception as e:
results["errors"].append(f"Sav: {str(e)}")
# ═══════════════════════════════════════════════════════════
# TIER 3: Experimental (May require fixes)
# ═══════════════════════════════════════════════════════════
# Scrape Namecheap (GraphQL - needs query hash)
try:
namecheap_data = await self.namecheap.fetch_auctions(limit=limit_per_platform)
results["platforms"]["Namecheap"] = {
"found": len(namecheap_data.get("items", [])),
"total": namecheap_data.get("total", 0),
}
results["items"].extend(namecheap_data.get("items", []))
results["total_found"] += len(namecheap_data.get("items", []))
if namecheap_data.get("error"):
results["errors"].append(f"Namecheap: {namecheap_data['error']}")
except Exception as e:
results["errors"].append(f"Namecheap: {str(e)}")
return results
# Export instances
namecheap_scraper = NamecheapApiScraper()
dynadot_scraper = DynadotApiScraper()
sav_scraper = SavApiScraper()
godaddy_scraper = GoDaddyApiScraper()
godaddy_rss_scraper = GoDaddyRssScraper() # RSS fallback (always works!)
parkio_scraper = ParkIoApiScraper()
namejet_scraper = NameJetApiScraper()
hidden_api_scraper = HiddenApiScraperService()
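# Minimal manual test harness (a sketch, not used by the application): running
# this module directly scrapes all platforms once and prints the per-platform
# breakdown. Assumes the module is executed outside the FastAPI app context.
if __name__ == "__main__":
    import asyncio

    async def _main() -> None:
        results = await hidden_api_scraper.scrape_all(limit_per_platform=50)
        print(f"Total auctions found: {results['total_found']}")
        for platform, stats in results["platforms"].items():
            print(f"  {platform}: {stats['found']} found ({stats['total']} reported)")
        for err in results["errors"]:
            print(f"  error: {err}")

    asyncio.run(_main())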