""" Hidden JSON API Scrapers for Domain Auction Platforms. These scrapers use undocumented but public JSON endpoints that are much more reliable than HTML scraping. Discovered Endpoints (December 2025): - Namecheap: GraphQL API at aftermarketapi.namecheap.com - Dynadot: REST API at dynadot-vue-api - Sav.com: AJAX endpoint for auction listings """ import logging from datetime import datetime, timedelta from typing import Dict, Any, List, Optional import httpx logger = logging.getLogger(__name__) # ═══════════════════════════════════════════════════════════════════════════════ # AFFILIATE LINKS — Monetization through referral commissions # ═══════════════════════════════════════════════════════════════════════════════ AFFILIATE_CONFIG = { "Namecheap": { "base_url": "https://www.namecheap.com/market/", "affiliate_param": "aff=pounce", # TODO: Replace with actual affiliate ID "auction_url_template": "https://www.namecheap.com/market/domain/{domain}?aff=pounce", }, "Dynadot": { "base_url": "https://www.dynadot.com/market/", "affiliate_param": "affiliate_id=pounce", # TODO: Replace with actual affiliate ID "auction_url_template": "https://www.dynadot.com/market/auction/{domain}?affiliate_id=pounce", }, "Sav": { "base_url": "https://www.sav.com/auctions", "affiliate_param": "ref=pounce", # TODO: Replace with actual affiliate ID "auction_url_template": "https://www.sav.com/domain/{domain}?ref=pounce", }, "GoDaddy": { "base_url": "https://auctions.godaddy.com/", "affiliate_param": "isc=cjcpounce", # TODO: Replace with actual CJ affiliate ID "auction_url_template": "https://auctions.godaddy.com/trpItemListing.aspx?domain={domain}&isc=cjcpounce", }, "DropCatch": { "base_url": "https://www.dropcatch.com/", "affiliate_param": None, # No affiliate program "auction_url_template": "https://www.dropcatch.com/domain/{domain}", }, "Sedo": { "base_url": "https://sedo.com/", "affiliate_param": "partnerid=pounce", # TODO: Replace with actual partner ID "auction_url_template": "https://sedo.com/search/details/?domain={domain}&partnerid=pounce", }, "NameJet": { "base_url": "https://www.namejet.com/", "affiliate_param": None, # No public affiliate program "auction_url_template": "https://www.namejet.com/pages/Auctions/ViewAuctions.aspx?domain={domain}", }, "ExpiredDomains": { "base_url": "https://www.expireddomains.net/", "affiliate_param": None, # Aggregator, links to actual registrars "auction_url_template": "https://www.expireddomains.net/domain-name-search/?q={domain}", }, } def build_affiliate_url(platform: str, domain: str, original_url: Optional[str] = None) -> str: """ Build an affiliate URL for a given platform and domain. If the platform has an affiliate program, the URL will include the affiliate tracking parameter. Otherwise, returns the original URL. """ config = AFFILIATE_CONFIG.get(platform, {}) if config.get("auction_url_template"): return config["auction_url_template"].format(domain=domain) return original_url or f"https://www.google.com/search?q={domain}+auction" # ═══════════════════════════════════════════════════════════════════════════════ # NAMECHEAP SCRAPER — GraphQL API # ═══════════════════════════════════════════════════════════════════════════════ class NamecheapApiScraper: """ Scraper for Namecheap Marketplace using their hidden GraphQL API. Endpoint: https://aftermarketapi.namecheap.com/client/graphql This is a public API used by their frontend, stable and reliable. """ GRAPHQL_ENDPOINT = "https://aftermarketapi.namecheap.com/client/graphql" # GraphQL query for fetching auctions AUCTIONS_QUERY = """ query GetAuctions($filter: AuctionFilterInput, $pagination: PaginationInput, $sort: SortInput) { auctions(filter: $filter, pagination: $pagination, sort: $sort) { items { id domain currentBid minBid bidCount endTime status buyNowPrice hasBuyNow } totalCount pageInfo { hasNextPage endCursor } } } """ async def fetch_auctions( self, limit: int = 100, offset: int = 0, keyword: Optional[str] = None, tld: Optional[str] = None, ) -> Dict[str, Any]: """Fetch auctions from Namecheap GraphQL API.""" try: async with httpx.AsyncClient(timeout=30.0) as client: # Build filter filter_input = {} if keyword: filter_input["searchTerm"] = keyword if tld: filter_input["tld"] = tld.lstrip(".") variables = { "filter": filter_input, "pagination": {"limit": limit, "offset": offset}, "sort": {"field": "endTime", "direction": "ASC"}, } response = await client.post( self.GRAPHQL_ENDPOINT, json={ "query": self.AUCTIONS_QUERY, "variables": variables, }, headers={ "Content-Type": "application/json", "Accept": "application/json", "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36", "Origin": "https://www.namecheap.com", "Referer": "https://www.namecheap.com/market/", }, ) if response.status_code != 200: logger.error(f"Namecheap API error: {response.status_code}") return {"items": [], "total": 0, "error": response.text} data = response.json() if "errors" in data: logger.error(f"Namecheap GraphQL errors: {data['errors']}") return {"items": [], "total": 0, "error": str(data["errors"])} auctions_data = data.get("data", {}).get("auctions", {}) items = auctions_data.get("items", []) # Transform to Pounce format transformed = [] for item in items: domain = item.get("domain", "") tld_part = domain.rsplit(".", 1)[-1] if "." in domain else "" transformed.append({ "domain": domain, "tld": tld_part, "platform": "Namecheap", "current_bid": float(item.get("currentBid", 0)), "min_bid": float(item.get("minBid", 0)), "num_bids": int(item.get("bidCount", 0)), "end_time": item.get("endTime"), "buy_now_price": float(item.get("buyNowPrice")) if item.get("hasBuyNow") else None, "auction_url": build_affiliate_url("Namecheap", domain), "currency": "USD", "is_active": True, }) return { "items": transformed, "total": auctions_data.get("totalCount", 0), "has_more": auctions_data.get("pageInfo", {}).get("hasNextPage", False), } except Exception as e: logger.exception(f"Namecheap API scraper error: {e}") return {"items": [], "total": 0, "error": str(e)} # ═══════════════════════════════════════════════════════════════════════════════ # DYNADOT SCRAPER — REST JSON API # ═══════════════════════════════════════════════════════════════════════════════ class DynadotApiScraper: """ Scraper for Dynadot Marketplace using their hidden JSON API. Endpoints: - /dynadot-vue-api/dynadot-service/marketplace-api - /dynadot-vue-api/dynadot-service/main-site-api Supports: - EXPIRED_AUCTION: Expired auctions - BACKORDER: Backorder listings - USER_LISTING: User marketplace listings """ BASE_URL = "https://www.dynadot.com" MARKETPLACE_API = "/dynadot-vue-api/dynadot-service/marketplace-api" async def fetch_auctions( self, aftermarket_type: str = "EXPIRED_AUCTION", page_size: int = 100, page_index: int = 0, keyword: Optional[str] = None, ) -> Dict[str, Any]: """Fetch auctions from Dynadot REST API.""" try: async with httpx.AsyncClient(timeout=30.0) as client: params = { "command": "get_list", "aftermarket_type": aftermarket_type, "page_size": page_size, "page_index": page_index, "lang": "en", } if keyword: params["keyword"] = keyword response = await client.post( f"{self.BASE_URL}{self.MARKETPLACE_API}", params=params, headers={ "Accept": "application/json", "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36", "Referer": "https://www.dynadot.com/market", }, ) if response.status_code != 200: logger.error(f"Dynadot API error: {response.status_code}") return {"items": [], "total": 0, "error": response.text} data = response.json() # Dynadot returns code: 200 for success if data.get("code") not in [0, 200] and data.get("msg") != "success": logger.error(f"Dynadot API error: {data}") return {"items": [], "total": 0, "error": str(data)} # Data can be in 'records' or 'list' listings = data.get("data", {}).get("records", []) or data.get("data", {}).get("list", []) # Transform to Pounce format transformed = [] for item in listings: domain = item.get("domain", "") or item.get("name", "") or item.get("utf8_name", "") tld_part = domain.rsplit(".", 1)[-1] if "." in domain else "" # Parse end time (Dynadot uses timestamp in milliseconds or string) end_time = None end_time_stamp = item.get("end_time_stamp") if end_time_stamp: try: end_time = datetime.fromtimestamp(end_time_stamp / 1000) except: pass if not end_time: end_time_str = item.get("end_time") or item.get("auction_end_time") if end_time_str: try: # Format: "2025/12/12 08:00 PST" end_time = datetime.strptime(end_time_str.split(" PST")[0], "%Y/%m/%d %H:%M") except: end_time = datetime.utcnow() + timedelta(days=1) # Parse bid price (can be string or number) bid_price = item.get("bid_price") or item.get("current_bid") or item.get("price") or 0 if isinstance(bid_price, str): bid_price = float(bid_price.replace(",", "").replace("$", "")) transformed.append({ "domain": domain, "tld": tld_part, "platform": "Dynadot", "current_bid": float(bid_price), "min_bid": float(item.get("start_price", 0) or 0), "num_bids": int(item.get("bids", 0) or item.get("bid_count", 0) or 0), "end_time": end_time or datetime.utcnow() + timedelta(days=1), "buy_now_price": float(item.get("accepted_bid_price")) if item.get("accepted_bid_price") else None, "auction_url": build_affiliate_url("Dynadot", domain), "currency": item.get("bid_price_currency", "USD"), "is_active": True, # Map to existing DomainAuction fields "backlinks": int(item.get("links", 0) or 0), "age_years": int(item.get("age", 0) or 0), }) return { "items": transformed, "total": data.get("data", {}).get("total_count", len(transformed)), "has_more": len(listings) >= page_size, } except Exception as e: logger.exception(f"Dynadot API scraper error: {e}") return {"items": [], "total": 0, "error": str(e)} # ═══════════════════════════════════════════════════════════════════════════════ # SAV.COM SCRAPER — AJAX JSON API # ═══════════════════════════════════════════════════════════════════════════════ class SavApiScraper: """ Scraper for Sav.com Auctions using their hidden AJAX endpoint. Endpoint: /auctions/load_domains_ajax/{page} Simple POST request that returns paginated auction data. """ BASE_URL = "https://www.sav.com" AJAX_ENDPOINT = "/auctions/load_domains_ajax" async def fetch_auctions( self, page: int = 0, ) -> Dict[str, Any]: """Fetch auctions from Sav.com AJAX API.""" try: async with httpx.AsyncClient(timeout=30.0) as client: response = await client.post( f"{self.BASE_URL}{self.AJAX_ENDPOINT}/{page}", headers={ "Accept": "application/json, text/html", "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36", "Referer": "https://www.sav.com/domains/auctions", "X-Requested-With": "XMLHttpRequest", }, ) if response.status_code != 200: logger.error(f"Sav API error: {response.status_code}") return {"items": [], "total": 0, "error": response.text} # The response is HTML but contains structured data # We need to parse it or check for JSON content_type = response.headers.get("content-type", "") if "application/json" in content_type: data = response.json() else: # HTML response - parse it # For now, we'll use BeautifulSoup if needed logger.warning("Sav returned HTML instead of JSON, parsing...") return await self._parse_html_response(response.text) listings = data.get("domains", data.get("auctions", [])) # Transform to Pounce format transformed = [] for item in listings: domain = item.get("domain", "") or item.get("name", "") tld_part = domain.rsplit(".", 1)[-1] if "." in domain else "" # Parse end time end_time_str = item.get("end_time") or item.get("ends_at") end_time = None if end_time_str: try: end_time = datetime.fromisoformat(end_time_str.replace("Z", "+00:00")) except: end_time = datetime.utcnow() + timedelta(days=1) transformed.append({ "domain": domain, "tld": tld_part, "platform": "Sav", "current_bid": float(item.get("current_bid", 0) or item.get("price", 0)), "min_bid": float(item.get("min_bid", 0) or 0), "num_bids": int(item.get("bids", 0) or 0), "end_time": end_time, "buy_now_price": float(item.get("buy_now")) if item.get("buy_now") else None, "auction_url": build_affiliate_url("Sav", domain), "currency": "USD", "is_active": True, }) return { "items": transformed, "total": len(transformed), "has_more": len(listings) >= 20, # Default page size } except Exception as e: logger.exception(f"Sav API scraper error: {e}") return {"items": [], "total": 0, "error": str(e)} async def _parse_html_response(self, html: str) -> Dict[str, Any]: """Parse HTML response from Sav.com when JSON is not available.""" try: from bs4 import BeautifulSoup soup = BeautifulSoup(html, "html.parser") # Find auction rows rows = soup.select(".auction-row, .domain-row, tr[data-domain]") transformed = [] for row in rows: domain_el = row.select_one(".domain-name, .name, [data-domain]") price_el = row.select_one(".price, .bid, .current-bid") time_el = row.select_one(".time-left, .ends, .countdown") bids_el = row.select_one(".bids, .bid-count") if not domain_el: continue domain = domain_el.get_text(strip=True) or domain_el.get("data-domain", "") tld_part = domain.rsplit(".", 1)[-1] if "." in domain else "" price_text = price_el.get_text(strip=True) if price_el else "0" price = float("".join(c for c in price_text if c.isdigit() or c == ".") or "0") bids_text = bids_el.get_text(strip=True) if bids_el else "0" bids = int("".join(c for c in bids_text if c.isdigit()) or "0") transformed.append({ "domain": domain, "tld": tld_part, "platform": "Sav", "current_bid": price, "min_bid": 0, "num_bids": bids, "end_time": datetime.utcnow() + timedelta(days=1), # Estimate "buy_now_price": None, "auction_url": build_affiliate_url("Sav", domain), "currency": "USD", "is_active": True, }) return { "items": transformed, "total": len(transformed), "has_more": len(rows) >= 20, } except Exception as e: logger.exception(f"Sav HTML parsing error: {e}") return {"items": [], "total": 0, "error": str(e)} # ═══════════════════════════════════════════════════════════════════════════════ # UNIFIED SCRAPER — Combines all hidden API scrapers # ═══════════════════════════════════════════════════════════════════════════════ class HiddenApiScraperService: """ Unified service that combines all hidden API scrapers. Priority order: 1. JSON APIs (most reliable) 2. GraphQL APIs (Namecheap) 3. AJAX endpoints (fallback) All URLs include affiliate tracking for monetization. """ def __init__(self): self.namecheap = NamecheapApiScraper() self.dynadot = DynadotApiScraper() self.sav = SavApiScraper() async def scrape_all(self, limit_per_platform: int = 100) -> Dict[str, Any]: """ Scrape all platforms using hidden APIs. Returns combined results with platform breakdown. """ results = { "total_found": 0, "platforms": {}, "errors": [], "items": [], } # Scrape Namecheap try: namecheap_data = await self.namecheap.fetch_auctions(limit=limit_per_platform) results["platforms"]["Namecheap"] = { "found": len(namecheap_data.get("items", [])), "total": namecheap_data.get("total", 0), } results["items"].extend(namecheap_data.get("items", [])) results["total_found"] += len(namecheap_data.get("items", [])) if namecheap_data.get("error"): results["errors"].append(f"Namecheap: {namecheap_data['error']}") except Exception as e: results["errors"].append(f"Namecheap: {str(e)}") # Scrape Dynadot try: dynadot_data = await self.dynadot.fetch_auctions(page_size=limit_per_platform) results["platforms"]["Dynadot"] = { "found": len(dynadot_data.get("items", [])), "total": dynadot_data.get("total", 0), } results["items"].extend(dynadot_data.get("items", [])) results["total_found"] += len(dynadot_data.get("items", [])) if dynadot_data.get("error"): results["errors"].append(f"Dynadot: {dynadot_data['error']}") except Exception as e: results["errors"].append(f"Dynadot: {str(e)}") # Scrape Sav.com try: sav_data = await self.sav.fetch_auctions(page=0) results["platforms"]["Sav"] = { "found": len(sav_data.get("items", [])), "total": sav_data.get("total", 0), } results["items"].extend(sav_data.get("items", [])) results["total_found"] += len(sav_data.get("items", [])) if sav_data.get("error"): results["errors"].append(f"Sav: {sav_data['error']}") except Exception as e: results["errors"].append(f"Sav: {str(e)}") return results # Export instances namecheap_scraper = NamecheapApiScraper() dynadot_scraper = DynadotApiScraper() sav_scraper = SavApiScraper() hidden_api_scraper = HiddenApiScraperService()