pounce/backend/app/services/hidden_api_scrapers.py

"""
Hidden JSON API Scrapers for Domain Auction Platforms.
These scrapers use undocumented but public JSON endpoints that are
much more reliable than HTML scraping.
Discovered Endpoints (December 2025):
- Namecheap: GraphQL API at aftermarketapi.namecheap.com
- Dynadot: REST API at dynadot-vue-api
- Sav.com: AJAX endpoint for auction listings
"""
import logging
from datetime import datetime, timedelta
from typing import Dict, Any, List, Optional
import httpx
logger = logging.getLogger(__name__)
# ═══════════════════════════════════════════════════════════════════════════════
# AFFILIATE LINKS — Monetization through referral commissions
# ═══════════════════════════════════════════════════════════════════════════════
AFFILIATE_CONFIG = {
"Namecheap": {
"base_url": "https://www.namecheap.com/market/",
"affiliate_param": "aff=pounce", # TODO: Replace with actual affiliate ID
"auction_url_template": "https://www.namecheap.com/market/domain/{domain}?aff=pounce",
},
"Dynadot": {
"base_url": "https://www.dynadot.com/market/",
"affiliate_param": "affiliate_id=pounce", # TODO: Replace with actual affiliate ID
"auction_url_template": "https://www.dynadot.com/market/auction/{domain}?affiliate_id=pounce",
},
"Sav": {
"base_url": "https://www.sav.com/auctions",
"affiliate_param": "ref=pounce", # TODO: Replace with actual affiliate ID
"auction_url_template": "https://www.sav.com/domain/{domain}?ref=pounce",
},
"GoDaddy": {
"base_url": "https://auctions.godaddy.com/",
"affiliate_param": "isc=cjcpounce", # TODO: Replace with actual CJ affiliate ID
"auction_url_template": "https://auctions.godaddy.com/trpItemListing.aspx?domain={domain}&isc=cjcpounce",
},
"DropCatch": {
"base_url": "https://www.dropcatch.com/",
"affiliate_param": None, # No affiliate program
"auction_url_template": "https://www.dropcatch.com/domain/{domain}",
},
"Sedo": {
"base_url": "https://sedo.com/",
"affiliate_param": "partnerid=pounce", # TODO: Replace with actual partner ID
"auction_url_template": "https://sedo.com/search/details/?domain={domain}&partnerid=pounce",
},
"NameJet": {
"base_url": "https://www.namejet.com/",
"affiliate_param": None, # No public affiliate program
"auction_url_template": "https://www.namejet.com/pages/Auctions/ViewAuctions.aspx?domain={domain}",
},
"ExpiredDomains": {
"base_url": "https://www.expireddomains.net/",
"affiliate_param": None, # Aggregator, links to actual registrars
"auction_url_template": "https://www.expireddomains.net/domain-name-search/?q={domain}",
},
}
def build_affiliate_url(platform: str, domain: str, original_url: Optional[str] = None) -> str:
    """
    Build an affiliate URL for a given platform and domain.

    If the platform has a URL template configured, the returned URL carries the
    affiliate tracking parameter. Otherwise, falls back to the caller-supplied
    original URL, or to a Google search for the domain as a last resort.
    """
    config = AFFILIATE_CONFIG.get(platform, {})
    if config.get("auction_url_template"):
        return config["auction_url_template"].format(domain=domain)
    return original_url or f"https://www.google.com/search?q={domain}+auction"
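# Example (sketch): platforms with a configured template get a tracked URL;
# unknown platforms ("SomeOtherPlatform" below is hypothetical) fall back to
# the caller-supplied URL.
#
#     build_affiliate_url("Dynadot", "example.com")
#     -> "https://www.dynadot.com/market/auction/example.com?affiliate_id=pounce"
#     build_affiliate_url("SomeOtherPlatform", "example.com", "https://example.org/listing")
#     -> "https://example.org/listing"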
# ═══════════════════════════════════════════════════════════════════════════════
# NAMECHEAP SCRAPER — GraphQL API
# ═══════════════════════════════════════════════════════════════════════════════
class NamecheapApiScraper:
"""
Scraper for Namecheap Marketplace using their hidden GraphQL API.
Endpoint: https://aftermarketapi.namecheap.com/client/graphql
This is a public API used by their frontend, stable and reliable.
"""
GRAPHQL_ENDPOINT = "https://aftermarketapi.namecheap.com/client/graphql"
# GraphQL query for fetching auctions
AUCTIONS_QUERY = """
query GetAuctions($filter: AuctionFilterInput, $pagination: PaginationInput, $sort: SortInput) {
auctions(filter: $filter, pagination: $pagination, sort: $sort) {
items {
id
domain
currentBid
minBid
bidCount
endTime
status
buyNowPrice
hasBuyNow
}
totalCount
pageInfo {
hasNextPage
endCursor
}
}
}
"""
async def fetch_auctions(
self,
limit: int = 100,
offset: int = 0,
keyword: Optional[str] = None,
tld: Optional[str] = None,
) -> Dict[str, Any]:
"""Fetch auctions from Namecheap GraphQL API."""
try:
async with httpx.AsyncClient(timeout=30.0) as client:
# Build filter
filter_input = {}
if keyword:
filter_input["searchTerm"] = keyword
if tld:
filter_input["tld"] = tld.lstrip(".")
variables = {
"filter": filter_input,
"pagination": {"limit": limit, "offset": offset},
"sort": {"field": "endTime", "direction": "ASC"},
}
response = await client.post(
self.GRAPHQL_ENDPOINT,
json={
"query": self.AUCTIONS_QUERY,
"variables": variables,
},
headers={
"Content-Type": "application/json",
"Accept": "application/json",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
"Origin": "https://www.namecheap.com",
"Referer": "https://www.namecheap.com/market/",
},
)
if response.status_code != 200:
logger.error(f"Namecheap API error: {response.status_code}")
return {"items": [], "total": 0, "error": response.text}
data = response.json()
if "errors" in data:
logger.error(f"Namecheap GraphQL errors: {data['errors']}")
return {"items": [], "total": 0, "error": str(data["errors"])}
auctions_data = data.get("data", {}).get("auctions", {})
items = auctions_data.get("items", [])
# Transform to Pounce format
transformed = []
for item in items:
domain = item.get("domain", "")
tld_part = domain.rsplit(".", 1)[-1] if "." in domain else ""
transformed.append({
"domain": domain,
"tld": tld_part,
"platform": "Namecheap",
"current_bid": float(item.get("currentBid", 0)),
"min_bid": float(item.get("minBid", 0)),
"num_bids": int(item.get("bidCount", 0)),
"end_time": item.get("endTime"),
"buy_now_price": float(item.get("buyNowPrice")) if item.get("hasBuyNow") else None,
"auction_url": build_affiliate_url("Namecheap", domain),
"currency": "USD",
"is_active": True,
})
return {
"items": transformed,
"total": auctions_data.get("totalCount", 0),
"has_more": auctions_data.get("pageInfo", {}).get("hasNextPage", False),
}
except Exception as e:
logger.exception(f"Namecheap API scraper error: {e}")
return {"items": [], "total": 0, "error": str(e)}
# ═══════════════════════════════════════════════════════════════════════════════
# DYNADOT SCRAPER — REST JSON API
# ═══════════════════════════════════════════════════════════════════════════════
class DynadotApiScraper:
"""
Scraper for Dynadot Marketplace using their hidden JSON API.
Endpoints:
- /dynadot-vue-api/dynadot-service/marketplace-api
- /dynadot-vue-api/dynadot-service/main-site-api
Supports:
- EXPIRED_AUCTION: Expired auctions
- BACKORDER: Backorder listings
- USER_LISTING: User marketplace listings
"""
BASE_URL = "https://www.dynadot.com"
MARKETPLACE_API = "/dynadot-vue-api/dynadot-service/marketplace-api"
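    # Response shape (sketch, inferred from the parsing below; the API is
    # undocumented and field names vary between listing types):
    #
    #     {"code": 200, "msg": "success",
    #      "data": {"total_count": 100,
    #               "records": [{"domain": "example.com",
    #                            "end_time_stamp": 1765440000000,
    #                            "bid_price": "1,250", "bids": 12, "age": 8}]}}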
async def fetch_auctions(
self,
aftermarket_type: str = "EXPIRED_AUCTION",
page_size: int = 100,
page_index: int = 0,
keyword: Optional[str] = None,
) -> Dict[str, Any]:
"""Fetch auctions from Dynadot REST API."""
try:
async with httpx.AsyncClient(timeout=30.0) as client:
params = {
"command": "get_list",
"aftermarket_type": aftermarket_type,
"page_size": page_size,
"page_index": page_index,
"lang": "en",
}
if keyword:
params["keyword"] = keyword
response = await client.post(
f"{self.BASE_URL}{self.MARKETPLACE_API}",
params=params,
headers={
"Accept": "application/json",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
"Referer": "https://www.dynadot.com/market",
},
)
if response.status_code != 200:
logger.error(f"Dynadot API error: {response.status_code}")
return {"items": [], "total": 0, "error": response.text}
data = response.json()
                # Dynadot signals success with code 0/200 or msg == "success"
                if data.get("code") not in (0, 200) and data.get("msg") != "success":
logger.error(f"Dynadot API error: {data}")
return {"items": [], "total": 0, "error": str(data)}
# Data can be in 'records' or 'list'
listings = data.get("data", {}).get("records", []) or data.get("data", {}).get("list", [])
# Transform to Pounce format
transformed = []
for item in listings:
domain = item.get("domain", "") or item.get("name", "") or item.get("utf8_name", "")
tld_part = domain.rsplit(".", 1)[-1] if "." in domain else ""
                    # Parse end time (Dynadot may send a millisecond timestamp or a string)
                    end_time = None
                    end_time_stamp = item.get("end_time_stamp")
                    if end_time_stamp:
                        try:
                            end_time = datetime.fromtimestamp(end_time_stamp / 1000)
                        except (ValueError, TypeError, OSError, OverflowError):
                            pass
                    if not end_time:
                        end_time_str = item.get("end_time") or item.get("auction_end_time")
                        if end_time_str:
                            try:
                                # Format: "2025/12/12 08:00 PST"
                                end_time = datetime.strptime(end_time_str.split(" PST")[0], "%Y/%m/%d %H:%M")
                            except (ValueError, TypeError):
                                end_time = datetime.utcnow() + timedelta(days=1)
                    # Parse bid price (can be a string like "$2,550" or a number)
                    bid_price = item.get("bid_price") or item.get("current_bid") or item.get("price") or 0
                    if isinstance(bid_price, str):
                        bid_price = float(bid_price.replace(",", "").replace("$", "") or 0)
transformed.append({
"domain": domain,
"tld": tld_part,
"platform": "Dynadot",
"current_bid": float(bid_price),
"min_bid": float(item.get("start_price", 0) or 0),
"num_bids": int(item.get("bids", 0) or item.get("bid_count", 0) or 0),
"end_time": end_time or datetime.utcnow() + timedelta(days=1),
"buy_now_price": float(item.get("accepted_bid_price")) if item.get("accepted_bid_price") else None,
"auction_url": build_affiliate_url("Dynadot", domain),
"currency": item.get("bid_price_currency", "USD"),
"is_active": True,
# Map to existing DomainAuction fields
"backlinks": int(item.get("links", 0) or 0),
"age_years": int(item.get("age", 0) or 0),
})
return {
"items": transformed,
"total": data.get("data", {}).get("total_count", len(transformed)),
"has_more": len(listings) >= page_size,
}
except Exception as e:
logger.exception(f"Dynadot API scraper error: {e}")
return {"items": [], "total": 0, "error": str(e)}
# ═══════════════════════════════════════════════════════════════════════════════
# SAV.COM SCRAPER — AJAX JSON API
# ═══════════════════════════════════════════════════════════════════════════════
class SavApiScraper:
"""
Scraper for Sav.com Auctions using their hidden AJAX endpoint.
Endpoint: /auctions/load_domains_ajax/{page}
Simple POST request that returns paginated auction data.
"""
BASE_URL = "https://www.sav.com"
AJAX_ENDPOINT = "/auctions/load_domains_ajax"
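    # Example request (sketch; headers mirror fetch_auctions below):
    #
    #     curl -X POST "https://www.sav.com/auctions/load_domains_ajax/0" \
    #          -H "X-Requested-With: XMLHttpRequest" \
    #          -H "Referer: https://www.sav.com/domains/auctions"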
async def fetch_auctions(
self,
page: int = 0,
) -> Dict[str, Any]:
"""Fetch auctions from Sav.com AJAX API."""
try:
async with httpx.AsyncClient(timeout=30.0) as client:
response = await client.post(
f"{self.BASE_URL}{self.AJAX_ENDPOINT}/{page}",
headers={
"Accept": "application/json, text/html",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
"Referer": "https://www.sav.com/domains/auctions",
"X-Requested-With": "XMLHttpRequest",
},
)
if response.status_code != 200:
logger.error(f"Sav API error: {response.status_code}")
return {"items": [], "total": 0, "error": response.text}
                # Sav may answer with JSON or with an HTML fragment carrying
                # the same data, so check the content type before decoding
                content_type = response.headers.get("content-type", "")
                if "application/json" in content_type:
                    data = response.json()
                else:
                    # HTML response - fall back to BeautifulSoup parsing
                    logger.warning("Sav returned HTML instead of JSON, parsing...")
                    return await self._parse_html_response(response.text)
listings = data.get("domains", data.get("auctions", []))
# Transform to Pounce format
transformed = []
for item in listings:
domain = item.get("domain", "") or item.get("name", "")
tld_part = domain.rsplit(".", 1)[-1] if "." in domain else ""
                    # Parse end time (ISO 8601, possibly with a trailing "Z")
                    end_time_str = item.get("end_time") or item.get("ends_at")
                    end_time = None
                    if end_time_str:
                        try:
                            end_time = datetime.fromisoformat(end_time_str.replace("Z", "+00:00"))
                        except (ValueError, TypeError):
                            end_time = datetime.utcnow() + timedelta(days=1)
transformed.append({
"domain": domain,
"tld": tld_part,
"platform": "Sav",
"current_bid": float(item.get("current_bid", 0) or item.get("price", 0)),
"min_bid": float(item.get("min_bid", 0) or 0),
"num_bids": int(item.get("bids", 0) or 0),
"end_time": end_time,
"buy_now_price": float(item.get("buy_now")) if item.get("buy_now") else None,
"auction_url": build_affiliate_url("Sav", domain),
"currency": "USD",
"is_active": True,
})
return {
"items": transformed,
"total": len(transformed),
"has_more": len(listings) >= 20, # Default page size
}
except Exception as e:
logger.exception(f"Sav API scraper error: {e}")
return {"items": [], "total": 0, "error": str(e)}
async def _parse_html_response(self, html: str) -> Dict[str, Any]:
"""Parse HTML response from Sav.com when JSON is not available."""
try:
            # Local import: bs4 is only required for this HTML fallback path
            from bs4 import BeautifulSoup
            soup = BeautifulSoup(html, "html.parser")
# Find auction rows
rows = soup.select(".auction-row, .domain-row, tr[data-domain]")
transformed = []
for row in rows:
domain_el = row.select_one(".domain-name, .name, [data-domain]")
price_el = row.select_one(".price, .bid, .current-bid")
time_el = row.select_one(".time-left, .ends, .countdown")
bids_el = row.select_one(".bids, .bid-count")
if not domain_el:
continue
domain = domain_el.get_text(strip=True) or domain_el.get("data-domain", "")
tld_part = domain.rsplit(".", 1)[-1] if "." in domain else ""
price_text = price_el.get_text(strip=True) if price_el else "0"
price = float("".join(c for c in price_text if c.isdigit() or c == ".") or "0")
bids_text = bids_el.get_text(strip=True) if bids_el else "0"
bids = int("".join(c for c in bids_text if c.isdigit()) or "0")
transformed.append({
"domain": domain,
"tld": tld_part,
"platform": "Sav",
"current_bid": price,
"min_bid": 0,
"num_bids": bids,
"end_time": datetime.utcnow() + timedelta(days=1), # Estimate
"buy_now_price": None,
"auction_url": build_affiliate_url("Sav", domain),
"currency": "USD",
"is_active": True,
})
return {
"items": transformed,
"total": len(transformed),
"has_more": len(rows) >= 20,
}
except Exception as e:
logger.exception(f"Sav HTML parsing error: {e}")
return {"items": [], "total": 0, "error": str(e)}
# ═══════════════════════════════════════════════════════════════════════════════
# UNIFIED SCRAPER — Combines all hidden API scrapers
# ═══════════════════════════════════════════════════════════════════════════════
class HiddenApiScraperService:
"""
Unified service that combines all hidden API scrapers.
Priority order:
1. JSON APIs (most reliable)
2. GraphQL APIs (Namecheap)
3. AJAX endpoints (fallback)
All URLs include affiliate tracking for monetization.
"""
def __init__(self):
self.namecheap = NamecheapApiScraper()
self.dynadot = DynadotApiScraper()
self.sav = SavApiScraper()
async def scrape_all(self, limit_per_platform: int = 100) -> Dict[str, Any]:
"""
Scrape all platforms using hidden APIs.
Returns combined results with platform breakdown.
"""
results = {
"total_found": 0,
"platforms": {},
"errors": [],
"items": [],
}
# Scrape Namecheap
try:
namecheap_data = await self.namecheap.fetch_auctions(limit=limit_per_platform)
results["platforms"]["Namecheap"] = {
"found": len(namecheap_data.get("items", [])),
"total": namecheap_data.get("total", 0),
}
results["items"].extend(namecheap_data.get("items", []))
results["total_found"] += len(namecheap_data.get("items", []))
if namecheap_data.get("error"):
results["errors"].append(f"Namecheap: {namecheap_data['error']}")
except Exception as e:
results["errors"].append(f"Namecheap: {str(e)}")
# Scrape Dynadot
try:
dynadot_data = await self.dynadot.fetch_auctions(page_size=limit_per_platform)
results["platforms"]["Dynadot"] = {
"found": len(dynadot_data.get("items", [])),
"total": dynadot_data.get("total", 0),
}
results["items"].extend(dynadot_data.get("items", []))
results["total_found"] += len(dynadot_data.get("items", []))
if dynadot_data.get("error"):
results["errors"].append(f"Dynadot: {dynadot_data['error']}")
except Exception as e:
results["errors"].append(f"Dynadot: {str(e)}")
# Scrape Sav.com
try:
sav_data = await self.sav.fetch_auctions(page=0)
results["platforms"]["Sav"] = {
"found": len(sav_data.get("items", [])),
"total": sav_data.get("total", 0),
}
results["items"].extend(sav_data.get("items", []))
results["total_found"] += len(sav_data.get("items", []))
if sav_data.get("error"):
results["errors"].append(f"Sav: {sav_data['error']}")
except Exception as e:
results["errors"].append(f"Sav: {str(e)}")
return results
# Export instances
namecheap_scraper = NamecheapApiScraper()
dynadot_scraper = DynadotApiScraper()
sav_scraper = SavApiScraper()
hidden_api_scraper = HiddenApiScraperService()
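
# Minimal smoke test (sketch): run this module directly to exercise all three
# scrapers once. Assumes network access; the printed keys match scrape_all().
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        results = await hidden_api_scraper.scrape_all(limit_per_platform=10)
        print(f"Total found: {results['total_found']}")
        for platform, stats in results["platforms"].items():
            print(f"  {platform}: {stats['found']} found / {stats['total']} total")
        for err in results["errors"]:
            print(f"  ERROR: {err}")

    asyncio.run(_demo())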