pounce/backend/app/services/hidden_api_scrapers.py

"""
Hidden JSON API scrapers for auction platforms.

Important project rule:
- We do NOT generate mock/demo/estimated auction values.
- This module only includes sources that provide verifiable auction fields.

Currently enabled:
- Dynadot hidden JSON API (used by their frontend)

Affiliate links:
- Read from environment variables. If not configured, plain URLs are used.
- No placeholder affiliate IDs are baked into code.
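
Environment variables (all optional):
- DYNADOT_AFFILIATE_ID, GODADDY_ISC, NAMECHEAP_AFFILIATE_ID, SEDO_PARTNER_ID, SAV_REF
  for per-platform affiliate/partner tags.
- SCRAPER_HTTP_PROXY or SCRAPER_PROXY_URL for an outbound HTTP proxy.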
"""
import logging
import os
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional

import httpx

logger = logging.getLogger(__name__)


def build_affiliate_url(platform: str, domain: str, original_url: Optional[str] = None) -> str:
    """
    Build an affiliate URL for a given platform and domain.

    If the affiliate program is not configured, returns the plain provider URL.
    If `original_url` is provided, it is preferred (e.g. ExpiredDomains click-through links).
    """
    if original_url:
        return original_url

    p = (platform or "").strip()
    d = (domain or "").strip().lower()
    if not d:
        return ""

    if p == "Dynadot":
        base = f"https://www.dynadot.com/market/auction/{d}"
        affiliate_id = os.getenv("DYNADOT_AFFILIATE_ID")
        return f"{base}?affiliate_id={affiliate_id}" if affiliate_id else base
    if p == "GoDaddy":
        base = f"https://auctions.godaddy.com/trpItemListing.aspx?domain={d}"
        isc = os.getenv("GODADDY_ISC")
        return f"{base}&isc={isc}" if isc else base
    if p == "Namecheap":
        base = f"https://www.namecheap.com/market/domain/{d}"
        aff = os.getenv("NAMECHEAP_AFFILIATE_ID")
        return f"{base}?aff={aff}" if aff else base
    if p == "Sedo":
        base = f"https://sedo.com/search/details/?domain={d}"
        partner = os.getenv("SEDO_PARTNER_ID")
        return f"{base}&partnerid={partner}" if partner else base
    if p == "Park.io":
        return f"https://park.io/domain/{d}"
    if p == "Sav":
        ref = os.getenv("SAV_REF")
        base = f"https://www.sav.com/domain/{d}"
        return f"{base}?ref={ref}" if ref else base

    return ""
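
# Illustrative example (hypothetical domain): without DYNADOT_AFFILIATE_ID set,
# the plain marketplace URL is returned; with it set, the ID is appended.
#
#   build_affiliate_url("Dynadot", "example.com")
#   -> "https://www.dynadot.com/market/auction/example.com"
#
#   # with DYNADOT_AFFILIATE_ID=12345 in the environment:
#   -> "https://www.dynadot.com/market/auction/example.com?affiliate_id=12345"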


class DynadotApiScraper:
    """
    Scraper for Dynadot Marketplace using their hidden JSON API.

    Endpoint:
    - https://www.dynadot.com/dynadot-vue-api/dynadot-service/marketplace-api
    """

    BASE_URL = "https://www.dynadot.com"
    MARKETPLACE_API = "/dynadot-vue-api/dynadot-service/marketplace-api"
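
    # Illustrative response shape (only the fields the parsing code below relies
    # on; the live payload carries more keys and may vary):
    #   {
    #       "data": {
    #           "total_count": 1204,
    #           "records": [
    #               {
    #                   "name": "example.com",
    #                   "end_time_stamp": 1765526400000,
    #                   "bid_price": "155",
    #                   "bid_price_currency": "USD",
    #                   "bids": 7,
    #                   "age": 12,
    #                   "links": 340,
    #               },
    #               ...
    #           ],
    #       },
    #   }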

    def _parse_end_time(self, item: Dict[str, Any]) -> Optional[datetime]:
        # Dynadot often provides an epoch timestamp in ms
        end_time_stamp = item.get("end_time_stamp")
        if isinstance(end_time_stamp, (int, float)) and end_time_stamp > 0:
            try:
                return datetime.utcfromtimestamp(end_time_stamp / 1000).replace(tzinfo=None)
            except Exception:
                pass

        # Or a string like "2025/12/12 08:00 PST" (timezone ambiguous)
        end_time_str = item.get("end_time") or item.get("auction_end_time")
        if isinstance(end_time_str, str) and end_time_str.strip():
            raw = end_time_str.strip()
            raw = raw.replace(" PST", "").replace(" PDT", "").replace(" UTC", "")
            for fmt in ("%Y/%m/%d %H:%M", "%Y-%m-%d %H:%M:%S"):
                try:
                    return datetime.strptime(raw, fmt).replace(tzinfo=None)
                except Exception:
                    continue

        return None
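
    # Examples of `item` payloads this handles (values are illustrative):
    #   {"end_time_stamp": 1765526400000}           -> epoch in ms
    #   {"end_time": "2025/12/12 08:00 PST"}        -> "%Y/%m/%d %H:%M" after stripping the zone
    #   {"auction_end_time": "2025-12-12 08:00:00"} -> "%Y-%m-%d %H:%M:%S"
    # Anything else returns None and the listing is skipped upstream.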

    async def fetch_auctions(
        self,
        page_size: int = 100,
        page_index: int = 0,
        keyword: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Fetch auctions from Dynadot hidden API."""
        try:
            proxy = os.getenv("SCRAPER_HTTP_PROXY") or os.getenv("SCRAPER_PROXY_URL")
            async with httpx.AsyncClient(timeout=30.0, follow_redirects=True, proxy=proxy) as client:
                params = {
                    "command": "get_list",
                    "aftermarket_type": "EXPIRED_AUCTION",
                    "page_size": page_size,
                    "page_index": page_index,
                    "lang": "en",
                }
                if keyword:
                    params["keyword"] = keyword

                resp = await client.post(
                    f"{self.BASE_URL}{self.MARKETPLACE_API}",
                    params=params,
                    headers={
                        "Accept": "application/json",
                        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
                        "Referer": "https://www.dynadot.com/market",
                    },
                )
                if resp.status_code != 200:
                    return {"items": [], "total": 0, "error": f"HTTP {resp.status_code}: {resp.text[:200]}"}

                data = resp.json()
                listings = data.get("data", {}).get("records", []) or data.get("data", {}).get("list", [])

                transformed: List[Dict[str, Any]] = []
                for item in listings:
                    domain = item.get("domain") or item.get("name") or item.get("utf8_name") or ""
                    domain = str(domain).strip().lower()
                    if not domain or "." not in domain:
                        continue

                    end_time = self._parse_end_time(item)
                    if end_time is None:
                        # No end time -> skip (no fake)
                        continue

                    bid_price = item.get("bid_price") or item.get("current_bid") or item.get("price") or 0
                    if isinstance(bid_price, str):
                        bid_price = bid_price.replace(",", "").replace("$", "").strip()
                    try:
                        current_bid = float(bid_price)
                    except Exception:
                        continue
                    if current_bid <= 0:
                        continue

                    bids = item.get("bids") or item.get("bid_count") or 0
                    try:
                        num_bids = int(bids)
                    except Exception:
                        num_bids = 0

                    tld = domain.rsplit(".", 1)[-1].lower()
                    transformed.append(
                        {
                            "domain": domain,
                            "tld": tld,
                            "platform": "Dynadot",
                            "current_bid": current_bid,
                            "currency": str(item.get("bid_price_currency") or "USD").upper(),
                            "num_bids": num_bids,
                            "end_time": end_time,
                            "auction_url": build_affiliate_url("Dynadot", domain),
                            "buy_now_price": float(item.get("accepted_bid_price")) if item.get("accepted_bid_price") else None,
                            "age_years": int(item.get("age", 0) or 0) or None,
                            "backlinks": int(item.get("links", 0) or 0) or None,
                            "scrape_source": "dynadot:hidden_api",
                        }
                    )

                return {
                    "items": transformed,
                    "total": data.get("data", {}).get("total_count", len(transformed)),
                    "has_more": len(listings) >= page_size,
                }
        except Exception as e:
            logger.exception(f"Dynadot API scraper error: {e}")
            return {"items": [], "total": 0, "error": str(e)}
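
    # Usage sketch (inside an async context, e.g. a scheduled job):
    #
    #   scraper = DynadotApiScraper()
    #   page = await scraper.fetch_auctions(page_size=50, keyword="ai")
    #   for listing in page["items"]:
    #       print(listing["domain"], listing["current_bid"], listing["end_time"])
    #
    # The returned dict always contains "items" and "total"; "error" is only
    # present when the request failed, and "has_more" only on successful fetches.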


class HiddenApiScraperService:
    """Orchestrates enabled hidden API scrapers."""

    def __init__(self):
        self.dynadot = DynadotApiScraper()

    async def scrape_all(self, limit_per_platform: int = 100) -> Dict[str, Any]:
        results: Dict[str, Any] = {"total_found": 0, "platforms": {}, "errors": [], "items": []}
        try:
            dynadot_data = await self.dynadot.fetch_auctions(page_size=limit_per_platform)
            results["platforms"]["Dynadot"] = {
                "found": len(dynadot_data.get("items", [])),
                "total": dynadot_data.get("total", 0),
            }
            results["items"].extend(dynadot_data.get("items", []))
            results["total_found"] += len(dynadot_data.get("items", []))
            if dynadot_data.get("error"):
                results["errors"].append(f"Dynadot: {dynadot_data['error']}")
        except Exception as e:
            results["errors"].append(f"Dynadot: {str(e)}")
        return results
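
    # Aggregated result shape (values are illustrative):
    #   {
    #       "total_found": 37,
    #       "platforms": {"Dynadot": {"found": 37, "total": 1204}},
    #       "errors": [],
    #       "items": [...normalized listing dicts from fetch_auctions...],
    #   }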


# Export instances
dynadot_scraper = DynadotApiScraper()
hidden_api_scraper = HiddenApiScraperService()
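

# Minimal manual-test sketch (an assumption: run ad hoc with network access to
# dynadot.com; not part of the service wiring). Fetches one page of live
# auctions via the orchestrator and prints a short summary.
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        results = await hidden_api_scraper.scrape_all(limit_per_platform=20)
        print(f"Found {results['total_found']} auctions, errors: {results['errors']}")
        for listing in results["items"][:5]:
            print(listing["domain"], listing["current_bid"], listing["end_time"])

    asyncio.run(_demo())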