"""Namecheap TLD price scraper using their public pricing API."""
import logging
from datetime import datetime
from typing import Optional

import httpx

from app.services.tld_scraper.base import BaseTLDScraper, TLDPriceData, ScraperError

logger = logging.getLogger(__name__)


class NamecheapScraper(BaseTLDScraper):
    """
    Scraper for Namecheap domain prices.

    Uses Namecheap's public API endpoint that powers their pricing page.
    No API key required - this is the same data shown on their website.

    If the endpoint cannot be reached, answers with a non-200 status, or
    yields no usable entries, a manually curated static price table is
    returned instead (see ``_scrape_from_static``).
    """

    name = "namecheap"
    base_url = "https://www.namecheap.com"

    # Namecheap's internal API for TLD pricing (used by their website)
    PRICING_API = "https://www.namecheap.com/domains/domain-search/api/searchDomains"
    TLD_LIST_API = "https://www.namecheap.com/domains/registration/api/getTldList"

    # Alternative: Their public pricing page data endpoint
    PRICING_PAGE = "https://www.namecheap.com/domains/new-tlds/explore/"

    # Known Namecheap prices (as of Dec 2024)
    # Source: https://www.namecheap.com/domains/registration/
    # Entries flagged with "promo": True report their "reg" price as the
    # promo price as well.
    KNOWN_PRICES = {
        "com": {"reg": 9.58, "renew": 14.58, "transfer": 9.48},
        "net": {"reg": 12.88, "renew": 16.88, "transfer": 12.78},
        "org": {"reg": 10.98, "renew": 15.98, "transfer": 10.88},
        "io": {"reg": 32.88, "renew": 38.88, "transfer": 32.78},
        "co": {"reg": 11.98, "renew": 29.98, "transfer": 11.88},
        "ai": {"reg": 74.98, "renew": 74.98, "transfer": 74.88},
        "dev": {"reg": 14.98, "renew": 17.98, "transfer": 14.88},
        "app": {"reg": 16.98, "renew": 19.98, "transfer": 16.88},
        "xyz": {"reg": 1.00, "renew": 13.98, "transfer": 1.00, "promo": True},
        "tech": {"reg": 5.98, "renew": 49.98, "transfer": 5.88, "promo": True},
        "online": {"reg": 2.98, "renew": 39.98, "transfer": 2.88, "promo": True},
        "store": {"reg": 3.88, "renew": 56.88, "transfer": 3.78, "promo": True},
        "me": {"reg": 5.98, "renew": 19.98, "transfer": 5.88},
        "info": {"reg": 4.98, "renew": 22.98, "transfer": 4.88},
        "biz": {"reg": 14.98, "renew": 20.98, "transfer": 14.88},
        "ch": {"reg": 12.98, "renew": 12.98, "transfer": 12.88},
        "de": {"reg": 9.98, "renew": 11.98, "transfer": 9.88},
        "uk": {"reg": 8.88, "renew": 10.98, "transfer": 8.78},
    }

    async def scrape(self) -> list[TLDPriceData]:
        """
        Scrape TLD prices from Namecheap.

        Requests ``TLD_LIST_API`` and converts every entry that carries a
        TLD name and a registration price. The static fallback table is
        returned instead when the request fails or times out, the status
        is not 200, the payload is empty, or no entry could be parsed.

        Returns:
            List of TLDPriceData objects with pricing for available TLDs
        """
        try:
            # The client is only needed for the request itself; parsing
            # happens after the connection has been released.
            async with httpx.AsyncClient(timeout=self.timeout) as client:
                response = await client.get(
                    self.TLD_LIST_API,
                    headers={
                        "User-Agent": self.get_user_agent(),
                        "Accept": "application/json",
                        "Referer": "https://www.namecheap.com/domains/registration/",
                    },
                )

            if response.status_code != 200:
                # Try alternate method: scrape from static data
                return await self._scrape_from_static()

            data = response.json()
            if not data:
                return await self._scrape_from_static()

            # NOTE(review): datetime.utcnow() is deprecated since Python 3.12.
            # It is kept so scraped_at stays a naive UTC timestamp as before;
            # confirm downstream expectations before switching to an aware one.
            now = datetime.utcnow()

            # The payload is either a bare list of entries or an object
            # holding them under "tlds"; anything else yields no entries.
            if isinstance(data, list):
                tlds = data
            elif isinstance(data, dict):
                tlds = data.get("tlds") or []
            else:
                tlds = []

            results = []
            for tld_data in tlds:
                try:
                    entry = self._parse_tld_entry(tld_data, now)
                except Exception as e:
                    # One malformed entry must not discard the whole scrape.
                    logger.warning(f"Error parsing Namecheap TLD: {e}")
                    continue
                if entry is not None:
                    results.append(entry)

            if not results:
                # A 200 response without a single usable entry is treated
                # like any other failure instead of returning nothing.
                logger.warning(
                    "Namecheap API returned no usable TLD prices, "
                    "falling back to static data"
                )
                return await self._scrape_from_static()

            logger.info(f"Successfully scraped {len(results)} TLD prices from Namecheap")
            return results

        except httpx.TimeoutException:
            logger.warning("Namecheap API timeout, falling back to static data")
            return await self._scrape_from_static()
        except httpx.RequestError as e:
            logger.warning(f"Namecheap API request error: {e}, falling back to static data")
            return await self._scrape_from_static()
        except Exception as e:
            # Top-level boundary: any unexpected failure (e.g. invalid JSON)
            # still produces the static fallback rather than propagating.
            logger.error(f"Namecheap scraper error: {e}")
            return await self._scrape_from_static()

    def _parse_tld_entry(self, tld_data: dict, scraped_at: datetime) -> Optional[TLDPriceData]:
        """
        Convert one API entry into a TLDPriceData record.

        Args:
            tld_data: A single entry from the API payload.
            scraped_at: Timestamp stored on the resulting record.

        Returns:
            The record, or None when the entry has no TLD name or no
            valid registration price.
        """
        tld = self._extract_tld(tld_data)
        if not tld:
            return None

        reg_price = self._extract_price(tld_data, "registration")
        if reg_price is None:
            return None

        renewal_price = self._extract_price(tld_data, "renewal")
        transfer_price = self._extract_price(tld_data, "transfer")
        promo_price = (
            self._extract_price(tld_data, "promo")
            or self._extract_price(tld_data, "special")
        )

        return TLDPriceData(
            tld=tld,  # already lower-cased and stripped of leading dots
            registrar="namecheap",
            registration_price=reg_price,
            # Without a renewal price the registration price is reported.
            renewal_price=renewal_price or reg_price,
            transfer_price=transfer_price,
            promo_price=promo_price,
            currency="USD",
            source="api",
            confidence=0.95,  # Slightly lower than official API
            scraped_at=scraped_at,
        )

    async def _scrape_from_static(self) -> list[TLDPriceData]:
        """
        Fallback: Return commonly known Namecheap prices.

        These are manually curated prices for the most important TLDs
        (``KNOWN_PRICES``). Updated periodically based on Namecheap's
        public pricing page.

        Returns:
            One TLDPriceData per entry of ``KNOWN_PRICES``, marked with
            source "static_fallback" and confidence 0.9.
        """
        now = datetime.utcnow()

        results = [
            TLDPriceData(
                tld=tld,
                registrar="namecheap",
                registration_price=prices["reg"],
                renewal_price=prices["renew"],
                transfer_price=prices.get("transfer"),
                promo_price=prices["reg"] if prices.get("promo") else None,
                currency="USD",
                source="static_fallback",
                confidence=0.9,
                scraped_at=now,
            )
            for tld, prices in self.KNOWN_PRICES.items()
        ]

        logger.info(f"Using {len(results)} static Namecheap prices as fallback")
        return results

    def _extract_tld(self, data: dict) -> Optional[str]:
        """
        Extract TLD from various response formats.

        Tries the keys "tld", "extension", "name" and "Tld" in that order
        and returns the first non-empty value, lower-cased and without
        surrounding whitespace or leading dots.

        Returns:
            The normalized TLD, or None when ``data`` is not a dict or no
            key holds a usable value.
        """
        if not isinstance(data, dict):
            return None

        for key in ("tld", "extension", "name", "Tld"):
            value = data.get(key)
            if value is None:
                # A null value must not be stringified into the TLD "none".
                continue
            tld = str(value).strip().lower().lstrip(".")
            if tld:
                return tld
        return None

    def _extract_price(self, data: dict, price_type: str) -> Optional[float]:
        """
        Extract price from response data.

        Looks up ``price_type`` under several key spellings (plain,
        ``<type>Price``, ``<type>_price`` and their capitalized forms) and
        returns the first value that converts to a float strictly between
        0 and 1000.

        Args:
            data: A single entry from the API payload.
            price_type: Price category, e.g. "registration" or "renewal".

        Returns:
            The price rounded to two decimals, or None when no key holds
            a valid price.
        """
        capitalized = price_type.capitalize()
        # Try various key patterns
        keys_to_try = (
            price_type,
            f"{price_type}Price",
            f"{price_type}_price",
            capitalized,
            f"{capitalized}Price",
        )

        for key in keys_to_try:
            if key not in data:
                continue
            value = data[key]
            if isinstance(value, bool):
                # float(True) == 1.0, so a flag such as "promo": true would
                # otherwise be reported as a 1.00 price.
                continue
            try:
                price = float(value)
            except (ValueError, TypeError):
                continue
            if 0 < price < 1000:
                return round(price, 2)
        return None

    async def health_check(self) -> bool:
        """
        Check if Namecheap is accessible.

        Returns:
            True when a GET on ``base_url`` (following redirects) ends
            with status 200; False on any other status or on any error.
        """
        try:
            async with httpx.AsyncClient(timeout=10.0) as client:
                response = await client.get(
                    self.base_url,
                    headers=self.get_headers(),
                    follow_redirects=True,
                )
                return response.status_code == 200
        except Exception as e:
            # Best-effort probe: failures are reported as "unhealthy".
            logger.debug(f"Namecheap health check failed: {e}")
            return False