diff --git a/MARKET_CONCEPT.md b/MARKET_CONCEPT.md index 5d7ebc8..d08df7b 100644 --- a/MARKET_CONCEPT.md +++ b/MARKET_CONCEPT.md @@ -29,26 +29,29 @@ Die **Market Page** ist das Herzstück von Pounce. Hier fließen alle Datenquell │ │ │ 🥇 TIER 0: HIDDEN JSON APIs (Schnellste, Stabilste) │ │ ───────────────────────────────────────────────────────────── │ -│ ✅ Namecheap GraphQL: aftermarketapi.namecheap.com/graphql │ -│ ✅ Dynadot REST: dynadot-vue-api/.../marketplace-api │ -│ ✅ Sav.com AJAX: sav.com/auctions/load_domains_ajax │ +│ ✅ Dynadot REST: 101 Auktionen ← FUNKTIONIERT! │ +│ ⚠️ GoDaddy JSON: findApiProxy/v4 (Cloudflare-blocked) │ +│ ⚠️ NameJet AJAX: LoadPage (Cloudflare-blocked) │ +│ ❌ Namecheap GraphQL: Braucht Query Hash │ +│ ❌ Park.io: API nicht öffentlich │ +│ ❌ Sav.com: HTML-only Fallback │ │ │ │ 🥈 TIER 1: OFFICIAL PARTNER APIs │ │ ───────────────────────────────────────────────────────────── │ -│ ⚠️ DropCatch API: Nur eigene Aktivitäten (nicht public) │ -│ ⏳ Sedo Partner API: Credentials konfiguriert │ +│ ✅ DropCatch API: Konfiguriert (nur eigene Aktivitäten) │ +│ ⏳ Sedo Partner API: Credentials konfiguriert │ │ │ -│ 🥉 TIER 2: WEB SCRAPING (Fallback) │ +│ 🥉 TIER 2: WEB SCRAPING (Stabil) │ │ ───────────────────────────────────────────────────────────── │ -│ ✅ ExpiredDomains.net: ~350 Domains/Scrape │ -│ ✅ GoDaddy RSS: ~10-50 Domains/Scrape │ -│ ✅ NameJet Public: ~10-20 Domains/Scrape │ -│ ✅ DropCatch Public: Fallback wenn API fehlt │ -│ ✅ Sedo Public: Fallback wenn API fehlt │ +│ ✅ ExpiredDomains.net: 425 Domains ← HAUPTQUELLE! 
│ +│ ✅ Sedo Public: 7 Domains │ +│ ⚠️ GoDaddy/NameJet: Cloudflare-protected │ │ │ │ 💎 POUNCE DIRECT (Unique Content) │ │ ───────────────────────────────────────────────────────────── │ -│ ⏳ User-Listings: DNS-verifiziert, 0% Provision │ +│ ⏳ User-Listings: DNS-verifiziert, 0% Provision │ +│ │ +│ 📊 TOTAL: 537+ aktive Auktionen │ │ │ └─────────────────────────────────────────────────────────────────┘ ``` @@ -238,29 +241,33 @@ GET /api/v1/listings # Pounce Direct Listings - [x] Unified Feed API `/auctions/feed` - [x] Pounce Score v2.0 - [x] Vanity Filter -- [x] **TIER 0: Hidden JSON APIs (Namecheap, Dynadot, Sav)** +- [x] **Dynadot REST API** ← 101 Auktionen! +- [x] **GoDaddy Hidden API** (entdeckt, Cloudflare-blocked) +- [x] **NameJet AJAX API** (entdeckt, Cloudflare-blocked) +- [x] **Park.io API** (entdeckt, nicht öffentlich) - [x] **Affiliate-Link System für alle Plattformen** - [x] **FIX: end_time Filter** (nur laufende Auktionen) - [x] **FIX: Cleanup alle 15 Minuten** - [x] **FIX: Scraper alle 2 Stunden** - [x] Sniper Alerts +- [x] **537+ aktive Auktionen in DB** -### 🎯 NÄCHSTE SCHRITTE (Diese Woche) +### 🎯 NÄCHSTE SCHRITTE -1. **Affiliate-IDs einrichten** - - Namecheap Impact Radius Partner +1. **Cloudflare-Bypass für GoDaddy/NameJet** + - Option A: Playwright mit stealth plugin + - Option B: Proxy-Rotation + - Option C: Headless Browser as a Service + +2. **Affiliate-IDs einrichten** + - Dynadot Affiliate Program (JETZT - funktioniert!) - GoDaddy CJ Affiliate - - Dynadot Affiliate Program - Sedo Partner Program -2. **Erste Pounce Direct Listings erstellen** +3. **Erste Pounce Direct Listings erstellen** - Test-Domains zum Verifizieren des Flows - USP aktivieren! -3. **Sedo API Credentials eingeben** - - Sedo.com → Mein Sedo → API-Zugang - - Partner ID + SignKey in `.env` - ### 🔮 PHASE 2-3 (6-12 Monate) 1. 
# ═══════════════════════════════════════════════════════════════════════════════
# GODADDY SCRAPER — Hidden REST JSON API
# ═══════════════════════════════════════════════════════════════════════════════

class GoDaddyApiScraper:
    """
    Scraper for GoDaddy Auctions using their hidden JSON API.

    Discovered endpoint:
        https://auctions.godaddy.com/beta/findApiProxy/v4/aftermarket/find/auction/recommend

    Query parameters:
        paginationSize  -- number of results (max 150)
        paginationStart -- offset
        sortBy          -- auctionBids:desc, auctionValuationPrice:desc, endingAt:asc
        endTimeAfter    -- ISO timestamp (used to exclude already-ended auctions)
        typeIncludeList -- 14,16,38 (auction types)
    """

    BASE_URL = "https://auctions.godaddy.com"
    API_ENDPOINT = "/beta/findApiProxy/v4/aftermarket/find/auction/recommend"

    async def fetch_auctions(
        self,
        limit: int = 100,
        offset: int = 0,
        sort_by: str = "auctionBids:desc",
        ending_within_hours: Optional[int] = None,
    ) -> Dict[str, Any]:
        """Fetch auctions from the GoDaddy hidden JSON API.

        Args:
            limit: Max results to request (capped at the API's 150-per-page limit).
            offset: Pagination offset.
            sort_by: API sort expression (e.g. "auctionBids:desc").
            ending_within_hours: If set, only auctions ending within this window.

        Returns:
            Dict with "items" (Pounce-format listings), "total" and "has_more";
            on failure "items" is empty and an "error" key is present.
        """
        try:
            async with httpx.AsyncClient(timeout=30.0) as client:
                params = {
                    "paginationSize": min(limit, 150),  # API hard-caps page size at 150
                    "paginationStart": offset,
                    "sortBy": sort_by,
                    "typeIncludeList": "14,16,38",  # All auction types
                    # Only auctions that are still running.
                    "endTimeAfter": datetime.utcnow().isoformat() + "Z",
                }

                if ending_within_hours:
                    end_before = (
                        datetime.utcnow() + timedelta(hours=ending_within_hours)
                    ).isoformat() + "Z"
                    params["endTimeBefore"] = end_before

                response = await client.get(
                    f"{self.BASE_URL}{self.API_ENDPOINT}",
                    params=params,
                    headers={
                        "Accept": "application/json",
                        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
                        "Referer": "https://auctions.godaddy.com/beta",
                    },
                )

                if response.status_code != 200:
                    logger.error(f"GoDaddy API error: {response.status_code}")
                    return {"items": [], "total": 0, "error": response.text}

                data = response.json()

                # GoDaddy returns listings in the 'results' array.
                listings = data.get("results", [])

                # Transform to Pounce format.
                transformed = []
                for item in listings:
                    domain = item.get("fqdn", "") or item.get("domain", "")
                    tld_part = domain.rsplit(".", 1)[-1] if "." in domain else ""

                    # Parse end time (field name varies between responses).
                    end_time = None
                    end_at = item.get("endingAt") or item.get("auctionEndTime")
                    if end_at:
                        try:
                            end_time = datetime.fromisoformat(
                                end_at.replace("Z", "+00:00")
                            ).replace(tzinfo=None)
                        # FIX: was a bare `except:` — only swallow parse failures,
                        # not SystemExit/KeyboardInterrupt/etc.
                        except (ValueError, TypeError, AttributeError):
                            pass

                    # Parse price (can appear under several different fields).
                    price = (
                        item.get("price")
                        or item.get("currentBidPrice")
                        or item.get("auctionPrice")
                        or item.get("minBid")
                        or 0
                    )

                    transformed.append({
                        "domain": domain,
                        "tld": tld_part,
                        "platform": "GoDaddy",
                        "current_bid": float(price) if price else 0,
                        "min_bid": float(item.get("minBid", 0) or 0),
                        "num_bids": int(item.get("bids", 0) or item.get("bidCount", 0) or 0),
                        # Fallback end time: assume ~1 day when the API omits it.
                        "end_time": end_time or datetime.utcnow() + timedelta(days=1),
                        "buy_now_price": float(item.get("buyNowPrice")) if item.get("buyNowPrice") else None,
                        "auction_url": build_affiliate_url("GoDaddy", domain),
                        "currency": "USD",
                        "is_active": True,
                        "traffic": int(item.get("traffic", 0) or 0),
                        "domain_authority": int(item.get("domainAuthority", 0) or item.get("valuationPrice", 0) or 0),
                    })

                return {
                    "items": transformed,
                    "total": data.get("totalRecordCount", len(transformed)),
                    "has_more": len(listings) >= limit,
                }

        except Exception as e:
            logger.exception(f"GoDaddy API scraper error: {e}")
            return {"items": [], "total": 0, "error": str(e)}
# ═══════════════════════════════════════════════════════════════════════════════
# PARK.IO SCRAPER — Backorder Service API
# ═══════════════════════════════════════════════════════════════════════════════

class ParkIoApiScraper:
    """
    Scraper for Park.io domain backorders.

    Park.io specializes in catching expiring domains - great for drops!

    Endpoint: https://park.io/api/domains
    """

    BASE_URL = "https://park.io"
    API_ENDPOINT = "/api/domains"

    async def fetch_pending_drops(
        self,
        limit: int = 100,
        tld: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Fetch pending domain drops from Park.io.

        Args:
            limit: Max number of drops to request.
            tld: Optional TLD filter (leading dot is stripped).

        Returns:
            Dict with "items" (Pounce-format backorder entries), "total" and
            "has_more"; on failure "items" is empty and "error" is present.
        """
        try:
            async with httpx.AsyncClient(timeout=30.0) as client:
                params = {
                    "limit": limit,
                    "status": "pending",  # Pending drops
                }

                if tld:
                    params["tld"] = tld.lstrip(".")

                response = await client.get(
                    f"{self.BASE_URL}{self.API_ENDPOINT}",
                    params=params,
                    headers={
                        "Accept": "application/json",
                        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
                    },
                )

                if response.status_code != 200:
                    logger.error(f"Park.io API error: {response.status_code}")
                    return {"items": [], "total": 0, "error": response.text}

                data = response.json()
                # API may return either {"domains": [...]} or a bare list.
                domains = data.get("domains", []) if isinstance(data, dict) else data

                # Transform to Pounce format.
                transformed = []
                for item in domains:
                    domain = item.get("domain", "") or item.get("name", "")
                    tld_part = domain.rsplit(".", 1)[-1] if "." in domain else ""

                    # Parse drop date; fall back to "tomorrow" on bad data.
                    drop_date = None
                    drop_at = item.get("drop_date") or item.get("expires_at")
                    if drop_at:
                        try:
                            drop_date = datetime.fromisoformat(
                                drop_at.replace("Z", "+00:00")
                            ).replace(tzinfo=None)
                        # FIX: was a bare `except:` — only catch parse failures.
                        except (ValueError, TypeError, AttributeError):
                            drop_date = datetime.utcnow() + timedelta(days=1)

                    transformed.append({
                        "domain": domain,
                        "tld": tld_part,
                        "platform": "Park.io",
                        "current_bid": float(item.get("price", 99)),  # Park.io default price
                        "min_bid": float(item.get("min_price", 99)),
                        "num_bids": int(item.get("backorders", 0) or 0),  # Number of backorders
                        "end_time": drop_date or datetime.utcnow() + timedelta(days=1),
                        "buy_now_price": None,  # Backorder, not auction
                        "auction_url": f"https://park.io/domains/{domain}",
                        "auction_type": "backorder",
                        "currency": "USD",
                        "is_active": True,
                    })

                return {
                    "items": transformed,
                    "total": len(transformed),
                    "has_more": len(domains) >= limit,
                }

        except Exception as e:
            logger.exception(f"Park.io API scraper error: {e}")
            return {"items": [], "total": 0, "error": str(e)}


# ═══════════════════════════════════════════════════════════════════════════════
# NAMEJET SCRAPER — Hidden AJAX API
# ═══════════════════════════════════════════════════════════════════════════════

class NameJetApiScraper:
    """
    Scraper for NameJet auctions using their AJAX endpoint.

    NameJet is owned by GoDaddy but operates independently.
    Uses a hidden AJAX endpoint for loading auction data.
    """

    BASE_URL = "https://www.namejet.com"
    AJAX_ENDPOINT = "/PreRelease/Auctions/LoadPage"

    async def fetch_auctions(
        self,
        limit: int = 100,
        page: int = 1,
        sort_by: str = "EndTime",
    ) -> Dict[str, Any]:
        """Fetch auctions from the NameJet AJAX API.

        Args:
            limit: Rows per page to request.
            page: 1-based page index.
            sort_by: Grid sort column (NameJet jqGrid "sidx" value).

        Returns:
            Dict with "items", "total" and "has_more"; on failure "items" is
            empty and "error" is present. Falls back to HTML parsing when the
            endpoint does not return JSON.
        """
        try:
            async with httpx.AsyncClient(timeout=30.0) as client:
                # NameJet uses POST with form data (jqGrid-style paging params).
                form_data = {
                    "page": page,
                    "rows": limit,
                    "sidx": sort_by,
                    "sord": "asc",
                }

                response = await client.post(
                    f"{self.BASE_URL}{self.AJAX_ENDPOINT}",
                    data=form_data,
                    headers={
                        "Accept": "application/json",
                        "Content-Type": "application/x-www-form-urlencoded",
                        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
                        "Referer": "https://www.namejet.com/PreRelease/Auctions",
                        "X-Requested-With": "XMLHttpRequest",
                    },
                )

                if response.status_code != 200:
                    logger.error(f"NameJet API error: {response.status_code}")
                    return {"items": [], "total": 0, "error": response.text}

                # Try JSON first, fall back to HTML parsing.
                try:
                    data = response.json()
                # FIX: was a bare `except:` — JSON decoding raises ValueError
                # (json.JSONDecodeError is a subclass).
                except ValueError:
                    return await self._parse_html_response(response.text)

                # NameJet returns a 'rows' array with auction data.
                rows = data.get("rows", [])

                # Transform to Pounce format.
                transformed = []
                for item in rows:
                    # NameJet format: item.cell contains [domain, endTime, price, bids, ...]
                    cell = item.get("cell", [])
                    if len(cell) < 4:
                        continue

                    domain = cell[0] if isinstance(cell[0], str) else cell[0].get("domain", "")
                    tld_part = domain.rsplit(".", 1)[-1] if "." in domain else ""

                    # Parse end time — try the known formats in order.
                    # FIX: replaced nested bare `except:` blocks with explicit
                    # ValueError handling per format.
                    end_time = None
                    if len(cell) > 1 and cell[1]:
                        for fmt in ("%m/%d/%Y %H:%M:%S", "%Y-%m-%d %H:%M"):
                            try:
                                end_time = datetime.strptime(cell[1], fmt)
                                break
                            except ValueError:
                                continue

                    # Parse price (strip currency formatting).
                    price = 0
                    if len(cell) > 2:
                        price_str = str(cell[2]).replace("$", "").replace(",", "")
                        try:
                            price = float(price_str)
                        except ValueError:  # FIX: was a bare `except:`
                            pass

                    # Parse bid count.
                    bids = 0
                    if len(cell) > 3:
                        try:
                            bids = int(cell[3])
                        except (ValueError, TypeError):  # FIX: was a bare `except:`
                            pass

                    transformed.append({
                        "domain": domain,
                        "tld": tld_part,
                        "platform": "NameJet",
                        "current_bid": price,
                        "min_bid": 0,
                        "num_bids": bids,
                        "end_time": end_time or datetime.utcnow() + timedelta(days=1),
                        "buy_now_price": None,
                        "auction_url": build_affiliate_url("NameJet", domain),
                        "currency": "USD",
                        "is_active": True,
                    })

                return {
                    "items": transformed,
                    "total": data.get("records", len(transformed)),
                    "has_more": len(rows) >= limit,
                }

        except Exception as e:
            logger.exception(f"NameJet API scraper error: {e}")
            return {"items": [], "total": 0, "error": str(e)}

    async def _parse_html_response(self, html: str) -> Dict[str, Any]:
        """Parse an HTML response from NameJet when JSON is not available.

        Only the domain name can be recovered from the markup; price/bid/end
        fields are filled with neutral defaults.
        """
        try:
            from bs4 import BeautifulSoup

            soup = BeautifulSoup(html, "html.parser")
            rows = soup.select("tr[data-domain], .auction-row")

            transformed = []
            for row in rows:
                domain_el = row.select_one("td:first-child, .domain")
                if not domain_el:
                    continue

                domain = domain_el.get_text(strip=True)
                tld_part = domain.rsplit(".", 1)[-1] if "." in domain else ""

                transformed.append({
                    "domain": domain,
                    "tld": tld_part,
                    "platform": "NameJet",
                    "current_bid": 0,
                    "min_bid": 0,
                    "num_bids": 0,
                    # No end time in the HTML — assume ~1 day remaining.
                    "end_time": datetime.utcnow() + timedelta(days=1),
                    "buy_now_price": None,
                    "auction_url": build_affiliate_url("NameJet", domain),
                    "currency": "USD",
                    "is_active": True,
                })

            return {
                "items": transformed,
                "total": len(transformed),
                "has_more": False,
            }

        except Exception as e:
            logger.exception(f"NameJet HTML parsing error: {e}")
            return {"items": [], "total": 0, "error": str(e)}
try: - namecheap_data = await self.namecheap.fetch_auctions(limit=limit_per_platform) - results["platforms"]["Namecheap"] = { - "found": len(namecheap_data.get("items", [])), - "total": namecheap_data.get("total", 0), + godaddy_data = await self.godaddy.fetch_auctions(limit=limit_per_platform) + results["platforms"]["GoDaddy"] = { + "found": len(godaddy_data.get("items", [])), + "total": godaddy_data.get("total", 0), } - results["items"].extend(namecheap_data.get("items", [])) - results["total_found"] += len(namecheap_data.get("items", [])) + results["items"].extend(godaddy_data.get("items", [])) + results["total_found"] += len(godaddy_data.get("items", [])) - if namecheap_data.get("error"): - results["errors"].append(f"Namecheap: {namecheap_data['error']}") + if godaddy_data.get("error"): + results["errors"].append(f"GoDaddy: {godaddy_data['error']}") except Exception as e: - results["errors"].append(f"Namecheap: {str(e)}") + results["errors"].append(f"GoDaddy: {str(e)}") # Scrape Dynadot try: @@ -535,6 +909,42 @@ class HiddenApiScraperService: except Exception as e: results["errors"].append(f"Dynadot: {str(e)}") + # ═══════════════════════════════════════════════════════════ + # TIER 2: AJAX/HTML Scrapers + # ═══════════════════════════════════════════════════════════ + + # Scrape NameJet (NEW) + try: + namejet_data = await self.namejet.fetch_auctions(limit=limit_per_platform) + results["platforms"]["NameJet"] = { + "found": len(namejet_data.get("items", [])), + "total": namejet_data.get("total", 0), + } + results["items"].extend(namejet_data.get("items", [])) + results["total_found"] += len(namejet_data.get("items", [])) + + if namejet_data.get("error"): + results["errors"].append(f"NameJet: {namejet_data['error']}") + + except Exception as e: + results["errors"].append(f"NameJet: {str(e)}") + + # Scrape Park.io (Backorders - NEW) + try: + parkio_data = await self.parkio.fetch_pending_drops(limit=limit_per_platform) + results["platforms"]["Park.io"] = { + 
"found": len(parkio_data.get("items", [])), + "total": parkio_data.get("total", 0), + } + results["items"].extend(parkio_data.get("items", [])) + results["total_found"] += len(parkio_data.get("items", [])) + + if parkio_data.get("error"): + results["errors"].append(f"Park.io: {parkio_data['error']}") + + except Exception as e: + results["errors"].append(f"Park.io: {str(e)}") + # Scrape Sav.com try: sav_data = await self.sav.fetch_auctions(page=0) @@ -551,6 +961,26 @@ class HiddenApiScraperService: except Exception as e: results["errors"].append(f"Sav: {str(e)}") + # ═══════════════════════════════════════════════════════════ + # TIER 3: Experimental (May require fixes) + # ═══════════════════════════════════════════════════════════ + + # Scrape Namecheap (GraphQL - needs query hash) + try: + namecheap_data = await self.namecheap.fetch_auctions(limit=limit_per_platform) + results["platforms"]["Namecheap"] = { + "found": len(namecheap_data.get("items", [])), + "total": namecheap_data.get("total", 0), + } + results["items"].extend(namecheap_data.get("items", [])) + results["total_found"] += len(namecheap_data.get("items", [])) + + if namecheap_data.get("error"): + results["errors"].append(f"Namecheap: {namecheap_data['error']}") + + except Exception as e: + results["errors"].append(f"Namecheap: {str(e)}") + return results @@ -558,5 +988,8 @@ class HiddenApiScraperService: namecheap_scraper = NamecheapApiScraper() dynadot_scraper = DynadotApiScraper() sav_scraper = SavApiScraper() +godaddy_scraper = GoDaddyApiScraper() +parkio_scraper = ParkIoApiScraper() +namejet_scraper = NameJetApiScraper() hidden_api_scraper = HiddenApiScraperService()