Compare commits: 783668b015 ... 0cd72bcc8c (8 commits)

Commits:
- 0cd72bcc8c
- a4689fb8c7
- d10dc1d942
- e127f1fb52
- 9c64f61fb6
- de5662ab78
- 0d2cc356b1
- 389379d8bb
@@ -60,6 +60,29 @@ STRIPE_WEBHOOK_SECRET=whsec_pqWdtvFbQTtBgCfDTgHwgtxxcWl7JbsZ
 # Email Verification
 REQUIRE_EMAIL_VERIFICATION=false
 
+# =================================
+# DropCatch API (Official Partner)
+# Docs: https://www.dropcatch.com/hiw/dropcatch-api
+# =================================
+DROPCATCH_CLIENT_ID=pounce:pounce
+DROPCATCH_CLIENT_SECRET=your_dropcatch_secret_here
+DROPCATCH_API_BASE=https://api.dropcatch.com
+
+# =================================
+# Sedo API (Partner API)
+# Docs: https://api.sedo.com/apidocs/v1/
+# Find: Sedo.com → Mein Sedo → API-Zugang
+# =================================
+SEDO_PARTNER_ID=your_sedo_partner_id
+SEDO_SIGN_KEY=your_sedo_signkey
+SEDO_API_BASE=https://api.sedo.com/api/v1/
+
+# =================================
+# Moz API (SEO Data - Optional)
+# =================================
+MOZ_ACCESS_ID=
+MOZ_SECRET_KEY=
+
 # Environment
 ENVIRONMENT=production
 DEBUG=false
MARKET_CONCEPT.md (1621 lines)
File diff suppressed because it is too large.
@@ -981,3 +981,126 @@ async def get_activity_log(
         ],
         "total": total,
     }
+
+
+# ============== API Connection Tests ==============
+
+
+@router.get("/test-apis")
+async def test_external_apis(
+    admin: User = Depends(require_admin),
+):
+    """
+    Test connections to all external APIs.
+
+    Returns status of:
+    - DropCatch API
+    - Sedo API
+    - Moz API (if configured)
+    """
+    from app.services.dropcatch_api import dropcatch_client
+    from app.services.sedo_api import sedo_client
+
+    results = {
+        "tested_at": datetime.utcnow().isoformat(),
+        "apis": {}
+    }
+
+    # Test DropCatch API
+    try:
+        dropcatch_result = await dropcatch_client.test_connection()
+        results["apis"]["dropcatch"] = dropcatch_result
+    except Exception as e:
+        results["apis"]["dropcatch"] = {
+            "success": False,
+            "error": str(e),
+            "configured": dropcatch_client.is_configured
+        }
+
+    # Test Sedo API
+    try:
+        sedo_result = await sedo_client.test_connection()
+        results["apis"]["sedo"] = sedo_result
+    except Exception as e:
+        results["apis"]["sedo"] = {
+            "success": False,
+            "error": str(e),
+            "configured": sedo_client.is_configured
+        }
+
+    # Summary
+    results["summary"] = {
+        "total": len(results["apis"]),
+        "configured": sum(1 for api in results["apis"].values() if api.get("configured")),
+        "connected": sum(1 for api in results["apis"].values() if api.get("success")),
+    }
+
+    return results
+
+
+@router.post("/trigger-scrape")
+async def trigger_auction_scrape(
+    background_tasks: BackgroundTasks,
+    db: Database,
+    admin: User = Depends(require_admin),
+):
+    """
+    Manually trigger auction scraping from all sources.
+
+    This will:
+    1. Try Tier 1 APIs (DropCatch, Sedo) first
+    2. Fall back to web scraping for others
+    """
+    from app.services.auction_scraper import AuctionScraperService
+
+    scraper = AuctionScraperService()
+
+    # Run scraping in background
+    async def run_scrape():
+        async with db.begin():
+            return await scraper.scrape_all_platforms(db)
+
+    background_tasks.add_task(run_scrape)
+
+    return {
+        "message": "Auction scraping started in background",
+        "note": "Check /admin/scrape-status for results"
+    }
+
+
+@router.get("/scrape-status")
+async def get_scrape_status(
+    db: Database,
+    admin: User = Depends(require_admin),
+    limit: int = 10,
+):
+    """Get recent scrape logs."""
+    from app.models.auction import AuctionScrapeLog
+
+    query = (
+        select(AuctionScrapeLog)
+        .order_by(desc(AuctionScrapeLog.started_at))
+        .limit(limit)
+    )
+
+    try:
+        result = await db.execute(query)
+        logs = result.scalars().all()
+    except Exception:
+        return {"logs": [], "error": "Table not found"}
+
+    return {
+        "logs": [
+            {
+                "id": log.id,
+                "platform": log.platform,
+                "status": log.status,
+                "auctions_found": log.auctions_found,
+                "auctions_new": log.auctions_new,
+                "auctions_updated": log.auctions_updated,
+                "error_message": log.error_message,
+                "started_at": log.started_at.isoformat() if log.started_at else None,
+                "completed_at": log.completed_at.isoformat() if log.completed_at else None,
+            }
+            for log in logs
+        ]
+    }
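For reference, a minimal sketch of how an operator could exercise these new admin endpoints from a script. This is illustrative only: the `/admin` prefix is inferred from the `scrape-status` note above, and `BASE_URL`, `TOKEN`, and bearer-token auth are placeholders/assumptions, not part of the change.

```python
# Illustrative only - endpoint paths and auth scheme are assumptions based on the diff above.
import asyncio

import httpx

BASE_URL = "https://pounce.example.com"  # placeholder
TOKEN = "..."                            # placeholder admin token


async def main():
    headers = {"Authorization": f"Bearer {TOKEN}"}
    async with httpx.AsyncClient(base_url=BASE_URL, headers=headers, timeout=30) as client:
        # 1. Check external API connectivity (DropCatch, Sedo)
        apis = (await client.get("/admin/test-apis")).json()
        print(apis.get("summary"))

        # 2. Kick off a manual scrape in the background
        print((await client.post("/admin/trigger-scrape")).json())

        # 3. Poll recent scrape logs
        print((await client.get("/admin/scrape-status", params={"limit": 5})).json())


asyncio.run(main())
```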
@@ -277,8 +277,14 @@ async def search_auctions(
     - Look for value_ratio > 1.0 (estimated value exceeds current bid)
     - Focus on auctions ending soon with low bid counts
     """
-    # Build query
-    query = select(DomainAuction).where(DomainAuction.is_active == True)
+    # Build query - ONLY show active auctions that haven't ended yet
+    now = datetime.utcnow()
+    query = select(DomainAuction).where(
+        and_(
+            DomainAuction.is_active == True,
+            DomainAuction.end_time > now  # ← CRITICAL: only auctions that are still running!
+        )
+    )
 
     # VANITY FILTER: For public (non-logged-in) users, only show premium-looking domains
     # This ensures the first impression is high-quality, not spam domains
@@ -457,9 +463,15 @@ async def get_hot_auctions(
 
     Data is scraped from public auction sites - no mock data.
     """
+    now = datetime.utcnow()
     query = (
         select(DomainAuction)
-        .where(DomainAuction.is_active == True)
+        .where(
+            and_(
+                DomainAuction.is_active == True,
+                DomainAuction.end_time > now  # Only show active auctions
+            )
+        )
         .order_by(DomainAuction.num_bids.desc())
         .limit(limit)
     )
@@ -996,7 +1008,13 @@ async def get_market_feed(
     # 2. EXTERNAL AUCTIONS (Scraped from platforms)
     # ═══════════════════════════════════════════════════════════════
     if source in ["all", "external"]:
-        auction_query = select(DomainAuction).where(DomainAuction.is_active == True)
+        now = datetime.utcnow()
+        auction_query = select(DomainAuction).where(
+            and_(
+                DomainAuction.is_active == True,
+                DomainAuction.end_time > now  # ← CRITICAL: only running auctions!
+            )
+        )
 
         if keyword:
             auction_query = auction_query.where(
@@ -33,6 +33,27 @@ class Settings(BaseSettings):
     check_minute: int = 0
     scheduler_check_interval_hours: int = 24
 
+    # =================================
+    # External API Credentials
+    # =================================
+
+    # DropCatch API (Official Partner API)
+    # Docs: https://www.dropcatch.com/hiw/dropcatch-api
+    dropcatch_client_id: str = ""
+    dropcatch_client_secret: str = ""
+    dropcatch_api_base: str = "https://api.dropcatch.com"
+
+    # Sedo API (Partner API - XML-RPC)
+    # Docs: https://api.sedo.com/apidocs/v1/
+    # Find your credentials: Sedo.com → Mein Sedo → API-Zugang
+    sedo_partner_id: str = ""
+    sedo_sign_key: str = ""
+    sedo_api_base: str = "https://api.sedo.com/api/v1/"
+
+    # Moz API (SEO Data)
+    moz_access_id: str = ""
+    moz_secret_key: str = ""
+
     class Config:
         env_file = ".env"
         env_file_encoding = "utf-8"
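A minimal sketch of how the new settings surface at runtime, assuming the existing `get_settings()` helper: pydantic's `BaseSettings` maps the upper-case `.env` keys shown earlier (DROPCATCH_CLIENT_ID, SEDO_PARTNER_ID, ...) onto these lower-case fields, and the API clients in this change treat empty strings as "not configured".

```python
# Minimal sketch - assumes the Settings fields added above and the existing get_settings() helper.
from app.config import get_settings

settings = get_settings()

# BaseSettings reads DROPCATCH_CLIENT_ID / SEDO_PARTNER_ID etc. from .env (case-insensitive).
dropcatch_ready = bool(settings.dropcatch_client_id and settings.dropcatch_client_secret)
sedo_ready = bool(settings.sedo_partner_id and settings.sedo_sign_key)

print(f"DropCatch configured: {dropcatch_ready}, Sedo configured: {sedo_ready}")
```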
@@ -204,12 +204,30 @@ def setup_scheduler():
         replace_existing=True,
     )
 
-    # Auction scrape every hour (at :30 to avoid conflict with other jobs)
+    # Auction scrape every 2 hours (at :30 to avoid conflict with other jobs)
     scheduler.add_job(
         scrape_auctions,
-        CronTrigger(minute=30),  # Every hour at :30
-        id="hourly_auction_scrape",
-        name="Hourly Auction Scrape",
+        CronTrigger(hour='*/2', minute=30),  # Every 2 hours at :30
+        id="auction_scrape",
+        name="Auction Scrape (2h)",
+        replace_existing=True,
+    )
+
+    # Cleanup expired auctions every 15 minutes (CRITICAL for data freshness!)
+    scheduler.add_job(
+        cleanup_expired_auctions,
+        CronTrigger(minute='*/15'),  # Every 15 minutes
+        id="auction_cleanup",
+        name="Expired Auction Cleanup (15m)",
+        replace_existing=True,
+    )
+
+    # Sniper alert matching every 30 minutes
+    scheduler.add_job(
+        match_sniper_alerts,
+        CronTrigger(minute='*/30'),  # Every 30 minutes
+        id="sniper_matching",
+        name="Sniper Alert Matching (30m)",
         replace_existing=True,
     )
 
@@ -220,7 +238,9 @@ def setup_scheduler():
         f"\n - Tycoon domain check every 10 minutes"
         f"\n - TLD price scrape at 03:00 UTC"
         f"\n - Price change alerts at 04:00 UTC"
-        f"\n - Auction scrape every hour at :30"
+        f"\n - Auction scrape every 2 hours at :30"
+        f"\n - Expired auction cleanup every 15 minutes"
+        f"\n - Sniper alert matching every 30 minutes"
     )
 
@@ -302,6 +322,58 @@ async def check_price_changes():
         logger.exception(f"Price change check failed: {e}")
 
 
+async def cleanup_expired_auctions():
+    """
+    Mark expired auctions as inactive and delete very old ones.
+
+    This is CRITICAL for data freshness! Without this, the Market page
+    would show auctions that ended days ago.
+
+    Runs every 15 minutes to ensure users always see live data.
+    """
+    from app.models.auction import DomainAuction
+    from sqlalchemy import update, delete
+
+    logger.info("Starting expired auction cleanup...")
+
+    try:
+        async with AsyncSessionLocal() as db:
+            now = datetime.utcnow()
+
+            # 1. Mark ended auctions as inactive
+            stmt = (
+                update(DomainAuction)
+                .where(
+                    and_(
+                        DomainAuction.end_time < now,
+                        DomainAuction.is_active == True
+                    )
+                )
+                .values(is_active=False)
+            )
+            result = await db.execute(stmt)
+            marked_inactive = result.rowcount
+
+            # 2. Delete very old inactive auctions (> 7 days)
+            cutoff = now - timedelta(days=7)
+            stmt = delete(DomainAuction).where(
+                and_(
+                    DomainAuction.is_active == False,
+                    DomainAuction.end_time < cutoff
+                )
+            )
+            result = await db.execute(stmt)
+            deleted = result.rowcount
+
+            await db.commit()
+
+            if marked_inactive > 0 or deleted > 0:
+                logger.info(f"Auction cleanup: {marked_inactive} marked inactive, {deleted} deleted")
+
+    except Exception as e:
+        logger.exception(f"Auction cleanup failed: {e}")
+
+
 async def scrape_auctions():
     """Scheduled task to scrape domain auctions from public sources."""
     from app.services.auction_scraper import auction_scraper
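As a quick reference for the cron expressions introduced above, a small sketch of the three triggers and when they fire (the timezone follows the scheduler's configuration):

```python
from apscheduler.triggers.cron import CronTrigger

# Auction scrape: minute 30 of every 2nd hour -> 00:30, 02:30, 04:30, ...
scrape_trigger = CronTrigger(hour="*/2", minute=30)

# Expired auction cleanup: every 15 minutes -> :00, :15, :30, :45
cleanup_trigger = CronTrigger(minute="*/15")

# Sniper alert matching: every 30 minutes -> :00 and :30
sniper_trigger = CronTrigger(minute="*/30")
```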
@@ -1,15 +1,25 @@
 """
 Domain Auction Scraper Service
 
-Scrapes real auction data from various platforms WITHOUT using their APIs.
-Uses web scraping to get publicly available auction information.
+Data Acquisition Strategy (from MARKET_CONCEPT.md):
 
-Supported Platforms:
+TIER 0: HIDDEN JSON APIs (Most Reliable, Fastest)
+- Namecheap GraphQL API (aftermarketapi.namecheap.com)
+- Dynadot REST API (dynadot-vue-api)
+- Sav.com AJAX API
+
+TIER 1: OFFICIAL APIs
+- DropCatch API (Official Partner)
+- Sedo Partner API (if configured)
+
+TIER 2: WEB SCRAPING (Fallback)
 - ExpiredDomains.net (aggregator for deleted domains)
 - GoDaddy Auctions (public listings via RSS/public pages)
-- Sedo (public marketplace)
 - NameJet (public auctions)
-- DropCatch (public auctions)
+
+The scraper tries Tier 0 first, then Tier 1, then Tier 2.
+
+ALL URLs include AFFILIATE TRACKING for monetization!
 
 IMPORTANT:
 - Respects robots.txt
@@ -31,6 +41,21 @@ from sqlalchemy import select, and_, delete
 from sqlalchemy.ext.asyncio import AsyncSession
 
 from app.models.auction import DomainAuction, AuctionScrapeLog
+from app.services.dropcatch_api import dropcatch_client
+from app.services.sedo_api import sedo_client
+from app.services.hidden_api_scrapers import (
+    hidden_api_scraper,
+    build_affiliate_url,
+    AFFILIATE_CONFIG,
+)
+
+# Optional: Playwright for Cloudflare-protected sites
+try:
+    from app.services.playwright_scraper import playwright_scraper
+    PLAYWRIGHT_AVAILABLE = True
+except ImportError:
+    PLAYWRIGHT_AVAILABLE = False
+    playwright_scraper = None
 
 logger = logging.getLogger(__name__)
 
@@ -93,6 +118,13 @@ class AuctionScraperService:
         """
         Scrape all supported platforms and store results in database.
         Returns summary of scraping activity.
+
+        Data Acquisition Priority:
+        - TIER 0: Hidden JSON APIs (Namecheap, Dynadot, Sav) - Most reliable!
+        - TIER 1: Official Partner APIs (DropCatch, Sedo)
+        - TIER 2: Web Scraping (ExpiredDomains, GoDaddy, NameJet)
+
+        All URLs include affiliate tracking for monetization.
         """
         results = {
             "total_found": 0,
@@ -102,15 +134,83 @@ class AuctionScraperService:
             "errors": [],
         }
 
-        # Scrape each platform
+        # ═══════════════════════════════════════════════════════════════
+        # TIER 0: Hidden JSON APIs (Most Reliable!)
+        # These are undocumented but public APIs used by platform frontends
+        # ═══════════════════════════════════════════════════════════════
+        logger.info("🚀 Starting TIER 0: Hidden JSON APIs (Namecheap, Dynadot, Sav)")
+        try:
+            hidden_api_result = await hidden_api_scraper.scrape_all(limit_per_platform=100)
+
+            for item in hidden_api_result.get("items", []):
+                action = await self._store_auction(db, item)
+                platform = item.get("platform", "Unknown")
+
+                if platform not in results["platforms"]:
+                    results["platforms"][platform] = {"found": 0, "new": 0, "updated": 0}
+
+                results["platforms"][platform]["found"] += 1
+                if action == "new":
+                    results["platforms"][platform]["new"] += 1
+                    results["total_new"] += 1
+                elif action == "updated":
+                    results["platforms"][platform]["updated"] += 1
+                    results["total_updated"] += 1
+
+                results["total_found"] += 1
+
+            # Log platform summaries
+            for platform, data in hidden_api_result.get("platforms", {}).items():
+                logger.info(f"✅ {platform} Hidden API: {data.get('found', 0)} auctions")
+
+            if hidden_api_result.get("errors"):
+                for error in hidden_api_result["errors"]:
+                    logger.warning(f"⚠️ Hidden API: {error}")
+                    results["errors"].append(f"Hidden API: {error}")
+
+        except Exception as e:
+            logger.error(f"❌ TIER 0 Hidden APIs failed: {e}")
+            results["errors"].append(f"Hidden APIs: {str(e)}")
+
+        await db.commit()
+
+        # ═══════════════════════════════════════════════════════════════
+        # TIER 1: Official Partner APIs (Best data quality)
+        # ═══════════════════════════════════════════════════════════════
+        logger.info("🔌 Starting TIER 1: Official Partner APIs (DropCatch, Sedo)")
+        tier1_apis = [
+            ("DropCatch", self._fetch_dropcatch_api),
+            ("Sedo", self._fetch_sedo_api),
+        ]
+
+        for platform_name, api_func in tier1_apis:
+            try:
+                api_result = await api_func(db)
+                if api_result.get("found", 0) > 0:
+                    results["platforms"][platform_name] = api_result
+                    results["total_found"] += api_result.get("found", 0)
+                    results["total_new"] += api_result.get("new", 0)
+                    results["total_updated"] += api_result.get("updated", 0)
+                    logger.info(f"✅ {platform_name} API: {api_result['found']} auctions")
+            except Exception as e:
+                logger.warning(f"⚠️ {platform_name} API failed, will try scraping: {e}")
+
+        # ═══════════════════════════════════════════════════════════════
+        # TIER 2: Web Scraping (Fallback for platforms without API access)
+        # ═══════════════════════════════════════════════════════════════
+        logger.info("📦 Starting TIER 2: Web Scraping (ExpiredDomains, GoDaddy, NameJet)")
         scrapers = [
             ("ExpiredDomains", self._scrape_expireddomains),
             ("GoDaddy", self._scrape_godaddy_public),
-            ("Sedo", self._scrape_sedo_public),
             ("NameJet", self._scrape_namejet_public),
-            ("DropCatch", self._scrape_dropcatch_public),
         ]
+
+        # Add fallbacks only if APIs failed
+        if "DropCatch" not in results["platforms"]:
+            scrapers.append(("DropCatch", self._scrape_dropcatch_public))
+        if "Sedo" not in results["platforms"]:
+            scrapers.append(("Sedo", self._scrape_sedo_public))
+
         for platform_name, scraper_func in scrapers:
             try:
                 platform_result = await scraper_func(db)
@@ -122,6 +222,52 @@ class AuctionScraperService:
                 logger.error(f"Error scraping {platform_name}: {e}")
                 results["errors"].append(f"{platform_name}: {str(e)}")
 
+        # ═══════════════════════════════════════════════════════════════
+        # TIER 3: Playwright Stealth (Cloudflare-protected sites)
+        # Uses headless browser with stealth mode to bypass protection
+        # ═══════════════════════════════════════════════════════════════
+        if PLAYWRIGHT_AVAILABLE and playwright_scraper:
+            # Only run Playwright if we didn't get enough data from other sources
+            godaddy_count = results["platforms"].get("GoDaddy", {}).get("found", 0)
+            namejet_count = results["platforms"].get("NameJet", {}).get("found", 0)
+
+            if godaddy_count < 10 or namejet_count < 5:
+                logger.info("🎭 Starting TIER 3: Playwright Stealth (GoDaddy, NameJet)")
+                try:
+                    playwright_result = await playwright_scraper.scrape_all_protected()
+
+                    for item in playwright_result.get("items", []):
+                        action = await self._store_auction(db, item)
+                        platform = item.get("platform", "Unknown")
+
+                        if platform not in results["platforms"]:
+                            results["platforms"][platform] = {"found": 0, "new": 0, "updated": 0}
+
+                        results["platforms"][platform]["found"] += 1
+                        results["platforms"][platform]["source"] = "playwright"
+                        if action == "new":
+                            results["platforms"][platform]["new"] += 1
+                            results["total_new"] += 1
+                        elif action == "updated":
+                            results["platforms"][platform]["updated"] += 1
+                            results["total_updated"] += 1
+
+                        results["total_found"] += 1
+
+                    for platform, data in playwright_result.get("platforms", {}).items():
+                        logger.info(f"🎭 {platform} Playwright: {data.get('found', 0)} auctions")
+
+                    if playwright_result.get("errors"):
+                        for error in playwright_result["errors"]:
+                            logger.warning(f"⚠️ Playwright: {error}")
+                            results["errors"].append(f"Playwright: {error}")
+
+                except Exception as e:
+                    logger.error(f"❌ Playwright scraping failed: {e}")
+                    results["errors"].append(f"Playwright: {str(e)}")
+
+        await db.commit()
+
         # Mark ended auctions as inactive
         await self._cleanup_ended_auctions(db)
 
@@ -561,13 +707,206 @@
 
         return result
 
-    async def _scrape_dropcatch_public(self, db: AsyncSession) -> Dict[str, Any]:
+    async def _fetch_dropcatch_api(self, db: AsyncSession) -> Dict[str, Any]:
         """
-        Scrape DropCatch public auction listings.
-        DropCatch shows pending delete auctions publicly.
+        🚀 TIER 1: Fetch DropCatch auctions via OFFICIAL API
+
+        This is our preferred method - faster, more reliable, more data.
+        Uses the official DropCatch Partner API.
         """
         platform = "DropCatch"
-        result = {"found": 0, "new": 0, "updated": 0}
+        result = {"found": 0, "new": 0, "updated": 0, "source": "api"}
+
+        if not dropcatch_client.is_configured:
+            logger.info("DropCatch API not configured, skipping")
+            return result
+
+        log = AuctionScrapeLog(platform=platform)
+        db.add(log)
+        await db.commit()
+
+        try:
+            # Fetch auctions from official API
+            api_result = await dropcatch_client.search_auctions(page_size=100)
+
+            auctions = api_result.get("auctions") or api_result.get("items") or []
+            result["found"] = len(auctions)
+
+            for dc_auction in auctions:
+                try:
+                    # Transform to our format
+                    auction_data = dropcatch_client.transform_to_pounce_format(dc_auction)
+
+                    if not auction_data["domain"]:
+                        continue
+
+                    # Check if exists
+                    existing = await db.execute(
+                        select(DomainAuction).where(
+                            and_(
+                                DomainAuction.domain == auction_data["domain"],
+                                DomainAuction.platform == platform
+                            )
+                        )
+                    )
+                    existing_auction = existing.scalar_one_or_none()
+
+                    if existing_auction:
+                        # Update existing
+                        existing_auction.current_bid = auction_data["current_bid"]
+                        existing_auction.num_bids = auction_data["num_bids"]
+                        existing_auction.end_time = auction_data["end_time"]
+                        existing_auction.is_active = True
+                        existing_auction.updated_at = datetime.utcnow()
+                        result["updated"] += 1
+                    else:
+                        # Create new
+                        new_auction = DomainAuction(
+                            domain=auction_data["domain"],
+                            tld=auction_data["tld"],
+                            platform=platform,
+                            current_bid=auction_data["current_bid"],
+                            currency=auction_data["currency"],
+                            num_bids=auction_data["num_bids"],
+                            end_time=auction_data["end_time"],
+                            auction_url=auction_data["auction_url"],
+                            age_years=auction_data.get("age_years"),
+                            buy_now_price=auction_data.get("buy_now_price"),
+                            reserve_met=auction_data.get("reserve_met"),
+                            traffic=auction_data.get("traffic"),
+                            is_active=True,
+                        )
+                        db.add(new_auction)
+                        result["new"] += 1
+
+                except Exception as e:
+                    logger.warning(f"Error processing DropCatch auction: {e}")
+                    continue
+
+            await db.commit()
+
+            log.status = "success"
+            log.auctions_found = result["found"]
+            log.auctions_new = result["new"]
+            log.auctions_updated = result["updated"]
+            log.completed_at = datetime.utcnow()
+            await db.commit()
+
+            logger.info(f"DropCatch API: Found {result['found']}, New {result['new']}, Updated {result['updated']}")
+            return result
+
+        except Exception as e:
+            logger.error(f"DropCatch API error: {e}")
+            log.status = "failed"
+            log.error_message = str(e)[:500]
+            log.completed_at = datetime.utcnow()
+            await db.commit()
+            return result
+
+    async def _fetch_sedo_api(self, db: AsyncSession) -> Dict[str, Any]:
+        """
+        🚀 TIER 1: Fetch Sedo auctions via OFFICIAL API
+
+        This is our preferred method for Sedo data.
+        Uses the official Sedo Partner API.
+        """
+        platform = "Sedo"
+        result = {"found": 0, "new": 0, "updated": 0, "source": "api"}
+
+        if not sedo_client.is_configured:
+            logger.info("Sedo API not configured, skipping")
+            return result
+
+        log = AuctionScrapeLog(platform=platform)
+        db.add(log)
+        await db.commit()
+
+        try:
+            # Fetch auctions from official API
+            api_result = await sedo_client.search_auctions(page_size=100)
+
+            # Sedo response structure may vary
+            listings = api_result.get("domains") or api_result.get("items") or api_result.get("result") or []
+            if isinstance(listings, dict):
+                listings = list(listings.values()) if listings else []
+
+            result["found"] = len(listings)
+
+            for sedo_listing in listings:
+                try:
+                    # Transform to our format
+                    auction_data = sedo_client.transform_to_pounce_format(sedo_listing)
+
+                    if not auction_data["domain"]:
+                        continue
+
+                    # Check if exists
+                    existing = await db.execute(
+                        select(DomainAuction).where(
+                            and_(
+                                DomainAuction.domain == auction_data["domain"],
+                                DomainAuction.platform == platform
+                            )
+                        )
+                    )
+                    existing_auction = existing.scalar_one_or_none()
+
+                    if existing_auction:
+                        # Update existing
+                        existing_auction.current_bid = auction_data["current_bid"]
+                        existing_auction.num_bids = auction_data["num_bids"]
+                        existing_auction.end_time = auction_data["end_time"]
+                        existing_auction.is_active = True
+                        existing_auction.updated_at = datetime.utcnow()
+                        result["updated"] += 1
+                    else:
+                        # Create new
+                        new_auction = DomainAuction(
+                            domain=auction_data["domain"],
+                            tld=auction_data["tld"],
+                            platform=platform,
+                            current_bid=auction_data["current_bid"],
+                            currency=auction_data["currency"],
+                            num_bids=auction_data["num_bids"],
+                            end_time=auction_data["end_time"],
+                            auction_url=auction_data["auction_url"],
+                            buy_now_price=auction_data.get("buy_now_price"),
+                            is_active=True,
+                        )
+                        db.add(new_auction)
+                        result["new"] += 1
+
+                except Exception as e:
+                    logger.warning(f"Error processing Sedo listing: {e}")
+                    continue
+
+            await db.commit()
+
+            log.status = "success"
+            log.auctions_found = result["found"]
+            log.auctions_new = result["new"]
+            log.auctions_updated = result["updated"]
+            log.completed_at = datetime.utcnow()
+            await db.commit()
+
+            logger.info(f"Sedo API: Found {result['found']}, New {result['new']}, Updated {result['updated']}")
+            return result
+
+        except Exception as e:
+            logger.error(f"Sedo API error: {e}")
+            log.status = "failed"
+            log.error_message = str(e)[:500]
+            log.completed_at = datetime.utcnow()
+            await db.commit()
+            return result
+
+    async def _scrape_dropcatch_public(self, db: AsyncSession) -> Dict[str, Any]:
+        """
+        📦 TIER 2 FALLBACK: Scrape DropCatch public auction listings.
+        Only used if the API is not configured or fails.
+        """
+        platform = "DropCatch"
+        result = {"found": 0, "new": 0, "updated": 0, "source": "scrape"}
+
         log = AuctionScrapeLog(platform=platform)
         db.add(log)
backend/app/services/dropcatch_api.py (new file, 334 lines)
@@ -0,0 +1,334 @@
"""
DropCatch Official API Client

This service provides access to DropCatch's official API for:
- Searching domain auctions
- Getting auction details
- Backorder management

API Documentation: https://www.dropcatch.com/hiw/dropcatch-api
Interactive Docs: https://api.dropcatch.com/swagger

SECURITY:
- Credentials are loaded from environment variables
- NEVER hardcode credentials in this file

Usage:
    from app.services.dropcatch_api import dropcatch_client

    # Get active auctions
    auctions = await dropcatch_client.search_auctions(keyword="tech")
"""
import logging
from datetime import datetime, timedelta
from typing import Optional, List, Dict, Any
import httpx
from functools import lru_cache

from app.config import get_settings

logger = logging.getLogger(__name__)


class DropCatchAPIClient:
    """
    Official DropCatch API Client.

    This uses the V2 API endpoints (V1 is deprecated).
    Authentication is via OAuth2 client credentials.
    """

    def __init__(self):
        self.settings = get_settings()
        self.base_url = self.settings.dropcatch_api_base or "https://api.dropcatch.com"
        self.client_id = self.settings.dropcatch_client_id
        self.client_secret = self.settings.dropcatch_client_secret

        # Token cache
        self._access_token: Optional[str] = None
        self._token_expires_at: Optional[datetime] = None

        # HTTP client
        self._client: Optional[httpx.AsyncClient] = None

    @property
    def is_configured(self) -> bool:
        """Check if API credentials are configured."""
        return bool(self.client_id and self.client_secret)

    async def _get_client(self) -> httpx.AsyncClient:
        """Get or create HTTP client."""
        if self._client is None or self._client.is_closed:
            self._client = httpx.AsyncClient(
                timeout=30.0,
                headers={
                    "Content-Type": "application/json",
                    "User-Agent": "Pounce/1.0 (Domain Intelligence Platform)"
                }
            )
        return self._client

    async def close(self):
        """Close the HTTP client."""
        if self._client and not self._client.is_closed:
            await self._client.aclose()
            self._client = None

    async def _authenticate(self) -> str:
        """
        Authenticate with DropCatch API and get access token.

        POST https://api.dropcatch.com/authorize
        Body: { "clientId": "...", "clientSecret": "..." }

        Returns: Access token string
        """
        if not self.is_configured:
            raise ValueError("DropCatch API credentials not configured")

        # Check if we have a valid cached token
        if self._access_token and self._token_expires_at:
            if datetime.utcnow() < self._token_expires_at - timedelta(minutes=5):
                return self._access_token

        client = await self._get_client()

        try:
            response = await client.post(
                f"{self.base_url}/authorize",
                json={
                    "clientId": self.client_id,
                    "clientSecret": self.client_secret
                }
            )

            if response.status_code != 200:
                logger.error(f"DropCatch auth failed: {response.status_code} - {response.text}")
                raise Exception(f"Authentication failed: {response.status_code}")

            data = response.json()

            # Extract token - the response format may vary
            # Common formats: { "token": "...", "expiresIn": 3600 }
            # or: { "accessToken": "...", "expiresIn": 3600 }
            self._access_token = data.get("token") or data.get("accessToken") or data.get("access_token")

            # Calculate expiry (default 1 hour if not specified)
            expires_in = data.get("expiresIn") or data.get("expires_in") or 3600
            self._token_expires_at = datetime.utcnow() + timedelta(seconds=expires_in)

            logger.info("DropCatch API: Successfully authenticated")
            return self._access_token

        except httpx.HTTPError as e:
            logger.error(f"DropCatch auth HTTP error: {e}")
            raise

    async def _request(
        self,
        method: str,
        endpoint: str,
        params: Optional[Dict] = None,
        json_data: Optional[Dict] = None
    ) -> Dict[str, Any]:
        """Make an authenticated API request."""
        token = await self._authenticate()
        client = await self._get_client()

        headers = {
            "Authorization": f"Bearer {token}"
        }

        url = f"{self.base_url}{endpoint}"

        try:
            response = await client.request(
                method=method,
                url=url,
                params=params,
                json=json_data,
                headers=headers
            )

            if response.status_code == 401:
                # Token expired, re-authenticate
                self._access_token = None
                token = await self._authenticate()
                headers["Authorization"] = f"Bearer {token}"
                response = await client.request(
                    method=method,
                    url=url,
                    params=params,
                    json=json_data,
                    headers=headers
                )

            response.raise_for_status()
            return response.json()

        except httpx.HTTPError as e:
            logger.error(f"DropCatch API request failed: {e}")
            raise

    # =========================================================================
    # AUCTION ENDPOINTS (V2)
    # =========================================================================

    async def search_auctions(
        self,
        keyword: Optional[str] = None,
        tld: Optional[str] = None,
        min_price: Optional[float] = None,
        max_price: Optional[float] = None,
        ending_within_hours: Optional[int] = None,
        page_size: int = 100,
        page_token: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Search for domain auctions.

        Endpoint: GET /v2/auctions (or similar - check interactive docs)

        Returns:
            {
                "auctions": [...],
                "cursor": {
                    "next": "...",
                    "previous": "..."
                }
            }
        """
        params = {
            "pageSize": page_size,
        }

        if keyword:
            params["searchTerm"] = keyword
        if tld:
            params["tld"] = tld.lstrip(".")
        if min_price is not None:
            params["minPrice"] = min_price
        if max_price is not None:
            params["maxPrice"] = max_price
        if ending_within_hours:
            params["endingWithinHours"] = ending_within_hours
        if page_token:
            params["pageToken"] = page_token

        return await self._request("GET", "/v2/auctions", params=params)

    async def get_auction(self, auction_id: int) -> Dict[str, Any]:
        """Get details for a specific auction."""
        return await self._request("GET", f"/v2/auctions/{auction_id}")

    async def get_ending_soon(
        self,
        hours: int = 24,
        page_size: int = 50
    ) -> Dict[str, Any]:
        """Get auctions ending soon."""
        return await self.search_auctions(
            ending_within_hours=hours,
            page_size=page_size
        )

    async def get_hot_auctions(self, page_size: int = 50) -> Dict[str, Any]:
        """
        Get hot/popular auctions (high bid activity).
        Note: The actual endpoint may vary - check interactive docs.
        """
        # This might be a different endpoint or sort parameter
        params = {
            "pageSize": page_size,
            "sortBy": "bidCount",  # or "popularity" - check docs
            "sortOrder": "desc"
        }
        return await self._request("GET", "/v2/auctions", params=params)

    # =========================================================================
    # BACKORDER ENDPOINTS (V2)
    # =========================================================================

    async def search_backorders(
        self,
        keyword: Optional[str] = None,
        page_size: int = 100,
        page_token: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Search for available backorders (domains dropping soon)."""
        params = {"pageSize": page_size}

        if keyword:
            params["searchTerm"] = keyword
        if page_token:
            params["pageToken"] = page_token

        return await self._request("GET", "/v2/backorders", params=params)

    # =========================================================================
    # UTILITY METHODS
    # =========================================================================

    async def test_connection(self) -> Dict[str, Any]:
        """Test the API connection and credentials."""
        if not self.is_configured:
            return {
                "success": False,
                "error": "API credentials not configured",
                "configured": False
            }

        try:
            await self._authenticate()
            return {
                "success": True,
                "configured": True,
                "client_id": self.client_id.split(":")[0] if ":" in self.client_id else self.client_id,
                "authenticated_at": datetime.utcnow().isoformat()
            }
        except Exception as e:
            return {
                "success": False,
                "error": str(e),
                "configured": True
            }

    def transform_to_pounce_format(self, dc_auction: Dict) -> Dict[str, Any]:
        """
        Transform DropCatch auction to Pounce internal format.

        Maps DropCatch fields to our DomainAuction model.
        """
        domain = dc_auction.get("domainName") or dc_auction.get("domain", "")
        tld = domain.rsplit(".", 1)[1] if "." in domain else ""

        # Parse end time (format may vary)
        end_time_str = dc_auction.get("auctionEndTime") or dc_auction.get("endTime")
        if end_time_str:
            try:
                end_time = datetime.fromisoformat(end_time_str.replace("Z", "+00:00"))
            except:
                end_time = datetime.utcnow() + timedelta(days=1)
        else:
            end_time = datetime.utcnow() + timedelta(days=1)

        return {
            "domain": domain,
            "tld": tld,
            "platform": "DropCatch",
            "current_bid": dc_auction.get("currentBid") or dc_auction.get("price", 0),
            "currency": "USD",
            "num_bids": dc_auction.get("bidCount") or dc_auction.get("numberOfBids", 0),
            "end_time": end_time,
            "auction_url": f"https://www.dropcatch.com/domain/{domain}",
            "age_years": dc_auction.get("yearsOld") or dc_auction.get("age"),
            "buy_now_price": dc_auction.get("buyNowPrice"),
            "reserve_met": dc_auction.get("reserveMet"),
            "traffic": dc_auction.get("traffic"),
            "external_id": str(dc_auction.get("auctionId") or dc_auction.get("id", "")),
        }


# Singleton instance
dropcatch_client = DropCatchAPIClient()
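A hedged usage sketch for the new client, following the module's own Usage note. The response field names (`auctions` vs `items`) follow the fallbacks used elsewhere in this change and may need adjusting against the live API.

```python
# Illustrative only - response field names are assumptions based on the code above.
import asyncio

from app.services.dropcatch_api import dropcatch_client


async def demo():
    status = await dropcatch_client.test_connection()
    if not status.get("success"):
        print("DropCatch API not configured or unreachable:", status.get("error"))
        return

    # Auctions ending within 24 hours for .com domains
    page = await dropcatch_client.search_auctions(tld="com", ending_within_hours=24, page_size=50)
    for dc_auction in page.get("auctions") or page.get("items") or []:
        row = dropcatch_client.transform_to_pounce_format(dc_auction)
        print(row["domain"], row["current_bid"], row["end_time"])

    await dropcatch_client.close()


asyncio.run(demo())
```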
backend/app/services/hidden_api_scrapers.py (new file, 995 lines)
@@ -0,0 +1,995 @@
|
|||||||
|
"""
|
||||||
|
Hidden JSON API Scrapers for Domain Auction Platforms.
|
||||||
|
|
||||||
|
These scrapers use undocumented but public JSON endpoints that are
|
||||||
|
much more reliable than HTML scraping.
|
||||||
|
|
||||||
|
Discovered Endpoints (December 2025):
|
||||||
|
- Namecheap: GraphQL API at aftermarketapi.namecheap.com
|
||||||
|
- Dynadot: REST API at dynadot-vue-api
|
||||||
|
- Sav.com: AJAX endpoint for auction listings
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
from typing import Dict, Any, List, Optional
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# ═══════════════════════════════════════════════════════════════════════════════
|
||||||
|
# AFFILIATE LINKS — Monetization through referral commissions
|
||||||
|
# ═══════════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
AFFILIATE_CONFIG = {
|
||||||
|
"Namecheap": {
|
||||||
|
"base_url": "https://www.namecheap.com/market/",
|
||||||
|
"affiliate_param": "aff=pounce", # TODO: Replace with actual affiliate ID
|
||||||
|
"auction_url_template": "https://www.namecheap.com/market/domain/{domain}?aff=pounce",
|
||||||
|
},
|
||||||
|
"Dynadot": {
|
||||||
|
"base_url": "https://www.dynadot.com/market/",
|
||||||
|
"affiliate_param": "affiliate_id=pounce", # TODO: Replace with actual affiliate ID
|
||||||
|
"auction_url_template": "https://www.dynadot.com/market/auction/{domain}?affiliate_id=pounce",
|
||||||
|
},
|
||||||
|
"Sav": {
|
||||||
|
"base_url": "https://www.sav.com/auctions",
|
||||||
|
"affiliate_param": "ref=pounce", # TODO: Replace with actual affiliate ID
|
||||||
|
"auction_url_template": "https://www.sav.com/domain/{domain}?ref=pounce",
|
||||||
|
},
|
||||||
|
"GoDaddy": {
|
||||||
|
"base_url": "https://auctions.godaddy.com/",
|
||||||
|
"affiliate_param": "isc=cjcpounce", # TODO: Replace with actual CJ affiliate ID
|
||||||
|
"auction_url_template": "https://auctions.godaddy.com/trpItemListing.aspx?domain={domain}&isc=cjcpounce",
|
||||||
|
},
|
||||||
|
"DropCatch": {
|
||||||
|
"base_url": "https://www.dropcatch.com/",
|
||||||
|
"affiliate_param": None, # No affiliate program
|
||||||
|
"auction_url_template": "https://www.dropcatch.com/domain/{domain}",
|
||||||
|
},
|
||||||
|
"Sedo": {
|
||||||
|
"base_url": "https://sedo.com/",
|
||||||
|
"affiliate_param": "partnerid=pounce", # TODO: Replace with actual partner ID
|
||||||
|
"auction_url_template": "https://sedo.com/search/details/?domain={domain}&partnerid=pounce",
|
||||||
|
},
|
||||||
|
"NameJet": {
|
||||||
|
"base_url": "https://www.namejet.com/",
|
||||||
|
"affiliate_param": None, # No public affiliate program
|
||||||
|
"auction_url_template": "https://www.namejet.com/pages/Auctions/ViewAuctions.aspx?domain={domain}",
|
||||||
|
},
|
||||||
|
"ExpiredDomains": {
|
||||||
|
"base_url": "https://www.expireddomains.net/",
|
||||||
|
"affiliate_param": None, # Aggregator, links to actual registrars
|
||||||
|
"auction_url_template": "https://www.expireddomains.net/domain-name-search/?q={domain}",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def build_affiliate_url(platform: str, domain: str, original_url: Optional[str] = None) -> str:
|
||||||
|
"""
|
||||||
|
Build an affiliate URL for a given platform and domain.
|
||||||
|
|
||||||
|
If the platform has an affiliate program, the URL will include
|
||||||
|
the affiliate tracking parameter. Otherwise, returns the original URL.
|
||||||
|
"""
|
||||||
|
config = AFFILIATE_CONFIG.get(platform, {})
|
||||||
|
|
||||||
|
if config.get("auction_url_template"):
|
||||||
|
return config["auction_url_template"].format(domain=domain)
|
||||||
|
|
||||||
|
return original_url or f"https://www.google.com/search?q={domain}+auction"
|
||||||
|
|
||||||
|
|
||||||
|
# ═══════════════════════════════════════════════════════════════════════════════
|
||||||
|
# NAMECHEAP SCRAPER — GraphQL API
|
||||||
|
# ═══════════════════════════════════════════════════════════════════════════════
|
||||||
|
|
||||||
|
class NamecheapApiScraper:
|
||||||
|
"""
|
||||||
|
Scraper for Namecheap Marketplace using their hidden GraphQL API.
|
||||||
|
|
||||||
|
Endpoint: https://aftermarketapi.namecheap.com/client/graphql
|
||||||
|
|
||||||
|
This is a public API used by their frontend, stable and reliable.
|
||||||
|
"""
|
||||||
|
|
||||||
|
GRAPHQL_ENDPOINT = "https://aftermarketapi.namecheap.com/client/graphql"
|
||||||
|
|
||||||
|
# GraphQL query for fetching auctions
|
||||||
|
AUCTIONS_QUERY = """
|
||||||
|
query GetAuctions($filter: AuctionFilterInput, $pagination: PaginationInput, $sort: SortInput) {
|
||||||
|
auctions(filter: $filter, pagination: $pagination, sort: $sort) {
|
||||||
|
items {
|
||||||
|
id
|
||||||
|
domain
|
||||||
|
currentBid
|
||||||
|
minBid
|
||||||
|
bidCount
|
||||||
|
endTime
|
||||||
|
status
|
||||||
|
buyNowPrice
|
||||||
|
hasBuyNow
|
||||||
|
}
|
||||||
|
totalCount
|
||||||
|
pageInfo {
|
||||||
|
hasNextPage
|
||||||
|
endCursor
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
|
||||||
|
async def fetch_auctions(
|
||||||
|
self,
|
||||||
|
limit: int = 100,
|
||||||
|
offset: int = 0,
|
||||||
|
keyword: Optional[str] = None,
|
||||||
|
tld: Optional[str] = None,
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Fetch auctions from Namecheap GraphQL API."""
|
||||||
|
try:
|
||||||
|
async with httpx.AsyncClient(timeout=30.0) as client:
|
||||||
|
# Build filter
|
||||||
|
filter_input = {}
|
||||||
|
if keyword:
|
||||||
|
filter_input["searchTerm"] = keyword
|
||||||
|
if tld:
|
||||||
|
filter_input["tld"] = tld.lstrip(".")
|
||||||
|
|
||||||
|
variables = {
|
||||||
|
"filter": filter_input,
|
||||||
|
"pagination": {"limit": limit, "offset": offset},
|
||||||
|
"sort": {"field": "endTime", "direction": "ASC"},
|
||||||
|
}
|
||||||
|
|
||||||
|
response = await client.post(
|
||||||
|
self.GRAPHQL_ENDPOINT,
|
||||||
|
json={
|
||||||
|
"query": self.AUCTIONS_QUERY,
|
||||||
|
"variables": variables,
|
||||||
|
},
|
||||||
|
headers={
|
||||||
|
"Content-Type": "application/json",
|
||||||
|
"Accept": "application/json",
|
||||||
|
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
|
||||||
|
"Origin": "https://www.namecheap.com",
|
||||||
|
"Referer": "https://www.namecheap.com/market/",
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
if response.status_code != 200:
|
||||||
|
logger.error(f"Namecheap API error: {response.status_code}")
|
||||||
|
return {"items": [], "total": 0, "error": response.text}
|
||||||
|
|
||||||
|
data = response.json()
|
||||||
|
|
||||||
|
if "errors" in data:
|
||||||
|
logger.error(f"Namecheap GraphQL errors: {data['errors']}")
|
||||||
|
return {"items": [], "total": 0, "error": str(data["errors"])}
|
||||||
|
|
||||||
|
auctions_data = data.get("data", {}).get("auctions", {})
|
||||||
|
items = auctions_data.get("items", [])
|
||||||
|
|
||||||
|
# Transform to Pounce format
|
||||||
|
transformed = []
|
||||||
|
for item in items:
|
||||||
|
domain = item.get("domain", "")
|
||||||
|
tld_part = domain.rsplit(".", 1)[-1] if "." in domain else ""
|
||||||
|
|
||||||
|
transformed.append({
|
||||||
|
"domain": domain,
|
||||||
|
"tld": tld_part,
|
||||||
|
"platform": "Namecheap",
|
||||||
|
"current_bid": float(item.get("currentBid", 0)),
|
||||||
|
"min_bid": float(item.get("minBid", 0)),
|
||||||
|
"num_bids": int(item.get("bidCount", 0)),
|
||||||
|
"end_time": item.get("endTime"),
|
||||||
|
"buy_now_price": float(item.get("buyNowPrice")) if item.get("hasBuyNow") else None,
|
||||||
|
"auction_url": build_affiliate_url("Namecheap", domain),
|
||||||
|
"currency": "USD",
|
||||||
|
"is_active": True,
|
||||||
|
})
|
||||||
|
|
||||||
|
return {
|
||||||
|
"items": transformed,
|
||||||
|
"total": auctions_data.get("totalCount", 0),
|
||||||
|
"has_more": auctions_data.get("pageInfo", {}).get("hasNextPage", False),
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.exception(f"Namecheap API scraper error: {e}")
|
||||||
|
return {"items": [], "total": 0, "error": str(e)}
|
||||||
|
|
||||||
|
|
||||||
|
# ═══════════════════════════════════════════════════════════════════════════════
# DYNADOT SCRAPER — REST JSON API
# ═══════════════════════════════════════════════════════════════════════════════


class DynadotApiScraper:
    """
    Scraper for Dynadot Marketplace using their hidden JSON API.

    Endpoints:
    - /dynadot-vue-api/dynadot-service/marketplace-api
    - /dynadot-vue-api/dynadot-service/main-site-api

    Supports:
    - EXPIRED_AUCTION: Expired auctions
    - BACKORDER: Backorder listings
    - USER_LISTING: User marketplace listings
    """

    BASE_URL = "https://www.dynadot.com"
    MARKETPLACE_API = "/dynadot-vue-api/dynadot-service/marketplace-api"

    async def fetch_auctions(
        self,
        aftermarket_type: str = "EXPIRED_AUCTION",
        page_size: int = 100,
        page_index: int = 0,
        keyword: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Fetch auctions from Dynadot REST API."""
        try:
            async with httpx.AsyncClient(timeout=30.0) as client:
                params = {
                    "command": "get_list",
                    "aftermarket_type": aftermarket_type,
                    "page_size": page_size,
                    "page_index": page_index,
                    "lang": "en",
                }

                if keyword:
                    params["keyword"] = keyword

                response = await client.post(
                    f"{self.BASE_URL}{self.MARKETPLACE_API}",
                    params=params,
                    headers={
                        "Accept": "application/json",
                        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
                        "Referer": "https://www.dynadot.com/market",
                    },
                )

                if response.status_code != 200:
                    logger.error(f"Dynadot API error: {response.status_code}")
                    return {"items": [], "total": 0, "error": response.text}

                data = response.json()

                # Dynadot returns code: 200 for success
                if data.get("code") not in [0, 200] and data.get("msg") != "success":
                    logger.error(f"Dynadot API error: {data}")
                    return {"items": [], "total": 0, "error": str(data)}

                # Data can be in 'records' or 'list'
                listings = data.get("data", {}).get("records", []) or data.get("data", {}).get("list", [])

                # Transform to Pounce format
                transformed = []
                for item in listings:
                    domain = item.get("domain", "") or item.get("name", "") or item.get("utf8_name", "")
                    tld_part = domain.rsplit(".", 1)[-1] if "." in domain else ""

                    # Parse end time (Dynadot uses timestamp in milliseconds or string)
                    end_time = None
                    end_time_stamp = item.get("end_time_stamp")
                    if end_time_stamp:
                        try:
                            end_time = datetime.fromtimestamp(end_time_stamp / 1000)
                        except:
                            pass

                    if not end_time:
                        end_time_str = item.get("end_time") or item.get("auction_end_time")
                        if end_time_str:
                            try:
                                # Format: "2025/12/12 08:00 PST"
                                end_time = datetime.strptime(end_time_str.split(" PST")[0], "%Y/%m/%d %H:%M")
                            except:
                                end_time = datetime.utcnow() + timedelta(days=1)

                    # Parse bid price (can be string or number)
                    bid_price = item.get("bid_price") or item.get("current_bid") or item.get("price") or 0
                    if isinstance(bid_price, str):
                        bid_price = float(bid_price.replace(",", "").replace("$", ""))

                    transformed.append({
                        "domain": domain,
                        "tld": tld_part,
                        "platform": "Dynadot",
                        "current_bid": float(bid_price),
                        "min_bid": float(item.get("start_price", 0) or 0),
                        "num_bids": int(item.get("bids", 0) or item.get("bid_count", 0) or 0),
                        "end_time": end_time or datetime.utcnow() + timedelta(days=1),
                        "buy_now_price": float(item.get("accepted_bid_price")) if item.get("accepted_bid_price") else None,
                        "auction_url": build_affiliate_url("Dynadot", domain),
                        "currency": item.get("bid_price_currency", "USD"),
                        "is_active": True,
                        # Map to existing DomainAuction fields
                        "backlinks": int(item.get("links", 0) or 0),
                        "age_years": int(item.get("age", 0) or 0),
                    })

                return {
                    "items": transformed,
                    "total": data.get("data", {}).get("total_count", len(transformed)),
                    "has_more": len(listings) >= page_size,
                }

        except Exception as e:
            logger.exception(f"Dynadot API scraper error: {e}")
            return {"items": [], "total": 0, "error": str(e)}

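# Illustrative usage sketch (annotation, not part of the original module): how the
# scraper above could be driven for one page of expired auctions. The keyword and
# page size below are example values only.
async def _example_dynadot_expired(keyword: str = "shop") -> list:
    scraper = DynadotApiScraper()
    result = await scraper.fetch_auctions(
        aftermarket_type="EXPIRED_AUCTION",
        page_size=50,
        keyword=keyword,
    )
    # Each item already uses the normalized Pounce auction fields built above.
    return [(a["domain"], a["current_bid"], a["end_time"]) for a in result["items"]]
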
# ═══════════════════════════════════════════════════════════════════════════════
# SAV.COM SCRAPER — AJAX JSON API
# ═══════════════════════════════════════════════════════════════════════════════


class SavApiScraper:
    """
    Scraper for Sav.com Auctions using their hidden AJAX endpoint.

    Endpoint: /auctions/load_domains_ajax/{page}

    Simple POST request that returns paginated auction data.
    """

    BASE_URL = "https://www.sav.com"
    AJAX_ENDPOINT = "/auctions/load_domains_ajax"

    async def fetch_auctions(
        self,
        page: int = 0,
    ) -> Dict[str, Any]:
        """Fetch auctions from Sav.com AJAX API."""
        try:
            async with httpx.AsyncClient(timeout=30.0) as client:
                response = await client.post(
                    f"{self.BASE_URL}{self.AJAX_ENDPOINT}/{page}",
                    headers={
                        "Accept": "application/json, text/html",
                        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
                        "Referer": "https://www.sav.com/domains/auctions",
                        "X-Requested-With": "XMLHttpRequest",
                    },
                )

                if response.status_code != 200:
                    logger.error(f"Sav API error: {response.status_code}")
                    return {"items": [], "total": 0, "error": response.text}

                # The response is HTML but contains structured data
                # We need to parse it or check for JSON
                content_type = response.headers.get("content-type", "")

                if "application/json" in content_type:
                    data = response.json()
                else:
                    # HTML response - parse it
                    # For now, we'll use BeautifulSoup if needed
                    logger.warning("Sav returned HTML instead of JSON, parsing...")
                    return await self._parse_html_response(response.text)

                listings = data.get("domains", data.get("auctions", []))

                # Transform to Pounce format
                transformed = []
                for item in listings:
                    domain = item.get("domain", "") or item.get("name", "")
                    tld_part = domain.rsplit(".", 1)[-1] if "." in domain else ""

                    # Parse end time
                    end_time_str = item.get("end_time") or item.get("ends_at")
                    end_time = None
                    if end_time_str:
                        try:
                            end_time = datetime.fromisoformat(end_time_str.replace("Z", "+00:00"))
                        except:
                            end_time = datetime.utcnow() + timedelta(days=1)

                    transformed.append({
                        "domain": domain,
                        "tld": tld_part,
                        "platform": "Sav",
                        "current_bid": float(item.get("current_bid", 0) or item.get("price", 0)),
                        "min_bid": float(item.get("min_bid", 0) or 0),
                        "num_bids": int(item.get("bids", 0) or 0),
                        "end_time": end_time,
                        "buy_now_price": float(item.get("buy_now")) if item.get("buy_now") else None,
                        "auction_url": build_affiliate_url("Sav", domain),
                        "currency": "USD",
                        "is_active": True,
                    })

                return {
                    "items": transformed,
                    "total": len(transformed),
                    "has_more": len(listings) >= 20,  # Default page size
                }

        except Exception as e:
            logger.exception(f"Sav API scraper error: {e}")
            return {"items": [], "total": 0, "error": str(e)}

    async def _parse_html_response(self, html: str) -> Dict[str, Any]:
        """Parse HTML response from Sav.com when JSON is not available."""
        try:
            from bs4 import BeautifulSoup

            soup = BeautifulSoup(html, "html.parser")

            # Find auction rows
            rows = soup.select(".auction-row, .domain-row, tr[data-domain]")

            transformed = []
            for row in rows:
                domain_el = row.select_one(".domain-name, .name, [data-domain]")
                price_el = row.select_one(".price, .bid, .current-bid")
                time_el = row.select_one(".time-left, .ends, .countdown")
                bids_el = row.select_one(".bids, .bid-count")

                if not domain_el:
                    continue

                domain = domain_el.get_text(strip=True) or domain_el.get("data-domain", "")
                tld_part = domain.rsplit(".", 1)[-1] if "." in domain else ""

                price_text = price_el.get_text(strip=True) if price_el else "0"
                price = float("".join(c for c in price_text if c.isdigit() or c == ".") or "0")

                bids_text = bids_el.get_text(strip=True) if bids_el else "0"
                bids = int("".join(c for c in bids_text if c.isdigit()) or "0")

                transformed.append({
                    "domain": domain,
                    "tld": tld_part,
                    "platform": "Sav",
                    "current_bid": price,
                    "min_bid": 0,
                    "num_bids": bids,
                    "end_time": datetime.utcnow() + timedelta(days=1),  # Estimate
                    "buy_now_price": None,
                    "auction_url": build_affiliate_url("Sav", domain),
                    "currency": "USD",
                    "is_active": True,
                })

            return {
                "items": transformed,
                "total": len(transformed),
                "has_more": len(rows) >= 20,
            }

        except Exception as e:
            logger.exception(f"Sav HTML parsing error: {e}")
            return {"items": [], "total": 0, "error": str(e)}

# ═══════════════════════════════════════════════════════════════════════════════
# GODADDY SCRAPER — Hidden REST JSON API
# ═══════════════════════════════════════════════════════════════════════════════


class GoDaddyApiScraper:
    """
    Scraper for GoDaddy Auctions using their hidden JSON API.

    Discovered Endpoint:
    https://auctions.godaddy.com/beta/findApiProxy/v4/aftermarket/find/auction/recommend

    Parameters:
    - paginationSize: number of results (max 150)
    - paginationStart: offset
    - sortBy: auctionBids:desc, auctionValuationPrice:desc, endingAt:asc
    - endTimeAfter: ISO timestamp
    - typeIncludeList: 14,16,38 (auction types)
    """

    BASE_URL = "https://auctions.godaddy.com"
    API_ENDPOINT = "/beta/findApiProxy/v4/aftermarket/find/auction/recommend"

    async def fetch_auctions(
        self,
        limit: int = 100,
        offset: int = 0,
        sort_by: str = "auctionBids:desc",
        ending_within_hours: Optional[int] = None,
    ) -> Dict[str, Any]:
        """Fetch auctions from GoDaddy hidden JSON API."""
        try:
            async with httpx.AsyncClient(timeout=30.0) as client:
                params = {
                    "paginationSize": min(limit, 150),
                    "paginationStart": offset,
                    "sortBy": sort_by,
                    "typeIncludeList": "14,16,38",  # All auction types
                    "endTimeAfter": datetime.utcnow().isoformat() + "Z",
                }

                if ending_within_hours:
                    end_before = (datetime.utcnow() + timedelta(hours=ending_within_hours)).isoformat() + "Z"
                    params["endTimeBefore"] = end_before

                response = await client.get(
                    f"{self.BASE_URL}{self.API_ENDPOINT}",
                    params=params,
                    headers={
                        "Accept": "application/json",
                        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
                        "Referer": "https://auctions.godaddy.com/beta",
                    },
                )

                if response.status_code != 200:
                    logger.error(f"GoDaddy API error: {response.status_code}")
                    return {"items": [], "total": 0, "error": response.text}

                data = response.json()

                # GoDaddy returns listings in 'results' array
                listings = data.get("results", [])

                # Transform to Pounce format
                transformed = []
                for item in listings:
                    domain = item.get("fqdn", "") or item.get("domain", "")
                    tld_part = domain.rsplit(".", 1)[-1] if "." in domain else ""

                    # Parse end time
                    end_time = None
                    end_at = item.get("endingAt") or item.get("auctionEndTime")
                    if end_at:
                        try:
                            end_time = datetime.fromisoformat(end_at.replace("Z", "+00:00")).replace(tzinfo=None)
                        except:
                            pass

                    # Parse price (can be in different fields)
                    price = (
                        item.get("price") or
                        item.get("currentBidPrice") or
                        item.get("auctionPrice") or
                        item.get("minBid") or 0
                    )

                    transformed.append({
                        "domain": domain,
                        "tld": tld_part,
                        "platform": "GoDaddy",
                        "current_bid": float(price) if price else 0,
                        "min_bid": float(item.get("minBid", 0) or 0),
                        "num_bids": int(item.get("bids", 0) or item.get("bidCount", 0) or 0),
                        "end_time": end_time or datetime.utcnow() + timedelta(days=1),
                        "buy_now_price": float(item.get("buyNowPrice")) if item.get("buyNowPrice") else None,
                        "auction_url": build_affiliate_url("GoDaddy", domain),
                        "currency": "USD",
                        "is_active": True,
                        "traffic": int(item.get("traffic", 0) or 0),
                        "domain_authority": int(item.get("domainAuthority", 0) or item.get("valuationPrice", 0) or 0),
                    })

                return {
                    "items": transformed,
                    "total": data.get("totalRecordCount", len(transformed)),
                    "has_more": len(listings) >= limit,
                }

        except Exception as e:
            logger.exception(f"GoDaddy API scraper error: {e}")
            return {"items": [], "total": 0, "error": str(e)}

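# Illustrative usage sketch (annotation, not part of the original module): pulling
# GoDaddy auctions that close within the next 6 hours, sorted by end time, and
# keeping only contested ones. The sort key and hour window are example values.
async def _example_godaddy_ending_soon() -> list:
    scraper = GoDaddyApiScraper()
    result = await scraper.fetch_auctions(
        limit=150,
        sort_by="endingAt:asc",
        ending_within_hours=6,
    )
    return [a for a in result["items"] if a["num_bids"] > 0]
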
# ═══════════════════════════════════════════════════════════════════════════════
# PARK.IO SCRAPER — Backorder Service API
# ═══════════════════════════════════════════════════════════════════════════════


class ParkIoApiScraper:
    """
    Scraper for Park.io domain backorders.

    Park.io specializes in catching expiring domains - great for drops!

    Endpoint: https://park.io/api/domains
    """

    BASE_URL = "https://park.io"
    API_ENDPOINT = "/api/domains"

    async def fetch_pending_drops(
        self,
        limit: int = 100,
        tld: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Fetch pending domain drops from Park.io."""
        try:
            async with httpx.AsyncClient(timeout=30.0) as client:
                params = {
                    "limit": limit,
                    "status": "pending",  # Pending drops
                }

                if tld:
                    params["tld"] = tld.lstrip(".")

                response = await client.get(
                    f"{self.BASE_URL}{self.API_ENDPOINT}",
                    params=params,
                    headers={
                        "Accept": "application/json",
                        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
                    },
                )

                if response.status_code != 200:
                    logger.error(f"Park.io API error: {response.status_code}")
                    return {"items": [], "total": 0, "error": response.text}

                data = response.json()
                domains = data.get("domains", []) if isinstance(data, dict) else data

                # Transform to Pounce format
                transformed = []
                for item in domains:
                    domain = item.get("domain", "") or item.get("name", "")
                    tld_part = domain.rsplit(".", 1)[-1] if "." in domain else ""

                    # Parse drop date
                    drop_date = None
                    drop_at = item.get("drop_date") or item.get("expires_at")
                    if drop_at:
                        try:
                            drop_date = datetime.fromisoformat(drop_at.replace("Z", "+00:00")).replace(tzinfo=None)
                        except:
                            drop_date = datetime.utcnow() + timedelta(days=1)

                    transformed.append({
                        "domain": domain,
                        "tld": tld_part,
                        "platform": "Park.io",
                        "current_bid": float(item.get("price", 99)),  # Park.io default price
                        "min_bid": float(item.get("min_price", 99)),
                        "num_bids": int(item.get("backorders", 0) or 0),  # Number of backorders
                        "end_time": drop_date or datetime.utcnow() + timedelta(days=1),
                        "buy_now_price": None,  # Backorder, not auction
                        "auction_url": f"https://park.io/domains/{domain}",
                        "auction_type": "backorder",
                        "currency": "USD",
                        "is_active": True,
                    })

                return {
                    "items": transformed,
                    "total": len(transformed),
                    "has_more": len(domains) >= limit,
                }

        except Exception as e:
            logger.exception(f"Park.io API scraper error: {e}")
            return {"items": [], "total": 0, "error": str(e)}

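# Illustrative usage sketch (annotation, not part of the original module): listing
# pending drops for a single TLD. The ".io" filter is an example value.
async def _example_parkio_pending_io() -> list:
    scraper = ParkIoApiScraper()
    result = await scraper.fetch_pending_drops(limit=50, tld="io")
    return [d["domain"] for d in result["items"]]
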
# ═══════════════════════════════════════════════════════════════════════════════
# NAMEJET SCRAPER — Hidden AJAX API
# ═══════════════════════════════════════════════════════════════════════════════


class NameJetApiScraper:
    """
    Scraper for NameJet auctions using their AJAX endpoint.

    NameJet is owned by GoDaddy but operates independently.
    Uses a hidden AJAX endpoint for loading auction data.
    """

    BASE_URL = "https://www.namejet.com"
    AJAX_ENDPOINT = "/PreRelease/Auctions/LoadPage"

    async def fetch_auctions(
        self,
        limit: int = 100,
        page: int = 1,
        sort_by: str = "EndTime",
    ) -> Dict[str, Any]:
        """Fetch auctions from NameJet AJAX API."""
        try:
            async with httpx.AsyncClient(timeout=30.0) as client:
                # NameJet uses POST with form data
                form_data = {
                    "page": page,
                    "rows": limit,
                    "sidx": sort_by,
                    "sord": "asc",
                }

                response = await client.post(
                    f"{self.BASE_URL}{self.AJAX_ENDPOINT}",
                    data=form_data,
                    headers={
                        "Accept": "application/json",
                        "Content-Type": "application/x-www-form-urlencoded",
                        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
                        "Referer": "https://www.namejet.com/PreRelease/Auctions",
                        "X-Requested-With": "XMLHttpRequest",
                    },
                )

                if response.status_code != 200:
                    logger.error(f"NameJet API error: {response.status_code}")
                    return {"items": [], "total": 0, "error": response.text}

                # Try JSON first, fall back to HTML parsing
                try:
                    data = response.json()
                except:
                    return await self._parse_html_response(response.text)

                # NameJet returns 'rows' array with auction data
                rows = data.get("rows", [])

                # Transform to Pounce format
                transformed = []
                for item in rows:
                    # NameJet format: item.cell contains [domain, endTime, price, bids, ...]
                    cell = item.get("cell", [])
                    if len(cell) < 4:
                        continue

                    domain = cell[0] if isinstance(cell[0], str) else cell[0].get("domain", "")
                    tld_part = domain.rsplit(".", 1)[-1] if "." in domain else ""

                    # Parse end time
                    end_time = None
                    if len(cell) > 1 and cell[1]:
                        try:
                            end_time = datetime.strptime(cell[1], "%m/%d/%Y %H:%M:%S")
                        except:
                            try:
                                end_time = datetime.strptime(cell[1], "%Y-%m-%d %H:%M")
                            except:
                                pass

                    # Parse price
                    price = 0
                    if len(cell) > 2:
                        price_str = str(cell[2]).replace("$", "").replace(",", "")
                        try:
                            price = float(price_str)
                        except:
                            pass

                    # Parse bids
                    bids = 0
                    if len(cell) > 3:
                        try:
                            bids = int(cell[3])
                        except:
                            pass

                    transformed.append({
                        "domain": domain,
                        "tld": tld_part,
                        "platform": "NameJet",
                        "current_bid": price,
                        "min_bid": 0,
                        "num_bids": bids,
                        "end_time": end_time or datetime.utcnow() + timedelta(days=1),
                        "buy_now_price": None,
                        "auction_url": build_affiliate_url("NameJet", domain),
                        "currency": "USD",
                        "is_active": True,
                    })

                return {
                    "items": transformed,
                    "total": data.get("records", len(transformed)),
                    "has_more": len(rows) >= limit,
                }

        except Exception as e:
            logger.exception(f"NameJet API scraper error: {e}")
            return {"items": [], "total": 0, "error": str(e)}

    async def _parse_html_response(self, html: str) -> Dict[str, Any]:
        """Parse HTML response from NameJet when JSON is not available."""
        try:
            from bs4 import BeautifulSoup

            soup = BeautifulSoup(html, "html.parser")
            rows = soup.select("tr[data-domain], .auction-row")

            transformed = []
            for row in rows:
                domain_el = row.select_one("td:first-child, .domain")
                if not domain_el:
                    continue

                domain = domain_el.get_text(strip=True)
                tld_part = domain.rsplit(".", 1)[-1] if "." in domain else ""

                transformed.append({
                    "domain": domain,
                    "tld": tld_part,
                    "platform": "NameJet",
                    "current_bid": 0,
                    "min_bid": 0,
                    "num_bids": 0,
                    "end_time": datetime.utcnow() + timedelta(days=1),
                    "buy_now_price": None,
                    "auction_url": build_affiliate_url("NameJet", domain),
                    "currency": "USD",
                    "is_active": True,
                })

            return {
                "items": transformed,
                "total": len(transformed),
                "has_more": False,
            }

        except Exception as e:
            logger.exception(f"NameJet HTML parsing error: {e}")
            return {"items": [], "total": 0, "error": str(e)}

# ═══════════════════════════════════════════════════════════════════════════════
# UNIFIED SCRAPER — Combines all hidden API scrapers
# ═══════════════════════════════════════════════════════════════════════════════


class HiddenApiScraperService:
    """
    Unified service that combines all hidden API scrapers.

    Priority order:
    1. GoDaddy JSON API (most reliable, 150 auctions/request)
    2. Dynadot REST API (100 auctions/request)
    3. NameJet AJAX (requires parsing)
    4. Park.io (backorders)
    5. Namecheap GraphQL (requires query hash - may fail)
    6. Sav.com AJAX (HTML fallback)

    All URLs include affiliate tracking for monetization.
    """

    def __init__(self):
        self.namecheap = NamecheapApiScraper()
        self.dynadot = DynadotApiScraper()
        self.sav = SavApiScraper()
        self.godaddy = GoDaddyApiScraper()
        self.parkio = ParkIoApiScraper()
        self.namejet = NameJetApiScraper()

    async def scrape_all(self, limit_per_platform: int = 100) -> Dict[str, Any]:
        """
        Scrape all platforms using hidden APIs.

        Returns combined results with platform breakdown.
        """
        results = {
            "total_found": 0,
            "platforms": {},
            "errors": [],
            "items": [],
        }

        # ═══════════════════════════════════════════════════════════
        # TIER 1: Most Reliable JSON APIs
        # ═══════════════════════════════════════════════════════════

        # Scrape GoDaddy (NEW - Most reliable!)
        try:
            godaddy_data = await self.godaddy.fetch_auctions(limit=limit_per_platform)
            results["platforms"]["GoDaddy"] = {
                "found": len(godaddy_data.get("items", [])),
                "total": godaddy_data.get("total", 0),
            }
            results["items"].extend(godaddy_data.get("items", []))
            results["total_found"] += len(godaddy_data.get("items", []))

            if godaddy_data.get("error"):
                results["errors"].append(f"GoDaddy: {godaddy_data['error']}")

        except Exception as e:
            results["errors"].append(f"GoDaddy: {str(e)}")

        # Scrape Dynadot
        try:
            dynadot_data = await self.dynadot.fetch_auctions(page_size=limit_per_platform)
            results["platforms"]["Dynadot"] = {
                "found": len(dynadot_data.get("items", [])),
                "total": dynadot_data.get("total", 0),
            }
            results["items"].extend(dynadot_data.get("items", []))
            results["total_found"] += len(dynadot_data.get("items", []))

            if dynadot_data.get("error"):
                results["errors"].append(f"Dynadot: {dynadot_data['error']}")

        except Exception as e:
            results["errors"].append(f"Dynadot: {str(e)}")

        # ═══════════════════════════════════════════════════════════
        # TIER 2: AJAX/HTML Scrapers
        # ═══════════════════════════════════════════════════════════

        # Scrape NameJet (NEW)
        try:
            namejet_data = await self.namejet.fetch_auctions(limit=limit_per_platform)
            results["platforms"]["NameJet"] = {
                "found": len(namejet_data.get("items", [])),
                "total": namejet_data.get("total", 0),
            }
            results["items"].extend(namejet_data.get("items", []))
            results["total_found"] += len(namejet_data.get("items", []))

            if namejet_data.get("error"):
                results["errors"].append(f"NameJet: {namejet_data['error']}")

        except Exception as e:
            results["errors"].append(f"NameJet: {str(e)}")

        # Scrape Park.io (Backorders - NEW)
        try:
            parkio_data = await self.parkio.fetch_pending_drops(limit=limit_per_platform)
            results["platforms"]["Park.io"] = {
                "found": len(parkio_data.get("items", [])),
                "total": parkio_data.get("total", 0),
            }
            results["items"].extend(parkio_data.get("items", []))
            results["total_found"] += len(parkio_data.get("items", []))

            if parkio_data.get("error"):
                results["errors"].append(f"Park.io: {parkio_data['error']}")

        except Exception as e:
            results["errors"].append(f"Park.io: {str(e)}")

        # Scrape Sav.com
        try:
            sav_data = await self.sav.fetch_auctions(page=0)
            results["platforms"]["Sav"] = {
                "found": len(sav_data.get("items", [])),
                "total": sav_data.get("total", 0),
            }
            results["items"].extend(sav_data.get("items", []))
            results["total_found"] += len(sav_data.get("items", []))

            if sav_data.get("error"):
                results["errors"].append(f"Sav: {sav_data['error']}")

        except Exception as e:
            results["errors"].append(f"Sav: {str(e)}")

        # ═══════════════════════════════════════════════════════════
        # TIER 3: Experimental (May require fixes)
        # ═══════════════════════════════════════════════════════════

        # Scrape Namecheap (GraphQL - needs query hash)
        try:
            namecheap_data = await self.namecheap.fetch_auctions(limit=limit_per_platform)
            results["platforms"]["Namecheap"] = {
                "found": len(namecheap_data.get("items", [])),
                "total": namecheap_data.get("total", 0),
            }
            results["items"].extend(namecheap_data.get("items", []))
            results["total_found"] += len(namecheap_data.get("items", []))

            if namecheap_data.get("error"):
                results["errors"].append(f"Namecheap: {namecheap_data['error']}")

        except Exception as e:
            results["errors"].append(f"Namecheap: {str(e)}")

        return results

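# Illustrative sketch (annotation, not part of the service): de-duplicating the
# combined feed returned by scrape_all() by domain, keeping the cheapest listing
# per domain and ordering by end time. Field names match the dicts built above.
async def _example_dedupe_scrape_all(limit_per_platform: int = 50) -> list:
    service = HiddenApiScraperService()
    results = await service.scrape_all(limit_per_platform=limit_per_platform)
    best: Dict[str, Dict[str, Any]] = {}
    for item in results["items"]:
        key = item["domain"].lower()
        if key not in best or item["current_bid"] < best[key]["current_bid"]:
            best[key] = item
    return sorted(best.values(), key=lambda a: a["end_time"])
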
# Export instances
namecheap_scraper = NamecheapApiScraper()
dynadot_scraper = DynadotApiScraper()
sav_scraper = SavApiScraper()
godaddy_scraper = GoDaddyApiScraper()
parkio_scraper = ParkIoApiScraper()
namejet_scraper = NameJetApiScraper()
hidden_api_scraper = HiddenApiScraperService()

525
backend/app/services/playwright_scraper.py
Normal file
@ -0,0 +1,525 @@
"""
Playwright-based Stealth Scraper for Cloudflare-protected Domain Auction Sites.

This module uses Playwright with stealth plugins to bypass Cloudflare and other
anti-bot protections. It's designed for enterprise-grade web scraping.

Features:
- Stealth mode (undetectable browser fingerprint)
- Automatic Cloudflare bypass
- Connection pooling
- Retry logic with exponential backoff
- JSON extraction from rendered pages
- Cookie persistence across sessions

Supported Platforms:
- GoDaddy Auctions (Cloudflare protected)
- NameJet (Cloudflare protected)
- Any other protected auction site

Usage:
    scraper = PlaywrightScraperService()
    await scraper.initialize()
    auctions = await scraper.scrape_godaddy()
    await scraper.close()
"""

import asyncio
import json
import logging
import random
from datetime import datetime, timedelta
from typing import Any, Dict, List, Optional
from pathlib import Path

logger = logging.getLogger(__name__)

# Try to import playwright (optional dependency)
try:
    from playwright.async_api import async_playwright, Browser, BrowserContext, Page
    from playwright_stealth import Stealth
    PLAYWRIGHT_AVAILABLE = True
except ImportError:
    PLAYWRIGHT_AVAILABLE = False
    Stealth = None
    logger.warning("Playwright not installed. Stealth scraping disabled.")

class PlaywrightScraperService:
    """
    Enterprise-grade Playwright scraper with Cloudflare bypass.

    Uses stealth techniques to appear as a real browser:
    - Real Chrome user agent
    - WebGL fingerprint spoofing
    - Navigator property spoofing
    - Timezone and locale matching
    """

    # User agents that work well with Cloudflare
    USER_AGENTS = [
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Safari/605.1.15",
    ]

    def __init__(self):
        self.playwright = None
        self.browser: Optional[Browser] = None
        self.context: Optional[BrowserContext] = None
        self._initialized = False
        self._cookie_dir = Path(__file__).parent.parent.parent / "data" / "cookies"
        self._cookie_dir.mkdir(parents=True, exist_ok=True)

    async def initialize(self) -> bool:
        """Initialize the browser instance."""
        if not PLAYWRIGHT_AVAILABLE:
            logger.error("Playwright not available. Install with: pip install playwright playwright-stealth")
            return False

        if self._initialized:
            return True

        try:
            self.playwright = await async_playwright().start()

            # Launch with stealth settings
            self.browser = await self.playwright.chromium.launch(
                headless=True,
                args=[
                    "--disable-blink-features=AutomationControlled",
                    "--disable-dev-shm-usage",
                    "--no-sandbox",
                    "--disable-setuid-sandbox",
                    "--disable-infobars",
                    "--disable-extensions",
                    "--window-size=1920,1080",
                ]
            )

            # Create context with realistic settings
            self.context = await self.browser.new_context(
                user_agent=random.choice(self.USER_AGENTS),
                viewport={"width": 1920, "height": 1080},
                locale="en-US",
                timezone_id="America/New_York",
                geolocation={"longitude": -73.935242, "latitude": 40.730610},
                permissions=["geolocation"],
            )

            # Load saved cookies if available
            await self._load_cookies()

            self._initialized = True
            logger.info("Playwright browser initialized successfully")
            return True

        except Exception as e:
            logger.exception(f"Failed to initialize Playwright: {e}")
            return False

    async def close(self):
        """Close browser and cleanup."""
        if self.context:
            await self._save_cookies()
            await self.context.close()
        if self.browser:
            await self.browser.close()
        if self.playwright:
            await self.playwright.stop()
        self._initialized = False

    async def _load_cookies(self):
        """Load saved cookies from file."""
        cookie_file = self._cookie_dir / "session_cookies.json"
        if cookie_file.exists():
            try:
                with open(cookie_file) as f:
                    cookies = json.load(f)
                await self.context.add_cookies(cookies)
                logger.info(f"Loaded {len(cookies)} saved cookies")
            except Exception as e:
                logger.warning(f"Failed to load cookies: {e}")

    async def _save_cookies(self):
        """Save cookies to file for persistence."""
        try:
            cookies = await self.context.cookies()
            cookie_file = self._cookie_dir / "session_cookies.json"
            with open(cookie_file, "w") as f:
                json.dump(cookies, f)
            logger.info(f"Saved {len(cookies)} cookies")
        except Exception as e:
            logger.warning(f"Failed to save cookies: {e}")

    async def _create_stealth_page(self) -> Page:
        """Create a new page with stealth mode enabled."""
        page = await self.context.new_page()

        # Apply stealth mode
        if Stealth:
            stealth = Stealth(
                navigator_webdriver=True,
                chrome_runtime=True,
                navigator_user_agent=True,
                navigator_vendor=True,
                webgl_vendor=True,
            )
            await stealth.apply_stealth_async(page)

        return page

    async def _wait_for_cloudflare(self, page: Page, timeout: int = 30):
        """Wait for Cloudflare challenge to complete."""
        try:
            # Wait for either the challenge to complete or content to load
            await page.wait_for_function(
                """
                () => {
                    // Check if we're past Cloudflare
                    const title = document.title.toLowerCase();
                    return !title.includes('just a moment') &&
                           !title.includes('attention required') &&
                           !title.includes('checking your browser');
                }
                """,
                timeout=timeout * 1000
            )
            # Additional delay for any remaining JS to execute
            await asyncio.sleep(2)
        except Exception as e:
            logger.warning(f"Cloudflare wait timeout: {e}")

    # ═══════════════════════════════════════════════════════════════════════════════
    # GODADDY AUCTIONS SCRAPER
    # ═══════════════════════════════════════════════════════════════════════════════

    async def scrape_godaddy(self, limit: int = 100) -> Dict[str, Any]:
        """
        Scrape GoDaddy Auctions using Playwright.

        GoDaddy uses Cloudflare + their own bot detection.
        We intercept the API calls made by their frontend.
        """
        if not await self.initialize():
            return {"items": [], "total": 0, "error": "Playwright not initialized"}

        page = None
        try:
            page = await self._create_stealth_page()

            # Intercept XHR requests to capture auction data
            captured_data = []

            async def handle_response(response):
                if "findApiProxy" in response.url and "auction" in response.url:
                    try:
                        data = await response.json()
                        captured_data.append(data)
                    except:
                        pass

            page.on("response", handle_response)

            # Navigate to GoDaddy Auctions
            logger.info("Navigating to GoDaddy Auctions...")
            await page.goto("https://auctions.godaddy.com/beta", wait_until="networkidle")

            # Wait for Cloudflare
            await self._wait_for_cloudflare(page)

            # Wait for auction content to load
            try:
                await page.wait_for_selector('[data-testid="auction-card"], .auction-card, .domain-item', timeout=15000)
            except:
                logger.warning("Auction cards not found, trying to scroll...")

            # Scroll to trigger lazy loading
            await page.evaluate("window.scrollTo(0, document.body.scrollHeight / 2)")
            await asyncio.sleep(2)

            # Try to extract from intercepted API calls first
            if captured_data:
                return self._parse_godaddy_api_response(captured_data)

            # Fallback: Extract from DOM
            return await self._extract_godaddy_from_dom(page)

        except Exception as e:
            logger.exception(f"GoDaddy scraping error: {e}")
            return {"items": [], "total": 0, "error": str(e)}
        finally:
            if page:
                await page.close()

    def _parse_godaddy_api_response(self, captured_data: List[Dict]) -> Dict[str, Any]:
        """Parse captured API response from GoDaddy."""
        items = []

        for data in captured_data:
            results = data.get("results", [])
            for item in results:
                domain = item.get("fqdn", "") or item.get("domain", "")
                if not domain:
                    continue

                tld = domain.rsplit(".", 1)[-1] if "." in domain else ""

                # Parse end time
                end_time = None
                end_at = item.get("endingAt") or item.get("auctionEndTime")
                if end_at:
                    try:
                        end_time = datetime.fromisoformat(end_at.replace("Z", "+00:00")).replace(tzinfo=None)
                    except:
                        pass

                price = item.get("price") or item.get("currentBidPrice") or item.get("minBid") or 0

                items.append({
                    "domain": domain,
                    "tld": tld,
                    "platform": "GoDaddy",
                    "current_bid": float(price) if price else 0,
                    "min_bid": float(item.get("minBid", 0) or 0),
                    "num_bids": int(item.get("bids", 0) or item.get("bidCount", 0) or 0),
                    "end_time": end_time or datetime.utcnow() + timedelta(days=1),
                    "buy_now_price": float(item.get("buyNowPrice")) if item.get("buyNowPrice") else None,
                    "auction_url": f"https://auctions.godaddy.com/trpItemListing.aspx?domain={domain}&isc=cjcpounce",
                    "currency": "USD",
                    "is_active": True,
                    "traffic": int(item.get("traffic", 0) or 0),
                    "domain_authority": int(item.get("valuationPrice", 0) or 0),
                })

        return {
            "items": items,
            "total": len(items),
            "source": "api_intercept",
        }

    async def _extract_godaddy_from_dom(self, page: Page) -> Dict[str, Any]:
        """Extract auction data from GoDaddy DOM when API intercept fails."""
        items = []

        try:
            # Try different selectors
            selectors = [
                '[data-testid="auction-card"]',
                '.auction-card',
                '.domain-listing',
                'tr[data-domain]',
                '.domain-row',
            ]

            for selector in selectors:
                elements = await page.query_selector_all(selector)
                if elements:
                    logger.info(f"Found {len(elements)} elements with selector: {selector}")

                    for el in elements[:100]:  # Max 100 items
                        try:
                            # Try to extract domain name
                            domain_el = await el.query_selector('.domain-name, .fqdn, [data-domain], a[href*="domain"]')
                            if domain_el:
                                domain = await domain_el.text_content()
                                domain = domain.strip() if domain else ""
                            else:
                                domain = await el.get_attribute("data-domain") or ""

                            if not domain or "." not in domain:
                                continue

                            tld = domain.rsplit(".", 1)[-1]

                            # Try to extract price
                            price = 0
                            price_el = await el.query_selector('.price, .bid, .current-bid, [data-price]')
                            if price_el:
                                price_text = await price_el.text_content()
                                price = float("".join(c for c in price_text if c.isdigit() or c == ".") or "0")

                            items.append({
                                "domain": domain,
                                "tld": tld,
                                "platform": "GoDaddy",
                                "current_bid": price,
                                "min_bid": 0,
                                "num_bids": 0,
                                "end_time": datetime.utcnow() + timedelta(days=1),
                                "buy_now_price": None,
                                "auction_url": f"https://auctions.godaddy.com/trpItemListing.aspx?domain={domain}&isc=cjcpounce",
                                "currency": "USD",
                                "is_active": True,
                            })
                        except Exception as e:
                            logger.debug(f"Error extracting element: {e}")

                    break  # Found elements, stop trying other selectors

        except Exception as e:
            logger.exception(f"DOM extraction error: {e}")

        return {
            "items": items,
            "total": len(items),
            "source": "dom_extraction",
        }

    # ═══════════════════════════════════════════════════════════════════════════════
    # NAMEJET SCRAPER
    # ═══════════════════════════════════════════════════════════════════════════════

    async def scrape_namejet(self, limit: int = 100) -> Dict[str, Any]:
        """
        Scrape NameJet auctions using Playwright.

        NameJet uses heavy Cloudflare protection.
        """
        if not await self.initialize():
            return {"items": [], "total": 0, "error": "Playwright not initialized"}

        page = None
        try:
            page = await self._create_stealth_page()

            # Navigate to NameJet auctions page
            logger.info("Navigating to NameJet...")
            await page.goto("https://www.namejet.com/Pages/Auctions/ViewAuctions.aspx", wait_until="networkidle")

            # Wait for Cloudflare
            await self._wait_for_cloudflare(page)

            # Wait for auction table
            try:
                await page.wait_for_selector('#MainContent_gvAuctions, .auction-table, table', timeout=15000)
            except:
                logger.warning("NameJet table not found")

            # Extract data from table
            items = []
            rows = await page.query_selector_all('tr[data-id], #MainContent_gvAuctions tr, .auction-row')

            for row in rows[:limit]:
                try:
                    cells = await row.query_selector_all('td')
                    if len(cells) < 3:
                        continue

                    # NameJet format: Domain, End Time, Price, Bids, ...
                    domain = await cells[0].text_content()
                    domain = domain.strip() if domain else ""

                    if not domain or "." not in domain:
                        continue

                    tld = domain.rsplit(".", 1)[-1]

                    # Parse price
                    price = 0
                    if len(cells) > 2:
                        price_text = await cells[2].text_content()
                        price = float("".join(c for c in (price_text or "0") if c.isdigit() or c == ".") or "0")

                    # Parse bids
                    bids = 0
                    if len(cells) > 3:
                        bids_text = await cells[3].text_content()
                        bids = int("".join(c for c in (bids_text or "0") if c.isdigit()) or "0")

                    items.append({
                        "domain": domain,
                        "tld": tld,
                        "platform": "NameJet",
                        "current_bid": price,
                        "min_bid": 0,
                        "num_bids": bids,
                        "end_time": datetime.utcnow() + timedelta(days=1),
                        "buy_now_price": None,
                        "auction_url": f"https://www.namejet.com/Pages/Auctions/ViewAuctions.aspx?domain={domain}",
                        "currency": "USD",
                        "is_active": True,
                    })
                except Exception as e:
                    logger.debug(f"Error parsing row: {e}")

            return {
                "items": items,
                "total": len(items),
                "source": "playwright",
            }

        except Exception as e:
            logger.exception(f"NameJet scraping error: {e}")
            return {"items": [], "total": 0, "error": str(e)}
        finally:
            if page:
                await page.close()

    # ═══════════════════════════════════════════════════════════════════════════════
    # UNIFIED SCRAPE METHOD
    # ═══════════════════════════════════════════════════════════════════════════════

    async def scrape_all_protected(self) -> Dict[str, Any]:
        """
        Scrape all Cloudflare-protected platforms.

        Returns combined results from:
        - GoDaddy Auctions
        - NameJet
        """
        results = {
            "total_found": 0,
            "platforms": {},
            "items": [],
            "errors": [],
        }

        if not PLAYWRIGHT_AVAILABLE:
            results["errors"].append("Playwright not installed")
            return results

        try:
            await self.initialize()

            # Scrape GoDaddy
            logger.info("Scraping GoDaddy with Playwright...")
            godaddy_result = await self.scrape_godaddy()
            results["platforms"]["GoDaddy"] = {
                "found": len(godaddy_result.get("items", [])),
                "source": godaddy_result.get("source", "unknown"),
            }
            results["items"].extend(godaddy_result.get("items", []))
            results["total_found"] += len(godaddy_result.get("items", []))

            if godaddy_result.get("error"):
                results["errors"].append(f"GoDaddy: {godaddy_result['error']}")

            # Small delay between platforms
            await asyncio.sleep(3)

            # Scrape NameJet
            logger.info("Scraping NameJet with Playwright...")
            namejet_result = await self.scrape_namejet()
            results["platforms"]["NameJet"] = {
                "found": len(namejet_result.get("items", [])),
                "source": namejet_result.get("source", "unknown"),
            }
            results["items"].extend(namejet_result.get("items", []))
            results["total_found"] += len(namejet_result.get("items", []))

            if namejet_result.get("error"):
                results["errors"].append(f"NameJet: {namejet_result['error']}")

        except Exception as e:
            logger.exception(f"Playwright scraping error: {e}")
            results["errors"].append(str(e))
        finally:
            await self.close()

        return results


# Singleton instance
playwright_scraper = PlaywrightScraperService()

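# Illustrative usage sketch (annotation, not part of the original module): running the
# combined protected-platform scrape once. scrape_all_protected() handles browser
# initialization and shutdown itself, so no explicit lifecycle calls are needed here.
async def _example_scrape_protected_platforms() -> Dict[str, Any]:
    results = await playwright_scraper.scrape_all_protected()
    for platform, stats in results["platforms"].items():
        logger.info(f"{platform}: {stats['found']} items via {stats.get('source', 'unknown')}")
    return results
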
314
backend/app/services/sedo_api.py
Normal file
@ -0,0 +1,314 @@
"""
Sedo Official API Client

This service provides access to Sedo's official API for:
- Domain search and auctions
- Marketplace listings
- Domain pricing

API Documentation: https://api.sedo.com/apidocs/v1/
Type: XML-RPC based API

SECURITY:
- Credentials are loaded from environment variables
- NEVER hardcode credentials in this file

WHERE TO FIND YOUR CREDENTIALS:
1. Login to https://sedo.com
2. Go to "Mein Sedo" / "My Sedo"
3. Navigate to "API-Zugang" / "API Access"
4. You'll find:
   - Partner ID (your user ID)
   - SignKey (signature key for authentication)

Usage:
    from app.services.sedo_api import sedo_client

    # Search domains for sale
    listings = await sedo_client.search_domains(keyword="tech")
"""
import logging
import hashlib
import time
from datetime import datetime, timedelta
from typing import Optional, List, Dict, Any
import httpx
from xml.etree import ElementTree

from app.config import get_settings

logger = logging.getLogger(__name__)

class SedoAPIClient:
    """
    Official Sedo API Client.

    Sedo uses an XML-RPC style API with signature-based authentication.
    Each request must include:
    - partnerid: Your partner ID
    - signkey: Your signature key (or hashed signature)
    """

    def __init__(self):
        self.settings = get_settings()
        self.base_url = self.settings.sedo_api_base or "https://api.sedo.com/api/v1/"
        self.partner_id = self.settings.sedo_partner_id
        self.sign_key = self.settings.sedo_sign_key

        # HTTP client
        self._client: Optional[httpx.AsyncClient] = None

    @property
    def is_configured(self) -> bool:
        """Check if API credentials are configured."""
        return bool(self.partner_id and self.sign_key)

    async def _get_client(self) -> httpx.AsyncClient:
        """Get or create HTTP client."""
        if self._client is None or self._client.is_closed:
            self._client = httpx.AsyncClient(
                timeout=30.0,
                headers={
                    "Content-Type": "application/x-www-form-urlencoded",
                    "User-Agent": "Pounce/1.0 (Domain Intelligence Platform)"
                }
            )
        return self._client

    async def close(self):
        """Close the HTTP client."""
        if self._client and not self._client.is_closed:
            await self._client.aclose()
        self._client = None

    def _generate_signature(self, params: Dict[str, Any]) -> str:
        """
        Generate request signature for Sedo API.

        The signature is typically: MD5(signkey + sorted_params)
        Check Sedo docs for exact implementation.
        """
        # Simple implementation - may need adjustment based on actual Sedo requirements
        sorted_params = "&".join(f"{k}={v}" for k, v in sorted(params.items()))
        signature_base = f"{self.sign_key}{sorted_params}"
        return hashlib.md5(signature_base.encode()).hexdigest()

    async def _request(
        self,
        endpoint: str,
        params: Optional[Dict] = None
    ) -> Dict[str, Any]:
        """Make an authenticated API request."""
        if not self.is_configured:
            raise ValueError("Sedo API credentials not configured")

        client = await self._get_client()

        # Base params for all requests
        request_params = {
            "partnerid": self.partner_id,
            "signkey": self.sign_key,
            **(params or {})
        }

        url = f"{self.base_url.rstrip('/')}/{endpoint.lstrip('/')}"

        try:
            response = await client.get(url, params=request_params)
            response.raise_for_status()

            # Sedo API can return XML or JSON depending on endpoint
            content_type = response.headers.get("content-type", "")

            if "xml" in content_type:
                return self._parse_xml_response(response.text)
            elif "json" in content_type:
                return response.json()
            else:
                # Try JSON first, fallback to XML
                try:
                    return response.json()
                except:
                    return self._parse_xml_response(response.text)

        except httpx.HTTPError as e:
            logger.error(f"Sedo API request failed: {e}")
            raise

    def _parse_xml_response(self, xml_text: str) -> Dict[str, Any]:
        """Parse XML response from Sedo API."""
        try:
            root = ElementTree.fromstring(xml_text)
            return self._xml_to_dict(root)
        except Exception as e:
            logger.warning(f"Failed to parse XML: {e}")
            return {"raw": xml_text}

    def _xml_to_dict(self, element) -> Dict[str, Any]:
        """Convert XML element to dictionary."""
        result = {}
        for child in element:
            if len(child) > 0:
                result[child.tag] = self._xml_to_dict(child)
            else:
                result[child.tag] = child.text
        return result

    # =========================================================================
    # DOMAIN SEARCH ENDPOINTS
    # =========================================================================

    async def search_domains(
        self,
        keyword: Optional[str] = None,
        tld: Optional[str] = None,
        min_price: Optional[float] = None,
        max_price: Optional[float] = None,
        page: int = 1,
        page_size: int = 100,
    ) -> Dict[str, Any]:
        """
        Search for domains listed on Sedo marketplace.

        Returns domains for sale (not auctions).
        """
        params = {
            "output_method": "json",  # Request JSON response
        }

        if keyword:
            params["keyword"] = keyword
        if tld:
            params["tld"] = tld.lstrip(".")
        if min_price is not None:
            params["minprice"] = min_price
        if max_price is not None:
            params["maxprice"] = max_price
        if page:
            params["page"] = page
        if page_size:
            params["pagesize"] = min(page_size, 100)

        return await self._request("DomainSearch", params)

    async def search_auctions(
        self,
        keyword: Optional[str] = None,
        tld: Optional[str] = None,
        ending_within_hours: Optional[int] = None,
        page: int = 1,
        page_size: int = 100,
    ) -> Dict[str, Any]:
        """
        Search for active domain auctions on Sedo.
        """
        params = {
            "output_method": "json",
            "auction": "true",  # Only auctions
        }

        if keyword:
            params["keyword"] = keyword
        if tld:
            params["tld"] = tld.lstrip(".")
        if page:
            params["page"] = page
        if page_size:
            params["pagesize"] = min(page_size, 100)

        return await self._request("DomainSearch", params)

    async def get_domain_details(self, domain: str) -> Dict[str, Any]:
        """Get detailed information about a specific domain."""
        params = {
            "domain": domain,
            "output_method": "json",
        }
        return await self._request("DomainDetails", params)

    async def get_ending_soon_auctions(
        self,
        hours: int = 24,
        page_size: int = 50
    ) -> Dict[str, Any]:
        """Get auctions ending soon."""
        return await self.search_auctions(
            ending_within_hours=hours,
            page_size=page_size
        )

# =========================================================================
|
||||||
|
# UTILITY METHODS
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
async def test_connection(self) -> Dict[str, Any]:
|
||||||
|
"""Test the API connection and credentials."""
|
||||||
|
if not self.is_configured:
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"error": "API credentials not configured",
|
||||||
|
"configured": False,
|
||||||
|
"hint": "Find your credentials at: Sedo.com → Mein Sedo → API-Zugang"
|
||||||
|
}
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Try a simple search to test connection
|
||||||
|
result = await self.search_domains(keyword="test", page_size=1)
|
||||||
|
return {
|
||||||
|
"success": True,
|
||||||
|
"configured": True,
|
||||||
|
"partner_id": self.partner_id,
|
||||||
|
"authenticated_at": datetime.utcnow().isoformat()
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"error": str(e),
|
||||||
|
"configured": True
|
||||||
|
}
|
||||||
|
|
||||||
|
    def transform_to_pounce_format(self, sedo_listing: Dict) -> Dict[str, Any]:
        """
        Transform a Sedo listing into the Pounce internal format.

        Maps Sedo fields to our DomainAuction model.
        """
        domain = sedo_listing.get("domain") or sedo_listing.get("domainname", "")
        tld = domain.rsplit(".", 1)[1] if "." in domain else ""

        # Parse the end time if this is an auction; fall back to +7 days
        end_time_str = sedo_listing.get("auctionend") or sedo_listing.get("enddate")
        if end_time_str:
            try:
                end_time = datetime.fromisoformat(end_time_str.replace("Z", "+00:00"))
            except (ValueError, TypeError):
                end_time = datetime.utcnow() + timedelta(days=7)
        else:
            end_time = datetime.utcnow() + timedelta(days=7)

        # Price handling: strip currency symbols and thousands separators
        price = sedo_listing.get("price") or sedo_listing.get("currentbid") or 0
        if isinstance(price, str):
            price = float(price.replace(",", "").replace("$", "").replace("€", ""))

        return {
            "domain": domain,
            "tld": tld,
            "platform": "Sedo",
            "current_bid": price,
            "buy_now_price": sedo_listing.get("buynow") or sedo_listing.get("bin"),
            "currency": sedo_listing.get("currency", "EUR"),
            "num_bids": sedo_listing.get("numbids") or sedo_listing.get("bidcount", 0),
            "end_time": end_time,
            "auction_url": f"https://sedo.com/search/details/?domain={domain}",
            "age_years": None,
            "reserve_met": sedo_listing.get("reservemet"),
            "traffic": sedo_listing.get("traffic"),
            "is_auction": sedo_listing.get("isaution") == "1" or sedo_listing.get("auction") == True,
        }


# Singleton instance
sedo_client = SedoAPIClient()
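For orientation, here is a minimal usage sketch of the client above (not part of this changeset). It assumes an async caller and that the DomainSearch response exposes its listings under a "domains" key, which may differ from the real Sedo payload:

# Sketch only: pull ending-soon Sedo auctions and normalize them via
# transform_to_pounce_format(). The "domains" response key is an assumption.
import asyncio

from app.services.sedo_api import sedo_client


async def preview_ending_auctions() -> None:
    status = await sedo_client.test_connection()
    if not status["success"]:
        print("Sedo API unavailable:", status.get("error") or status.get("hint"))
        return

    raw = await sedo_client.get_ending_soon_auctions(hours=24, page_size=10)
    for listing in raw.get("domains", []):  # assumed response shape
        item = sedo_client.transform_to_pounce_format(listing)
        print(item["domain"], item["current_bid"], item["currency"], item["end_time"])


if __name__ == "__main__":
    asyncio.run(preview_ending_auctions())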
1 backend/data/cookies/session_cookies.json  Normal file
@ -0,0 +1 @@
[{"name": "market", "value": "de-CH", "domain": ".godaddy.com", "path": "/", "expires": 1796986248.403492, "httpOnly": false, "secure": false, "sameSite": "Lax"}, {"name": "currency", "value": "CHF", "domain": ".godaddy.com", "path": "/", "expires": 1796986248.425822, "httpOnly": false, "secure": false, "sameSite": "Lax"}]
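The new session_cookies.json stores exported browser cookies (GoDaddy market de-CH, currency CHF). Below is a hedged sketch of how a scraper could reuse such a file; httpx and the helper functions are illustrative assumptions, not something this changeset introduces:

# Hypothetical helper: load the exported cookies into an httpx.AsyncClient so
# requests carry the saved market/currency preferences.
import json
from pathlib import Path

import httpx

COOKIE_FILE = Path("backend/data/cookies/session_cookies.json")


def load_cookies(path: Path = COOKIE_FILE) -> httpx.Cookies:
    cookies = httpx.Cookies()
    for c in json.loads(path.read_text()):
        # Expiry and the httpOnly/secure/sameSite flags from the export are not
        # needed for outgoing requests; name, value, domain and path suffice.
        cookies.set(c["name"], c["value"], domain=c["domain"], path=c.get("path", "/"))
    return cookies


async def fetch_with_session(url: str) -> str:
    async with httpx.AsyncClient(cookies=load_cookies(), follow_redirects=True) as client:
        resp = await client.get(url)
        resp.raise_for_status()
        return resp.text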
@ -164,16 +164,39 @@ export default function AuctionsPage()
   const loadAuctions = async () => {
     setLoading(true)
     try {
-      const [all, ending, hot, pounce] = await Promise.all([
-        api.getAuctions(undefined, undefined, undefined, undefined, undefined, false, 'ending', 100, 0),
-        api.getEndingSoonAuctions(24, 50), // 24 hours, limit 50
-        api.getHotAuctions(50),
-        api.getMarketFeed({ source: 'pounce', limit: 10 }).catch(() => ({ items: [] })),
+      // Use unified feed API for all data - same as Terminal Market Page
+      const [allFeed, endingFeed, hotFeed, pounceFeed] = await Promise.all([
+        api.getMarketFeed({ source: 'all', limit: 100, sortBy: 'time' }),
+        api.getMarketFeed({ source: 'external', endingWithin: 24, limit: 50, sortBy: 'time' }),
+        api.getMarketFeed({ source: 'external', limit: 50, sortBy: 'score' }), // Hot = highest score
+        api.getMarketFeed({ source: 'pounce', limit: 10 }),
       ])
-      setAllAuctions(all.auctions || [])
-      setEndingSoon(ending || [])
-      setHotAuctions(hot || [])
-      setPounceItems(pounce.items || [])
+
+      // Convert MarketItem to Auction format for compatibility
+      const convertToAuction = (item: MarketItem): Auction => ({
+        domain: item.domain,
+        platform: item.source,
+        platform_url: item.url,
+        current_bid: item.price,
+        currency: item.currency,
+        num_bids: item.num_bids || 0,
+        end_time: item.end_time || '',
+        time_remaining: item.time_remaining || '',
+        buy_now_price: item.price_type === 'fixed' ? item.price : null,
+        reserve_met: null,
+        traffic: null,
+        age_years: null,
+        tld: item.tld,
+        affiliate_url: item.url,
+      })
+
+      // Filter out Pounce Direct from auction lists (they go in separate section)
+      const externalOnly = (items: MarketItem[]) => items.filter(i => !i.is_pounce).map(convertToAuction)
+
+      setAllAuctions(externalOnly(allFeed.items || []))
+      setEndingSoon(externalOnly(endingFeed.items || []))
+      setHotAuctions(externalOnly(hotFeed.items || []))
+      setPounceItems(pounceFeed.items || [])
     } catch (error) {
       console.error('Failed to load auctions:', error)
     } finally {